1
1
mirror of https://github.com/wader/fq.git synced 2024-12-19 19:31:37 +03:00
fq/format/mp4/mp4.go

466 lines
13 KiB
Go
Raw Normal View History

2020-06-08 03:29:51 +03:00
package mp4
2021-09-14 18:01:25 +03:00
// Tries to decode ISOBMFF quicktime mov
// Uses naming from ISOBMFF when possible
// ISO/IEC 14496-12
2020-06-08 03:29:51 +03:00
// Quicktime file format https://developer.apple.com/standards/qtff-2001.pdf
// FLAC in ISOBMFF https://github.com/xiph/flac/blob/master/doc/isoflac.txt
2021-09-14 18:01:25 +03:00
// vp9 in ISOBMFF https://www.webmproject.org/vp9/mp4/
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW43
2020-06-08 03:29:51 +03:00
// TODO: validate structure better? trak/stco etc
// TODO: keep track of structure somehow to detect errors
2021-09-14 18:01:25 +03:00
// TODO: ISO-14496 says mp4 mdat can begin and end with original header/trailer (not used?)
2020-06-08 03:29:51 +03:00
// TODO: split into mov and mp4 decoder?
// TODO: split into mp4_box decoder? needs complex in/out args?
// TODO: better probe, find first 2 boxes, should be free,ftyp or mdat?
import (
"embed"
"fmt"
"github.com/wader/fq/format"
"github.com/wader/fq/internal/cmpex"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/interp"
2022-08-31 11:34:32 +03:00
"golang.org/x/exp/slices"
2020-06-08 03:29:51 +03:00
)
//go:embed mp4.jq
//go:embed mp4.md
2020-06-08 03:29:51 +03:00
var mp4FS embed.FS
// Format groups used when decoding boxes and samples. Each one is wired up
// through an Out pointer in the Dependencies list passed to
// interp.RegisterFormat in init.
var (
	aacFrameGroup            decode.Group
	av1CCRGroup              decode.Group
	av1FrameGroup            decode.Group
	avcAUGroup               decode.Group
	avcDCRGroup              decode.Group
	flacFrameGroup           decode.Group
	flacMetadatablocksGroup  decode.Group
	hevcAUGroup              decode.Group
	hevcCDCRGroup            decode.Group
	iccProfileGroup          decode.Group
	id3v2Group               decode.Group
	imageGroup               decode.Group
	jpegGroup                decode.Group
	mp3FrameGroup            decode.Group
	mpegESGroup              decode.Group
	mpegPESPacketSampleGroup decode.Group
	opusPacketFrameGroup     decode.Group
	pngGroup                 decode.Group
	proResFrameGroup         decode.Group
	protoBufWidevineGroup    decode.Group
	psshPlayreadyGroup       decode.Group
	vorbisPacketGroup        decode.Group
	vp9FrameGroup            decode.Group
	vpxCCRGroup              decode.Group
)
2020-06-08 03:29:51 +03:00
func init() {
interp.RegisterFormat(
2023-05-01 14:19:04 +03:00
format.MP4,
&decode.Format{
Description: "ISOBMFF, QuickTime and similar",
Groups: []*decode.Group{
format.Probe,
format.Image, // avif
},
DecodeFn: mp4Decode,
2023-05-01 14:19:04 +03:00
DefaultInArg: format.MP4_In{
DecodeSamples: true,
AllowTruncated: false,
},
Dependencies: []decode.Dependency{
2023-05-01 14:19:04 +03:00
{Groups: []*decode.Group{format.AAC_Frame}, Out: &aacFrameGroup},
{Groups: []*decode.Group{format.AV1_CCR}, Out: &av1CCRGroup},
{Groups: []*decode.Group{format.AV1_Frame}, Out: &av1FrameGroup},
{Groups: []*decode.Group{format.AVC_AU}, Out: &avcAUGroup},
{Groups: []*decode.Group{format.AVC_DCR}, Out: &avcDCRGroup},
{Groups: []*decode.Group{format.FLAC_Frame}, Out: &flacFrameGroup},
{Groups: []*decode.Group{format.FLAC_Metadatablocks}, Out: &flacMetadatablocksGroup},
{Groups: []*decode.Group{format.HEVC_AU}, Out: &hevcAUGroup},
{Groups: []*decode.Group{format.HEVC_DCR}, Out: &hevcCDCRGroup},
{Groups: []*decode.Group{format.ICC_Profile}, Out: &iccProfileGroup},
{Groups: []*decode.Group{format.ID3v2}, Out: &id3v2Group},
{Groups: []*decode.Group{format.Image}, Out: &imageGroup},
2023-05-01 14:19:04 +03:00
{Groups: []*decode.Group{format.JPEG}, Out: &jpegGroup},
{Groups: []*decode.Group{format.MP3_Frame}, Out: &mp3FrameGroup},
{Groups: []*decode.Group{format.MPEG_ES}, Out: &mpegESGroup},
{Groups: []*decode.Group{format.MPEG_PES_Packet}, Out: &mpegPESPacketSampleGroup},
{Groups: []*decode.Group{format.Opus_Packet}, Out: &opusPacketFrameGroup},
{Groups: []*decode.Group{format.PNG}, Out: &pngGroup},
{Groups: []*decode.Group{format.Prores_Frame}, Out: &proResFrameGroup},
{Groups: []*decode.Group{format.ProtobufWidevine}, Out: &protoBufWidevineGroup},
2023-05-01 14:19:04 +03:00
{Groups: []*decode.Group{format.PSSH_Playready}, Out: &psshPlayreadyGroup},
{Groups: []*decode.Group{format.Vorbis_Packet}, Out: &vorbisPacketGroup},
{Groups: []*decode.Group{format.VP9_Frame}, Out: &vp9FrameGroup},
{Groups: []*decode.Group{format.VPX_CCR}, Out: &vpxCCRGroup},
},
})
interp.RegisterFS(mp4FS)
2020-06-08 03:29:51 +03:00
}
// stsc is one sample-to-chunk entry collected for a track.
type stsc struct {
	// firstChunk is one-based: mp4Tracks compares firstChunk-1 against the
	// zero-based stco index.
	firstChunk int
	// samplesPerChunk is the number of samples read from a chunk before
	// mp4Tracks advances to the next stco entry.
	samplesPerChunk int
}
type moof struct {
offset int64
defaultSampleSize int64
defaultSampleDescriptionIndex int
truns []trun
2022-07-07 20:02:10 +03:00
sencs []senc
}
// senc records per sample entries for one track run. mp4Tracks only looks at
// how many entries there are: a sample that has an entry is not decoded.
// TODO: nothing for now
type senc struct {
	entries []struct{}
}
// trun describes one run of consecutive samples inside a movie fragment.
type trun struct {
	// dataOffset is added to moof.offset to get the byte position of the
	// first sample in the run.
	dataOffset int64
	// samplesSizes has the size in bytes of each sample, in order.
	samplesSizes []int64
}
// sampleDescription is one sample description entry of a track.
type sampleDescription struct {
	dataFormat string
	// originalFormat takes precedence over dataFormat in mp4Tracks when it
	// is not empty.
	originalFormat string
}
// stsz is a run of count consecutive samples that all have the same size.
type stsz struct {
	size  int64 // sample size in bytes
	count int   // number of samples in the run
}
2020-06-08 03:29:51 +03:00
type track struct {
seenHdlr bool
id int
2020-06-08 03:29:51 +03:00
sampleDescriptions []sampleDescription
subType string
stco []int64
2020-06-08 03:29:51 +03:00
stsc []stsc
stsz []stsz
formatInArg any
2020-06-08 03:29:51 +03:00
objectType int // if data format is "mp4a"
defaultIVSize int
moofs []*moof // for fmp4
2020-06-08 03:29:51 +03:00
}
// pathEntry is one level of decodeContext.path: a type string together with
// optional data associated with that level.
type pathEntry struct {
	typ  string
	data any
}
2020-06-08 03:29:51 +03:00
type decodeContext struct {
2023-05-01 14:19:04 +03:00
opts format.MP4_In
path []pathEntry
tracks map[int]*track
}
func (ctx *decodeContext) lookupTrack(id int) *track {
t, ok := ctx.tracks[id]
if !ok {
t = &track{id: id}
ctx.tracks[id] = t
}
return t
2020-06-08 03:29:51 +03:00
}
func (ctx *decodeContext) isParent(typ string) bool {
return ctx.parent().typ == typ
2020-06-08 03:29:51 +03:00
}
// parent returns the second to last entry of the path.
// It panics if the path has fewer than two entries.
func (ctx *decodeContext) parent() pathEntry {
	parentIdx := len(ctx.path) - 2
	return ctx.path[parentIdx]
}
2020-06-08 03:29:51 +03:00
// findParent walks the path from the last entry towards the first and returns
// the data of the first entry found with type typ, or nil if there is none.
func (ctx *decodeContext) findParent(typ string) any {
	for n := len(ctx.path); n > 0; n-- {
		if entry := ctx.path[n-1]; entry.typ == typ {
			return entry.data
		}
	}
	return nil
}
// rootBox returns the data of the nearest path entry with the empty type as
// a *rootBox, or nil if there is no such entry or its data has another type.
func (ctx *decodeContext) rootBox() *rootBox {
	if box, ok := ctx.findParent("").(*rootBox); ok {
		return box
	}
	return nil
}
// currentTrakBox returns the data of the nearest "trak" path entry as a
// *trakBox, or nil if there is no such entry or its data has another type.
func (ctx *decodeContext) currentTrakBox() *trakBox {
	if box, ok := ctx.findParent("trak").(*trakBox); ok {
		return box
	}
	return nil
}
// currentTrafBox returns the data of the nearest "traf" path entry as a
// *trafBox, or nil if there is no such entry or its data has another type.
func (ctx *decodeContext) currentTrafBox() *trafBox {
	if box, ok := ctx.findParent("traf").(*trafBox); ok {
		return box
	}
	return nil
}
// currentMoofBox returns the data of the nearest "moof" path entry as a
// *moofBox, or nil if there is no such entry or its data has another type.
func (ctx *decodeContext) currentMoofBox() *moofBox {
	if box, ok := ctx.findParent("moof").(*moofBox); ok {
		return box
	}
	return nil
}
// currentMetaBox returns the data of the nearest "meta" path entry as a
// *metaBox, or nil if there is no such entry or its data has another type.
func (ctx *decodeContext) currentMetaBox() *metaBox {
	if box, ok := ctx.findParent("meta").(*metaBox); ok {
		return box
	}
	return nil
}
// currentTrack returns the track for the box currently being decoded. The
// track id is taken from the enclosing trak box if there is one, otherwise
// from the enclosing traf box. It returns nil when neither is found.
func (ctx *decodeContext) currentTrack() *track {
	if trak := ctx.currentTrakBox(); trak != nil {
		return ctx.lookupTrack(trak.trackID)
	}
	traf := ctx.currentTrafBox()
	if traf == nil {
		return nil
	}
	return ctx.lookupTrack(traf.trackID)
}
func mp4Tracks(d *decode.D, ctx *decodeContext) {
2020-06-08 03:29:51 +03:00
// keep track order stable
var sortedTracks []*track
for _, t := range ctx.tracks {
2020-06-08 03:29:51 +03:00
sortedTracks = append(sortedTracks, t)
}
slices.SortFunc(sortedTracks, func(a, b *track) int { return cmpex.Compare(a.id, b.id) })
2020-06-08 03:29:51 +03:00
d.FieldArray("tracks", func(d *decode.D) {
2020-06-08 03:29:51 +03:00
for _, t := range sortedTracks {
2022-07-07 20:02:10 +03:00
decodeSampleRange := func(d *decode.D, t *track, decodeSample bool, dataFormat string, name string, firstBit int64, nBits int64, inArg any) {
2021-12-12 17:25:57 +03:00
d.RangeFn(firstBit, nBits, func(d *decode.D) {
2022-07-07 20:02:10 +03:00
if !decodeSample {
d.FieldRawLen(name, d.BitsLeft())
return
}
2021-12-12 17:25:57 +03:00
switch {
case dataFormat == "fLaC":
d.FieldFormatLen(name, nBits, &flacFrameGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "Opus":
d.FieldFormatLen(name, nBits, &opusPacketFrameGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "vp09":
d.FieldFormatLen(name, nBits, &vp9FrameGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "avc1":
d.FieldFormatLen(name, nBits, &avcAUGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "hev1",
dataFormat == "hvc1":
d.FieldFormatLen(name, nBits, &hevcAUGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "av01":
d.FieldFormatLen(name, nBits, &av1FrameGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeMP3:
d.FieldFormatLen(name, nBits, &mp3FrameGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeAAC:
d.FieldFormatLen(name, nBits, &aacFrameGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeVORBIS:
d.FieldFormatLen(name, nBits, &vorbisPacketGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMPEG2VideoMain:
d.FieldFormatLen(name, nBits, &mpegPESPacketSampleGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMJPEG:
d.FieldFormatLen(name, nBits, &jpegGroup, inArg)
case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypePNG:
d.FieldFormatLen(name, nBits, &pngGroup, inArg)
2021-12-12 17:25:57 +03:00
case dataFormat == "jpeg":
d.FieldFormatLen(name, nBits, &jpegGroup, inArg)
case dataFormat == "apch",
dataFormat == "apcn",
dataFormat == "scpa",
dataFormat == "apco",
dataFormat == "ap4h":
d.FieldFormatLen(name, nBits, &proResFrameGroup, inArg)
2020-06-08 03:29:51 +03:00
default:
d.FieldRawLen(name, d.BitsLeft())
2021-12-12 17:25:57 +03:00
}
})
2020-06-08 03:29:51 +03:00
}
d.FieldStruct("track", func(d *decode.D) {
d.FieldValueUint("id", uint64(t.id))
trackSDDataFormat := "unknown"
if len(t.sampleDescriptions) > 0 {
sd := t.sampleDescriptions[0]
trackSDDataFormat = sd.dataFormat
if sd.originalFormat != "" {
trackSDDataFormat = sd.originalFormat
}
2020-06-08 03:29:51 +03:00
}
2022-10-03 00:30:10 +03:00
d.FieldValueStr("data_format", trackSDDataFormat, dataFormatNames)
switch trackSDDataFormat {
case "lpcm",
"raw ",
"twos",
"sowt",
"in24",
"in32",
"fl23",
"fl64",
"alaw",
"ulaw":
// TODO: treat raw samples format differently, a bit too much to have one field per sample.
// maybe in some future fq could have smart array fields
return
}
d.FieldArray("samples", func(d *decode.D) {
// TODO: warning? could also be init fragment etc
if len(t.stsz) > 0 && len(t.stsc) > 0 && len(t.stco) > 0 {
stszIndex := 0
stszEntryNr := 0
sampleNr := 0
stscIndex := 0
stscEntryNr := 0
stcoIndex := 0
2020-06-08 03:29:51 +03:00
stszEntry := t.stsz[stszIndex]
2020-06-08 03:29:51 +03:00
stscEntry := t.stsc[stscIndex]
sampleOffset := t.stco[stcoIndex]
2020-06-08 03:29:51 +03:00
logStrFn := func() string {
return fmt.Sprintf("%d: %s: nr=%d: stsz[%d/%d] nr=%d %#v stsc[%d/%d] nr=%d %#v stco[%d/%d]=%d \n",
t.id,
trackSDDataFormat,
sampleNr,
stszIndex, len(t.stsz), stszEntryNr, stszEntry,
stscIndex, len(t.stsc), stscEntryNr, stscEntry,
stcoIndex, len(t.stco), sampleOffset,
)
}
for stszIndex < len(t.stsz) {
if stszEntryNr >= stszEntry.count {
stszIndex++
if stszIndex >= len(t.stsz) {
// TODO: warning if unused stsc/stco entries?
break
}
stszEntry = t.stsz[stszIndex]
stszEntryNr = 0
2020-06-08 03:29:51 +03:00
}
if stscEntryNr >= stscEntry.samplesPerChunk {
stscEntryNr = 0
stcoIndex++
if stcoIndex >= len(t.stco) {
d.Fatalf("outside stco: %s", logStrFn())
}
sampleOffset = t.stco[stcoIndex]
if stscIndex < len(t.stsc)-1 && stcoIndex >= t.stsc[stscIndex+1].firstChunk-1 {
stscIndex++
if stscIndex >= len(t.stsc) {
d.Fatalf("outside stsc: %s", logStrFn())
}
stscEntry = t.stsc[stscIndex]
}
}
// log.Println(logStrFn())
2020-06-08 03:29:51 +03:00
2022-07-07 20:02:10 +03:00
decodeSampleRange(d, t, ctx.opts.DecodeSamples, trackSDDataFormat, "sample", sampleOffset*8, stszEntry.size*8, t.formatInArg)
sampleOffset += stszEntry.size
stscEntryNr++
stszEntryNr++
sampleNr++
2020-06-08 03:29:51 +03:00
}
}
sampleNr := 0
2020-06-08 03:29:51 +03:00
for _, m := range t.moofs {
2022-07-07 20:02:10 +03:00
for trunNr, trun := range m.truns {
var senc senc
if trunNr < len(m.sencs) {
senc = m.sencs[trunNr]
}
sampleOffset := m.offset + trun.dataOffset
2022-07-07 20:02:10 +03:00
for trunSampleNr, sz := range trun.samplesSizes {
dataFormat := trackSDDataFormat
if m.defaultSampleDescriptionIndex != 0 && m.defaultSampleDescriptionIndex-1 < len(t.sampleDescriptions) {
sd := t.sampleDescriptions[m.defaultSampleDescriptionIndex-1]
dataFormat = sd.dataFormat
if sd.originalFormat != "" {
dataFormat = sd.originalFormat
}
}
2020-06-08 03:29:51 +03:00
// logStrFn := func() string {
// return fmt.Sprintf("%d: %s: %d: (%s): sz=%d %d+%d=%d",
// t.id,
// dataFormat,
// sampleNr,
// trackSDDataFormat,
// sz,
// m.offset,
// m.dataOffset,
// sampleOffset,
// )
// }
// log.Println(logStrFn())
2022-07-07 20:02:10 +03:00
decodeSample := ctx.opts.DecodeSamples
if trunSampleNr < len(senc.entries) {
// TODO: encrypted
decodeSample = false
}
decodeSampleRange(d, t, decodeSample, dataFormat, "sample", sampleOffset*8, sz*8, t.formatInArg)
sampleOffset += sz
sampleNr++
}
2020-06-08 03:29:51 +03:00
}
}
})
})
}
})
}
func mp4Decode(d *decode.D) any {
2023-05-01 14:19:04 +03:00
var mi format.MP4_In
d.ArgAs(&mi)
ctx := &decodeContext{
opts: mi,
path: []pathEntry{{typ: "root"}},
tracks: map[int]*track{},
}
// TODO: nicer, validate functions without field?
d.AssertLeastBytesLeft(16)
size := d.U32()
if size < 8 {
d.Fatalf("first box size too small < 8")
}
firstType := d.UTF8(4)
switch firstType {
case "styp", // mp4 segment
"ftyp", // mp4 file
"free", // seems to happen
"moov", // seems to happen
"pnot", // video preview file
"jP ": // JPEG 2000
default:
d.Errorf("no styp, ftyp, free or moov box found")
}
d.SeekRel(-8 * 8)
ctx.path = []pathEntry{{typ: "", data: &rootBox{}}}
decodeBoxes(ctx, d)
if len(ctx.tracks) > 0 {
mp4Tracks(d, ctx)
}
2020-06-08 03:29:51 +03:00
return nil
}