// Mirror of https://github.com/wader/fq.git (synced 2024-11-29 12:14:17 +03:00).
// 494 lines, 14 KiB, Go.
package mp4
|
|
|
|
// Tries to decode ISOBMFF quicktime mov
|
|
// Uses naming from ISOBMFF when possible
|
|
// ISO/IEC 14496-12
|
|
// Quicktime file format https://developer.apple.com/standards/qtff-2001.pdf
|
|
// FLAC in ISOBMFF https://github.com/xiph/flac/blob/master/doc/isoflac.txt
|
|
// vp9 in ISOBMFF https://www.webmproject.org/vp9/mp4/
|
|
// https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/Metadata/Metadata.html#//apple_ref/doc/uid/TP40000939-CH1-SW43
|
|
|
|
// TODO: validate structure better? trak/stco etc
|
|
// TODO: keep track of structure somehow to detect errors
|
|
// TODO: ISO-14496 says mp4 mdat can begin and end with original header/trailer (not used?)
|
|
// TODO: split into mov and mp4 decoder?
|
|
// TODO: split into mp4_box decoder? needs complex in/out args?
|
|
// TODO: better probe, find first 2 boxes, should be free,ftyp or mdat?
|
|
|
|
import (
|
|
"cmp"
|
|
"embed"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/wader/fq/format"
|
|
"github.com/wader/fq/pkg/decode"
|
|
"github.com/wader/fq/pkg/interp"
|
|
"golang.org/x/exp/slices"
|
|
)
|
|
|
|
//go:embed mp4.jq
|
|
//go:embed mp4.md
|
|
var mp4FS embed.FS
|
|
|
|
var aacFrameGroup decode.Group
|
|
var av1CCRGroup decode.Group
|
|
var av1FrameGroup decode.Group
|
|
var avcAUGroup decode.Group
|
|
var avcDCRGroup decode.Group
|
|
var flacFrameGroup decode.Group
|
|
var flacMetadatablocksGroup decode.Group
|
|
var hevcAUGroup decode.Group
|
|
var hevcCDCRGroup decode.Group
|
|
var iccProfileGroup decode.Group
|
|
var id3v2Group decode.Group
|
|
var imageGroup decode.Group
|
|
var jp2cGroup decode.Group
|
|
var jpegGroup decode.Group
|
|
var mp3FrameGroup decode.Group
|
|
var mpegESGroup decode.Group
|
|
var mpegPESPacketSampleGroup decode.Group
|
|
var opusPacketFrameGroup decode.Group
|
|
var pngGroup decode.Group
|
|
var proResFrameGroup decode.Group
|
|
var protoBufWidevineGroup decode.Group
|
|
var psshPlayreadyGroup decode.Group
|
|
var vorbisPacketGroup decode.Group
|
|
var vp9FrameGroup decode.Group
|
|
var vpxCCRGroup decode.Group
|
|
|
|
func init() {
|
|
interp.RegisterFormat(
|
|
format.MP4,
|
|
&decode.Format{
|
|
Description: "ISOBMFF, QuickTime and similar",
|
|
Groups: []*decode.Group{
|
|
format.Probe,
|
|
format.Image, // avif
|
|
},
|
|
DecodeFn: mp4Decode,
|
|
DefaultInArg: format.MP4_In{
|
|
DecodeSamples: true,
|
|
AllowTruncated: false,
|
|
},
|
|
Dependencies: []decode.Dependency{
|
|
{Groups: []*decode.Group{format.AAC_Frame}, Out: &aacFrameGroup},
|
|
{Groups: []*decode.Group{format.AV1_CCR}, Out: &av1CCRGroup},
|
|
{Groups: []*decode.Group{format.AV1_Frame}, Out: &av1FrameGroup},
|
|
{Groups: []*decode.Group{format.AVC_AU}, Out: &avcAUGroup},
|
|
{Groups: []*decode.Group{format.AVC_DCR}, Out: &avcDCRGroup},
|
|
{Groups: []*decode.Group{format.FLAC_Frame}, Out: &flacFrameGroup},
|
|
{Groups: []*decode.Group{format.FLAC_Metadatablocks}, Out: &flacMetadatablocksGroup},
|
|
{Groups: []*decode.Group{format.HEVC_AU}, Out: &hevcAUGroup},
|
|
{Groups: []*decode.Group{format.HEVC_DCR}, Out: &hevcCDCRGroup},
|
|
{Groups: []*decode.Group{format.ICC_Profile}, Out: &iccProfileGroup},
|
|
{Groups: []*decode.Group{format.ID3v2}, Out: &id3v2Group},
|
|
{Groups: []*decode.Group{format.Image}, Out: &imageGroup},
|
|
{Groups: []*decode.Group{format.JP2C}, Out: &jp2cGroup},
|
|
{Groups: []*decode.Group{format.JPEG}, Out: &jpegGroup},
|
|
{Groups: []*decode.Group{format.MP3_Frame}, Out: &mp3FrameGroup},
|
|
{Groups: []*decode.Group{format.MPEG_ES}, Out: &mpegESGroup},
|
|
{Groups: []*decode.Group{format.MPEG_PES_Packet}, Out: &mpegPESPacketSampleGroup},
|
|
{Groups: []*decode.Group{format.Opus_Packet}, Out: &opusPacketFrameGroup},
|
|
{Groups: []*decode.Group{format.PNG}, Out: &pngGroup},
|
|
{Groups: []*decode.Group{format.Prores_Frame}, Out: &proResFrameGroup},
|
|
{Groups: []*decode.Group{format.ProtobufWidevine}, Out: &protoBufWidevineGroup},
|
|
{Groups: []*decode.Group{format.PSSH_Playready}, Out: &psshPlayreadyGroup},
|
|
{Groups: []*decode.Group{format.Vorbis_Packet}, Out: &vorbisPacketGroup},
|
|
{Groups: []*decode.Group{format.VP9_Frame}, Out: &vp9FrameGroup},
|
|
{Groups: []*decode.Group{format.VPX_CCR}, Out: &vpxCCRGroup},
|
|
},
|
|
})
|
|
interp.RegisterFS(mp4FS)
|
|
}
|
|
|
|
// stsc is one sample-to-chunk entry: starting at chunk number firstChunk
// (compared 1-based against the chunk offset index in mp4Tracks) every chunk
// holds samplesPerChunk samples.
type stsc struct {
	firstChunk, samplesPerChunk int
}
|
|
|
|
type moof struct {
|
|
offset int64
|
|
defaultSampleSize int64
|
|
defaultSampleDescriptionIndex int
|
|
truns []trun
|
|
sencs []senc
|
|
}
|
|
|
|
// senc records per-sample entries for a track run. Only the number of entries
// matters in this file: a sample whose index within its trun is below
// len(entries) is treated as encrypted and left as raw bits.
// TODO: nothing for now
type senc struct {
	entries []struct{}
}
|
|
|
|
// trun describes one run of samples inside a movie fragment.
type trun struct {
	// dataOffset is added to the owning moof's offset to get the byte
	// position of the first sample in the run.
	dataOffset int64
	// samplesSizes holds the size in bytes of each sample, in order; samples
	// are laid out back to back starting at the run's first sample.
	samplesSizes []int64
}
|
|
|
|
// sampleDescription names the format of a track's samples. When
// originalFormat is non-empty it takes precedence over dataFormat when
// picking a sample decoder.
type sampleDescription struct {
	dataFormat, originalFormat string
}
|
|
|
|
// stsz is a run-length entry of the sample size table: count consecutive
// samples that are each size bytes long.
type stsz struct {
	size  int64
	count int
}
|
|
|
|
// track accumulates per-track state while boxes are decoded; mp4Tracks later
// uses it to locate and decode the track's samples.
type track struct {
	seenHdlr bool // NOTE(review): presumably set once a hdlr box was decoded — confirm
	fragment bool // NOTE(review): presumably marks a track seen via a fragment — confirm
	// id identifies the track; entries in decodeContext.tracks sharing an id
	// are merged into one output track by mp4Tracks.
	id int
	// sampleDescriptions: the first entry selects the track's data format, a
	// fragment can select another one by 1-based index.
	sampleDescriptions []sampleDescription
	subType            string
	// stco holds chunk offsets in bytes, stsc the sample-to-chunk mapping and
	// stsz the sample sizes; all three must be non-empty for the
	// non-fragmented samples to be decoded.
	stco []int64
	stsc []stsc
	stsz []stsz
	// formatInArg is passed as in-argument to the sample decoder.
	formatInArg   any
	objectType    int // if data format is "mp4a"
	defaultIVSize int
	moofs         []*moof // for fmp4
	// When dref is true and drefURL is non-empty the samples are not decoded
	// and only the URL is reported.
	dref    bool
	drefURL string
}
|
|
|
|
// pathEntry is one element of the box path kept in decodeContext.
type pathEntry struct {
	// typ is the box type name; the root entry uses the empty string.
	typ string
	// data is box specific state (e.g. *rootBox, *trakBox) and may be nil.
	data any
}
|
|
|
|
// decodeContext carries state shared by the whole mp4 decode.
type decodeContext struct {
	// opts are the decoder options given as in-argument.
	opts format.MP4_In
	// path is the stack of boxes from the root down to the current box.
	path []pathEntry
	// tracks lists the tracks collected during box decoding, in file order.
	tracks []*track
}
|
|
|
|
func (ctx *decodeContext) isParent(typ string) bool {
|
|
return ctx.parent().typ == typ
|
|
}
|
|
|
|
func (ctx *decodeContext) parent() pathEntry {
|
|
return ctx.path[len(ctx.path)-2]
|
|
}
|
|
|
|
func (ctx *decodeContext) findParent(typ string) any {
|
|
for i := len(ctx.path) - 1; i >= 0; i-- {
|
|
p := ctx.path[i]
|
|
if p.typ == typ {
|
|
return p.data
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (ctx *decodeContext) rootBox() *rootBox {
|
|
t, _ := ctx.findParent("").(*rootBox)
|
|
return t
|
|
}
|
|
|
|
func (ctx *decodeContext) currentTrakBox() *trakBox {
|
|
t, _ := ctx.findParent("trak").(*trakBox)
|
|
return t
|
|
}
|
|
|
|
func (ctx *decodeContext) currentTrafBox() *trafBox {
|
|
t, _ := ctx.findParent("traf").(*trafBox)
|
|
return t
|
|
}
|
|
|
|
func (ctx *decodeContext) currentMoofBox() *moofBox {
|
|
t, _ := ctx.findParent("moof").(*moofBox)
|
|
return t
|
|
}
|
|
|
|
func (ctx *decodeContext) currentMetaBox() *metaBox {
|
|
t, _ := ctx.findParent("meta").(*metaBox)
|
|
return t
|
|
}
|
|
|
|
func (ctx *decodeContext) currentTrack() *track {
|
|
if t := ctx.currentTrakBox(); t != nil {
|
|
return t.track
|
|
}
|
|
if t := ctx.currentTrafBox(); t != nil {
|
|
return t.track
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func mp4Tracks(d *decode.D, ctx *decodeContext) {
|
|
type trackCollected struct {
|
|
track *track
|
|
order int
|
|
moofss [][]*moof
|
|
}
|
|
|
|
var tracksCollected []*trackCollected
|
|
tracksCollectedSeen := map[int]*trackCollected{}
|
|
for i, t := range ctx.tracks {
|
|
tc, ok := tracksCollectedSeen[t.id]
|
|
if !ok {
|
|
tc = &trackCollected{
|
|
order: i,
|
|
track: t,
|
|
}
|
|
tracksCollectedSeen[t.id] = tc
|
|
tracksCollected = append(tracksCollected, tc)
|
|
}
|
|
|
|
// TODO: error if not fragmented and seen before?
|
|
|
|
tc.moofss = append(tc.moofss, t.moofs)
|
|
}
|
|
|
|
// sort by id then order in file
|
|
slices.SortStableFunc(tracksCollected, func(a, b *trackCollected) int {
|
|
if r := cmp.Compare(a.track.id, b.track.id); r != 0 {
|
|
return r
|
|
}
|
|
return cmp.Compare(a.order, b.order)
|
|
})
|
|
|
|
d.FieldArray("tracks", func(d *decode.D) {
|
|
for _, tc := range tracksCollected {
|
|
decodeSampleRange := func(d *decode.D, t *track, decodeSample bool, dataFormat string, name string, firstBit int64, nBits int64, inArg any) {
|
|
d.RangeFn(firstBit, nBits, func(d *decode.D) {
|
|
if !decodeSample {
|
|
d.FieldRawLen(name, d.BitsLeft())
|
|
return
|
|
}
|
|
|
|
switch {
|
|
case dataFormat == "fLaC":
|
|
d.FieldFormatLen(name, nBits, &flacFrameGroup, inArg)
|
|
case dataFormat == "Opus":
|
|
d.FieldFormatLen(name, nBits, &opusPacketFrameGroup, inArg)
|
|
case dataFormat == "vp09":
|
|
d.FieldFormatLen(name, nBits, &vp9FrameGroup, inArg)
|
|
case dataFormat == "avc1":
|
|
d.FieldFormatLen(name, nBits, &avcAUGroup, inArg)
|
|
case dataFormat == "hev1",
|
|
dataFormat == "hvc1":
|
|
d.FieldFormatLen(name, nBits, &hevcAUGroup, inArg)
|
|
case dataFormat == "av01":
|
|
d.FieldFormatLen(name, nBits, &av1FrameGroup, inArg)
|
|
case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeMP3:
|
|
d.FieldFormatLen(name, nBits, &mp3FrameGroup, inArg)
|
|
case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeAAC:
|
|
d.FieldFormatLen(name, nBits, &aacFrameGroup, inArg)
|
|
case dataFormat == "mp4a" && t.objectType == format.MPEGObjectTypeVORBIS:
|
|
d.FieldFormatLen(name, nBits, &vorbisPacketGroup, inArg)
|
|
case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMPEG2VideoMain:
|
|
d.FieldFormatLen(name, nBits, &mpegPESPacketSampleGroup, inArg)
|
|
case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypeMJPEG:
|
|
d.FieldFormatLen(name, nBits, &jpegGroup, inArg)
|
|
case dataFormat == "mp4v" && t.objectType == format.MPEGObjectTypePNG:
|
|
d.FieldFormatLen(name, nBits, &pngGroup, inArg)
|
|
case dataFormat == "jpeg":
|
|
d.FieldFormatLen(name, nBits, &jpegGroup, inArg)
|
|
case dataFormat == "apch",
|
|
dataFormat == "apcn",
|
|
dataFormat == "scpa",
|
|
dataFormat == "apco",
|
|
dataFormat == "ap4h":
|
|
d.FieldFormatLen(name, nBits, &proResFrameGroup, inArg)
|
|
default:
|
|
d.FieldRawLen(name, d.BitsLeft())
|
|
}
|
|
})
|
|
}
|
|
|
|
d.FieldStruct("track", func(d *decode.D) {
|
|
t := tc.track
|
|
|
|
d.FieldValueUint("id", uint64(t.id))
|
|
|
|
trackSDDataFormat := "unknown"
|
|
if len(t.sampleDescriptions) > 0 {
|
|
sd := t.sampleDescriptions[0]
|
|
trackSDDataFormat = sd.dataFormat
|
|
if sd.originalFormat != "" {
|
|
trackSDDataFormat = sd.originalFormat
|
|
}
|
|
}
|
|
d.FieldValueStr("data_format", trackSDDataFormat, dataFormatNames)
|
|
|
|
if t.dref && t.drefURL != "" {
|
|
d.FieldValueStr("data_reference_url", t.drefURL)
|
|
return
|
|
}
|
|
|
|
switch trackSDDataFormat {
|
|
case "lpcm",
|
|
"raw ",
|
|
"twos",
|
|
"sowt",
|
|
"in24",
|
|
"in32",
|
|
"fl23",
|
|
"fl64",
|
|
"alaw",
|
|
"ulaw":
|
|
// TODO: treat raw samples format differently, a bit too much to have one field per sample.
|
|
// maybe in some future fq could have smart array fields
|
|
return
|
|
}
|
|
|
|
d.FieldArray("samples", func(d *decode.D) {
|
|
// TODO: warning? could also be init fragment etc
|
|
|
|
if len(t.stsz) > 0 && len(t.stsc) > 0 && len(t.stco) > 0 {
|
|
stszIndex := 0
|
|
stszEntryNr := 0
|
|
sampleNr := 0
|
|
stscIndex := 0
|
|
stscEntryNr := 0
|
|
stcoIndex := 0
|
|
|
|
stszEntry := t.stsz[stszIndex]
|
|
stscEntry := t.stsc[stscIndex]
|
|
sampleOffset := t.stco[stcoIndex]
|
|
|
|
logStrFn := func() string {
|
|
return fmt.Sprintf("%d: %s: nr=%d: stsz[%d/%d] nr=%d %#v stsc[%d/%d] nr=%d %#v stco[%d/%d]=%d \n",
|
|
t.id,
|
|
trackSDDataFormat,
|
|
sampleNr,
|
|
stszIndex, len(t.stsz), stszEntryNr, stszEntry,
|
|
stscIndex, len(t.stsc), stscEntryNr, stscEntry,
|
|
stcoIndex, len(t.stco), sampleOffset,
|
|
)
|
|
}
|
|
|
|
for stszIndex < len(t.stsz) {
|
|
if stszEntryNr >= stszEntry.count {
|
|
stszIndex++
|
|
if stszIndex >= len(t.stsz) {
|
|
// TODO: warning if unused stsc/stco entries?
|
|
break
|
|
}
|
|
|
|
stszEntry = t.stsz[stszIndex]
|
|
stszEntryNr = 0
|
|
}
|
|
|
|
if stscEntryNr >= stscEntry.samplesPerChunk {
|
|
stscEntryNr = 0
|
|
stcoIndex++
|
|
if stcoIndex >= len(t.stco) {
|
|
d.Fatalf("outside stco: %s", logStrFn())
|
|
}
|
|
sampleOffset = t.stco[stcoIndex]
|
|
|
|
if stscIndex < len(t.stsc)-1 && stcoIndex >= t.stsc[stscIndex+1].firstChunk-1 {
|
|
stscIndex++
|
|
if stscIndex >= len(t.stsc) {
|
|
d.Fatalf("outside stsc: %s", logStrFn())
|
|
}
|
|
stscEntry = t.stsc[stscIndex]
|
|
}
|
|
}
|
|
|
|
decodeSampleRange(d, t, ctx.opts.DecodeSamples, trackSDDataFormat, "sample", sampleOffset*8, stszEntry.size*8, t.formatInArg)
|
|
|
|
sampleOffset += stszEntry.size
|
|
stscEntryNr++
|
|
stszEntryNr++
|
|
sampleNr++
|
|
}
|
|
}
|
|
|
|
sampleNr := 0
|
|
|
|
for _, ms := range tc.moofss {
|
|
for _, m := range ms {
|
|
for trunNr, trun := range m.truns {
|
|
var senc senc
|
|
if trunNr < len(m.sencs) {
|
|
senc = m.sencs[trunNr]
|
|
}
|
|
sampleOffset := m.offset + trun.dataOffset
|
|
|
|
for trunSampleNr, sz := range trun.samplesSizes {
|
|
dataFormat := trackSDDataFormat
|
|
if m.defaultSampleDescriptionIndex != 0 && m.defaultSampleDescriptionIndex-1 < len(t.sampleDescriptions) {
|
|
sd := t.sampleDescriptions[m.defaultSampleDescriptionIndex-1]
|
|
dataFormat = sd.dataFormat
|
|
if sd.originalFormat != "" {
|
|
dataFormat = sd.originalFormat
|
|
}
|
|
}
|
|
|
|
// logStrFn := func() string {
|
|
// return fmt.Sprintf("%d: %s: %d: (%s): sz=%d %d+%d=%d",
|
|
// t.id,
|
|
// dataFormat,
|
|
// sampleNr,
|
|
// trackSDDataFormat,
|
|
// sz,
|
|
// m.offset,
|
|
// m.dataOffset,
|
|
// sampleOffset,
|
|
// )
|
|
// }
|
|
|
|
decodeSample := ctx.opts.DecodeSamples
|
|
if trunSampleNr < len(senc.entries) {
|
|
// TODO: encrypted
|
|
decodeSample = false
|
|
}
|
|
|
|
decodeSampleRange(d, t, decodeSample, dataFormat, "sample", sampleOffset*8, sz*8, t.formatInArg)
|
|
|
|
sampleOffset += sz
|
|
sampleNr++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
})
|
|
})
|
|
}
|
|
})
|
|
}
|
|
|
|
func mp4Decode(d *decode.D) any {
|
|
var mi format.MP4_In
|
|
d.ArgAs(&mi)
|
|
|
|
ctx := &decodeContext{
|
|
opts: mi,
|
|
path: []pathEntry{{typ: "root"}},
|
|
tracks: []*track{},
|
|
}
|
|
|
|
// TODO: nicer, validate functions without field?
|
|
d.AssertLeastBytesLeft(16)
|
|
size := d.U32()
|
|
if size < 8 {
|
|
d.Fatalf("first box size too small < 8")
|
|
}
|
|
firstType := strings.TrimSpace(d.UTF8(4))
|
|
switch firstType {
|
|
case "styp", // mp4 segment
|
|
"ftyp", // mp4 file
|
|
"free", // seems to happen
|
|
"moov", // seems to happen
|
|
"pnot", // video preview file
|
|
"jP": // JPEG 2000
|
|
default:
|
|
d.Errorf("no styp, ftyp, free or moov box found")
|
|
}
|
|
|
|
d.SeekRel(-8 * 8)
|
|
|
|
ctx.path = []pathEntry{{typ: "", data: &rootBox{}}}
|
|
|
|
decodeBoxes(ctx, d)
|
|
if len(ctx.tracks) > 0 {
|
|
mp4Tracks(d, ctx)
|
|
}
|
|
|
|
return nil
|
|
}
|