package mp4 import ( "bytes" "encoding/binary" "fmt" "time" "github.com/wader/fq/format" "github.com/wader/fq/pkg/decode" "github.com/wader/fq/pkg/scalar" "golang.org/x/text/encoding/charmap" ) // TODO: keep track of list of sampleSize/entries instead and change sample read code const maxSampleEntryCount = 10_000_000 const ( boxSizeRestOfFile = 0 boxSizeUse64bitSize = 1 ) var boxSizeNames = scalar.UintMapDescription{ boxSizeRestOfFile: "Rest of file", boxSizeUse64bitSize: "Use 64 bit size", } var mediaTimeNames = scalar.SintMapDescription{ -1: "empty", } var subTypeNames = scalar.StrMapDescription{ "alis": "Alias Data", "camm": "Camera Metadata", "crsm": "Clock Reference", "data": "Data", "hint": "Hint Track", "ipsm": "IPMP", "m7sm": "MPEG-7 Stream", "mdir": "Metadata", "mdta": "Metadata Tags", "meta": "NRT Metadata", "mjsm": "MPEG-J", "nrtm": "Non-Real Time Metadata", "ocsm": "Object Content", "odsm": "Object Descriptor", "pict": "Picture", "priv": "Private", "psmd": "Panasonic Static Metadata", "sbtl": "Subtitle", "sdsm": "Scene Description", "soun": "Audio Track", "subp": "Subpicture", "text": "Text", "tmcd": "Time Code", "url ": "URL", "vide": "Video Track", } var dataFormatNames = scalar.StrMapDescription{ // additional codecs "apch": "Apple ProRes 422 High Quality", "apcn": "Apple ProRes 422 Standard Definition", "apcs": "Apple ProRes 422 LT", "apco": "Apple ProRes 422 Proxy", "ap4h": "Apple ProRes 4444", "jpeg": "JPEG Image", // codecs from https://mp4ra.org/ "3gvo": "3GPP Video Orientation", "a3d1": "Multiview Video Coding", "a3d2": "Multiview Video Coding", "a3d3": "Multiview Video Coding", "a3d4": "Multiview Video Coding", "a3ds": "Auro-Cx 3D audio", "ac-3": "AC-3 audio", "ac-4": "AC-4 audio", "alac": "Apple lossless audio codec", "alaw": "a-Law", "av01": "AV1 video", "avc1": "Advanced Video Coding", "avc2": "Advanced Video Coding", "avc3": "Advanced Video Coding", "avc4": "Advanced Video Coding", "avcp": "Advanced Video Coding Parameters", "dra1": "DRA Audio", "drac": "Dirac Video Coder", "dts-": "Dependent base layer for DTS layered audio", "dts+": "Enhancement layer for DTS layered audio", "dtsc": "DTS Coherent Acoustics audio", "dtse": "DTS Express low bit rate audio, also known as DTS LBR", "dtsh": "DTS-HD High Resolution Audio", "dtsl": "DTS-HD Master Audio", "dtsx": "DTS:X", "dvav": "AVC-based “Dolby Vision”", "dvhe": "HEVC-based “Dolby Vision”", "ec-3": "Enhanced AC-3 audio", "enca": "Encrypted/Protected audio", "encf": "Encrypted/Protected font", "encm": "Encrypted/Protected metadata stream", "encs": "Encrypted Systems stream", "enct": "Encrypted Text", "encv": "Encrypted/protected video", "fdp ": "File delivery hints", "fLaC": "Fres Lossless Audio Codec", "g719": "ITU-T Recommendation G.719 (2008)", "g726": "ITU-T Recommendation G.726 (1990)", "hev1": "High Efficiency Video Coding", "hvc1": "High Efficiency Video Coding", "hvt1": "High Efficiency Video Coding", "ixse": "DVB Track Level Index Track", "lhe1": "Layered High Efficiency Video Coding", "lht1": "Layered High Efficiency Video Coding", "lhv1": "Layered High Efficiency Video Coding", "m2ts": "MPEG-2 transport stream for DMB", "m4ae": "MPEG-4 Audio Enhancement MP4v1/2", "mett": "Text timed metadata that is not XML", "metx": "XML timed metadata", "mha1": "MPEG-H Audio (single stream, uncapsulated)", "mha2": "MPEG-H Audio (multi-stream, unencapsulated)", "mhm1": "MPEG-H Audio (single stream, MHAS encapsulated)", "mhm2": "MPEG-H Audio (multi-stream, MHAS encapsulated)", "mjp2": "Motion JPEG 2000", "mlix": "DVB Movie level index track", "mlpa": "MLP Audio", "mp4a": "MPEG-4 Audio", "mp4s": "MPEG-4 Systems", "mp4v": "MPEG-4 Visual", "mvc1": "Multiview coding", "mvc2": "Multiview coding", "mvc3": "Multiview coding", "mvc4": "Multiview coding", "mvd1": "Multiview coding", "mvd2": "Multiview coding", "mvd3": "Multiview coding", "mvd4": "Multiview coding", "oksd": "OMA Keys", "Opus": "Opus audio coding", "pm2t": "Protected MPEG-2 Transport", "prtp": "Protected RTP Reception", "raw ": "Uncompressed audio", "resv": "Restricted Video", "rm2t": "MPEG-2 Transport Reception", "rrtp": "RTP reception", "rsrp": "SRTP Reception", "rtmd": "Real Time Metadata Sample Entry(XAVC Format)", "rtp ": "RTP Hints", "s263": "ITU H.263 video (3GPP format)", "samr": "Narrowband AMR voice", "sawb": "Wideband AMR voice", "sawp": "Extended AMR-WB (AMR-WB+)", "sevc": "EVRC Voice", "sm2t": "MPEG-2 Transport Server", "sqcp": "13K Voice", "srtp": "SRTP Hints", "ssmv": "SMV Voice", "STGS": "Subtitle Sample Entry (HMMP)", "stpp": "Subtitles (Timed Text)", "svc1": "Scalable Video Coding", "svc2": "Scalable Video Coding", "svcM": "SVC Metadata", "tc64": "64 bit timecode samples", "tmcd": "32 bit timecode samples", "twos": "Uncompressed 16-bit audio", "tx3g": "Timed Text stream", "ulaw": "Samples have been compressed using uLaw 2:1.", "unid": "Dynamic Range Control (DRC) data", "urim": "Binary timed metadata identified by URI", "vc-1": "SMPTE VC-1", "vp08": "VP8 video", "vp09": "VP9 video", "wvtt": "WebVTT", } var ( uuidIsmlManifestBytes = [16]byte{0xa5, 0xd4, 0x0b, 0x30, 0xe8, 0x14, 0x11, 0xdd, 0xba, 0x2f, 0x08, 0x00, 0x20, 0x0c, 0x9a, 0x66} uuidXmpBytes = [16]byte{0xbe, 0x7a, 0xcf, 0xcb, 0x97, 0xa9, 0x42, 0xe8, 0x9c, 0x71, 0x99, 0x94, 0x91, 0xe3, 0xaf, 0xac} uuidSphericalBytes = [16]byte{0xff, 0xcc, 0x82, 0x63, 0xf8, 0x55, 0x4a, 0x93, 0x88, 0x14, 0x58, 0x7a, 0x02, 0x52, 0x1f, 0xdd} uuidPspUsmtBytes = [16]byte{0x55, 0x53, 0x4d, 0x54, 0x21, 0xd2, 0x4f, 0xce, 0xbb, 0x88, 0x69, 0x5c, 0xfa, 0xc9, 0xc7, 0x40} uuidTfxdBytes = [16]byte{0x6d, 0x1d, 0x9b, 0x05, 0x42, 0xd5, 0x44, 0xe6, 0x80, 0xe2, 0x14, 0x1d, 0xaf, 0xf7, 0x57, 0xb2} uuidTfrfBytes = [16]byte{0xd4, 0x80, 0x7e, 0xf2, 0xca, 0x39, 0x46, 0x95, 0x8e, 0x54, 0x26, 0xcb, 0x9e, 0x46, 0xa7, 0x9f} uuidProfBytes = [16]byte{0x50, 0x52, 0x4f, 0x46, 0x21, 0xd2, 0x4f, 0xce, 0xbb, 0x88, 0x69, 0x5c, 0xfa, 0xc9, 0xc7, 0x40} uuidIpodBytes = [16]byte{0x6b, 0x68, 0x40, 0xf2, 0x5f, 0x24, 0x4f, 0xc5, 0xba, 0x39, 0xa5, 0x1b, 0xcf, 0x03, 0x23, 0xf3} ) var uuidNames = scalar.RawBytesMap{ {Bytes: uuidIsmlManifestBytes[:], Scalar: scalar.BitBuf{Sym: "isml_manifest"}}, {Bytes: uuidXmpBytes[:], Scalar: scalar.BitBuf{Sym: "xmp"}}, {Bytes: uuidSphericalBytes[:], Scalar: scalar.BitBuf{Sym: "spherical"}}, {Bytes: uuidPspUsmtBytes[:], Scalar: scalar.BitBuf{Sym: "psp_usmt"}}, {Bytes: uuidTfxdBytes[:], Scalar: scalar.BitBuf{Sym: "tfxd"}}, {Bytes: uuidTfrfBytes[:], Scalar: scalar.BitBuf{Sym: "tfrf"}}, {Bytes: uuidProfBytes[:], Scalar: scalar.BitBuf{Sym: "prof"}}, {Bytes: uuidIpodBytes[:], Scalar: scalar.BitBuf{Sym: "ipod"}}, } // ISO 639-2/T language code 3 * 5bit packed uint + 1 zero bit func decodeLang(d *decode.D) string { d.U1() return string([]byte{ byte(d.U5()) + 0x60, byte(d.U5()) + 0x60, byte(d.U5()) + 0x60}, ) } // Quicktime time seconds in January 1, 1904 UTC var quicktimeEpochDate = time.Date(1904, time.January, 4, 0, 0, 0, 0, time.UTC) var uintActualQuicktimeEpoch = scalar.UintActualDate(quicktimeEpochDate, time.RFC3339) func decodeMvhdFieldMatrix(d *decode.D, name string) { d.FieldStruct(name, func(d *decode.D) { d.FieldFP32("a") d.FieldFP32("b") d.FieldFP("u", 32, 30) d.FieldFP32("c") d.FieldFP32("d") d.FieldFP("v", 32, 30) d.FieldFP32("x") d.FieldFP32("y") d.FieldFP("w", 32, 30) }) } // ISO 14496-12 8.40.2.3 Sample dependency box semantics func decodeSampleFlags(d *decode.D) { d.FieldU4("reserved0") d.FieldU2("is_leading") d.FieldU2("sample_depends_on", scalar.UintMap{ 0: scalar.Uint{Sym: "unknown"}, 1: scalar.Uint{Sym: "other", Description: "Not I-picture"}, 2: scalar.Uint{Sym: "none", Description: "Is I-picture"}, }) d.FieldU2("sample_is_depended_on", scalar.UintMap{ 0: scalar.Uint{Sym: "unknown"}, 1: scalar.Uint{Sym: "other", Description: "Not disposable"}, 2: scalar.Uint{Sym: "none", Description: "Is disposable"}, }) d.FieldU2("sample_has_redundancy", scalar.UintMap{ 0: scalar.Uint{Sym: "unknown"}, 2: scalar.Uint{Sym: "none", Description: "No redundant coding"}, }) d.FieldU3("sample_padding_value") d.FieldU1("sample_is_non_sync_sample") d.FieldU16("sample_degradation_priority") } func decodeBoxWithParentData(ctx *decodeContext, d *decode.D, parentData any, extraTypeMappers ...scalar.StrMapper) { var typ string var dataSize uint64 typeMappers := []scalar.StrMapper{boxDescriptions} if len(extraTypeMappers) > 0 { typeMappers = append(typeMappers, extraTypeMappers...) } boxSize := d.FieldU32("size", boxSizeNames) typ = d.FieldStr("type", 4, charmap.ISO8859_1, typeMappers...) switch boxSize { case boxSizeRestOfFile: dataSize = uint64(d.Len()-d.Pos()) / 8 case boxSizeUse64bitSize: boxSize = d.FieldU64("size64") dataSize = boxSize - 16 default: dataSize = boxSize - 8 } if ctx.opts.AllowTruncated && dataSize > uint64(d.BitsLeft()/8) { dataSize = uint64(d.BitsLeft() / 8) } if parentData != nil { ctx.path[len(ctx.path)-1].data = parentData } ctx.path = append(ctx.path, pathEntry{typ: typ, data: parentData}) d.FramedFn(int64(dataSize*8), func(d *decode.D) { decodeBox(ctx, d, typ) }) ctx.path = ctx.path[0 : len(ctx.path)-1] } func decodeBoxes(ctx *decodeContext, d *decode.D, extraTypeMappers ...scalar.StrMapper) { decodeBoxesWithParentData(ctx, d, nil, extraTypeMappers...) } func decodeBoxesWithParentData(ctx *decodeContext, d *decode.D, parentData any, extraTypeMappers ...scalar.StrMapper) { d.FieldStructArrayLoop("boxes", "box", func() bool { return d.BitsLeft() >= 8*8 }, func(d *decode.D) { decodeBoxWithParentData(ctx, d, parentData, extraTypeMappers...) }) if d.BitsLeft() > 0 { // "Some sample descriptions terminate with four zero bytes that are not otherwise indicated." if d.BitsLeft() >= 32 && d.PeekUintBits(32) == 0 { d.FieldU32("zero_terminator") } if d.BitsLeft() > 0 { d.FieldRawLen("padding", d.BitsLeft()) } } } type irefBox struct { version int } type trakBox struct { trackID int } type moofBox struct { offset int64 } type trafBox struct { trackID int baseDataOffset int64 moof *moof } type metaBox struct { subType string keys *keysBox } type keysBoxKey struct { namespace string name string } type keysBox struct { keys []keysBoxKey } func decodeBoxIrefEntry(ctx *decodeContext, d *decode.D) { irefBox, ok := ctx.parent().data.(*irefBox) if !ok { d.FieldRawLen("data", d.BitsLeft()) return } idSize := 16 if irefBox.version != 0 { idSize = 32 } d.FieldU("from_id", idSize) count := d.FieldU16("count") d.FieldArray("ids", func(d *decode.D) { for i := uint64(0); i < count; i++ { d.FieldU("id", idSize) } }) } func decodeBoxFtyp(d *decode.D) { brand := d.FieldUTF8("major_brand", 4) d.FieldU32("minor_version", scalar.UintFn(func(s scalar.Uint) (scalar.Uint, error) { switch brand { case "qt ": // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap1/qtff1.html#//apple_ref/doc/uid/TP40000939-CH203-BBCGDDDF // "For QuickTime movie files, this takes the form of four binary-coded decimal values, indicating the century, // year, and month of the QuickTime File Format Specification, followed by a binary coded decimal zero. For example, // for the June 2004 minor version, this field is set to the BCD values 20 04 06 00." s.Description = fmt.Sprintf("%.4d.%.2d", (s.Actual>>24)&0xff_ff, (s.Actual>>8)&0xff) } return s, nil })) numBrands := d.BitsLeft() / 8 / 4 var i int64 d.FieldArrayLoop("brands", func() bool { return i < numBrands }, func(d *decode.D) { d.FieldUTF8("brand", 4, brandDescriptions, scalar.ActualTrimSpace) i++ }) } func decodeBox(ctx *decodeContext, d *decode.D, typ string) { switch typ { case "ftyp": decodeBoxFtyp(d) case "styp": decodeBoxFtyp(d) case "mvhd": version := d.FieldU8("version") d.FieldU24("flags") switch version { case 0: d.FieldU32("creation_time", uintActualQuicktimeEpoch) d.FieldU32("modification_time", uintActualQuicktimeEpoch) d.FieldU32("time_scale") d.FieldU32("duration") case 1: d.FieldU64("creation_time", uintActualQuicktimeEpoch) d.FieldU64("modification_time", uintActualQuicktimeEpoch) d.FieldU32("time_scale") d.FieldU64("duration") default: return } d.FieldFP32("preferred_rate") d.FieldFP16("preferred_volume") d.FieldUTF8("reserved", 10) decodeMvhdFieldMatrix(d, "matrix_structure") d.FieldU32("preview_time") d.FieldU32("preview_duration") d.FieldU32("poster_time") d.FieldU32("selection_time") d.FieldU32("selection_duration") d.FieldU32("current_time") d.FieldU32("next_track_id") case "trak": decodeBoxesWithParentData(ctx, d, &trakBox{}) case "edts": decodeBoxes(ctx, d) case "elst": version := d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 d.FieldStructArrayLoop("entries", "entry", func() bool { return i < entryCount }, func(d *decode.D) { switch version { case 0: d.FieldS32("segment_duration") d.FieldS32("media_time", mediaTimeNames) case 1: d.FieldS64("segment_duration") d.FieldS64("media_time", mediaTimeNames) default: return } d.FieldFP32("media_rate") i++ }) case "tref": decodeBoxes(ctx, d) case "tkhd": var trackID int version := d.FieldU8("version") d.FieldStruct("flags", func(d *decode.D) { d.FieldU20("unused") d.FieldBool("size_is_aspect_ratio") d.FieldBool("in_preview") d.FieldBool("in_movie") d.FieldBool("enabled") }) switch version { case 0: d.FieldU32("creation_time", uintActualQuicktimeEpoch) d.FieldU32("modification_time", uintActualQuicktimeEpoch) trackID = int(d.FieldU32("track_id")) d.FieldU32("reserved1") d.FieldU32("duration") case 1: d.FieldU64("creation_time", uintActualQuicktimeEpoch) d.FieldU64("modification_time", uintActualQuicktimeEpoch) trackID = int(d.FieldU32("track_id")) d.FieldU32("reserved1") d.FieldU64("duration") default: return } d.FieldRawLen("reserved2", 8*8) d.FieldU16("layer") d.FieldU16("alternate_group") d.FieldFP16("volume") d.FieldU16("reserved3") decodeMvhdFieldMatrix(d, "matrix_structure") d.FieldFP32("track_width") d.FieldFP32("track_height") if t := ctx.currentTrakBox(); t != nil { t.trackID = trackID _ = ctx.currentTrack() } case "mdia": decodeBoxes(ctx, d) case "mdhd": version := d.FieldU8("version") d.FieldU24("flags") // TODO: timestamps switch version { case 0: d.FieldU32("creation_time", uintActualQuicktimeEpoch) d.FieldU32("modification_time", uintActualQuicktimeEpoch) d.FieldU32("time_scale") d.FieldU32("duration") case 1: d.FieldU64("creation_time", uintActualQuicktimeEpoch) d.FieldU64("modification_time", uintActualQuicktimeEpoch) d.FieldU32("time_scale") d.FieldU64("duration") default: return } d.FieldStrFn("language", decodeLang) d.FieldU16("quality") case "vmhd": d.FieldU8("version") d.FieldU24("flags") d.FieldU16("graphicsmode") d.FieldArray("opcolor", func(d *decode.D) { // TODO: is FP16? d.FieldU16("value") d.FieldU16("value") d.FieldU16("value") }) case "hdlr": d.FieldU8("version") d.FieldU24("flags") d.FieldUTF8NullFixedLen("component_type", 4) subType := d.FieldUTF8("component_subtype", 4, subTypeNames, scalar.ActualTrimSpace) d.FieldUTF8NullFixedLen("component_manufacturer", 4) d.FieldU32("component_flags") d.FieldU32("component_flags_mask") // TODO: sometimes has a length prefix byte, how to know? d.FieldUTF8NullFixedLen("component_name", int(d.BitsLeft()/8)) if t := ctx.currentTrack(); t != nil { t.seenHdlr = true // component_type seems to be all zero sometimes so can't look for "mhlr" switch subType { case "vide", "soun": t.subType = subType } } else if m := ctx.currentMetaBox(); m != nil { m.subType = subType } case "minf": decodeBoxes(ctx, d) case "dinf": decodeBoxes(ctx, d) case "dref": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 d.FieldStructArrayLoop("boxes", "box", func() bool { return i < entryCount }, func(d *decode.D) { size := d.FieldU32("size") d.FieldUTF8("type", 4) d.FieldU8("version") d.FieldU24("flags") dataSize := size - 12 d.FieldRawLen("data", int64(dataSize*8)) i++ }) case "stbl": decodeBoxes(ctx, d) case "stsd": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 // note called "boxes" here instead of "sample_descriptions" and data format is named "type". // this is to make it easier to threat them as normal boxes d.FieldArrayLoop("boxes", func() bool { return i < entryCount }, func(d *decode.D) { d.FieldStruct("box", func(d *decode.D) { size := d.FieldU32("size") dataFormat := d.FieldUTF8("type", 4, dataFormatNames) subType := "" if t := ctx.currentTrack(); t != nil { t.sampleDescriptions = append(t.sampleDescriptions, sampleDescription{ dataFormat: dataFormat, }) if t.seenHdlr { subType = t.subType } else { // TODO: seems to be ffmpeg mov.c, where is this documented in specs? // no hdlr box found, guess using dataFormat // ex PNG samples but there is no hdlr box saying it's video, but the esds says MPEGObjectTypePNG switch dataFormat { case "mp4v": subType = "vide" case "mp4a": subType = "soun" } } } d.FramedFn(int64(size-8)*8, func(d *decode.D) { d.FieldRawLen("reserved", 6*8) d.FieldU16("data_reference_index") switch subType { case "soun", "vide": version := d.FieldU16("version") d.FieldU16("revision_level") d.FieldU32("max_packet_size") // TODO: vendor for some subtype? switch subType { case "soun": // AudioSampleEntry // https://developer.apple.com/library/archive/documentation/QuickTime/QTFF/QTFFChap3/qtff3.html#//apple_ref/doc/uid/TP40000939-CH205-SW1 switch version { case 0: d.FieldU16("num_audio_channels") d.FieldU16("sample_size") d.FieldU16("compression_id") d.FieldU16("packet_size") d.FieldFP32("sample_rate") if d.BitsLeft() > 0 { decodeBoxes(ctx, d) } case 1: d.FieldU16("num_audio_channels") d.FieldU16("sample_size") d.FieldU16("compression_id") d.FieldU16("packet_size") d.FieldFP32("sample_rate") d.FieldU32("samples_per_packet") d.FieldU32("bytes_per_packet") d.FieldU32("bytes_per_frame") d.FieldU32("bytes_per_sample") if d.BitsLeft() > 0 { decodeBoxes(ctx, d) } case 2: d.FieldU16("always_3") d.FieldU16("always_16") d.FieldU16("always_minus_2") // TODO: as in const -2? d.FieldU16("always_0") d.FieldU32("always_65536") d.FieldU32("size_of_struct_only") d.FieldF64("audio_sample_rate") d.FieldU32("num_audio_channels") d.FieldU32("always_7f000000") d.FieldU32("const_bits_per_channel") d.FieldU32("format_specific_flags") d.FieldU32("const_bytes_per_audio_packet") d.FieldU32("const_lpcm_frames_per_audio_packet") if d.BitsLeft() > 0 { decodeBoxes(ctx, d) } default: d.FieldRawLen("data", d.BitsLeft()) } case "vide": // VideoSampleEntry // TODO: version 0 and 1 same? switch version { case 0, 1: d.FieldU32("temporal_quality") d.FieldU32("spatial_quality") d.FieldU16("width") d.FieldU16("height") d.FieldFP32("horizontal_resolution") d.FieldFP32("vertical_resolution") d.FieldU32("data_size") d.FieldU16("frame_count") d.FieldUTF8ShortStringFixedLen("compressor_name", 32) d.FieldU16("depth") d.FieldS16("color_table_id") // TODO: if 0 decode ctab if d.BitsLeft() > 0 { decodeBoxes(ctx, d) } default: d.FieldRawLen("data", d.BitsLeft()) } // case "hint": TODO: Hint entry default: d.FieldRawLen("data", d.BitsLeft()) } default: d.FieldRawLen("data", d.BitsLeft()) } }) }) i++ }) case "avcC": _, v := d.FieldFormat("descriptor", avcDCRFormat, nil) avcDcrOut, ok := v.(format.AvcDcrOut) if !ok { panic(fmt.Sprintf("expected AvcDcrOut got %#+v", v)) } if t := ctx.currentTrack(); t != nil { t.formatInArg = format.AvcAuIn{LengthSize: avcDcrOut.LengthSize} //nolint:gosimple } case "hvcC": _, v := d.FieldFormat("descriptor", hevcCDCRFormat, nil) hevcDcrOut, ok := v.(format.HevcDcrOut) if !ok { panic(fmt.Sprintf("expected HevcDcrOut got %#+v", v)) } if t := ctx.currentTrack(); t != nil { t.formatInArg = format.HevcAuIn{LengthSize: hevcDcrOut.LengthSize} //nolint:gosimple } case "dfLa": d.FieldU8("version") d.FieldU24("flags") _, v := d.FieldFormat("descriptor", flacMetadatablocksFormat, nil) flacMetadatablockOut, ok := v.(format.FlacMetadatablocksOut) if !ok { panic(fmt.Sprintf("expected FlacMetadatablockOut got %#+v", v)) } if flacMetadatablockOut.HasStreamInfo { if t := ctx.currentTrack(); t != nil { t.formatInArg = format.FlacFrameIn{BitsPerSample: int(flacMetadatablockOut.StreamInfo.BitsPerSample)} } } case "dOps": d.FieldFormat("descriptor", opusPacketFrameFormat, nil) case "av1C": d.FieldFormat("descriptor", av1CCRFormat, nil) case "vpcC": d.FieldU8("version") d.FieldU24("flags") d.FieldFormat("descriptor", vpxCCRFormat, nil) case "esds": d.FieldU32("version") _, v := d.FieldFormat("descriptor", mpegESFormat, nil) mpegEsOut, ok := v.(format.MpegEsOut) if !ok { panic(fmt.Sprintf("expected mpegEsOut got %#+v", v)) } if t := ctx.currentTrack(); t != nil && len(mpegEsOut.DecoderConfigs) > 0 { dc := mpegEsOut.DecoderConfigs[0] t.objectType = dc.ObjectType t.formatInArg = format.AACFrameIn{ObjectType: dc.ASCObjectType} } case "stts": d.FieldU8("version") d.FieldU24("flags") numEntries := d.FieldU32("entry_count") var i uint64 d.FieldStructArrayLoop("entries", "entry", func() bool { return i < numEntries }, func(d *decode.D) { d.FieldU32("count") d.FieldU32("delta") i++ }) case "stsc": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 d.FieldStructArrayLoop("entries", "entry", func() bool { return i < entryCount }, func(d *decode.D) { firstChunk := uint32(d.FieldU32("first_chunk")) samplesPerChunk := uint32(d.FieldU32("samples_per_chunk")) d.FieldU32("sample_description_id") if t := ctx.currentTrack(); t != nil { t.stsc = append(t.stsc, stsc{ firstChunk: int(firstChunk), samplesPerChunk: int(samplesPerChunk), }) } i++ }) case "stsz": d.FieldU8("version") d.FieldU24("flags") // TODO: bytes_per_sample from audio stsd? sampleSize := d.FieldU32("sample_size") entryCount := d.FieldU32("entry_count") t := ctx.currentTrack() if t != nil && len(t.stsz) > 0 { d.Errorf("multiple stsz or stz2 boxes") } if sampleSize == 0 { var i uint64 d.FieldArrayLoop("entries", func() bool { return i < entryCount }, func(d *decode.D) { size := uint32(d.FieldU32("size")) if t != nil { t.stsz = append(t.stsz, stsz{ size: int64(size), count: 1, }) } i++ }) } else { if t != nil { t.stsz = append(t.stsz, stsz{ size: int64(sampleSize), count: int(entryCount), }) } } case "stz2": d.FieldU8("version") d.FieldU24("flags") fieldSize := d.FieldU32("field_size") if fieldSize > 16 { d.Errorf("field_size %d > 16", fieldSize) } entryCount := d.FieldU32("entry_count") var i uint64 t := ctx.currentTrack() if t != nil && len(t.stsz) > 0 { d.Errorf("multiple stsz or stz2 boxes") } d.FieldArrayLoop("entries", func() bool { return i < entryCount }, func(d *decode.D) { size := uint32(d.FieldU("size", int(fieldSize))) if t != nil { t.stsz = append(t.stsz, stsz{ size: int64(size), count: 1, }) } i++ }) case "stco": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 t := ctx.currentTrack() d.FieldArrayLoop("entries", func() bool { return i < entryCount }, func(d *decode.D) { chunkOffset := d.FieldU32("chunk_offset") if t != nil { t.stco = append(t.stco, int64(chunkOffset)) } i++ }) case "stss": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { d.FieldU32("sample_number") } }) case "sdtp": d.FieldU8("version") d.FieldU24("flags") // TODO: should be count from stsz // TODO: can we know count here or do we need to defer decoding somehow? d.FieldArray("entries", func(d *decode.D) { for d.NotEnd() { d.FieldStruct("entry", func(d *decode.D) { d.FieldU2("reserved") values := scalar.UintMapSymStr{ 0: "unknown", 1: "yes", 2: "no", } d.FieldU2("sample_depends_on", values) d.FieldU2("sample_is_depended_on", values) d.FieldU2("sample_has_redundancy", values) }) } }) case "ctts": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 d.FieldStructArrayLoop("entries", "entry", func() bool { return i < entryCount }, func(d *decode.D) { d.FieldS32("sample_count") d.FieldS32("sample_offset") i++ }) // TODO: refactor: merge with stco? case "co64": d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") var i uint64 t := ctx.currentTrack() d.FieldArrayLoop("entries", func() bool { return i < entryCount }, func(d *decode.D) { offset := d.FieldU64("offset") if t != nil { t.stco = append(t.stco, int64(offset)) } i++ }) case "sidx": version := d.FieldU8("version") d.FieldU24("flags") d.FieldU32("reference_id") d.FieldU32("timescale") if version == 0 { d.FieldU32("pts") d.FieldU32("offset") } else { d.FieldU64("pts") d.FieldU64("offset") } d.FieldU16("reserved") numEntries := d.FieldU16("entry_count") var i uint64 d.FieldStructArrayLoop("entries", "entry", func() bool { return i < numEntries }, func(d *decode.D) { d.FieldU1("reference_type") d.FieldU31("size") d.FieldU32("duration") d.FieldU1("starts_with_sap") d.FieldU3("sap_type") d.FieldU28("sap_delta_time") i++ }) case "udta": decodeBoxes(ctx, d) case "meta": // TODO: meta box sometimes has a 4 byte unknown field? (flag/version?) maybeFlags := d.PeekUintBits(32) if maybeFlags == 0 { // TODO: rename? d.FieldU32("maybe_flags") } decodeBoxesWithParentData(ctx, d, &metaBox{}) case "ilst": if mb := ctx.currentMetaBox(); mb != nil && mb.keys != nil && len(mb.keys.keys) > 0 { // meta box had a keys box var b [4]byte typeSymMapper := scalar.StrMapSymStr{} for k, v := range mb.keys.keys { // type will be a uint32 be integer // +1 as they seem to be counted from 1 binary.BigEndian.PutUint32(b[:], uint32(k+1)) typeSymMapper[string(b[:])] = v.namespace + "." + v.name } decodeBoxes(ctx, d, typeSymMapper) } else { decodeBoxes(ctx, d) } case "data": d.FieldU8("version") d.FieldU24("flags") d.FieldU32("reserved") if ctx.isParent("covr") { d.FieldFormatOrRawLen("data", d.BitsLeft(), imageFormat, nil) } else { d.FieldUTF8("data", int(d.BitsLeft()/8)) } case "moov": decodeBoxes(ctx, d) case "moof": offset := (d.Pos() / 8) - 8 decodeBoxesWithParentData(ctx, d, &moofBox{offset: offset}) // Track Fragment case "traf": decodeBoxesWithParentData(ctx, d, &trafBox{}) // Movie Fragment Header case "mfhd": d.FieldU8("version") d.FieldU24("flags") d.FieldU32("sequence_number") // Track Fragment Header case "tfhd": d.FieldU8("version") baseDataOffsetPresent := false sampleDescriptionIndexPresent := false defaultSampleDurationPresent := false defaultSampleSizePresent := false defaultSampleFlagsPresent := false d.FieldStruct("flags", func(d *decode.D) { d.FieldU7("unused0") d.FieldBool("duration_is_empty") d.FieldU10("unused1") defaultSampleFlagsPresent = d.FieldBool("default_sample_flags_present") defaultSampleSizePresent = d.FieldBool("default_sample_size_present") defaultSampleDurationPresent = d.FieldBool("default_sample_duration_present") d.FieldU1("unused2") sampleDescriptionIndexPresent = d.FieldBool("sample_description_index_present") baseDataOffsetPresent = d.FieldBool("base_data_offset_present") }) trackID := int(d.FieldU32("track_id")) m := &moof{} if mb := ctx.currentMoofBox(); mb != nil { m.offset = mb.offset } baseDataOffset := int64(0) if baseDataOffsetPresent { baseDataOffset = int64(d.FieldU64("base_data_offset")) } if sampleDescriptionIndexPresent { m.defaultSampleDescriptionIndex = int(d.FieldU32("sample_description_index")) } if defaultSampleDurationPresent { d.FieldU32("default_sample_duration") } if defaultSampleSizePresent { m.defaultSampleSize = int64(d.FieldU32("default_sample_size")) } if defaultSampleFlagsPresent { d.FieldStruct("default_sample_flags", decodeSampleFlags) } if t := ctx.currentTrafBox(); t != nil { t.trackID = trackID t.moof = m t.baseDataOffset = baseDataOffset } if t := ctx.currentTrack(); t != nil { t.moofs = append(t.moofs, m) } // Track Fragment Run case "trun": m := &moof{} if t := ctx.currentTrafBox(); t != nil { m = t.moof } d.FieldU8("version") sampleCompositionTimeOffsetsPresent := false sampleFlagsPresent := false sampleSizePresent := false sampleDurationPresent := false firstSampleFlagsPresent := false dataOffsetPresent := false d.FieldStruct("flags", func(d *decode.D) { d.FieldU12("unused0") sampleCompositionTimeOffsetsPresent = d.FieldBool("sample_composition_time_offsets_present") sampleFlagsPresent = d.FieldBool("sample_flags_present") sampleSizePresent = d.FieldBool("sample_size_present") sampleDurationPresent = d.FieldBool("sample_duration_present") d.FieldU5("unused1") firstSampleFlagsPresent = d.FieldBool("first_sample_flags_present") d.FieldU1("unused2") dataOffsetPresent = d.FieldBool("data_offset_present") }) sampleCount := d.FieldU32("sample_count") dataOffset := int64(0) if dataOffsetPresent { dataOffset = d.FieldS32("data_offset") } if firstSampleFlagsPresent { d.FieldStruct("first_sample_flags", decodeSampleFlags) } if sampleCount > maxSampleEntryCount { d.Errorf("too many sample trun entries %d > %d", sampleCount, maxSampleEntryCount) } t := trun{ dataOffset: dataOffset, } d.FieldArray("samples", func(d *decode.D) { for i := uint64(0); i < sampleCount; i++ { sampleSize := m.defaultSampleSize d.FieldStruct("sample", func(d *decode.D) { if sampleDurationPresent { d.FieldU32("sample_duration") } if sampleSizePresent { sampleSize = int64(d.FieldU32("sample_size")) } if sampleFlagsPresent { d.FieldStruct("sample_flags", decodeSampleFlags) } if sampleCompositionTimeOffsetsPresent { d.FieldU32("sample_composition_time_offset") } }) t.samplesSizes = append(t.samplesSizes, sampleSize) } }) m.truns = append(m.truns, t) case "tfdt": version := d.FieldU8("version") d.FieldU24("flags") if version == 1 { d.FieldU64("start_time") } else { d.FieldU32("start_time") } case "mvex": decodeBoxes(ctx, d) case "trex": d.FieldU8("version") d.FieldU24("flags") d.FieldU32("track_id") d.FieldU32("default_sample_description_index") d.FieldU32("default_sample_duration") d.FieldU32("default_sample_size") d.FieldStruct("default_sample_flags", decodeSampleFlags) case "mfra": decodeBoxes(ctx, d) case "tfra": version := d.FieldU8("version") d.FieldU24("flags") d.FieldU32("track_id") d.FieldU26("reserved") lengthSizeOfTrafNum := d.FieldU2("length_size_of_traf_num") sampleLengthSizeOfTrunNum := d.FieldU2("sample_length_size_of_trun_num") lengthSizeOfSampleNum := d.FieldU2("length_size_of_sample_num") entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { d.FieldStruct("entry", func(d *decode.D) { if version == 1 { d.FieldU64("time") d.FieldU64("moof_offset") } else { d.FieldU32("time") d.FieldU32("moof_offset") } d.FieldU("traf_number", int(lengthSizeOfTrafNum+1)*8) d.FieldU("trun_number", int(sampleLengthSizeOfTrunNum+1)*8) d.FieldU("sample_number", int(lengthSizeOfSampleNum+1)*8) }) } }) case "mfro": d.FieldU8("version") d.FieldU24("flags") d.FieldU32("mfra_size") // TODO: item location // HEIC image case "iloc": version := d.FieldU8("version") d.FieldU24("flags") offsetSize := d.FieldU4("offset_size") lengthSize := d.FieldU4("length_size") baseOffsetSize := d.FieldU4("base_offset_size") var indexSize uint64 switch version { case 1, 2: indexSize = d.FieldU4("index_size") default: d.FieldU4("reserved") } var itemCount uint64 if version < 2 { itemCount = d.FieldU16("item_count") } else { itemCount = d.FieldU32("item_count") } d.FieldArray("items", func(d *decode.D) { for i := uint64(0); i < itemCount; i++ { d.FieldStruct("item", func(d *decode.D) { switch version { case 0, 1: d.FieldU16("id") case 2: d.FieldU32("id") } switch version { case 1, 2: d.FieldU12("reserved") d.FieldU4("construction_method") } d.FieldU16("data_reference_index") d.FieldU("base_offset", int(baseOffsetSize)*8) extentCount := d.FieldU16("extent_count") d.FieldArray("extends", func(d *decode.D) { for i := uint64(0); i < extentCount; i++ { d.FieldStruct("extent", func(d *decode.D) { if (version == 1 || version == 2) && indexSize > 0 { d.FieldU("index", int(offsetSize)*8) } d.FieldU("offset", int(offsetSize)*8) d.FieldU("length", int(lengthSize)*8) }) } }) }) } }) case "infe": d.FieldU8("version") d.FieldU24("flags") d.FieldU16("id") d.FieldU16("protection_index") d.FieldUTF8Null("item_name") // TODO: really optional? seems so if d.NotEnd() { d.FieldUTF8Null("content_type") } if d.NotEnd() { d.FieldUTF8Null("content_encoding") } case "iinf": version := d.FieldU8("version") d.FieldU24("flags") if version == 0 { _ = d.FieldU16("entry_count") decodeBoxes(ctx, d) } else { d.FieldRawLen("data", d.BitsLeft()) } case "iprp": decodeBoxes(ctx, d) case "ipco": decodeBoxes(ctx, d) case "ID32": d.FieldU8("version") d.FieldU24("flags") d.FieldU1("pad") // ISO-639-2/T as 3*5 bit integers - 0x60 d.FieldStrFn("language", func(d *decode.D) string { s := "" for i := 0; i < 3; i++ { s += fmt.Sprintf("%c", int(d.U5())+0x60) } return s }) d.FieldFormat("data", id3v2Format, nil) case "mehd": version := d.FieldU8("version") d.FieldU24("flags") switch version { case 0: d.FieldU32("fragment_duration") case 1: d.FieldU64("fragment_duration") } case "pssh": var ( systemIDCommon = [16]byte{0x10, 0x77, 0xef, 0xec, 0xc0, 0xb2, 0x4d, 0x02, 0xac, 0xe3, 0x3c, 0x1e, 0x52, 0xe2, 0xfb, 0x4b} systemIDWidevine = [16]byte{0xed, 0xef, 0x8b, 0xa9, 0x79, 0xd6, 0x4a, 0xce, 0xa3, 0xc8, 0x27, 0xdc, 0xd5, 0x1d, 0x21, 0xed} systemIDPlayReady = [16]byte{0x9a, 0x04, 0xf0, 0x79, 0x98, 0x40, 0x42, 0x86, 0xab, 0x92, 0xe6, 0x5b, 0xe0, 0x88, 0x5f, 0x95} systemIDFairPlay = [16]byte{0x94, 0xce, 0x86, 0xfb, 0x07, 0xff, 0x4f, 0x43, 0xad, 0xb8, 0x93, 0xd2, 0xfa, 0x96, 0x8c, 0xa2} ) systemIDNames := scalar.RawBytesMap{ {Bytes: systemIDCommon[:], Scalar: scalar.BitBuf{Sym: "common"}}, {Bytes: systemIDWidevine[:], Scalar: scalar.BitBuf{Sym: "widevine"}}, {Bytes: systemIDPlayReady[:], Scalar: scalar.BitBuf{Sym: "playready"}}, {Bytes: systemIDFairPlay[:], Scalar: scalar.BitBuf{Sym: "fairplay"}}, } version := d.FieldU8("version") d.FieldU24("flags") systemIDBR := d.FieldRawLen("system_id", 16*8, systemIDNames) // TODO: make nicer systemID := d.ReadAllBits(systemIDBR) switch version { case 0: case 1: kidCount := d.FieldU32("kid_count") d.FieldArray("kids", func(d *decode.D) { for i := uint64(0); i < kidCount; i++ { d.FieldRawLen("kid", 16*8) } }) } dataLen := d.FieldU32("data_size") switch { case bytes.Equal(systemID, systemIDWidevine[:]): d.FieldFormatLen("data", int64(dataLen)*8, protoBufWidevineFormat, nil) case bytes.Equal(systemID, systemIDPlayReady[:]): d.FieldFormatLen("data", int64(dataLen)*8, psshPlayreadyFormat, nil) case systemID == nil: fallthrough default: d.FieldRawLen("data", int64(dataLen)*8) } case "sinf": decodeBoxes(ctx, d) case "frma": format := d.FieldUTF8("format", 4) // set to original data format // TODO: how to handle multiple descriptors? track current? if t := ctx.currentTrack(); t != nil && len(t.sampleDescriptions) > 0 { t.sampleDescriptions[0].originalFormat = format } case "schm": d.FieldU8("version") d.FieldU24("flags") d.FieldUTF8("encryption_type", 4) d.FieldU16("encryption_version") if d.BitsLeft() > 0 { d.FieldUTF8("uri", int(d.BitsLeft())/8) } case "schi": decodeBoxes(ctx, d) case "btrt": d.FieldU32("decoding_buffer_size") d.FieldU32("max_bitrate") d.FieldU32("avg_bitrate") case "pasp": d.FieldU32("h_spacing") d.FieldU32("v_spacing") case "uuid": d.FieldRawLen("uuid", 16*8, scalar.RawUUID, uuidNames) d.FieldRawLen("data", d.BitsLeft()) case "keys": mb := ctx.currentMetaBox() var kb *keysBox if mb != nil { kb = &keysBox{} mb.keys = kb } d.FieldU8("version") d.FieldU24("flags") entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { d.FieldStruct("entry", func(d *decode.D) { keySize := d.FieldU32("key_size") namespace := d.FieldUTF8("key_namespace", 4) name := d.FieldUTF8("key_name", int(keySize)-8) if kb != nil { kb.keys = append(kb.keys, keysBoxKey{ namespace: namespace, name: name, }) } }) } }) case "wave": decodeBoxes(ctx, d) case "saiz": d.FieldU8("version") flags := d.FieldU24("flags") if flags&0b1 != 0 { d.FieldU32("aux_info_type") d.FieldU32("aux_info_type_parameter") } defaultSampleInfoSize := d.FieldU8("default_sample_info_size") sampleCount := d.FieldU32("sample_count") if defaultSampleInfoSize == 0 { d.FieldArray("sample_size_info_table", func(d *decode.D) { for i := uint64(0); i < sampleCount; i++ { d.FieldU8("sample_size") } }) } case "sgpd": version := d.FieldU8("version") d.FieldU24("flags") d.FieldUTF8("grouping_type", 4) var defaultLength uint64 if version == 1 { defaultLength = d.FieldU32("default_length") } if version >= 2 { d.FieldU32("default_sample_description_index") } entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { entryLen := defaultLength if version == 1 { if defaultLength == 0 { entryLen = d.FieldU32("description_length") } else if entryLen == 0 { d.Fatalf("sgpd groups entry len <= 0 version 1") } } else if entryLen == 0 { d.Fatalf("sgpd groups entry len <= 0") } d.FieldRawLen("data", int64(entryLen)*8) } }) case "sbgp": version := d.FieldU8("version") d.FieldU24("flags") d.FieldUTF8("grouping_type", 4) if version == 1 { d.FieldU32("grouping_type_parameter") } entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { d.FieldStruct("entry", func(d *decode.D) { d.FieldU32("sample_count") d.FieldU32("group_description_index") }) } }) case "saio": version := d.FieldU8("version") flags := d.FieldU24("flags") if flags&0b1 != 0 { d.FieldU32("aux_info_type") d.FieldU32("aux_info_type_parameter") } entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { if version == 0 { d.FieldU32("offset") } else { d.FieldU64("offset") } } }) case "senc": d.FieldU8("version") flags := d.FieldU24("flags") t := ctx.currentTrack() if t == nil { // need to know iv size return } m := &moof{} if t := ctx.currentTrafBox(); t != nil { m = t.moof } s := senc{} sampleCount := d.FieldU32("sample_count") d.FieldArray("samples", func(d *decode.D) { for i := uint64(0); i < sampleCount; i++ { d.FieldStruct("entry", func(d *decode.D) { if t.defaultIVSize != 0 { d.FieldRawLen("iv", int64(t.defaultIVSize*8)) } if flags&0b10 != 0 { subSampleCount := d.FieldU16("subsample_count") d.FieldArray("subsamples", func(d *decode.D) { for i := uint64(0); i < subSampleCount; i++ { d.FieldStruct("entry", func(d *decode.D) { d.FieldU16("bytes_of_clean_data") d.FieldU32("bytes_of_encrypted_data") }) } }) } }) // TODO: add iv etc s.entries = append(s.entries, struct{}{}) } }) m.sencs = append(m.sencs, s) case "tenc": version := d.FieldU8("version") d.FieldU24("flags") d.FieldU8("reserved0") switch version { case 0: d.FieldU8("reserved1") default: d.FieldU4("default_crypto_bytes") d.FieldU4("default_skip_bytes") } defaultIsEncrypted := d.FieldU8("default_is_encrypted") defaultIVSize := d.FieldU8("default_iv_size") d.FieldRawLen("default_kid", 8*16) if defaultIsEncrypted != 0 && defaultIVSize == 0 { defaultConstantIVSize := d.FieldU8("default_constant_iv_size") d.FieldRawLen("default_constant_iv", int64(defaultConstantIVSize)*8) } if t := ctx.currentTrack(); t != nil { t.defaultIVSize = int(defaultIVSize) } case "covr": decodeBoxes(ctx, d) case "dec3": d.FieldU13("data_rate") d.FieldU3("num_ind_sub") d.FieldU2("fscod") d.FieldU5("bsid") d.FieldU5("bsmod") d.FieldU3("acmod") d.FieldU1("lfeon") d.FieldU3("reserved0") numDepSub := d.FieldU4("num_dep_sub") if numDepSub > 0 { d.FieldU9("chan_loc") } else { d.FieldU1("reserved1") } if d.BitsLeft() >= 16 { d.FieldU7("reserved2") ec3JocFlag := d.FieldBool("ec3_job_flag") if ec3JocFlag { d.FieldU1("ec3_job_complexity") } } case "dac4": d.FieldU3("ac4_dsi_version") bitstreamVersion := d.FieldU7("bitstream_version") d.FieldU1("fs_index") d.FieldU4("frame_rate_index") d.FieldU9("n_presentation") if bitstreamVersion > 1 { hasProgramID := d.FieldBool("has_program_id") if hasProgramID { d.FieldU16("short_program_id") hasUUID := d.FieldBool("has_uuid") if hasUUID { d.FieldRawLen("uuid", 16*8) } } } // if ac4DsiVersion == 1 { // d.FieldU2("bit_rate_mode") // d.FieldU32("bit_rate") // d.FieldU32("bit_rate_precision") // } // if ac4DsiVersion == 1 { // d.FieldArray("presentations", func(d *decode.D) { // for i := uint64(0); i < nPresentation; i++ { // d.FieldStruct("presentation", func(d *decode.D) { // d.FieldU8("presentation_version") // presBytes := d.FieldUintFn("pres_bytes", func() (uint64, decode.DisplayFormat, string) { // n := d.U8() // if n == 0x0ff { // n += d.U16() // } // return n, decode.NumberDecimal, "" // }) // d.FieldRawLen("data", int64(presBytes)*8) // }) // } // }) // } if d.BitsLeft() > 0 { d.FieldRawLen("data", d.BitsLeft()) } case "tapt": decodeBoxes(ctx, d) case "clef": d.FieldU8("version") d.FieldU24("flags") d.FieldFP32("width") d.FieldFP32("height") case "prof": d.FieldU8("version") d.FieldU24("flags") d.FieldFP32("width") d.FieldFP32("height") case "enof": d.FieldU8("version") d.FieldU24("flags") d.FieldFP32("width") d.FieldFP32("height") case "clap": d.FieldU32("aperture_width_n") d.FieldU32("aperture_width_d") d.FieldU32("aperture_height_n") d.FieldU32("aperture_height_d") d.FieldU32("horiz_off_n") d.FieldU32("horiz_off_d") d.FieldU32("vert_off_n") d.FieldU32("vert_off_d") case "smhd": d.FieldU8("version") d.FieldU24("flags") d.FieldFP16("balance") d.FieldU16("reserved") case "colr": parameterType := d.FieldUTF8("parameter_type", 4) switch parameterType { case "nclx", "nclc": d.FieldU16("primaries_index", format.ISO_23091_2_ColourPrimariesMap) d.FieldU16("transfer_function_index", format.ISO_23091_2_TransferCharacteristicMap) d.FieldU16("matrix_index", format.ISO_23091_2_MatrixCoefficients) switch parameterType { case "nclx": d.FieldU8("color_range") } case "prof": d.FieldFormat("profile", iccProfileFormat, nil) default: d.FieldRawLen("data", d.BitsLeft()) } case "ispe": d.FieldU8("version") d.FieldU24("flags") d.FieldU32("image_width") d.FieldU32("image_height") case "ipma": version := d.FieldU8("version") flags := d.FieldU24("flags") entryCount := d.FieldU32("entry_count") d.FieldArray("entries", func(d *decode.D) { for i := uint64(0); i < entryCount; i++ { d.FieldStruct("entry", func(d *decode.D) { if version < 1 { d.FieldU16("item_id") } else { d.FieldU32("item_id") } associationCount := d.FieldU8("association_count") d.FieldArray("associations", func(d *decode.D) { for j := uint64(0); j < associationCount; j++ { d.FieldStruct("association", func(d *decode.D) { d.FieldBool("essential") if flags&0b1 != 0 { d.FieldU15("property_index") } else { d.FieldU7("item_id") } }) } }) }) } }) case "pitm": version := d.FieldU8("version") d.FieldU24("flags") if version < 1 { d.FieldU16("item_id") } else { d.FieldU32("item_id") } case "iref": version := d.FieldU8("version") d.FieldU24("flags") decodeBoxesWithParentData(ctx, d, &irefBox{version: int(version)}) case "dimg": decodeBoxIrefEntry(ctx, d) case "thmb": decodeBoxIrefEntry(ctx, d) case "cdsc": decodeBoxIrefEntry(ctx, d) case "irot": d.FieldU8("rotation", scalar.UintMapSymUint{ 0: 0, 1: 90, 2: 180, 3: 270, }) case "hnti": decodeBoxes(ctx, d) case "hint": decodeBoxes(ctx, d) case "pdin": d.FieldU8("version") d.FieldU24("flags") d.FieldArray("entries", func(d *decode.D) { for !d.End() { d.FieldStruct("entry", func(d *decode.D) { d.FieldU32("rate") d.FieldU32("initial_delay") }) } }) default: // there are at least 4 ways to encode udta metadata in mov/mp4 files. // // mdta subtype: // // udta: // meta // hdlr with subtype "mdta" // keys with 1-based to key namespace.name table // ilst // -box (box type is 32bit BE 1-based number into table above) // data box with value // // mdir subtype: // // udta // meta // hdlr with subtype "mdir" // ilst // © or similar // data with value // // no-meta-box with length and language: // // udta // © or similar // value length and language // // no-meta-box value rest of box: // // udta // // value rest of box if mb := ctx.currentMetaBox(); mb != nil && ctx.parent().typ == "ilst" { // unknown type under a meta box with ilst as parent, decode as boxes // is probably one or more data boxes decodeBoxes(ctx, d) } else if ctx.parent().typ == "udta" { // TODO: better probe? ffmpeg uses box name heuristics? // if 16 length field seems to match assume box with length, language and value // otherwise decode as box with value rest of box probeLength := d.PeekUintBits(16) // +2 for length field, +2 for language field if (probeLength+2+2)*8 == uint64(d.BitsLeft()) { length := d.FieldU16("length") d.FieldStrFn("language", decodeLang) d.FieldUTF8("value", int(length)) } else { d.FieldUTF8("value", int(d.BitsLeft()/8)) } } else { d.FieldRawLen("data", d.BitsLeft()) } } }