package flac import ( "encoding/binary" "math/bits" "github.com/wader/fq/format" "github.com/wader/fq/format/registry" "github.com/wader/fq/internal/mathextra" "github.com/wader/fq/pkg/checksum" "github.com/wader/fq/pkg/decode" "github.com/wader/fq/pkg/scalar" ) func init() { registry.MustRegister(decode.Format{ Name: format.FLAC_FRAME, Description: "FLAC frame", DecodeFn: frameDecode, }) } const ( SampleRateStreaminfo = 0b0000 ) const ( SampleSizeStreaminfo = 0b000 ) const ( BlockingStrategyFixed = 0 BlockingStrategyVariable = 1 ) var BlockingStrategyNames = scalar.UToSymStr{ BlockingStrategyFixed: "fixed", BlockingStrategyVariable: "variable", } const ( BlockSizeEndOfHeader8 = 0b0110 BlockSizeEndOfHeader16 = 0b0111 ) const ( SampeleRateEndOfHeader8 = 0b1100 SampeleRateEndOfHeader16 = 0b1101 SampeleRateEndOfHeader160 = 0b1110 ) const ( SubframeConstant = "constant" SubframeVerbatim = "verbatim" SubframeFixed = "fixed" SubframeLPC = "lpc" SubframeReserved = "reserved" ) const ( ChannelLeftSide = 0b1000 ChannelSideRight = 0b1001 ChannelMidSide = 0b1010 ) const ( ResidualCodingMethodRice = 0b00 ResidualCodingMethodRice2 = 0b01 ) var ResidualCodingMethodMap = scalar.UToScalar{ ResidualCodingMethodRice: scalar.S{Sym: uint64(4), Description: "rice"}, ResidualCodingMethodRice2: scalar.S{Sym: uint64(5), Description: "rice2"}, } // TODO: generic enough? func utf8Uint(d *decode.D) uint64 { n := d.U8() // leading ones, bit negate and count zeroes c := bits.LeadingZeros8(^uint8(n)) // 0b0xxxxxxx 1 byte // 0b110xxxxx 2 byte // 0b1110xxxx 3 byte // 0b11110xxx 4 byte switch c { case 0: // nop case 2, 3, 4: n = n & ((1 << (8 - c - 1)) - 1) for i := 1; i < c; i++ { n = n<<6 | d.U8()&0x3f } default: d.Errorf("invalid UTF8Uint") } return n } // in argument is an optional FlacFrameIn struct with stream info func frameDecode(d *decode.D, in interface{}) interface{} { var inStreamInfo *format.FlacStreamInfo ffi, ok := in.(format.FlacFrameIn) if ok { inStreamInfo = &ffi.StreamInfo } frameStart := d.Pos() blockSize := 0 channelAssignment := uint64(0) channels := 0 sampleSize := 0 sideChannelIndex := -1 d.FieldStruct("header", func(d *decode.D) { // <14> 11111111111110 d.FieldU14("sync", d.AssertU(0b11111111111110), scalar.Bin) // <1> Reserved // 0 : mandatory value // 1 : reserved for future use d.FieldU1("reserved0", d.AssertU(0)) // <1> Blocking strategy: // 0 : fixed-blocksize stream; frame header encodes the frame number // 1 : variable-blocksize stream; frame header encodes the sample number blockingStrategy := d.FieldU1("blocking_strategy", BlockingStrategyNames) // <4> Block size in inter-channel samples: // 0000 : reserved // 0001 : 192 samples // 0010-0101 : 576 * (2^(n-2)) samples, i.e. 576/1152/2304/4608 // 0110 : get 8 bit (blocksize-1) from end of header // 0111 : get 16 bit (blocksize-1) from end of header // 1000-1111 : 256 * (2^(n-8)) samples, i.e. 256/512/1024/2048/4096/8192/16384/32768 var blockSizeMap = scalar.UToScalar{ 0b0000: {Description: "reserved"}, 0b0001: {Sym: uint64(192)}, 0b0010: {Sym: uint64(576)}, 0b0011: {Sym: uint64(1152)}, 0b0100: {Sym: uint64(2304)}, 0b0101: {Sym: uint64(4608)}, 0b0110: {Description: "end of header (8 bit)"}, 0b0111: {Description: "end of header (16 bit)"}, 0b1000: {Sym: uint64(256)}, 0b1001: {Sym: uint64(512)}, 0b1010: {Sym: uint64(1024)}, 0b1011: {Sym: uint64(2048)}, 0b1100: {Sym: uint64(4096)}, 0b1101: {Sym: uint64(8192)}, 0b1110: {Sym: uint64(16384)}, 0b1111: {Sym: uint64(32768)}, } blockSizeS := d.FieldScalarU4("block_size", blockSizeMap, scalar.Bin) if blockSizeS.Sym != nil { blockSize = int(blockSizeS.SymU()) } // <4> Sample rate: // 0000 : get from STREAMINFO metadata block // 0001 : 88.2kHz // 0010 : 176.4kHz // 0011 : 192kHz // 0100 : 8kHz // 0101 : 16kHz // 0110 : 22.05kHz // 0111 : 24kHz // 1000 : 32kHz // 1001 : 44.1kHz // 1010 : 48kHz // 1011 : 96kHz // 1100 : get 8 bit sample rate (in kHz) from end of header // 1101 : get 16 bit sample rate (in Hz) from end of header // 1110 : get 16 bit sample rate (in tens of Hz) from end of header // 1111 : invalid, to prevent sync-fooling string of 1s var sampleRateMap = scalar.UToScalar{ 0b0000: {Description: "from streaminfo"}, 0b0001: {Sym: uint64(88200)}, 0b0010: {Sym: uint64(176400)}, 0b0011: {Sym: uint64(192000)}, 0b0100: {Sym: uint64(8000)}, 0b0101: {Sym: uint64(16000)}, 0b0110: {Sym: uint64(22050)}, 0b0111: {Sym: uint64(24000)}, 0b1000: {Sym: uint64(32000)}, 0b1001: {Sym: uint64(44100)}, 0b1010: {Sym: uint64(48000)}, 0b1011: {Sym: uint64(96000)}, 0b1100: {Description: "end of header (8 bit*1000)"}, 0b1101: {Description: "end of header (16 bit)"}, 0b1110: {Description: "end of header (16 bit*10)"}, 0b1111: {Description: "invalid"}, } sampleRateS := d.FieldScalarU4("sample_rate", sampleRateMap, scalar.Bin) switch sampleRateS.ActualU() { case SampleRateStreaminfo: if inStreamInfo == nil { d.Fatalf("streaminfo required for sample rate") } } // <4> Channel assignment // 0000-0111 : (number of independent channels)-1. Where defined, the channel order follows SMPTE/ITU-R recommendations. The assignments are as follows: // 1 channel: mono // 2 channels: left, right // 3 channels: left, right, center // 4 channels: front left, front right, back left, back right // 5 channels: front left, front right, front center, back/surround left, back/surround right // 6 channels: front left, front right, front center, LFE, back/surround left, back/surround right // 7 channels: front left, front right, front center, LFE, back center, side left, side right // 8 channels: front left, front right, front center, LFE, back left, back right, side left, side right // 1000 : left/side stereo: channel 0 is the left channel, channel 1 is the side(difference) channel // 1001 : right/side stereo: channel 0 is the side(difference) channel, channel 1 is the right channel // 1010 : mid/side stereo: channel 0 is the mid(average) channel, channel 1 is the side(difference) channel // 1011-1111 : reserved // TODO: extract to tables and cleanup var channelAssignmentMap = scalar.UToScalar{ 0: {Sym: uint64(1), Description: "mono"}, 1: {Sym: uint64(2), Description: "lr"}, 2: {Sym: uint64(3), Description: "lrc"}, 3: {Sym: uint64(4), Description: "fl,fr,bl,br"}, 4: {Sym: uint64(5), Description: "fl,fr,fc,back/surround left,back/surround right"}, 5: {Sym: uint64(6), Description: "fl,fr,fc,lfe,back/surround left,back/surround right"}, 6: {Sym: uint64(7), Description: "fl,fr,fc,lfe,back center,sl,sr"}, 7: {Sym: uint64(8), Description: "fl,fr,fc,lfe,back left,br,sl,sr"}, 0b1000: {Sym: uint64(2), Description: "left/side stereo"}, 0b1001: {Sym: uint64(2), Description: "right/side stereo"}, 0b1010: {Sym: uint64(2), Description: "mid/side stereo"}, 0b1011: {Sym: nil, Description: "reserved"}, 0b1100: {Sym: nil, Description: "reserved"}, 0b1101: {Sym: nil, Description: "reserved"}, 0b1111: {Sym: nil, Description: "reserved"}, } channelAssignmentS := d.FieldScalarU4("channel_assignment", channelAssignmentMap) if channelAssignmentS.Sym == nil { d.Fatalf("unknown number of channels") } channelAssignment = channelAssignmentS.ActualU() channels = int(channelAssignmentS.SymU()) switch channelAssignmentS.ActualU() { case ChannelLeftSide: sideChannelIndex = 1 case ChannelSideRight: sideChannelIndex = 0 case ChannelMidSide: sideChannelIndex = 1 } if sideChannelIndex != -1 { d.FieldValueU("side_channel_index", uint64(sideChannelIndex)) } // <3> Sample size in bits: // 000 : get from STREAMINFO metadata block // 001 : 8 bits per sample // 010 : 12 bits per sample // 011 : reserved // 100 : 16 bits per sample // 101 : 20 bits per sample // 110 : 24 bits per sample // 111 : reserved var sampleSizeMap = scalar.UToScalar{ 0b000: {Description: "from streaminfo"}, 0b001: {Sym: uint64(8)}, 0b010: {Sym: uint64(12)}, 0b011: {Description: "reserved"}, 0b100: {Sym: uint64(16)}, 0b101: {Sym: uint64(20)}, 0b110: {Sym: uint64(24)}, 0b111: {Description: "reserved"}, } sampleSizeS := d.FieldScalarU3("sample_size", sampleSizeMap, scalar.Bin) switch sampleSizeS.ActualU() { case SampleSizeStreaminfo: if inStreamInfo == nil { d.Fatalf("streaminfo required for sample size") } sampleSize = int(inStreamInfo.BitPerSample) default: if sampleSizeS.Sym != nil { sampleSize = int(sampleSizeS.SymU()) } } // <1> Reserved: // 0 : mandatory value // 1 : reserved for future use d.FieldU1("reserved1", d.AssertU(0)) d.FieldStruct("end_of_header", func(d *decode.D) { // if(variable blocksize) // <8-56>:"UTF-8" coded sample number (decoded number is 36 bits) [4] // else // <8-48>:"UTF-8" coded frame number (decoded number is 31 bits) [4] // 0 : fixed-blocksize stream; frame header encodes the frame number // 1 : variable-blocksize stream; frame header encodes the sample number switch blockingStrategy { case BlockingStrategyVariable: d.FieldUFn("sample_number", utf8Uint) case BlockingStrategyFixed: d.FieldUFn("frame_number", utf8Uint) } // if(blocksize bits == 011x) // 8/16 bit (blocksize-1) // 0110 : get 8 bit (blocksize-1) from end of header // 0111 : get 16 bit (blocksize-1) from end of header switch blockSizeS.ActualU() { case BlockSizeEndOfHeader8: blockSize = int(d.FieldU8("block_size", scalar.UAdd(1))) case BlockSizeEndOfHeader16: blockSize = int(d.FieldU16("block_size", scalar.UAdd(1))) } // if(sample rate bits == 11xx) // 8/16 bit sample rate // 1100 : get 8 bit sample rate (in kHz) from end of header // 1101 : get 16 bit sample rate (in Hz) from end of header // 1110 : get 16 bit sample rate (in tens of Hz) from end of header switch sampleRateS.ActualU() { case SampeleRateEndOfHeader8: d.FieldUFn("sample_rate", func(d *decode.D) uint64 { return d.U8() * 1000 }) case SampeleRateEndOfHeader16: d.FieldU16("sample_rate") case SampeleRateEndOfHeader160: d.FieldUFn("sample_rate", func(d *decode.D) uint64 { return d.U16() * 10 }) } }) headerCRC := &checksum.CRC{Bits: 8, Table: checksum.ATM8Table} d.MustCopyBits(headerCRC, d.BitBufRange(frameStart, d.Pos()-frameStart)) d.FieldU8("crc", d.ValidateUBytes(headerCRC.Sum(nil)), scalar.Hex) }) var channelSamples [][]int64 d.FieldArray("subframes", func(d *decode.D) { for channelIndex := 0; channelIndex < channels; channelIndex++ { d.FieldStruct("subframe", func(d *decode.D) { // <1> Zero bit padding, to prevent sync-fooling string of 1s d.FieldU1("zero_bit", d.AssertU(0)) // <6> Subframe type: // 000000 : SUBFRAME_CONSTANT // 000001 : SUBFRAME_VERBATIM // 00001x : reserved // 0001xx : reserved // 001xxx : if(xxx <= 4) SUBFRAME_FIXED, xxx=order ; else reserved // 01xxxx : reserved // 1xxxxx : SUBFRAME_LPC, xxxxx=order-1 lpcOrder := -1 var subframeTypeRangeMap = scalar.URangeToScalar{ {Range: [2]uint64{0b000000, 0b000000}, S: scalar.S{Sym: SubframeConstant}}, {Range: [2]uint64{0b000001, 0b000001}, S: scalar.S{Sym: SubframeVerbatim}}, {Range: [2]uint64{0b000010, 0b000011}, S: scalar.S{Sym: SubframeReserved}}, {Range: [2]uint64{0b000100, 0b000111}, S: scalar.S{Sym: SubframeReserved}}, {Range: [2]uint64{0b001000, 0b001100}, S: scalar.S{Sym: SubframeFixed}}, {Range: [2]uint64{0b001101, 0b001111}, S: scalar.S{Sym: SubframeReserved}}, {Range: [2]uint64{0b010000, 0b011111}, S: scalar.S{Sym: SubframeReserved}}, {Range: [2]uint64{0b100000, 0b111111}, S: scalar.S{Sym: SubframeLPC}}, } subframeTypeS := d.FieldScalarU6("subframe_type", subframeTypeRangeMap, scalar.Bin) switch subframeTypeS.SymStr() { case SubframeFixed: lpcOrder = int(subframeTypeS.ActualU() & 0b111) case SubframeLPC: lpcOrder = int((subframeTypeS.ActualU() & 0b11111) + 1) } if lpcOrder != -1 { d.FieldValueU("lpc_order", uint64(lpcOrder)) } // 'Wasted bits-per-sample' flag: // 0 : no wasted bits-per-sample in source subblock, k=0 // 1 : k wasted bits-per-sample in source subblock, k-1 follows, unary coded; e.g. k=3 => 001 follows, k=7 => 0000001 follows. wastedBitsFlag := d.FieldU1("wasted_bits_flag") var wastedBitsK int if wastedBitsFlag != 0 { wastedBitsK = int(d.FieldUnary("wasted_bits_k", 0, scalar.UAdd(1))) } subframeSampleSize := sampleSize - wastedBitsK if subframeSampleSize < 1 { d.Fatalf("subframeSampleSize %d < 1", subframeSampleSize) } // if channel is side, add en extra sample bit // https://github.com/xiph/flac/blob/37e675b777d4e0de53ac9ff69e2aea10d92e729c/src/libFLAC/stream_decoder.c#L2040 if channelIndex == sideChannelIndex { subframeSampleSize++ } d.FieldValueU("subframe_sample_size", uint64(subframeSampleSize)) decodeWarmupSamples := func(samples []int64, n int, sampleSize int) { if len(samples) < n { d.Fatalf("decodeWarmupSamples outside block size") } d.FieldArray("warmup_samples", func(d *decode.D) { for i := 0; i < n; i++ { samples[i] = d.FieldS("value", sampleSize) } }) } decodeResiduals := func(samples []int64) { samplesLen := len(samples) n := 0 // <2> Residual coding method: // 00 : partitioned Rice coding with 4-bit Rice parameter; RESIDUAL_CODING_METHOD_PARTITIONED_RICE follows // 01 : partitioned Rice coding with 5-bit Rice parameter; RESIDUAL_CODING_METHOD_PARTITIONED_RICE2 follows // 10-11 : reserved var riceEscape int var riceBits int residualCodingMethod := d.FieldU2("residual_coding_method", scalar.UToScalar{ 0b00: scalar.S{Sym: uint64(4), Description: "rice"}, 0b01: scalar.S{Sym: uint64(5), Description: "rice2"}, }) switch residualCodingMethod { case ResidualCodingMethodRice: riceEscape = 0b1111 riceBits = 4 case ResidualCodingMethodRice2: riceEscape = 0b11111 riceBits = 5 } // <4> Partition order. partitionOrder := int(d.FieldU4("partition_order")) // There will be 2^order partitions. ricePartitions := 1 << partitionOrder d.FieldValueU("rice_partitions", uint64(ricePartitions)) d.FieldArray("partitions", func(d *decode.D) { for i := 0; i < ricePartitions; i++ { d.FieldStruct("partition", func(d *decode.D) { // Encoding parameter: // <4(+5)> Encoding parameter: // 0000-1110 : Rice parameter. // 1111 : Escape code, meaning the partition is in unencoded binary form using n bits per sample; n follows as a 5-bit number. // Or: // <5(+5)> Encoding parameter: // 00000-11110 : Rice parameter. // 11111 : Escape code, meaning the partition is in unencoded binary form using n bits per sample; n follows as a 5-bit number. // Encoded residual. The number of samples (n) in the partition is determined as follows: // if the partition order is zero, n = frame's blocksize - predictor order // else if this is not the first partition of the subframe, n = (frame's blocksize / (2^partition order)) // else n = (frame's blocksize / (2^partition order)) - predictor order var count int if partitionOrder == 0 { count = blockSize - lpcOrder } else if i != 0 { count = blockSize / ricePartitions } else { count = (blockSize / ricePartitions) - lpcOrder } d.FieldValueU("count", uint64(count)) riceParameter := int(d.FieldU("rice_parameter", riceBits)) if samplesLen < n+count { d.Fatalf("decodeResiduals outside block size") } if riceParameter == riceEscape { escapeSampleSize := int(d.FieldU5("escape_sample_size")) d.FieldRawLen("samples", int64(count*escapeSampleSize)) } else { samplesStart := d.Pos() for j := 0; j < count; j++ { high := d.Unary(0) _ = high low := d.U(riceParameter) _ = low samples[n] = mathextra.ZigZag(high<> shift) } } var samples []int64 switch subframeTypeS.SymStr() { case SubframeConstant: samples = make([]int64, blockSize) // Unencoded constant value of the subblock, n = frame's bits-per-sample. v := d.FieldS("value", subframeSampleSize) for i := 0; i < blockSize; i++ { samples[i] = v } case SubframeVerbatim: samples = make([]int64, blockSize) // Unencoded subblock; n = frame's bits-per-sample, i = frame's blocksize. // TODO: refactor into some kind of FieldBitBufLenFn? d.RangeFn(d.Pos(), int64(blockSize*subframeSampleSize), func(d *decode.D) { d.FieldRawLen("samples", d.BitsLeft()) }) for i := 0; i < blockSize; i++ { samples[i] = d.S(subframeSampleSize) } case SubframeFixed: samples = make([]int64, blockSize) // Unencoded warm-up samples (n = frame's bits-per-sample * predictor order). decodeWarmupSamples(samples, lpcOrder, subframeSampleSize) // Encoded residual decodeResiduals(samples[lpcOrder:]) // http://www.hpl.hp.com/techreports/1999/HPL-1999-144.pdf fixedCoeffs := [][]int64{ {}, {1}, {2, -1}, {3, -3, 1}, {4, -6, 4, -1}, } coeffs := fixedCoeffs[lpcOrder] decodeLPC(lpcOrder, samples, coeffs, 0) case SubframeLPC: samples = make([]int64, blockSize) // Unencoded warm-up samples (n = frame's bits-per-sample * lpc order). decodeWarmupSamples(samples, lpcOrder, subframeSampleSize) // <4> (Quantized linear predictor coefficients' precision in bits)-1 (1111 = invalid). precision := int(d.FieldU4("precision", scalar.UAdd(1))) // <5> Quantized linear predictor coefficient shift needed in bits (NOTE: this number is signed two's-complement). shift := d.FieldS5("shift") if shift < 0 { d.Fatalf("negative LPC shift %d", shift) } // Unencoded predictor coefficients (n = qlp coeff precision * lpc order) (NOTE: the coefficients are signed two's-complement). var coeffs []int64 d.FieldArray("coefficients", func(d *decode.D) { for i := 0; i < lpcOrder; i++ { coeffs = append(coeffs, d.FieldS("value", precision)) } }) // Encoded residual decodeResiduals(samples[lpcOrder:]) decodeLPC(lpcOrder, samples, coeffs, shift) } if wastedBitsK != 0 { for i := 0; i < len(samples); i++ { samples[i] <<= wastedBitsK } } channelSamples = append(channelSamples, samples) }) } }) // Zero-padding to byte alignment. d.FieldU("byte_align", d.ByteAlignBits(), d.AssertU(0)) // <16> CRC-16 (polynomial = x^16 + x^15 + x^2 + x^0, initialized with 0) of everything before the crc, back to and including the frame header sync code footerCRC := &checksum.CRC{Bits: 16, Table: checksum.ANSI16Table} d.MustCopyBits(footerCRC, d.BitBufRange(frameStart, d.Pos()-frameStart)) d.FieldRawLen("footer_crc", 16, d.ValidateBitBuf(footerCRC.Sum(nil)), scalar.RawHex) streamSamples := len(channelSamples[0]) for j := 0; j < len(channelSamples); j++ { if streamSamples > len(channelSamples[j]) { d.Fatalf("different amount of samples in channels %d != %d", streamSamples, len(channelSamples[j])) } } // Transform mid/side channels into left, right // mid = (left + right)/2 // side = left - right switch channelAssignment { case ChannelLeftSide: for i := 0; i < len(channelSamples[0]); i++ { channelSamples[1][i] = channelSamples[0][i] - channelSamples[1][i] } case ChannelSideRight: for i := 0; i < len(channelSamples[0]); i++ { channelSamples[0][i] = channelSamples[1][i] + channelSamples[0][i] } case ChannelMidSide: for i := 0; i < len(channelSamples[0]); i++ { m := channelSamples[0][i] s := channelSamples[1][i] m = m<<1 | s&1 channelSamples[0][i] = (m + s) >> 1 channelSamples[1][i] = (m - s) >> 1 } default: // not stereo or no side channel } outSampleSize := sampleSize + (sampleSize % 8) bytesPerSample := outSampleSize / 8 p := 0 le := binary.LittleEndian interleavedSamplesBuf := ffi.SamplesBuf interleavedSamplesBufLen := len(channelSamples) * streamSamples * bytesPerSample // TODO: decode read buffer? // reuse buffer if possible if interleavedSamplesBuf == nil || len(interleavedSamplesBuf) < interleavedSamplesBufLen { interleavedSamplesBuf = make([]byte, interleavedSamplesBufLen) } // TODO: speedup by using more cache friendly memory layout for samples for i := 0; i < streamSamples; i++ { for j := 0; j < len(channelSamples); j++ { s := channelSamples[j][i] switch outSampleSize { case 8: interleavedSamplesBuf[p] = byte(s) case 16: le.PutUint16(interleavedSamplesBuf[p:], uint16(s)) case 24: interleavedSamplesBuf[p] = byte(s) le.PutUint16(interleavedSamplesBuf[p+1:], uint16(s>>8)) case 32: le.PutUint32(interleavedSamplesBuf[p:], uint32(s)) } p += bytesPerSample } } return format.FlacFrameOut{ SamplesBuf: interleavedSamplesBuf, Samples: uint64(streamSamples), Channels: channels, BitsPerSample: outSampleSize, } }