1
1
mirror of https://github.com/wader/fq.git synced 2024-11-23 00:57:15 +03:00

decode: Some format decode and sub buffer work

This commit is contained in:
Mattias Wadman 2021-11-20 16:22:00 +01:00
parent 74b57501b5
commit 0480a2f722
14 changed files with 725 additions and 719 deletions

View File

@ -50,7 +50,7 @@
|`mpeg_pes_packet` |MPEG&nbsp;Packetized&nbsp;elementary&nbsp;stream&nbsp;packet |<sub></sub>|
|`mpeg_spu` |Sub&nbsp;Picture&nbsp;Unit&nbsp;(DVD&nbsp;subtitle) |<sub></sub>|
|`mpeg_ts` |MPEG&nbsp;Transport&nbsp;Stream |<sub></sub>|
|`ogg` |OGG&nbsp;file |<sub>`ogg_page` `vorbis_packet` `vorbis_comment` `opus_packet` `flac_metadatablock` `flac_frame`</sub>|
|`ogg` |OGG&nbsp;file |<sub>`ogg_page` `vorbis_packet` `opus_packet` `flac_metadatablock` `flac_frame`</sub>|
|`ogg_page` |OGG&nbsp;page |<sub></sub>|
|`opus_packet` |Opus&nbsp;packet |<sub>`vorbis_comment`</sub>|
|`png` |Portable&nbsp;Network&nbsp;Graphics&nbsp;file |<sub>`icc_profile` `exif`</sub>|

File diff suppressed because it is too large Load Diff

Before

Width:  |  Height:  |  Size: 84 KiB

After

Width:  |  Height:  |  Size: 84 KiB

View File

@ -6,14 +6,11 @@ package bzip2
// TODO: empty file, no streams
import (
"bytes"
"compress/bzip2"
"hash/crc32"
"io"
"errors"
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
)
@ -91,29 +88,22 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
compressedStart := d.Pos()
compressedBB := d.BitBufRange(0, d.Len())
deflateR := bzip2.NewReader(compressedBB)
uncompressed := &bytes.Buffer{}
crc32W := crc32.NewIEEE()
if _, err := d.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
d.Fatalf(err.Error())
}
// calculatedCRC32 := crc32W.Sum(nil)
uncompressedBB := bitio.NewBufferFromBytes(uncompressed.Bytes(), -1)
dv, _, _ := d.FieldTryFormatBitBuf("uncompressed", uncompressedBB, probeGroup, nil)
if dv == nil {
readCompressedSize, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("uncompressed", 0, d.Len(), bzip2.NewReader, probeGroup, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
p, err := compressedBB.Pos()
if err != nil {
d.IOPanic((err))
}
// uncompressed := &bytes.Buffer{}
// crc32W := crc32.NewIEEE()
// if _, err := d.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
// d.Fatalf(err.Error())
// }
// // calculatedCRC32 := crc32W.Sum(nil)
// TODO: compressedSize is a horrible hack for now
// HACK: bzip2.NewReader will read from the start of the whole buffer and then we figure out compressedSize ourselves
// "It is important to note that none of the fields within a StreamBlock or StreamFooter are necessarily byte-aligned"
const footerByteSize = 10
compressedSize := (p - compressedStart) - footerByteSize*8
compressedSize := (readCompressedSize - compressedStart) - footerByteSize*8
for i := 0; i < 8; i++ {
d.SeekAbs(compressedStart + compressedSize)
if d.PeekBits(48) == footerMagic {

View File

@ -5,8 +5,8 @@ package gz
// TODO: verify isize?
import (
"bytes"
"compress/flate"
"errors"
"hash/crc32"
"io"
@ -97,26 +97,24 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
d.FieldRawLen("header_crc", 16, d.RawHex)
}
compressedLen := d.BitsLeft() - ((4 + 4) * 8) // len-(crc32+isize)
compressedBB := d.FieldRawLen("compressed", compressedLen)
crc32W := crc32.NewIEEE()
var rFn func(r io.Reader) io.Reader
switch compressionMethod {
case delfateMethod:
deflateR := flate.NewReader(compressedBB)
uncompressed := &bytes.Buffer{}
if _, err := d.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
d.Fatalf(err.Error())
}
uncompressedBB := bitio.NewBufferFromBytes(uncompressed.Bytes(), -1)
dv, _, _ := d.FieldTryFormatBitBuf("uncompressed", uncompressedBB, probeFormat, nil)
if dv == nil {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
default:
d.FieldRawLen("compressed", compressedLen)
// *bitio.Buffer implements io.ByteReader so that deflate doesn't do its own
// buffering, which might read more than needed and mess up determining the compressed size
rFn = func(r io.Reader) io.Reader { return flate.NewReader(r) }
}
readCompressedSize, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("uncompressed", d.Pos(), d.BitsLeft(), rFn, probeFormat, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
d.FieldRawLen("compressed", readCompressedSize)
crc32W := crc32.NewIEEE()
if _, err := io.Copy(crc32W, uncompressedBB.Copy()); err != nil {
d.IOPanic(err)
}
d.FieldRawLen("crc32", 32, d.ValidateBitBuf(bitio.ReverseBytes(crc32W.Sum(nil))), d.RawHex)
d.FieldU32LE("isize")

View File

@ -13,8 +13,8 @@ $ fq -d gzip verbose /test.gz
0x00| 41 02 ea 5f | A.._ | mtime: 1609171521 0x4-0x7.7 (4)
0x00| 00 | . | extra_flags: 0 0x8-0x8.7 (1)
0x00| 03 | . | os: "Unix" (3) 0x9-0x9.7 (1)
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x00| 2b 49 2d 2e e1 02| +I-...| compressed: raw bits 0xa-0x10.7 (7)
0x10|00 |. |
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x10| c6 35 b9 3b | .5.; | crc32: "c635b93b" (raw bits) (valid) 0x11-0x14.7 (4)
0x10| 05 00 00 00| | ....| | isize: 5 0x15-0x18.7 (4)

View File

@ -28,9 +28,9 @@ $ fq . /json.gz
0x00| 65 0a 08 61 | e..a | mtime: 1627916901
0x00| 00 | . | extra_flags: 0
0x00| 03 | . | os: "Unix" (3)
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
0x00| ab 56 4a 54 b2 52| .VJT.R| compressed: raw bits
0x10|30 34 32 ae e5 02 00 |042.... |
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
0x10| 20 ac d2 9c | ... | crc32: "20acd29c" (raw bits) (valid)
0x10| 0b 00 00 00| | ....|| isize: 11
$ fq tovalue /json.gz

View File

@ -14,7 +14,6 @@ import (
var oggPageFormat decode.Group
var vorbisPacketFormat decode.Group
var vorbisCommentFormat decode.Group
var opusPacketFormat decode.Group
var flacMetadatablockFormat decode.Group
var flacFrameFormat decode.Group
@ -28,7 +27,6 @@ func init() {
Dependencies: []decode.Dependency{
{Names: []string{format.OGG_PAGE}, Group: &oggPageFormat},
{Names: []string{format.VORBIS_PACKET}, Group: &vorbisPacketFormat},
{Names: []string{format.VORBIS_COMMENT}, Group: &vorbisCommentFormat},
{Names: []string{format.OPUS_PACKET}, Group: &opusPacketFormat},
{Names: []string{format.FLAC_METADATABLOCK}, Group: &flacMetadatablockFormat},
{Names: []string{format.FLAC_FRAME}, Group: &flacFrameFormat},
@ -126,10 +124,14 @@ func decodeOgg(d *decode.D, in interface{}) interface{} {
switch s.codec {
case codecVorbis:
// TODO: err
_, _, _ = s.packetD.FieldTryFormatBitBuf("packet", bb, vorbisPacketFormat, nil)
if _, _, err := s.packetD.TryFieldFormatBitBuf("packet", bb, vorbisPacketFormat, nil); err != nil {
s.packetD.FieldRootBitBuf("packet", bb)
}
case codecOpus:
// TODO: err
_, _, _ = s.packetD.FieldTryFormatBitBuf("packet", bb, opusPacketFormat, nil)
if _, _, err := s.packetD.TryFieldFormatBitBuf("packet", bb, opusPacketFormat, nil); err != nil {
s.packetD.FieldRootBitBuf("packet", bb)
}
case codecFlac:
var firstByte byte
bs, err := bb.PeekBytes(1)

View File

@ -2,11 +2,12 @@ package vorbis
import (
"encoding/base64"
"errors"
"io"
"strings"
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
)
@ -30,20 +31,22 @@ func commentDecode(d *decode.D, in interface{}) interface{} {
i := uint64(0)
d.FieldStructArrayLoop("user_comments", "user_comment", func() bool { return i < userCommentListLength }, func(d *decode.D) {
userCommentLength := d.FieldU32LE("length")
userCommentStart := d.Pos()
userComment := d.FieldUTF8("comment", int(userCommentLength))
pairParts := strings.SplitN(userComment, "=", 2)
if len(pairParts) == 2 {
k, v := strings.ToUpper(pairParts[0]), pairParts[1]
var metadataBlockPicture = "METADATA_BLOCK_PICTURE"
if k == metadataBlockPicture {
// METADATA_BLOCK_PICTURE=<base64-flac-picture-metadatablock>
bs, err := base64.StdEncoding.DecodeString(v)
if err == nil {
bb := bitio.NewBufferFromBytes(bs, -1)
d.FieldFormatBitBuf("picture", bb, flacPicture, nil)
} else {
panic(err)
}
var metadataBlockPicturePreix = "METADATA_BLOCK_PICTURE="
var metadataBlockPicturePrefixLower = "metadata_block_picture="
if strings.HasPrefix(userComment, metadataBlockPicturePreix) ||
strings.HasPrefix(userComment, metadataBlockPicturePrefixLower) {
base64Offset := int64(len(metadataBlockPicturePreix)) * 8
base64Len := int64(len(userComment))*8 - base64Offset
rFn := func(r io.Reader) io.Reader { return base64.NewDecoder(base64.StdEncoding, r) }
_, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("picture", userCommentStart+base64Offset, base64Len, rFn, flacPicture, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
d.FieldRootBitBuf("picture", uncompressedBB)
}
}
i++

View File

@ -6,11 +6,11 @@ package zip
import (
"bytes"
"compress/flate"
"errors"
"io"
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
)
@ -252,7 +252,7 @@ func zipDecode(d *decode.D, in interface{}) interface{} {
d.FieldStruct("last_modification_date", fieldMSDOSTime)
d.FieldStruct("last_modification_time", fieldMSDOSDate)
d.FieldU32("crc32_uncompressed", d.Hex)
compressedSize := d.FieldU32("compressed_size")
compressedSizeBytes := d.FieldU32("compressed_size")
d.FieldU32("uncompressed_size")
fileNameLength := d.FieldU16("file_name_length")
extraFieldLength := d.FieldU16("extra_field_length")
@ -268,57 +268,43 @@ func zipDecode(d *decode.D, in interface{}) interface{} {
}
})
})
compressedSize := int64(compressedSizeBytes) * 8
compressedStart := d.Pos()
compressedLimit := int64(compressedSize) * 8
compressedLimit := compressedSize
if compressedLimit == 0 {
compressedLimit = d.BitsLeft()
}
compressedStart := d.Pos()
d.LenFn(compressedLimit, func(d *decode.D) {
if compressionMethod == compressionMethodNone {
d.FieldRawLen("uncompressed", int64(compressedSize)*8)
return
}
var decompressR io.Reader
compressedBB := d.BitBufRange(d.Pos(), d.BitsLeft())
if compressionMethod == compressionMethodNone {
d.FieldRawLen("uncompressed", compressedSize)
} else {
var rFn func(r io.Reader) io.Reader
switch compressionMethod {
case compressionMethodDeflated:
// *bitio.Buffer implements io.ByteReader so that deflate doesn't do its own
// buffering, which might read more than needed and mess up determining the compressed size
decompressR = flate.NewReader(compressedBB)
rFn = func(r io.Reader) io.Reader { return flate.NewReader(r) }
}
if decompressR != nil {
uncompressed := &bytes.Buffer{}
if _, err := d.Copy(uncompressed, decompressR); err != nil {
d.IOPanic(err)
}
uncompressedBB := bitio.NewBufferFromBytes(uncompressed.Bytes(), -1)
dv, _, _ := d.FieldTryFormatBitBuf("uncompressed", uncompressedBB, probeFormat, nil)
if dv == nil {
if rFn != nil {
readCompressedSize, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("uncompressed", d.Pos(), compressedLimit, rFn, probeFormat, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
// no compressed size, is a streaming zip, figure out size by checking what
// position compressed buffer ended at
if compressedSize == 0 {
pos, err := compressedBB.Pos()
if err != nil {
d.IOPanic(err)
}
compressedSize = uint64(pos) / 8
compressedSize = readCompressedSize
}
d.FieldRawLen("compressed", compressedSize)
} else {
if compressedSize != 0 {
d.FieldRawLen("compressed", compressedSize)
}
}
}
if compressedSize != 0 {
d.FieldRawLen("compressed", int64(compressedSize)*8)
}
})
d.SeekAbs(compressedStart + int64(compressedSize*8))
d.SeekAbs(compressedStart + compressedSize)
if hasDataDescriptor {
d.FieldStruct("data_indicator", func(d *decode.D) {

View File

@ -113,7 +113,11 @@ func decode(ctx context.Context, bb *bitio.Buffer, group Group, opts Options) (*
d.Value.postProcess()
}
return d.Value, decodeV, formatsErr
if len(formatsErr.Errs) > 0 {
return d.Value, decodeV, formatsErr
}
return d.Value, decodeV, nil
}
return nil, nil, formatsErr
@ -255,13 +259,13 @@ func (d *D) FillGaps(r ranges.Range, namePrefix string) {
// Errorf stops decode with a reason unless forced
func (d *D) Errorf(format string, a ...interface{}) {
if !d.Options.Force {
panic(DecoderError{Reason: format, Pos: d.Pos()})
panic(DecoderError{Reason: fmt.Sprintf(format, a...), Pos: d.Pos()})
}
}
// Fatalf stops decode with a reason regardless of forced
func (d *D) Fatalf(format string, a ...interface{}) {
panic(DecoderError{Reason: format, Pos: d.Pos()})
panic(DecoderError{Reason: fmt.Sprintf(format, a...), Pos: d.Pos()})
}
func (d *D) IOPanic(err error) {
@ -798,7 +802,7 @@ func (d *D) FieldFormatRange(name string, firstBit int64, nBits int64, group Gro
return dv, v
}
func (d *D) FieldTryFormatBitBuf(name string, bb *bitio.Buffer, group Group, inArg interface{}) (*Value, interface{}, error) {
func (d *D) TryFieldFormatBitBuf(name string, bb *bitio.Buffer, group Group, inArg interface{}) (*Value, interface{}, error) {
dv, v, err := decode(d.Ctx, bb, group, Options{
Name: name,
Force: d.Options.Force,
@ -821,7 +825,7 @@ func (d *D) FieldTryFormatBitBuf(name string, bb *bitio.Buffer, group Group, inA
}
func (d *D) FieldFormatBitBuf(name string, bb *bitio.Buffer, group Group, inArg interface{}) (*Value, interface{}) {
dv, v, err := d.FieldTryFormatBitBuf(name, bb, group, inArg)
dv, v, err := d.TryFieldFormatBitBuf(name, bb, group, inArg)
if dv == nil || dv.Errors() != nil {
panic(err)
}
@ -871,3 +875,35 @@ func (d *D) FieldFormatReaderLen(name string, nBits int64, fn func(r io.Reader)
return d.FieldFormatBitBuf(name, zbb, group, nil)
}
// TryFieldReaderRangeFormat takes the range of nBits bits starting at startBit,
// reads all of it through the reader returned by fn, and then tries to decode
// the bytes that reader produced as group, adding the result as field name.
//
// If nBits is -1 the range length defaults to d.BitsLeft().
//
// It returns, in order: the position of the range buffer after the reader has
// been drained (i.e. how much of the range fn's reader consumed; presumably in
// bits, TODO confirm against bitio.Buffer.Pos), a buffer holding the bytes the
// reader produced, and the value, decoder result and error from
// TryFieldFormatBitBuf. If fn is nil, the range cannot be created, or reading
// or closing the reader fails, the error is returned with zero values.
func (d *D) TryFieldReaderRangeFormat(name string, startBit int64, nBits int64, fn func(r io.Reader) io.Reader, group Group, inArg interface{}) (int64, *bitio.Buffer, *Value, interface{}, error) {
	// Callers select fn based on e.g. a compression method and may end up with
	// none; report that as an error instead of panicking on a nil call.
	if fn == nil {
		return 0, nil, nil, nil, fmt.Errorf("%s: no reader function", name)
	}

	bitLen := nBits
	if bitLen == -1 {
		bitLen = d.BitsLeft()
	}
	bb, err := d.bitBuf.BitBufRange(startBit, bitLen)
	if err != nil {
		return 0, nil, nil, nil, err
	}

	r := fn(bb)
	rb, err := ioutil.ReadAll(r)
	// Some readers (for example the one from flate.NewReader) are
	// io.ReadClosers; close them and keep the first error seen.
	if c, ok := r.(io.Closer); ok {
		if closeErr := c.Close(); err == nil {
			err = closeErr
		}
	}
	if err != nil {
		return 0, nil, nil, nil, err
	}

	// Position in the source range after the reader has been drained.
	cz, err := bb.Pos()
	if err != nil {
		return 0, nil, nil, nil, err
	}

	rbb := bitio.NewBufferFromBytes(rb, -1)
	dv, v, err := d.TryFieldFormatBitBuf(name, rbb, group, inArg)
	return cz, rbb, dv, v, err
}
// FieldReaderRangeFormat is the non-Try variant of TryFieldReaderRangeFormat:
// it forwards all arguments unchanged and passes any returned error to
// d.IOPanic. The four non-error results of TryFieldReaderRangeFormat are
// returned as-is.
func (d *D) FieldReaderRangeFormat(name string, startBit int64, nBits int64, fn func(r io.Reader) io.Reader, group Group, inArg interface{}) (int64, *bitio.Buffer, *Value, interface{}) {
	readPos, outBB, fieldValue, result, tryErr := d.TryFieldReaderRangeFormat(name, startBit, nBits, fn, group, inArg)
	if tryErr != nil {
		d.IOPanic(tryErr)
	}

	return readPos, outBB, fieldValue, result
}

View File

@ -84,7 +84,7 @@ type DecoderError struct {
}
func (e DecoderError) Error() string {
return fmt.Sprintf("failed to validate at position %s: %s", num.Bits(e.Pos).StringByteBits(16), e.Reason)
return fmt.Sprintf("error at position %s: %s", num.Bits(e.Pos).StringByteBits(16), e.Reason)
}
// IsRecoverableError always returns true for a DecoderError.
func (DecoderError) IsRecoverableError() bool {
	return true
}

View File

@ -35,7 +35,7 @@ stderr:
error: --decode-file filea: open testdata/nonexisting: no such file or directory
$ fq -n -d mp4 --decode-file filea /test.mp3 '$filea'
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.: {} /test.mp3 (mp4)
| | | error: mp4: failed to validate at position 0x8: no styp, ftyp, free or moov box found
| | | error: mp4: error at position 0x8: no styp, ftyp, free or moov box found
0x000|49 44 33 04 00 00 00 00 00 23 54 53 53 45 00 00|ID3......#TSSE..| unknown0: raw bits
* |until 0x283.7 (end) (644) | |
$ fq -n --argjson a '(' '$a'

View File

@ -423,7 +423,7 @@ mp3> .frames[0].xing | d, f, v
mp3> ^D
$ fq -n '"broken" | mp3 | d'
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.: {} (mp3)
| | | error: mp3: failed to validate at position 0x0: no frames found
| | | error: mp3: error at position 0x0: no frames found
| | | headers: [0]
| | | frames: [0]
0x0|62 72 6f 6b 65 6e| |broken| | unknown0: raw bits

View File

@ -269,7 +269,7 @@ func (dvb decodeValueBase) DecodeValue() *decode.Value {
// Display delegates to dump, passing the wrapped decode value together with
// w and opts, and returns dump's error.
func (dvb decodeValueBase) Display(w io.Writer, opts Options) error {
	return dump(dvb.dv, w, opts)
}
func (dvb decodeValueBase) ToBufferView() (BufferRange, error) {
return BufferRange{bb: dvb.dv.RootBitBuf, r: dvb.dv.Range, unit: 8}, nil
return BufferRange{bb: dvb.dv.RootBitBuf, r: dvb.dv.InnerRange(), unit: 8}, nil
}
func (dvb decodeValueBase) ExtKeys() []string {
kv := []string{