From 06245d12958cf96c56ae738b15b4d8581e20e4bd Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Tue, 8 Feb 2022 18:44:48 +0100 Subject: [PATCH] binary,decode,doc: Rename buffer to binary and add some documentation Rename buffer to binary. Still some work left what to call buffer/binary in decode code. Document decode value and binary type Fix proper unit padding for tobytes and add still undocumenated extra padding argument. Add some additional binary tests --- doc/TODO.md | 5 +- doc/usage.md | 105 +++++++--- internal/bitioextra/zeroreadatseeker.go | 58 ++++++ pkg/bitio/bitio.go | 2 +- pkg/bitio/ioreader.go | 2 +- pkg/bitio/ioreadseeker_test.go | 2 +- pkg/bitio/multireader.go | 2 +- pkg/decode/decode.go | 18 +- pkg/decode/value.go | 2 +- pkg/interp/{buffer.go => binary.go} | 205 ++++++++++--------- pkg/interp/binary.jq | 8 + pkg/interp/buffer.jq | 4 - pkg/interp/decode.go | 22 +- pkg/interp/dump.go | 10 +- pkg/interp/funcs.go | 40 ++-- pkg/interp/interp.go | 4 +- pkg/interp/interp.jq | 2 +- pkg/interp/match.go | 12 +- pkg/interp/match.jq | 52 ++--- pkg/interp/testdata/buffer.fqtest | 92 +++++---- pkg/interp/testdata/value_array.fqtest | 4 +- pkg/interp/testdata/value_boolean.fqtest | 4 +- pkg/interp/testdata/value_json_array.fqtest | 4 +- pkg/interp/testdata/value_json_object.fqtest | 4 +- pkg/interp/testdata/value_null.fqtest | 4 +- pkg/interp/testdata/value_number.fqtest | 4 +- pkg/interp/testdata/value_object.fqtest | 4 +- pkg/interp/testdata/value_string.fqtest | 4 +- 28 files changed, 409 insertions(+), 270 deletions(-) create mode 100644 internal/bitioextra/zeroreadatseeker.go rename pkg/interp/{buffer.go => binary.go} (64%) create mode 100644 pkg/interp/binary.jq delete mode 100644 pkg/interp/buffer.jq diff --git a/doc/TODO.md b/doc/TODO.md index 86b76394..41d070e8 100644 --- a/doc/TODO.md +++ b/doc/TODO.md @@ -1,8 +1,7 @@ ### Known bugs to fix -- `fq -n '"aabbccdd" | hex | tobytes[1:] | raw | tobytes'` create buffer `aabbcc` should be `bbccdd`. 
I think decode (raw in this case) is confused by root value buffer. -- Buffers/string duality is confusing, most string functions should be wrapped to understand buffers. -- `fq -n '[([0xab] | tobits[:4]), ([0xdc] | tobits[:4]), 0] | tobytes'` should create a `ad` buffer, now `a0`. Probably because use of `io.Copy` that will ends up padding on byte boundaries. Should use `bitio.Copy` and create a `bitio.Writer` that can transform to a `io.Writer`. +- `fq -n '"aabbccdd" | hex | tobytes[1:] | raw | tobytes'` create binary `aabbcc` should be `bbccdd`. I think decode (raw in this case) is confused by root value buffer. +- Buffers/string duality is confusing, most string functions should be wrapped to understand binary. - REPL cancel seems to sometimes exit a sub-REPl without properly cleanup options. - Value errors, can only be accessed with `._error`. - Framed (add unknown in gaps) decode should be on struct level not format? diff --git a/doc/usage.md b/doc/usage.md index 7069c856..42d981ab 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -196,7 +196,7 @@ fq -n 'def f: .. | select(format=="avc_sps"); diff(input|f; input|f)' a.mp4 b.mp #### Extract first JPEG found in file -Recursively look for first value that is a `jpeg` decode value root. Use `tobytes` to get bytes buffer for value. Redirect bytes to a file. +Recursively look for first value that is a `jpeg` decode value root. Use `tobytes` to get bytes for value. Redirect bytes to a file. ```sh fq 'first(.. | select(format=="jpeg")) | tobytes' file > file.jpeg @@ -269,6 +269,63 @@ single argument `1, 2` (a lambda expression that output `1` and then output `2`) achieved. - Expressions have one implicit input and output value. This how pipelines like `1 | . * 2` work. + +## Types specific to fq + +fq has two additional types compared to jq, decode value and binary. In standard jq expressions they will in most case behave as some standard jq type. 
+ +### Decode value + +This type is returned by decoders and is used to represent parts of the decoded input. It can act as all JSON types, object, array, number, string etc. + +Each decode value has these properties: +- A bit range in the input + - Can be accessed as a binary using `tobits`/`tobytes`. Use the `start` and `size` keys to get position and size. + - `.name` as bytes `.name | tobytes` + - Bit 4-8 of `.name` as bits `.name | tobits[4:8]` + +Each non-compound decode value has these properties: +- An actual value: + - This is the decoded representation of the bits, a number, string, bool etc. + - Can be accessed using `toactual`. +- An optional symbolic value: + - Is usually a mapping of the actual to symbolic value, ex: map number to a string value. + - Can be accessed using `tosym`. +- An optional description: + - Can be accessed using `todescription` + +The JSON value of a decode value is the symbolic value if available otherwise the actual value. To explicitly access the JSON value use `tovalue`. In most expressions this is not needed as it will be done automatically. + +### Binary + +Raw bits with a unit size, 1 (bits) or 8 (bytes). Will act as a string in standard jq expressions. + +Are created using `tobits`/`tobytes` functions from decode values or binary lists. + +Can be sliced using the jq `[start:end]` slice syntax. + +#### Binary array + +Is an array of numbers, strings, binaries or other nested binary arrays. When used as input to `tobits`/`tobytes` the following rules are used: +- Number is a byte so has to be 0-255 +- String as its UTF-8 encoding +- Binary as is +- Binary array used recursively + +Similar to and inspired by erlang io-lists. 
+ +Some examples: + +`[0, 123, 255] | tobytes` will be 3 bytes 0, 123 and 255 + +`[0, [123, 255]] | tobytes` same as above + +`[0, 1, 1, 0, 0, 1, 1, 0 | tobits]` will be 1 byte 0x66 an "f" + +`[(.a | tobytes[-10:]), 255, (.b | tobits[:10])]` the concatenation of the last 10 bytes of `.a`, a byte with value 255 and the first 10 bits of `.b`. + +TODO: padding and alignment + ## Functions - All standard library functions from jq @@ -297,32 +354,32 @@ unary uses input and if more than one argument all as arguments ignoring the inp - `todescription` description of value - `torepr` convert decode value into what it reptresents. For example convert msgpack decode value into a value representing its JSON representation. - - All regexp functions work with buffers as input and pattern argument with these differences - from the string versions: + - All regexp functions work with binary as input and pattern argument with these differences + compared to when using string input: - All offset and length will be in bytes. - - For `capture` the `.string` value is a buffer. - - If pattern is a buffer it will be matched literally and not as a regexp. - - If pattern is a buffer or flags include "b" each input byte will be read as separate code points - - `scan_toend($v)`, `scan_toend($v; $flags)` works the same as `scan` but output buffer are from start of match to - end of buffer. + - For `capture` the `.string` value is a binary. + - If pattern is a binary it will be matched literally and not as a regexp. + - If pattern is a binary or flags include "b" each input byte will be read as separate code points + - `scan_toend($v)`, `scan_toend($v; $flags)` works the same as `scan` but output binary are from start of match to + end of binary. instead of possibly multi-byte UTF-8 codepoints. This allows to match raw bytes. Ex: `match("\u00ff"; "b")` will match the byte `0xff` and not the UTF-8 encoded codepoint for 255, `match("[^\u00ff]"; "b")` will match all non-`0xff` bytes. 
- `grep` functions take 1 or 2 arguments. First is a scalar to match, where a string is - treated as a regexp. A buffer scalar will be matches exact bytes. Second argument are regexp - flags with addition that "b" will treat each byte in the input buffer as a code point, this + treated as a regexp. A binary will be matched as exact bytes. Second argument is regexp + flags with addition that "b" will treat each byte in the input binary as a code point, this makes it possible to match exact bytes. - - `grep($v)`, `grep($v; $flags)` recursively match value and buffer + - `grep($v)`, `grep($v; $flags)` recursively match value and binary - `vgrep($v)`, `vgrep($v; $flags)` recursively match value - - `bgrep($v)`, `bgrep($v; $flags)` recursively match buffer + - `bgrep($v)`, `bgrep($v; $flags)` recursively match binary - `fgrep($v)`, `fgrep($v; $flags)` recursively match field name - `grep_by(f)` recursively match using a filter. Ex: `grep_by(. > 180 and . < 200)`, `first(grep_by(format == "id3v2"))`. - - Buffers: - - `tobits` - Transform input into a bits buffer not preserving source range, will start at zero. - - `tobitsrange` - Transform input into a bits buffer preserving source range if possible. - - `tobytes` - Transform input into a bytes buffer not preserving source range, will start at zero. - - `tobytesrange` - Transform input into a byte buffer preserving source range if possible. - - `buffer[start:end]`, `buffer[:end]`, `buffer[start:]` - Create a sub buffer from start to end in buffer units preserving source range. + - Binary: + - `tobits` - Transform input to binary with bit as unit, does not preserve source range, will start at zero. + - `tobitsrange` - Transform input to binary with bit as unit, preserves source range if possible. + - `tobytes` - Transform input to binary with byte as unit, does not preserve source range, will start at zero. + - `tobytesrange` - Transform input to binary with byte as unit, preserves source range if possible. 
+ - `.[start:end]`, `.[:end]`, `.[start:]` - Slice binary from start to end preserving source range. - `open` open file for reading - All decode function takes a optional option argument. The only option currently is `force` to ignore decoder asserts. For example to decode as mp3 and ignore assets do `mp3({force: true})` or `decode("mp3"; {force: true})`, from command line @@ -331,11 +388,11 @@ you currently have to do `fq -d raw 'mp3({force: true})' file`. - `probe`, `probe($opts)` probe and decode format - `mp3`, `mp3($opts)`, ..., ``, `($opts)` same as `decode()($opts)`, `decode($format; $opts)` decode as format - Display shows hexdump/ASCII/tree for decode values and JSON for other values. - - `d`/`d($opts)` display value and truncate long arrays and buffers + - `d`/`d($opts)` display value and truncate long arrays and binaries - `da`/`da($opts)` display value and don't truncate arrays - - `dd`/`dd($opts)` display value and don't truncate arrays or buffers - - `dv`/`dv($opts)` verbosely display value and don't truncate arrays but truncate buffers - - `ddv`/`ddv($opts)` verbosely display value and don't truncate arrays or buffers + - `dd`/`dd($opts)` display value and don't truncate arrays or binaries + - `dv`/`dv($opts)` verbosely display value and don't truncate arrays but truncate binaries + - `ddv`/`ddv($opts)` verbosely display value and don't truncate arrays or binaries - `p`/`preview` show preview of field tree - `hd`/`hexdump` hexdump value - `repl` nested REPL, must be last in a pipeline. `1 | repl`, can "slurp" outputs `1, 2, 3 | repl`. 
@@ -397,10 +454,6 @@ A value has these special keys (TODO: remove, are internal) - TODO: unknown gaps -## Binary and IO lists - -- TODO: similar to erlang io lists, [], binary, string (utf8) and numbers - ## Own decoders and use as library TODO diff --git a/internal/bitioextra/zeroreadatseeker.go b/internal/bitioextra/zeroreadatseeker.go new file mode 100644 index 00000000..4d400623 --- /dev/null +++ b/internal/bitioextra/zeroreadatseeker.go @@ -0,0 +1,58 @@ +package bitioextra + +import ( + "io" + + "github.com/wader/fq/pkg/bitio" +) + +type ZeroReadAtSeeker struct { + pos int64 + nBits int64 +} + +func NewZeroAtSeeker(nBits int64) *ZeroReadAtSeeker { + return &ZeroReadAtSeeker{nBits: nBits} +} + +func (z *ZeroReadAtSeeker) SeekBits(bitOffset int64, whence int) (int64, error) { + p := z.pos + switch whence { + case io.SeekStart: + p = bitOffset + case io.SeekCurrent: + p += bitOffset + case io.SeekEnd: + p = z.nBits + bitOffset + default: + panic("unknown whence") + } + + if p < 0 || p > z.nBits { + return z.pos, bitio.ErrOffset + } + z.pos = p + + return p, nil +} + +func (z *ZeroReadAtSeeker) ReadBitsAt(p []byte, nBits int64, bitOff int64) (n int64, err error) { + if bitOff < 0 || bitOff > z.nBits { + return 0, bitio.ErrOffset + } + if bitOff == z.nBits { + return 0, io.EOF + } + + lBits := z.nBits - bitOff + rBits := nBits + if rBits > lBits { + rBits = lBits + } + rBytes := bitio.BitsByteCount(rBits) + for i := int64(0); i < rBytes; i++ { + p[i] = 0 + } + + return rBits, nil +} diff --git a/pkg/bitio/bitio.go b/pkg/bitio/bitio.go index aaab8f54..8ec5f9bf 100644 --- a/pkg/bitio/bitio.go +++ b/pkg/bitio/bitio.go @@ -110,7 +110,7 @@ func BitStringFromBytes(buf []byte, nBits int64) string { return sb.String() } -// CopyBuffer bits from src to dst using provided buffer +// CopyBuffer bits from src to dst using provided byte buffer // Similar to io.CopyBuffer func CopyBuffer(dst Writer, src Reader, buf []byte) (n int64, err error) { // same default size as io.Copy 
diff --git a/pkg/bitio/ioreader.go b/pkg/bitio/ioreader.go index 014d8970..b9027534 100644 --- a/pkg/bitio/ioreader.go +++ b/pkg/bitio/ioreader.go @@ -62,7 +62,7 @@ func (r *IOReader) Read(p []byte) (n int, err error) { if err != nil { return 0, err } - return 1, nil + return 1, r.rErr } return 0, r.rErr } diff --git a/pkg/bitio/ioreadseeker_test.go b/pkg/bitio/ioreadseeker_test.go index 6348b21f..fe098a9b 100644 --- a/pkg/bitio/ioreadseeker_test.go +++ b/pkg/bitio/ioreadseeker_test.go @@ -108,7 +108,7 @@ func Test(t *testing.T) { for _, p := range bsParts { bsBRs = append(bsBRs, sb(p)) } - bsBR, err := bitio.NewMultiBitReader(bsBRs...) + bsBR, err := bitio.NewMultiReader(bsBRs...) if err != nil { panic(err) } diff --git a/pkg/bitio/multireader.go b/pkg/bitio/multireader.go index a9b7992f..c71688ad 100644 --- a/pkg/bitio/multireader.go +++ b/pkg/bitio/multireader.go @@ -31,7 +31,7 @@ type MultiReader struct { readerEnds []int64 } -func NewMultiBitReader(rs ...ReadAtSeeker) (*MultiReader, error) { +func NewMultiReader(rs ...ReadAtSeeker) (*MultiReader, error) { readerEnds := make([]int64, len(rs)) var esSum int64 for i, r := range rs { diff --git a/pkg/decode/decode.go b/pkg/decode/decode.go index e7684174..50e13d46 100644 --- a/pkg/decode/decode.go +++ b/pkg/decode/decode.go @@ -112,7 +112,7 @@ func decode(ctx context.Context, br bitio.ReaderAtSeeker, group Group, opts Opti if err := d.Value.WalkRootPreOrder(func(v *Value, rootV *Value, depth int, rootDepth int) error { minMaxRange = ranges.MinMax(minMaxRange, v.Range) v.Range.Start += decodeRange.Start - v.RootBitBuf = br + v.RootReader = br return nil }); err != nil { return nil, nil, err @@ -165,7 +165,7 @@ func newDecoder(ctx context.Context, format Format, br bitio.ReaderAtSeeker, opt Value: &Value{ Name: name, V: rootV, - RootBitBuf: br, + RootReader: br, Range: ranges.Range{Start: 0, Len: 0}, IsRoot: opts.IsRoot, }, @@ -184,7 +184,7 @@ func (d *D) FieldDecoder(name string, bitBuf bitio.ReaderAtSeeker, v 
interface{} Name: name, V: v, Range: ranges.Range{Start: d.Pos(), Len: 0}, - RootBitBuf: bitBuf, + RootReader: bitBuf, }, Options: d.Options, @@ -289,7 +289,7 @@ func (d *D) FillGaps(r ranges.Range, namePrefix string) { for i, gap := range gaps { br, err := bitioextra.Range(d.bitBuf, gap.Start, gap.Len) if err != nil { - d.IOPanic(err, "FillGaps: BitBufRange") + d.IOPanic(err, "FillGaps: Range") } v := &Value{ @@ -298,7 +298,7 @@ func (d *D) FillGaps(r ranges.Range, namePrefix string) { Actual: br, Unknown: true, }, - RootBitBuf: d.bitBuf, + RootReader: d.bitBuf, Range: gap, } @@ -779,7 +779,7 @@ func (d *D) FieldArrayLoop(name string, condFn func() bool, fn func(d *D)) *D { func (d *D) FieldRangeFn(name string, firstBit int64, nBits int64, fn func() *Value) *Value { v := fn() v.Name = name - v.RootBitBuf = d.bitBuf + v.RootReader = d.bitBuf v.Range = ranges.Range{Start: firstBit, Len: nBits} d.AddChild(v) @@ -892,7 +892,7 @@ func (d *D) RangeFn(firstBit int64, nBits int64, fn func(d *D)) int64 { // TODO: refactor, similar to decode() if err := sd.Value.WalkRootPreOrder(func(v *Value, rootV *Value, depth int, rootDepth int) error { //v.Range.Start += firstBit - v.RootBitBuf = d.Value.RootBitBuf + v.RootReader = d.Value.RootReader endPos = mathextra.MaxInt64(endPos, v.Range.Stop()) return nil @@ -1070,7 +1070,7 @@ func (d *D) FieldRootBitBuf(name string, br bitio.ReaderAtSeeker, sms ...scalar. 
v := &Value{} v.V = &scalar.S{Actual: br} v.Name = name - v.RootBitBuf = br + v.RootReader = br v.IsRoot = true v.Range = ranges.Range{Start: d.Pos(), Len: brLen} @@ -1164,7 +1164,7 @@ func (d *D) TryFieldValue(name string, fn func() (*Value, error)) (*Value, error v, err := fn() stop := d.Pos() v.Name = name - v.RootBitBuf = d.bitBuf + v.RootReader = d.bitBuf v.Range = ranges.Range{Start: start, Len: stop - start} if err != nil { return nil, err diff --git a/pkg/decode/value.go b/pkg/decode/value.go index 7888f0ef..4fba6c06 100644 --- a/pkg/decode/value.go +++ b/pkg/decode/value.go @@ -27,7 +27,7 @@ type Value struct { V interface{} // scalar.S or Compound (array/struct) Index int // index in parent array/struct Range ranges.Range - RootBitBuf bitio.ReaderAtSeeker + RootReader bitio.ReaderAtSeeker IsRoot bool // TODO: rework? } diff --git a/pkg/interp/buffer.go b/pkg/interp/binary.go similarity index 64% rename from pkg/interp/buffer.go rename to pkg/interp/binary.go index 788f6088..25adad8e 100644 --- a/pkg/interp/buffer.go +++ b/pkg/interp/binary.go @@ -21,24 +21,37 @@ import ( func init() { functionRegisterFns = append(functionRegisterFns, func(i *Interp) []Function { return []Function{ - {"_tobitsrange", 0, 2, i._toBitsRange, nil}, + {"_tobits", 3, 3, i._toBits, nil}, {"open", 0, 0, i._open, nil}, } }) } -type ToBuffer interface { - ToBuffer() (Buffer, error) +type ToBinary interface { + ToBinary() (Binary, error) } -func toBitBuf(v interface{}) (bitio.ReaderAtSeeker, error) { - return toBitBufEx(v, false) -} - -func toBitBufEx(v interface{}, inArray bool) (bitio.ReaderAtSeeker, error) { +func toBinary(v interface{}) (Binary, error) { switch vv := v.(type) { - case ToBuffer: - bv, err := vv.ToBuffer() + case ToBinary: + return vv.ToBinary() + default: + br, err := toBitReader(v) + if err != nil { + return Binary{}, err + } + return newBinaryFromBitReader(br, 8, 0) + } +} + +func toBitReader(v interface{}) (bitio.ReaderAtSeeker, error) { + return 
toBitReaderEx(v, false) +} + +func toBitReaderEx(v interface{}, inArray bool) (bitio.ReaderAtSeeker, error) { + switch vv := v.(type) { + case ToBinary: + bv, err := vv.ToBinary() if err != nil { return nil, err } @@ -53,7 +66,7 @@ func toBitBufEx(v interface{}, inArray bool) (bitio.ReaderAtSeeker, error) { if inArray { if bi.Cmp(big.NewInt(255)) > 0 || bi.Cmp(big.NewInt(0)) < 0 { - return nil, fmt.Errorf("buffer byte list must be bytes (0-255) got %v", bi) + return nil, fmt.Errorf("byte in binary list must be bytes (0-255) got %v", bi) } n := bi.Uint64() b := [1]byte{byte(n)} @@ -78,103 +91,88 @@ func toBitBufEx(v interface{}, inArray bool) (bitio.ReaderAtSeeker, error) { rr := make([]bitio.ReadAtSeeker, 0, len(vv)) // TODO: optimize byte array case, flatten into one slice for _, e := range vv { - eBR, eErr := toBitBufEx(e, true) + eBR, eErr := toBitReaderEx(e, true) if eErr != nil { return nil, eErr } rr = append(rr, eBR) } - mb, err := bitio.NewMultiBitReader(rr...) + mb, err := bitio.NewMultiReader(rr...) 
if err != nil { return nil, err } return mb, nil default: - return nil, fmt.Errorf("value can't be a buffer") + return nil, fmt.Errorf("value can't be a binary") } } -func toBuffer(v interface{}) (Buffer, error) { - switch vv := v.(type) { - case ToBuffer: - return vv.ToBuffer() - default: - br, err := toBitBuf(v) - if err != nil { - return Buffer{}, err - } - return newBufferFromBuffer(br, 8) +// note is used to implement tobytes* also +func (i *Interp) _toBits(c interface{}, a []interface{}) interface{} { + unit, ok := gojqextra.ToInt(a[0]) + if !ok { + return gojqextra.FuncTypeError{Name: "_tobits", V: a[0]} } -} - -// note is used to implement tobytes*/0 also -func (i *Interp) _toBitsRange(c interface{}, a []interface{}) interface{} { - var unit int - var r bool - var ok bool - - if len(a) >= 1 { - unit, ok = gojqextra.ToInt(a[0]) - if !ok { - return gojqextra.FuncTypeError{Name: "_tobitsrange", V: a[0]} - } - } else { - unit = 1 + keepRange, ok := gojqextra.ToBoolean(a[1]) + if !ok { + return gojqextra.FuncTypeError{Name: "_tobits", V: a[1]} } - - if len(a) >= 2 { - r, ok = gojqextra.ToBoolean(a[1]) - if !ok { - return gojqextra.FuncTypeError{Name: "_tobitsrange", V: a[1]} - } - } else { - r = true + padToUnits, ok := gojqextra.ToInt(a[2]) + if !ok { + return gojqextra.FuncTypeError{Name: "_tobits", V: a[2]} } // TODO: unit > 8? 
- bv, err := toBuffer(c) + bv, err := toBinary(c) if err != nil { return err } - bv.unit = unit - if !r { - br, err := bv.toBuffer() - if err != nil { - return err - } - bb, err := newBufferFromBuffer(br, unit) - if err != nil { - return err - } - return bb + pad := int64(unit * padToUnits) + if pad == 0 { + pad = int64(unit) } - return bv + bv.unit = unit + bv.pad = (pad - bv.r.Len%pad) % pad + + if keepRange { + return bv + } + + br, err := bv.toReader() + if err != nil { + return err + } + bb, err := newBinaryFromBitReader(br, bv.unit, bv.pad) + if err != nil { + return err + } + return bb } type openFile struct { - Buffer + Binary filename string progressFn progressreadseeker.ProgressFn } var _ Value = (*openFile)(nil) -var _ ToBuffer = (*openFile)(nil) +var _ ToBinary = (*openFile)(nil) func (of *openFile) Display(w io.Writer, opts Options) error { _, err := fmt.Fprintf(w, "\n", of.filename) return err } -func (of *openFile) ToBuffer() (Buffer, error) { - return newBufferFromBuffer(of.br, 8) +func (of *openFile) ToBinary() (Binary, error) { + return newBinaryFromBitReader(of.br, 8, 0) } -// def open: #:: string| => buffer +// def open: #:: string| => binary // opens a file for reading from filesystem // TODO: when to close? when br loses all refs? need to use finalizer somehow? 
func (i *Interp) _open(c interface{}, a []interface{}) interface{} { @@ -254,29 +252,31 @@ func (i *Interp) _open(c interface{}, a []interface{}) interface{} { return bbf } -var _ Value = Buffer{} -var _ ToBuffer = Buffer{} +var _ Value = Binary{} +var _ ToBinary = Binary{} -type Buffer struct { +type Binary struct { br bitio.ReaderAtSeeker r ranges.Range unit int + pad int64 } -func newBufferFromBuffer(br bitio.ReaderAtSeeker, unit int) (Buffer, error) { +func newBinaryFromBitReader(br bitio.ReaderAtSeeker, unit int, pad int64) (Binary, error) { l, err := bitioextra.Len(br) if err != nil { - return Buffer{}, err + return Binary{}, err } - return Buffer{ + return Binary{ br: br, r: ranges.Range{Start: 0, Len: l}, unit: unit, + pad: pad, }, nil } -func (b Buffer) toBytesBuffer(r ranges.Range) (*bytes.Buffer, error) { +func (b Binary) toBytesBuffer(r ranges.Range) (*bytes.Buffer, error) { br, err := bitioextra.Range(b.br, r.Start, r.Len) if err != nil { return nil, err @@ -289,9 +289,9 @@ func (b Buffer) toBytesBuffer(r ranges.Range) (*bytes.Buffer, error) { return buf, nil } -func (Buffer) ExtType() string { return "buffer" } +func (Binary) ExtType() string { return "binary" } -func (Buffer) ExtKeys() []string { +func (Binary) ExtKeys() []string { return []string{ "size", "start", @@ -301,18 +301,18 @@ func (Buffer) ExtKeys() []string { } } -func (b Buffer) ToBuffer() (Buffer, error) { +func (b Binary) ToBinary() (Binary, error) { return b, nil } -func (b Buffer) JQValueLength() interface{} { +func (b Binary) JQValueLength() interface{} { return int(b.r.Len / int64(b.unit)) } -func (b Buffer) JQValueSliceLen() interface{} { +func (b Binary) JQValueSliceLen() interface{} { return b.JQValueLength() } -func (b Buffer) JQValueIndex(index int) interface{} { +func (b Binary) JQValueIndex(index int) interface{} { if index < 0 { return nil } @@ -326,17 +326,17 @@ func (b Buffer) JQValueIndex(index int) interface{} { return 
new(big.Int).Rsh(new(big.Int).SetBytes(buf.Bytes()), extraBits) } -func (b Buffer) JQValueSlice(start int, end int) interface{} { +func (b Binary) JQValueSlice(start int, end int) interface{} { rStart := int64(start * b.unit) rLen := int64((end - start) * b.unit) - return Buffer{ + return Binary{ br: b.br, r: ranges.Range{Start: b.r.Start + rStart, Len: rLen}, unit: b.unit, } } -func (b Buffer) JQValueKey(name string) interface{} { +func (b Binary) JQValueKey(name string) interface{} { switch name { case "size": return new(big.Int).SetInt64(b.r.Len / int64(b.unit)) @@ -353,28 +353,28 @@ func (b Buffer) JQValueKey(name string) interface{} { if b.unit == 1 { return b } - return Buffer{br: b.br, r: b.r, unit: 1} + return Binary{br: b.br, r: b.r, unit: 1} case "bytes": if b.unit == 8 { return b } - return Buffer{br: b.br, r: b.r, unit: 8} + return Binary{br: b.br, r: b.r, unit: 8} } return nil } -func (b Buffer) JQValueEach() interface{} { +func (b Binary) JQValueEach() interface{} { return nil } -func (b Buffer) JQValueType() string { - return "buffer" +func (b Binary) JQValueType() string { + return "binary" } -func (b Buffer) JQValueKeys() interface{} { - return gojqextra.FuncTypeNameError{Name: "keys", Typ: "buffer"} +func (b Binary) JQValueKeys() interface{} { + return gojqextra.FuncTypeNameError{Name: "keys", Typ: "binary"} } -func (b Buffer) JQValueHas(key interface{}) interface{} { - return gojqextra.HasKeyTypeError{L: "buffer", R: fmt.Sprintf("%v", key)} +func (b Binary) JQValueHas(key interface{}) interface{} { + return gojqextra.HasKeyTypeError{L: "binary", R: fmt.Sprintf("%v", key)} } -func (b Buffer) JQValueToNumber() interface{} { +func (b Binary) JQValueToNumber() interface{} { buf, err := b.toBytesBuffer(b.r) if err != nil { return err @@ -382,23 +382,23 @@ func (b Buffer) JQValueToNumber() interface{} { extraBits := uint((8 - b.r.Len%8) % 8) return new(big.Int).Rsh(new(big.Int).SetBytes(buf.Bytes()), extraBits) } -func (b Buffer) JQValueToString() 
interface{} { +func (b Binary) JQValueToString() interface{} { return b.JQValueToGoJQ() } -func (b Buffer) JQValueToGoJQ() interface{} { +func (b Binary) JQValueToGoJQ() interface{} { buf, err := b.toBytesBuffer(b.r) if err != nil { return err } return buf.String() } -func (b Buffer) JQValueUpdate(key interface{}, u interface{}, delpath bool) interface{} { - return gojqextra.NonUpdatableTypeError{Key: fmt.Sprintf("%v", key), Typ: "buffer"} +func (b Binary) JQValueUpdate(key interface{}, u interface{}, delpath bool) interface{} { + return gojqextra.NonUpdatableTypeError{Key: fmt.Sprintf("%v", key), Typ: "binary"} } -func (b Buffer) Display(w io.Writer, opts Options) error { +func (b Binary) Display(w io.Writer, opts Options) error { if opts.RawOutput { - br, err := b.toBuffer() + br, err := b.toReader() if err != nil { return err } @@ -413,6 +413,13 @@ func (b Buffer) Display(w io.Writer, opts Options) error { return hexdump(w, b, opts) } -func (b Buffer) toBuffer() (bitio.ReaderAtSeeker, error) { - return bitioextra.Range(b.br, b.r.Start, b.r.Len) +func (b Binary) toReader() (bitio.ReaderAtSeeker, error) { + br, err := bitioextra.Range(b.br, b.r.Start, b.r.Len) + if err != nil { + return nil, err + } + if b.pad == 0 { + return br, nil + } + return bitio.NewMultiReader(bitioextra.NewZeroAtSeeker(b.pad), br) } diff --git a/pkg/interp/binary.jq b/pkg/interp/binary.jq new file mode 100644 index 00000000..4866e5f2 --- /dev/null +++ b/pkg/interp/binary.jq @@ -0,0 +1,8 @@ +def tobits: _tobits(1; false; 0); +def tobytes: _tobits(8; false; 0); +def tobitsrange: _tobits(1; true; 0); +def tobytesrange: _tobits(8; true; 0); +def tobits($pad): _tobits(1; false; $pad); +def tobytes($pad): _tobits(8; false; $pad); +def tobitsrange($pad): _tobits(1; true; $pad); +def tobytesrange($pad): _tobits(8; true; $pad); \ No newline at end of file diff --git a/pkg/interp/buffer.jq b/pkg/interp/buffer.jq deleted file mode 100644 index 05407cb9..00000000 --- a/pkg/interp/buffer.jq +++ 
/dev/null @@ -1,4 +0,0 @@ -def tobitsrange: _tobitsrange; -def tobytesrange: _tobitsrange(8); -def tobits: _tobitsrange(1; false); -def tobytes: _tobitsrange(8; false); diff --git a/pkg/interp/decode.go b/pkg/interp/decode.go index 7aefe578..6d0e022d 100644 --- a/pkg/interp/decode.go +++ b/pkg/interp/decode.go @@ -44,7 +44,7 @@ func (err expectedExtkeyError) Error() string { // used by _isDecodeValue type DecodeValue interface { Value - ToBuffer + ToBinary DecodeValue() *decode.Value } @@ -188,7 +188,7 @@ func (i *Interp) _decode(c interface{}, a []interface{}) interface{} { } } - bv, err := toBuffer(c) + bv, err := toBinary(c) if err != nil { return err } @@ -276,7 +276,7 @@ func makeDecodeValue(dv *decode.Value) interface{} { switch vv := vv.Value().(type) { case bitio.ReaderAtSeeker: // is lazy so that in situations where the decode value is only used to - // create another buffer we don't have to read and create a string, ex: + // create another binary we don't have to read and create a string, ex: // .unknown0 | tobytes[1:] | ... 
return decodeValue{ JQValue: &gojqextra.Lazy{ @@ -364,8 +364,8 @@ func (dvb decodeValueBase) DecodeValue() *decode.Value { } func (dvb decodeValueBase) Display(w io.Writer, opts Options) error { return dump(dvb.dv, w, opts) } -func (dvb decodeValueBase) ToBuffer() (Buffer, error) { - return Buffer{br: dvb.dv.RootBitBuf, r: dvb.dv.InnerRange(), unit: 8}, nil +func (dvb decodeValueBase) ToBinary() (Binary, error) { + return Binary{br: dvb.dv.RootReader, r: dvb.dv.InnerRange(), unit: 8}, nil } func (decodeValueBase) ExtType() string { return "decode_value" } func (dvb decodeValueBase) ExtKeys() []string { @@ -479,14 +479,14 @@ func (dvb decodeValueBase) JQValueKey(name string) interface{} { return nil } case "_bits": - return Buffer{ - br: dv.RootBitBuf, + return Binary{ + br: dv.RootReader, r: dv.Range, unit: 1, } case "_bytes": - return Buffer{ - br: dv.RootBitBuf, + return Binary{ + br: dv.RootReader, r: dv.Range, unit: 8, } @@ -543,11 +543,11 @@ func (v decodeValue) JQValueToGoJQEx(optsFn func() Options) interface{} { return v.JQValueToGoJQ() } - bv, err := v.decodeValueBase.ToBuffer() + bv, err := v.decodeValueBase.ToBinary() if err != nil { return err } - br, err := bv.toBuffer() + br, err := bv.toReader() if err != nil { return err } diff --git a/pkg/interp/dump.go b/pkg/interp/dump.go index 7552c793..8593d0ed 100644 --- a/pkg/interp/dump.go +++ b/pkg/interp/dump.go @@ -219,7 +219,7 @@ func dumpEx(v *decode.Value, buf []byte, cw *columnwriter.Writer, depth int, roo printErrs(depth, valueErr) } - rootBitLen, err := bitioextra.Len(rootV.RootBitBuf) + rootBitLen, err := bitioextra.Len(rootV.RootReader) if err != nil { return err } @@ -267,7 +267,7 @@ func dumpEx(v *decode.Value, buf []byte, cw *columnwriter.Writer, depth int, roo cfmt(colAddr, "%s%s\n", rootIndent, deco.DumpAddr.F(mathextra.PadFormatInt(startLineByte, opts.AddrBase, true, addrWidth))) - vBR, err := bitioextra.Range(rootV.RootBitBuf, startByte*8, displaySizeBits) + vBR, err := 
bitioextra.Range(rootV.RootReader, startByte*8, displaySizeBits) if err != nil { return err } @@ -364,8 +364,8 @@ func dump(v *decode.Value, w io.Writer, opts Options) error { })) } -func hexdump(w io.Writer, bv Buffer, opts Options) error { - br, err := bv.toBuffer() +func hexdump(w io.Writer, bv Binary, opts Options) error { + br, err := bv.toReader() if err != nil { return err } @@ -389,7 +389,7 @@ func hexdump(w io.Writer, bv Buffer, opts Options) error { // TODO: hack V: &scalar.S{Actual: br}, Range: bv.r, - RootBitBuf: biib, + RootReader: biib, }, w, opts, diff --git a/pkg/interp/funcs.go b/pkg/interp/funcs.go index b7242a0d..89b11426 100644 --- a/pkg/interp/funcs.go +++ b/pkg/interp/funcs.go @@ -23,26 +23,26 @@ func init() { return []Function{ {"_hexdump", 1, 1, nil, i._hexdump}, - {"hex", 0, 0, makeStringBitBufTransformFn( + {"hex", 0, 0, makeStringBinaryTransformFn( func(r io.Reader) (io.Reader, error) { return hex.NewDecoder(r), nil }, func(r io.Writer) (io.Writer, error) { return hex.NewEncoder(r), nil }, ), nil}, - {"base64", 0, 0, makeStringBitBufTransformFn( + {"base64", 0, 0, makeStringBinaryTransformFn( func(r io.Reader) (io.Reader, error) { return base64.NewDecoder(base64.StdEncoding, r), nil }, func(r io.Writer) (io.Writer, error) { return base64.NewEncoder(base64.StdEncoding, r), nil }, ), nil}, - {"rawbase64", 0, 0, makeStringBitBufTransformFn( + {"rawbase64", 0, 0, makeStringBinaryTransformFn( func(r io.Reader) (io.Reader, error) { return base64.NewDecoder(base64.RawURLEncoding, r), nil }, func(r io.Writer) (io.Writer, error) { return base64.NewEncoder(base64.RawURLEncoding, r), nil }, ), nil}, - {"urlbase64", 0, 0, makeStringBitBufTransformFn( + {"urlbase64", 0, 0, makeStringBinaryTransformFn( func(r io.Reader) (io.Reader, error) { return base64.NewDecoder(base64.URLEncoding, r), nil }, func(r io.Writer) (io.Writer, error) { return base64.NewEncoder(base64.URLEncoding, r), nil }, ), nil}, - {"nal_unescape", 0, 0, makeBitBufTransformFn(func(r 
io.Reader) (io.Reader, error) { + {"nal_unescape", 0, 0, makeBinaryTransformFn(func(r io.Reader) (io.Reader, error) { return &decode.NALUnescapeReader{Reader: r}, nil }), nil}, @@ -57,15 +57,15 @@ func init() { }) } -// transform byte string <-> buffer using fn:s -func makeStringBitBufTransformFn( +// transform byte string <-> binary using fn:s +func makeStringBinaryTransformFn( decodeFn func(r io.Reader) (io.Reader, error), encodeFn func(w io.Writer) (io.Writer, error), ) func(c interface{}, a []interface{}) interface{} { return func(c interface{}, a []interface{}) interface{} { switch c := c.(type) { case string: - br, err := toBitBuf(c) + br, err := toBitReader(c) if err != nil { return err } @@ -80,13 +80,13 @@ func makeStringBitBufTransformFn( return err } - bb, err := newBufferFromBuffer(bitio.NewBitReader(buf.Bytes(), -1), 8) + bb, err := newBinaryFromBitReader(bitio.NewBitReader(buf.Bytes(), -1), 8, 0) if err != nil { return err } return bb default: - br, err := toBitBuf(c) + br, err := toBitReader(c) if err != nil { return err } @@ -110,10 +110,10 @@ func makeStringBitBufTransformFn( } } -// transform to buffer using fn -func makeBitBufTransformFn(fn func(r io.Reader) (io.Reader, error)) func(c interface{}, a []interface{}) interface{} { +// transform to binary using fn +func makeBinaryTransformFn(fn func(r io.Reader) (io.Reader, error)) func(c interface{}, a []interface{}) interface{} { return func(c interface{}, a []interface{}) interface{} { - inBR, err := toBitBuf(c) + inBR, err := toBitReader(c) if err != nil { return err } @@ -130,7 +130,7 @@ func makeBitBufTransformFn(fn func(r io.Reader) (io.Reader, error)) func(c inter outBR := bitio.NewBitReader(outBuf.Bytes(), -1) - bb, err := newBufferFromBuffer(outBR, 8) + bb, err := newBinaryFromBitReader(outBR, 8, 0) if err != nil { return err } @@ -138,10 +138,10 @@ func makeBitBufTransformFn(fn func(r io.Reader) (io.Reader, error)) func(c inter } } -// transform to buffer using fn +// transform to binary 
using fn func makeHashFn(fn func() (hash.Hash, error)) func(c interface{}, a []interface{}) interface{} { return func(c interface{}, a []interface{}) interface{} { - inBR, err := toBitBuf(c) + inBR, err := toBitReader(c) if err != nil { return err } @@ -156,7 +156,7 @@ func makeHashFn(fn func() (hash.Hash, error)) func(c interface{}, a []interface{ outBR := bitio.NewBitReader(h.Sum(nil), -1) - bb, err := newBufferFromBuffer(outBR, 8) + bb, err := newBinaryFromBitReader(outBR, 8, 0) if err != nil { return err } @@ -234,7 +234,7 @@ func (i *Interp) aesCtr(c interface{}, a []interface{}) interface{} { ivBytes = make([]byte, block.BlockSize()) } - br, err := toBitBuf(c) + br, err := toBitReader(c) if err != nil { return err } @@ -245,7 +245,7 @@ func (i *Interp) aesCtr(c interface{}, a []interface{}) interface{} { return err } - bb, err := newBufferFromBuffer(bitio.NewBitReader(buf.Bytes(), -1), 8) + bb, err := newBinaryFromBitReader(bitio.NewBitReader(buf.Bytes(), -1), 8, 0) if err != nil { return err } @@ -254,7 +254,7 @@ func (i *Interp) aesCtr(c interface{}, a []interface{}) interface{} { func (i *Interp) _hexdump(c interface{}, a []interface{}) gojq.Iter { opts := i.Options(a[0]) - bv, err := toBuffer(c) + bv, err := toBinary(c) if err != nil { return gojq.NewIter(err) } diff --git a/pkg/interp/interp.go b/pkg/interp/interp.go index 4153326e..2937771f 100644 --- a/pkg/interp/interp.go +++ b/pkg/interp/interp.go @@ -35,7 +35,7 @@ import ( //go:embed interp.jq //go:embed internal.jq //go:embed options.jq -//go:embed buffer.jq +//go:embed binary.jq //go:embed decode.jq //go:embed match.jq //go:embed funcs.jq @@ -270,7 +270,7 @@ func toBigInt(v interface{}) (*big.Int, error) { func toBytes(v interface{}) ([]byte, error) { switch v := v.(type) { default: - br, err := toBitBuf(v) + br, err := toBitReader(v) if err != nil { return nil, fmt.Errorf("value is not bytes") } diff --git a/pkg/interp/interp.jq b/pkg/interp/interp.jq index a808722b..1271df14 100644 --- 
a/pkg/interp/interp.jq +++ b/pkg/interp/interp.jq @@ -1,6 +1,6 @@ include "internal"; include "options"; -include "buffer"; +include "binary"; include "decode"; include "match"; include "funcs"; diff --git a/pkg/interp/match.go b/pkg/interp/match.go index cfb80816..6b0159d1 100644 --- a/pkg/interp/match.go +++ b/pkg/interp/match.go @@ -15,15 +15,15 @@ import ( func init() { functionRegisterFns = append(functionRegisterFns, func(i *Interp) []Function { return []Function{ - {"_match_buffer", 1, 2, nil, i._bufferMatch}, + {"_match_binary", 1, 2, nil, i._binaryMatch}, } }) } -func (i *Interp) _bufferMatch(c interface{}, a []interface{}) gojq.Iter { +func (i *Interp) _binaryMatch(c interface{}, a []interface{}) gojq.Iter { var ok bool - bv, err := toBuffer(c) + bv, err := toBinary(c) if err != nil { return gojq.NewIter(err) } @@ -70,7 +70,7 @@ func (i *Interp) _bufferMatch(c interface{}, a []interface{}) gojq.Iter { } sreNames := sre.SubexpNames() - br, err := bv.toBuffer() + br, err := bv.toReader() if err != nil { return gojq.NewIter(err) } @@ -92,7 +92,7 @@ func (i *Interp) _bufferMatch(c interface{}, a []interface{}) gojq.Iter { var off int64 prevOff := int64(-1) return iterFn(func() (interface{}, bool) { - // TODO: correct way to handle empty match for buffer, move one byte forward? + // TODO: correct way to handle empty match for binary, move one byte forward? // > "asdasd" | [match(""; "g")], [(tobytes | match(""; "g"))] | length // 7 // 1 @@ -127,7 +127,7 @@ func (i *Interp) _bufferMatch(c interface{}, a []interface{}) gojq.Iter { if start != -1 { matchBitOff := (off + int64(start)) * 8 matchLength := int64(end-start) * 8 - bbo := Buffer{ + bbo := Binary{ br: bv.br, r: ranges.Range{ Start: bv.r.Start + matchBitOff, diff --git a/pkg/interp/match.jq b/pkg/interp/match.jq index 30c39f64..32574505 100644 --- a/pkg/interp/match.jq +++ b/pkg/interp/match.jq @@ -1,10 +1,10 @@ -def _buffer_fn(f): +def _binary_fn(f): ( . 
as $c | tobytesrange | f ); -def _buffer_try_orig(bfn; fn): +def _binary_try_orig(bfn; fn): ( . as $c | if type == "string" then fn else @@ -15,27 +15,27 @@ def _buffer_try_orig(bfn; fn): end ); -# overloads to support buffer +# overloads to support binary def _orig_test($val): test($val); def _orig_test($regex; $flags): test($regex; $flags); -def _test_buffer($regex; $flags): - ( isempty(_match_buffer($regex; $flags)) +def _test_binary($regex; $flags): + ( isempty(_match_binary($regex; $flags)) | not ); -def test($val): _buffer_try_orig(_test_buffer($val; ""); _orig_test($val)); -def test($regex; $flags): _buffer_try_orig(_test_buffer($regex; $flags); _orig_test($regex; $flags)); +def test($val): _binary_try_orig(_test_binary($val; ""); _orig_test($val)); +def test($regex; $flags): _binary_try_orig(_test_binary($regex; $flags); _orig_test($regex; $flags)); def _orig_match($val): match($val); def _orig_match($regex; $flags): match($regex; $flags); -def match($val): _buffer_try_orig(_match_buffer($val); _orig_match($val)); -def match($regex; $flags): _buffer_try_orig(_match_buffer($regex; $flags); _orig_match($regex; $flags)); +def match($val): _binary_try_orig(_match_binary($val); _orig_match($val)); +def match($regex; $flags): _binary_try_orig(_match_binary($regex; $flags); _orig_match($regex; $flags)); def _orig_capture($val): capture($val); def _orig_capture($regex; $flags): capture($regex; $flags); -def _capture_buffer($regex; $flags): +def _capture_binary($regex; $flags): ( . 
as $b - | _match_buffer($regex; $flags) + | _match_binary($regex; $flags) | .captures | map( ( select(.name) @@ -44,25 +44,25 @@ def _capture_buffer($regex; $flags): ) | from_entries ); -def capture($val): _buffer_try_orig(_capture_buffer($val; ""); _orig_capture($val)); -def capture($regex; $flags): _buffer_try_orig(_capture_buffer($regex; $flags); _orig_capture($regex; $flags)); +def capture($val): _binary_try_orig(_capture_binary($val; ""); _orig_capture($val)); +def capture($regex; $flags): _binary_try_orig(_capture_binary($regex; $flags); _orig_capture($regex; $flags)); def _orig_scan($val): scan($val); def _orig_scan($regex; $flags): scan($regex; $flags); -def _scan_buffer($regex; $flags): +def _scan_binary($regex; $flags): ( . as $b - | _match_buffer($regex; $flags) + | _match_binary($regex; $flags) | $b[.offset:.offset+.length] ); -def scan($val): _buffer_try_orig(_scan_buffer($val; "g"); _orig_scan($val)); -def scan($regex; $flags): _buffer_try_orig(_scan_buffer($regex; "g"+$flags); _orig_scan($regex; $flags)); +def scan($val): _binary_try_orig(_scan_binary($val; "g"); _orig_scan($val)); +def scan($regex; $flags): _binary_try_orig(_scan_binary($regex; "g"+$flags); _orig_scan($regex; $flags)); def _orig_splits($val): splits($val); def _orig_splits($regex; $flags): splits($regex; $flags); -def _splits_buffer($regex; $flags): +def _splits_binary($regex; $flags): ( . 
as $b - # last null output is to do a last iteration that output from end of last match to end of buffer - | foreach (_match_buffer($regex; $flags), null) as $m ( + # last null output is to do a last iteration that output from end of last match to end of binary + | foreach (_match_binary($regex; $flags), null) as $m ( {prev: null, curr: null}; ( .prev = .curr | .curr = $m @@ -73,8 +73,8 @@ def _splits_buffer($regex; $flags): end ) ); -def splits($val): _buffer_try_orig(_splits_buffer($val; "g"); _orig_splits($val)); -def splits($regex; $flags): _buffer_try_orig(_splits_buffer($regex; "g"+$flags); _orig_splits($regex; $flags)); +def splits($val): _binary_try_orig(_splits_binary($val; "g"); _orig_splits($val)); +def splits($regex; $flags): _binary_try_orig(_splits_binary($regex; "g"+$flags); _orig_splits($regex; $flags)); # same as regexp.QuoteMeta def _quote_meta: @@ -87,11 +87,11 @@ def split($val): [splits($val | _quote_meta)]; def split($regex; $flags): [splits($regex; $flags)]; # TODO: rename -# same as scan but outputs buffer from start of match to end of buffer +# same as scan but outputs binary from start of match to end of binary def _scan_toend($regex; $flags): ( . 
as $b - | _match_buffer($regex; $flags) + | _match_binary($regex; $flags) | $b[.offset:] ); -def scan_toend($val): _buffer_fn(_scan_toend($val; "g")); -def scan_toend($regex; $flags): _buffer_fn(_scan_toend($regex; "g"+$flags)); +def scan_toend($val): _binary_fn(_scan_toend($val; "g")); +def scan_toend($regex; $flags): _binary_fn(_scan_toend($regex; "g"+$flags)); diff --git a/pkg/interp/testdata/buffer.fqtest b/pkg/interp/testdata/buffer.fqtest index 7678abf8..364d37a6 100644 --- a/pkg/interp/testdata/buffer.fqtest +++ b/pkg/interp/testdata/buffer.fqtest @@ -51,9 +51,9 @@ mp3> [1, 2, 3, [1, 2, 3], .headers[0].magic] | tobytes |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|01 02 03 01 02 03 49 44 33| |......ID3| |.: raw bits 0x0-0x8.7 (9) mp3> [-1] | tobytes -error: buffer byte list must be bytes (0-255) got -1 +error: byte in binary list must be bytes (0-255) got -1 mp3> [256] | tobytes -error: buffer byte list must be bytes (0-255) got 256 +error: byte in binary list must be bytes (0-255) got 256 mp3> ^D $ fq -d mp3 -i . /test.mp3 mp3> .frames[1] | tobits | ., .start, .stop, .size, .[4:17], (tobits, tobytes, tobitsrange, tobytesrange | ., .start, .stop, .size, .[4:17]) @@ -256,41 +256,59 @@ mp3> "fq" | tobits | chunk(range(17)+1) | tobytes | tostring "fq" "fq" "fq" -mp3> range(17) | [range(.) 
| 1 | tobits] | tobytes - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| - | | |.: raw bits 0x0-NA (0) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|80| |.| |.: raw bits 0x0-0x0 (0.1) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|c0| |.| |.: raw bits 0x0-0x0.1 (0.2) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|e0| |.| |.: raw bits 0x0-0x0.2 (0.3) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|f0| |.| |.: raw bits 0x0-0x0.3 (0.4) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|f8| |.| |.: raw bits 0x0-0x0.4 (0.5) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|fc| |.| |.: raw bits 0x0-0x0.5 (0.6) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|fe| |.| |.: raw bits 0x0-0x0.6 (0.7) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff| |.| |.: raw bits 0x0-0x0.7 (1) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff 80| |..| |.: raw bits 0x0-0x1 (1.1) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff c0| |..| |.: raw bits 0x0-0x1.1 (1.2) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff e0| |..| |.: raw bits 0x0-0x1.2 (1.3) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff f0| |..| |.: raw bits 0x0-0x1.3 (1.4) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff f8| |..| |.: raw bits 0x0-0x1.4 (1.5) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff fc| |..| |.: raw bits 0x0-0x1.5 (1.6) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff fe| |..| |.: raw bits 0x0-0x1.6 (1.7) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|ff ff| |..| |.: raw bits 0x0-0x1.7 (2) +mp3> 1 | tobits(range(10)) | hex +"80" +"80" +"40" +"20" +"10" +"08" +"04" 
+"02" +"01" +"0080" +mp3> 1 | tobytes(range(5)) | hex +"01" +"01" +"0001" +"000001" +"00000001" +mp3> range(17) | [range(.) | 1 | tobits] | tobits | hex +"" +"80" +"c0" +"e0" +"f0" +"f8" +"fc" +"fe" +"ff" +"ff80" +"ffc0" +"ffe0" +"fff0" +"fff8" +"fffc" +"fffe" +"ffff" +mp3> range(17) | [range(.) | 1 | tobits] | tobytes | hex +"" +"01" +"03" +"07" +"0f" +"1f" +"3f" +"7f" +"ff" +"01ff" +"03ff" +"07ff" +"0fff" +"1fff" +"3fff" +"7fff" +"ffff" mp3> "c9dfdac2f6ef68e5db666b6fbeee66d9c7deda66bebfbfe860bfbfbfe9d1636bbfbebf" | hex | tobits | reduce chunk(8)[] as $c ({h:[],g:[]}; .h += [(0|tobits), $c[0:7]] | .g |= . + [if length % 8 == 0 then (0|tobits) else empty end, $c[7:8]]) | .h, .g | tobytes |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00|64 6f 6d 61 7b 77 34 72 6d 33 35 37 5f 77 33 6c|doma{w4rm357_w3l|.: raw bits 0x0-0x22.7 (35) diff --git a/pkg/interp/testdata/value_array.fqtest b/pkg/interp/testdata/value_array.fqtest index af447ae3..087e0f24 100644 --- a/pkg/interp/testdata/value_array.fqtest +++ b/pkg/interp/testdata/value_array.fqtest @@ -166,14 +166,14 @@ mp3> .headers._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00|49 44 33 04 00 00 00 00 00 23 54 53 53 45 00 00|ID3......#TSSE..|.: raw bits 0x0-0x2c.7 (45) * |until 0x2c.7 (45) | | -"buffer" +"binary" 360 mp3> mp3> .headers._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00|49 44 33 04 00 00 00 00 00 23 54 53 53 45 00 00|ID3......#TSSE..|.: raw bits 0x0-0x2c.7 (45) * |until 0x2c.7 (45) | | -"buffer" +"binary" 45 mp3> mp3> .headers._error | ., type, length? diff --git a/pkg/interp/testdata/value_boolean.fqtest b/pkg/interp/testdata/value_boolean.fqtest index 74d648b7..52fa1672 100644 --- a/pkg/interp/testdata/value_boolean.fqtest +++ b/pkg/interp/testdata/value_boolean.fqtest @@ -72,13 +72,13 @@ mp3> .headers[0].flags.unsynchronisation._path | ., type, length? 
mp3> .headers[0].flags.unsynchronisation._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| 00 | . |.: raw bits 0x5-0x5 (0.1) -"buffer" +"binary" 1 mp3> mp3> .headers[0].flags.unsynchronisation._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| 00 | . |.: raw bits 0x5-0x5 (0.1) -"buffer" +"binary" 0 mp3> mp3> .headers[0].flags.unsynchronisation._error | ., type, length? diff --git a/pkg/interp/testdata/value_json_array.fqtest b/pkg/interp/testdata/value_json_array.fqtest index 47f30180..a75b11f0 100644 --- a/pkg/interp/testdata/value_json_array.fqtest +++ b/pkg/interp/testdata/value_json_array.fqtest @@ -81,13 +81,13 @@ json> (.)._path | ., type, length? json> (.)._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|5b 5d| |[]| |.: raw bits 0x0-0x1.7 (2) -"buffer" +"binary" 16 json> json> (.)._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|5b 5d| |[]| |.: raw bits 0x0-0x1.7 (2) -"buffer" +"binary" 2 json> json> (.)._error | ., type, length? diff --git a/pkg/interp/testdata/value_json_object.fqtest b/pkg/interp/testdata/value_json_object.fqtest index f3a01f0f..5955dc33 100644 --- a/pkg/interp/testdata/value_json_object.fqtest +++ b/pkg/interp/testdata/value_json_object.fqtest @@ -71,13 +71,13 @@ json> (.)._path | ., type, length? json> (.)._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|7b 7d| |{}| |.: raw bits 0x0-0x1.7 (2) -"buffer" +"binary" 16 json> json> (.)._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|7b 7d| |{}| |.: raw bits 0x0-0x1.7 (2) -"buffer" +"binary" 2 json> json> (.)._error | ., type, length? 
diff --git a/pkg/interp/testdata/value_null.fqtest b/pkg/interp/testdata/value_null.fqtest index 1618e839..76b26cb2 100644 --- a/pkg/interp/testdata/value_null.fqtest +++ b/pkg/interp/testdata/value_null.fqtest @@ -84,13 +84,13 @@ mp3> .headers[0].padding._path | ., type, length? mp3> .headers[0].padding._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x20| 00 00 00 00 00 00 00 00 00 00 | .......... |.: raw bits 0x23-0x2c.7 (10) -"buffer" +"binary" 80 mp3> mp3> .headers[0].padding._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x20| 00 00 00 00 00 00 00 00 00 00 | .......... |.: raw bits 0x23-0x2c.7 (10) -"buffer" +"binary" 10 mp3> mp3> .headers[0].padding._error | ., type, length? diff --git a/pkg/interp/testdata/value_number.fqtest b/pkg/interp/testdata/value_number.fqtest index 6a31451d..73e1bab3 100644 --- a/pkg/interp/testdata/value_number.fqtest +++ b/pkg/interp/testdata/value_number.fqtest @@ -72,13 +72,13 @@ mp3> .headers[0].version._path | ., type, length? mp3> .headers[0].version._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| 04 | . |.: raw bits 0x3-0x3.7 (1) -"buffer" +"binary" 8 mp3> mp3> .headers[0].version._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| 04 | . |.: raw bits 0x3-0x3.7 (1) -"buffer" +"binary" 1 mp3> mp3> .headers[0].version._error | ., type, length? diff --git a/pkg/interp/testdata/value_object.fqtest b/pkg/interp/testdata/value_object.fqtest index 6970924e..82f68493 100644 --- a/pkg/interp/testdata/value_object.fqtest +++ b/pkg/interp/testdata/value_object.fqtest @@ -88,13 +88,13 @@ mp3> .headers[0].flags._path | ., type, length? mp3> .headers[0].flags._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| 00 | . 
|.: raw bits 0x5-0x5.7 (1) -"buffer" +"binary" 8 mp3> mp3> .headers[0].flags._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| 00 | . |.: raw bits 0x5-0x5.7 (1) -"buffer" +"binary" 1 mp3> mp3> .headers[0].flags._error | ., type, length? diff --git a/pkg/interp/testdata/value_string.fqtest b/pkg/interp/testdata/value_string.fqtest index ebdb6f32..6da30283 100644 --- a/pkg/interp/testdata/value_string.fqtest +++ b/pkg/interp/testdata/value_string.fqtest @@ -84,13 +84,13 @@ mp3> .headers[0].magic._path | ., type, length? mp3> .headers[0].magic._bits | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|49 44 33 |ID3 |.: raw bits 0x0-0x2.7 (3) -"buffer" +"binary" 24 mp3> mp3> .headers[0].magic._bytes | ., type, length? |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|49 44 33 |ID3 |.: raw bits 0x0-0x2.7 (3) -"buffer" +"binary" 3 mp3> mp3> .headers[0].magic._error | ., type, length?