From aab32cf2dbd25c6ecf03ea1f63b924867e495d9c Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Sun, 21 Nov 2021 21:27:44 +0100 Subject: [PATCH] gojqextra,interp: Add lazy string to speed usage of decode value buffer where string is not used --- format/bzip2/bzip2.go | 2 +- format/gif/gif.go | 2 +- format/gzip/gzip.go | 2 +- internal/gojqextra/types.go | 60 +++++++++++++++++++++++++++++++++++++ pkg/bitio/buffer.go | 3 +- pkg/interp/bufferrange.go | 4 +-- pkg/interp/dump.go | 6 ++-- pkg/interp/value.go | 21 ++++++++----- 8 files changed, 83 insertions(+), 17 deletions(-) diff --git a/format/bzip2/bzip2.go b/format/bzip2/bzip2.go index cfa0ece0..2074ba17 100644 --- a/format/bzip2/bzip2.go +++ b/format/bzip2/bzip2.go @@ -114,7 +114,7 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} { } blockCRC32W := crc32.NewIEEE() - if _, err := d.Copy(blockCRC32W, bitFlipReader{uncompressedBB.Copy()}); err != nil { + if _, err := d.Copy(blockCRC32W, bitFlipReader{uncompressedBB.Clone()}); err != nil { d.IOPanic(err) } blockCRC32N := bits.Reverse32(binary.BigEndian.Uint32(blockCRC32W.Sum(nil))) diff --git a/format/gif/gif.go b/format/gif/gif.go index 633c1aac..374ba79f 100644 --- a/format/gif/gif.go +++ b/format/gif/gif.go @@ -92,7 +92,7 @@ func gifDecode(d *decode.D, in interface{}) interface{} { d.FieldU8("terminator") seenTerminator = true } - d.MustCopy(dataBytes, b.Copy()) + d.MustCopy(dataBytes, b.Clone()) }) } }) diff --git a/format/gzip/gzip.go b/format/gzip/gzip.go index 48fb3c47..d8598845 100644 --- a/format/gzip/gzip.go +++ b/format/gzip/gzip.go @@ -112,7 +112,7 @@ func gzDecode(d *decode.D, in interface{}) interface{} { d.FieldRawLen("compressed", readCompressedSize) crc32W := crc32.NewIEEE() - if _, err := io.Copy(crc32W, uncompressedBB.Copy()); err != nil { + if _, err := io.Copy(crc32W, uncompressedBB.Clone()); err != nil { d.IOPanic(err) } d.FieldU32("crc32", d.ValidateU(uint64(binary.LittleEndian.Uint32(crc32W.Sum(nil)))), d.Hex) diff --git a/internal/gojqextra/types.go b/internal/gojqextra/types.go index a933f13f..f0418bcb 100644 --- a/internal/gojqextra/types.go +++ b/internal/gojqextra/types.go @@ -241,6 +241,66 @@ func (v String) JQValueToNumber() interface{} { return gojq.NormalizeNumbers(str func (v String) JQValueToString() interface{} { return string(v) } func (v String) JQValueToGoJQ() interface{} { return string(v) } +// lazy string + +var _ gojq.JQValue = &LazyString{} + +type LazyString struct { + Fn func() ([]rune, error) + called bool + rs []rune +} + +func (v *LazyString) wrap(fn func(rs []rune) interface{}) interface{} { + if !v.called { + rs, err := v.Fn() + if err != nil { + return err + } + v.called = true + v.rs = rs + } + return fn(v.rs) +} + +func (v *LazyString) JQValueLength() interface{} { + return v.wrap(func(rs []rune) interface{} { return len(rs) }) +} +func (v *LazyString) JQValueSliceLen() interface{} { + return v.wrap(func(rs []rune) interface{} { return len(rs) }) +} +func (v *LazyString) JQValueIndex(index int) interface{} { + // -1 outside after string, -2 outside before string + if index < 0 { + return "" + } + return v.wrap(func(rs []rune) interface{} { return fmt.Sprintf("%c", rs[index]) }) +} +func (v *LazyString) JQValueSlice(start int, end int) interface{} { + return v.wrap(func(rs []rune) interface{} { return string(rs[start:end]) }) + +} +func (v *LazyString) JQValueKey(name string) interface{} { return ExpectedObjectError{Typ: "string"} } +func (v *LazyString) JQValueUpdate(key interface{}, u interface{}, delpath bool) interface{} { + return expectedArrayOrObject(key, "string") +} +func (v *LazyString) JQValueEach() interface{} { return IteratorError{Typ: "string"} } +func (v *LazyString) JQValueKeys() interface{} { return FuncTypeNameError{Name: "keys", Typ: "string"} } +func (v *LazyString) JQValueHas(key interface{}) interface{} { + return FuncTypeNameError{Name: "has", Typ: "string"} +} +func (v *LazyString) JQValueType() string { return "string" } +func (v *LazyString) JQValueToNumber() interface{} { + return v.wrap(func(rs []rune) interface{} { return gojq.NormalizeNumbers(string(rs)) }) + +} +func (v *LazyString) JQValueToString() interface{} { + return v.wrap(func(rs []rune) interface{} { return string(rs) }) +} +func (v *LazyString) JQValueToGoJQ() interface{} { + return v.wrap(func(rs []rune) interface{} { return string(rs) }) +} + // boolean var _ gojq.JQValue = Boolean(true) diff --git a/pkg/bitio/buffer.go b/pkg/bitio/buffer.go index e233e616..583bc31a 100644 --- a/pkg/bitio/buffer.go +++ b/pkg/bitio/buffer.go @@ -101,7 +101,8 @@ func (b *Buffer) BitBufRange(firstBitOffset int64, nBits int64) (*Buffer, error) }, nil } -func (b *Buffer) Copy() *Buffer { +// Clone buffer and reset position to zero +func (b *Buffer) Clone() *Buffer { return &Buffer{ br: NewSectionBitReader(b.br, 0, b.bitLen), bitLen: b.bitLen, diff --git a/pkg/interp/bufferrange.go b/pkg/interp/bufferrange.go index 9abd1a5c..99df8102 100644 --- a/pkg/interp/bufferrange.go +++ b/pkg/interp/bufferrange.go @@ -184,7 +184,7 @@ func (bv BufferRange) toBytesBuffer(r ranges.Range) (*bytes.Buffer, error) { return nil, err } buf := &bytes.Buffer{} - if _, err := io.Copy(buf, bb.Copy()); err != nil { + if _, err := io.Copy(buf, bb.Clone()); err != nil { return nil, err } return buf, nil @@ -300,7 +300,7 @@ func (bv BufferRange) Display(w io.Writer, opts Options) error { if err != nil { return err } - if _, err := io.Copy(w, bb.Copy()); err != nil { + if _, err := io.Copy(w, bb.Clone()); err != nil { return err } return nil diff --git a/pkg/interp/dump.go b/pkg/interp/dump.go index 3d98b6a3..fe6b3e0d 100644 --- a/pkg/interp/dump.go +++ b/pkg/interp/dump.go @@ -265,13 +265,13 @@ func dumpEx(v *decode.Value, buf []byte, cw *columnwriter.Writer, depth int, roo if vBitBuf != nil { if _, err := io.CopyBuffer( hexpairwriter.New(cw.Columns[colHex], opts.LineBytes, int(startLineByteOffset), hexpairFn), - io.LimitReader(vBitBuf.Copy(), displaySizeBytes), + io.LimitReader(vBitBuf.Clone(), displaySizeBytes), buf); err != nil { return err } if _, err := io.CopyBuffer( asciiwriter.New(cw.Columns[colASCII], opts.LineBytes, int(startLineByteOffset), asciiFn), - io.LimitReader(vBitBuf.Copy(), displaySizeBytes), + io.LimitReader(vBitBuf.Clone(), displaySizeBytes), buf); err != nil { return err } @@ -359,7 +359,7 @@ func hexdump(w io.Writer, bv BufferRange, opts Options) error { // TODO: hack V: decode.Scalar{Actual: bb}, Range: bv.r, - RootBitBuf: bv.bb.Copy(), + RootBitBuf: bv.bb.Clone(), }, w, opts, diff --git a/pkg/interp/value.go b/pkg/interp/value.go index 43ebf510..0bc1df30 100644 --- a/pkg/interp/value.go +++ b/pkg/interp/value.go @@ -191,14 +191,19 @@ func makeDecodeValue(dv *decode.Value) interface{} { case decode.Scalar: switch vv := vv.Value().(type) { case *bitio.Buffer: - buf := &bytes.Buffer{} - if _, err := io.Copy(buf, vv.Copy()); err != nil { - return err - } - // TODO: split *bitio.Buffer into just marker (bit range in root bitbuf) - // or *bitio.Buffer if actually other bitbuf + // is lazy so that in situations where the decode value is only used to + // create another buffer we don't have to read and create a string, ex: + // .unknown0 | tobytes[1:] | ... return decodeValue{ - JQValue: gojqextra.String(buf.String()), + JQValue: &gojqextra.LazyString{ + Fn: func() ([]rune, error) { + buf := &bytes.Buffer{} + if _, err := io.Copy(buf, vv.Clone()); err != nil { + return nil, err + } + return []rune(buf.String()), nil + }, + }, decodeValueBase: decodeValueBase{dv}, bitsFormat: true, } @@ -446,7 +451,7 @@ func (v decodeValue) JQValueToGoJQEx(optsFn func() Options) interface{} { return err } - s, err := optsFn().BitsFormatFn(bb.Copy()) + s, err := optsFn().BitsFormatFn(bb.Clone()) if err != nil { return err }