mirror of
https://github.com/wader/fq.git
synced 2024-11-24 11:16:09 +03:00
06245d1295
Rename buffer to binary. Still some work left what to call buffer/binary in decode code. Document decode value and binary type Fix proper unit padding for tobytes and add still undocumenated extra padding argument. Add some additional binary tests
167 lines
3.4 KiB
Go
167 lines
3.4 KiB
Go
package interp
|
|
|
|
import (
|
|
"io"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/wader/fq/internal/gojqextra"
|
|
"github.com/wader/fq/internal/ioextra"
|
|
"github.com/wader/fq/pkg/bitio"
|
|
"github.com/wader/fq/pkg/ranges"
|
|
"github.com/wader/gojq"
|
|
)
|
|
|
|
func init() {
|
|
functionRegisterFns = append(functionRegisterFns, func(i *Interp) []Function {
|
|
return []Function{
|
|
{"_match_binary", 1, 2, nil, i._binaryMatch},
|
|
}
|
|
})
|
|
}
|
|
|
|
func (i *Interp) _binaryMatch(c interface{}, a []interface{}) gojq.Iter {
|
|
var ok bool
|
|
|
|
bv, err := toBinary(c)
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
|
|
var re string
|
|
var byteRunes bool
|
|
var global bool
|
|
|
|
switch a0 := a[0].(type) {
|
|
case string:
|
|
re = a0
|
|
default:
|
|
reBuf, err := toBytes(a0)
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
var reRs []rune
|
|
for _, b := range reBuf {
|
|
reRs = append(reRs, rune(b))
|
|
}
|
|
byteRunes = true
|
|
// escape paratheses runes etc
|
|
re = regexp.QuoteMeta(string(reRs))
|
|
}
|
|
|
|
var flags string
|
|
if len(a) > 1 {
|
|
flags, ok = a[1].(string)
|
|
if !ok {
|
|
return gojq.NewIter(gojqextra.FuncTypeNameError{Name: "find", Typ: "string"})
|
|
}
|
|
}
|
|
|
|
if strings.Contains(flags, "b") {
|
|
byteRunes = true
|
|
}
|
|
global = strings.Contains(flags, "g")
|
|
|
|
// TODO: err to string
|
|
// TODO: extract to regexpextra? "all" FindReaderSubmatchIndex that can iter?
|
|
sre, err := gojqextra.CompileRegexp(re, "gimb", flags)
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
sreNames := sre.SubexpNames()
|
|
|
|
br, err := bv.toReader()
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
|
|
var rr interface {
|
|
io.RuneReader
|
|
io.Seeker
|
|
}
|
|
// raw bytes regexp matching is a bit tricky, what we do is to read each byte as a codepoint (ByteRuneReader)
|
|
// and then we can use UTF-8 encoded codepoint to match a raw byte. So for example \u00ff (encoded as 0xc3 0xbf)
|
|
// will match the byte \0xff
|
|
if byteRunes {
|
|
// byte mode, read each byte as a rune
|
|
rr = ioextra.ByteRuneReader{RS: bitio.NewIOReadSeeker(br)}
|
|
} else {
|
|
rr = ioextra.RuneReadSeeker{RS: bitio.NewIOReadSeeker(br)}
|
|
}
|
|
|
|
var off int64
|
|
prevOff := int64(-1)
|
|
return iterFn(func() (interface{}, bool) {
|
|
// TODO: correct way to handle empty match for binary, move one byte forward?
|
|
// > "asdasd" | [match(""; "g")], [(tobytes | match(""; "g"))] | length
|
|
// 7
|
|
// 1
|
|
if prevOff == off {
|
|
return nil, false
|
|
}
|
|
|
|
if prevOff != -1 && !global {
|
|
return nil, false
|
|
}
|
|
|
|
_, err = rr.Seek(off, io.SeekStart)
|
|
if err != nil {
|
|
return err, false
|
|
}
|
|
|
|
l := sre.FindReaderSubmatchIndex(rr)
|
|
if l == nil {
|
|
return nil, false
|
|
}
|
|
|
|
var captures []interface{}
|
|
var firstCapture map[string]interface{}
|
|
|
|
for i := 0; i < len(l)/2; i++ {
|
|
start, end := l[i*2], l[i*2+1]
|
|
capture := map[string]interface{}{
|
|
"offset": int(off) + start,
|
|
"length": end - start,
|
|
}
|
|
|
|
if start != -1 {
|
|
matchBitOff := (off + int64(start)) * 8
|
|
matchLength := int64(end-start) * 8
|
|
bbo := Binary{
|
|
br: bv.br,
|
|
r: ranges.Range{
|
|
Start: bv.r.Start + matchBitOff,
|
|
Len: matchLength,
|
|
},
|
|
unit: 8,
|
|
}
|
|
|
|
capture["string"] = bbo
|
|
} else {
|
|
capture["string"] = nil
|
|
}
|
|
|
|
if i > 0 {
|
|
if sreNames[i] != "" {
|
|
capture["name"] = sreNames[i]
|
|
} else {
|
|
capture["name"] = nil
|
|
}
|
|
}
|
|
|
|
if i == 0 {
|
|
firstCapture = capture
|
|
}
|
|
|
|
captures = append(captures, capture)
|
|
}
|
|
|
|
prevOff = off
|
|
off = off + int64(l[1])
|
|
|
|
firstCapture["captures"] = captures[1:]
|
|
|
|
return firstCapture, true
|
|
})
|
|
}
|