mirror of
https://github.com/wader/fq.git
synced 2024-12-18 02:41:44 +03:00
e9d9f8aef9
Rename s/interface{}/any/g Preparation for using generics in decode API and native jq funcations etc Remove some unused linter ignores as linter has been fixed
167 lines
3.3 KiB
Go
167 lines
3.3 KiB
Go
package interp
|
|
|
|
import (
|
|
"io"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"github.com/wader/fq/internal/gojqextra"
|
|
"github.com/wader/fq/internal/ioextra"
|
|
"github.com/wader/fq/pkg/bitio"
|
|
"github.com/wader/fq/pkg/ranges"
|
|
"github.com/wader/gojq"
|
|
)
|
|
|
|
func init() {
|
|
functionRegisterFns = append(functionRegisterFns, func(i *Interp) []Function {
|
|
return []Function{
|
|
{"_match_binary", 1, 2, nil, i._binaryMatch},
|
|
}
|
|
})
|
|
}
|
|
|
|
func (i *Interp) _binaryMatch(c any, a []any) gojq.Iter {
|
|
var ok bool
|
|
|
|
bv, err := toBinary(c)
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
|
|
var re string
|
|
var byteRunes bool
|
|
var global bool
|
|
|
|
switch a0 := a[0].(type) {
|
|
case string:
|
|
re = a0
|
|
default:
|
|
reBuf, err := toBytes(a0)
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
var reRs []rune
|
|
for _, b := range reBuf {
|
|
reRs = append(reRs, rune(b))
|
|
}
|
|
byteRunes = true
|
|
// escape paratheses runes etc
|
|
re = regexp.QuoteMeta(string(reRs))
|
|
}
|
|
|
|
var flags string
|
|
if len(a) > 1 {
|
|
flags, ok = a[1].(string)
|
|
if !ok {
|
|
return gojq.NewIter(gojqextra.FuncTypeNameError{Name: "find", Typ: "string"})
|
|
}
|
|
}
|
|
|
|
if strings.Contains(flags, "b") {
|
|
byteRunes = true
|
|
}
|
|
global = strings.Contains(flags, "g")
|
|
|
|
// TODO: err to string
|
|
// TODO: extract to regexpextra? "all" FindReaderSubmatchIndex that can iter?
|
|
sre, err := gojqextra.CompileRegexp(re, "gimb", flags)
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
sreNames := sre.SubexpNames()
|
|
|
|
br, err := bv.toReader()
|
|
if err != nil {
|
|
return gojq.NewIter(err)
|
|
}
|
|
|
|
var rr interface {
|
|
io.RuneReader
|
|
io.Seeker
|
|
}
|
|
// raw bytes regexp matching is a bit tricky, what we do is to read each byte as a codepoint (ByteRuneReader)
|
|
// and then we can use UTF-8 encoded codepoint to match a raw byte. So for example \u00ff (encoded as 0xc3 0xbf)
|
|
// will match the byte \0xff
|
|
if byteRunes {
|
|
// byte mode, read each byte as a rune
|
|
rr = ioextra.ByteRuneReader{RS: bitio.NewIOReadSeeker(br)}
|
|
} else {
|
|
rr = ioextra.RuneReadSeeker{RS: bitio.NewIOReadSeeker(br)}
|
|
}
|
|
|
|
var off int64
|
|
prevOff := int64(-1)
|
|
return iterFn(func() (any, bool) {
|
|
// TODO: correct way to handle empty match for binary, move one byte forward?
|
|
// > "asdasd" | [match(""; "g")], [(tobytes | match(""; "g"))] | length
|
|
// 7
|
|
// 1
|
|
if prevOff == off {
|
|
return nil, false
|
|
}
|
|
|
|
if prevOff != -1 && !global {
|
|
return nil, false
|
|
}
|
|
|
|
_, err = rr.Seek(off, io.SeekStart)
|
|
if err != nil {
|
|
return err, false
|
|
}
|
|
|
|
l := sre.FindReaderSubmatchIndex(rr)
|
|
if l == nil {
|
|
return nil, false
|
|
}
|
|
|
|
var captures []any
|
|
var firstCapture map[string]any
|
|
|
|
for i := 0; i < len(l)/2; i++ {
|
|
start, end := l[i*2], l[i*2+1]
|
|
capture := map[string]any{
|
|
"offset": int(off) + start,
|
|
"length": end - start,
|
|
}
|
|
|
|
if start != -1 {
|
|
matchBitOff := (off + int64(start)) * 8
|
|
matchLength := int64(end-start) * 8
|
|
bbo := Binary{
|
|
br: bv.br,
|
|
r: ranges.Range{
|
|
Start: bv.r.Start + matchBitOff,
|
|
Len: matchLength,
|
|
},
|
|
unit: 8,
|
|
}
|
|
|
|
capture["string"] = bbo
|
|
} else {
|
|
capture["string"] = nil
|
|
}
|
|
|
|
if i > 0 {
|
|
if sreNames[i] != "" {
|
|
capture["name"] = sreNames[i]
|
|
} else {
|
|
capture["name"] = nil
|
|
}
|
|
}
|
|
|
|
if i == 0 {
|
|
firstCapture = capture
|
|
}
|
|
|
|
captures = append(captures, capture)
|
|
}
|
|
|
|
prevOff = off
|
|
off = off + int64(l[1])
|
|
|
|
firstCapture["captures"] = captures[1:]
|
|
|
|
return firstCapture, true
|
|
})
|
|
}
|