2021-11-16 13:41:14 +03:00
|
|
|
package interp
|
|
|
|
|
|
|
|
import (
|
|
|
|
"io"
|
2021-12-04 19:20:12 +03:00
|
|
|
"regexp"
|
2021-11-16 13:41:14 +03:00
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/wader/fq/internal/gojqextra"
|
|
|
|
"github.com/wader/fq/internal/ioextra"
|
2022-01-24 23:21:48 +03:00
|
|
|
"github.com/wader/fq/pkg/bitio"
|
2021-11-16 13:41:14 +03:00
|
|
|
"github.com/wader/fq/pkg/ranges"
|
|
|
|
"github.com/wader/gojq"
|
|
|
|
)
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
functionRegisterFns = append(functionRegisterFns, func(i *Interp) []Function {
|
|
|
|
return []Function{
|
2022-02-08 20:44:48 +03:00
|
|
|
{"_match_binary", 1, 2, nil, i._binaryMatch},
|
2021-11-16 13:41:14 +03:00
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2022-05-20 16:10:41 +03:00
|
|
|
func (i *Interp) _binaryMatch(c any, a []any) gojq.Iter {
|
2021-11-16 13:41:14 +03:00
|
|
|
var ok bool
|
|
|
|
|
2022-02-08 20:44:48 +03:00
|
|
|
bv, err := toBinary(c)
|
2021-11-16 13:41:14 +03:00
|
|
|
if err != nil {
|
|
|
|
return gojq.NewIter(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var re string
|
|
|
|
var byteRunes bool
|
2021-12-14 03:45:01 +03:00
|
|
|
var global bool
|
2021-11-16 13:41:14 +03:00
|
|
|
|
|
|
|
switch a0 := a[0].(type) {
|
|
|
|
case string:
|
|
|
|
re = a0
|
|
|
|
default:
|
|
|
|
reBuf, err := toBytes(a0)
|
|
|
|
if err != nil {
|
|
|
|
return gojq.NewIter(err)
|
|
|
|
}
|
|
|
|
var reRs []rune
|
|
|
|
for _, b := range reBuf {
|
|
|
|
reRs = append(reRs, rune(b))
|
|
|
|
}
|
|
|
|
byteRunes = true
|
2021-12-04 19:20:12 +03:00
|
|
|
// escape paratheses runes etc
|
|
|
|
re = regexp.QuoteMeta(string(reRs))
|
2021-11-16 13:41:14 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
var flags string
|
|
|
|
if len(a) > 1 {
|
|
|
|
flags, ok = a[1].(string)
|
|
|
|
if !ok {
|
|
|
|
return gojq.NewIter(gojqextra.FuncTypeNameError{Name: "find", Typ: "string"})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if strings.Contains(flags, "b") {
|
|
|
|
byteRunes = true
|
|
|
|
}
|
2021-12-14 03:45:01 +03:00
|
|
|
global = strings.Contains(flags, "g")
|
2021-11-16 13:41:14 +03:00
|
|
|
|
|
|
|
// TODO: err to string
|
|
|
|
// TODO: extract to regexpextra? "all" FindReaderSubmatchIndex that can iter?
|
|
|
|
sre, err := gojqextra.CompileRegexp(re, "gimb", flags)
|
|
|
|
if err != nil {
|
|
|
|
return gojq.NewIter(err)
|
|
|
|
}
|
2021-12-14 03:45:01 +03:00
|
|
|
sreNames := sre.SubexpNames()
|
2021-11-16 13:41:14 +03:00
|
|
|
|
2022-02-08 20:44:48 +03:00
|
|
|
br, err := bv.toReader()
|
2021-11-16 13:41:14 +03:00
|
|
|
if err != nil {
|
|
|
|
return gojq.NewIter(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var rr interface {
|
|
|
|
io.RuneReader
|
|
|
|
io.Seeker
|
|
|
|
}
|
|
|
|
// raw bytes regexp matching is a bit tricky, what we do is to read each byte as a codepoint (ByteRuneReader)
|
|
|
|
// and then we can use UTF-8 encoded codepoint to match a raw byte. So for example \u00ff (encoded as 0xc3 0xbf)
|
|
|
|
// will match the byte \0xff
|
|
|
|
if byteRunes {
|
|
|
|
// byte mode, read each byte as a rune
|
2022-01-24 23:21:48 +03:00
|
|
|
rr = ioextra.ByteRuneReader{RS: bitio.NewIOReadSeeker(br)}
|
2021-11-16 13:41:14 +03:00
|
|
|
} else {
|
2022-01-24 23:21:48 +03:00
|
|
|
rr = ioextra.RuneReadSeeker{RS: bitio.NewIOReadSeeker(br)}
|
2021-11-16 13:41:14 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
var off int64
|
2021-12-14 03:45:01 +03:00
|
|
|
prevOff := int64(-1)
|
2022-05-20 16:10:41 +03:00
|
|
|
return iterFn(func() (any, bool) {
|
2022-02-08 20:44:48 +03:00
|
|
|
// TODO: correct way to handle empty match for binary, move one byte forward?
|
2021-12-14 03:45:01 +03:00
|
|
|
// > "asdasd" | [match(""; "g")], [(tobytes | match(""; "g"))] | length
|
|
|
|
// 7
|
|
|
|
// 1
|
|
|
|
if prevOff == off {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
|
|
|
if prevOff != -1 && !global {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
2021-11-16 13:41:14 +03:00
|
|
|
_, err = rr.Seek(off, io.SeekStart)
|
|
|
|
if err != nil {
|
|
|
|
return err, false
|
|
|
|
}
|
|
|
|
|
|
|
|
l := sre.FindReaderSubmatchIndex(rr)
|
|
|
|
if l == nil {
|
|
|
|
return nil, false
|
|
|
|
}
|
|
|
|
|
2022-05-20 16:10:41 +03:00
|
|
|
var captures []any
|
|
|
|
var firstCapture map[string]any
|
2021-12-14 03:45:01 +03:00
|
|
|
|
|
|
|
for i := 0; i < len(l)/2; i++ {
|
|
|
|
start, end := l[i*2], l[i*2+1]
|
2022-05-20 16:10:41 +03:00
|
|
|
capture := map[string]any{
|
2021-12-14 03:45:01 +03:00
|
|
|
"offset": int(off) + start,
|
|
|
|
"length": end - start,
|
|
|
|
}
|
|
|
|
|
|
|
|
if start != -1 {
|
|
|
|
matchBitOff := (off + int64(start)) * 8
|
|
|
|
matchLength := int64(end-start) * 8
|
2022-02-08 20:44:48 +03:00
|
|
|
bbo := Binary{
|
2022-01-24 23:21:48 +03:00
|
|
|
br: bv.br,
|
2021-12-14 03:45:01 +03:00
|
|
|
r: ranges.Range{
|
|
|
|
Start: bv.r.Start + matchBitOff,
|
|
|
|
Len: matchLength,
|
|
|
|
},
|
|
|
|
unit: 8,
|
|
|
|
}
|
|
|
|
|
|
|
|
capture["string"] = bbo
|
|
|
|
} else {
|
|
|
|
capture["string"] = nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if i > 0 {
|
|
|
|
if sreNames[i] != "" {
|
|
|
|
capture["name"] = sreNames[i]
|
|
|
|
} else {
|
|
|
|
capture["name"] = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if i == 0 {
|
|
|
|
firstCapture = capture
|
|
|
|
}
|
|
|
|
|
|
|
|
captures = append(captures, capture)
|
2021-11-16 13:41:14 +03:00
|
|
|
}
|
|
|
|
|
2021-12-14 03:45:01 +03:00
|
|
|
prevOff = off
|
2021-11-16 13:41:14 +03:00
|
|
|
off = off + int64(l[1])
|
|
|
|
|
2021-12-14 03:45:01 +03:00
|
|
|
firstCapture["captures"] = captures[1:]
|
|
|
|
|
|
|
|
return firstCapture, true
|
2021-11-16 13:41:14 +03:00
|
|
|
})
|
|
|
|
}
|