1
1
mirror of https://github.com/wader/fq.git synced 2024-12-25 14:23:18 +03:00
fq/pkg/interp/match.go
Mattias Wadman 1ddea1ada3 interp,format: Refactor registry usage and use function helpers
Move registry to interp and add support for functions and filesystems.
This will be used later to allow formats to add their own functions and fq code.

Add gojqextra function helpers to have more comfortable API to add functions.
Takes care of argument type casting and JQValue:s and some more things.

Refactor interp package to use new function helper and registry. Probably
fixes a bunch of JQValue bugs and other type errors.

Refactor out some mpeg nal things to mpeg format.

Refactor interp jq code into display.q and init.jq.

Remove undocumented aes_ctr function, which was a test. Hopefully will add more crypto things later.
2022-07-16 19:24:13 +02:00

153 lines
3.1 KiB
Go

package interp
import (
"io"
"regexp"
"strings"
"github.com/wader/fq/internal/gojqextra"
"github.com/wader/fq/internal/ioextra"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/ranges"
"github.com/wader/gojq"
)
func init() {
RegisterIter2("_match_binary", (*Interp)._binaryMatch)
}
// _binaryMatch is the implementation registered as "_match_binary". It runs a
// regular expression over the binary value c and returns an iterator that
// yields one match object per match.
//
// pattern is either a string containing a regexp, or any other value accepted
// by toBytes, in which case the bytes are matched literally: every byte is
// turned into a rune and the result is escaped with regexp.QuoteMeta.
//
// flags is handed to gojqextra.CompileRegexp together with the accepted flag
// set "gimb". This function itself only interprets two of them:
//   - "g": keep producing matches after the first one.
//   - "b": read the input one byte per rune (also forced for non-string
//     patterns).
//
// Each yielded value is a map with the keys "offset", "length", "string" (a
// Binary covering the matched range) and "captures" (a slice with one map per
// capture group, each additionally carrying a "name" key that is nil for
// unnamed groups). A capture group that did not participate in the match is
// reported with offset -1, length 0 and a nil "string".
//
// Failures while converting c or pattern, compiling the regexp or creating
// the reader are returned as an iterator built with gojq.NewIter(err).
func (i *Interp) _binaryMatch(c any, pattern any, flags string) gojq.Iter {
	bv, err := toBinary(c)
	if err != nil {
		return gojq.NewIter(err)
	}

	var re string
	var byteRunes bool

	switch pattern := pattern.(type) {
	case string:
		re = pattern
	default:
		reBuf, err := toBytes(pattern)
		if err != nil {
			return gojq.NewIter(err)
		}
		reRs := make([]rune, 0, len(reBuf))
		for _, b := range reBuf {
			reRs = append(reRs, rune(b))
		}
		byteRunes = true
		// escape parentheses runes etc
		re = regexp.QuoteMeta(string(reRs))
	}

	if strings.Contains(flags, "b") {
		byteRunes = true
	}
	global := strings.Contains(flags, "g")

	// TODO: err to string
	// TODO: extract to regexpextra? "all" FindReaderSubmatchIndex that can iter?
	sre, err := gojqextra.CompileRegexp(re, "gimb", flags)
	if err != nil {
		return gojq.NewIter(err)
	}
	sreNames := sre.SubexpNames()

	br, err := bv.toReader()
	if err != nil {
		return gojq.NewIter(err)
	}

	var rr interface {
		io.RuneReader
		io.Seeker
	}
	// raw bytes regexp matching is a bit tricky, what we do is to read each byte as a codepoint (ByteRuneReader)
	// and then we can use UTF-8 encoded codepoint to match a raw byte. So for example \u00ff (encoded as 0xc3 0xbf)
	// will match the byte \0xff
	if byteRunes {
		// byte mode, read each byte as a rune
		rr = ioextra.ByteRuneReader{RS: bitio.NewIOReadSeeker(br)}
	} else {
		rr = ioextra.RuneReadSeeker{RS: bitio.NewIOReadSeeker(br)}
	}

	// off is where the next search starts; prevOff is where the previous
	// search started, or -1 before the first search.
	var off int64
	prevOff := int64(-1)

	return iterFn(func() (any, bool) {
		// TODO: correct way to handle empty match for binary, move one byte forward?
		// > "asdasd" | [match(""; "g")], [(tobytes | match(""; "g"))] | length
		// 7
		// 1
		if prevOff == off {
			// the previous match was empty so the search would not advance
			return nil, false
		}
		if prevOff != -1 && !global {
			// without "g" only the first match is produced
			return nil, false
		}

		if _, err := rr.Seek(off, io.SeekStart); err != nil {
			// NOTE(review): returning false here looks like it ends the
			// iteration without surfacing err; iterFn is defined elsewhere so
			// the behavior is left unchanged — confirm whether this should
			// yield the error instead.
			return err, false
		}

		// l holds start/end index pairs relative to off, pair 0 being the
		// whole match; nil means no match
		l := sre.FindReaderSubmatchIndex(rr)
		if l == nil {
			return nil, false
		}

		groups := len(l) / 2
		captures := make([]any, 0, groups)
		var firstCapture map[string]any

		for idx := 0; idx < groups; idx++ {
			start, end := l[idx*2], l[idx*2+1]

			// defaults describe a group that did not participate in the match.
			// The offset must not be derived from off in that case, otherwise
			// it would come out as off-1.
			capture := map[string]any{
				"offset": -1,
				"length": 0,
				"string": nil,
			}
			if start != -1 {
				// indices are treated as byte positions, Binary wants bits
				matchBitOff := (off + int64(start)) * 8
				matchLength := int64(end-start) * 8
				capture["offset"] = int(off) + start
				capture["length"] = end - start
				capture["string"] = Binary{
					br: bv.br,
					r: ranges.Range{
						Start: bv.r.Start + matchBitOff,
						Len:   matchLength,
					},
					unit: 8,
				}
			}

			if idx == 0 {
				// the whole match becomes the result object itself
				firstCapture = capture
			} else if sreNames[idx] != "" {
				capture["name"] = sreNames[idx]
			} else {
				capture["name"] = nil
			}

			captures = append(captures, capture)
		}

		prevOff = off
		// continue after the end of the whole match
		off += int64(l[1])
		firstCapture["captures"] = captures[1:]

		return firstCapture, true
	})
}