1
1
mirror of https://github.com/wader/fq.git synced 2024-12-23 13:22:58 +03:00

interp: Add buffer match support to find and grep

This commit is contained in:
Mattias Wadman 2021-10-17 01:26:30 +02:00
parent 984ba1aa43
commit 7298a4cd8d
7 changed files with 77 additions and 22 deletions

View File

@ -142,11 +142,16 @@ notable is support for arbitrary-precision integers.
- `format_root/0` return root value of format for value - `format_root/0` return root value of format for value
- `parent/0` return parent value - `parent/0` return parent value
- `parents/0` output parents of value - `parents/0` output parents of value
- `find` and `grep` all take 1 or 2 arguments. The first is a scalar to match, where a string is
treated as a regexp. A buffer will be matched as exact bytes. The second argument is regexp
flags, with the addition of "b", which treats each byte in the input buffer as a rune. This
makes it possible to match exact bytes, e.g. `find("\u00ff"; "b")` will match the byte `0xff` and not
the UTF-8 encoded codepoint `0xff`.
- `find/1`, `find/2` match in buffer and output match buffers
- `grep/1`, `grep/2` recursively match value and buffer - `grep/1`, `grep/2` recursively match value and buffer
- `vgrep/1`, `vgrep/2` recursively match value - `vgrep/1`, `vgrep/2` recursively match value
- `bgrep/1`, `bgrep/2` recursively match buffer - `bgrep/1`, `bgrep/2` recursively match buffer
- `fgrep/1`, `fgrep/2` recursively match field name - `fgrep/1`, `fgrep/2` recursively match field name
- `find/1`, `find/2` match in buffer and output match buffers
- `open` open file for reading - `open` open file for reading
- `probe` or `decode` probe format and decode - `probe` or `decode` probe format and decode
- `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format - `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format

View File

@ -105,6 +105,7 @@ func Copy(dst BitWriter, src BitReader) (n int64, err error) {
return CopyBuffer(dst, src, nil) return CopyBuffer(dst, src, nil)
} }
// BitsByteCount returns the smallest number of bytes needed to fit nBits bits
func BitsByteCount(nBits int64) int64 { func BitsByteCount(nBits int64) int64 {
n := nBits / 8 n := nBits / 8
if nBits%8 != 0 { if nBits%8 != 0 {

View File

@ -817,12 +817,24 @@ func (i *Interp) find(c interface{}, a []interface{}) gojq.Iter {
} }
var re string var re string
re, ok = a[0].(string) var flags string
if !ok {
return gojq.NewIter(gojqextra.FuncTypeError{Name: "find", Typ: "string"}) switch a0 := a[0].(type) {
case string:
re = a0
default:
reBuf, err := toBytes(a0)
if err != nil {
return gojq.NewIter(err)
}
var reRs []rune
for _, b := range reBuf {
reRs = append(reRs, rune(b))
}
flags = "b"
re = string(reRs)
} }
var flags string
if len(a) > 1 { if len(a) > 1 {
flags, ok = a[1].(string) flags, ok = a[1].(string)
if !ok { if !ok {

View File

@ -15,10 +15,12 @@ def _value_grep_string_cond($v; $flags):
else false else false
end end
)? // false; )? // false;
def _value_grep_other_cond($v; $flags): def _value_grep_other_cond($v; $flags):
( _tovalue ( _tovalue
| . == $v | . == $v
)? // false; )? // false;
def vgrep($v; $flags): def vgrep($v; $flags):
_grep( _grep(
$v; $v;
@ -26,35 +28,40 @@ def vgrep($v; $flags):
_value_grep_string_cond($v; $flags); _value_grep_string_cond($v; $flags);
_value_grep_other_cond($v; $flags) _value_grep_other_cond($v; $flags)
); );
def vgrep($v): vgrep($v; ""); def vgrep($v): vgrep($v; "");
def _buf_grep_string_cond($v; $flags): def _buf_grep_any_cond($v; $flags):
(isempty(find($v; $flags)) | not)? // false; (isempty(find($v; $flags)) | not)? // false;
def bgrep($v; $flags): def bgrep($v; $flags):
_grep( _grep(
$v; $v;
_is_scalar; _is_scalar;
_buf_grep_string_cond($v; $flags); _buf_grep_any_cond($v; $flags);
empty _buf_grep_any_cond($v; $flags)
); );
def bgrep($v): bgrep($v; ""); def bgrep($v): bgrep($v; "");
def grep($v; $flags): def grep($v; $flags):
_grep( _grep(
$v; $v;
_is_scalar; _is_scalar;
_buf_grep_string_cond($v; $flags) or _value_grep_string_cond($v; $flags); _buf_grep_any_cond($v; $flags) or _value_grep_string_cond($v; $flags);
_value_grep_other_cond($v; $flags) _buf_grep_any_cond($v; $flags) or _value_grep_other_cond($v; $flags)
); );
def grep($v): grep($v; ""); def grep($v): grep($v; "");
def _field_grep_string_cond($v; $flags): def _field_grep_string_cond($v; $flags):
(has("_name") and (._name | test($v; $flags)))? // false; (._name | test($v; $flags))? // false;
def fgrep($v; $flags): def fgrep($v; $flags):
_grep( _grep(
$v; $v;
true; _is_decode_value;
_field_grep_string_cond($v; $flags); _field_grep_string_cond($v; $flags);
empty empty
); );
def fgrep($v): fgrep($v; ""); def fgrep($v): fgrep($v; "");

View File

@ -267,6 +267,7 @@ func toBigInt(v interface{}) (*big.Int, error) {
func toBytes(v interface{}) ([]byte, error) { func toBytes(v interface{}) ([]byte, error) {
switch v := v.(type) { switch v := v.(type) {
// TODO: remove?
case []byte: case []byte:
return v, nil return v, nil
default: default:
@ -307,10 +308,15 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
} }
if inArray { if inArray {
b := [1]byte{byte(bi.Uint64())} if bi.Cmp(big.NewInt(255)) > 0 || bi.Cmp(big.NewInt(0)) < 0 {
return nil, fmt.Errorf("buffer byte list must be bytes (0-255) got %v", bi)
}
n := bi.Uint64()
b := [1]byte{byte(n)}
return bitio.NewBufferFromBytes(b[:], -1), nil return bitio.NewBufferFromBytes(b[:], -1), nil
} }
// TODO: how should this work? "0xf | tobytes" 4bits or 8bits? now 4
padBefore := (8 - (bi.BitLen() % 8)) % 8 padBefore := (8 - (bi.BitLen() % 8)) % 8
bb, err := bitio.NewBufferFromBytes(bi.Bytes(), -1).BitBufRange(int64(padBefore), int64(bi.BitLen())) bb, err := bitio.NewBufferFromBytes(bi.Bytes(), -1).BitBufRange(int64(padBefore), int64(bi.BitLen()))
if err != nil { if err != nil {
@ -319,7 +325,7 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
return bb, nil return bb, nil
case []interface{}: case []interface{}:
var rr []bitio.BitReadAtSeeker var rr []bitio.BitReadAtSeeker
// TODO: optimize byte array case // TODO: optimize byte array case, flatten into one slice
for _, e := range vv { for _, e := range vv {
eBB, eErr := toBufferEx(e, true) eBB, eErr := toBufferEx(e, true)
if eErr != nil { if eErr != nil {

View File

@ -40,3 +40,18 @@ $ fq -d mp3 '.frames[0].padding | ("", "md5", "base64", "snippet") as $f | toval
"ca9c491ac66b2c62500882e93f3719a8" "ca9c491ac66b2c62500882e93f3719a8"
"AAAAAAA=" "AAAAAAA="
"<5>AAAAAAA=" "<5>AAAAAAA="
$ fq -d mp3 -i . /test.mp3
mp3> [1, 2, 3] | tobytes
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|01 02 03| |...| |.: none 0x0-0x2.7 (3)
mp3> [1, 2, 3, [1, 2, 3]] | tobytes
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|01 02 03 01 02 03| |......| |.: none 0x0-0x5.7 (6)
mp3> [1, 2, 3, [1, 2, 3], .headers[0].magic] | tobytes
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|01 02 03 01 02 03 49 44 33| |......ID3| |.: none 0x0-0x8.7 (9)
mp3> [-1] | tobytes
error: buffer byte list must be bytes (0-255) got -1
mp3> [256] | tobytes
error: buffer byte list must be bytes (0-255) got 256
mp3> ^D

View File

@ -1,5 +1,5 @@
$ fq -i -d mp3 . /test.mp3 $ fq -i -d mp3 . /test.mp3
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff") mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x20| 40| @|.frames[0].header.sample_rate: 44100 0x20| 40| @|.frames[0].header.sample_rate: 44100
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
@ -14,7 +14,9 @@ mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct) 0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info" 0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff") |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x20| 40| @|.frames[0].header.sample_rate: 44100 0x20| 40| @|.frames[0].header.sample_rate: 44100
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
@ -29,10 +31,10 @@ mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct) 0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info" 0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff") mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct) 0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff") mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct) 0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
@ -41,6 +43,8 @@ mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct) 0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info" 0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
mp3> "64ff65ff66" | hex | bgrep("\u00ff"; "b") mp3> "64ff65ff66" | hex | bgrep("\u00ff"; "b")
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|64 ff 65 ff 66| |d.e.f| |.: none 0x0-0x4.7 (5) 0x0|64 ff 65 ff 66| |d.e.f| |.: none 0x0-0x4.7 (5)
@ -59,4 +63,9 @@ mp3> "aöaöa" | find("\u00c3"; "b")
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6) 0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3) 0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
mp3> "aöaöa" | find([0xc3])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
mp3> ^D mp3> ^D