mirror of
https://github.com/wader/fq.git
synced 2024-12-23 13:22:58 +03:00
interp: Add buffer match support to find and grep
This commit is contained in:
parent
984ba1aa43
commit
7298a4cd8d
@ -142,11 +142,16 @@ notable is support for arbitrary-precision integers.
|
||||
- `format_root/0` return root value of format for value
|
||||
- `parent/0` return parent value
|
||||
- `parents/0` output parents of value
|
||||
- `find` and `grep` all take 1 or 2 arguments. First is a scalar to match, where a string is
|
||||
treated as a regexp. A buffer will be matched as exact bytes. Second argument is regexp
|
||||
flags with the addition of "b", which will treat each byte in the input buffer as a rune; this
|
||||
makes it possible to match exact bytes, ex: `find("\u00ff"; "b")` will match the byte `0xff` and not
|
||||
the UTF-8 codepoint `0xff`.
|
||||
- `find/1`, `find/2` match in buffer and output match buffers
|
||||
- `grep/1`, `grep/2` recursively match value and buffer
|
||||
- `vgrep/1`, `vgrep/2` recursively match value
|
||||
- `bgrep/1`, `bgrep/2` recursively match buffer
|
||||
- `fgrep/1`, `fgrep/2` recursively match field name
|
||||
- `find/1`, `find/2` match in buffer and output match buffers
|
||||
- `open` open file for reading
|
||||
- `probe` or `decode` probe format and decode
|
||||
- `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format
|
||||
|
@ -105,6 +105,7 @@ func Copy(dst BitWriter, src BitReader) (n int64, err error) {
|
||||
return CopyBuffer(dst, src, nil)
|
||||
}
|
||||
|
||||
// BitsByteCount returns the smallest number of bytes needed to fit nBits bits
|
||||
func BitsByteCount(nBits int64) int64 {
|
||||
n := nBits / 8
|
||||
if nBits%8 != 0 {
|
||||
|
@ -817,12 +817,24 @@ func (i *Interp) find(c interface{}, a []interface{}) gojq.Iter {
|
||||
}
|
||||
|
||||
var re string
|
||||
re, ok = a[0].(string)
|
||||
if !ok {
|
||||
return gojq.NewIter(gojqextra.FuncTypeError{Name: "find", Typ: "string"})
|
||||
var flags string
|
||||
|
||||
switch a0 := a[0].(type) {
|
||||
case string:
|
||||
re = a0
|
||||
default:
|
||||
reBuf, err := toBytes(a0)
|
||||
if err != nil {
|
||||
return gojq.NewIter(err)
|
||||
}
|
||||
var reRs []rune
|
||||
for _, b := range reBuf {
|
||||
reRs = append(reRs, rune(b))
|
||||
}
|
||||
flags = "b"
|
||||
re = string(reRs)
|
||||
}
|
||||
|
||||
var flags string
|
||||
if len(a) > 1 {
|
||||
flags, ok = a[1].(string)
|
||||
if !ok {
|
||||
|
@ -15,10 +15,12 @@ def _value_grep_string_cond($v; $flags):
|
||||
else false
|
||||
end
|
||||
)? // false;
|
||||
|
||||
def _value_grep_other_cond($v; $flags):
|
||||
( _tovalue
|
||||
| . == $v
|
||||
)? // false;
|
||||
|
||||
def vgrep($v; $flags):
|
||||
_grep(
|
||||
$v;
|
||||
@ -26,35 +28,40 @@ def vgrep($v; $flags):
|
||||
_value_grep_string_cond($v; $flags);
|
||||
_value_grep_other_cond($v; $flags)
|
||||
);
|
||||
|
||||
def vgrep($v): vgrep($v; "");
|
||||
|
||||
def _buf_grep_string_cond($v; $flags):
|
||||
def _buf_grep_any_cond($v; $flags):
|
||||
(isempty(find($v; $flags)) | not)? // false;
|
||||
def bgrep($v; $flags):
|
||||
_grep(
|
||||
$v;
|
||||
_is_scalar;
|
||||
_buf_grep_string_cond($v; $flags);
|
||||
empty
|
||||
_buf_grep_any_cond($v; $flags);
|
||||
_buf_grep_any_cond($v; $flags)
|
||||
);
|
||||
|
||||
def bgrep($v): bgrep($v; "");
|
||||
|
||||
def grep($v; $flags):
|
||||
_grep(
|
||||
$v;
|
||||
_is_scalar;
|
||||
_buf_grep_string_cond($v; $flags) or _value_grep_string_cond($v; $flags);
|
||||
_value_grep_other_cond($v; $flags)
|
||||
_buf_grep_any_cond($v; $flags) or _value_grep_string_cond($v; $flags);
|
||||
_buf_grep_any_cond($v; $flags) or _value_grep_other_cond($v; $flags)
|
||||
);
|
||||
|
||||
def grep($v): grep($v; "");
|
||||
|
||||
def _field_grep_string_cond($v; $flags):
|
||||
(has("_name") and (._name | test($v; $flags)))? // false;
|
||||
(._name | test($v; $flags))? // false;
|
||||
|
||||
def fgrep($v; $flags):
|
||||
_grep(
|
||||
$v;
|
||||
true;
|
||||
_is_decode_value;
|
||||
_field_grep_string_cond($v; $flags);
|
||||
empty
|
||||
);
|
||||
|
||||
def fgrep($v): fgrep($v; "");
|
||||
|
@ -267,6 +267,7 @@ func toBigInt(v interface{}) (*big.Int, error) {
|
||||
|
||||
func toBytes(v interface{}) ([]byte, error) {
|
||||
switch v := v.(type) {
|
||||
// TODO: remove?
|
||||
case []byte:
|
||||
return v, nil
|
||||
default:
|
||||
@ -307,10 +308,15 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
|
||||
}
|
||||
|
||||
if inArray {
|
||||
b := [1]byte{byte(bi.Uint64())}
|
||||
if bi.Cmp(big.NewInt(255)) > 0 || bi.Cmp(big.NewInt(0)) < 0 {
|
||||
return nil, fmt.Errorf("buffer byte list must be bytes (0-255) got %v", bi)
|
||||
}
|
||||
n := bi.Uint64()
|
||||
b := [1]byte{byte(n)}
|
||||
return bitio.NewBufferFromBytes(b[:], -1), nil
|
||||
}
|
||||
|
||||
// TODO: how should this work? "0xf | tobytes" 4bits or 8bits? now 4
|
||||
padBefore := (8 - (bi.BitLen() % 8)) % 8
|
||||
bb, err := bitio.NewBufferFromBytes(bi.Bytes(), -1).BitBufRange(int64(padBefore), int64(bi.BitLen()))
|
||||
if err != nil {
|
||||
@ -319,7 +325,7 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
|
||||
return bb, nil
|
||||
case []interface{}:
|
||||
var rr []bitio.BitReadAtSeeker
|
||||
// TODO: optimize byte array case
|
||||
// TODO: optimize byte array case, flatten into one slice
|
||||
for _, e := range vv {
|
||||
eBB, eErr := toBufferEx(e, true)
|
||||
if eErr != nil {
|
||||
|
15
pkg/interp/testdata/buffer.fqtest
vendored
15
pkg/interp/testdata/buffer.fqtest
vendored
@ -40,3 +40,18 @@ $ fq -d mp3 '.frames[0].padding | ("", "md5", "base64", "snippet") as $f | toval
|
||||
"ca9c491ac66b2c62500882e93f3719a8"
|
||||
"AAAAAAA="
|
||||
"<5>AAAAAAA="
|
||||
$ fq -d mp3 -i . /test.mp3
|
||||
mp3> [1, 2, 3] | tobytes
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|01 02 03| |...| |.: none 0x0-0x2.7 (3)
|
||||
mp3> [1, 2, 3, [1, 2, 3]] | tobytes
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|01 02 03 01 02 03| |......| |.: none 0x0-0x5.7 (6)
|
||||
mp3> [1, 2, 3, [1, 2, 3], .headers[0].magic] | tobytes
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|01 02 03 01 02 03 49 44 33| |......ID3| |.: none 0x0-0x8.7 (9)
|
||||
mp3> [-1] | tobytes
|
||||
error: buffer byte list must be bytes (0-255) got -1
|
||||
mp3> [256] | tobytes
|
||||
error: buffer byte list must be bytes (0-255) got 256
|
||||
mp3> ^D
|
||||
|
17
pkg/interp/testdata/grep.fqtest
vendored
17
pkg/interp/testdata/grep.fqtest
vendored
@ -1,5 +1,5 @@
|
||||
$ fq -i -d mp3 . /test.mp3
|
||||
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x20| 40| @|.frames[0].header.sample_rate: 44100
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
@ -14,7 +14,9 @@ mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
||||
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x20| 40| @|.frames[0].header.sample_rate: 44100
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
@ -29,10 +31,10 @@ mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
||||
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
@ -41,6 +43,8 @@ mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||
mp3> "64ff65ff66" | hex | bgrep("\u00ff"; "b")
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0|64 ff 65 ff 66| |d.e.f| |.: none 0x0-0x4.7 (5)
|
||||
@ -59,4 +63,9 @@ mp3> "aöaöa" | find("\u00c3"; "b")
|
||||
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
|
||||
mp3> "aöaöa" | find([0xc3])
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|
||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
|
||||
mp3> ^D
|
||||
|
Loading…
Reference in New Issue
Block a user