mirror of
https://github.com/wader/fq.git
synced 2024-12-23 13:22:58 +03:00
interp: Add buffer match support to find and grep
This commit is contained in:
parent
984ba1aa43
commit
7298a4cd8d
@ -142,11 +142,16 @@ notable is support for arbitrary-precision integers.
|
|||||||
- `format_root/0` return root value of format for value
|
- `format_root/0` return root value of format for value
|
||||||
- `parent/0` return parent value
|
- `parent/0` return parent value
|
||||||
- `parents/0` output parents of value
|
- `parents/0` output parents of value
|
||||||
|
- `find` and `grep` all take 1 or 2 arguments. First is a scalar to match, where a string is
|
||||||
|
treated as a regexp. A buffer will be matched as exact bytes. Second argument is regexp
|
||||||
|
flags with the addition of "b", which will treat each byte in the input buffer as a rune; this
|
||||||
|
makes it possible to match exact bytes, ex: `find("\u00ff"; "b")` will match the byte `0xff` and not
|
||||||
|
the UTF-8 codepoint `0xff`.
|
||||||
|
- `find/1`, `find/2` match in buffer and output match buffers
|
||||||
- `grep/1`, `grep/2` recursively match value and buffer
|
- `grep/1`, `grep/2` recursively match value and buffer
|
||||||
- `vgrep/1`, `vgrep/2` recursively match value
|
- `vgrep/1`, `vgrep/2` recursively match value
|
||||||
- `bgrep/1`, `bgrep/2` recursively match buffer
|
- `bgrep/1`, `bgrep/2` recursively match buffer
|
||||||
- `fgrep/1`, `fgrep/2` recursively match field name
|
- `fgrep/1`, `fgrep/2` recursively match field name
|
||||||
- `find/1`, `find/2` match in buffer and output match buffers
|
|
||||||
- `open` open file for reading
|
- `open` open file for reading
|
||||||
- `probe` or `decode` probe format and decode
|
- `probe` or `decode` probe format and decode
|
||||||
- `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format
|
- `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format
|
||||||
|
@ -105,6 +105,7 @@ func Copy(dst BitWriter, src BitReader) (n int64, err error) {
|
|||||||
return CopyBuffer(dst, src, nil)
|
return CopyBuffer(dst, src, nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// BitsByteCount returns smallest amount of bytes to fit nBits bits
|
||||||
func BitsByteCount(nBits int64) int64 {
|
func BitsByteCount(nBits int64) int64 {
|
||||||
n := nBits / 8
|
n := nBits / 8
|
||||||
if nBits%8 != 0 {
|
if nBits%8 != 0 {
|
||||||
|
@ -817,12 +817,24 @@ func (i *Interp) find(c interface{}, a []interface{}) gojq.Iter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var re string
|
var re string
|
||||||
re, ok = a[0].(string)
|
var flags string
|
||||||
if !ok {
|
|
||||||
return gojq.NewIter(gojqextra.FuncTypeError{Name: "find", Typ: "string"})
|
switch a0 := a[0].(type) {
|
||||||
|
case string:
|
||||||
|
re = a0
|
||||||
|
default:
|
||||||
|
reBuf, err := toBytes(a0)
|
||||||
|
if err != nil {
|
||||||
|
return gojq.NewIter(err)
|
||||||
|
}
|
||||||
|
var reRs []rune
|
||||||
|
for _, b := range reBuf {
|
||||||
|
reRs = append(reRs, rune(b))
|
||||||
|
}
|
||||||
|
flags = "b"
|
||||||
|
re = string(reRs)
|
||||||
}
|
}
|
||||||
|
|
||||||
var flags string
|
|
||||||
if len(a) > 1 {
|
if len(a) > 1 {
|
||||||
flags, ok = a[1].(string)
|
flags, ok = a[1].(string)
|
||||||
if !ok {
|
if !ok {
|
||||||
|
@ -15,10 +15,12 @@ def _value_grep_string_cond($v; $flags):
|
|||||||
else false
|
else false
|
||||||
end
|
end
|
||||||
)? // false;
|
)? // false;
|
||||||
|
|
||||||
def _value_grep_other_cond($v; $flags):
|
def _value_grep_other_cond($v; $flags):
|
||||||
( _tovalue
|
( _tovalue
|
||||||
| . == $v
|
| . == $v
|
||||||
)? // false;
|
)? // false;
|
||||||
|
|
||||||
def vgrep($v; $flags):
|
def vgrep($v; $flags):
|
||||||
_grep(
|
_grep(
|
||||||
$v;
|
$v;
|
||||||
@ -26,35 +28,40 @@ def vgrep($v; $flags):
|
|||||||
_value_grep_string_cond($v; $flags);
|
_value_grep_string_cond($v; $flags);
|
||||||
_value_grep_other_cond($v; $flags)
|
_value_grep_other_cond($v; $flags)
|
||||||
);
|
);
|
||||||
|
|
||||||
def vgrep($v): vgrep($v; "");
|
def vgrep($v): vgrep($v; "");
|
||||||
|
|
||||||
def _buf_grep_string_cond($v; $flags):
|
def _buf_grep_any_cond($v; $flags):
|
||||||
(isempty(find($v; $flags)) | not)? // false;
|
(isempty(find($v; $flags)) | not)? // false;
|
||||||
def bgrep($v; $flags):
|
def bgrep($v; $flags):
|
||||||
_grep(
|
_grep(
|
||||||
$v;
|
$v;
|
||||||
_is_scalar;
|
_is_scalar;
|
||||||
_buf_grep_string_cond($v; $flags);
|
_buf_grep_any_cond($v; $flags);
|
||||||
empty
|
_buf_grep_any_cond($v; $flags)
|
||||||
);
|
);
|
||||||
|
|
||||||
def bgrep($v): bgrep($v; "");
|
def bgrep($v): bgrep($v; "");
|
||||||
|
|
||||||
def grep($v; $flags):
|
def grep($v; $flags):
|
||||||
_grep(
|
_grep(
|
||||||
$v;
|
$v;
|
||||||
_is_scalar;
|
_is_scalar;
|
||||||
_buf_grep_string_cond($v; $flags) or _value_grep_string_cond($v; $flags);
|
_buf_grep_any_cond($v; $flags) or _value_grep_string_cond($v; $flags);
|
||||||
_value_grep_other_cond($v; $flags)
|
_buf_grep_any_cond($v; $flags) or _value_grep_other_cond($v; $flags)
|
||||||
);
|
);
|
||||||
|
|
||||||
def grep($v): grep($v; "");
|
def grep($v): grep($v; "");
|
||||||
|
|
||||||
def _field_grep_string_cond($v; $flags):
|
def _field_grep_string_cond($v; $flags):
|
||||||
(has("_name") and (._name | test($v; $flags)))? // false;
|
(._name | test($v; $flags))? // false;
|
||||||
|
|
||||||
def fgrep($v; $flags):
|
def fgrep($v; $flags):
|
||||||
_grep(
|
_grep(
|
||||||
$v;
|
$v;
|
||||||
true;
|
_is_decode_value;
|
||||||
_field_grep_string_cond($v; $flags);
|
_field_grep_string_cond($v; $flags);
|
||||||
empty
|
empty
|
||||||
);
|
);
|
||||||
|
|
||||||
def fgrep($v): fgrep($v; "");
|
def fgrep($v): fgrep($v; "");
|
||||||
|
@ -267,6 +267,7 @@ func toBigInt(v interface{}) (*big.Int, error) {
|
|||||||
|
|
||||||
func toBytes(v interface{}) ([]byte, error) {
|
func toBytes(v interface{}) ([]byte, error) {
|
||||||
switch v := v.(type) {
|
switch v := v.(type) {
|
||||||
|
// TODO: remove?
|
||||||
case []byte:
|
case []byte:
|
||||||
return v, nil
|
return v, nil
|
||||||
default:
|
default:
|
||||||
@ -307,10 +308,15 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if inArray {
|
if inArray {
|
||||||
b := [1]byte{byte(bi.Uint64())}
|
if bi.Cmp(big.NewInt(255)) > 0 || bi.Cmp(big.NewInt(0)) < 0 {
|
||||||
|
return nil, fmt.Errorf("buffer byte list must be bytes (0-255) got %v", bi)
|
||||||
|
}
|
||||||
|
n := bi.Uint64()
|
||||||
|
b := [1]byte{byte(n)}
|
||||||
return bitio.NewBufferFromBytes(b[:], -1), nil
|
return bitio.NewBufferFromBytes(b[:], -1), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: how should this work? "0xf | tobytes" 4bits or 8bits? now 4
|
||||||
padBefore := (8 - (bi.BitLen() % 8)) % 8
|
padBefore := (8 - (bi.BitLen() % 8)) % 8
|
||||||
bb, err := bitio.NewBufferFromBytes(bi.Bytes(), -1).BitBufRange(int64(padBefore), int64(bi.BitLen()))
|
bb, err := bitio.NewBufferFromBytes(bi.Bytes(), -1).BitBufRange(int64(padBefore), int64(bi.BitLen()))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -319,7 +325,7 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
|
|||||||
return bb, nil
|
return bb, nil
|
||||||
case []interface{}:
|
case []interface{}:
|
||||||
var rr []bitio.BitReadAtSeeker
|
var rr []bitio.BitReadAtSeeker
|
||||||
// TODO: optimize byte array case
|
// TODO: optimize byte array case, flatten into one slice
|
||||||
for _, e := range vv {
|
for _, e := range vv {
|
||||||
eBB, eErr := toBufferEx(e, true)
|
eBB, eErr := toBufferEx(e, true)
|
||||||
if eErr != nil {
|
if eErr != nil {
|
||||||
|
15
pkg/interp/testdata/buffer.fqtest
vendored
15
pkg/interp/testdata/buffer.fqtest
vendored
@ -40,3 +40,18 @@ $ fq -d mp3 '.frames[0].padding | ("", "md5", "base64", "snippet") as $f | toval
|
|||||||
"ca9c491ac66b2c62500882e93f3719a8"
|
"ca9c491ac66b2c62500882e93f3719a8"
|
||||||
"AAAAAAA="
|
"AAAAAAA="
|
||||||
"<5>AAAAAAA="
|
"<5>AAAAAAA="
|
||||||
|
$ fq -d mp3 -i . /test.mp3
|
||||||
|
mp3> [1, 2, 3] | tobytes
|
||||||
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0|01 02 03| |...| |.: none 0x0-0x2.7 (3)
|
||||||
|
mp3> [1, 2, 3, [1, 2, 3]] | tobytes
|
||||||
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0|01 02 03 01 02 03| |......| |.: none 0x0-0x5.7 (6)
|
||||||
|
mp3> [1, 2, 3, [1, 2, 3], .headers[0].magic] | tobytes
|
||||||
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0|01 02 03 01 02 03 49 44 33| |......ID3| |.: none 0x0-0x8.7 (9)
|
||||||
|
mp3> [-1] | tobytes
|
||||||
|
error: buffer byte list must be bytes (0-255) got -1
|
||||||
|
mp3> [256] | tobytes
|
||||||
|
error: buffer byte list must be bytes (0-255) got 256
|
||||||
|
mp3> ^D
|
||||||
|
17
pkg/interp/testdata/grep.fqtest
vendored
17
pkg/interp/testdata/grep.fqtest
vendored
@ -1,5 +1,5 @@
|
|||||||
$ fq -i -d mp3 . /test.mp3
|
$ fq -i -d mp3 . /test.mp3
|
||||||
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x20| 40| @|.frames[0].header.sample_rate: 44100
|
0x20| 40| @|.frames[0].header.sample_rate: 44100
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
@ -14,7 +14,9 @@ mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
|||||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
||||||
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
|
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x20| 40| @|.frames[0].header.sample_rate: 44100
|
0x20| 40| @|.frames[0].header.sample_rate: 44100
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
@ -29,10 +31,10 @@ mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
|||||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
||||||
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
@ -41,6 +43,8 @@ mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|
|||||||
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|
||||||
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|
||||||
mp3> "64ff65ff66" | hex | bgrep("\u00ff"; "b")
|
mp3> "64ff65ff66" | hex | bgrep("\u00ff"; "b")
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x0|64 ff 65 ff 66| |d.e.f| |.: none 0x0-0x4.7 (5)
|
0x0|64 ff 65 ff 66| |d.e.f| |.: none 0x0-0x4.7 (5)
|
||||||
@ -59,4 +63,9 @@ mp3> "aöaöa" | find("\u00c3"; "b")
|
|||||||
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|
||||||
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
|
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
|
||||||
|
mp3> "aöaöa" | find([0xc3])
|
||||||
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|
||||||
|
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
|
||||||
|
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
|
||||||
mp3> ^D
|
mp3> ^D
|
||||||
|
Loading…
Reference in New Issue
Block a user