From 36fd74a1814b26926eea02b2c925a87411504040 Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Sun, 17 Oct 2021 14:48:50 +0200 Subject: [PATCH] Add comment how raw byte regexp matching works --- pkg/interp/funcs.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/interp/funcs.go b/pkg/interp/funcs.go index 1c2e2917..9c7b7c72 100644 --- a/pkg/interp/funcs.go +++ b/pkg/interp/funcs.go @@ -882,6 +882,9 @@ func (i *Interp) find(c interface{}, a []interface{}) gojq.Iter { io.RuneReader io.Seeker } + // Raw byte regexp matching is a bit tricky: we read each byte as a codepoint (ByteRuneReader), + // so a UTF-8 encoded codepoint in the pattern can be used to match a raw byte. For example, \u00ff + // (encoded as 0xc3 0xbf) will match the byte 0xff. if strings.Contains(flags, "b") { // byte mode, read each byte as a rune rr = ioextra.ByteRuneReader{RS: bb}