1
1
mirror of https://github.com/wader/fq.git synced 2024-12-28 08:02:28 +03:00

toml,xml: Fail fast on invalid content

encoding/xml and github.com/BurntSushi/toml both reads a lot before detecting
that it can't decode. Now we instead read one UTF-8 and make sure it's valid
xml or toml.

Should speed up probing

Related to #586 bigzero-zip.zip
This commit is contained in:
Mattias Wadman 2023-02-22 16:16:35 +01:00
parent aaf60ec250
commit 56edb59e83
3 changed files with 80 additions and 7 deletions

View File

@ -63,9 +63,9 @@ true = true
toml: top-level values must be Go maps or structs toml: top-level values must be Go maps or structs
---- ----
error at position 0x0: root object has no values error at position 0x0: EOF
---- ----
$ fq -n '"" | from_toml' $ fq -n '" " | from_toml'
exitcode: 5 exitcode: 5
stderr: stderr:
error: error at position 0x0: root object has no values error: error at position 0x1: root object has no values

View File

@ -1,8 +1,12 @@
package toml package toml
import ( import (
"bufio"
"bytes" "bytes"
"embed" "embed"
"fmt"
"io"
"unicode/utf8"
"github.com/BurntSushi/toml" "github.com/BurntSushi/toml"
"github.com/wader/fq/format" "github.com/wader/fq/format"
@ -29,11 +33,38 @@ func init() {
interp.RegisterFunc0("to_toml", toTOML) interp.RegisterFunc0("to_toml", toTOML)
} }
func decodeTOMLSeekFirstValidRune(br io.ReadSeeker) error {
buf := bufio.NewReader(br)
r, sz, err := buf.ReadRune()
if err != nil {
return err
}
if _, err := br.Seek(0, io.SeekStart); err != nil {
return err
}
if r == utf8.RuneError && sz == 1 {
return fmt.Errorf("invalid UTF-8")
}
if r == 0 {
return fmt.Errorf("TOML can't contain null bytes")
}
return nil
}
func decodeTOML(d *decode.D) any { func decodeTOML(d *decode.D) any {
br := d.RawLen(d.Len()) bbr := d.RawLen(d.Len())
var r any var r any
if _, err := toml.NewDecoder(bitio.NewIOReader(br)).Decode(&r); err != nil { br := bitio.NewIOReadSeeker(bbr)
// github.com/BurntSushi/toml currently does a ReadAll which might be expensive
// try find invalid toml (null bytes etc) faster and more efficient
if err := decodeTOMLSeekFirstValidRune(br); err != nil {
d.Fatalf("%s", err)
}
if _, err := toml.NewDecoder(br).Decode(&r); err != nil {
d.Fatalf("%s", err) d.Fatalf("%s", err)
} }
var s scalar.Any var s scalar.Any

View File

@ -7,15 +7,18 @@ package xml
// TODO: rewrite ns stack // TODO: rewrite ns stack
import ( import (
"bufio"
"bytes" "bytes"
"embed" "embed"
"encoding/xml" "encoding/xml"
"errors" "errors"
"fmt"
"html" "html"
"io" "io"
"regexp" "regexp"
"strconv" "strconv"
"strings" "strings"
"unicode/utf8"
"github.com/wader/fq/format" "github.com/wader/fq/format"
"github.com/wader/fq/internal/gojqex" "github.com/wader/fq/internal/gojqex"
@ -247,15 +250,54 @@ func fromXMLToArray(n xmlNode) any {
return f(n, nil) return f(n, nil)
} }
// from golang encoding/xml, copyright 2009 The Go Authors
// the Char production of https://www.xml.com/axml/testaxml.htm,
// Section 2.2 Characters.
func isInCharacterRange(r rune) (inrange bool) {
return r == 0x09 ||
r == 0x0A ||
r == 0x0D ||
r >= 0x20 && r <= 0xD7FF ||
r >= 0xE000 && r <= 0xFFFD ||
r >= 0x10000 && r <= 0x10FFFF
}
func decodeXMLSeekFirstValidRune(br io.ReadSeeker) error {
buf := bufio.NewReader(br)
r, sz, err := buf.ReadRune()
if err != nil {
return err
}
if _, err := br.Seek(0, io.SeekStart); err != nil {
return err
}
if r == utf8.RuneError && sz == 1 {
return fmt.Errorf("invalid UTF-8")
}
if !isInCharacterRange(r) {
return fmt.Errorf("illegal character code %U", r)
}
return nil
}
func decodeXML(d *decode.D) any { func decodeXML(d *decode.D) any {
var xi format.XMLIn var xi format.XMLIn
d.ArgAs(&xi) d.ArgAs(&xi)
br := d.RawLen(d.Len()) bbr := d.RawLen(d.Len())
var r any var r any
var err error var err error
xd := xml.NewDecoder(bitio.NewIOReader(br)) br := bitio.NewIOReadSeeker(bbr)
// this reimplements same xml rune range validation as ecoding/xml but fails faster
if err := decodeXMLSeekFirstValidRune(br); err != nil {
d.Fatalf("%s", err)
}
xd := xml.NewDecoder(br)
xd.Strict = false xd.Strict = false
var n xmlNode var n xmlNode
if err := xd.Decode(&n); err != nil { if err := xd.Decode(&n); err != nil {