mirror of
https://github.com/wader/fq.git
synced 2024-12-27 15:42:07 +03:00
toml,xml: Fail fast on invalid content
encoding/xml and github.com/BurntSushi/toml both reads a lot before detecting that it can't decode. Now we instead read one UTF-8 and make sure it's valid xml or toml. Should speed up probing Related to #586 bigzero-zip.zip
This commit is contained in:
parent
aaf60ec250
commit
56edb59e83
6
format/toml/testdata/toml.fqtest
vendored
6
format/toml/testdata/toml.fqtest
vendored
@ -63,9 +63,9 @@ true = true
|
||||
toml: top-level values must be Go maps or structs
|
||||
----
|
||||
|
||||
error at position 0x0: root object has no values
|
||||
error at position 0x0: EOF
|
||||
----
|
||||
$ fq -n '"" | from_toml'
|
||||
$ fq -n '" " | from_toml'
|
||||
exitcode: 5
|
||||
stderr:
|
||||
error: error at position 0x0: root object has no values
|
||||
error: error at position 0x1: root object has no values
|
||||
|
@ -1,8 +1,12 @@
|
||||
package toml
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"embed"
|
||||
"fmt"
|
||||
"io"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/wader/fq/format"
|
||||
@ -29,11 +33,38 @@ func init() {
|
||||
interp.RegisterFunc0("to_toml", toTOML)
|
||||
}
|
||||
|
||||
func decodeTOMLSeekFirstValidRune(br io.ReadSeeker) error {
|
||||
buf := bufio.NewReader(br)
|
||||
r, sz, err := buf.ReadRune()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := br.Seek(0, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
if r == utf8.RuneError && sz == 1 {
|
||||
return fmt.Errorf("invalid UTF-8")
|
||||
}
|
||||
if r == 0 {
|
||||
return fmt.Errorf("TOML can't contain null bytes")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeTOML(d *decode.D) any {
|
||||
br := d.RawLen(d.Len())
|
||||
bbr := d.RawLen(d.Len())
|
||||
var r any
|
||||
|
||||
if _, err := toml.NewDecoder(bitio.NewIOReader(br)).Decode(&r); err != nil {
|
||||
br := bitio.NewIOReadSeeker(bbr)
|
||||
|
||||
// github.com/BurntSushi/toml currently does a ReadAll which might be expensive
|
||||
// try find invalid toml (null bytes etc) faster and more efficient
|
||||
if err := decodeTOMLSeekFirstValidRune(br); err != nil {
|
||||
d.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
if _, err := toml.NewDecoder(br).Decode(&r); err != nil {
|
||||
d.Fatalf("%s", err)
|
||||
}
|
||||
var s scalar.Any
|
||||
|
@ -7,15 +7,18 @@ package xml
|
||||
// TODO: rewrite ns stack
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"embed"
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"fmt"
|
||||
"html"
|
||||
"io"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/wader/fq/format"
|
||||
"github.com/wader/fq/internal/gojqex"
|
||||
@ -247,15 +250,54 @@ func fromXMLToArray(n xmlNode) any {
|
||||
return f(n, nil)
|
||||
}
|
||||
|
||||
// from golang encoding/xml, copyright 2009 The Go Authors
|
||||
// the Char production of https://www.xml.com/axml/testaxml.htm,
|
||||
// Section 2.2 Characters.
|
||||
func isInCharacterRange(r rune) (inrange bool) {
|
||||
return r == 0x09 ||
|
||||
r == 0x0A ||
|
||||
r == 0x0D ||
|
||||
r >= 0x20 && r <= 0xD7FF ||
|
||||
r >= 0xE000 && r <= 0xFFFD ||
|
||||
r >= 0x10000 && r <= 0x10FFFF
|
||||
}
|
||||
|
||||
func decodeXMLSeekFirstValidRune(br io.ReadSeeker) error {
|
||||
buf := bufio.NewReader(br)
|
||||
r, sz, err := buf.ReadRune()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := br.Seek(0, io.SeekStart); err != nil {
|
||||
return err
|
||||
}
|
||||
if r == utf8.RuneError && sz == 1 {
|
||||
return fmt.Errorf("invalid UTF-8")
|
||||
}
|
||||
if !isInCharacterRange(r) {
|
||||
return fmt.Errorf("illegal character code %U", r)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func decodeXML(d *decode.D) any {
|
||||
var xi format.XMLIn
|
||||
d.ArgAs(&xi)
|
||||
|
||||
br := d.RawLen(d.Len())
|
||||
bbr := d.RawLen(d.Len())
|
||||
var r any
|
||||
var err error
|
||||
|
||||
xd := xml.NewDecoder(bitio.NewIOReader(br))
|
||||
br := bitio.NewIOReadSeeker(bbr)
|
||||
|
||||
// this reimplements same xml rune range validation as ecoding/xml but fails faster
|
||||
if err := decodeXMLSeekFirstValidRune(br); err != nil {
|
||||
d.Fatalf("%s", err)
|
||||
}
|
||||
|
||||
xd := xml.NewDecoder(br)
|
||||
|
||||
xd.Strict = false
|
||||
var n xmlNode
|
||||
if err := xd.Decode(&n); err != nil {
|
||||
|
Loading…
Reference in New Issue
Block a user