Pull request 1921: 6003-relax-scan-limit

Updates #6003.

Squashed commit of the following:

commit 1cc42303c29edc621802fc182ccb5701e412f099
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Thu Jul 13 13:47:41 2023 +0300

    all: fix chlog

commit e835084c7aac6384ea7b0886e6b3b1d614438baa
Author: Ainar Garipov <A.Garipov@AdGuard.COM>
Date:   Thu Jul 13 13:40:45 2023 +0300

    rulelist: imp longer line handling
This commit is contained in:
Ainar Garipov 2023-07-13 13:57:32 +03:00
parent de63eeabfa
commit f22d893845
5 changed files with 53 additions and 23 deletions

View File

@@ -23,11 +23,18 @@ See also the [v0.107.35 GitHub milestone][ms-v0.107.35].
NOTE: Add new changes BELOW THIS COMMENT.
-->
### Fixed
- `bufio.Scanner: token too long` errors when trying to add filtering-rule lists
with lines over 1024 bytes long ([#6003]).
### Removed
- Default exposure of the non-standard ports 784 and 8853 for DNS-over-QUIC in
the `Dockerfile`.
[#6003]: https://github.com/AdguardTeam/AdGuardHome/issues/6003
<!--
NOTE: Add new changes ABOVE THIS COMMENT.
-->

View File

@@ -943,7 +943,7 @@ func New(c *Config, blockFilters []Filter) (d *DNSFilter, err error) {
d = &DNSFilter{
bufPool: &sync.Pool{
New: func() (buf any) {
bufVal := make([]byte, rulelist.MaxRuleLen)
bufVal := make([]byte, rulelist.DefaultRuleBufSize)
return &bufVal
},

View File

@@ -7,6 +7,7 @@ import (
"hash/crc32"
"io"
"unicode"
"unicode/utf8"
"github.com/AdguardTeam/golibs/errors"
)
@@ -48,19 +49,29 @@ type ParseResult struct {
// nil.
func (p *Parser) Parse(dst io.Writer, src io.Reader, buf []byte) (r *ParseResult, err error) {
s := bufio.NewScanner(src)
s.Buffer(buf, MaxRuleLen)
lineIdx := 0
// Don't use [DefaultRuleBufSize] as the maximum size, since some
// filtering-rule lists compressed by e.g. HostlistsCompiler can have very
// large lines. The buffer optimization still works for the more common
// case of reasonably-sized lines.
//
// See https://github.com/AdguardTeam/AdGuardHome/issues/6003.
s.Buffer(buf, bufio.MaxScanTokenSize)
// Use a one-based index for lines and columns, since these errors end up in
// the frontend, and users are more familiar with one-based line and column
// indexes.
lineNum := 1
for s.Scan() {
var n int
n, err = p.processLine(dst, s.Bytes(), lineIdx)
n, err = p.processLine(dst, s.Bytes(), lineNum)
p.written += n
if err != nil {
// Don't wrap the error, because it's informative enough as is.
return p.result(), err
}
lineIdx++
lineNum++
}
r = p.result()
@@ -81,7 +92,7 @@ func (p *Parser) result() (r *ParseResult) {
// processLine processes a single line. It may write to dst, and if it does, n
// is the number of bytes written.
func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, err error) {
func (p *Parser) processLine(dst io.Writer, line []byte, lineNum int) (n int, err error) {
trimmed := bytes.TrimSpace(line)
if p.written == 0 && isHTMLLine(trimmed) {
return 0, ErrHTML
@@ -94,10 +105,13 @@ func (p *Parser) processLine(dst io.Writer, line []byte, lineIdx int) (n int, er
badIdx, isRule = p.parseLineTitle(trimmed)
}
if badIdx != -1 {
badRune, _ := utf8.DecodeRune(trimmed[badIdx:])
return 0, fmt.Errorf(
"line at index %d: character at index %d: non-printable character",
lineIdx,
badIdx+bytes.Index(line, trimmed),
"line %d: character %d: non-printable character %q",
lineNum,
badIdx+bytes.Index(line, trimmed)+1,
badRune,
)
}

View File

@@ -17,6 +17,9 @@ import (
func TestParser_Parse(t *testing.T) {
t.Parallel()
longRule := strings.Repeat("a", rulelist.DefaultRuleBufSize+1) + "\n"
tooLongRule := strings.Repeat("a", bufio.MaxScanTokenSize+1) + "\n"
testCases := []struct {
name string
in string
@@ -80,20 +83,28 @@ func TestParser_Parse(t *testing.T) {
testRuleTextBlocked +
">>>\x7F<<<",
wantDst: testRuleTextBlocked,
wantErrMsg: "line at index 2: " +
"character at index 3: " +
"non-printable character",
wantErrMsg: "line 3: " +
"character 4: " +
"non-printable character '\\x7f'",
wantTitle: "Test Title",
wantRulesNum: 1,
wantWritten: len(testRuleTextBlocked),
}, {
name: "too_long",
in: strings.Repeat("a", rulelist.MaxRuleLen+1),
in: tooLongRule,
wantDst: "",
wantErrMsg: "scanning filter contents: " + bufio.ErrTooLong.Error(),
wantErrMsg: "scanning filter contents: bufio.Scanner: token too long",
wantTitle: "",
wantRulesNum: 0,
wantWritten: 0,
}, {
name: "longer_than_default",
in: longRule,
wantDst: longRule,
wantErrMsg: "",
wantTitle: "",
wantRulesNum: 1,
wantWritten: len(longRule),
}, {
name: "bad_tab_and_comment",
in: testRuleTextBadTab,
@@ -118,7 +129,7 @@ func TestParser_Parse(t *testing.T) {
t.Parallel()
dst := &bytes.Buffer{}
buf := make([]byte, rulelist.MaxRuleLen)
buf := make([]byte, rulelist.DefaultRuleBufSize)
p := rulelist.NewParser()
r, err := p.Parse(dst, strings.NewReader(tc.in), buf)
@@ -145,7 +156,7 @@ func TestParser_Parse_writeError(t *testing.T) {
return 1, errors.Error("test error")
},
}
buf := make([]byte, rulelist.MaxRuleLen)
buf := make([]byte, rulelist.DefaultRuleBufSize)
p := rulelist.NewParser()
r, err := p.Parse(dst, strings.NewReader(testRuleTextBlocked), buf)
@@ -165,7 +176,7 @@ func TestParser_Parse_checksums(t *testing.T) {
"# Another comment.\n"
)
buf := make([]byte, rulelist.MaxRuleLen)
buf := make([]byte, rulelist.DefaultRuleBufSize)
p := rulelist.NewParser()
r, err := p.Parse(&bytes.Buffer{}, strings.NewReader(withoutComments), buf)
@@ -192,7 +203,7 @@ var (
func BenchmarkParser_Parse(b *testing.B) {
dst := &bytes.Buffer{}
src := strings.NewReader(strings.Repeat(testRuleTextBlocked, 1000))
buf := make([]byte, rulelist.MaxRuleLen)
buf := make([]byte, rulelist.DefaultRuleBufSize)
p := rulelist.NewParser()
b.ReportAllocs()

View File

@@ -4,8 +4,6 @@
// TODO(a.garipov): Expand.
package rulelist
// MaxRuleLen is the maximum length of a line with a filtering rule, in bytes.
//
// TODO(a.garipov): Consider changing this to a rune length, like AdGuardDNS
// does.
const MaxRuleLen = 1024
// DefaultRuleBufSize is the default length of a buffer used to read a line with
// a filtering rule, in bytes.
const DefaultRuleBufSize = 1024