Add a test to demonstrate huge memory usage due from having too many regexps

This commit is contained in:
Eugene Bujak 2018-10-04 00:20:53 +03:00
parent 8c76e17b1b
commit 3aac7e7bc9

View File

@ -1,9 +1,13 @@
package dnsfilter
import (
"archive/zip"
"bytes"
"io/ioutil"
"net/http"
"net/http/httptest"
"path"
"runtime/pprof"
"strings"
"testing"
"time"
@ -13,9 +17,175 @@ import (
"os"
"runtime"
"github.com/shirou/gopsutil/process"
"go.uber.org/goleak"
)
// first in file because it must be run first
func TestLotsOfRulesMemoryUsage(t *testing.T) {
start := getRSS()
trace("RSS before loading rules - %d kB\n", start/1024)
dumpMemProfile(_Func() + "1.pprof")
d := NewForTest()
defer d.Destroy()
err := loadTestRules(d)
if err != nil {
t.Error(err)
}
afterLoad := getRSS()
trace("RSS after loading rules - %d kB (%d kB diff)\n", afterLoad/1024, (afterLoad-start)/1024)
dumpMemProfile(_Func() + "2.pprof")
tests := []struct {
host string
match bool
}{
{"asdasdasd_adsajdasda_asdasdjashdkasdasdasdasd_adsajdasda_asdasdjashdkasd.thisistesthost.com", false},
{"asdasdasd_adsajdasda_asdasdjashdkasdasdasdasd_adsajdasda_asdasdjashdkasd.ad.doubleclick.net", true},
}
for _, testcase := range tests {
ret, err := d.CheckHost(testcase.host)
if err != nil {
t.Errorf("Error while matching host %s: %s", testcase.host, err)
}
if !ret.IsFiltered && ret.IsFiltered != testcase.match {
t.Errorf("Expected hostname %s to not match", testcase.host)
}
if ret.IsFiltered && ret.IsFiltered != testcase.match {
t.Errorf("Expected hostname %s to match", testcase.host)
}
}
afterMatch := getRSS()
trace("RSS after matching - %d kB (%d kB diff)\n", afterMatch/1024, (afterMatch-afterLoad)/1024)
dumpMemProfile(_Func() + "3.pprof")
}
func getRSS() uint64 {
proc, err := process.NewProcess(int32(os.Getpid()))
if err != nil {
panic(err)
}
minfo, err := proc.MemoryInfo()
return minfo.RSS
}
func dumpMemProfile(name string) {
runtime.GC()
f, err := os.Create(name)
if err != nil {
panic(err)
}
defer f.Close()
runtime.GC() // update the stats before writing them
err = pprof.WriteHeapProfile(f)
if err != nil {
panic(err)
}
}
const topHostsFilename = "../tests/top-1m.csv"
func fetchTopHostsFromNet() {
trace("Fetching top hosts from network")
resp, err := http.Get("http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip")
if err != nil {
panic(err)
}
defer resp.Body.Close()
trace("Reading zipfile body")
zipfile, err := ioutil.ReadAll(resp.Body)
if err != nil {
panic(err)
}
trace("Opening zipfile")
r, err := zip.NewReader(bytes.NewReader(zipfile), int64(len(zipfile)))
if err != nil {
panic(err)
}
if len(r.File) != 1 {
panic(fmt.Errorf("zipfile must have only one entry: %+v", r))
}
f := r.File[0]
trace("Unpacking file %s from zipfile", f.Name)
rc, err := f.Open()
if err != nil {
panic(err)
}
trace("Reading file %s contents", f.Name)
body, err := ioutil.ReadAll(rc)
if err != nil {
panic(err)
}
rc.Close()
trace("Writing file %s contents to disk", f.Name)
err = ioutil.WriteFile(topHostsFilename+".tmp", body, 0644)
if err != nil {
panic(err)
}
err = os.Rename(topHostsFilename+".tmp", topHostsFilename)
if err != nil {
panic(err)
}
}
func getTopHosts() {
// if file doesn't exist, fetch it
if _, err := os.Stat(topHostsFilename); os.IsNotExist(err) {
// file does not exist, fetch it
fetchTopHostsFromNet()
}
}
func TestLotsOfRulesLotsOfHostsMemoryUsage(t *testing.T) {
start := getRSS()
trace("RSS before loading rules - %d kB\n", start/1024)
dumpMemProfile(_Func() + "1.pprof")
d := NewForTest()
defer d.Destroy()
mustLoadTestRules(d)
trace("Have %d rules", d.Count())
afterLoad := getRSS()
trace("RSS after loading rules - %d kB (%d kB diff)\n", afterLoad/1024, (afterLoad-start)/1024)
dumpMemProfile(_Func() + "2.pprof")
getTopHosts()
hostnames, err := os.Open(topHostsFilename)
if err != nil {
t.Fatal(err)
}
defer hostnames.Close()
afterHosts := getRSS()
trace("RSS after loading hosts - %d kB (%d kB diff)\n", afterHosts/1024, (afterHosts-afterLoad)/1024)
dumpMemProfile(_Func() + "2.pprof")
{
scanner := bufio.NewScanner(hostnames)
for scanner.Scan() {
line := scanner.Text()
records := strings.Split(line, ",")
ret, err := d.CheckHost(records[1] + "." + records[1])
if err != nil {
t.Error(err)
}
if ret.Reason.Matched() {
// log.Printf("host \"%s\" mathed. Rule \"%s\", reason: %v", host, ret.Rule, ret.Reason)
}
}
}
afterMatch := getRSS()
trace("RSS after matching - %d kB (%d kB diff)\n", afterMatch/1024, (afterMatch-afterLoad)/1024)
dumpMemProfile(_Func() + "3.pprof")
}
func TestRuleToRegexp(t *testing.T) {
tests := []struct {
rule string
@ -114,6 +284,13 @@ func loadTestRules(d *Dnsfilter) error {
return err
}
func mustLoadTestRules(d *Dnsfilter) {
err := loadTestRules(d)
if err != nil {
panic(err)
}
}
func NewForTest() *Dnsfilter {
d := New()
purgeCaches()
@ -126,7 +303,9 @@ func NewForTest() *Dnsfilter {
func TestSanityCheck(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "||doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatch(t, "www.doubleclick.net")
d.checkMatchEmpty(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
@ -134,6 +313,72 @@ func TestSanityCheck(t *testing.T) {
d.checkAddRuleFail(t, "lkfaojewhoawehfwacoefawr$@#$@3413841384")
}
func TestSuffixMatching1(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "||doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatch(t, "www.doubleclick.net")
d.checkMatchEmpty(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
}
func TestSuffixMatching2(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "|doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatchEmpty(t, "www.doubleclick.net")
d.checkMatchEmpty(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
}
func TestSuffixMatching3(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatch(t, "www.doubleclick.net")
d.checkMatch(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
}
func TestSuffixMatching4(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "*doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatch(t, "www.doubleclick.net")
d.checkMatch(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
}
func TestSuffixMatching5(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "|*doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatch(t, "www.doubleclick.net")
d.checkMatch(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
}
func TestSuffixMatching6(t *testing.T) {
d := NewForTest()
defer d.Destroy()
d.checkAddRule(t, "||*doubleclick.net^")
d.checkMatch(t, "doubleclick.net")
d.checkMatch(t, "www.doubleclick.net")
d.checkMatch(t, "nodoubleclick.net")
d.checkMatchEmpty(t, "doubleclick.net.ru")
}
func TestCount(t *testing.T) {
d := NewForTest()
defer d.Destroy()
@ -219,46 +464,6 @@ func TestAddRuleFail(t *testing.T) {
d.checkAddRuleFail(t, "lkfaojewhoawehfwacoefawr$@#$@3413841384")
}
func TestLotsOfRulesMemoryUsage(t *testing.T) {
var start, afterLoad, end runtime.MemStats
runtime.GC()
runtime.ReadMemStats(&start)
fmt.Printf("Memory usage before loading rules - %d kB alloc, %d kB sys\n", start.Alloc/1024, start.Sys/1024)
d := NewForTest()
defer d.Destroy()
err := loadTestRules(d)
if err != nil {
t.Error(err)
}
runtime.GC()
runtime.ReadMemStats(&afterLoad)
fmt.Printf("Memory usage after loading rules - %d kB alloc, %d kB sys\n", afterLoad.Alloc/1024, afterLoad.Sys/1024)
tests := []struct {
host string
match bool
}{
{"asdasdasd_adsajdasda_asdasdjashdkasdasdasdasd_adsajdasda_asdasdjashdkasd.thisistesthost.com", false},
{"asdasdasd_adsajdasda_asdasdjashdkasdasdasdasd_adsajdasda_asdasdjashdkasd.ad.doubleclick.net", true},
}
for _, testcase := range tests {
ret, err := d.CheckHost(testcase.host)
if err != nil {
t.Errorf("Error while matching host %s: %s", testcase.host, err)
}
if !ret.IsFiltered && ret.IsFiltered != testcase.match {
t.Errorf("Expected hostname %s to not match", testcase.host)
}
if ret.IsFiltered && ret.IsFiltered != testcase.match {
t.Errorf("Expected hostname %s to match", testcase.host)
}
}
runtime.GC()
runtime.ReadMemStats(&end)
fmt.Printf("Memory usage after matching - %d kB alloc, %d kB sys\n", afterLoad.Alloc/1024, afterLoad.Sys/1024)
}
func TestSafeBrowsing(t *testing.T) {
testCases := []string{
"",
@ -571,6 +776,80 @@ func BenchmarkLotsOfRulesMatchParallel(b *testing.B) {
})
}
func BenchmarkLotsOfRulesLotsOfHosts(b *testing.B) {
d := NewForTest()
defer d.Destroy()
mustLoadTestRules(d)
getTopHosts()
hostnames, err := os.Open(topHostsFilename)
if err != nil {
b.Fatal(err)
}
defer hostnames.Close()
scanner := bufio.NewScanner(hostnames)
b.ResetTimer()
for n := 0; n < b.N; n++ {
havedata := scanner.Scan()
if !havedata {
hostnames.Seek(0, 0)
scanner = bufio.NewScanner(hostnames)
havedata = scanner.Scan()
}
if !havedata {
b.Fatal(scanner.Err())
}
line := scanner.Text()
records := strings.Split(line, ",")
ret, err := d.CheckHost(records[1] + "." + records[1])
if err != nil {
b.Error(err)
}
if ret.Reason.Matched() {
// log.Printf("host \"%s\" mathed. Rule \"%s\", reason: %v", host, ret.Rule, ret.Reason)
}
}
}
func BenchmarkLotsOfRulesLotsOfHostsParallel(b *testing.B) {
d := NewForTest()
defer d.Destroy()
mustLoadTestRules(d)
getTopHosts()
b.ResetTimer()
b.RunParallel(func(pb *testing.PB) {
hostnames, err := os.Open(topHostsFilename)
if err != nil {
b.Fatal(err)
}
defer hostnames.Close()
scanner := bufio.NewScanner(hostnames)
for pb.Next() {
havedata := scanner.Scan()
if !havedata {
hostnames.Seek(0, 0)
scanner = bufio.NewScanner(hostnames)
havedata = scanner.Scan()
}
if !havedata {
b.Fatal(scanner.Err())
}
line := scanner.Text()
records := strings.Split(line, ",")
ret, err := d.CheckHost(records[1] + "." + records[1])
if err != nil {
b.Error(err)
}
if ret.Reason.Matched() {
// log.Printf("host \"%s\" mathed. Rule \"%s\", reason: %v", host, ret.Rule, ret.Reason)
}
}
})
}
func BenchmarkSafeBrowsing(b *testing.B) {
d := NewForTest()
defer d.Destroy()
@ -645,8 +924,12 @@ func TestMain(m *testing.M) {
// helper functions for debugging and testing
//
func purgeCaches() {
safebrowsingCache.Purge()
parentalCache.Purge()
if safebrowsingCache != nil {
safebrowsingCache.Purge()
}
if parentalCache != nil {
parentalCache.Purge()
}
}
func _Func() string {