diff --git a/querylog/qlog_file.go b/querylog/qlog_file.go index 83f0d30d..63557098 100644 --- a/querylog/qlog_file.go +++ b/querylog/qlog_file.go @@ -6,12 +6,14 @@ import ( "sync" "time" + "github.com/AdguardTeam/golibs/log" + "github.com/pkg/errors" ) var ErrSeekNotFound = errors.New("Seek not found the record") -const bufferSize = 64 * 1024 // 64 KB is the buffer size +const bufferSize = 256 * 1024 // 256 KB is the buffer size type QLogFile struct { file *os.File // the query log file @@ -48,12 +50,16 @@ func NewQLogFile(path string) (*QLogFile, error) { // it shifts seek position to 3/4 of the file. Otherwise, to 1/4 of the file. // 5. It performs the search again, every time the search scope is narrowed twice. // -// It returns the position of the line with the timestamp we were looking for. +// It returns the position of the the line with the timestamp we were looking for +// so that when we call "ReadNext" this line was returned. // If we could not find it, it returns 0 and ErrSeekNotFound func (q *QLogFile) Seek(timestamp uint64) (int64, error) { q.lock.Lock() defer q.lock.Unlock() + // Empty the buffer + q.buffer = nil + // First of all, check the file size fileInfo, err := q.file.Stat() if err != nil { @@ -61,38 +67,64 @@ func (q *QLogFile) Seek(timestamp uint64) (int64, error) { } // Define the search scope - start := int64(0) - end := fileInfo.Size() - probe := (end - start) / 2 + start := int64(0) // start of the search interval (position in the file) + end := fileInfo.Size() // end of the search interval (position in the file) + probe := (end - start) / 2 // probe -- approximate index of the line we'll try to check + var line string + var lineIdx int64 // index of the probe line in the file + var lastProbeLineIdx int64 // index of the last probe line - // Get the line - line, _, err := q.readProbeLine(probe) - if err != nil { - return 0, err + // Count seek depth in order to detect mistakes + // If depth is too large, we should stop the search + depth := 0 + + for { + // Get the line at the specified position + line, lineIdx, err = q.readProbeLine(probe) + if err != nil { + return 0, err + } + + // Get the timestamp from the query log record + ts := q.readTimestamp(line) + + if ts == 0 { + return 0, ErrSeekNotFound + } + + if ts == timestamp { + // Hurray, returning the result + break + } + + if lastProbeLineIdx == lineIdx { + // If we're testing the same line twice then most likely + // the scope is too narrow and we won't find anything anymore + return 0, ErrSeekNotFound + } + + // Narrow the scope and repeat the search + if ts > timestamp { + // If the timestamp we're looking for is OLDER than what we found + // Then the line is somewhere on the LEFT side from the current probe position + end = probe + probe = start + (end-start)/2 + } else { + // If the timestamp we're looking for is NEWER than what we found + // Then the line is somewhere on the RIGHT side from the current probe position + start = probe + probe = start + (end-start)/2 + } + + depth++ + if depth >= 100 { + log.Error("Seek depth is too high, aborting. File %s, ts %v", q.file.Name(), timestamp) + return 0, ErrSeekNotFound + } } - // Get the timestamp from the query log record - ts := q.readTimestamp(line) - - if ts == timestamp { - // Hurray, returning the result - return probe, nil - } - - // Narrow the scope and repeat the search - if ts > timestamp { - end := probe - probe = (end - start) / 2 - } else { - start := probe - probe = (end - start) / 2 - } - - // TODO: temp - q.position = probe - - // TODO: Check start/stop/probe values and loop this - return 0, ErrSeekNotFound + q.position = lineIdx + int64(len(line)) + return q.position, nil } // SeekStart changes the current position to the end of the file @@ -102,6 +134,9 @@ func (q *QLogFile) SeekStart() (int64, error) { q.lock.Lock() defer q.lock.Unlock() + // Empty the buffer + q.buffer = nil + // First of all, check the file size fileInfo, err := q.file.Stat() if err != nil { @@ -111,7 +146,6 @@ func (q *QLogFile) SeekStart() (int64, error) { // Place the position to the very end of file q.position = fileInfo.Size() - 1 if q.position < 0 { - // TODO: test empty file q.position = 0 } return q.position, nil @@ -160,12 +194,13 @@ func (q *QLogFile) Close() error { // 4. read the line from the buffer func (q *QLogFile) readNextLine(position int64) (string, int64, error) { relativePos := position - q.bufferStart - if q.buffer == nil || relativePos < maxEntrySize { + if q.buffer == nil || (relativePos < maxEntrySize && q.bufferStart != 0) { // Time to re-init the buffer err := q.initBuffer(position) if err != nil { return "", 0, err } + relativePos = position - q.bufferStart } // Look for the end of the prev line @@ -201,7 +236,6 @@ func (q *QLogFile) initBuffer(position int64) error { q.buffer = make([]byte, bufferSize) } q.bufferLen, err = q.file.Read(q.buffer) - // TODO: validate bufferLen if err != nil { return err } @@ -218,7 +252,6 @@ func (q *QLogFile) readProbeLine(position int64) (string, int64, error) { seekPosition := int64(0) relativePos := position // position relative to the buffer we're going to read if (position - maxEntrySize) > 0 { - // TODO: cover this case in tests seekPosition = position - maxEntrySize relativePos = maxEntrySize } @@ -267,12 +300,12 @@ func (q *QLogFile) readTimestamp(str string) uint64 { } if len(val) == 0 { - // TODO: log + log.Error("Couldn't find timestamp in %s: %s", q.file.Name(), str) return 0 } tm, err := time.Parse(time.RFC3339, val) if err != nil { - // TODO: log + log.Error("Couldn't parse timestamp in %s: %s", q.file.Name(), val) return 0 } return uint64(tm.UnixNano()) diff --git a/querylog/qlog_file_test.go b/querylog/qlog_file_test.go index 6ba5780f..e9bc70e8 100644 --- a/querylog/qlog_file_test.go +++ b/querylog/qlog_file_test.go @@ -14,15 +14,157 @@ import ( ) func TestQLogFileEmpty(t *testing.T) { - // TODO: test empty file + testDir := prepareTestDir() + defer func() { _ = os.RemoveAll(testDir) }() + testFile := prepareTestFile(testDir, 0) + + // create the new QLogFile instance + q, err := NewQLogFile(testFile) + assert.Nil(t, err) + assert.NotNil(t, q) + + // seek to the start + pos, err := q.SeekStart() + assert.Nil(t, err) + assert.Equal(t, int64(0), pos) + + // try reading anyway + line, err := q.ReadNext() + assert.Equal(t, io.EOF, err) + assert.Equal(t, "", line) } func TestQLogFileLarge(t *testing.T) { - // TODO: test reading large file + // should be large enough + count := 50000 + + testDir := prepareTestDir() + defer func() { _ = os.RemoveAll(testDir) }() + testFile := prepareTestFile(testDir, count) + + // create the new QLogFile instance + q, err := NewQLogFile(testFile) + assert.Nil(t, err) + assert.NotNil(t, q) + + // seek to the start + pos, err := q.SeekStart() + assert.Nil(t, err) + assert.NotEqual(t, int64(0), pos) + + read := 0 + var line string + for err == nil { + line, err = q.ReadNext() + if err == nil { + assert.True(t, len(line) > 0) + read += 1 + } + } + + assert.Equal(t, count, read) + assert.Equal(t, io.EOF, err) } -func TestQLogFileSeek(t *testing.T) { - // TODO: test seek method on a small file +func TestQLogFileSeekLargeFile(t *testing.T) { + // more or less big file + count := 10000 + + testDir := prepareTestDir() + defer func() { _ = os.RemoveAll(testDir) }() + testFile := prepareTestFile(testDir, count) + + // create the new QLogFile instance + q, err := NewQLogFile(testFile) + assert.Nil(t, err) + assert.NotNil(t, q) + + // CASE 1: NOT TOO OLD LINE + testSeekLine(t, q, 300) + + // CASE 2: OLD LINE + testSeekLine(t, q, count-300) + + // CASE 3: FIRST LINE + testSeekLine(t, q, 0) + + // CASE 4: LAST LINE + testSeekLine(t, q, count) + + // CASE 5: Seek non-existent (too low) + _, err = q.Seek(123) + assert.NotNil(t, err) + + // CASE 6: Seek non-existent (too high) + ts, _ := time.Parse(time.RFC3339, "2100-01-02T15:04:05Z07:00") + _, err = q.Seek(uint64(ts.UnixNano())) + assert.NotNil(t, err) +} + +func TestQLogFileSeekSmallFile(t *testing.T) { + // more or less big file + count := 10 + + testDir := prepareTestDir() + defer func() { _ = os.RemoveAll(testDir) }() + testFile := prepareTestFile(testDir, count) + + // create the new QLogFile instance + q, err := NewQLogFile(testFile) + assert.Nil(t, err) + assert.NotNil(t, q) + + // CASE 1: NOT TOO OLD LINE + testSeekLine(t, q, 2) + + // CASE 2: OLD LINE + testSeekLine(t, q, count-2) + + // CASE 3: FIRST LINE + testSeekLine(t, q, 0) + + // CASE 4: LAST LINE + testSeekLine(t, q, count) + + // CASE 5: Seek non-existent (too low) + _, err = q.Seek(123) + assert.NotNil(t, err) + + // CASE 6: Seek non-existent (too high) + ts, _ := time.Parse(time.RFC3339, "2100-01-02T15:04:05Z07:00") + _, err = q.Seek(uint64(ts.UnixNano())) + assert.NotNil(t, err) +} + +func testSeekLine(t *testing.T, q *QLogFile, lineNumber int) { + line, err := getQLogLine(q, lineNumber) + assert.Nil(t, err) + ts := q.readTimestamp(line) + assert.NotEqual(t, uint64(0), ts) + + // try seeking to that line now + pos, err := q.Seek(ts) + assert.Nil(t, err) + assert.NotEqual(t, int64(0), pos) + + testLine, err := q.ReadNext() + assert.Nil(t, err) + assert.Equal(t, line, testLine) +} + +func getQLogLine(q *QLogFile, lineNumber int) (string, error) { + _, err := q.SeekStart() + if err != nil { + return "", err + } + + for i := 1; i < lineNumber; i++ { + _, err := q.ReadNext() + if err != nil { + return "", err + } + } + return q.ReadNext() } // Check adding and loading (with filtering) entries from disk and memory