1
1
mirror of https://github.com/walles/moar.git synced 2024-11-30 02:34:13 +03:00
Commit Graph

43 Commits

Author SHA1 Message Date
Johan Walles
5e847f4cd7 Improve large file reading performance by 10%
By reading the file in two passes.

The first pass just counts the lines in the file.

The second pass starts by preallocating the correct number of lines, and
then reads the file.

The whole time saving comes from not having to dynamically resize the
lines slice while reading the file.
2021-05-03 20:33:25 +02:00
Johan Walles
97884d1805 Add a large file benchmark 2021-05-03 19:15:43 +02:00
Johan Walles
c8bc7b0161 Don't highlight files larger than 1MB
They make the highlighter just run in the background slowly eating up
memory.
2021-04-24 16:14:30 +02:00
Johan Walles
095693eb0b Wait for highlighting to complete in pager tests 2021-04-22 19:26:29 +02:00
Johan Walles
515000eec8 Add file reading benchmark 2021-04-22 07:56:10 +02:00
Johan Walles
c8e2d90ec5 Initial tests of highlighted lines count 2021-04-21 22:57:08 +02:00
Johan Walles
e88aa3f43e Skip xz compressed file in one more place 2021-04-15 14:57:56 +02:00
Johan Walles
9b86ae7b19 Fix long-line test
On Windows the sample file gets cloned with Windows newlines, breaking
the file-size assumption.

And having a constant here makes the test simpler and better anyway,
Windows or not.
2021-04-15 14:51:51 +02:00
Johan Walles
e502b68bc9 Only test xz decompression if xz binary is available 2021-04-15 13:24:24 +02:00
Johan Walles
4bba75645b Use our own empty file rather than /dev/null
Needed for unit testing on Windows.
2021-04-15 13:20:00 +02:00
Johan Walles
d5827bbc99 Parse lines on demand and only once
This improves line processing performance by 40%.

Fixes #36.
diff --git m/ansiTokenizer.go m/ansiTokenizer.go
index d991e23..056a227 100644
--- m/ansiTokenizer.go
+++ m/ansiTokenizer.go
@@ -23,6 +23,44 @@ type Token struct {
 	Style tcell.Style
 }

+// A Line represents a line of text that can / will be paged
+type Line struct {
+	raw    *string
+	plain  *string
+	tokens []Token
+}
+
+// NewLine creates a new Line from a (potentially ANSI / man page formatted) string
+func NewLine(raw string) *Line {
+	return &Line{
+		raw:    &raw,
+		plain:  nil,
+		tokens: nil,
+	}
+}
+
+// Tokens returns a representation of the string split into styled tokens
+func (line *Line) Tokens() []Token {
+	line.parse()
+	return line.tokens
+}
+
+// Plain returns a plain text representation of the initial string
+func (line *Line) Plain() string {
+	line.parse()
+	return *line.plain
+}
+
+func (line *Line) parse() {
+	if line.raw == nil {
+		// Already done
+		return
+	}
+
+	line.tokens, line.plain = tokensFromString(*line.raw)
+	line.raw = nil
+}
+
 // SetManPageFormatFromEnv parses LESS_TERMCAP_xx environment variables and
 // adapts the moar output accordingly.
 func SetManPageFormatFromEnv() {
diff --git m/pager.go m/pager.go
index 412e05b..98efa9a 100644
--- m/pager.go
+++ m/pager.go
@@ -111,7 +111,7 @@ func NewPager(r *Reader) *Pager {
 	}
 }

-func (p *Pager) _AddLine(fileLineNumber *int, maxPrefixLength int, screenLineNumber int, line string) {
+func (p *Pager) _AddLine(fileLineNumber *int, maxPrefixLength int, screenLineNumber int, line *Line) {
 	screenWidth, _ := p.screen.Size()

 	prefixLength := 0
@@ -138,7 +138,7 @@ func (p *Pager) _AddLine(fileLineNumber *int, maxPrefixLength int, screenLineNum
 func createScreenLine(
 	stringIndexAtColumnZero int,
 	screenColumnsCount int,
-	line string,
+	line *Line,
 	search *regexp.Regexp,
 ) []Token {
 	var returnMe []Token
@@ -152,14 +152,14 @@ func createScreenLine(
 		searchHitDelta = -1
 	}

-	tokens, plainString := tokensFromString(line)
-	if stringIndexAtColumnZero >= len(tokens) {
+	if stringIndexAtColumnZero >= len(line.Tokens()) {
 		// Nothing (more) to display, never mind
 		return returnMe
 	}

-	matchRanges := getMatchRanges(plainString, search)
-	for _, token := range tokens[stringIndexAtColumnZero:] {
+	plain := line.Plain()
+	matchRanges := getMatchRanges(&plain, search)
+	for _, token := range line.Tokens()[stringIndexAtColumnZero:] {
 		if len(returnMe) >= screenColumnsCount {
 			// We are trying to add a character to the right of the screen.
 			// Indicate that this line continues to the right.
@@ -232,7 +232,8 @@ func (p *Pager) _AddLines(spinner string) {
 		// This happens when we're done
 		eofSpinner = "---"
 	}
-	p._AddLine(nil, 0, screenLineNumber, _EofMarkerFormat+eofSpinner)
+	spinnerLine := NewLine(_EofMarkerFormat + eofSpinner)
+	p._AddLine(nil, 0, screenLineNumber, spinnerLine)

 	switch p.mode {
 	case _Searching:
@@ -329,8 +330,8 @@ func (p *Pager) _FindFirstHitLineOneBased(firstLineOneBased int, backwards bool)
 			return nil
 		}

-		_, lineText := tokensFromString(*line)
-		if p.searchPattern.MatchString(*lineText) {
+		lineText := line.Plain()
+		if p.searchPattern.MatchString(lineText) {
 			return &lineNumber
 		}

diff --git m/pager_test.go m/pager_test.go
index 65fa3c2..ce0f79b 100644
--- m/pager_test.go
+++ m/pager_test.go
@@ -265,13 +265,15 @@ func assertTokenRangesEqual(t *testing.T, actual []Token, expected []Token) {
 }

 func TestCreateScreenLineBase(t *testing.T) {
-	line := createScreenLine(0, 3, "", nil)
-	assert.Assert(t, len(line) == 0)
+	line := NewLine("")
+	screenLine := createScreenLine(0, 3, line, nil)
+	assert.Assert(t, len(screenLine) == 0)
 }

 func TestCreateScreenLineOverflowRight(t *testing.T) {
-	line := createScreenLine(0, 3, "012345", nil)
-	assertTokenRangesEqual(t, line, []Token{
+	line := NewLine("012345")
+	screenLine := createScreenLine(0, 3, line, nil)
+	assertTokenRangesEqual(t, screenLine, []Token{
 		createExpectedCell('0', tcell.StyleDefault),
 		createExpectedCell('1', tcell.StyleDefault),
 		createExpectedCell('>', tcell.StyleDefault.Reverse(true)),
@@ -279,8 +281,9 @@ func TestCreateScreenLineOverflowRight(t *testing.T) {
 }

 func TestCreateScreenLineUnderflowLeft(t *testing.T) {
-	line := createScreenLine(1, 3, "012", nil)
-	assertTokenRangesEqual(t, line, []Token{
+	line := NewLine("012")
+	screenLine := createScreenLine(1, 3, line, nil)
+	assertTokenRangesEqual(t, screenLine, []Token{
 		createExpectedCell('<', tcell.StyleDefault.Reverse(true)),
 		createExpectedCell('1', tcell.StyleDefault),
 		createExpectedCell('2', tcell.StyleDefault),
@@ -293,8 +296,9 @@ func TestCreateScreenLineSearchHit(t *testing.T) {
 		panic(err)
 	}

-	line := createScreenLine(0, 3, "abc", pattern)
-	assertTokenRangesEqual(t, line, []Token{
+	line := NewLine("abc")
+	screenLine := createScreenLine(0, 3, line, pattern)
+	assertTokenRangesEqual(t, screenLine, []Token{
 		createExpectedCell('a', tcell.StyleDefault),
 		createExpectedCell('b', tcell.StyleDefault.Reverse(true)),
 		createExpectedCell('c', tcell.StyleDefault),
@@ -307,8 +311,9 @@ func TestCreateScreenLineUtf8SearchHit(t *testing.T) {
 		panic(err)
 	}

-	line := createScreenLine(0, 3, "åäö", pattern)
-	assertTokenRangesEqual(t, line, []Token{
+	line := NewLine("åäö")
+	screenLine := createScreenLine(0, 3, line, pattern)
+	assertTokenRangesEqual(t, screenLine, []Token{
 		createExpectedCell('å', tcell.StyleDefault),
 		createExpectedCell('ä', tcell.StyleDefault.Reverse(true)),
 		createExpectedCell('ö', tcell.StyleDefault),
@@ -318,9 +323,10 @@ func TestCreateScreenLineUtf8SearchHit(t *testing.T) {
 func TestCreateScreenLineScrolledUtf8SearchHit(t *testing.T) {
 	pattern := regexp.MustCompile("ä")

-	line := createScreenLine(1, 4, "ååäö", pattern)
+	line := NewLine("ååäö")
+	screenLine := createScreenLine(1, 4, line, pattern)

-	assertTokenRangesEqual(t, line, []Token{
+	assertTokenRangesEqual(t, screenLine, []Token{
 		createExpectedCell('<', tcell.StyleDefault.Reverse(true)),
 		createExpectedCell('å', tcell.StyleDefault),
 		createExpectedCell('ä', tcell.StyleDefault.Reverse(true)),
@@ -331,9 +337,10 @@ func TestCreateScreenLineScrolledUtf8SearchHit(t *testing.T) {
 func TestCreateScreenLineScrolled2Utf8SearchHit(t *testing.T) {
 	pattern := regexp.MustCompile("ä")

-	line := createScreenLine(2, 4, "åååäö", pattern)
+	line := NewLine("åååäö")
+	screenLine := createScreenLine(2, 4, line, pattern)

-	assertTokenRangesEqual(t, line, []Token{
+	assertTokenRangesEqual(t, screenLine, []Token{
 		createExpectedCell('<', tcell.StyleDefault.Reverse(true)),
 		createExpectedCell('å', tcell.StyleDefault),
 		createExpectedCell('ä', tcell.StyleDefault.Reverse(true)),
diff --git m/reader.go m/reader.go
index 418c4c5..d47b710 100644
--- m/reader.go
+++ m/reader.go
@@ -29,7 +29,7 @@ import (
 //
 // This package provides query methods for the struct, no peeking!!
 type Reader struct {
-	lines   []string
+	lines   []*Line
 	name    *string
 	lock    *sync.Mutex
 	err     error
@@ -41,7 +41,7 @@ type Reader struct {

 // Lines contains a number of lines from the reader, plus metadata
 type Lines struct {
-	lines []string
+	lines []*Line

 	// One-based line number of the first line returned
 	firstLineOneBased int
@@ -136,7 +136,7 @@ func readStream(stream io.Reader, reader *Reader, fromFilter *exec.Cmd) {
 		}

 		reader.lock.Lock()
-		reader.lines = append(reader.lines, string(completeLine))
+		reader.lines = append(reader.lines, NewLine(string(completeLine)))
 		reader.lock.Unlock()

 		// This is how to do a non-blocking write to a channel:
@@ -172,7 +172,7 @@ func NewReaderFromStream(name string, reader io.Reader) *Reader {
 // If fromFilter is not nil this method will wait() for it,
 // and effectively takes over ownership for it.
 func newReaderFromStream(reader io.Reader, fromFilter *exec.Cmd) *Reader {
-	var lines []string
+	var lines []*Line
 	var lock = &sync.Mutex{}
 	done := make(chan bool, 1)

@@ -201,9 +201,11 @@ func newReaderFromStream(reader io.Reader, fromFilter *exec.Cmd) *Reader {
 // Moar in the bottom left corner of the screen.
 func NewReaderFromText(name string, text string) *Reader {
 	noExternalNewlines := strings.Trim(text, "\n")
-	lines := []string{}
+	lines := []*Line{}
 	if len(noExternalNewlines) > 0 {
-		lines = strings.Split(noExternalNewlines, "\n")
+		for _, line := range strings.Split(noExternalNewlines, "\n") {
+			lines = append(lines, NewLine(line))
+		}
 	}
 	done := make(chan bool, 1)
 	done <- true
@@ -380,7 +382,7 @@ func (r *Reader) GetLineCount() int {
 }

 // GetLine gets a line. If the requested line number is out of bounds, nil is returned.
-func (r *Reader) GetLine(lineNumberOneBased int) *string {
+func (r *Reader) GetLine(lineNumberOneBased int) *Line {
 	r.lock.Lock()
 	defer r.lock.Unlock()

@@ -390,7 +392,7 @@ func (r *Reader) GetLine(lineNumberOneBased int) *string {
 	if lineNumberOneBased > len(r.lines) {
 		return nil
 	}
-	return &r.lines[lineNumberOneBased-1]
+	return r.lines[lineNumberOneBased-1]
 }

 // GetLines gets the indicated lines from the input
diff --git m/reader_test.go m/reader_test.go
index 2ba7326..0e2aed2 100644
--- m/reader_test.go
+++ m/reader_test.go
@@ -158,8 +158,8 @@ func TestGetLongLine(t *testing.T) {
 	assert.Equal(t, len(lines.lines), 1)

 	line := lines.lines[0]
-	assert.Assert(t, strings.HasPrefix(line, "1 2 3 4"), "<%s>", line)
-	assert.Assert(t, strings.HasSuffix(line, "0123456789"), line)
+	assert.Assert(t, strings.HasPrefix(line.Plain(), "1 2 3 4"), "<%s>", line)
+	assert.Assert(t, strings.HasSuffix(line.Plain(), "0123456789"), line)

 	stat, err := os.Stat(file)
 	if err != nil {
@@ -168,7 +168,7 @@ func TestGetLongLine(t *testing.T) {
 	fileSize := stat.Size()

 	// The "+1" is because the Reader strips off the ending linefeed
-	assert.Equal(t, len(line)+1, int(fileSize))
+	assert.Equal(t, len(line.Plain())+1, int(fileSize))
 }

 func getReaderWithLineCount(totalLines int) *Reader {
@@ -219,7 +219,7 @@ func testCompressedFile(t *testing.T, filename string) {
 		panic(err)
 	}

-	assert.Equal(t, reader.GetLines(1, 5).lines[0], "This is a compressed file", "%s", filename)
+	assert.Equal(t, reader.GetLines(1, 5).lines[0].Plain(), "This is a compressed file", "%s", filename)
 }

 func TestCompressedFiles(t *testing.T) {

Change-Id: Id8671001ec7c1038e2df0b87a45d346a1f1dd663
2021-01-11 10:42:34 +01:00
Johan Walles
570d780bc2 NewReaderFromStream(): Make name mandatory
This makes the API somewhat simpler to use. To support not providing any
name we still allow the empty string as the name, and document that
thoroughly.
2020-12-30 19:00:03 +01:00
Johan Walles
2b18347022 Make a lot fewer functions public 2020-12-29 22:57:44 +01:00
Johan Walles
7f7b0107d5 Make the correct reader constructors public 2020-12-29 17:19:56 +01:00
Johan Walles
1ac14b1c23 Fix private-functions naming
It's start-with-lowercase, not start-with-underscore.
2020-12-29 17:08:54 +01:00
Johan Walles
ebf28a5cce Fix test running problem
By not highlighting text files.

Fixes #29.
2020-11-17 18:20:58 +01:00
Johan Walles
ac6fcadb12 Improve diagnostics on a test failure 2020-11-17 17:16:57 +01:00
Johan Walles
b2d01b4ad4 Test filter-stderr in error report 2020-03-28 10:10:38 +01:00
Johan Walles
b2cfdf63c3 Fix a broken test
Why isn't this caught by the compiler?

Change-Id: I70e6c0bfb48542a2ac13abd04adbd28e47535eb0
2019-11-19 17:30:41 +01:00
Johan Walles
d36cb1c561 Fix an off-by-one error in a test
Change-Id: I425bc33a611ba96b4d95676b6231e7bc46a87463
2019-11-19 16:58:01 +01:00
Johan Walles
9a2ab6df98 Add test for reading long lines
Change-Id: Ib24518c681f1ffc93192a985cd573fa37006d1f8
2019-11-19 16:53:17 +01:00
Johan Walles
b781d09a72 Add another test case
diff --git m/reader_test.go m/reader_test.go
index 176192b..bed1e33 100644
--- m/reader_test.go
+++ m/reader_test.go
@@ -30,6 +30,12 @@ func _TestGetLineCount(t *testing.T, reader *Reader) {
 	if err != nil {
 		t.Error("Error counting lines of", *reader.name, err)
 	}
+
+	if strings.HasSuffix(*reader.name, "/line-without-newline.txt") {
+		// "wc -l" thinks this file contains zero lines
+		fileLineCount = 1
+	}
+
 	if reader.GetLineCount() != fileLineCount {
 		t.Errorf("Got %d lines but expected %d: <%s>",
 			reader.GetLineCount(), fileLineCount, *reader.name)
diff --git sample-files/line-without-newline.txt sample-files/line-without-newline.txt
new file mode 100644
index 0000000..2260c57
--- /dev/null
+++ sample-files/line-without-newline.txt
@@ -0,0 +1 @@
+This file contains no newlines
\ No newline at end of file

Change-Id: Ic2801ce3477a7afd4537d340385c884c5f2b7438
2019-11-19 14:47:44 +01:00
Johan Walles
8fbf3dfaa7 Report file IO problems even for filtered files
Before this change, trying to open a filterable file that didn't exist,
like "REAXDME.md" for example, would result in us displaying an empty
pager.

With this change in place, that now results in an error message.
2019-07-25 07:46:58 +02:00
Johan Walles
ebba21a409 Dodge corner case issue
If call "highlight" on an empty file the output will have a line.

We don't really care about that.

This change renames empty.txt (which highlight wants to highlight) into
just "empty", which highlight will not try to highligt, so no line is
added and the test passes.
2019-07-15 18:49:25 +02:00
Johan Walles
0f0c8a1111 Don't highlight files with unsupported file extensions 2019-07-15 18:46:53 +02:00
Johan Walles
c4c2027dfc Comment out some failing tests
They do find errors, but this branch is not the place to fix those.
2019-07-10 06:44:10 +02:00
Johan Walles
c1665113e6 Add more tests, find more problems... 2019-07-10 00:21:36 +02:00
Johan Walles
a43aafe7b9 Fix concurrency issues in the tests...
... and unveil actual highlighting problems.
2019-07-09 23:41:49 +02:00
Johan Walles
ddee885e92 Make tests compile
But not pass. The tests need to wait for the reader to get done before
verifying what it did.
2019-07-09 19:57:32 +02:00
Johan Walles
94007d767c Plan for more filter testing 2019-06-25 19:44:22 +02:00
Johan Walles
fd10508c8d Implement transparent decompression 2019-06-24 21:55:33 +02:00
Johan Walles
e640c7d856 Add (failing) compressed-files tests 2019-06-23 21:30:11 +02:00
Johan Walles
70500056af Show file name and line numbers 2019-06-21 23:24:53 +02:00
Johan Walles
7a33e82ba1 It's already there 2019-06-18 20:19:07 +02:00
Johan Walles
5cc47df367 Render broken UTF8 as red-on-white question marks
The default broken UTF8 marker in Go doesn't render well in my terminal.
2019-06-18 19:08:35 +02:00
Johan Walles
3011b15a09 Add note about broken UTF8 2019-06-17 22:22:17 +02:00
Johan Walles
9619c020e4 Add support for rendering TABs 2019-06-17 21:39:57 +02:00
Johan Walles
42c45aedae Simplify Reader API 2019-06-16 21:54:25 +02:00
Johan Walles
a1cdf0c200 Add logging 2019-06-16 10:02:19 +02:00
Johan Walles
38f3c28faf FIXME: Add broken-UTF8 tests 2019-06-15 17:13:10 +02:00
Johan Walles
e4b83d91b5 Fix another off-by-one error 2019-06-14 06:59:19 +02:00
Johan Walles
0abaf16da5 Fix off-by-one error in the _Reader 2019-06-14 06:49:27 +02:00
Johan Walles
da30eaf956 Handle displaying empty files
And introduce a unit test!
2019-06-13 20:04:51 +02:00