1
1
mirror of https://github.com/walles/moar.git synced 2025-01-08 14:30:57 +03:00

Special case stripping string formatting

Stripping string formatting is on the hot path while searching. This
change makes BenchmarkPlainTextSearch() over 7x faster.

However, the new code path has problems with tab expansion, so some tests
currently fail. How to handle that is still to be decided.
This commit is contained in:
Johan Walles 2021-04-24 17:20:09 +02:00
parent c8a8cb4517
commit 2c20fc31fe
2 changed files with 33 additions and 37 deletions

View File

@ -35,23 +35,16 @@ func NewLine(raw string) *Line {
// Tokens returns a representation of the string split into styled tokens
func (line *Line) Tokens() []twin.Cell {
cells, _ := cellsFromString(*line.raw)
return cells
return cellsFromString(*line.raw)
}
// Plain returns a plain text representation of the initial string
func (line *Line) Plain() string {
line.parse()
return *line.plain
}
func (line *Line) parse() {
if line.plain != nil {
// Already done
return
if line.plain == nil {
plain := withoutFormatting(*line.raw)
line.plain = &plain
}
_, line.plain = cellsFromString(*line.raw)
return *line.plain
}
// SetManPageFormatFromEnv parses LESS_TERMCAP_xx environment variables and
@ -78,14 +71,22 @@ func resetManPageFormatForTesting() {
func termcapToStyle(termcap string) twin.Style {
// Add a character to be sure we have one to take the format from
cells, _ := cellsFromString(termcap + "x")
cells := cellsFromString(termcap + "x")
return cells[len(cells)-1].Style
}
// cellsFromString turns a (formatted) string into a series of screen cells,
// and an unformatted string
func cellsFromString(s string) ([]twin.Cell, *string) {
var tokens []twin.Cell
// withoutFormatting returns the text of s with its styling removed.
//
// It splits s using styledStringsFromString and concatenates the String part
// of every piece it gets back, discarding the styles. The pieces are written
// out verbatim: this function itself does no tab expansion and no per-rune
// replacement.
// NOTE(review): the result may therefore differ in length from what
// cellsFromString produces for the same input when s contains tabs — confirm
// against callers that compare the two.
func withoutFormatting(s string) string {
	var builder strings.Builder

	// This runs for every line while searching, so reserve the buffer once up
	// front instead of letting it grow piecemeal. len(s) is only a capacity
	// hint; the builder still grows if more is written.
	builder.Grow(len(s))

	for _, styledString := range styledStringsFromString(s) {
		builder.WriteString(styledString.String)
	}
	return builder.String()
}
// Turn a (formatted) string into a series of screen cells
func cellsFromString(s string) []twin.Cell {
var cells []twin.Cell
// Specs: https://en.wikipedia.org/wiki/ANSI_escape_code#3-bit_and_4-bit
styleBrokenUtf8 := twin.StyleDefault.Background(twin.NewColor16(1)).Foreground(twin.NewColor16(7))
@ -96,42 +97,36 @@ func cellsFromString(s string) ([]twin.Cell, *string) {
case '\x09': // TAB
for {
tokens = append(tokens, twin.Cell{
cells = append(cells, twin.Cell{
Rune: ' ',
Style: styledString.Style,
})
if (len(tokens))%_TabSize == 0 {
if (len(cells))%_TabSize == 0 {
// We arrived at the next tab stop
break
}
}
case '<27>': // Go's broken-UTF8 marker
tokens = append(tokens, twin.Cell{
cells = append(cells, twin.Cell{
Rune: '?',
Style: styleBrokenUtf8,
})
case '\x08': // Backspace
tokens = append(tokens, twin.Cell{
cells = append(cells, twin.Cell{
Rune: '<',
Style: styleBrokenUtf8,
})
default:
tokens = append(tokens, token)
cells = append(cells, token)
}
}
}
var stringBuilder strings.Builder
stringBuilder.Grow(len(tokens))
for _, token := range tokens {
stringBuilder.WriteRune(token.Rune)
}
plainString := stringBuilder.String()
return tokens, &plainString
return cells
}
// Consume 'x<x', where '<' is backspace and the result is a bold 'x'

View File

@ -37,11 +37,12 @@ func TestTokenize(t *testing.T) {
var loglines strings.Builder
log.SetOutput(&loglines)
tokens, plainString := cellsFromString(line)
if len(tokens) != utf8.RuneCountInString(*plainString) {
tokens := cellsFromString(line)
plainString := withoutFormatting(line)
if len(tokens) != utf8.RuneCountInString(plainString) {
t.Errorf("%s:%d: len(tokens)=%d, len(plainString)=%d for: <%s>",
fileName, lineNumber,
len(tokens), utf8.RuneCountInString(*plainString), line)
len(tokens), utf8.RuneCountInString(plainString), line)
continue
}
@ -54,7 +55,7 @@ func TestTokenize(t *testing.T) {
}
func TestUnderline(t *testing.T) {
tokens, _ := cellsFromString("a\x1b[4mb\x1b[24mc")
tokens := cellsFromString("a\x1b[4mb\x1b[24mc")
assert.Equal(t, len(tokens), 3)
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
assert.Equal(t, tokens[1], twin.Cell{Rune: 'b', Style: twin.StyleDefault.WithAttr(twin.AttrUnderline)})
@ -63,14 +64,14 @@ func TestUnderline(t *testing.T) {
func TestManPages(t *testing.T) {
// Bold
tokens, _ := cellsFromString("ab\bbc")
tokens := cellsFromString("ab\bbc")
assert.Equal(t, len(tokens), 3)
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
assert.Equal(t, tokens[1], twin.Cell{Rune: 'b', Style: twin.StyleDefault.WithAttr(twin.AttrBold)})
assert.Equal(t, tokens[2], twin.Cell{Rune: 'c', Style: twin.StyleDefault})
// Underline
tokens, _ = cellsFromString("a_\bbc")
tokens = cellsFromString("a_\bbc")
assert.Equal(t, len(tokens), 3)
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
assert.Equal(t, tokens[1], twin.Cell{Rune: 'b', Style: twin.StyleDefault.WithAttr(twin.AttrUnderline)})
@ -78,7 +79,7 @@ func TestManPages(t *testing.T) {
// Bullet point 1, taken from doing this on my macOS system:
// env PAGER="hexdump -C" man printf | moar
tokens, _ = cellsFromString("a+\b+\bo\bob")
tokens = cellsFromString("a+\b+\bo\bob")
assert.Equal(t, len(tokens), 3)
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
assert.Equal(t, tokens[1], twin.Cell{Rune: '•', Style: twin.StyleDefault})
@ -86,7 +87,7 @@ func TestManPages(t *testing.T) {
// Bullet point 2, taken from doing this using the "fish" shell on my macOS system:
// man printf | hexdump -C | moar
tokens, _ = cellsFromString("a+\bob")
tokens = cellsFromString("a+\bob")
assert.Equal(t, len(tokens), 3)
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
assert.Equal(t, tokens[1], twin.Cell{Rune: '•', Style: twin.StyleDefault})