mirror of
https://github.com/walles/moar.git
synced 2025-01-08 14:30:57 +03:00
Special case stripping string formatting
Stripping string formatting is on the hot path while searching. This change makes BenchmarkPlainTextSearch() over 7x faster. However, it also has problems with tab expansion, so some tests fail; let's see how we should handle that.
This commit is contained in:
parent
c8a8cb4517
commit
2c20fc31fe
@ -35,23 +35,16 @@ func NewLine(raw string) *Line {
|
||||
|
||||
// Tokens returns a representation of the string split into styled tokens
|
||||
func (line *Line) Tokens() []twin.Cell {
|
||||
cells, _ := cellsFromString(*line.raw)
|
||||
return cells
|
||||
return cellsFromString(*line.raw)
|
||||
}
|
||||
|
||||
// Plain returns a plain text representation of the initial string
|
||||
func (line *Line) Plain() string {
|
||||
line.parse()
|
||||
return *line.plain
|
||||
}
|
||||
|
||||
func (line *Line) parse() {
|
||||
if line.plain != nil {
|
||||
// Already done
|
||||
return
|
||||
if line.plain == nil {
|
||||
plain := withoutFormatting(*line.raw)
|
||||
line.plain = &plain
|
||||
}
|
||||
|
||||
_, line.plain = cellsFromString(*line.raw)
|
||||
return *line.plain
|
||||
}
|
||||
|
||||
// SetManPageFormatFromEnv parses LESS_TERMCAP_xx environment variables and
|
||||
@ -78,14 +71,22 @@ func resetManPageFormatForTesting() {
|
||||
|
||||
func termcapToStyle(termcap string) twin.Style {
|
||||
// Add a character to be sure we have one to take the format from
|
||||
cells, _ := cellsFromString(termcap + "x")
|
||||
cells := cellsFromString(termcap + "x")
|
||||
return cells[len(cells)-1].Style
|
||||
}
|
||||
|
||||
// cellsFromString turns a (formatted) string into a series of screen cells,
|
||||
// and an unformatted string
|
||||
func cellsFromString(s string) ([]twin.Cell, *string) {
|
||||
var tokens []twin.Cell
|
||||
func withoutFormatting(s string) string {
|
||||
builder := strings.Builder{}
|
||||
for _, styledString := range styledStringsFromString(s) {
|
||||
builder.WriteString(styledString.String)
|
||||
}
|
||||
|
||||
return builder.String()
|
||||
}
|
||||
|
||||
// Turn a (formatted) string into a series of screen cells
|
||||
func cellsFromString(s string) []twin.Cell {
|
||||
var cells []twin.Cell
|
||||
|
||||
// Specs: https://en.wikipedia.org/wiki/ANSI_escape_code#3-bit_and_4-bit
|
||||
styleBrokenUtf8 := twin.StyleDefault.Background(twin.NewColor16(1)).Foreground(twin.NewColor16(7))
|
||||
@ -96,42 +97,36 @@ func cellsFromString(s string) ([]twin.Cell, *string) {
|
||||
|
||||
case '\x09': // TAB
|
||||
for {
|
||||
tokens = append(tokens, twin.Cell{
|
||||
cells = append(cells, twin.Cell{
|
||||
Rune: ' ',
|
||||
Style: styledString.Style,
|
||||
})
|
||||
|
||||
if (len(tokens))%_TabSize == 0 {
|
||||
if (len(cells))%_TabSize == 0 {
|
||||
// We arrived at the next tab stop
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
case '<27>': // Go's broken-UTF8 marker
|
||||
tokens = append(tokens, twin.Cell{
|
||||
cells = append(cells, twin.Cell{
|
||||
Rune: '?',
|
||||
Style: styleBrokenUtf8,
|
||||
})
|
||||
|
||||
case '\x08': // Backspace
|
||||
tokens = append(tokens, twin.Cell{
|
||||
cells = append(cells, twin.Cell{
|
||||
Rune: '<',
|
||||
Style: styleBrokenUtf8,
|
||||
})
|
||||
|
||||
default:
|
||||
tokens = append(tokens, token)
|
||||
cells = append(cells, token)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var stringBuilder strings.Builder
|
||||
stringBuilder.Grow(len(tokens))
|
||||
for _, token := range tokens {
|
||||
stringBuilder.WriteRune(token.Rune)
|
||||
}
|
||||
plainString := stringBuilder.String()
|
||||
return tokens, &plainString
|
||||
return cells
|
||||
}
|
||||
|
||||
// Consume 'x<x', where '<' is backspace and the result is a bold 'x'
|
||||
|
@ -37,11 +37,12 @@ func TestTokenize(t *testing.T) {
|
||||
var loglines strings.Builder
|
||||
log.SetOutput(&loglines)
|
||||
|
||||
tokens, plainString := cellsFromString(line)
|
||||
if len(tokens) != utf8.RuneCountInString(*plainString) {
|
||||
tokens := cellsFromString(line)
|
||||
plainString := withoutFormatting(line)
|
||||
if len(tokens) != utf8.RuneCountInString(plainString) {
|
||||
t.Errorf("%s:%d: len(tokens)=%d, len(plainString)=%d for: <%s>",
|
||||
fileName, lineNumber,
|
||||
len(tokens), utf8.RuneCountInString(*plainString), line)
|
||||
len(tokens), utf8.RuneCountInString(plainString), line)
|
||||
continue
|
||||
}
|
||||
|
||||
@ -54,7 +55,7 @@ func TestTokenize(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestUnderline(t *testing.T) {
|
||||
tokens, _ := cellsFromString("a\x1b[4mb\x1b[24mc")
|
||||
tokens := cellsFromString("a\x1b[4mb\x1b[24mc")
|
||||
assert.Equal(t, len(tokens), 3)
|
||||
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
|
||||
assert.Equal(t, tokens[1], twin.Cell{Rune: 'b', Style: twin.StyleDefault.WithAttr(twin.AttrUnderline)})
|
||||
@ -63,14 +64,14 @@ func TestUnderline(t *testing.T) {
|
||||
|
||||
func TestManPages(t *testing.T) {
|
||||
// Bold
|
||||
tokens, _ := cellsFromString("ab\bbc")
|
||||
tokens := cellsFromString("ab\bbc")
|
||||
assert.Equal(t, len(tokens), 3)
|
||||
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
|
||||
assert.Equal(t, tokens[1], twin.Cell{Rune: 'b', Style: twin.StyleDefault.WithAttr(twin.AttrBold)})
|
||||
assert.Equal(t, tokens[2], twin.Cell{Rune: 'c', Style: twin.StyleDefault})
|
||||
|
||||
// Underline
|
||||
tokens, _ = cellsFromString("a_\bbc")
|
||||
tokens = cellsFromString("a_\bbc")
|
||||
assert.Equal(t, len(tokens), 3)
|
||||
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
|
||||
assert.Equal(t, tokens[1], twin.Cell{Rune: 'b', Style: twin.StyleDefault.WithAttr(twin.AttrUnderline)})
|
||||
@ -78,7 +79,7 @@ func TestManPages(t *testing.T) {
|
||||
|
||||
// Bullet point 1, taken from doing this on my macOS system:
|
||||
// env PAGER="hexdump -C" man printf | moar
|
||||
tokens, _ = cellsFromString("a+\b+\bo\bob")
|
||||
tokens = cellsFromString("a+\b+\bo\bob")
|
||||
assert.Equal(t, len(tokens), 3)
|
||||
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
|
||||
assert.Equal(t, tokens[1], twin.Cell{Rune: '•', Style: twin.StyleDefault})
|
||||
@ -86,7 +87,7 @@ func TestManPages(t *testing.T) {
|
||||
|
||||
// Bullet point 2, taken from doing this using the "fish" shell on my macOS system:
|
||||
// man printf | hexdump -C | moar
|
||||
tokens, _ = cellsFromString("a+\bob")
|
||||
tokens = cellsFromString("a+\bob")
|
||||
assert.Equal(t, len(tokens), 3)
|
||||
assert.Equal(t, tokens[0], twin.Cell{Rune: 'a', Style: twin.StyleDefault})
|
||||
assert.Equal(t, tokens[1], twin.Cell{Rune: '•', Style: twin.StyleDefault})
|
||||
|
Loading…
Reference in New Issue
Block a user