1
1
mirror of https://github.com/walles/moar.git synced 2024-09-19 07:58:00 +03:00
moar/m/ansiTokenizer.go

290 lines
6.4 KiB
Go
Raw Normal View History

package m
import (
2019-10-27 11:15:16 +03:00
"errors"
2019-10-27 23:40:30 +03:00
"fmt"
2019-06-16 21:57:03 +03:00
"log"
"regexp"
"strings"
"github.com/gdamore/tcell"
)
2019-06-17 22:39:57 +03:00
// _TabSize is the distance between tab stops, counted in tokens. TAB
// characters are expanded with spaces up to the next multiple of this.
const _TabSize = 4
2019-06-16 21:57:03 +03:00
// Token is a rune with a style to be written to a cell on screen.
//
// Rune is the character to write, and Style is the tcell style to write it
// with.
type Token struct {
Rune rune
Style tcell.Style
}
// TokensFromString turns a (formatted) string into a series of tokens,
// and an unformatted string
func TokensFromString(logger *log.Logger, s string) ([]Token, *string) {
var tokens []Token
styleBrokenUtf8 := tcell.StyleDefault.Background(7).Foreground(1)
2019-06-16 21:57:03 +03:00
for _, styledString := range _StyledStringsFromString(logger, s) {
2019-06-27 22:39:46 +03:00
for _, token := range _TokensFromStyledString(styledString) {
switch token.Rune {
case '\x09': // TAB
2019-06-17 22:39:57 +03:00
for {
tokens = append(tokens, Token{
Rune: ' ',
Style: styledString.Style,
})
if (len(tokens))%_TabSize == 0 {
// We arrived at the next tab stop
break
}
}
case '<27>': // Go's broken-UTF8 marker
tokens = append(tokens, Token{
Rune: '?',
Style: styleBrokenUtf8,
})
2019-06-27 22:39:46 +03:00
case '\x08': // Backspace
2019-06-17 22:39:57 +03:00
tokens = append(tokens, Token{
2019-06-27 22:39:46 +03:00
Rune: '<',
Style: styleBrokenUtf8,
2019-06-17 22:39:57 +03:00
})
2019-06-27 22:39:46 +03:00
default:
tokens = append(tokens, token)
}
}
}
plainString := ""
for _, token := range tokens {
plainString += string(token.Rune)
}
return tokens, &plainString
2019-06-27 22:39:46 +03:00
}
func _TokensFromStyledString(styledString _StyledString) []Token {
tokens := make([]Token, 0, len(styledString.String)+1)
oneBack := '\x00'
twoBack := '\x00'
for _, char := range []rune(styledString.String) {
if oneBack == '\x08' && twoBack != '\x00' {
// Something-Backspace-Something
replacement := (*Token)(nil)
if char == twoBack {
replacement = &Token{
Rune: twoBack,
Style: styledString.Style.Bold(true),
}
2019-06-17 22:39:57 +03:00
}
2019-06-27 22:39:46 +03:00
if twoBack == '_' {
replacement = &Token{
Rune: char,
Style: styledString.Style.Underline(true),
}
}
2019-07-08 09:58:53 +03:00
// FIXME: Man page formatting fails, if I do (in bash)...
// "man printf|hexdump -C|grep -10 leading| grep --color 08"
// ... I get...
// "000003e0 20 20 20 20 20 20 20 20 2b 08 2b 08 6f 08 6f 20 | +.+.o.o |"
// ... wich "less" renders as a bold "o". We should as well.
//
// I don't get the logic though, the sequence is:
// plus-backspace-plus-backspace-o-backspace-o
//
// Maybe the interpretation should be:
// "Make a bold +, then erase that and replace it with a bold o"?
2019-06-27 22:39:46 +03:00
if replacement != nil {
tokens = append(tokens[0:len(tokens)-2], *replacement)
twoBack = oneBack
oneBack = char
continue
}
// No match, just keep going
}
2019-06-27 22:39:46 +03:00
tokens = append(tokens, Token{
Rune: char,
Style: styledString.Style,
})
twoBack = oneBack
oneBack = char
}
return tokens
}
// _StyledString is a piece of text together with the one style that applies
// to all of it.
type _StyledString struct {
String string
Style tcell.Style
}
2019-06-16 21:57:03 +03:00
func _StyledStringsFromString(logger *log.Logger, s string) []_StyledString {
// This function was inspired by the
// https://golang.org/pkg/regexp/#Regexp.Split source code
pattern := regexp.MustCompile("\x1b\\[([0-9;]*m)")
matches := pattern.FindAllStringIndex(s, -1)
styledStrings := make([]_StyledString, 0, len(matches)+1)
style := tcell.StyleDefault
beg := 0
end := 0
for _, match := range matches {
end = match[0]
2019-06-16 10:23:25 +03:00
if end > beg {
2019-06-27 22:39:46 +03:00
// Found non-zero length string
styledStrings = append(styledStrings, _StyledString{
String: s[beg:end],
Style: style,
})
}
matchedPart := s[match[0]:match[1]]
2019-06-16 21:57:03 +03:00
style = _UpdateStyle(logger, style, matchedPart)
beg = match[1]
}
if end != len(s) {
styledStrings = append(styledStrings, _StyledString{
String: s[beg:],
Style: style,
})
}
return styledStrings
}
// _UpdateStyle parses a string of the form "ESC[33m" into changes to style
2019-06-16 21:57:03 +03:00
func _UpdateStyle(logger *log.Logger, style tcell.Style, escapeSequence string) tcell.Style {
2019-10-27 11:15:16 +03:00
numbers := strings.Split(escapeSequence[2:len(escapeSequence)-1], ";")
index := 0
for index < len(numbers) {
number := numbers[index]
index++
switch number {
case "", "0", "00":
style = tcell.StyleDefault
2019-06-16 21:57:03 +03:00
2019-06-16 21:58:19 +03:00
case "1":
style = style.Bold(true)
case "4":
style = style.Underline(true)
2019-06-16 22:39:27 +03:00
case "7":
style = style.Reverse(true)
case "27":
style = style.Reverse(false)
2019-06-16 21:57:03 +03:00
// Foreground colors
case "30":
style = style.Foreground(0)
case "31":
style = style.Foreground(1)
case "32":
style = style.Foreground(2)
case "33":
style = style.Foreground(3)
case "34":
style = style.Foreground(4)
case "35":
style = style.Foreground(5)
case "36":
style = style.Foreground(6)
case "37":
style = style.Foreground(7)
2019-10-27 11:15:16 +03:00
case "38":
var err error = nil
var color *tcell.Color
2019-10-27 23:40:30 +03:00
index, color, err = consumeCompositeColor(numbers, index-1)
2019-10-27 11:15:16 +03:00
if err != nil {
logger.Printf("Foreground: %s", err.Error())
return style
}
style = style.Foreground(*color)
2019-07-15 14:34:42 +03:00
case "39":
style = style.Foreground(tcell.ColorDefault)
2019-06-16 21:57:03 +03:00
// Background colors
case "40":
style = style.Background(0)
2019-06-16 21:57:03 +03:00
case "41":
style = style.Background(1)
2019-06-16 21:57:03 +03:00
case "42":
style = style.Background(2)
2019-06-16 21:57:03 +03:00
case "43":
style = style.Background(3)
2019-06-16 21:57:03 +03:00
case "44":
style = style.Background(4)
2019-06-16 21:57:03 +03:00
case "45":
style = style.Background(5)
2019-06-16 21:57:03 +03:00
case "46":
style = style.Background(6)
2019-06-16 21:57:03 +03:00
case "47":
style = style.Background(7)
2019-10-27 11:15:16 +03:00
case "48":
var err error = nil
var color *tcell.Color
2019-10-27 23:40:30 +03:00
index, color, err = consumeCompositeColor(numbers, index-1)
2019-10-27 11:15:16 +03:00
if err != nil {
logger.Printf("Background: %s", err.Error())
return style
}
style = style.Background(*color)
2019-07-15 14:34:42 +03:00
case "49":
style = style.Background(tcell.ColorDefault)
2019-06-16 21:57:03 +03:00
default:
logger.Printf("Unrecognized ANSI SGR code <%s>", number)
}
}
return style
}
2019-10-27 11:15:16 +03:00
2019-10-27 23:40:30 +03:00
// numbers is a list of numbers from a ANSI SGR string
2019-10-27 11:15:16 +03:00
// index points to either 38 or 48 in that string
//
// This method will return:
// * The first index in the string that this function did not consume
// * A color value that can be applied to a style
2019-10-27 23:40:30 +03:00
func consumeCompositeColor(numbers []string, index int) (int, *tcell.Color, error) {
2019-10-28 22:09:08 +03:00
baseIndex := index
2019-10-27 23:40:30 +03:00
if numbers[index] != "38" && numbers[index] != "48" {
err := fmt.Errorf(
"Unknown start of color sequence <%s>, expected 38 (foreground) or 48 (background): <CSI %sm>",
numbers[index],
2019-10-28 22:09:08 +03:00
strings.Join(numbers[baseIndex:], ";"))
return -1, nil, err
}
index++
if index >= len(numbers) {
err := fmt.Errorf(
"Incomplete color sequence: <CSI %sm>",
strings.Join(numbers[baseIndex:], ";"))
2019-10-27 23:40:30 +03:00
return -1, nil, err
}
2019-10-27 11:15:16 +03:00
return -1, nil, errors.New("Unimplemented")
}