mirror of https://github.com/walles/moar.git synced 2024-09-11 12:15:43 +03:00
2023-11-13 08:51:05 +01:00

283 lines
6.6 KiB

package m
import (
log "github.com/sirupsen/logrus"
// esc is the ASCII escape character (0x1b). The splitter treats it as the
// start of an escape sequence.
const esc = '\x1b'
// styledStringSplitter holds the state for splitting one input string into
// styled parts plus a trailer style.
type styledStringSplitter struct {
// The string being split
input string
// Used as a "Line N: " prefix in debug log messages; may be nil, in which
// case no prefix is logged
lineNumberOneBased *int
// Byte index into input of the next rune nextChar() will decode
nextByteIndex int
// Byte index into input of the rune most recently returned by nextChar()
previousByteIndex int
// Text of the part currently being built
inProgressString strings.Builder
// Style of the part currently being built
inProgressStyle twin.Style
// Parts completed so far
parts []_StyledString
// Set to the in-progress style when a clear-to-end-of-line sequence is seen
trailer twin.Style
// styledStringsFromString converts s into styled string parts plus a trailer
// style.
//
// If s contains no ESC (0x1b) characters, the result is a single part holding
// all of s in twin.StyleDefault, with twin.StyleDefault as the trailer.
// Otherwise a styledStringSplitter is set up for s, and its parts and trailer
// are returned.
//
// lineNumberOneBased is handed to the splitter, which only uses it for log
// messages and nil-checks it first, so nil is fine.
func styledStringsFromString(s string, lineNumberOneBased *int) styledStringsWithTrailer {
if !strings.ContainsAny(s, "\x1b") {
// This shortcut makes BenchmarkPlainTextSearch() perform a lot better
return styledStringsWithTrailer{
trailer: twin.StyleDefault,
styledStrings: []_StyledString{{
String: s,
Style: twin.StyleDefault,
splitter := styledStringSplitter{
input: s,
lineNumberOneBased: lineNumberOneBased,
return styledStringsWithTrailer{
trailer: splitter.trailer,
styledStrings: splitter.parts,
// nextChar decodes and returns the rune starting at nextByteIndex. It records
// where that rune starts in previousByteIndex and moves nextByteIndex past it.
//
// Returns -1 when the input is exhausted. previousByteIndex is updated in that
// case as well, so that lastChar() will also report -1.
func (s *styledStringSplitter) nextChar() rune {
if s.nextByteIndex >= len(s.input) {
s.previousByteIndex = s.nextByteIndex
return -1
char, size := utf8.DecodeRuneInString(s.input[s.nextByteIndex:])
s.previousByteIndex = s.nextByteIndex
s.nextByteIndex += size
return char
// lastChar returns whatever the last call to nextChar() returned, by decoding
// the rune at previousByteIndex again. It does not move the read position.
//
// Returns -1 if previousByteIndex is at or past the end of the input.
func (s *styledStringSplitter) lastChar() rune {
if s.previousByteIndex >= len(s.input) {
return -1
char, _ := utf8.DecodeRuneInString(s.input[s.previousByteIndex:])
return char
// run walks the input one rune at a time.
//
// An ESC is passed on to handleEscape(). If that fails, the failure is logged
// at debug level and scanning starts over from the character that caused the
// problem.
func (s *styledStringSplitter) run() {
char := s.nextChar()
for {
// nextChar() returns -1 when the input is exhausted
if char == -1 {
if char == esc {
// Remember where the ESC is, so that a failed sequence can be logged and
// its characters revisited below
escIndex := s.previousByteIndex
err := s.handleEscape()
if err != nil {
header := ""
if s.lineNumberOneBased != nil {
header = fmt.Sprintf("Line %d: ", *s.lineNumberOneBased)
// Everything consumed since the ESC, including the problematic character
failed := s.input[escIndex:s.nextByteIndex]
// Spell out ESC characters so they are readable in the log
log.Debug(header, "<", strings.ReplaceAll(failed, "\x1b", "ESC"), ">: ", err)
// Somewhere in handleEscape(), we got a character that was
// unexpected. We need to treat everything up to before that
// character as just plain runes.
for _, char := range s.input[escIndex:s.previousByteIndex] {
// Start over with the character that caused the problem
char = s.lastChar()
} else {
char = s.nextChar()
// handleRune handles one rune of the input.
//
// NOTE(review): presumably this adds char to the part in progress; confirm
// against the function body.
func (s *styledStringSplitter) handleRune(char rune) {
// handleEscape reads the character following an ESC. For '[' (CSI) or ']'
// (OSC) it hands over to consumeControlSequence() and returns its result.
//
// Any other character, including end of input, is reported as an error.
func (s *styledStringSplitter) handleEscape() error {
char := s.nextChar()
if char == '[' || char == ']' {
// Got the start of a CSI or an OSC sequence
return s.consumeControlSequence(char)
return fmt.Errorf("Unhandled Fe sequence ESC%c", char)
// consumeControlSequence reads the rest of a control sequence and hands it to
// handleCompleteControlSequence(). Characters in the 0x30-0x3f range keep the
// sequence going; the first character outside of that range ends it and is
// included in what gets handed over.
//
// charAfterEsc is the character that followed the ESC: '[' or ']' when called
// from handleEscape().
//
// Special case: if charAfterEsc is ']' and what has been read so far is
// exactly "8;;", handleUrl() takes over and its result is returned.
//
// Returns an error if the input ends before the sequence does.
func (s *styledStringSplitter) consumeControlSequence(charAfterEsc rune) error {
// Points to right after "ESC[" or "ESC]"
startIndex := s.nextByteIndex
// We're looking for a letter to end the CSI sequence
for {
char := s.nextChar()
if char == -1 {
return fmt.Errorf("Line ended in the middle of a control sequence")
// Range from here:
// https://en.wikipedia.org/wiki/ANSI_escape_code#CSI_(Control_Sequence_Introducer)_sequences
if char >= 0x30 && char <= 0x3f {
// Sequence still in progress
if charAfterEsc == ']' && s.input[startIndex:s.nextByteIndex] == "8;;" {
// Special case, here comes the URL
return s.handleUrl()
// The end, handle what we got
endIndexExclusive := s.nextByteIndex
return s.handleCompleteControlSequence(charAfterEsc, s.input[startIndex:endIndexExclusive])
// handleCompleteControlSequence acts on one fully read control sequence.
//
// If the whole CSI sequence is ESC[33m, you should call this function with just
// "33m".
//
// charAfterEsc says which kind of sequence this is: ']' sends the sequence to
// handleOsc(), '[' is handled here as a CSI sequence, and anything else is an
// error.
//
// For CSI, "K" and "0K" store the in-progress style as the trailer, and
// sequences ending in 'm' go through rawUpdateStyle(). Other CSI sequences
// are reported as errors.
func (s *styledStringSplitter) handleCompleteControlSequence(charAfterEsc rune, sequence string) error {
if charAfterEsc == ']' {
return s.handleOsc(sequence)
if charAfterEsc != '[' {
return fmt.Errorf("Unexpected charAfterEsc: %c", charAfterEsc)
if sequence == "K" || sequence == "0K" {
// Clear to end of line
s.trailer = s.inProgressStyle
return nil
// The final byte of the sequence tells what kind of CSI sequence this is
lastChar := sequence[len(sequence)-1]
if lastChar == 'm' {
newStyle, err := rawUpdateStyle(s.inProgressStyle, sequence)
if err != nil {
return err
return nil
return fmt.Errorf("Unhandled CSI type %q", lastChar)
// handleOsc handles an OSC sequence. sequence is what followed "ESC]", as
// passed in by handleCompleteControlSequence().
//
// The only sequences recognized here are "133;" plus one more character. For
// those, the end marker is read from the input: either BEL (0x07) or ESC \ is
// accepted, and nil is returned. Everything else results in an error.
func (s *styledStringSplitter) handleOsc(sequence string) error {
if strings.HasPrefix(sequence, "133;") && len(sequence) == len("133;A") {
// Got ESC]133;X, where "X" could be anything. These are prompt hints,
// and rendering those makes no sense. We should just ignore them:
// https://gitlab.freedesktop.org/Per_Bothner/specifications/blob/master/proposals/semantic-prompts.md
endMarker := s.nextChar()
// Terminated by BEL
if endMarker == '\x07' {
return nil
// Terminated by ESC \
if endMarker == esc {
if s.nextChar() == '\\' {
return nil
} else {
return fmt.Errorf("Expected ESC \\ after ESC]133;X, got %q", s.lastChar())
return fmt.Errorf("Unhandled OSC sequence")
// handleUrl reads a URL from the input. We just got ESC]8;; and should now
// read the URL. URLs end with ASCII 7 BEL or ESC \.
//
// Returns an error if the input ends before the URL does, if an ESC is
// followed by something other than a backslash, or if a character outside of
// validChars shows up in the URL.
func (s *styledStringSplitter) handleUrl() error {
// Valid URL characters.
// Ref: https://stackoverflow.com/a/1547940/473672
const validChars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~:/?#[]@!$&'()*+,;="
// Points to right after "ESC]8;;"
urlStartIndex := s.nextByteIndex
justSawEsc := false
for {
char := s.nextChar()
if char == -1 {
return fmt.Errorf("Line ended in the middle of a URL")
if justSawEsc {
if char != '\\' {
return fmt.Errorf("Expected ESC \\ but got ESC %q", char)
// End of URL
// Minus two bytes to leave the terminating ESC \ out of the URL
urlEndIndexExclusive := s.nextByteIndex - 2
url := s.input[urlStartIndex:urlEndIndexExclusive]
return nil
// Invariant: justSawEsc == false
if char == esc {
justSawEsc = true
if char == '\x07' {
// End of URL
// Minus one byte to leave the terminating BEL out of the URL
urlEndIndexExclusive := s.nextByteIndex - 1
url := s.input[urlStartIndex:urlEndIndexExclusive]
return nil
if !strings.ContainsRune(validChars, char) {
return fmt.Errorf("Invalid URL character: %q", char)
// It's a valid URL char, keep going
// startNewPart makes style the in-progress style. If style is the same as the
// current in-progress style, no new part is needed.
func (s *styledStringSplitter) startNewPart(style twin.Style) {
if style == s.inProgressStyle {
// No need to start a new part
s.inProgressStyle = style
// finalizeCurrentPart appends the in-progress text, together with the
// in-progress style, to the list of parts. An empty in-progress text means
// there is nothing to do.
func (s *styledStringSplitter) finalizeCurrentPart() {
if s.inProgressString.Len() == 0 {
// Nothing to do
s.parts = append(s.parts, _StyledString{
String: s.inProgressString.String(),
Style: s.inProgressStyle,