started to split things into a separate lexer

Felix Angell 2018-04-21 18:45:10 +01:00
parent 172a7bd777
commit 33cf82bb0c
3 changed files with 111 additions and 33 deletions


@@ -14,6 +14,7 @@ import (
"github.com/felixangell/go-rope"
"github.com/felixangell/phi-editor/cfg"
"github.com/felixangell/phi-editor/lex"
"github.com/felixangell/strife"
"github.com/veandco/go-sdl2/sdl"
)
@@ -774,6 +775,18 @@ var ex, ey = 0, 0
var compiledRegex = map[string]*regexp.Regexp{}
// lexFindMatches runs a lexer over the given line and records a match
// for every token whose lexeme appears in the toMatch set.
func lexFindMatches(matches *map[int]syntaxRuneInfo, currLine string, toMatch map[string]bool, bg int, fg int) {
    // start up a lexer instance and lex the line.
    lexer := lex.New(currLine)
    for _, tok := range lexer.Tokenize() {
        if _, ok := toMatch[tok.Lexeme]; ok {
            (*matches)[tok.Start] = syntaxRuneInfo{bg, -1, len(tok.Lexeme)}
        }
    }
}
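For reference, the struct literal above suggests that syntaxRuneInfo carries a background colour, a foreground colour (-1 meaning keep the default), and the length of the matched span. A minimal sketch of that assumed shape; the real definition lives elsewhere in this file and is not part of this diff:

// assumed shape, not taken from this diff:
type syntaxRuneInfo struct {
    background int // colour the matched span is painted with
    foreground int // -1 here, i.e. keep the default foreground
    length     int // how many characters the match covers
}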
func (b *Buffer) syntaxHighlightLine(currLine string) map[int]syntaxRuneInfo {
matches := map[int]syntaxRuneInfo{}
@@ -788,10 +801,9 @@ func (b *Buffer) syntaxHighlightLine(currLine string) map[int]syntaxRuneInfo {
}
// HOLY SLOW BATMAN
for charIndex := 0; charIndex < len(currLine); charIndex++ {
for syntaxIndex, syntax := range subjects {
if syntax.Pattern != "" {
for syntaxIndex, syntax := range subjects {
if syntax.Pattern != "" {
for charIndex := 0; charIndex < len(currLine); charIndex++ {
// we have a regex pattern
// FIXME this is also very slow!
@@ -819,36 +831,18 @@ func (b *Buffer) syntaxHighlightLine(currLine string) map[int]syntaxRuneInfo {
charIndex = charIndex + matchedStrLen
}
}
} else {
for _, subject := range syntax.Match {
if charIndex+len(subject)+1 > len(currLine) {
continue
}
a := currLine[charIndex : charIndex+len(subject)+1]
// we only want to match whole words, so we check that there is a space
// before or after the subject word.
if strings.Compare(string(a), subject+" ") == 0 || strings.Compare(string(a), " "+subject) == 0 {
// hack
offs := 0
if _, ok := matches[charIndex]; !ok {
// the second branch was true
// so we have to offset this index by one
if a[0] == ' ' {
offs++
}
matches[charIndex+offs] = syntaxRuneInfo{colours[syntaxIndex], -1, len(subject)}
break
}
charIndex += len(subject)+offs
}
}
}
} else {
// FIXME a bit of cleanup is due here!
matchList := make(map[string]bool, len(syntax.Match))
for _, val := range syntax.Match {
matchList[val] = true
}
background := colours[syntaxIndex]
foreground := 0
lexFindMatches(&matches, currLine, matchList, background, foreground)
}
}
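To illustrate the new keyword path in isolation: the syntax group's Match entries are folded into a set, and every token the lexer emits for the line is looked up in it. A rough sketch of that lookup with the syntaxRuneInfo colour bookkeeping stripped out (hypothetical helper, not part of this commit):

package highlight

import "github.com/felixangell/phi-editor/lex"

// keywordColumns mirrors what lexFindMatches does, but only returns
// the starting columns of tokens found in the match list.
func keywordColumns(currLine string, match []string) []int {
    matchList := make(map[string]bool, len(match))
    for _, val := range match {
        matchList[val] = true
    }

    var cols []int
    for _, tok := range lex.New(currLine).Tokenize() {
        if matchList[tok.Lexeme] {
            cols = append(cols, tok.Start)
        }
    }
    return cols
}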

lex/lex.go (new file, 67 lines)

@@ -0,0 +1,67 @@
package lex

// Lexer is a tiny whitespace lexer: it walks a single line of input
// and splits it into Word tokens.
type Lexer struct {
    pos   int
    input []rune
}

func New(input string) *Lexer {
    return &Lexer{
        pos:   0,
        input: []rune(input),
    }
}

func (l *Lexer) consume() rune {
    consumed := l.peek()
    l.pos++
    return consumed
}

func (l *Lexer) next(offs int) rune {
    return l.input[l.pos+offs]
}

func (l *Lexer) peek() rune {
    return l.input[l.pos]
}

func (l *Lexer) hasNext() bool {
    return l.pos < len(l.input)
}

func (l *Lexer) Tokenize() []*Token {
    // TODO make it so that we can generate
    // lexers from the config files,
    // allowing the user to put token
    // matching criteria in here. For now
    // we'll just go with a simple lexer
    // that splits strings by spaces/tabs/etc.
    var result []*Token
    for l.hasNext() {
        // skip all the layout characters;
        // we don't care about these.
        for l.hasNext() && l.peek() <= ' ' {
            l.consume()
        }
        if !l.hasNext() {
            // the rest of the line was layout only,
            // so there is nothing left to tokenize.
            break
        }

        startPos := l.pos
        for l.hasNext() {
            // we've run into a layout character,
            // which ends the current token.
            if l.peek() <= ' ' {
                break
            }
            l.consume()
        }

        // everything between startPos and the current
        // position should be a recognised token.
        lexeme := string(l.input[startPos:l.pos])
        tok := NewToken(lexeme, Word, startPos)
        result = append(result, tok)
    }
    return result
}
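A quick usage sketch for the new lexer (hypothetical example, not part of this commit); each token carries its lexeme, kind, and starting column, which is what the buffer uses to place highlights:

package main

import (
    "fmt"

    "github.com/felixangell/phi-editor/lex"
)

func main() {
    lexer := lex.New("if foo != nil {")
    for _, tok := range lexer.Tokenize() {
        // expected: "if" at 0, "foo" at 3, "!=" at 7, "nil" at 10, "{" at 14
        fmt.Printf("%q at %d\n", tok.Lexeme, tok.Start)
    }
}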

lex/token.go (new file, 17 lines)

@@ -0,0 +1,17 @@
package lex

// TokenType describes what kind of token a lexeme is.
type TokenType uint

const (
    // Word is the only kind the lexer emits for now:
    // any run of non-layout characters.
    Word TokenType = iota
)

// Token is a single lexeme along with its kind and the column
// it starts at in the input line.
type Token struct {
    Lexeme string
    Type   TokenType
    Start  int
}

func NewToken(lexeme string, kind TokenType, start int) *Token {
    return &Token{lexeme, kind, start}
}
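The TODO in lex.go mentions generating lexers from the config files later on; if that lands, the const block above would presumably grow beyond Word. A speculative sketch (purely an assumption, not part of this commit):

// hypothetical future token kinds, not part of this commit:
const (
    Word TokenType = iota
    Keyword       // matched against the keyword list in the language config
    Symbol        // punctuation and operators
    StringLiteral // quoted strings, coloured as a single unit
)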