phi/lex/lex.go
Felix Angell 8687f612d4 the command palette now has a hand-written lexer
this means we can 'type check' command palette arguments, e.g. whether something should be a string or not.
it uses the same lexer that is used for syntax highlighting ... let's see how long that lasts.
2019-03-02 20:54:01 +00:00

package lex

import (
	"fmt"
	"unicode"
)

// Lexer is a hand-written lexer shared by the command palette
// and the syntax highlighter. It walks the input rune by rune.
type Lexer struct {
	startingPos int
	pos         int
	input       []rune
}

// New returns a Lexer over the given input string.
func New(input string) *Lexer {
	return &Lexer{
		startingPos: 0,
		pos:         0,
		input:       []rune(input),
	}
}

// consume returns the current rune and advances past it.
func (l *Lexer) consume() rune {
	consumed := l.peek()
	l.pos++
	return consumed
}

// expect consumes the current rune, reporting false only when no
// input remains. Note that it currently consumes the rune even
// when it does not match c; see the TODO below.
func (l *Lexer) expect(c rune) (rune, bool) {
	if l.hasNext() && l.peek() == c {
		return l.consume(), true
	}
	if !l.hasNext() {
		return rune(0), false
	}
	// TODO: fail here instead of consuming a mismatched rune?
	return l.consume(), true
}

// next returns the rune offs positions ahead of the cursor
// without advancing.
func (l *Lexer) next(offs int) rune {
	return l.input[l.pos+offs]
}

// peek returns the current rune without advancing.
func (l *Lexer) peek() rune {
	return l.input[l.pos]
}

// hasNext reports whether any input remains.
func (l *Lexer) hasNext() bool {
	return l.pos < len(l.input)
}

// recognizeString consumes a double-quoted string literal,
// including both quotes.
func (l *Lexer) recognizeString() *Token {
	l.expect('"')
	for l.hasNext() && l.peek() != '"' {
		l.consume()
	}
	l.expect('"')
	return NewToken(l.captureLexeme(), String, l.startingPos)
}

// recognizeCharacter consumes a single-quoted character literal,
// including both quotes.
func (l *Lexer) recognizeCharacter() *Token {
	l.expect('\'')
	for l.hasNext() && l.peek() != '\'' {
		l.consume()
	}
	l.expect('\'')
	return NewToken(l.captureLexeme(), Character, l.startingPos)
}

// recognizeNumber consumes a run of digits, optionally followed
// by a decimal point and a fractional part.
func (l *Lexer) recognizeNumber() *Token {
	for l.hasNext() && unicode.IsDigit(l.peek()) {
		l.consume()
	}
	if l.hasNext() && l.peek() == '.' {
		l.consume()
		for l.hasNext() && unicode.IsDigit(l.peek()) {
			l.consume()
		}
	}
	return NewToken(l.captureLexeme(), Number, l.startingPos)
}

// recognizeSymbol consumes a single symbol rune.
func (l *Lexer) recognizeSymbol() *Token {
	l.consume()
	return NewToken(l.captureLexeme(), Symbol, l.startingPos)
}

// recognizeWord consumes a run of letters and digits, allowing
// one '_' or '-' separator followed by more letters and digits.
func (l *Lexer) recognizeWord() *Token {
	for l.hasNext() && (unicode.IsLetter(l.peek()) || unicode.IsDigit(l.peek())) {
		l.consume()
	}
	if l.hasNext() {
		curr := l.peek()
		if curr == '_' || curr == '-' {
			l.consume()
			for l.hasNext() && (unicode.IsLetter(l.peek()) || unicode.IsDigit(l.peek())) {
				l.consume()
			}
		}
	}
	return NewToken(l.captureLexeme(), Word, l.startingPos)
}

// captureLexeme returns the input between the start of the
// current token and the cursor.
func (l *Lexer) captureLexeme() string {
	return string(l.input[l.startingPos:l.pos])
}

// Tokenize splits the input into a slice of tokens.
func (l *Lexer) Tokenize() []*Token {
	var result []*Token
	for l.hasNext() {
		// TODO: make it so that we can generate lexers from the
		// config files, allowing the user to put token matching
		// criteria in here. for now we'll just go with a simple
		// lexer that splits strings by spaces/tabs/etc.

		// skip all the layout characters; we don't care about these.
		for l.hasNext() && l.peek() <= ' ' {
			l.consume()
		}

		l.startingPos = l.pos
		if token := func() *Token {
			if !l.hasNext() {
				return nil
			}
			curr := l.peek()
			switch {
			case curr == '"':
				return l.recognizeString()
			case curr == '\'':
				return l.recognizeCharacter()
			case unicode.IsLetter(curr):
				return l.recognizeWord()
			case unicode.IsDigit(curr):
				return l.recognizeNumber()
			case unicode.IsGraphic(curr):
				return l.recognizeSymbol()
			}
			// a ' ' case would be unreachable here: layout characters
			// are skipped above, and space satisfies IsGraphic anyway.
			panic(fmt.Sprintln("unhandled input! ", string(curr)))
		}(); token != nil {
			result = append(result, token)
		}
	}
	return result
}
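
As a quick illustration of the 'type check' idea from the commit message, here is a minimal usage sketch. It is hypothetical (something like a lex_test.go next to this file), assumes only the New and Tokenize API shown above, and makes no claims about Token's fields, which live elsewhere in the package:

package lex

import "testing"

// Sketch: the command palette tokenizes its input line and can
// then check each argument's token kind. Here we only assert the
// token count, since Token's shape is defined elsewhere.
func TestTokenizePaletteInput(t *testing.T) {
	// "open" lexes as a Word, the quoted filename as a String,
	// and 42 as a Number.
	tokens := New(`open "notes.txt" 42`).Tokenize()
	if len(tokens) != 3 {
		t.Fatalf("expected 3 tokens, got %d", len(tokens))
	}
}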