mirror of
https://github.com/kovidgoyal/kitty.git
synced 2024-09-19 18:47:26 +03:00
Port new shlex code to Go
This commit is contained in:
parent
04eafbea9b
commit
a1f2a7df4d
@ -28,6 +28,9 @@ func TestParseSSHArgs(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(ans) == 0 {
|
||||
ans = []string{}
|
||||
}
|
||||
return ans
|
||||
}
|
||||
|
||||
@ -39,7 +42,7 @@ func TestParseSSHArgs(t *testing.T) {
|
||||
check := func(a, b any) {
|
||||
diff := cmp.Diff(a, b)
|
||||
if diff != "" {
|
||||
t.Fatalf("Unexpected value for args: %s\n%s", args, diff)
|
||||
t.Fatalf("Unexpected value for args: %#v\n%s", args, diff)
|
||||
}
|
||||
}
|
||||
check(split(expected_ssh_args), ssh_args)
|
||||
|
@ -109,7 +109,6 @@ next_word(Shlex *self, PyObject *args UNUSED) {
|
||||
switch(ch) {
|
||||
case STRING_WITHOUT_ESCAPES_DELIM:
|
||||
set_state(self, WORD);
|
||||
if (self->buf_pos && self->state == NORMAL) return get_word(self);
|
||||
break;
|
||||
default: write_ch(self, ch); break;
|
||||
} break;
|
||||
@ -117,13 +116,9 @@ next_word(Shlex *self, PyObject *args UNUSED) {
|
||||
switch(ch) {
|
||||
case STRING_WITH_ESCAPES_DELIM:
|
||||
set_state(self, WORD);
|
||||
if (self->buf_pos && self->state == NORMAL) return get_word(self);
|
||||
break;
|
||||
case ESCAPE_CHAR:
|
||||
if (self->src_pos < self->src_sz) {
|
||||
Py_UCS4 nch = PyUnicode_READ(self->kind, self->src_data, self->src_pos); self->src_pos++;
|
||||
write_ch(self, nch);
|
||||
}
|
||||
write_escape_ch(self);
|
||||
break;
|
||||
default: write_ch(self, ch); break;
|
||||
} break;
|
||||
|
@ -12,419 +12,204 @@ To process a stream of strings:
|
||||
for ; token, err := l.Next(); err != nil {
|
||||
// process token
|
||||
}
|
||||
|
||||
To access the raw token stream (which includes tokens for spaces):
|
||||
|
||||
t := NewTokenizer(os.Stdin)
|
||||
for ; token, err := t.Next(); err != nil {
|
||||
// process token
|
||||
}
|
||||
*/
|
||||
package shlex
|
||||
|
||||
// Based on https://pkg.go.dev/github.com/google/shlex with many improvements
|
||||
// Relicensed to GPLv3 since all my additions/changes are GPLv3 which makes the
|
||||
// original work, which was APL2, also GPLv3
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// TokenType is a top-level token classification: A word, space, unknown.
|
||||
type TokenType int
|
||||
|
||||
// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
|
||||
type runeTokenClass int
|
||||
|
||||
// the internal state used by the lexer state machine
|
||||
type lexerState int
|
||||
|
||||
// Token is a (type, value) pair representing a lexical token.
|
||||
type Token struct {
|
||||
Type TokenType
|
||||
Value string
|
||||
Pos int64
|
||||
type Word struct {
|
||||
Value string // The word is empty if EOF is reached
|
||||
Pos int // The position in the input string of the word or the trailer
|
||||
Err error // Indicates an error (unterminated string or trailing unescaped backslash)
|
||||
Trailer string // Extra trailing data such as an unterminated string or an unescaped backslash. Present only if Err != nil
|
||||
}
|
||||
|
||||
// Named classes of UTF-8 runes
|
||||
const (
|
||||
spaceRunes = " \t\r\n"
|
||||
escapingQuoteRunes = `"`
|
||||
nonEscapingQuoteRunes = "'"
|
||||
escapeRunes = `\`
|
||||
)
|
||||
|
||||
// Classes of rune token
|
||||
const (
|
||||
unknownRuneClass runeTokenClass = iota
|
||||
spaceRuneClass
|
||||
escapingQuoteRuneClass
|
||||
nonEscapingQuoteRuneClass
|
||||
escapeRuneClass
|
||||
eofRuneClass
|
||||
)
|
||||
|
||||
// Classes of lexical token
|
||||
const (
|
||||
UnknownToken TokenType = iota
|
||||
WordToken
|
||||
SpaceToken
|
||||
)
|
||||
|
||||
func (t TokenType) String() string {
|
||||
switch t {
|
||||
default:
|
||||
return "UnknownToken"
|
||||
case WordToken:
|
||||
return "WordToken"
|
||||
case SpaceToken:
|
||||
return "SpaceToken"
|
||||
}
|
||||
}
|
||||
type lexer_state int
|
||||
|
||||
// Lexer state machine states
|
||||
const (
|
||||
startState lexerState = iota // no runes have been seen
|
||||
inWordState // processing regular runes in a word
|
||||
inSpaceState // processing runes in a space
|
||||
escapingState // we have just consumed an escape rune; the next rune is literal
|
||||
escapingQuotedState // we have just consumed an escape rune within a quoted string
|
||||
quotingEscapingState // we are within a quoted string that supports escaping ("...")
|
||||
quotingState // we are within a string that does not support escaping ('...')
|
||||
lex_normal lexer_state = iota
|
||||
word
|
||||
string_without_escapes
|
||||
string_with_escapes
|
||||
)
|
||||
|
||||
// tokenClassifier is used for classifying rune characters.
|
||||
type tokenClassifier map[rune]runeTokenClass
|
||||
|
||||
func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
|
||||
for _, runeChar := range runes {
|
||||
typeMap[runeChar] = tokenType
|
||||
}
|
||||
}
|
||||
|
||||
// newDefaultClassifier creates a new classifier for ASCII characters.
|
||||
func newDefaultClassifier() tokenClassifier {
|
||||
t := tokenClassifier{}
|
||||
t.addRuneClass(spaceRunes, spaceRuneClass)
|
||||
t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
|
||||
t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
|
||||
t.addRuneClass(escapeRunes, escapeRuneClass)
|
||||
return t
|
||||
}
|
||||
|
||||
// ClassifyRune classifiees a rune
|
||||
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
|
||||
return t[runeVal]
|
||||
}
|
||||
|
||||
// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
|
||||
type Lexer Tokenizer
|
||||
|
||||
// NewLexer creates a new lexer from an input stream.
|
||||
func NewLexer(x io.RuneReader) *Lexer {
|
||||
|
||||
return (*Lexer)(NewTokenizer(x))
|
||||
type Lexer struct {
|
||||
state lexer_state
|
||||
src string
|
||||
src_sz, src_pos, word_start int
|
||||
buf strings.Builder
|
||||
}
|
||||
|
||||
// Next returns the next word, or an error. If there are no more words,
|
||||
// the error will be io.EOF.
|
||||
func (l *Lexer) Next() (string, error) {
|
||||
for {
|
||||
token, err := (*Tokenizer)(l).Next()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
switch token.Type {
|
||||
case WordToken:
|
||||
return token.Value, nil
|
||||
case SpaceToken:
|
||||
// skip spaces
|
||||
default:
|
||||
return "", fmt.Errorf("Unknown token type: %s", token.Type)
|
||||
}
|
||||
}
|
||||
// NewLexer creates a new lexer from an input string.
|
||||
func NewLexer(x string) *Lexer {
|
||||
return &Lexer{src: x, src_sz: len(x)}
|
||||
}
|
||||
|
||||
// Tokenizer turns an input stream into a sequence of typed tokens
|
||||
type Tokenizer struct {
|
||||
input io.RuneReader
|
||||
classifier tokenClassifier
|
||||
pos int64
|
||||
redo_rune struct {
|
||||
char rune
|
||||
sz int
|
||||
rune_type runeTokenClass
|
||||
}
|
||||
func (self *Lexer) start_word() {
|
||||
self.buf.Reset()
|
||||
self.word_start = self.src_pos - 1
|
||||
}
|
||||
|
||||
// NewTokenizer creates a new tokenizer from an input stream.
|
||||
func NewTokenizer(input io.RuneReader) *Tokenizer {
|
||||
classifier := newDefaultClassifier()
|
||||
return &Tokenizer{
|
||||
input: input,
|
||||
classifier: classifier}
|
||||
func (self *Lexer) get_word() Word {
|
||||
return Word{Pos: self.word_start, Value: self.buf.String()}
|
||||
}
|
||||
|
||||
var ErrTrailingEscape error = errors.New("EOF found after escape character")
|
||||
var ErrTrailingQuoteEscape error = errors.New("EOF found after escape character for double quote")
|
||||
var ErrUnclosedDoubleQuote error = errors.New("EOF found when expecting closing double quote")
|
||||
var ErrUnclosedSingleQuote error = errors.New("EOF found when expecting closing single quote")
|
||||
func (self *Lexer) write_ch(ch byte) {
|
||||
self.buf.WriteByte(ch)
|
||||
}
|
||||
|
||||
// scanStream scans the stream for the next token using the internal state machine.
|
||||
// It will panic if it encounters a rune which it does not know how to handle.
|
||||
func (t *Tokenizer) scanStream() (*Token, error) {
|
||||
state := startState
|
||||
var tokenType TokenType
|
||||
var nextRune rune
|
||||
var nextRuneType runeTokenClass
|
||||
var err error
|
||||
var sz int
|
||||
value := strings.Builder{}
|
||||
pos_at_start := t.pos
|
||||
|
||||
unread_rune := func() {
|
||||
t.redo_rune.sz = sz
|
||||
t.redo_rune.char = nextRune
|
||||
t.redo_rune.rune_type = nextRuneType
|
||||
t.pos -= int64(sz)
|
||||
}
|
||||
|
||||
token := func() *Token {
|
||||
return &Token{tokenType, value.String(), pos_at_start}
|
||||
}
|
||||
|
||||
for {
|
||||
if t.redo_rune.sz > 0 {
|
||||
nextRune, sz = t.redo_rune.char, t.redo_rune.sz
|
||||
nextRuneType = t.redo_rune.rune_type
|
||||
t.redo_rune.sz = 0
|
||||
} else {
|
||||
nextRune, sz, err = t.input.ReadRune()
|
||||
nextRuneType = t.classifier.ClassifyRune(nextRune)
|
||||
func (self *Lexer) write_escaped_ch() bool {
|
||||
ch, count := utf8.DecodeRuneInString(self.src[self.src_pos:])
|
||||
if count > 0 {
|
||||
self.src_pos += count
|
||||
if ch != utf8.RuneError {
|
||||
self.buf.WriteRune(ch)
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
if err == io.EOF {
|
||||
nextRuneType = eofRuneClass
|
||||
err = nil
|
||||
} else if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
t.pos += int64(sz)
|
||||
|
||||
switch state {
|
||||
case startState: // no runes read yet
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
return nil, io.EOF
|
||||
}
|
||||
case spaceRuneClass:
|
||||
{
|
||||
tokenType = SpaceToken
|
||||
value.WriteRune(nextRune)
|
||||
state = inSpaceState
|
||||
}
|
||||
case escapingQuoteRuneClass:
|
||||
{
|
||||
tokenType = WordToken
|
||||
state = quotingEscapingState
|
||||
}
|
||||
case nonEscapingQuoteRuneClass:
|
||||
{
|
||||
tokenType = WordToken
|
||||
state = quotingState
|
||||
}
|
||||
case escapeRuneClass:
|
||||
{
|
||||
tokenType = WordToken
|
||||
state = escapingState
|
||||
}
|
||||
default:
|
||||
{
|
||||
tokenType = WordToken
|
||||
value.WriteRune(nextRune)
|
||||
state = inWordState
|
||||
}
|
||||
// Next returns the next word. At EOF Word.Value will be ""
|
||||
func (self *Lexer) Next() (ans Word) {
|
||||
const string_with_escapes_delim = '"'
|
||||
const string_without_escapes_delim = '\''
|
||||
const escape_char = '\\'
|
||||
for self.src_pos < self.src_sz {
|
||||
ch := self.src[self.src_pos]
|
||||
self.src_pos++
|
||||
switch self.state {
|
||||
case lex_normal:
|
||||
switch ch {
|
||||
case ' ', '\n', '\r', '\t':
|
||||
case string_with_escapes_delim:
|
||||
self.state = string_with_escapes
|
||||
self.start_word()
|
||||
case string_without_escapes_delim:
|
||||
self.state = string_without_escapes
|
||||
self.start_word()
|
||||
case escape_char:
|
||||
self.start_word()
|
||||
if !self.write_escaped_ch() {
|
||||
ans.Trailer = "\\"
|
||||
ans.Err = fmt.Errorf("Extra backslash at end of input")
|
||||
ans.Pos = self.word_start
|
||||
return
|
||||
}
|
||||
self.state = word
|
||||
default:
|
||||
self.state = word
|
||||
self.start_word()
|
||||
self.write_ch(ch)
|
||||
}
|
||||
case inSpaceState: // in a sequence of spaces separating words
|
||||
{
|
||||
switch nextRuneType {
|
||||
case spaceRuneClass:
|
||||
{
|
||||
value.WriteRune(nextRune)
|
||||
}
|
||||
default:
|
||||
{
|
||||
unread_rune()
|
||||
return token(), err
|
||||
}
|
||||
case word:
|
||||
switch ch {
|
||||
case ' ', '\n', '\r', '\t':
|
||||
self.state = lex_normal
|
||||
if self.buf.Len() > 0 {
|
||||
return self.get_word()
|
||||
}
|
||||
}
|
||||
case inWordState: // in a regular word
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
return token(), err
|
||||
}
|
||||
case spaceRuneClass:
|
||||
{
|
||||
unread_rune()
|
||||
return token(), err
|
||||
}
|
||||
case escapingQuoteRuneClass:
|
||||
{
|
||||
state = quotingEscapingState
|
||||
}
|
||||
case nonEscapingQuoteRuneClass:
|
||||
{
|
||||
state = quotingState
|
||||
}
|
||||
case escapeRuneClass:
|
||||
{
|
||||
state = escapingState
|
||||
}
|
||||
default:
|
||||
{
|
||||
value.WriteRune(nextRune)
|
||||
}
|
||||
case string_with_escapes_delim:
|
||||
self.state = string_with_escapes
|
||||
case string_without_escapes_delim:
|
||||
self.state = string_without_escapes
|
||||
case escape_char:
|
||||
if !self.write_escaped_ch() {
|
||||
ans.Pos = self.word_start
|
||||
ans.Trailer = self.buf.String() + "\\"
|
||||
ans.Err = fmt.Errorf("Extra backslash at end of input")
|
||||
return
|
||||
}
|
||||
default:
|
||||
self.write_ch(ch)
|
||||
}
|
||||
case escapingState: // the rune after an escape character
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
err = ErrTrailingEscape
|
||||
return token(), err
|
||||
}
|
||||
default:
|
||||
{
|
||||
state = inWordState
|
||||
value.WriteRune(nextRune)
|
||||
}
|
||||
}
|
||||
case string_without_escapes:
|
||||
switch ch {
|
||||
case string_without_escapes_delim:
|
||||
self.state = word
|
||||
default:
|
||||
self.write_ch(ch)
|
||||
}
|
||||
case escapingQuotedState: // the next rune after an escape character, in double quotes
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
err = ErrTrailingQuoteEscape
|
||||
return token(), err
|
||||
}
|
||||
default:
|
||||
{
|
||||
state = quotingEscapingState
|
||||
value.WriteRune(nextRune)
|
||||
}
|
||||
}
|
||||
}
|
||||
case quotingEscapingState: // in escaping double quotes
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
err = ErrUnclosedDoubleQuote
|
||||
return token(), err
|
||||
}
|
||||
case escapingQuoteRuneClass:
|
||||
{
|
||||
state = inWordState
|
||||
}
|
||||
case escapeRuneClass:
|
||||
{
|
||||
state = escapingQuotedState
|
||||
}
|
||||
default:
|
||||
{
|
||||
value.WriteRune(nextRune)
|
||||
}
|
||||
}
|
||||
}
|
||||
case quotingState: // in non-escaping single quotes
|
||||
{
|
||||
switch nextRuneType {
|
||||
case eofRuneClass:
|
||||
{
|
||||
err = ErrUnclosedSingleQuote
|
||||
return token(), err
|
||||
}
|
||||
case nonEscapingQuoteRuneClass:
|
||||
{
|
||||
state = inWordState
|
||||
}
|
||||
default:
|
||||
{
|
||||
value.WriteRune(nextRune)
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
{
|
||||
return nil, fmt.Errorf("Unexpected state: %v", state)
|
||||
case string_with_escapes:
|
||||
switch ch {
|
||||
case string_with_escapes_delim:
|
||||
self.state = word
|
||||
case escape_char:
|
||||
self.write_escaped_ch()
|
||||
default:
|
||||
self.write_ch(ch)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
switch self.state {
|
||||
case word:
|
||||
self.state = lex_normal
|
||||
if self.buf.Len() > 0 {
|
||||
return self.get_word()
|
||||
}
|
||||
case string_with_escapes, string_without_escapes:
|
||||
self.state = lex_normal
|
||||
ans.Trailer = self.buf.String()
|
||||
ans.Pos = self.word_start
|
||||
ans.Err = fmt.Errorf("Unterminated string at end of input")
|
||||
return
|
||||
case lex_normal:
|
||||
|
||||
// Next returns the next token in the stream.
|
||||
func (t *Tokenizer) Next() (*Token, error) {
|
||||
return t.scanStream()
|
||||
}
|
||||
|
||||
// Pos returns the current position in the string as a byte offset
|
||||
func (t *Tokenizer) Pos() int64 {
|
||||
return t.pos
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Split partitions a string into a slice of strings.
|
||||
func Split(s string) ([]string, error) {
|
||||
l := NewLexer(strings.NewReader(s))
|
||||
subStrings := make([]string, 0)
|
||||
func Split(s string) (ans []string, err error) {
|
||||
l := NewLexer(s)
|
||||
var word Word
|
||||
for {
|
||||
word, err := l.Next()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return subStrings, nil
|
||||
}
|
||||
return subStrings, err
|
||||
word = l.Next()
|
||||
if word.Err != nil {
|
||||
return ans, word.Err
|
||||
}
|
||||
subStrings = append(subStrings, word)
|
||||
if word.Value == "" {
|
||||
break
|
||||
}
|
||||
ans = append(ans, word.Value)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
|
||||
// more relaxed about errors and also adding an empty string at the end if s ends with a SpaceToken.
|
||||
// more relaxed about errors and also adding an empty string at the end if s ends with a Space.
|
||||
func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
|
||||
t := NewTokenizer(strings.NewReader(s))
|
||||
t := NewLexer(s)
|
||||
argv = make([]string, 0, len(s)/4)
|
||||
token := &Token{}
|
||||
for {
|
||||
ntoken, err := t.Next()
|
||||
if err == io.EOF {
|
||||
if token.Type == SpaceToken {
|
||||
argv = append(argv, "")
|
||||
token.Pos += int64(len(token.Value))
|
||||
word := t.Next()
|
||||
if word.Value == "" {
|
||||
if word.Trailer == "" {
|
||||
trimmed := strings.TrimRight(s, " ")
|
||||
if len(trimmed) < len(s) { // trailing spaces
|
||||
pos := position_of_last_arg
|
||||
if len(argv) > 0 {
|
||||
pos += len(argv[len(argv)-1])
|
||||
}
|
||||
if pos < len(s) { // trailing whitespace
|
||||
argv = append(argv, "")
|
||||
position_of_last_arg += len(s) - pos + 1
|
||||
}
|
||||
}
|
||||
} else {
|
||||
argv = append(argv, word.Trailer)
|
||||
position_of_last_arg = word.Pos
|
||||
}
|
||||
return argv, int(token.Pos)
|
||||
break
|
||||
}
|
||||
if ntoken == nil {
|
||||
return []string{}, -1
|
||||
}
|
||||
switch ntoken.Type {
|
||||
case WordToken:
|
||||
argv = append(argv, ntoken.Value)
|
||||
case SpaceToken:
|
||||
// skip spaces
|
||||
default:
|
||||
return []string{}, -1
|
||||
}
|
||||
token = ntoken
|
||||
position_of_last_arg = word.Pos
|
||||
argv = append(argv, word.Value)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
@ -1,7 +1,6 @@
|
||||
package shlex
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
@ -13,78 +12,24 @@ var (
|
||||
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
|
||||
)
|
||||
|
||||
func TestClassifier(t *testing.T) {
|
||||
classifier := newDefaultClassifier()
|
||||
tests := map[rune]runeTokenClass{
|
||||
' ': spaceRuneClass,
|
||||
'"': escapingQuoteRuneClass,
|
||||
'\'': nonEscapingQuoteRuneClass}
|
||||
for runeChar, want := range tests {
|
||||
got := classifier.ClassifyRune(runeChar)
|
||||
if got != want {
|
||||
t.Errorf("ClassifyRune(%v) -> %v. Want: %v", runeChar, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestTokenizer(t *testing.T) {
|
||||
testInput := testString
|
||||
expectedTokens := []*Token{
|
||||
{WordToken, "one", 0},
|
||||
{SpaceToken, " ", 3},
|
||||
{WordToken, "two", 4},
|
||||
{SpaceToken, " ", 7},
|
||||
{WordToken, "three four", 8},
|
||||
{SpaceToken, " ", 20},
|
||||
{WordToken, "five \"six\"", 21},
|
||||
{SpaceToken, " ", 35},
|
||||
{WordToken, "seven#eight", 36},
|
||||
{SpaceToken, " ", 47},
|
||||
{WordToken, "#", 48},
|
||||
{SpaceToken, " ", 49},
|
||||
{WordToken, "nine", 50},
|
||||
{SpaceToken, " ", 54},
|
||||
{WordToken, "#", 55},
|
||||
{SpaceToken, " ", 56},
|
||||
{WordToken, "ten", 57},
|
||||
{SpaceToken, " ", 60},
|
||||
{WordToken, "eleven", 61},
|
||||
{SpaceToken, " ", 67},
|
||||
{WordToken, "twelve\\", 68},
|
||||
{SpaceToken, " ", 77},
|
||||
{WordToken, "thirteen=13", 78},
|
||||
{SpaceToken, " ", 89},
|
||||
{WordToken, "fourteen/14", 90},
|
||||
}
|
||||
|
||||
tokenizer := NewTokenizer(strings.NewReader(testInput))
|
||||
for i, want := range expectedTokens {
|
||||
got, err := tokenizer.Next()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if diff := cmp.Diff(want, got); diff != "" {
|
||||
t.Fatalf("Tokenizer.Next()[%v] of: %s:\n%s", i, testString, diff)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestLexer(t *testing.T) {
|
||||
testInput := testString
|
||||
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||
|
||||
lexer := NewLexer(strings.NewReader(testInput))
|
||||
lexer := NewLexer(testInput)
|
||||
for i, want := range expectedStrings {
|
||||
got, err := lexer.Next()
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if got != want {
|
||||
got := lexer.Next()
|
||||
if got.Value != want {
|
||||
t.Errorf("Lexer.Next()[%v] of %q -> %v. Want: %v", i, testString, got, want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Tok is a (position, value) pair used by TestSplit to compare the words a
// Lexer yields: Pos is filled from Word.Pos and Val from Word.Value.
type Tok struct {
|
||||
Pos int
|
||||
Val string
|
||||
}
|
||||
|
||||
func TestSplit(t *testing.T) {
|
||||
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||
got, err := Split(testString)
|
||||
@ -99,6 +44,43 @@ func TestSplit(t *testing.T) {
|
||||
t.Errorf("Split(%q)[%v] -> %v. Want: %v", testString, i, got[i], want[i])
|
||||
}
|
||||
}
|
||||
|
||||
for _, x := range []string{
|
||||
`abc\`, `\`, `'abc`, `'`, `"`, `asd\`,
|
||||
} {
|
||||
_, err := Split(x)
|
||||
if err == nil {
|
||||
t.Fatalf("Failed to get an error for: %#v", x)
|
||||
}
|
||||
}
|
||||
s := func(q string) (ans []Tok) {
|
||||
l := NewLexer(q)
|
||||
for {
|
||||
w := l.Next()
|
||||
if w.Err != nil {
|
||||
t.Fatal(w.Err)
|
||||
}
|
||||
if w.Value == "" {
|
||||
break
|
||||
}
|
||||
ans = append(ans, Tok{w.Pos, w.Value})
|
||||
}
|
||||
return
|
||||
}
|
||||
for q, expected := range map[string][]Tok{
|
||||
`"ab"`: {{0, "ab"}},
|
||||
`x "ab"y \m`: {{0, `x`}, {2, `aby`}, {8, `m`}},
|
||||
`x'y"\z'1`: {{0, `xy"\z1`}},
|
||||
`\abc\ d`: {{0, `abc d`}},
|
||||
``: nil,
|
||||
` `: nil,
|
||||
" \tabc\n\t\r ": {{2, "abc"}},
|
||||
} {
|
||||
if diff := cmp.Diff(expected, s(q)); diff != "" {
|
||||
t.Fatalf("Failed for string: %#v\n%s", q, diff)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestSplitForCompletion(t *testing.T) {
|
||||
@ -108,7 +90,7 @@ func TestSplitForCompletion(t *testing.T) {
|
||||
t.Fatalf("Failed to split: %s\n%s", cmdline, diff)
|
||||
}
|
||||
if last_arg_pos != actual_pos {
|
||||
t.Fatalf("Failed to split: %s\n Last arg pos: %d != %d", cmdline, last_arg_pos, actual_pos)
|
||||
t.Fatalf("Failed to split: %#v\n Last arg pos: %d != %d", cmdline, last_arg_pos, actual_pos)
|
||||
}
|
||||
}
|
||||
test("a b", 2, "a", "b")
|
||||
|
Loading…
Reference in New Issue
Block a user