mirror of
https://github.com/kanaka/mal.git
synced 2024-09-20 01:57:09 +03:00
171 lines
4.2 KiB
Plaintext
171 lines
4.2 KiB
Plaintext
|
import "./types" for MalVal, MalSymbol, MalList, MalVector, MalMap
|
||
|
|
||
|
// Splits a source string into a flat list of token strings.
//
// The scan position is a *byte* offset, because that is what the string
// subscript operator takes. All end-of-input checks and cursor movement are
// therefore done in bytes, not code points, so non-ASCII input is scanned to
// the end and multi-byte characters are never split.
class Tokenizer {
  // Creates a tokenizer over the source text [s].
  construct new(s) {
    _s = s
    _pos = 0
    // `s.count` counts code points and walks the whole string on every call;
    // the byte length is what bounds a byte offset, so compute it once.
    _end = s.bytes.count
  }

  // Scans the whole input from the start and returns the list of tokens.
  // Whitespace and comments produce no tokens.
  tokenize() {
    _pos = 0
    var tokens = []
    while (true) {
      var token = nextToken()
      if (token == null) break
      // nextToken() returns "" for skipped input (whitespace, comments).
      if (token != "") tokens.add(token)
    }
    return tokens
  }

  // Characters that terminate a `;` comment.
  static eolChars { "\r\n" }
  // Characters skipped between tokens (a comma counts as whitespace).
  static whitespace { " ,\r\n\t" }
  // Characters that are always a complete token on their own.
  static delimiters { "[]{}()'`^@" }
  // Characters that end a bare (symbol/number/keyword) token.
  static separators { Tokenizer.whitespace + "[]{}()'\"`,;" }

  // Consumes and returns the next token.
  //
  // Returns null at end of input, "" when the consumed input carries no token
  // (one whitespace character or a whole comment), and the token text
  // otherwise. String tokens keep their surrounding quotes and their escape
  // sequences undecoded. Aborts the current fiber if a string literal is not
  // closed before the end of input.
  nextToken() {
    if (isEOF()) return null
    var ch = curr

    if (Tokenizer.whitespace.contains(ch)) {
      advance()
      return ""
    }

    if (Tokenizer.delimiters.contains(ch)) {
      advance()
      return ch
    }

    // `~` alone, or `~@` when immediately followed by `@`.
    if (ch == "~") {
      advance()
      if (!isEOF() && curr == "@") {
        advance()
        return "~@"
      }
      return "~"
    }

    // Comment: discard everything up to (not including) the end of line.
    if (ch == ";") {
      advance()
      while (!isEOF() && !Tokenizer.eolChars.contains(curr)) advance()
      return ""
    }

    // String literal: copy verbatim, including both quotes.
    if (ch == "\"") {
      var s = ch
      advance()
      while (!isEOF() && curr != "\"") {
        if (curr == "\\") {
          // Keep the backslash and fall through to copy the escaped
          // character too, so an escaped quote does not end the literal.
          s = s + curr
          advance()
          if (isEOF()) Fiber.abort("expected '\"', got EOF 111")
        }
        s = s + curr
        advance()
      }
      if (isEOF()) Fiber.abort("expected '\"', got EOF 222")
      s = s + curr
      advance()
      return s
    }

    // Anything else: accumulate until a separator or end of input.
    var token = ch
    advance()
    while (!isEOF() && !Tokenizer.separators.contains(curr)) {
      token = token + curr
      advance()
    }
    return token
  }

  // The code point that starts at the current byte offset.
  curr { _s[_pos] }

  // True once the byte offset has reached the end of the input.
  isEOF() { _pos >= _end }

  // Moves past the current code point (all of its bytes, not just one).
  advance() { _pos = _pos + curr.bytes.count }
}
|
||
|
|
||
|
// A forward-only cursor over a list of tokens.
class Reader {
  // Wraps [tokens]; the cursor starts at the first element.
  construct new(tokens) {
    _tokens = tokens
    _pos = 0
  }

  // Returns the token under the cursor and moves the cursor forward by one.
  // Returns null (and leaves the cursor alone) once every token is consumed.
  next() {
    if (_pos < _tokens.count) {
      var current = _tokens[_pos]
      _pos = _pos + 1
      return current
    }
    return null
  }

  // Returns the token under the cursor without consuming it, or null when
  // every token has been consumed.
  peek() {
    if (_pos < _tokens.count) return _tokens[_pos]
    return null
  }
}
|
||
|
|
||
|
// Turns source text into data: tokenizes a string and builds the nested
// values (numbers, strings, keywords, symbols, lists, vectors, maps) that the
// tokens describe. All methods are static.
class MalReader {
  // Decodes a string token (text still wrapped in its double quotes) into the
  // string value it denotes.
  //
  // Recognised escapes are `\\` (backslash), `\"` (double quote) and `\n`
  // (newline). Any other backslash sequence is kept unchanged. The text is
  // decoded in one left-to-right pass so no character of the input has to be
  // reserved as a temporary placeholder.
  static parse_str(token) {
    if (token.count <= 2) return ""
    var pieces = []
    var escaped = false
    for (c in token[1..-2]) {
      if (escaped) {
        if (c == "n") {
          pieces.add("\n")
        } else if (c == "\\" || c == "\"") {
          pieces.add(c)
        } else {
          // Unknown escape: keep both characters as written.
          pieces.add("\\")
          pieces.add(c)
        }
        escaped = false
      } else if (c == "\\") {
        escaped = true
      } else {
        pieces.add(c)
      }
    }
    // A lone trailing backslash has nothing to escape; keep it.
    if (escaped) pieces.add("\\")
    return pieces.join("")
  }

  // True when [s] is non-empty and every character's first byte is an ASCII
  // digit (0x30..0x39).
  static is_all_digits(s) {
    if (s.count == 0) return false
    return s.all { |c| c.bytes[0] >= 0x30 && c.bytes[0] <= 0x39 }
  }

  // True when [token] is an optional leading `-` followed by one or more
  // ASCII digits.
  static is_number(token) {
    return token.startsWith("-") ? is_all_digits(token[1..-1]) : is_all_digits(token)
  }

  // Consumes one token from [rdr] and returns the atom it denotes: a number,
  // a decoded string, a keyword, null for `nil`, a boolean, or otherwise a
  // symbol. Aborts the current fiber if there is no token left to consume.
  static read_atom(rdr) {
    var token = rdr.next()
    // Reached when a form is required but the input has ended, e.g. after a
    // dangling reader macro such as `'`. Fail with a clear message instead of
    // calling string methods on null.
    if (token == null) Fiber.abort("unexpected EOF")
    if (is_number(token)) return Num.fromString(token)
    if (token.startsWith("\"")) return parse_str(token)
    if (token.startsWith(":")) return MalVal.newKeyword(token[1..-1])
    if (token == "nil") return null
    if (token == "true") return true
    if (token == "false") return false
    return MalSymbol.new(token)
  }

  // Reads a delimited sequence from [rdr]: the [start] token, zero or more
  // forms, then the [end] token. Returns the forms as a plain list.
  // Aborts the current fiber if the next token is not [start], or if the
  // tokens run out before [end] is found.
  static read_seq(rdr, start, end) {
    var token = rdr.next()
    if (token != start) Fiber.abort("expected '%(start)'")
    var elements = []
    token = rdr.peek()
    while (token != end) {
      // peek() yields null once the tokens are exhausted.
      if (!token) Fiber.abort("expected '%(end)', got EOF")
      elements.add(read_form(rdr))
      token = rdr.peek()
    }
    // Consume the closing delimiter.
    rdr.next()
    return elements
  }

  // Expands a one-token reader macro: skips the macro token, reads the form
  // that follows, and returns the list `(sym form)`.
  static reader_macro(rdr, sym) {
    rdr.next()
    return MalList.new([MalSymbol.new(sym), read_form(rdr)])
  }

  // Reads one complete form from [rdr], dispatching on the next token:
  // reader macros, `^` metadata, the three bracketed collection kinds, or an
  // atom. Aborts the current fiber on an unmatched closing bracket.
  static read_form(rdr) {
    var token = rdr.peek()
    if (token == "'") return reader_macro(rdr, "quote")
    if (token == "`") return reader_macro(rdr, "quasiquote")
    if (token == "~") return reader_macro(rdr, "unquote")
    if (token == "~@") return reader_macro(rdr, "splice-unquote")
    if (token == "^") {
      rdr.next()
      // Source order is `^meta form`; the result is `(with-meta form meta)`.
      var meta = read_form(rdr)
      return MalList.new([MalSymbol.new("with-meta"), read_form(rdr), meta])
    }
    if (token == "@") return reader_macro(rdr, "deref")
    if (token == "(") return MalList.new(read_seq(rdr, "(", ")"))
    if (token == ")") Fiber.abort("unexpected ')'")
    if (token == "[") return MalVector.new(read_seq(rdr, "[", "]"))
    if (token == "]") Fiber.abort("unexpected ']'")
    // The map is built from the flat list of forms found between the braces.
    if (token == "{") return MalMap.fromList(read_seq(rdr, "{", "}"))
    if (token == "}") Fiber.abort("unexpected '}'")
    return read_atom(rdr)
  }

  // Tokenizes [s] and reads the first form from it. Returns null when the
  // text contains no tokens at all (empty, whitespace or comments only).
  static read_str(s) {
    var tokens = Tokenizer.new(s).tokenize()
    if (tokens.count == 0) return null
    return read_form(Reader.new(tokens))
  }
}
|