mirror of
https://github.com/kanaka/mal.git
synced 2024-09-21 02:27:10 +03:00
4aa0ebdf47
Add a step1 test to make sure that implementations are properly throwing an error on unclosed strings. Fix 47 implementations and update the guide to note the correct behavior.
160 lines
4.0 KiB
OpenEdge ABL
160 lines
4.0 KiB
OpenEdge ABL
#include "yeti_regex.i"
|
|
require, "types.i"
|
|
|
|
// Compiled pattern used by tokenize() to pull one token at a time.
// It first skips any run of whitespace and commas, then captures (group 1)
// one of the following alternatives, tried in this order:
//   ~@                      the two-character splice-unquote marker
//   one of ] [ { } ( ) ' ` ~ @   a single special character
//   "..."                   a string; backslash escapes the next character,
//                           and the closing quote is optional, so an
//                           unterminated string still yields a token
//   ;...                    a comment introduced by ';'
//   a (possibly empty) run of characters that are not whitespace, brackets,
//   braces, parentheses, quotes, backquote, '~', '@', ',' or ';'
// NOTE(review): newline=1 presumably selects POSIX REG_NEWLINE behaviour so
// that ".*" in the comment alternative stops at the end of the line --
// confirm against the yeti_regex documentation.
TOKENIZER_REGEXP = regcomp("[[:space:],]*(~@|[][{}()'`~@]|\"([\\].|[^\\\"])*\"?|;.*|[^][[:space:]{}()'\"`~@,;]*)", newline=1)
|
|
|
|
func tokenize(str)
/* DOCUMENT tokens = tokenize(str)
     Split the string STR into an array of token strings by matching
     TOKENIZER_REGEXP repeatedly, each search starting where the previous
     captured group stopped.  Tokens whose first character is ';' are
     dropped.  Returns [] when no token was collected.
   SEE ALSO: read_str
 */
{
  // Output arguments for regmatch; they must exist before the call.
  whole = ""
  group = ""
  cursor = 1
  collected = []
  while (1) {
    found = regmatch(TOKENIZER_REGEXP, str, whole, group, start=cursor, indices=1)
    if (found == 0) break
    first = group(1)
    stop = group(2)
    last = stop - 1
    if (last < first) {
      // Empty capture: step one position past it so the scan keeps moving.
      cursor = stop + 1
      continue
    }
    cursor = stop
    piece = strpart(str, first:last)
    // Anything starting with ';' is a comment and produces no token.
    if (strpart(piece, 1:1) == ";") continue
    grow, collected, [piece]
  }
  return collected
}
|
|
|
|
// Cursor over a token array, shared by the reader_* and read_* functions.
struct Reader {
  pointer tokens;  // points at the token array (read_str stores &tokens here)
  int pos;         // index of the next token to hand out; read_str starts it at 1
}
|
|
|
|
func reader_peek(rdr)
/* DOCUMENT token = reader_peek(rdr)
     Return the token at the current position of the Reader RDR without
     advancing it.  Returns string(0) once the position is past the last
     token.
   SEE ALSO: reader_next
 */
{
  idx = rdr.pos
  if (idx <= numberof(*rdr.tokens)) return (*rdr.tokens)(idx)
  return string(0)
}
|
|
|
|
func reader_next(rdr)
/* DOCUMENT token = reader_next(rdr)
     Return the token at the current position of the Reader RDR (as given
     by reader_peek) and then move the position forward by one.  The
     position is incremented even when reader_peek reported the end of the
     token array.
   SEE ALSO: reader_peek
 */
{
  current = reader_peek(rdr)
  rdr.pos = rdr.pos + 1
  return current
}
|
|
|
|
// Integer literal: optional leading '-', then one or more decimal digits.
NUMBER_REGEXP = regcomp("^-?[0-9]+$")
// Well-formed string token: an opening quote, then any mix of backslash
// escape pairs and characters that are neither a backslash nor a quote,
// then a closing quote.  The body uses the same sub-pattern as the string
// alternative of TOKENIZER_REGEXP, so a token whose final quote is really
// part of an escape (for example "abc\" with no terminator) is NOT accepted
// here; the looser "^\".*\"$" used previously accepted such tokens.
STR_REGEXP = regcomp("^\"([\\].|[^\\\"])*\"$")
// Any token that merely starts with a quote.  read_atom tests this only
// after STR_REGEXP has failed, so a match here means an unterminated string.
STR_BAD_REGEXP = regcomp("^\".*$")
|
|
|
|
func unescape(s)
/* DOCUMENT text = unescape(s)
     Turn the quoted token S into the string it denotes: the first and
     last characters (the surrounding quotes) are removed, then the
     two-character sequences backslash-backslash, backslash-n and
     backslash-quote become a backslash, a newline and a quote.
 */
{
  inner = strpart(s, 2:-1)
  // Park escaped backslashes on the \x01 placeholder first so the next two
  // replacements cannot mistake them for the start of \n or \".
  parked = streplaceall(inner, "\\\\", "\x01")
  with_newlines = streplaceall(parked, "\\n", "\n")
  with_quotes = streplaceall(with_newlines, "\\\"", "\"")
  // Finally turn each placeholder into a single backslash.
  return streplaceall(with_quotes, "\x01", "\\")
}
|
|
|
|
func read_atom(rdr)
/* DOCUMENT obj = read_atom(rdr)
     Consume one token from the Reader RDR and convert it to a value.
     The checks run in this order: the literals nil / true / false, an
     integer (NUMBER_REGEXP), a string (STR_REGEXP, passed through
     unescape), a token matching STR_BAD_REGEXP (returned as a MalError),
     a keyword (leading ':', which is stripped), and otherwise a symbol.
   SEE ALSO: read_form, unescape
 */
{
  tok = reader_next(rdr)

  // Fixed literals.
  if (tok == "nil") return MAL_NIL
  if (tok == "true") return MAL_TRUE
  if (tok == "false") return MAL_FALSE

  // Pattern-based atoms; the string test must precede the bad-string test.
  if (regmatch(NUMBER_REGEXP, tok)) return MalNumber(val=tonum(tok))
  if (regmatch(STR_REGEXP, tok)) return MalString(val=unescape(tok))
  if (regmatch(STR_BAD_REGEXP, tok)) {
    return MalError(message=("expected '\"', got EOF"))
  }

  // Keywords keep everything after the leading colon.
  if (strpart(tok, 1:1) == ":") return MalKeyword(val=strpart(tok, 2:))

  return MalSymbol(val=tok)
}
|
|
|
|
func read_seq(rdr, start_char, end_char)
/* DOCUMENT elements = read_seq(rdr, start_char, end_char)
     Consume a delimited sequence from the Reader RDR: the token
     START_CHAR, then forms read with read_form until the token END_CHAR,
     which is consumed as well.  Returns an array of pointers to the forms
     ([] for an empty sequence), or a MalError if the opening token is not
     START_CHAR, the tokens run out before END_CHAR, or a nested read_form
     returns a MalError.
   SEE ALSO: read_list, read_vector, read_hashmap
 */
{
  opener = reader_next(rdr)
  if (opener != start_char) {
    // NOTE(review): the message says EOF whatever the token actually was.
    return MalError(message=("expected '" + start_char + "', got EOF"))
  }

  items = []
  for (upcoming = reader_peek(rdr); upcoming != end_char; upcoming = reader_peek(rdr)) {
    // reader_peek yields string(0) once the tokens are exhausted.
    if (upcoming == string(0)) {
      return MalError(message=("expected '" + end_char + "', got EOF"))
    }
    item = read_form(rdr)
    if (structof(item) == MalError) return item
    grow, items, [&item]
  }

  // Step over the closing delimiter.
  closer = reader_next(rdr)
  return items
}
|
|
|
|
func read_list(rdr)
/* DOCUMENT obj = read_list(rdr)
     Read a "(" ... ")" sequence from the Reader RDR with read_seq and wrap
     the resulting element array in a MalList.  A MalError coming back from
     read_seq is returned unchanged.
   SEE ALSO: read_seq, read_vector, read_hashmap
 */
{
  items = read_seq(rdr, "(", ")")
  failed = (structof(items) == MalError)
  if (failed) return items
  return MalList(val=&items)
}
|
|
|
|
func read_vector(rdr)
/* DOCUMENT obj = read_vector(rdr)
     Read a "[" ... "]" sequence from the Reader RDR with read_seq and wrap
     the resulting element array in a MalVector.  A MalError coming back
     from read_seq is returned unchanged.
   SEE ALSO: read_seq, read_list, read_hashmap
 */
{
  items = read_seq(rdr, "[", "]")
  failed = (structof(items) == MalError)
  if (failed) return items
  return MalVector(val=&items)
}
|
|
|
|
func read_hashmap(rdr)
/* DOCUMENT obj = read_hashmap(rdr)
     Read a "{" ... "}" sequence from the Reader RDR with read_seq and hand
     the resulting element array to array_to_hashmap.  A MalError coming
     back from read_seq is returned unchanged.
   SEE ALSO: read_seq, read_list, read_vector
 */
{
  items = read_seq(rdr, "{", "}")
  failed = (structof(items) == MalError)
  if (failed) return items
  return array_to_hashmap(items)
}
|
|
|
|
func reader_macro(rdr, symbol_name)
/* DOCUMENT obj = reader_macro(rdr, symbol_name)
     Expand a one-token reader shortcut: consume the shortcut token from
     the Reader RDR, read the form that follows, and return a MalList
     holding the symbol SYMBOL_NAME followed by that form.  A MalError from
     read_form is returned unchanged.
   SEE ALSO: read_form, reader_with_meta_macro
 */
{
  // Discard the shortcut token itself; read_form has already dispatched on it.
  skipped = reader_next(rdr)

  target = read_form(rdr)
  if (structof(target) == MalError) return target

  pair = [&MalSymbol(val=symbol_name), &target]
  return MalList(val=&pair)
}
|
|
|
|
func reader_with_meta_macro(rdr)
/* DOCUMENT obj = reader_with_meta_macro(rdr)
     Expand the metadata shortcut: consume the shortcut token from the
     Reader RDR, read the metadata form and then the target form, and
     return a MalList of the symbol with-meta, the target form and the
     metadata form (note the swapped order).  A MalError from either
     read_form call is returned unchanged.
   SEE ALSO: read_form, reader_macro
 */
{
  // Discard the shortcut token itself; read_form has already dispatched on it.
  skipped = reader_next(rdr)

  // The metadata is written first in the source text ...
  metadata = read_form(rdr)
  if (structof(metadata) == MalError) return metadata

  // ... followed by the form it is attached to.
  target = read_form(rdr)
  if (structof(target) == MalError) return target

  triple = [&MalSymbol(val="with-meta"), &target, &metadata]
  return MalList(val=&triple)
}
|
|
|
|
func read_form(rdr)
/* DOCUMENT obj = read_form(rdr)
     Read one form from the Reader RDR, choosing the reader by looking at
     (but not consuming) the next token: reader shortcuts go to
     reader_macro / reader_with_meta_macro, opening delimiters go to
     read_list / read_vector / read_hashmap, a stray closing delimiter
     yields a MalError, and anything else goes to read_atom.
   SEE ALSO: read_str, read_atom, read_seq
 */
{
  tok = reader_peek(rdr)

  // Collections.
  if (tok == "(") return read_list(rdr)
  if (tok == "[") return read_vector(rdr)
  if (tok == "{") return read_hashmap(rdr)

  // A closing delimiter with nothing open.
  if (tok == ")") return MalError(message="unexpected ')'")
  if (tok == "]") return MalError(message="unexpected ']'")
  if (tok == "}") return MalError(message="unexpected '}'")

  // Reader shortcuts.
  if (tok == "'") return reader_macro(rdr, "quote")
  if (tok == "`") return reader_macro(rdr, "quasiquote")
  if (tok == "~") return reader_macro(rdr, "unquote")
  if (tok == "~@") return reader_macro(rdr, "splice-unquote")
  if (tok == "@") return reader_macro(rdr, "deref")
  if (tok == "^") return reader_with_meta_macro(rdr)

  return read_atom(rdr)
}
|
|
|
|
func read_str(str)
/* DOCUMENT obj = read_str(str)
     Tokenize the string STR, build a Reader positioned on the first token,
     and return whatever read_form produces for it.
   SEE ALSO: tokenize, read_form
 */
{
  token_list = tokenize(str)
  reader = Reader(tokens=&token_list, pos=1)
  return read_form(reader)
}
|