1
1
mirror of https://github.com/kanaka/mal.git synced 2024-09-20 01:57:09 +03:00
mal/rpython/reader.py
2015-10-30 22:05:49 -05:00

134 lines
3.9 KiB
Python

import sys
IS_RPYTHON = sys.argv[0].endswith('rpython')
if IS_RPYTHON:
from rpython.rlib.rsre import rsre_re as re
else:
import re
import mal_types as types
from mal_types import (MalSym, MalInt, MalStr, _keywordu,
_list, _listl, _vectorl, _hash_mapl)
class Blank(Exception):
    """Signals that the input contained no tokens (raised by read_str)."""
class Reader():
    """Cursor over a list of tokens.

    `tokens` is the token list and `position` is the index of the token
    that the next call to peek() or next() will look at.
    """

    def __init__(self, tokens, position=0):
        self.tokens = tokens
        self.position = position

    def next(self):
        """Advance past the current token and return it."""
        # The cursor is moved before the list is indexed, so it is already
        # advanced even if the lookup fails.
        index = self.position
        self.position = index + 1
        return self.tokens[index]

    def peek(self):
        """Return the current token without consuming it, or None at the end."""
        if self.position < len(self.tokens):
            return self.tokens[self.position]
        return None
def tokenize(str):
    """Split source text into a list of token strings.

    Whitespace and commas in front of each token are skipped by the
    pattern, and every token that starts with ';' (a comment) is left
    out of the returned list.
    """
    re_str = "[\s,]*(~@|[\[\]{}()'`~^@]|\"(?:[\\\\].|[^\\\\\"])*\"|;.*|[^\s\[\]{}()'\"`@,;]+)"
    # Under RPython the pattern is passed along as a plain string;
    # otherwise it is compiled first.
    if not IS_RPYTHON:
        tok_re = re.compile(re_str)
    else:
        tok_re = re_str
    tokens = []
    for candidate in re.findall(tok_re, str):
        if candidate[0] == ';':
            continue
        tokens.append(candidate)
    return tokens
def _unescape(s):
    """Decode the escape sequences of a string-literal body in one pass.

    Three two-character sequences are recognised: backslash + double
    quote (a double quote), backslash + 'n' (a newline) and backslash +
    backslash (a single backslash).  Any other backslash is copied
    through unchanged.

    Scanning left to right means an escaped backslash is consumed as a
    pair, so the character after it is never mistaken for the start of
    another escape (chained whole-string replacements get this wrong).
    """
    out = []
    i = 0
    n = len(s)
    while i < n:
        ch = s[i]
        if ch == u'\\' and i + 1 < n:
            nxt = s[i + 1]
            if nxt == u'n':
                out.append(u'\n')
                i += 2
                continue
            if nxt == u'"' or nxt == u'\\':
                out.append(nxt)
                i += 2
                continue
        out.append(ch)
        i += 1
    return u''.join(out)


def read_atom(reader):
    """Consume one token from `reader` and return the atom it denotes.

    The token is classified in this order:
      * optional '-' followed by digits  -> MalInt
      * starts with a double quote       -> MalStr (escapes decoded)
      * starts with ':'                  -> keyword built by _keywordu
      * "nil" / "true" / "false"         -> types.nil / true / false
      * anything else                    -> MalSym
    """
    # Under RPython the pattern is passed as a plain string; otherwise it
    # is compiled first.
    if IS_RPYTHON:
        int_re = '-?[0-9]+$'
    else:
        int_re = re.compile('-?[0-9]+$')
    token = reader.next()
    if re.match(int_re, token):
        return MalInt(int(token))
    elif token[0] == '"':
        end = len(token) - 1
        if end < 2:
            # Nothing between the quotes: the literal is the empty string.
            return MalStr(u"")
        else:
            # Strip the surrounding quotes, then decode the escapes.
            return MalStr(_unescape(unicode(token[1:end])))
    elif token[0] == ':':
        return _keywordu(unicode(token[1:]))
    elif token == "nil":
        return types.nil
    elif token == "true":
        return types.true
    elif token == "false":
        return types.false
    else:
        return MalSym(unicode(token))
def read_sequence(reader, start='(', end=')'):
    """Read the forms between `start` and `end` into a Python list.

    The reader must be positioned on the `start` token.  Forms are read
    with read_form until the `end` token is seen; that closing token is
    consumed before returning.  A wrong opening token or running out of
    tokens before `end` is reported through types.throw_str.
    """
    opener = reader.next()
    if opener != start:
        types.throw_str("expected '" + start + "'")
    forms = []
    while True:
        upcoming = reader.peek()
        if upcoming == end:
            break
        if not upcoming:
            types.throw_str("expected '" + end + "', got EOF")
        forms.append(read_form(reader))
    # Step over the closing delimiter.
    reader.next()
    return forms
def read_list(reader):
    """Read a '(' ... ')' sequence and wrap its forms with _listl."""
    return _listl(read_sequence(reader, '(', ')'))
def read_vector(reader):
    """Read a '[' ... ']' sequence and wrap its forms with _vectorl."""
    return _vectorl(read_sequence(reader, '[', ']'))
def read_hash_map(reader):
    """Read a '{' ... '}' sequence and pass its forms to _hash_mapl."""
    return _hash_mapl(read_sequence(reader, '{', '}'))
def read_form(reader):
    """Read one complete form starting at the reader's current token.

    Reader macros are expanded into lists headed by a symbol:
      'x    -> (quote x)
      `x    -> (quasiquote x)
      ~x    -> (unquote x)
      ~@x   -> (splice-unquote x)
      @x    -> (deref x)
      ^m x  -> (with-meta x m)
    An opening '(', '[' or '{' is handed to read_list, read_vector or
    read_hash_map; any other token is handed to read_atom.

    A token starting with ';' is consumed and None is returned.  A stray
    closing delimiter, or the end of input where a form is required, is
    reported through types.throw_str.
    """
    token = reader.peek()
    if token is None:
        # peek() returns None once the tokens are exhausted, e.g. when the
        # input ends right after a reader macro such as a lone quote.
        # Report it as a reader error instead of indexing into None.
        types.throw_str("unexpected EOF")
    # reader macros/transforms
    elif token[0] == ';':
        reader.next()
        return None
    elif token == '\'':
        reader.next()
        return _list(MalSym(u'quote'), read_form(reader))
    elif token == '`':
        reader.next()
        return _list(MalSym(u'quasiquote'), read_form(reader))
    elif token == '~':
        reader.next()
        return _list(MalSym(u'unquote'), read_form(reader))
    elif token == '~@':
        reader.next()
        return _list(MalSym(u'splice-unquote'), read_form(reader))
    elif token == '^':
        reader.next()
        # The metadata form is written first in the source, but it goes
        # last in the resulting with-meta list.
        meta = read_form(reader)
        return _list(MalSym(u'with-meta'), read_form(reader), meta)
    elif token == '@':
        reader.next()
        return _list(MalSym(u'deref'), read_form(reader))
    # list
    elif token == ')':
        types.throw_str("unexpected ')'")
    elif token == '(':
        return read_list(reader)
    # vector
    elif token == ']':
        types.throw_str("unexpected ']'")
    elif token == '[':
        return read_vector(reader)
    # hash-map
    elif token == '}':
        types.throw_str("unexpected '}'")
    elif token == '{':
        return read_hash_map(reader)
    # atom
    else:
        return read_atom(reader)
def read_str(str):
    """Tokenize source text and read a single form from the start of it.

    Raises Blank when the text yields no tokens at all.
    """
    tokens = tokenize(str)
    if not tokens:
        raise Blank("Blank Line")
    reader = Reader(tokens)
    return read_form(reader)