mal/rust/reader.rs (mirror of https://github.com/kanaka/mal.git)

Commit 4ef4b17cd0 by Joel Martin: rust: Update rust and update/refactor implementation
This rewrites the Rust implementation to use many new features of the
current version of Rust.

The refactor is much more concise (only two-thirds the size) and
switches to using more of the functional features (iterators, closures,
etc.) that have been added or improved in Rust.

Unfortunately, the implementation is a fair bit slower (about 30% on
perf3). It's not clear why this is the case, but concision and being
more idiomatic win out over performance here.
2018-07-13 17:21:00 -05:00

use std::rc::Rc;
use regex::{Regex, Captures};
use types::{MalVal, MalRet, MalErr, error, hash_map};
use types::MalVal::{Nil, Bool, Int, Str, Sym, List, Vector};
use types::MalErr::ErrString;
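
// A Reader is a simple cursor over the token stream: `next` consumes and
// returns the current token, `peek` returns it without advancing; both
// report an "underflow" error once the tokens are exhausted.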
#[derive(Debug, Clone)]
struct Reader {
    tokens: Vec<String>,
    pos: usize,
}

impl Reader {
    fn next(&mut self) -> Result<String, MalErr> {
        self.pos += 1;
        Ok(self
            .tokens
            .get(self.pos - 1)
            .ok_or(ErrString("underflow".to_string()))?
            .to_string())
    }
    fn peek(&self) -> Result<String, MalErr> {
        Ok(self
            .tokens
            .get(self.pos)
            .ok_or(ErrString("underflow".to_string()))?
            .to_string())
    }
}
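
// Split the input into MAL tokens with a single regex; comment tokens
// (those starting with ';') are dropped.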
fn tokenize(str: &str) -> Vec<String> {
    lazy_static! {
        static ref RE: Regex = Regex::new(
            r###"[\s,]*(~@|[\[\]{}()'`~^@]|"(?:\\.|[^\\"])*"?|;.*|[^\s\[\]{}('"`,;)]+)"###
        ).unwrap();
    }

    let mut res = vec![];
    for cap in RE.captures_iter(str) {
        if cap[1].starts_with(";") {
            continue;
        }
        res.push(String::from(&cap[1]));
    }
    res
}
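
// Undo reader escapes inside a string literal: `\n` becomes a newline and
// any other backslash-escaped character becomes the character itself.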
fn unescape_str(s: &str) -> String {
    lazy_static! {
        static ref RE: Regex = Regex::new(r#"\\(.)"#).unwrap();
    }
    RE.replace_all(&s, |caps: &Captures| {
        format!("{}", if &caps[1] == "n" { "\n" } else { &caps[1] })
    })
    .to_string()
}
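
// Read a single scalar value: nil, booleans, integers, strings, keywords
// (stored as Str values prefixed with U+029E), or symbols.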
fn read_atom(rdr: &mut Reader) -> MalRet {
    lazy_static! {
        static ref INT_RE: Regex = Regex::new(r"^-?[0-9]+$").unwrap();
    }
    let token = rdr.next()?;
    match &token[..] {
        "nil" => Ok(Nil),
        "false" => Ok(Bool(false)),
        "true" => Ok(Bool(true)),
        _ => {
            if INT_RE.is_match(&token) {
                Ok(Int(token.parse().unwrap()))
            } else if token.starts_with("\"") {
                // A lone '"' is an unterminated string, not an empty one;
                // the length check avoids slicing past the opening quote.
                if token.len() > 1 && token.ends_with("\"") {
                    Ok(Str(unescape_str(&token[1..token.len() - 1])))
                } else {
                    error("expected '\"', got EOF")
                }
            } else if token.starts_with(":") {
                Ok(Str(format!("\u{29e}{}", &token[1..])))
            } else {
                Ok(Sym(token.to_string()))
            }
        }
    }
}
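
// Read forms until the matching closing delimiter, then build a list,
// vector, or hash-map depending on which delimiter closed the sequence.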
fn read_seq(rdr: &mut Reader, end: &str) -> MalRet {
    let mut seq: Vec<MalVal> = vec![];
    rdr.next()?;
    loop {
        let token = match rdr.peek() {
            Ok(t) => t,
            Err(_) => return error(&format!("expected '{}', got EOF", end)),
        };
        if token == end {
            break;
        }
        seq.push(read_form(rdr)?);
    }
    let _ = rdr.next();
    match end {
        ")" => Ok(list!(seq)),
        "]" => Ok(vector!(seq)),
        "}" => hash_map(seq),
        _ => error("read_seq unknown end value"),
    }
}
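
// Dispatch on the next token: expand reader macros (quote, quasiquote,
// unquote, splice-unquote, with-meta, deref), read collections, or fall
// back to reading an atom.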
fn read_form(rdr: &mut Reader) -> MalRet {
    let token = rdr.peek()?;
    match &token[..] {
        "'" => {
            let _ = rdr.next();
            Ok(list![Sym("quote".to_string()), read_form(rdr)?])
        }
        "`" => {
            let _ = rdr.next();
            Ok(list![Sym("quasiquote".to_string()), read_form(rdr)?])
        }
        "~" => {
            let _ = rdr.next();
            Ok(list![Sym("unquote".to_string()), read_form(rdr)?])
        }
        "~@" => {
            let _ = rdr.next();
            Ok(list![Sym("splice-unquote".to_string()), read_form(rdr)?])
        }
        "^" => {
            let _ = rdr.next();
            // `^meta form`: the metadata is read first but goes after the
            // form in the resulting (with-meta form meta) call.
            let meta = read_form(rdr)?;
            Ok(list![Sym("with-meta".to_string()), read_form(rdr)?, meta])
        }
        "@" => {
            let _ = rdr.next();
            Ok(list![Sym("deref".to_string()), read_form(rdr)?])
        }
        ")" => error("unexpected ')'"),
        "(" => read_seq(rdr, ")"),
        "]" => error("unexpected ']'"),
        "[" => read_seq(rdr, "]"),
        "}" => error("unexpected '}'"),
        "{" => read_seq(rdr, "}"),
        _ => read_atom(rdr),
    }
}
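
// Entry point: tokenize the input and read a single top-level form.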
pub fn read_str(str: String) -> MalRet {
    let tokens = tokenize(&str);
    //println!("tokens: {:?}", tokens);
    if tokens.is_empty() {
        return error("no input");
    }
    read_form(&mut Reader { pos: 0, tokens })
}
// vim: ts=2:sw=2:expandtab