Switch to bumpalo

This commit is contained in:
Richard Feldman 2019-09-02 15:07:45 -04:00
parent 9f07ebabfe
commit 505b9f7b02
4 changed files with 160 additions and 36 deletions

14
Cargo.lock generated
View File

@ -18,6 +18,11 @@ name = "autocfg"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bumpalo"
version = "2.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.2"
@ -253,6 +258,7 @@ dependencies = [
name = "roc"
version = "0.1.0"
dependencies = [
"bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"dogged 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"fraction 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -264,7 +270,6 @@ dependencies = [
"num 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"petgraph 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"pretty_assertions 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"typed-arena 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -306,11 +311,6 @@ dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "typed-arena"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "typenum"
version = "1.10.0"
@ -362,6 +362,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum ascii 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "91e320562a8fa3286a481b7189f89578ace6b20df99e123c87f2f509c957c5d6"
"checksum autocfg 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "0e49efa51329a5fd37e7c79db4621af617cd4e3e5bc224939808d076077077bf"
"checksum bumpalo 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ad807f2fc2bf185eeb98ff3a901bd46dc5ad58163d0fa4577ba0d25674d71708"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum cfg-if 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "082bb9b28e00d3c9d39cc03e64ce4cea0f1bb9b3fde493f0cbc008472d22bdf4"
"checksum combine 3.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "da3da6baa321ec19e1cc41d31bf599f00c783d0517095cdaf0332e3fe8d20680"
@ -396,7 +397,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
"checksum sized-chunks 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a2a2eb3fe454976eefb479f78f9b394d34d661b647c6326a3a6e66f68bb12c26"
"checksum syn 0.15.40 (registry+https://github.com/rust-lang/crates.io-index)" = "bc945221ccf4a7e8c31222b9d1fc77aefdd6638eb901a6ce457a3dc29d4c31e8"
"checksum typed-arena 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f70f5c346cc11bc044ae427ab2feae213350dca9e2d637047797d5ff316a646"
"checksum typenum 1.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "612d636f949607bdf9b123b4a6f6d966dedf3ff669f7f045890d3a4a73948169"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum unindent 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "834b4441326c660336850c5c0926cc20548e848967a5f57bc20c2b741c8d41f4"

View File

@ -12,7 +12,7 @@ im-rc = "13.0.0"
fraction = "0.6.2"
num = "0.2.0"
fxhash = "0.2.1"
typed-arena = "1.5.0"
bumpalo = "2.6.0"
[dev-dependencies]
pretty_assertions = "0.5.1"

View File

@ -24,7 +24,7 @@ extern crate im_rc;
extern crate fraction;
extern crate num;
extern crate fxhash;
extern crate typed_arena;
extern crate bumpalo;
#[macro_use] extern crate combine;
#[macro_use] extern crate log;

View File

@ -1,6 +1,6 @@
use region;
use operator::Operator;
use typed_arena::Arena;
use bumpalo::Bump;
use std::mem;
// Strategy:
@ -51,7 +51,7 @@ type VariantName = str;
/// A parsed expression. This uses lifetimes extensively for two reasons:
///
/// 1. It uses Arena::alloc for all allocations, which returns a reference.
/// 1. It uses Bump::alloc for all allocations, which returns a reference.
/// 2. It often stores references into the input string instead of allocating.
///
/// This dramatically reduces allocations during parsing. Once parsing is done,
@ -187,24 +187,18 @@ fn pattern_size() {
}
type ParseResult<'a, Output> = Result<(State<'a>, Output), State<'a>>;
struct Env<'a> {
expr_allocator: Arena<Expr<'a>>,
pattern_allocator: Arena<Pattern<'a>>,
state: State<'a>,
}
/// Outcome of running a parser.
///
/// * `Ok((state, output))` - a `State` paired with the parsed `Output`.
/// * `Err((state, attempting))` - a `State` paired with an `Attempting` value.
type ParseResult<'a, Output> = Result<(State<'a>, Output), (State<'a>, Attempting)>;
/// Something that can be run to produce a `ParseResult<'a, Output>`.
///
/// NOTE(review): two `parse` signatures are listed below. The first takes a
/// single `&'a Env<'a>`; the second takes a `&'a Bump`, a `&'a State<'a>` and
/// an `Attempting`. A trait cannot declare the same method twice, so these
/// look like the before/after forms of one method - confirm which is live.
trait Parser<'a, Output> {
// Signature taking the combined `Env`.
fn parse(&self, &'a Env<'a>) -> ParseResult<'a, Output>;
// Signature taking the bump arena, the state and the `Attempting` value
// as separate arguments.
fn parse(&self, &'a Bump, &'a State<'a>, attempting: Attempting) -> ParseResult<'a, Output>;
}
/// Blanket impl: any function or closure with the right call signature is a
/// `Parser`; `parse` simply invokes it with the arguments it was given.
///
/// NOTE(review): both an `Env`-based and a `Bump`/`State`/`Attempting`-based
/// `where` clause and `parse` body are listed here; they look like the
/// before/after forms of the same impl - confirm which is live.
impl<'a, F, Output> Parser<'a, Output> for F
// Bound for the `Env`-based form.
where F: Fn(&'a Env<'a>) -> ParseResult<'a, Output>,
// Bound for the arena/state/attempting form.
where F: Fn(&'a Bump, &'a State<'a>, Attempting) -> ParseResult<'a, Output>,
{
fn parse(&self, env: &'a Env<'a>) -> ParseResult<'a, Output> {
// Delegate straight to the wrapped callable.
self(env)
fn parse(&self, arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> ParseResult<'a, Output> {
// Delegate straight to the wrapped callable.
self(arena, state, attempting)
}
}
@ -213,20 +207,29 @@ where
P: Parser<'a, Before>,
F: Fn(Before) -> After,
{
move |env|
move |arena, state, attempting|
parser
.parse(env)
.parse(arena, state, attempting)
.map(|(next_state, output)| (next_state, transform(output)))
}
/// Pins the `Attempting` context used when running `parser`.
///
/// The returned parser forwards the arena and state it receives to `parser`
/// unchanged, but substitutes the `attempting` value supplied here for
/// whatever `Attempting` its own caller passes in.
fn attempt<'a, P, Val>(attempting: Attempting, parser: P) -> impl Parser<'a, Val>
where
    P: Parser<'a, Val>,
{
    move |arena, state, _caller_attempting| {
        // The caller's value is dropped on purpose; ours takes its place.
        parser.parse(arena, state, attempting)
    }
}
/// A keyword with no newlines in it.
fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> {
// We can't have newlines because we don't attempt to advance the row
// in the state, only the column.
debug_assert!(!kw.contains("\n"));
move |env: &'a Env| {
let input = env.state.input;
move |arena: &'a Bump, state: &'a State<'a>, attempting| {
let input = state.input;
match input.get(0..kw.len()) {
Some(next) if next == kw => {
@ -234,12 +237,12 @@ fn keyword<'a>(kw: &'static str) -> impl Parser<'a, ()> {
Ok((State {
input: &input[len..],
column: env.state.column + len as u32,
column: state.column + len as u32,
..env.state
..*state
}, ()))
},
_ => Err(env.state.clone()),
_ => Err((state.clone(), attempting)),
}
}
}
@ -249,19 +252,19 @@ where
P: Parser<'a, A>,
F: Fn(&A) -> bool,
{
move |env| {
if let Ok((next_state, output)) = parser.parse(env) {
move |arena: &'a Bump, state: &'a State<'a>, attempting| {
if let Ok((next_state, output)) = parser.parse(arena, state, attempting) {
if predicate(&output) {
return Ok((next_state, output));
}
}
Err(env.state.clone())
Err((state.clone(), attempting))
}
}
fn any<'a>(env: &'a Env) -> ParseResult<'a, char> {
let input = env.state.input;
fn any<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> ParseResult<'a, char> {
let input = state.input;
match input.chars().next() {
Some(ch) => {
@ -269,7 +272,7 @@ fn any<'a>(env: &'a Env) -> ParseResult<'a, char> {
let mut new_state = State {
input: &input[len..],
..env.state
..*state
};
if ch == '\n' {
@ -279,7 +282,7 @@ fn any<'a>(env: &'a Env) -> ParseResult<'a, char> {
Ok((new_state, ch))
}
_ => Err(env.state.clone()),
_ => Err((state.clone(), attempting)),
}
}
@ -296,3 +299,124 @@ pub enum Attempting {
Keyword,
}
// fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> Expr {
// between(char('"'), char('"'),
// zero_or_more(
// choice((
// // Handle the edge cases where the interpolation happens
// // to be at the very beginning of the string literal,
// // or immediately following the previous interpolation.
// attempt(string("\\("))
// .with(value("".to_string()))
// .and(located(ident()).skip(char(')'))),
// // Parse a bunch of non-interpolated characters until we hit \(
// one_or_more(string_body())
// .map(|chars: Vec<char>| chars.into_iter().collect::<String>())
// .and(choice((
// attempt(string("\\(").with(located(ident()).skip(char(')')))),
// // If we never encountered \( then we hit the end of
// // the string literal. Use empty Ident here because
// // we're going to pop this Ident off the array anyhow.
// located(value("".to_string()))
// ))),
// ))
// )
// .map(|mut pairs| {
// match pairs.pop() {
// None => Expr::EmptyStr,
// Some(( trailing_str, located_name )) => {
// let mut ident_pairs = pairs.into_iter().map(|(string, located_name)| {
// ( string, located_name.map(|name| Ident::Unqualified(name.clone())) )
// }).collect::<Vec<(String, Located<Ident>)>>();
// if located_name.value.is_empty() {
// if ident_pairs.is_empty() {
// // We didn't find any interpolation at all. This is a string literal!
// Expr::Str(trailing_str.to_string())
// } else {
// Expr::InterpolatedStr(ident_pairs, trailing_str.to_string())
// }
// } else {
// // This is an interpolated string where the interpolation
// // happened to occur at the very end of the literal.
// // Put the tuple back.
// ident_pairs.push((
// trailing_str,
// located_name.map(|name| Ident::Unqualified(name.clone()))
// ));
// Expr::InterpolatedStr(ident_pairs, "".to_string())
// }
// }
// }
// }))
// }
/// Work-in-progress port of the string-literal parser to the arena-based
/// `Parser` interface.
///
/// Returns a parser that fails with `(state.clone(), attempting)` unless the
/// remaining input starts with `'\n'`. The success path has not been ported
/// yet, so reaching it panics via `unimplemented!`.
///
/// NOTE(review): the outer `arena`, `state` and `attempting` parameters are
/// never read - the returned closure shadows all three with its own
/// arguments. They are kept only so the signature stays unchanged.
///
/// NOTE(review): testing for `'\n'` looks like a placeholder; presumably the
/// opening `'"'` was intended - confirm before finishing the port.
fn string_literal<'a>(arena: &'a Bump, state: &'a State<'a>, attempting: Attempting) -> impl Parser<'a, ()> {
    move |arena: &'a Bump, state: &'a State<'a>, attempting| {
        let input = state.input;

        // `str` has no `first()`; peek at the leading character through the
        // `chars()` iterator, which yields an `Option<char>`.
        if input.chars().next() != Some('\n') {
            // A failed parse reports a `State` (not the raw input slice)
            // together with what was being attempted.
            return Err((state.clone(), attempting));
        }

        // match input.get(0..kw.len()) {
        // Some(next) if next == kw => {
        // let len = kw.len();
        // Ok((State {
        // input: &input[len..],
        // column: state.column + len as u32,
        // ..*state
        // }, ()))
        // },
        // _ => Err((state.clone(), attempting)),
        // }
        // }
        // parser(|input: &mut I| {
        // let (parsed_char, consumed) = try!(any().parse_lazy(input).into());
        // let mut escaped = satisfy_map(|escaped_char| {
        // // NOTE! When modifying this, revisit char_body too!
        // // Their implementations are similar but not the same.
        // match escaped_char {
        // '"' => Some('"'),
        // '\\' => Some('\\'),
        // 't' => Some('\t'),
        // 'n' => Some('\n'),
        // 'r' => Some('\r'),
        // _ => None,
        // }
        // });
        // match parsed_char {
        // '\\' => {
        // if look_ahead(char('(')).parse_stream(input).is_ok() {
        // // If we hit a \( then we're doing string interpolation.
        // // Bail out after consuming the backslash!
        // Err(Consumed::Empty(I::Error::empty(input.position()).into()))
        // } else {
        // consumed.combine(|_| {
        // // Try to parse basic backslash-escaped literals
        // // e.g. \t, \n, \r
        // escaped.parse_stream(input).or_else(|_|
        // // If we didn't find any of those, try \u{...}
        // unicode_code_pt().parse_stream(input)
        // )
        // })
        // }
        // },
        // '"' => {
        // // Never consume a double quote unless it was preceded by a
        // // backslash. This means we're at the end of the string literal!
        // Err(Consumed::Empty(I::Error::empty(input.position()).into()))
        // },
        // _ => Ok((parsed_char, consumed))
        // }
        // })

        // The body of the literal is not parsed yet; make that explicit
        // instead of falling off the end of the closure with `()`.
        unimplemented!("string_literal: success path has not been ported yet")
    }
}