First pass at parsing case.

This commit is contained in:
Richard Feldman 2019-10-04 17:39:18 +03:00
parent ebaed27193
commit 59bdb21ea2
6 changed files with 179 additions and 30 deletions

View File

@ -752,7 +752,7 @@ fn canonicalize<'a>(
| ast::Expr::QualifiedField(_, _) | ast::Expr::QualifiedField(_, _)
| ast::Expr::AccessorFunction(_) | ast::Expr::AccessorFunction(_)
| ast::Expr::If(_) | ast::Expr::If(_)
| ast::Expr::Case(_) | ast::Expr::Case(_, _)
| ast::Expr::Variant(_, _) | ast::Expr::Variant(_, _)
| ast::Expr::MalformedIdent(_) | ast::Expr::MalformedIdent(_)
| ast::Expr::MalformedClosure | ast::Expr::MalformedClosure

View File

@ -70,7 +70,10 @@ pub enum Expr<'a> {
// Conditionals // Conditionals
If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)), If(&'a (Loc<Expr<'a>>, Loc<Expr<'a>>, Loc<Expr<'a>>)),
Case(&'a (Loc<Expr<'a>>, &'a [(Loc<Pattern<'a>>, Loc<Expr<'a>>)])), Case(
&'a Loc<Expr<'a>>,
Vec<'a, &'a (Loc<Pattern<'a>>, Loc<Expr<'a>>)>,
),
// Blank Space (e.g. comments, spaces, newlines) before or after an expression. // Blank Space (e.g. comments, spaces, newlines) before or after an expression.
// We preserve this for the formatter; canonicalization ignores it. // We preserve this for the formatter; canonicalization ignores it.
@ -310,7 +313,6 @@ fn pattern_size() {
/// "currently attempting to parse a list." This helps error messages! /// "currently attempting to parse a list." This helps error messages!
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Attempting { pub enum Attempting {
Expression,
List, List,
Keyword, Keyword,
StringLiteral, StringLiteral,
@ -325,6 +327,8 @@ pub enum Attempting {
Module, Module,
Record, Record,
Identifier, Identifier,
CaseCondition,
CaseBranch,
} }
impl<'a> Expr<'a> { impl<'a> Expr<'a> {

View File

@ -24,6 +24,7 @@ use bumpalo::collections::String;
use bumpalo::collections::Vec; use bumpalo::collections::Vec;
use bumpalo::Bump; use bumpalo::Bump;
use operator::Operator; use operator::Operator;
use parse;
use parse::ast::{Attempting, Def, Expr, Pattern, Spaceable}; use parse::ast::{Attempting, Def, Expr, Pattern, Spaceable};
use parse::blankspace::{ use parse::blankspace::{
space0, space0_after, space0_around, space0_before, space1, space1_around, space1_before, space0, space0_after, space0_around, space0_before, space1, space1_around, space1_before,
@ -32,11 +33,10 @@ use parse::ident::{ident, Ident, MaybeQualified};
use parse::number_literal::number_literal; use parse::number_literal::number_literal;
use parse::parser::{ use parse::parser::{
and, attempt, between, char, either, loc, map, map_with_arena, not, not_followed_by, one_of16, and, attempt, between, char, either, loc, map, map_with_arena, not, not_followed_by, one_of16,
one_of2, one_of4, one_of5, one_of9, one_or_more, optional, sep_by0, skip_first, skip_second, one_of2, one_of5, one_of9, one_or_more, optional, sep_by0, skip_first, skip_second, string,
string, then, unexpected, unexpected_eof, zero_or_more, Either, Fail, FailReason, ParseResult, then, unexpected, unexpected_eof, zero_or_more, Either, Fail, FailReason, ParseResult, Parser,
Parser, State, State,
}; };
use parse::string_literal::string_literal;
use region::Located; use region::Located;
// pub fn api<'a>() -> impl Parser<'a, Module<'a>> { // pub fn api<'a>() -> impl Parser<'a, Module<'a>> {
@ -121,7 +121,7 @@ fn parse_expr<'a>(min_indent: u16, arena: &'a Bump, state: State<'a>) -> ParseRe
}, },
); );
attempt(Attempting::Expression, expr_parser).parse(arena, state) expr_parser.parse(arena, state)
} }
pub fn loc_parenthetical_expr<'a>(min_indent: u16) -> impl Parser<'a, Located<Expr<'a>>> { pub fn loc_parenthetical_expr<'a>(min_indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
@ -290,7 +290,7 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
| Expr::AssignField(_, _) | Expr::AssignField(_, _)
| Expr::Defs(_) | Expr::Defs(_)
| Expr::If(_) | Expr::If(_)
| Expr::Case(_) | Expr::Case(_, _)
| Expr::MalformedClosure | Expr::MalformedClosure
| Expr::QualifiedField(_, _) => Err(Fail { | Expr::QualifiedField(_, _) => Err(Fail {
attempting: Attempting::Def, attempting: Attempting::Def,
@ -543,14 +543,19 @@ fn parse_closure_param<'a>(
} }
fn pattern<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> { fn pattern<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
one_of4( one_of5(
underscore_pattern(), underscore_pattern(),
variant_pattern(), variant_pattern(),
ident_pattern(), ident_pattern(),
record_destructure(min_indent), record_destructure(min_indent),
string_pattern(),
) )
} }
/// Parser for a string-literal pattern: the output of
/// `parse::string_literal::parse()` wrapped in `Pattern::StrLiteral`.
fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
    let literal = parse::string_literal::parse();

    map(literal, Pattern::StrLiteral)
}
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> { fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map(char('_'), |_| Pattern::Underscore) map(char('_'), |_| Pattern::Underscore)
} }
@ -576,10 +581,115 @@ fn ident_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map(unqualified_ident(), Pattern::Identifier) map(unqualified_ident(), Pattern::Identifier)
} }
/// Parser for a `case` expression: the `keyword::CASE` keyword, a condition
/// expression surrounded by spaces, the `keyword::WHEN` keyword, and then the
/// branches parsed by `case_branches`.
///
/// `min_indent` is the smallest indentation accepted for the `case` keyword;
/// it is also passed to the condition's expression and space parsers.
///
/// On success the output is `Expr::Case` holding the arena-allocated
/// condition and the parsed branches.
///
/// NOTE(review): an under-indented `case` currently panics instead of
/// returning a parse failure, and that panic message misspells "wasn't".
pub fn case_expr<'a>(_min_indent: u16) -> impl Parser<'a, Expr<'a>> { pub fn case_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map(string(keyword::CASE), |_| { then(
panic!("TODO implement WHEN"); and(
}) case_with_indent(),
// Parse the condition under the `Attempting::CaseCondition` label. Only the
// located condition is kept as output (see the tuple destructured below).
attempt(
Attempting::CaseCondition,
skip_second(
space1_around(
loc(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
),
string(keyword::WHEN),
),
),
),
// Receives the indentation reported by `case_with_indent` and the located
// condition, then parses the branches.
move |arena, state, (case_indent, loc_condition)| {
if case_indent < min_indent {
panic!("TODO case wasns't indented enough");
}
// Everything in the branches must be indented at least as much as the case itself.
let min_indent = case_indent;
// A failure while parsing the branches is propagated by `?`.
let (branches, state) =
attempt(Attempting::CaseBranch, case_branches(min_indent)).parse(arena, state)?;
Ok((Expr::Case(arena.alloc(loc_condition), branches), state))
},
)
}
/// Parser for the branches of a `case` expression. Each branch is a pattern,
/// a `->`, and an expression.
///
/// The first branch is required: if any part of it fails to parse, that
/// failure is propagated. Further branches are then parsed until one fails,
/// at which point the branches collected so far are returned.
///
/// Each branch is arena-allocated as a `(pattern, expr)` pair; the output is
/// a `Vec` of references to those pairs, in the order they were parsed.
///
/// NOTE(review): an additional branch whose `indent_col` differs from the
/// first branch's panics rather than returning a parse failure.
pub fn case_branches<'a>(
min_indent: u16,
) -> impl Parser<'a, Vec<'a, &'a (Located<Pattern<'a>>, Located<Expr<'a>>)>> {
move |arena, state| {
let mut branches: Vec<'a, &'a (Located<Pattern<'a>>, Located<Expr<'a>>)> =
Vec::with_capacity_in(2, arena);
// 1. Parse the first branch and get its indentation level. (It must be >= min_indent.)
// 2. Parse the other branches. Their indentation levels must be == the first branch's.
let (mut loc_first_pattern, state) =
space1_before(loc(pattern(min_indent)), min_indent).parse(arena, state)?;
// `indent_col` as reported by the state once the first pattern has been parsed;
// later branches are compared against this value.
let original_indent = state.indent_col;
let indented_more = original_indent + 1;
let (spaces_before_arrow, state) = space0(min_indent).parse(arena, state)?;
// Record the spaces before the first "->", if any.
if !spaces_before_arrow.is_empty() {
let region = loc_first_pattern.region;
let value =
Pattern::SpaceAfter(arena.alloc(loc_first_pattern.value), spaces_before_arrow);
loc_first_pattern = Located { region, value };
};
// Parse the first "->" and the expression after it.
let (loc_first_expr, mut state) = skip_first(
string("->"),
// The expr must be indented more than the pattern preceding it
space0_before(
loc(move |arena, state| parse_expr(indented_more, arena, state)),
indented_more,
),
)
.parse(arena, state)?;
// Record this as the first branch, then optionally parse additional branches.
branches.push(arena.alloc((loc_first_pattern, loc_first_expr)));
// Parser for every branch after the first: a pattern surrounded by spaces,
// an indentation check against the first branch, then "->" and an expression.
// NOTE(review): unlike the first branch, the expression here is parsed with
// `min_indent` and `space1_before` rather than `indented_more` and
// `space0_before` — confirm this difference is intended.
let branch_parser = and(
then(
space1_around(loc(pattern(min_indent)), min_indent),
move |_arena, state, loc_pattern| {
if state.indent_col == original_indent {
Ok((loc_pattern, state))
} else {
panic!(
"TODO additional branch didn't have same indentation as first branch"
);
}
},
),
skip_first(
string("->"),
space1_before(
loc(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
),
),
);
// Keep collecting branches until one fails to parse. The failure itself is
// discarded, and parsing continues from the state returned alongside it.
loop {
match branch_parser.parse(arena, state) {
Ok((next_output, next_state)) => {
state = next_state;
branches.push(arena.alloc(next_output));
}
Err((_, old_state)) => {
state = old_state;
break;
}
}
}
Ok((branches, state))
}
} }
pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> { pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
@ -716,6 +826,14 @@ pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
} }
} }
/// Parser that consumes the `keyword::CASE` keyword and outputs the
/// `indent_col` of the state returned by that keyword parse.
///
/// A failure to parse the keyword is passed through unchanged.
pub fn case_with_indent<'a>() -> impl Parser<'a, u16> {
    move |arena, state: State<'a>| {
        let keyword_result = string(keyword::CASE).parse(arena, state);

        keyword_result.map(|((), after_keyword)| (after_keyword.indent_col, after_keyword))
    }
}
fn ident_to_expr<'a>(src: Ident<'a>) -> Expr<'a> { fn ident_to_expr<'a>(src: Ident<'a>) -> Expr<'a> {
match src { match src {
Ident::Var(info) => Expr::Var(info.module_parts, info.value), Ident::Var(info) => Expr::Var(info.module_parts, info.value),
@ -860,6 +978,10 @@ fn unqualified_ident<'a>() -> impl Parser<'a, &'a str> {
variant_or_ident(|first_char| first_char.is_lowercase()) variant_or_ident(|first_char| first_char.is_lowercase())
} }
/// Parser for a string-literal expression: the output of
/// `parse::string_literal::parse()` wrapped in `Expr::Str`.
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
    let literal = parse::string_literal::parse();

    map(literal, Expr::Str)
}
fn variant_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str> fn variant_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str>
where where
F: Fn(char) -> bool, F: Fn(char) -> bool,

View File

@ -1,9 +1,9 @@
use bumpalo::Bump; use bumpalo::Bump;
use parse::ast::{Attempting, Expr}; use parse::ast::Attempting;
use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State}; use parse::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char; use std::char;
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { pub fn parse<'a>() -> impl Parser<'a, &'a str> {
move |arena: &'a Bump, state: State<'a>| { move |arena: &'a Bump, state: State<'a>| {
let mut chars = state.input.chars(); let mut chars = state.input.chars();
@ -36,23 +36,23 @@ pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
// Potentially end the string (unless this is an escaped `"`!) // Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' { if ch == '"' && prev_ch != '\\' {
let expr = if parsed_chars == 2 { let string = if parsed_chars == 2 {
if let Some('"') = chars.next() { if let Some('"') = chars.next() {
// If the first three chars were all `"`, then this // If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string. // literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut chars); return parse_block_string(arena, state, &mut chars);
} else { } else {
Expr::Str("") ""
} }
} else { } else {
// Start at 1 so we omit the opening `"`. // Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`. // Subtract 1 from parsed_chars so we omit the closing `"`.
Expr::Str(&state.input[1..(parsed_chars - 1)]) &state.input[1..(parsed_chars - 1)]
}; };
let next_state = state.advance_without_indenting(parsed_chars)?; let next_state = state.advance_without_indenting(parsed_chars)?;
return Ok((expr, next_state)); return Ok((string, next_state));
} else if ch == '\n' { } else if ch == '\n' {
// This is a single-line string, which cannot have newlines! // This is a single-line string, which cannot have newlines!
// Treat this as an unclosed string literal, and consume // Treat this as an unclosed string literal, and consume
@ -83,7 +83,7 @@ fn parse_block_string<'a, I>(
_arena: &'a Bump, _arena: &'a Bump,
_state: State<'a>, _state: State<'a>,
_chars: &mut I, _chars: &mut I,
) -> ParseResult<'a, Expr<'a>> ) -> ParseResult<'a, &'a str>
where where
I: Iterator<Item = char>, I: Iterator<Item = char>,
{ {

View File

@ -13,17 +13,14 @@ use roc::ident::Ident;
use roc::parse; use roc::parse;
use roc::parse::ast::{self, Attempting}; use roc::parse::ast::{self, Attempting};
use roc::parse::blankspace::space0_before; use roc::parse::blankspace::space0_before;
use roc::parse::parser::{attempt, loc, map, Fail, Parser, State}; use roc::parse::parser::{loc, map, Fail, Parser, State};
use roc::region::{Located, Region}; use roc::region::{Located, Region};
pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> { pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>, Fail> {
let state = State::new(&input, Attempting::Module); let state = State::new(&input, Attempting::Module);
let parser = attempt( let parser = map(space0_before(loc(parse::expr(0)), 0), |loc_expr| {
Attempting::Expression, loc_expr.value
map(space0_before(loc(parse::expr(0)), 0), |loc_expr| { });
loc_expr.value
}),
);
let answer = parser.parse(&arena, state); let answer = parser.parse(&arena, state);
answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail) answer.map(|(expr, _)| expr).map_err(|(fail, _)| fail)

View File

@ -111,7 +111,7 @@ mod test_parse {
#[test] #[test]
fn empty_source_file() { fn empty_source_file() {
assert_parsing_fails("", FailReason::Eof(Region::zero()), Attempting::Expression); assert_parsing_fails("", FailReason::Eof(Region::zero()), Attempting::Module);
} }
#[test] #[test]
@ -131,7 +131,7 @@ mod test_parse {
assert_parsing_fails( assert_parsing_fails(
&too_long_str, &too_long_str,
FailReason::LineTooLong(0), FailReason::LineTooLong(0),
Attempting::Expression, Attempting::Module,
); );
} }
@ -818,6 +818,32 @@ mod test_parse {
); );
} }
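// CASE
//
// TODO: the test below is disabled. Its `expected` value is still a placeholder
// that builds an `Expr::Apply` from the variables a, b, c and d, which does not
// describe the `case` input; rewrite it as an `Expr::Case` before enabling.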
// #[test]
// fn two_branch_case() {
// let arena = Bump::new();
// let module_parts = Vec::new_in(&arena).into_bump_slice();
// let arg1 = Located::new(0, 0, 2, 3, Var(module_parts, "b"));
// let arg2 = Located::new(0, 0, 4, 5, Var(module_parts, "c"));
// let arg3 = Located::new(0, 0, 6, 7, Var(module_parts, "d"));
// let args = bumpalo::vec![in &arena; arg1, arg2, arg3];
// let tuple = arena.alloc((Located::new(0, 0, 0, 1, Var(module_parts, "a")), args));
// let expected = Expr::Apply(tuple);
// let actual = parse_with(
// &arena,
// indoc!(
// r#"
// case foo bar baz when
// "blah" -> foo a b
// "mise" -> bar c d
// "#
// ),
// );
// assert_eq!(Ok(expected), actual);
// }
// TODO test hex/oct/binary parsing // TODO test hex/oct/binary parsing
// //
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence! // TODO test for \t \r and \n in string literals *outside* unicode escape sequence!