Merge pull request #432 from rtfeldman/utf8

Lazily validate UTF-8 when parsing
This commit is contained in:
Richard Feldman 2020-07-27 01:59:44 -04:00 committed by GitHub
commit 8b3dd6c90c
22 changed files with 1024 additions and 833 deletions
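
The change switches the parser's input from `&str` to raw `&[u8]`: instead of validating the whole file up front (the old `read_to_string` path), each code point is decoded on demand through a new `peek_utf8_char` helper, and `FailReason::BadUtf8` is reported only if invalid bytes are actually reached. Below is a minimal std-only sketch of that idea; the real helper operates on the parser's `State` and leans on the `encode_unicode` crate added to roc_parse's dependencies in this PR, so the standalone signature and the `Eof` variant here are simplifications rather than the actual implementation.

use std::str::from_utf8;

#[derive(Debug, PartialEq)]
enum FailReason {
    BadUtf8,
    Eof,
}

/// Peek at the next UTF-8 code point without consuming any input.
/// Returns the decoded char and the number of bytes it occupies.
fn peek_utf8_char(bytes: &[u8]) -> Result<(char, usize), FailReason> {
    let first = *bytes.first().ok_or(FailReason::Eof)?;
    // The leading byte tells us how long the sequence should be (1-4 bytes).
    let len = utf8_len(first).ok_or(FailReason::BadUtf8)?;
    // A sequence truncated by the end of input also counts as bad UTF-8 here.
    let slice = bytes.get(..len).ok_or(FailReason::BadUtf8)?;
    match from_utf8(slice) {
        Ok(s) => Ok((s.chars().next().ok_or(FailReason::BadUtf8)?, len)),
        Err(_) => Err(FailReason::BadUtf8),
    }
}

/// Expected byte length of a UTF-8 sequence, judging by its leading byte.
fn utf8_len(first: u8) -> Option<usize> {
    match first {
        0x00..=0x7F => Some(1),
        0xC2..=0xDF => Some(2),
        0xE0..=0xEF => Some(3),
        0xF0..=0xF4 => Some(4),
        _ => None, // continuation byte or invalid leading byte
    }
}

fn main() {
    assert_eq!(peek_utf8_char(b"foo"), Ok(('f', 1)));
    assert_eq!(peek_utf8_char("鹏 = 1".as_bytes()), Ok(('鹏', 3)));
    assert_eq!(peek_utf8_char(&[0xFF]), Err(FailReason::BadUtf8));
}

Callers advance the state by the returned byte length, so the column bookkeeping still increments by one column per character while the underlying slice moves forward by the character's full UTF-8 width.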

Cargo.lock generated

@ -527,6 +527,12 @@ version = "1.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "env_logger"
version = "0.6.2"
@ -2229,6 +2235,7 @@ name = "roc_parse"
version = "0.1.0"
dependencies = [
"bumpalo",
"encode_unicode",
"indoc",
"inlinable_string",
"pretty_assertions",


@ -33,6 +33,7 @@ use roc_types::types::Type;
use std::hash::Hash;
use std::io::{self, Write};
use std::path::PathBuf;
use std::str::from_utf8_unchecked;
use target_lexicon::Triple;
pub fn main() -> io::Result<()> {
@ -145,7 +146,7 @@ fn report_parse_error(fail: Fail) {
}
fn print_output(src: &str) -> Result<String, Fail> {
gen(src, Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
gen(src.as_bytes(), Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
format!("\n{} \u{001b}[35m:\u{001b}[0m {}", answer, answer_type)
})
}
@ -154,7 +155,7 @@ pub fn repl_home() -> ModuleId {
ModuleIds::default().get_or_insert(&"REPL".into())
}
pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
use roc_reporting::report::{can_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE};
// Look up the types and expressions of the `provided` values
@ -169,13 +170,16 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
interns,
problems: can_problems,
..
} = can_expr(src)?;
} = can_expr(src)?; // IMPORTANT: we must bail out here if there were UTF-8 errors!
let subs = Subs::new(var_store.into());
let mut type_problems = Vec::new();
let (content, mut subs) = infer_expr(subs, &mut type_problems, &constraint, var);
// SAFETY: we've already verified that this is valid UTF-8 during parsing.
let src_lines: Vec<&str> = unsafe { from_utf8_unchecked(src).split('\n').collect() };
// Report problems
let src_lines: Vec<&str> = src.split('\n').collect();
let palette = DEFAULT_PALETTE;
// Report parsing and canonicalization problems
@ -386,8 +390,11 @@ pub fn infer_expr(
(content, solved.into_inner())
}
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
pub fn parse_loc_with<'a>(
arena: &'a Bump,
bytes: &'a [u8],
) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&bytes, Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);
@ -396,14 +403,14 @@ pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast
.map_err(|(fail, _)| fail)
}
pub fn can_expr(expr_str: &str) -> Result<CanExprOut, Fail> {
can_expr_with(&Bump::new(), repl_home(), expr_str)
pub fn can_expr(expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
can_expr_with(&Bump::new(), repl_home(), expr_bytes)
}
// TODO make this return a named struct instead of a big tuple
#[allow(clippy::type_complexity)]
pub fn uniq_expr(
expr_str: &str,
expr_bytes: &[u8],
) -> Result<
(
Located<roc_can::expr::Expr>,
@ -419,14 +426,14 @@ pub fn uniq_expr(
> {
let declared_idents: &ImMap<Ident, (Symbol, Region)> = &ImMap::default();
uniq_expr_with(&Bump::new(), expr_str, declared_idents)
uniq_expr_with(&Bump::new(), expr_bytes, declared_idents)
}
// TODO make this return a named struct instead of a big tuple
#[allow(clippy::type_complexity)]
pub fn uniq_expr_with(
arena: &Bump,
expr_str: &str,
expr_bytes: &[u8],
declared_idents: &ImMap<Ident, (Symbol, Region)>,
) -> Result<
(
@ -450,7 +457,7 @@ pub fn uniq_expr_with(
var,
interns,
..
} = can_expr_with(arena, home, expr_str)?;
} = can_expr_with(arena, home, expr_bytes)?;
// double check
let mut var_store = VarStore::new(old_var_store.fresh());
@ -505,8 +512,8 @@ pub struct CanExprOut {
pub constraint: Constraint,
}
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_str: &str) -> Result<CanExprOut, Fail> {
let loc_expr = parse_loc_with(&arena, expr_str)?;
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
let loc_expr = parse_loc_with(&arena, expr_bytes)?;
let mut var_store = VarStore::default();
let var = var_store.fresh();
let expected = Expected::NoExpectation(Type::Variable(var));


@ -27,7 +27,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);


@ -20,7 +20,7 @@ mod test_fmt {
use roc_parse::parser::{Fail, Parser, State};
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc!(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);
@ -55,7 +55,7 @@ mod test_fmt {
let src = src.trim_end();
let expected = expected.trim_end();
match module::header().parse(&arena, State::new(&src, Attempting::Module)) {
match module::header().parse(&arena, State::new(src.as_bytes(), Attempting::Module)) {
Ok((actual, state)) => {
let mut buf = String::new_in(&arena);


@ -87,7 +87,7 @@ pub fn infer_expr(
}
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);


@ -19,9 +19,10 @@ use roc_solve::solve;
use roc_types::solved_types::Solved;
use roc_types::subs::{Subs, VarStore, Variable};
use std::collections::{HashMap, HashSet};
use std::fs::read_to_string;
use std::fs;
use std::io;
use std::path::{Path, PathBuf};
use std::str::from_utf8_unchecked;
use std::sync::{Arc, Mutex};
use tokio::sync::mpsc;
use tokio::task::spawn_blocking;
@ -63,7 +64,7 @@ struct ModuleHeader {
imported_modules: MutSet<ModuleId>,
exposes: Vec<Symbol>,
exposed_imports: MutMap<Ident, (Symbol, Region)>,
src: Box<str>,
src: Box<[u8]>,
}
#[derive(Debug)]
@ -526,58 +527,70 @@ fn load_module(
load_filename(filename, msg_tx, module_ids)
}
fn parse_src(
filename: PathBuf,
msg_tx: MsgSender,
module_ids: SharedModules<'_, '_>,
src_bytes: &[u8],
) -> Result<ModuleId, LoadingProblem> {
let state = State::new(src_bytes, Attempting::Module);
let arena = Bump::new();
// TODO figure out if there's a way to address this clippy error
// without introducing a borrow error. ("let and return" is literally
// what the borrow checker suggested using here to fix the problem, so...)
#[allow(clippy::let_and_return)]
let answer = match roc_parse::module::header().parse(&arena, state) {
Ok((ast::Module::Interface { header }, state)) => {
let module_id = send_header(
header.name,
header.exposes.into_bump_slice(),
header.imports.into_bump_slice(),
state,
module_ids,
msg_tx,
);
Ok(module_id)
}
Ok((ast::Module::App { header }, state)) => match module_ids {
MaybeShared::Shared(_, _) => {
// If this is Shared, it means we're trying to import
// an app module which is not the root. Not alllowed!
Err(LoadingProblem::TriedToImportAppModule)
}
unique_modules @ MaybeShared::Unique(_, _) => {
let module_id = send_header(
header.name,
header.provides.into_bump_slice(),
header.imports.into_bump_slice(),
state,
unique_modules,
msg_tx,
);
Ok(module_id)
}
},
Err((fail, _)) => Err(LoadingProblem::ParsingFailed { filename, fail }),
};
answer
}
/// Load a module by its filename
///
/// This has two unsafe calls:
///
/// * memory map the filename instead of doing a buffered read
/// * assume the contents of the file are valid UTF-8
fn load_filename(
filename: PathBuf,
msg_tx: MsgSender,
module_ids: SharedModules<'_, '_>,
) -> Result<ModuleId, LoadingProblem> {
match read_to_string(&filename) {
Ok(src) => {
let arena = Bump::new();
let state = State::new(&src, Attempting::Module);
// TODO figure out if there's a way to address this clippy error
// without introducing a borrow error. ("let and return" is literally
// what the borrow checker suggested using here to fix the problem, so...)
#[allow(clippy::let_and_return)]
let answer = match roc_parse::module::header().parse(&arena, state) {
Ok((ast::Module::Interface { header }, state)) => {
let module_id = send_header(
header.name,
header.exposes.into_bump_slice(),
header.imports.into_bump_slice(),
state,
module_ids,
msg_tx,
);
Ok(module_id)
}
Ok((ast::Module::App { header }, state)) => match module_ids {
MaybeShared::Shared(_, _) => {
// If this is Shared, it means we're trying to import
// an app module which is not the root. Not allowed!
Err(LoadingProblem::TriedToImportAppModule)
}
unique_modules @ MaybeShared::Unique(_, _) => {
let module_id = send_header(
header.name,
header.provides.into_bump_slice(),
header.imports.into_bump_slice(),
state,
unique_modules,
msg_tx,
);
Ok(module_id)
}
},
Err((fail, _)) => Err(LoadingProblem::ParsingFailed { filename, fail }),
};
answer
}
match fs::read(&filename) {
Ok(bytes) => parse_src(filename, msg_tx, module_ids, bytes.as_ref()),
Err(err) => Err(LoadingProblem::FileProblem {
filename,
error: err.kind(),
@ -746,7 +759,7 @@ fn send_header<'a>(
// Box up the input &str for transfer over the wire.
// We'll need this in order to continue parsing later.
let src: Box<str> = state.input.to_string().into();
let src: Box<[u8]> = state.bytes.into();
// Send the deps to the coordinator thread for processing,
// then continue on to parsing and canonicalizing defs.
@ -961,7 +974,7 @@ fn parse_and_constrain(
let (parsed_defs, _) = module_defs()
.parse(&arena, state)
.expect("TODO gracefully handle parse error on module defs");
.expect("TODO gracefully handle parse error on module defs. IMPORTANT: Bail out entirely if there are any BadUtf8 problems! That means the whole source file is not valid UTF-8 and any other errors we report may get mis-reported. We rely on this for safety in an `unsafe` block later on in this function.");
let (module, declarations, ident_ids, constraint, problems) = match canonicalize_module_defs(
&arena,
@ -1001,9 +1014,13 @@ fn parse_and_constrain(
}
};
let src = header.src;
let imported_modules = header.imported_modules;
// SAFETY: By this point we've already incrementally verified that there
// are no UTF-8 errors in these bytes. If there had been any UTF-8 errors,
// we'd have bailed out before now.
let src: Box<str> = unsafe { from_utf8_unchecked(header.src.as_ref()).to_string().into() };
tokio::spawn(async move {
let mut tx = msg_tx;


@ -92,7 +92,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);


@ -53,7 +53,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);


@ -11,6 +11,7 @@ roc_region = { path = "../region" }
roc_module = { path = "../module" }
bumpalo = { version = "3.2", features = ["collections"] }
inlinable_string = "0.1"
encode_unicode = "0.3"
[dev-dependencies]
pretty_assertions = "0.5.1"


@ -1,6 +1,8 @@
use crate::ast::CommentOrNewline::{self, *};
use crate::ast::Spaceable;
use crate::parser::{self, and, unexpected, unexpected_eof, Parser, State};
use crate::parser::{
self, and, peek_utf8_char, unexpected, unexpected_eof, FailReason, Parser, State,
};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
@ -216,147 +218,179 @@ fn spaces<'a>(
) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
move |arena: &'a Bump, state: State<'a>| {
let original_state = state.clone();
let chars = state.input.chars().peekable();
let mut space_list = Vec::new_in(arena);
let mut chars_parsed = 0;
let mut bytes_parsed = 0;
let mut comment_line_buf = String::new_in(arena);
let mut line_state = LineState::Normal;
let mut state = state;
let mut any_newlines = false;
for ch in chars {
chars_parsed += 1;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, utf8_len)) => {
bytes_parsed += utf8_len;
match line_state {
LineState::Normal => {
match ch {
' ' => {
// Don't check indentation here; it might not be enough
// indentation yet, but maybe it will be after more spaces happen!
state = state.advance_spaces(1)?;
}
'\r' => {
// Ignore carriage returns.
state = state.advance_spaces(1)?;
}
'\n' => {
// No need to check indentation because we're about to reset it anyway.
state = state.newline()?;
match line_state {
LineState::Normal => {
match ch {
' ' => {
// Don't check indentation here; it might not be enough
// indentation yet, but maybe it will be after more spaces happen!
state = state.advance_spaces(1)?;
}
'\r' => {
// Ignore carriage returns.
state = state.advance_spaces(1)?;
}
'\n' => {
// No need to check indentation because we're about to reset it anyway.
state = state.newline()?;
// Newlines only get added to the list when they're outside comments.
space_list.push(Newline);
// Newlines only get added to the list when they're outside comments.
space_list.push(Newline);
any_newlines = true;
}
'#' => {
// Check indentation to make sure we were indented enough
// before this comment began.
state = state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state.clone()))?
.advance_without_indenting(1)?;
// We're now parsing a line comment!
line_state = LineState::Comment;
}
nonblank => {
return if require_at_least_one && chars_parsed <= 1 {
// We've parsed 1 char and it was not a space,
// but we require parsing at least one space!
Err(unexpected(nonblank, 0, state.clone(), state.attempting))
} else {
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
any_newlines = true;
}
'#' => {
// Check indentation to make sure we were indented enough
// before this comment began.
state = state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?;
.map_err(|(fail, _)| (fail, original_state.clone()))?
.advance_without_indenting(1)?;
// We're now parsing a line comment!
line_state = LineState::Comment;
}
Ok((space_list.into_bump_slice(), state))
};
}
}
}
LineState::Comment => {
match ch {
' ' => {
// If we're in a line comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
if comment_line_buf.len() == 1 {
match comment_line_buf.chars().next() {
Some('#') => {
// This is a comment beginning with `## ` - that is,
// a doc comment.
_ => {
return if require_at_least_one && bytes_parsed <= 1 {
// We've parsed 1 char and it was not a space,
// but we require parsing at least one space!
Err(unexpected(0, state.clone(), state.attempting))
} else {
// First make sure we were indented enough!
//
// (The space is important; otherwise, this is not
// a doc comment, but rather something like a
// big separator block, e.g. ############)
line_state = LineState::DocComment;
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
state = state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?;
}
// This is now the beginning of the doc comment.
comment_line_buf.clear();
}
_ => {
Ok((space_list.into_bump_slice(), state))
};
}
}
}
LineState::Comment => {
match ch {
' ' => {
// If we're in a line comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
if comment_line_buf.len() == 1 {
match comment_line_buf.chars().next() {
Some('#') => {
// This is a comment beginning with `## ` - that is,
// a doc comment.
//
// (The space is important; otherwise, this is not
// a doc comment, but rather something like a
// big separator block, e.g. ############)
line_state = LineState::DocComment;
// This is now the beginning of the doc comment.
comment_line_buf.clear();
}
_ => {
comment_line_buf.push(ch);
}
}
} else {
comment_line_buf.push(ch);
}
}
} else {
comment_line_buf.push(ch);
'\n' => {
state = state.newline()?;
// This was a newline, so end this line comment.
space_list.push(LineComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
nonblank => {
// Chars can have byte lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
comment_line_buf.push(nonblank);
}
}
}
'\n' => {
state = state.newline()?;
LineState::DocComment => {
match ch {
' ' => {
// If we're in a doc comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
// This was a newline, so end this line comment.
space_list.push(LineComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
comment_line_buf.push(ch);
}
'\n' => {
state = state.newline()?;
line_state = LineState::Normal;
}
nonblank => {
// Chars can have byte lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
// This was a newline, so end this doc comment.
space_list.push(DocComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
comment_line_buf.push(nonblank);
line_state = LineState::Normal;
}
nonblank => {
state = state.advance_without_indenting(utf8_len)?;
comment_line_buf.push(nonblank);
}
}
}
}
}
LineState::DocComment => {
match ch {
' ' => {
// If we're in a doc comment, this won't affect indentation anyway.
state = state.advance_without_indenting(1)?;
Err(FailReason::BadUtf8) => {
// If we hit an invalid UTF-8 character, bail out immediately.
return state.fail(FailReason::BadUtf8);
}
Err(_) => {
if require_at_least_one && bytes_parsed == 0 {
return Err(unexpected_eof(0, state.attempting, state));
} else {
let space_slice = space_list.into_bump_slice();
comment_line_buf.push(ch);
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
if any_newlines {
return Ok((
space_slice,
state
.check_indent(min_indent)
.map_err(|(fail, _)| (fail, original_state))?,
));
}
'\n' => {
state = state.newline()?;
// This was a newline, so end this doc comment.
space_list.push(DocComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
nonblank => {
// Chars can have byte lengths of more than 1!
state = state.advance_without_indenting(nonblank.len_utf8())?;
comment_line_buf.push(nonblank);
}
return Ok((space_slice, state));
}
}
}
};
}
if require_at_least_one && chars_parsed == 0 {
// If we didn't parse anything, return unexpected EOF
if require_at_least_one && original_state.bytes.len() == state.bytes.len() {
Err(unexpected_eof(0, state.attempting, state))
} else {
// First make sure we were indented enough!


@ -8,8 +8,8 @@ use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident};
use crate::keyword;
use crate::number_literal::number_literal;
use crate::parser::{
self, allocated, char, fail, not, not_followed_by, optional, sep_by1, string, then, unexpected,
unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
self, allocated, ascii_char, ascii_string, fail, not, not_followed_by, optional, sep_by1, then,
unexpected, unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
};
use crate::type_annotation;
use bumpalo::collections::string::String;
@ -22,7 +22,7 @@ pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
// Recursive parsers must not directly invoke functions which return (impl Parser),
// as this causes rustc to stack overflow. Thus, parse_expr must be a
// separate function which recurses by calling itself directly.
move |arena, state| parse_expr(min_indent, arena, state)
move |arena, state: State<'a>| parse_expr(min_indent, arena, state)
}
macro_rules! loc_parenthetical_expr {
@ -30,7 +30,7 @@ macro_rules! loc_parenthetical_expr {
then(
loc!(and!(
between!(
char('('),
ascii_char('('),
map_with_arena!(
space0_around(
loc!(move |arena, state| parse_expr($min_indent, arena, state)),
@ -43,7 +43,7 @@ macro_rules! loc_parenthetical_expr {
}
}
),
char(')')
ascii_char(')')
),
optional(either!(
// There may optionally be function args after the ')'
@ -59,7 +59,7 @@ macro_rules! loc_parenthetical_expr {
// as if there were any args they'd have consumed it anyway
// e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser
either!(
one_or_more!(skip_first!(char('.'), lowercase_ident())),
one_or_more!(skip_first!(ascii_char('.'), lowercase_ident())),
and!(space0($min_indent), equals_with_indent())
)
))
@ -170,7 +170,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
one_of!(
map_with_arena!(
and!(
loc!(char('!')),
loc!(ascii_char('!')),
loc!(move |arena, state| parse_expr(min_indent, arena, state))
),
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
@ -179,7 +179,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
),
map_with_arena!(
and!(
loc!(char('-')),
loc!(ascii_char('-')),
loc!(move |arena, state| parse_expr(min_indent, arena, state))
),
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
@ -450,9 +450,9 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
let (loc_tuple, state) = loc!(and!(
space0_after(
between!(
char('('),
ascii_char('('),
space0_around(loc_pattern(min_indent), min_indent),
char(')')
ascii_char(')')
),
min_indent,
),
@ -482,7 +482,7 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
/// The '=' used in a def can't be followed by another '=' (or else it's actually
/// an "==") and also it can't be followed by '>' (or else it's actually an "=>")
fn equals_for_def<'a>() -> impl Parser<'a, ()> {
not_followed_by(char('='), one_of!(char('='), char('>')))
not_followed_by(ascii_char('='), one_of!(ascii_char('='), ascii_char('>')))
}
/// A definition, consisting of one of these:
@ -513,7 +513,7 @@ pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>> {
),
// Annotation
skip_first!(
char(':'),
ascii_char(':'),
// Spaces after the ':' (at a normal indentation level) and then the type.
// The type itself must be indented more than the pattern and ':'
space0_before(type_annotation::located(indented_more), indented_more)
@ -811,12 +811,12 @@ fn loc_parse_function_arg<'a>(
fn reserved_keyword<'a>() -> impl Parser<'a, ()> {
one_of!(
string(keyword::IF),
string(keyword::THEN),
string(keyword::ELSE),
string(keyword::WHEN),
string(keyword::IS),
string(keyword::AS)
ascii_string(keyword::IF),
ascii_string(keyword::THEN),
ascii_string(keyword::ELSE),
ascii_string(keyword::WHEN),
ascii_string(keyword::IS),
ascii_string(keyword::AS)
)
}
@ -824,7 +824,7 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map_with_arena!(
skip_first!(
// All closures start with a '\' - e.g. (\x -> x + 1)
char('\\'),
ascii_char('\\'),
// Once we see the '\', we're committed to parsing this as a closure.
// It may turn out to be malformed, but it is definitely a closure.
optional(and!(
@ -833,13 +833,13 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
Attempting::ClosureParams,
// Params are comma-separated
sep_by1(
char(','),
ascii_char(','),
space0_around(loc_closure_param(min_indent), min_indent)
)
),
skip_first!(
// Parse the -> which separates params from body
string("->"),
ascii_string("->"),
// Parse the body
attempt!(
Attempting::ClosureBody,
@ -877,9 +877,9 @@ fn parse_closure_param<'a>(
// If you wrap it in parens, you can match any arbitrary pattern at all.
// e.g. \User.UserId userId -> ...
between!(
char('('),
ascii_char('('),
space0_around(loc_pattern(min_indent), min_indent),
char(')')
ascii_char(')')
)
)
.parse(arena, state)
@ -903,9 +903,9 @@ fn loc_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
fn loc_parenthetical_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
between!(
char('('),
ascii_char('('),
move |arena, state| loc_pattern(min_indent).parse(arena, state),
char(')')
ascii_char(')')
)
}
@ -923,13 +923,13 @@ fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
}
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map!(char('_'), |_| Pattern::Underscore)
map!(ascii_char('_'), |_| Pattern::Underscore)
}
fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
then(
collection!(
char('{'),
ascii_char('{'),
move |arena: &'a bumpalo::Bump,
state: crate::parser::State<'a>|
-> crate::parser::ParseResult<'a, Located<crate::ast::Pattern<'a>>> {
@ -947,10 +947,13 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
// (This is true in both literals and types.)
let (opt_loc_val, state) = crate::parser::optional(either!(
skip_first!(
char(':'),
ascii_char(':'),
space0_before(loc_pattern(min_indent), min_indent)
),
skip_first!(char('?'), space0_before(loc!(expr(min_indent)), min_indent))
skip_first!(
ascii_char('?'),
space0_before(loc!(expr(min_indent)), min_indent)
)
))
.parse(arena, state)?;
@ -987,8 +990,8 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
Ok((answer, state))
},
char(','),
char('}'),
ascii_char(','),
ascii_char('}'),
min_indent
),
move |_arena, state, loc_patterns| {
@ -1109,7 +1112,7 @@ mod when {
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
),
string(keyword::IS)
ascii_string(keyword::IS)
)
)
),
@ -1132,7 +1135,7 @@ mod when {
/// Parsing when with indentation.
fn when_with_indent<'a>() -> impl Parser<'a, u16> {
move |arena, state: State<'a>| {
string(keyword::WHEN)
ascii_string(keyword::WHEN)
.parse(arena, state)
.map(|((), state)| (state.indent_col, state))
}
@ -1185,7 +1188,7 @@ mod when {
}
);
loop {
while !state.bytes.is_empty() {
match branch_parser.parse(arena, state) {
Ok((next_output, next_state)) => {
state = next_state;
@ -1210,11 +1213,11 @@ mod when {
) -> impl Parser<'a, (Vec<'a, Located<Pattern<'a>>>, Option<Located<Expr<'a>>>)> {
and!(
sep_by1(
char('|'),
ascii_char('|'),
space0_around(loc_pattern(min_indent), min_indent),
),
optional(skip_first!(
string(keyword::IF),
ascii_string(keyword::IF),
// TODO we should require space before the expression but not after
space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
@ -1240,7 +1243,7 @@ mod when {
/// Parsing the right-hand side of a branch in a when conditional.
fn branch_result<'a>(indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
skip_first!(
string("->"),
ascii_string("->"),
space0_before(
loc!(move |arena, state| parse_expr(indent, arena, state)),
indent,
@ -1253,7 +1256,7 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
map_with_arena!(
and!(
skip_first!(
string(keyword::IF),
ascii_string(keyword::IF),
space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
@ -1261,14 +1264,14 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
),
and!(
skip_first!(
string(keyword::THEN),
ascii_string(keyword::THEN),
space1_around(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
)
),
skip_first!(
string(keyword::ELSE),
ascii_string(keyword::ELSE),
space1_before(
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
min_indent,
@ -1310,10 +1313,15 @@ fn unary_negate_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
// Try to parse a number literal *before* trying to parse unary negate,
// because otherwise (foo -1) will parse as (foo (Num.neg 1))
loc!(number_literal()),
loc!(char('-'))
loc!(ascii_char('-'))
)
),
one_of!(char(' '), char('#'), char('\n'), char('>')),
one_of!(
ascii_char(' '),
ascii_char('#'),
ascii_char('\n'),
ascii_char('>')
),
),
move |arena, state, (spaces, num_or_minus_char)| {
match num_or_minus_char {
@ -1530,17 +1538,15 @@ pub fn ident_without_apply<'a>() -> impl Parser<'a, Expr<'a>> {
/// Like equals_for_def(), except it produces the indent_col of the state rather than ()
pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
move |_arena, state: State<'a>| {
let mut iter = state.input.chars();
match iter.next() {
Some(ch) if ch == '=' => {
match iter.peekable().peek() {
match state.bytes.first() {
Some(&byte) if byte == b'=' => {
match state.bytes.get(1) {
// The '=' must not be followed by another `=` or `>`
// (See equals_for_def() for explanation)
Some(next_ch) if next_ch != &'=' && next_ch != &'>' => {
Some(&next_byte) if next_byte != b'=' && next_byte != b'>' => {
Ok((state.indent_col, state.advance_without_indenting(1)?))
}
Some(next_ch) => Err(unexpected(*next_ch, 0, state, Attempting::Def)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(
1,
Attempting::Def,
@ -1548,21 +1554,17 @@ pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
)),
}
}
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)),
}
}
}
pub fn colon_with_indent<'a>() -> impl Parser<'a, u16> {
move |_arena, state: State<'a>| {
let mut iter = state.input.chars();
match iter.next() {
Some(ch) if ch == ':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)),
}
move |_arena, state: State<'a>| match state.bytes.first() {
Some(&byte) if byte == b':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
Some(_) => Err(unexpected(0, state, Attempting::Def)),
None => Err(unexpected_eof(0, Attempting::Def, state)),
}
}
@ -1606,32 +1608,32 @@ fn binop<'a>() -> impl Parser<'a, BinOp> {
// with other valid operators (e.g. "<=" begins with "<") must
// come before the shorter ones; otherwise, they will never
// be reached because the shorter one will pass and consume!
map!(string("|>"), |_| BinOp::Pizza),
map!(string("=="), |_| BinOp::Equals),
map!(string("!="), |_| BinOp::NotEquals),
map!(string("&&"), |_| BinOp::And),
map!(string("||"), |_| BinOp::Or),
map!(char('+'), |_| BinOp::Plus),
map!(char('*'), |_| BinOp::Star),
map!(char('-'), |_| BinOp::Minus),
map!(string("//"), |_| BinOp::DoubleSlash),
map!(char('/'), |_| BinOp::Slash),
map!(string("<="), |_| BinOp::LessThanOrEq),
map!(char('<'), |_| BinOp::LessThan),
map!(string(">="), |_| BinOp::GreaterThanOrEq),
map!(char('>'), |_| BinOp::GreaterThan),
map!(char('^'), |_| BinOp::Caret),
map!(string("%%"), |_| BinOp::DoublePercent),
map!(char('%'), |_| BinOp::Percent)
map!(ascii_string("|>"), |_| BinOp::Pizza),
map!(ascii_string("=="), |_| BinOp::Equals),
map!(ascii_string("!="), |_| BinOp::NotEquals),
map!(ascii_string("&&"), |_| BinOp::And),
map!(ascii_string("||"), |_| BinOp::Or),
map!(ascii_char('+'), |_| BinOp::Plus),
map!(ascii_char('*'), |_| BinOp::Star),
map!(ascii_char('-'), |_| BinOp::Minus),
map!(ascii_string("//"), |_| BinOp::DoubleSlash),
map!(ascii_char('/'), |_| BinOp::Slash),
map!(ascii_string("<="), |_| BinOp::LessThanOrEq),
map!(ascii_char('<'), |_| BinOp::LessThan),
map!(ascii_string(">="), |_| BinOp::GreaterThanOrEq),
map!(ascii_char('>'), |_| BinOp::GreaterThan),
map!(ascii_char('^'), |_| BinOp::Caret),
map!(ascii_string("%%"), |_| BinOp::DoublePercent),
map!(ascii_char('%'), |_| BinOp::Percent)
)
}
pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
let elems = collection!(
char('['),
ascii_char('['),
loc!(expr(min_indent)),
char(','),
char(']'),
ascii_char(','),
ascii_char(']'),
min_indent
);
@ -1673,9 +1675,11 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
};
// there can be field access, e.g. `{ x : 4 }.x`
let (accesses, state) =
optional(one_or_more!(skip_first!(char('.'), lowercase_ident())))
.parse(arena, state)?;
let (accesses, state) = optional(one_or_more!(skip_first!(
ascii_char('.'),
lowercase_ident()
)))
.parse(arena, state)?;
if let Some(fields) = accesses {
for field in fields {
@ -1768,7 +1772,7 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
/// This is mainly for matching tags in closure params, e.g. \@Foo -> ...
pub fn private_tag<'a>() -> impl Parser<'a, &'a str> {
map_with_arena!(
skip_first!(char('@'), global_tag()),
skip_first!(ascii_char('@'), global_tag()),
|arena: &'a Bump, name: &'a str| {
let mut buf = String::with_capacity_in(1 + name.len(), arena);


@ -1,6 +1,6 @@
use crate::ast::Attempting;
use crate::keyword;
use crate::parser::{unexpected, unexpected_eof, Fail, FailReason, ParseResult, Parser, State};
use crate::parser::{peek_utf8_char, unexpected, Fail, FailReason, ParseResult, Parser, State};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
@ -67,129 +67,126 @@ impl<'a> Ident<'a> {
/// Sometimes we may want to check for those later in the process, and give
/// more contextually-aware error messages than "unexpected `if`" or the like.
#[inline(always)]
pub fn parse_ident<'a, I>(
pub fn parse_ident<'a>(
arena: &'a Bump,
chars: &mut I,
state: State<'a>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
where
I: Iterator<Item = char>,
{
mut state: State<'a>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
let mut is_capitalized;
let is_accessor_fn;
let mut is_private_tag = false;
let mut chars_parsed;
// Identifiers and accessor functions must start with either a letter or a dot.
// If this starts with neither, it must be something else!
match chars.next() {
Some(ch) => {
if ch == '@' {
// '@' must always be followed by a capital letter!
match chars.next() {
Some(ch) if ch.is_uppercase() => {
part_buf.push('@');
part_buf.push(ch);
match peek_utf8_char(&state) {
Ok((first_ch, bytes_parsed)) => {
if first_ch.is_alphabetic() {
part_buf.push(first_ch);
is_private_tag = true;
is_capitalized = true;
is_accessor_fn = false;
chars_parsed = 2;
}
Some(ch) => {
return Err(unexpected(ch, 0, state, Attempting::Identifier));
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
}
} else if ch.is_alphabetic() {
part_buf.push(ch);
is_capitalized = ch.is_uppercase();
is_capitalized = first_ch.is_uppercase();
is_accessor_fn = false;
chars_parsed = 1;
} else if ch == '.' {
state = state.advance_without_indenting(bytes_parsed)?;
} else if first_ch == '.' {
is_capitalized = false;
is_accessor_fn = true;
chars_parsed = 1;
state = state.advance_without_indenting(bytes_parsed)?;
} else if first_ch == '@' {
state = state.advance_without_indenting(bytes_parsed)?;
// '@' must always be followed by a capital letter!
match peek_utf8_char(&state) {
Ok((next_ch, next_bytes_parsed)) => {
if next_ch.is_uppercase() {
state = state.advance_without_indenting(next_bytes_parsed)?;
part_buf.push('@');
part_buf.push(next_ch);
is_private_tag = true;
is_capitalized = true;
is_accessor_fn = false;
} else {
return Err(unexpected(
bytes_parsed + next_bytes_parsed,
state,
Attempting::Identifier,
));
}
}
Err(reason) => return state.fail(reason),
}
} else {
return Err(unexpected(ch, 0, state, Attempting::Identifier));
return Err(unexpected(0, state, Attempting::Identifier));
}
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
Err(reason) => return state.fail(reason),
}
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A dot ('.')
if ch.is_alphabetic() {
if part_buf.is_empty() {
// Capitalization is determined by the first character in the part.
is_capitalized = ch.is_uppercase();
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(
Some(ch),
arena,
state,
capitalized_parts,
noncapitalized_parts,
);
}
part_buf.push(ch);
} else if ch == '.' {
// There are two possible errors here:
//
// 1. Having two consecutive dots is an error.
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
return malformed(
Some(ch),
arena,
state,
capitalized_parts,
noncapitalized_parts,
);
}
if is_capitalized {
capitalized_parts.push(part_buf.into_bump_str());
} else {
noncapitalized_parts.push(part_buf.into_bump_str());
}
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the identifier. We're done!
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
};
let mut next_char = None;
while let Some(ch) = chars.next() {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A dot ('.')
if ch.is_alphabetic() {
if part_buf.is_empty() {
// Capitalization is determined by the first character in the part.
is_capitalized = ch.is_uppercase();
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
}
part_buf.push(ch);
} else if ch == '.' {
// There are two possible errors here:
//
// 1. Having two consecutive dots is an error.
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
return malformed(
Some(ch),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
}
if is_capitalized {
capitalized_parts.push(part_buf.into_bump_str());
} else {
noncapitalized_parts.push(part_buf.into_bump_str());
}
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the identifier. We're done!
next_char = Some(ch);
break;
}
chars_parsed += 1;
}
if part_buf.is_empty() {
@ -200,10 +197,9 @@ where
// If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously.
return malformed(
next_char.or_else(|| Some('.')),
Some('.'),
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
@ -224,14 +220,7 @@ where
Ident::AccessorFunction(value)
} else {
return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
}
} else if noncapitalized_parts.is_empty() {
// We have capitalized parts only, so this must be a tag.
@ -245,33 +234,19 @@ where
}
} else {
// This is a qualified tag, which is not allowed!
return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
}
}
None => {
// We had neither capitalized nor noncapitalized parts,
// yet we made it this far. The only explanation is that this was
// a stray '.' drifting through the cosmos.
return Err(unexpected('.', 1, state, Attempting::Identifier));
return Err(unexpected(1, state, Attempting::Identifier));
}
}
} else if is_private_tag {
// This is qualified field access with an '@' in front, which does not make sense!
return malformed(
None,
arena,
state,
chars,
capitalized_parts,
noncapitalized_parts,
);
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
} else {
// We have multiple noncapitalized parts, so this must be field access.
Ident::Access {
@ -280,22 +255,16 @@ where
}
};
let state = state.advance_without_indenting(chars_parsed)?;
Ok(((answer, next_char), state))
Ok(((answer, None), state))
}
fn malformed<'a, I>(
fn malformed<'a>(
opt_bad_char: Option<char>,
arena: &'a Bump,
state: State<'a>,
chars: &mut I,
mut state: State<'a>,
capitalized_parts: Vec<&'a str>,
noncapitalized_parts: Vec<&'a str>,
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
where
I: Iterator<Item = char>,
{
) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
// Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena);
@ -311,30 +280,35 @@ where
// Consume the remaining chars in the identifier.
let mut next_char = None;
for ch in chars {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
full_string.push(ch);
} else {
next_char = Some(ch);
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
full_string.push(ch);
} else {
next_char = Some(ch);
break;
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
let chars_parsed = full_string.len();
Ok((
(Ident::Malformed(full_string.into_bump_str()), next_char),
state.advance_without_indenting(chars_parsed)?,
state,
))
}
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
move |arena: &'a Bump, state: State<'a>| {
// Discard next_char; we don't need it.
let ((string, _), state) = parse_ident(arena, &mut state.input.chars(), state)?;
let ((string, _), state) = parse_ident(arena, state)?;
Ok((string, state))
}
@ -344,52 +318,47 @@ pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str>
where
F: Fn(char) -> bool,
{
move |arena, state: State<'a>| {
let mut chars = state.input.chars();
move |arena, mut state: State<'a>| {
// pred will determine if this is a tag or ident (based on capitalization)
let first_letter = match chars.next() {
Some(first_char) => {
if pred(first_char) {
first_char
} else {
return Err(unexpected(
first_char,
0,
state,
Attempting::RecordFieldLabel,
));
let (first_letter, bytes_parsed) = match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if !pred(first_letter) {
return Err(unexpected(0, state, Attempting::RecordFieldLabel));
}
(first_letter, bytes_parsed)
}
None => {
return Err(unexpected_eof(0, Attempting::RecordFieldLabel, state));
}
Err(reason) => return state.fail(reason),
};
let mut buf = String::with_capacity_in(1, arena);
buf.push(first_letter);
for ch in chars {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A ':' indicating the end of the field
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
} else {
// This is the end of the field. We're done!
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A ':' indicating the end of the field
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
state = state.advance_without_indenting(bytes_parsed)?;
} else {
// This is the end of the field. We're done!
break;
}
}
Err(reason) => return state.fail(reason),
};
}
let chars_parsed = buf.len();
Ok((
buf.into_bump_str(),
state.advance_without_indenting(chars_parsed)?,
))
Ok((buf.into_bump_str(), state))
}
}


@ -6,7 +6,10 @@ use crate::blankspace::{space0_around, space1};
use crate::expr::def;
use crate::header::ModuleName;
use crate::ident::unqualified_ident;
use crate::parser::{self, char, loc, optional, string, unexpected, unexpected_eof, Parser, State};
use crate::parser::{
self, ascii_char, ascii_string, loc, optional, peek_utf8_char, peek_utf8_char_at, unexpected,
Parser, State,
};
use bumpalo::collections::{String, Vec};
use roc_region::all::Located;
@ -30,7 +33,10 @@ pub fn app_module<'a>() -> impl Parser<'a, Module<'a>> {
pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
parser::map(
and!(
skip_first!(string("interface"), and!(space1(1), loc!(module_name()))),
skip_first!(
ascii_string("interface"),
and!(space1(1), loc!(module_name()))
),
and!(exposes(), imports())
),
|(
@ -56,72 +62,68 @@ pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
#[inline(always)]
pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
move |arena, state: State<'a>| {
let mut chars = state.input.chars();
move |arena, mut state: State<'a>| {
match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if !first_letter.is_uppercase() {
return Err(unexpected(0, state, Attempting::Module));
};
let first_letter = match chars.next() {
Some(first_char) => {
// Module names must all be uppercase
if first_char.is_uppercase() {
first_char
} else {
return Err(unexpected(
first_char,
0,
state,
Attempting::RecordFieldLabel,
));
}
}
None => {
return Err(unexpected_eof(0, Attempting::Identifier, state));
}
};
let mut buf = String::with_capacity_in(4, arena);
let mut buf = String::with_capacity_in(1, arena);
buf.push(first_letter);
buf.push(first_letter);
state = state.advance_without_indenting(bytes_parsed)?;
while let Some(ch) = chars.next() {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A '.' separating module parts
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
} else if ch == '.' {
match chars.next() {
Some(next) => {
if next.is_uppercase() {
// If we hit another uppercase letter, keep going!
buf.push('.');
buf.push(next);
} else {
// We have finished parsing the module name.
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// There may be an identifier after this '.',
// e.g. "baz" in `Foo.Bar.baz`
break;
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A '.' separating module parts
if ch.is_alphabetic() || ch.is_ascii_digit() {
state = state.advance_without_indenting(bytes_parsed)?;
buf.push(ch);
} else if ch == '.' {
match peek_utf8_char_at(&state, 1) {
Ok((next, next_bytes_parsed)) => {
if next.is_uppercase() {
// If we hit another uppercase letter, keep going!
buf.push('.');
buf.push(next);
state = state.advance_without_indenting(
bytes_parsed + next_bytes_parsed,
)?;
} else {
// We have finished parsing the module name.
//
// There may be an identifier after this '.',
// e.g. "baz" in `Foo.Bar.baz`
return Ok((
ModuleName::new(buf.into_bump_str()),
state,
));
}
}
Err(reason) => return state.fail(reason),
}
} else {
// This is the end of the module name. We're done!
break;
}
}
}
None => {
// A module name can't end with a '.'
return Err(unexpected_eof(0, Attempting::Identifier, state));
Err(reason) => return state.fail(reason),
}
}
} else {
// This is the end of the module name. We're done!
break;
Ok((ModuleName::new(buf.into_bump_str()), state))
}
Err(reason) => state.fail(reason),
}
let chars_parsed = buf.len();
Ok((
ModuleName::new(buf.into_bump_str()),
state.advance_without_indenting(chars_parsed)?,
))
}
}
@ -129,7 +131,7 @@ pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>> {
parser::map(
and!(
skip_first!(string("app"), and!(space1(1), loc!(module_name()))),
skip_first!(ascii_string("app"), and!(space1(1), loc!(module_name()))),
and!(provides(), imports())
),
|(
@ -167,8 +169,14 @@ fn provides<'a>() -> impl Parser<
),
> {
and!(
and!(skip_second!(space1(1), string("provides")), space1(1)),
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1)
and!(skip_second!(space1(1), ascii_string("provides")), space1(1)),
collection!(
ascii_char('['),
loc!(exposes_entry()),
ascii_char(','),
ascii_char(']'),
1
)
)
}
@ -181,8 +189,14 @@ fn exposes<'a>() -> impl Parser<
),
> {
and!(
and!(skip_second!(space1(1), string("exposes")), space1(1)),
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1)
and!(skip_second!(space1(1), ascii_string("exposes")), space1(1)),
collection!(
ascii_char('['),
loc!(exposes_entry()),
ascii_char(','),
ascii_char(']'),
1
)
)
}
@ -195,8 +209,14 @@ fn imports<'a>() -> impl Parser<
),
> {
and!(
and!(skip_second!(space1(1), string("imports")), space1(1)),
collection!(char('['), loc!(imports_entry()), char(','), char(']'), 1)
and!(skip_second!(space1(1), ascii_string("imports")), space1(1)),
collection!(
ascii_char('['),
loc!(imports_entry()),
ascii_char(','),
ascii_char(']'),
1
)
)
}
@ -213,8 +233,14 @@ fn imports_entry<'a>() -> impl Parser<'a, ImportsEntry<'a>> {
module_name(),
// e.g. `.{ Task, after}`
optional(skip_first!(
char('.'),
collection!(char('{'), loc!(exposes_entry()), char(','), char('}'), 1)
ascii_char('.'),
collection!(
ascii_char('{'),
loc!(exposes_entry()),
ascii_char(','),
ascii_char('}'),
1
)
))
),
|arena,


@ -1,23 +1,19 @@
use crate::ast::{Attempting, Base, Expr};
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use std::char;
use std::str::from_utf8_unchecked;
pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
move |_arena, state: State<'a>| {
let mut chars = state.input.chars();
let bytes = &mut state.bytes.iter();
match chars.next() {
Some(first_ch) => {
match bytes.next() {
Some(&first_byte) => {
// Number literals must start with either an '-' or a digit.
if first_ch == '-' || first_ch.is_ascii_digit() {
parse_number_literal(first_ch, &mut chars, state)
if first_byte == b'-' || (first_byte as char).is_ascii_digit() {
parse_number_literal(first_byte as char, bytes, state)
} else {
Err(unexpected(
first_ch,
first_ch.len_utf8(),
state,
Attempting::NumberLiteral,
))
Err(unexpected(1, state, Attempting::NumberLiteral))
}
}
None => Err(unexpected_eof(0, state.attempting, state)),
@ -28,11 +24,11 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
#[inline(always)]
fn parse_number_literal<'a, I>(
first_ch: char,
chars: &mut I,
bytes: &mut I,
state: State<'a>,
) -> ParseResult<'a, Expr<'a>>
where
I: Iterator<Item = char>,
I: Iterator<Item = &'a u8>,
{
use self::LiteralType::*;
@ -40,13 +36,12 @@ where
// We already parsed 1 character (which may have been a minus sign).
let mut bytes_parsed = 1;
let mut prev_ch = first_ch;
let mut prev_byte = first_ch as u8;
let mut has_parsed_digits = first_ch.is_ascii_digit();
for next_ch in chars {
for &next_byte in bytes {
let err_unexpected = || {
Err(unexpected(
next_ch,
bytes_parsed,
state.clone(),
Attempting::NumberLiteral,
@ -55,91 +50,91 @@ where
let is_potentially_non_base10 = || {
(bytes_parsed == 1 && first_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0')
|| (bytes_parsed == 2 && first_ch == '-' && prev_byte == b'0')
};
if next_ch == '.' {
if typ == Float {
// You only get one decimal point!
return err_unexpected();
} else {
typ = Float;
match next_byte as char {
'.' => {
if typ == Float {
// You only get one decimal point!
return err_unexpected();
} else {
typ = Float;
}
}
} else if next_ch == 'x' {
if is_potentially_non_base10() {
typ = Hex;
} else {
return err_unexpected();
'x' => {
if is_potentially_non_base10() {
typ = Hex;
} else {
return err_unexpected();
}
}
} else if next_ch == 'b' && typ == Num {
// We have to check for typ == Num because otherwise we get a false
// positive here when parsing a hex literal that happens to have
// a 'b' in it, e.g. 0xbbbb
if is_potentially_non_base10() {
typ = Binary;
} else {
return err_unexpected();
'b' if typ == Num => {
// We have to check for typ == Num because otherwise we get a false
// positive here when parsing a hex literal that happens to have
// a 'b' in it, e.g. 0xbbbb
if is_potentially_non_base10() {
typ = Binary;
} else {
return err_unexpected();
}
}
} else if next_ch == 'o' {
if is_potentially_non_base10() {
typ = Octal;
} else {
return err_unexpected();
'o' => {
if is_potentially_non_base10() {
typ = Octal;
} else {
return err_unexpected();
}
}
} else if next_ch.is_ascii_digit() {
has_parsed_digits = true;
} else if next_ch != '_' &&
next_ch if next_ch.is_ascii_digit() => {
has_parsed_digits = true;
}
next_ch
if next_ch != '_' &&
// ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
// We parse them in any int literal, so we can give a more helpful error
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
// or "the character 'g' is outside the range of valid Hex literals")
!next_ch.is_ascii_alphabetic()
{
if has_parsed_digits {
// We hit an invalid number literal character; we're done!
break;
} else {
// No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected();
!next_ch.is_ascii_alphabetic() =>
{
if has_parsed_digits {
// We hit an invalid number literal character; we're done!
break;
} else {
// No digits! We likely parsed a minus sign that's actually an operator.
return err_unexpected();
}
}
_ => {}
}
// Since we only consume characters in the ASCII range for number literals,
// this will always be exactly 1. There's no need to call next_ch.utf8_len().
bytes_parsed += 1;
prev_ch = next_ch;
prev_byte = next_byte;
}
let from_base = |base| {
let is_negative = first_ch == '-';
let string = if is_negative {
&state.input[3..bytes_parsed]
} else {
&state.input[2..bytes_parsed]
};
Expr::NonBase10Int {
is_negative,
string,
base,
}
};
// At this point we have a number, and will definitely succeed.
// If the number is malformed (outside the supported range),
// we'll succeed with an appropriate Expr which records that.
let expr = match typ {
Num => Expr::Num(&state.input[0..bytes_parsed]),
Float => Expr::Float(&state.input[0..bytes_parsed]),
match typ {
Num => Ok((
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
// already validated that this range contains only ASCII digits
Expr::Num(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
state.advance_without_indenting(bytes_parsed)?,
)),
Float => Ok((
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
// already validated that this range contains only ASCII digits
Expr::Float(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
state.advance_without_indenting(bytes_parsed)?,
)),
// For these we trim off the 0x/0o/0b part
Hex => from_base(Base::Hex),
Octal => from_base(Base::Octal),
Binary => from_base(Base::Binary),
};
let next_state = state.advance_without_indenting(bytes_parsed)?;
Ok((expr, next_state))
Hex => from_base(Base::Hex, first_ch, bytes_parsed, state),
Octal => from_base(Base::Octal, first_ch, bytes_parsed, state),
Binary => from_base(Base::Binary, first_ch, bytes_parsed, state),
}
}
#[derive(Debug, PartialEq, Eq)]
@ -150,3 +145,29 @@ enum LiteralType {
Octal,
Binary,
}
fn from_base(
base: Base,
first_ch: char,
bytes_parsed: usize,
state: State<'_>,
) -> ParseResult<'_, Expr<'_>> {
let is_negative = first_ch == '-';
let bytes = if is_negative {
&state.bytes[3..bytes_parsed]
} else {
&state.bytes[2..bytes_parsed]
};
match parse_utf8(bytes) {
Ok(string) => Ok((
Expr::NonBase10Int {
is_negative,
string,
base,
},
state.advance_without_indenting(bytes_parsed)?,
)),
Err(reason) => state.fail(reason),
}
}
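For orientation (not part of the diff itself), here is a hedged sketch of the prefix-trimming arithmetic that `from_base` performs and, roughly, what the branches above yield; `trim_base_prefix` is a made-up name used only for illustration.
// Hypothetical illustration of the slicing in `from_base`: skip the sign
// (if any) plus the two-byte "0x"/"0o"/"0b" prefix before the digits.
// Assumes `bytes_parsed` already covers the prefix, as in the parser above.
fn trim_base_prefix(bytes: &[u8], bytes_parsed: usize) -> (bool, &[u8]) {
    let is_negative = bytes.first() == Some(&b'-');
    let digits = if is_negative {
        &bytes[3..bytes_parsed] // e.g. "-0x2A" -> "2A"
    } else {
        &bytes[2..bytes_parsed] // e.g. "0b101" -> "101"
    };
    (is_negative, digits)
}
// So, roughly: "42" -> Expr::Num("42"), "3.14" -> Expr::Float("3.14"),
// "-0x2A" -> Expr::NonBase10Int { is_negative: true, string: "2A", base: Base::Hex }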

View File

@ -1,14 +1,17 @@
use crate::ast::Attempting;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use encode_unicode::CharExt;
use roc_region::all::{Located, Region};
use std::fmt;
use std::str::from_utf8;
use std::{char, u16};
/// A position in a source file.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Clone, PartialEq, Eq)]
pub struct State<'a> {
/// The raw input string.
pub input: &'a str,
/// The raw input bytes from the file.
pub bytes: &'a [u8],
/// Current line of the input
pub line: u32,
@ -39,15 +42,15 @@ pub enum Either<First, Second> {
}
impl<'a> State<'a> {
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> {
pub fn new(bytes: &'a [u8], attempting: Attempting) -> State<'a> {
State {
input,
bytes,
line: 0,
column: 0,
indent_col: 0,
is_indenting: true,
attempting,
original_len: input.len(),
original_len: bytes.len(),
}
}
@ -69,7 +72,7 @@ impl<'a> State<'a> {
///
/// So if the parser has consumed 8 bytes, this function will return 8.
pub fn bytes_consumed(&self) -> usize {
self.original_len - self.input.len()
self.original_len - self.bytes.len()
}
/// Increments the line, then resets column, indent_col, and is_indenting.
@ -77,7 +80,7 @@ impl<'a> State<'a> {
pub fn newline(&self) -> Result<Self, (Fail, Self)> {
match self.line.checked_add(1) {
Some(line) => Ok(State {
input: &self.input[1..],
bytes: &self.bytes[1..],
line,
column: 0,
indent_col: 0,
@ -99,11 +102,11 @@ impl<'a> State<'a> {
/// This assumes we are *not* advancing with spaces, or at least that
/// any spaces on the line were preceded by non-spaces - which would mean
/// they weren't eligible to indent anyway.
pub fn advance_without_indenting(&self, quantity: usize) -> Result<Self, (Fail, Self)> {
pub fn advance_without_indenting(self, quantity: usize) -> Result<Self, (Fail, Self)> {
match (self.column as usize).checked_add(quantity) {
Some(column_usize) if column_usize <= u16::MAX as usize => {
Ok(State {
input: &self.input[quantity..],
bytes: &self.bytes[quantity..],
line: self.line,
column: column_usize as u16,
indent_col: self.indent_col,
@ -141,7 +144,7 @@ impl<'a> State<'a> {
};
Ok(State {
input: &self.input[spaces..],
bytes: &self.bytes[spaces..],
line: self.line,
column: column_usize as u16,
indent_col,
@ -169,6 +172,35 @@ impl<'a> State<'a> {
end_line: self.line,
}
}
/// Return a failing ParseResult for the given FailReason
pub fn fail<T>(self, reason: FailReason) -> Result<(T, Self), (Fail, Self)> {
Err((
Fail {
reason,
attempting: self.attempting,
},
self,
))
}
}
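A hedged sketch of the calling pattern the new `fail` helper enables (the same shape appears in `from_base` and in the string literal parser in this diff); `str_or_fail`, `bytes`, and `len` are hypothetical placeholders, and `parse_utf8` is the helper defined later in this file.
// Sketch (not in this diff): turn a UTF-8 validation failure into the
// (Fail, State) error pair, preserving what the parser was attempting.
fn str_or_fail<'a>(bytes: &'a [u8], len: usize, state: State<'a>) -> ParseResult<'a, &'a str> {
    match parse_utf8(bytes) {
        Ok(string) => Ok((string, state.advance_without_indenting(len)?)),
        Err(reason) => state.fail(reason), // reason will be FailReason::BadUtf8
    }
}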
impl<'a> fmt::Debug for State<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "State {{")?;
match from_utf8(self.bytes) {
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
}
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
write!(f, "\n\tindent_col: {}", self.indent_col)?;
write!(f, "\n\tis_indenting: {:?}", self.is_indenting)?;
write!(f, "\n\tattempting: {:?}", self.attempting)?;
write!(f, "\n\toriginal_len: {}", self.original_len)?;
write!(f, "\n}}")
}
}
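For reference, a rough sketch of what this manual Debug output would look like for a freshly-created state (field values assumed from `State::new` above; formatting approximate).
// println!("{:?}", State::new(b"x = 1", Attempting::Module)) prints roughly:
// State {
//     bytes: [utf8] "x = 1"
//     (line, col): (0, 0),
//     indent_col: 0
//     is_indenting: true
//     attempting: Module
//     original_len: 5
// }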
#[test]
@ -182,13 +214,14 @@ pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FailReason {
Unexpected(char, Region),
Unexpected(Region),
OutdentedTooFar,
ConditionFailed,
LineTooLong(u32 /* which line was too long */),
TooManyLines,
Eof(Region),
InvalidPattern,
BadUtf8,
ReservedKeyword(Region),
ArgumentsBeforeEquals(Region),
}
@ -332,13 +365,12 @@ pub fn unexpected_eof(
}
pub fn unexpected(
ch: char,
chars_consumed: usize,
state: State<'_>,
attempting: Attempting,
) -> (Fail, State<'_>) {
checked_unexpected(chars_consumed, state, |region| Fail {
reason: FailReason::Unexpected(ch, region),
reason: FailReason::Unexpected(region),
attempting,
})
}
@ -385,9 +417,9 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
// (for example) the LineTooLong initially occurs in the middle of
// a one_of chain, which would otherwise prevent it from propagating.
let column = u16::MAX;
let input = state.input.get(0..state.input.len()).unwrap();
let bytes = state.bytes.get(0..state.bytes.len()).unwrap();
let state = State {
input,
bytes,
line: state.line,
indent_col: state.indent_col,
is_indenting: state.is_indenting,
@ -399,29 +431,90 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
(fail, state)
}
/// A single char.
pub fn char<'a>(expected: char) -> impl Parser<'a, ()> {
move |_arena, state: State<'a>| match state.input.chars().next() {
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
Some(other_ch) => Err(unexpected(other_ch, 0, state, Attempting::Keyword)),
/// A single ASCII char.
pub fn ascii_char<'a>(expected: char) -> impl Parser<'a, ()> {
// Make sure this really is an ASCII char!
debug_assert!(expected.len_utf8() == 1);
move |_arena, state: State<'a>| match state.bytes.first() {
Some(&actual) if expected == actual as char => {
Ok(((), state.advance_without_indenting(1)?))
}
Some(_) => Err(unexpected(0, state, Attempting::Keyword)),
_ => Err(unexpected_eof(0, Attempting::Keyword, state)),
}
}
/// A hardcoded keyword string with no newlines in it.
pub fn string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
// We can't have newlines because we don't attempt to advance the row
// in the state, only the column.
debug_assert!(!keyword.contains('\n'));
/// A single UTF-8-encoded char. This will both parse *and* validate that the
/// char is valid UTF-8.
pub fn utf8_char2<'a>() -> impl Parser<'a, char> {
move |_arena, state: State<'a>| {
if !state.bytes.is_empty() {
match char::from_utf8_slice_start(state.bytes) {
Ok((ch, bytes_parsed)) => Ok((ch, state.advance_without_indenting(bytes_parsed)?)),
Err(_) => state.fail(FailReason::BadUtf8),
}
} else {
Err(unexpected_eof(0, state.attempting, state))
}
}
}
/// A single UTF-8-encoded char. This will both parse *and* validate that the
/// char is valid UTF-8, but it will *not* advance the state.
pub fn peek_utf8_char<'a>(state: &State<'a>) -> Result<(char, usize), FailReason> {
if !state.bytes.is_empty() {
match char::from_utf8_slice_start(state.bytes) {
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
Err(_) => Err(FailReason::BadUtf8),
}
} else {
Err(FailReason::Eof(
Region::zero(), /* TODO get a better region */
))
}
}
/// A single UTF-8-encoded char, with an offset. This will both parse *and*
/// validate that the char is valid UTF-8, but it will *not* advance the state.
pub fn peek_utf8_char_at<'a>(
state: &State<'a>,
offset: usize,
) -> Result<(char, usize), FailReason> {
if state.bytes.len() > offset {
let bytes = &state.bytes[offset..];
match char::from_utf8_slice_start(bytes) {
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
Err(_) => Err(FailReason::BadUtf8),
}
} else {
Err(FailReason::Eof(
Region::zero(), /* TODO get a better region */
))
}
}
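The peek-then-advance pattern these two helpers are designed for (and which the type-annotation parser later in this diff follows) looks roughly like the sketch below; `chomp_while` and `keep` are hypothetical names, not part of the change.
// Sketch (not in this diff): collect a token char-by-char using
// peek_utf8_char, advancing only past chars we actually keep.
fn chomp_while<'a>(
    mut state: State<'a>,
    keep: impl Fn(char) -> bool,
) -> ParseResult<'a, std::string::String> {
    let mut buf = std::string::String::new();

    while !state.bytes.is_empty() {
        match peek_utf8_char(&state) {
            Ok((ch, bytes_parsed)) => {
                if !keep(ch) {
                    break; // token ended; `state` still points at `ch`
                }
                buf.push(ch);
                state = state.advance_without_indenting(bytes_parsed)?;
            }
            Err(reason) => return state.fail(reason),
        }
    }

    Ok((buf, state))
}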
/// A hardcoded string with no newlines, consisting only of ASCII characters
pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
// Verify that this really does consist exclusively of ASCII characters.
// The `unsafe` block in this function relies upon this assumption!
//
// Also, this can't have newlines because we don't attempt to advance
// the row in the state, only the column.
debug_assert!(keyword.chars().all(|ch| ch.len_utf8() == 1 && ch != '\n'));
move |_arena, state: State<'a>| {
let input = state.input;
let len = keyword.len();
// TODO do this comparison in one SIMD instruction (on supported systems)
match input.get(0..len) {
Some(next_str) if next_str == keyword => {
Ok(((), state.advance_without_indenting(len)?))
match state.bytes.get(0..len) {
Some(next_str) => {
if next_str == keyword.as_bytes() {
Ok(((), state.advance_without_indenting(len)?))
} else {
Err(unexpected(len, state, Attempting::Keyword))
}
}
_ => Err(unexpected_eof(0, Attempting::Keyword, state)),
}
@ -686,7 +779,7 @@ macro_rules! collection {
// We could change the AST to add extra storage specifically to
// support empty literals containing newlines or comments, but this
// does not seem worth even the tiniest regression in compiler performance.
zero_or_more!($crate::parser::char(' ')),
zero_or_more!($crate::parser::ascii_char(' ')),
skip_second!(
$crate::parser::sep_by0(
$delimiter,
@ -912,6 +1005,7 @@ macro_rules! record_field {
use $crate::ast::AssignedField::*;
use $crate::blankspace::{space0, space0_before};
use $crate::ident::lowercase_ident;
use $crate::parser::ascii_char;
use $crate::parser::Either::*;
// You must have a field name, e.g. "email"
@ -922,8 +1016,8 @@ macro_rules! record_field {
// Having a value is optional; both `{ email }` and `{ email: blah }` work.
// (This is true in both literals and types.)
let (opt_loc_val, state) = $crate::parser::optional(either!(
skip_first!(char(':'), space0_before($val_parser, $min_indent)),
skip_first!(char('?'), space0_before($val_parser, $min_indent))
skip_first!(ascii_char(':'), space0_before($val_parser, $min_indent)),
skip_first!(ascii_char('?'), space0_before($val_parser, $min_indent))
))
.parse(arena, state)?;
@ -952,10 +1046,10 @@ macro_rules! record_field {
macro_rules! record_without_update {
($val_parser:expr, $min_indent:expr) => {
collection!(
char('{'),
ascii_char('{'),
loc!(record_field!($val_parser, $min_indent)),
char(','),
char('}'),
ascii_char(','),
ascii_char('}'),
$min_indent
)
};
@ -965,7 +1059,7 @@ macro_rules! record_without_update {
macro_rules! record {
($val_parser:expr, $min_indent:expr) => {
skip_first!(
$crate::parser::char('{'),
$crate::parser::ascii_char('{'),
and!(
// You can optionally have an identifier followed by an '&' to
// make this a record update, e.g. { Foo.user & username: "blah" }.
@ -981,7 +1075,7 @@ macro_rules! record {
)),
$min_indent
),
$crate::parser::char('&')
$crate::parser::ascii_char('&')
)),
loc!(skip_first!(
// We specifically allow space characters inside here, so that
@ -995,16 +1089,16 @@ macro_rules! record {
// We could change the AST to add extra storage specifically to
// support empty literals containing newlines or comments, but this
// does not seem worth even the tiniest regression in compiler performance.
zero_or_more!($crate::parser::char(' ')),
zero_or_more!($crate::parser::ascii_char(' ')),
skip_second!(
$crate::parser::sep_by0(
$crate::parser::char(','),
$crate::parser::ascii_char(','),
$crate::blankspace::space0_around(
loc!(record_field!($val_parser, $min_indent)),
$min_indent
)
),
$crate::parser::char('}')
$crate::parser::ascii_char('}')
)
))
)
@ -1067,3 +1161,10 @@ where
{
attempt!(attempting, parser)
}
pub fn parse_utf8(bytes: &[u8]) -> Result<&str, FailReason> {
match from_utf8(bytes) {
Ok(string) => Ok(string),
Err(_) => Err(FailReason::BadUtf8),
}
}
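A minimal illustration of what `parse_utf8` adds over `std::str::from_utf8`, assuming the `FailReason` enum defined earlier in this file (this test is not part of the diff).
// Sketch: valid bytes come back as &str; invalid bytes map onto the parser's
// own BadUtf8 failure instead of a std::str::Utf8Error.
#[test]
fn parse_utf8_sketch() {
    assert_eq!(parse_utf8(b"abc"), Ok("abc"));
    assert_eq!(parse_utf8(&[0xFF, 0xFE]), Err(FailReason::BadUtf8));
}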

View File

@ -1,8 +1,7 @@
use crate::ast::Attempting;
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use std::char;
pub enum StringLiteral<'a> {
Line(&'a str),
@ -11,14 +10,15 @@ pub enum StringLiteral<'a> {
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
move |arena: &'a Bump, state: State<'a>| {
let mut chars = state.input.chars();
let mut bytes = state.bytes.iter();
// String literals must start with a quote.
// If this doesn't, it must not be a string literal!
match chars.next() {
Some('"') => (),
Some(other_char) => {
return Err(unexpected(other_char, 0, state, Attempting::StringLiteral));
match bytes.next() {
Some(&byte) => {
if byte != b'"' {
return Err(unexpected(0, state, Attempting::StringLiteral));
}
}
None => {
return Err(unexpected_eof(0, Attempting::StringLiteral, state));
@ -35,44 +35,49 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
// Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1;
let mut prev_ch = '"';
let mut prev_byte = b'"';
while let Some(ch) = chars.next() {
while let Some(&byte) = bytes.next() {
parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' {
let string = if parsed_chars == 2 {
if let Some('"') = chars.next() {
// If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut chars);
} else {
""
if byte == b'"' && prev_byte != b'\\' {
let (string, state) = if parsed_chars == 2 {
match bytes.next() {
Some(byte) if *byte == b'"' => {
// If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut bytes);
}
_ => ("", state.advance_without_indenting(2)?),
}
} else {
// Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`.
&state.input[1..(parsed_chars - 1)]
let string_bytes = &state.bytes[1..(parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?),
Err(reason) => {
return state.fail(reason);
}
}
};
let next_state = state.advance_without_indenting(parsed_chars)?;
return Ok((StringLiteral::Line(string), next_state));
} else if ch == '\n' {
return Ok((StringLiteral::Line(string), state));
} else if byte == b'\n' {
// This is a single-line string, which cannot have newlines!
// Treat this as an unclosed string literal, and consume
// all remaining chars. This will mask all other errors, but
// it should make it easiest to debug; the file will be a giant
// error starting from where the open quote appeared.
return Err(unexpected(
'\n',
state.input.len() - 1,
state.bytes.len() - 1,
state,
Attempting::StringLiteral,
));
} else {
prev_ch = ch;
prev_byte = byte;
}
}
@ -88,48 +93,64 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
fn parse_block_string<'a, I>(
arena: &'a Bump,
state: State<'a>,
chars: &mut I,
bytes: &mut I,
) -> ParseResult<'a, StringLiteral<'a>>
where
I: Iterator<Item = char>,
I: Iterator<Item = &'a u8>,
{
// So far we have consumed the `"""` and that's it.
let mut parsed_chars = 3;
let mut prev_ch = '"';
let mut prev_byte = b'"';
let mut quotes_seen = 0;
// start at 3 to omit the opening `"`.
let mut line_start = 3;
let mut lines = Vec::new_in(arena);
let mut lines: Vec<'a, &'a str> = Vec::new_in(arena);
for ch in chars {
for byte in bytes {
parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!)
if ch == '"' && prev_ch != '\\' {
if *byte == b'"' && prev_byte != b'\\' {
if quotes_seen == 2 {
// three consecutive quotes, end string
// Subtract 3 from parsed_chars so we omit the closing `"`.
let string = &state.input[line_start..(parsed_chars - 3)];
lines.push(string);
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
let next_state = state.advance_without_indenting(parsed_chars)?;
return match parse_utf8(line_bytes) {
Ok(line) => {
let state = state.advance_without_indenting(parsed_chars)?;
return Ok((StringLiteral::Block(arena.alloc(lines)), next_state));
lines.push(line);
Ok((StringLiteral::Block(arena.alloc(lines)), state))
}
Err(reason) => state.fail(reason),
};
}
quotes_seen += 1;
} else if ch == '\n' {
} else if *byte == b'\n' {
// note this includes the newline
let string = &state.input[line_start..parsed_chars];
lines.push(string);
quotes_seen = 0;
line_start = parsed_chars;
let line_bytes = &state.bytes[line_start..parsed_chars];
match parse_utf8(line_bytes) {
Ok(line) => {
lines.push(line);
quotes_seen = 0;
line_start = parsed_chars;
}
Err(reason) => {
return state.fail(reason);
}
}
} else {
quotes_seen = 0;
}
prev_ch = ch;
prev_byte = *byte;
}
// We ran out of characters before finding 3 closing quotes
@ -137,6 +158,6 @@ where
parsed_chars,
// TODO custom BlockStringLiteral?
Attempting::StringLiteral,
state.clone(),
state,
))
}
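A hedged walkthrough of the slicing above for a small block literal (not a test in this diff):
// input bytes:  " " " a b \n c d " " "   (i.e. """ab<newline>cd""")
// line_start starts at 3, just past the opening `"""`;
// the newline at parsed_chars == 6 pushes "ab\n" (newline included);
// the closing `"""` pushes &bytes[6..8] == "cd";
// result: StringLiteral::Block(&["ab\n", "cd"])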

View File

@ -4,8 +4,8 @@ use crate::expr::{global_tag, private_tag};
use crate::ident::join_module_parts;
use crate::keyword;
use crate::parser::{
allocated, char, not, optional, string, unexpected, unexpected_eof, Either, ParseResult,
Parser, State,
allocated, ascii_char, ascii_string, not, optional, peek_utf8_char, unexpected, Either,
ParseResult, Parser, State,
};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
@ -22,10 +22,10 @@ macro_rules! tag_union {
map!(
and!(
collection!(
char('['),
ascii_char('['),
loc!(tag_type($min_indent)),
char(','),
char(']'),
ascii_char(','),
ascii_char(']'),
$min_indent
),
optional(
@ -61,7 +61,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
and!(
space1(min_indent),
skip_first!(
string(keyword::AS),
ascii_string(keyword::AS),
space1_before(term(min_indent), min_indent)
)
)
@ -89,7 +89,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
/// The `*` type variable, e.g. in (List *) Wildcard,
fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
map!(loc!(char('*')), |loc_val: Located<()>| {
map!(loc!(ascii_char('*')), |loc_val: Located<()>| {
loc_val.map(|_| TypeAnnotation::Wildcard)
})
}
@ -97,7 +97,7 @@ fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
skip_first!(
// Once we hit an "as", stop parsing args
not(string(keyword::AS)),
not(ascii_string(keyword::AS)),
one_of!(
loc_wildcard(),
loc_parenthetical_type(min_indent),
@ -112,12 +112,12 @@ pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnot
#[inline(always)]
fn loc_parenthetical_type<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
between!(
char('('),
ascii_char('('),
space0_around(
move |arena, state| expression(min_indent).parse(arena, state),
min_indent,
),
char(')')
ascii_char(')')
)
}
@ -208,7 +208,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
move |arena, state: State<'a>| {
let (first, state) = space0_before(term(min_indent), min_indent).parse(arena, state)?;
let (rest, state) = zero_or_more!(skip_first!(
char(','),
ascii_char(','),
space0_around(term(min_indent), min_indent)
))
.parse(arena, state)?;
@ -216,7 +216,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
// TODO this space0 is dropped, so newlines just before the function arrow when there
// is only one argument are not seen by the formatter. Can we do better?
let (is_function, state) =
optional(skip_first!(space0(min_indent), string("->"))).parse(arena, state)?;
optional(skip_first!(space0(min_indent), ascii_string("->"))).parse(arena, state)?;
if is_function.is_some() {
let (return_type, state) =
@ -263,67 +263,70 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
fn parse_concrete_type<'a>(
arena: &'a Bump,
state: State<'a>,
mut state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>> {
let mut chars = state.input.chars();
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
let mut parts: Vec<&'a str> = Vec::new_in(arena);
// Qualified types must start with a capitalized letter.
match chars.next() {
Some(ch) => {
if ch.is_alphabetic() && ch.is_uppercase() {
part_buf.push(ch);
match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
if first_letter.is_alphabetic() && first_letter.is_uppercase() {
part_buf.push(first_letter);
} else {
return Err(unexpected(ch, 0, state, Attempting::ConcreteType));
return Err(unexpected(0, state, Attempting::ConcreteType));
}
}
None => {
return Err(unexpected_eof(0, Attempting::ConcreteType, state));
}
};
let mut chars_parsed = 1;
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
let mut next_char = None;
while let Some(ch) = chars.next() {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A dot ('.')
if ch.is_alphabetic() {
if part_buf.is_empty() && !ch.is_uppercase() {
// Each part must begin with a capital letter.
return malformed(Some(ch), arena, state, &mut chars, parts);
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
// * A dot ('.')
if ch.is_alphabetic() {
if part_buf.is_empty() && !ch.is_uppercase() {
// Each part must begin with a capital letter.
return malformed(Some(ch), arena, state, parts);
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, parts);
}
part_buf.push(ch);
} else if ch == '.' {
// Having two consecutive dots is an error.
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, parts);
}
parts.push(part_buf.into_bump_str());
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the type. We're done!
next_char = Some(ch);
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
part_buf.push(ch);
} else if ch.is_ascii_digit() {
// Parts may not start with numbers!
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, &mut chars, parts);
}
part_buf.push(ch);
} else if ch == '.' {
// Having two consecutive dots is an error.
if part_buf.is_empty() {
return malformed(Some(ch), arena, state, &mut chars, parts);
}
parts.push(part_buf.into_bump_str());
// Now that we've recorded the contents of the current buffer, reset it.
part_buf = String::new_in(arena);
} else {
// This must be the end of the type. We're done!
next_char = Some(ch);
break;
Err(reason) => return state.fail(reason),
}
chars_parsed += 1;
}
if part_buf.is_empty() {
@ -333,23 +336,16 @@ fn parse_concrete_type<'a>(
//
// If we made it this far and don't have a next_char, then necessarily
// we have consumed a '.' char previously.
return malformed(
next_char.or_else(|| Some('.')),
arena,
state,
&mut chars,
parts,
);
return malformed(next_char.or_else(|| Some('.')), arena, state, parts);
}
if part_buf.is_empty() {
// We had neither capitalized nor noncapitalized parts,
// yet we made it this far. The only explanation is that this was
// a stray '.' drifting through the cosmos.
return Err(unexpected('.', 1, state, Attempting::Identifier));
return Err(unexpected(1, state, Attempting::Identifier));
}
let state = state.advance_without_indenting(chars_parsed)?;
let answer = TypeAnnotation::Apply(
join_module_parts(arena, parts.into_bump_slice()),
part_buf.into_bump_str(),
@ -361,58 +357,55 @@ fn parse_concrete_type<'a>(
fn parse_type_variable<'a>(
arena: &'a Bump,
state: State<'a>,
mut state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>> {
let mut chars = state.input.chars();
let mut buf = String::new_in(arena);
// Type variables must start with a lowercase letter.
match chars.next() {
Some(ch) => {
if ch.is_alphabetic() && ch.is_lowercase() {
buf.push(ch);
match peek_utf8_char(&state) {
Ok((first_letter, bytes_parsed)) => {
// Type variables must start with a lowercase letter.
if first_letter.is_alphabetic() && first_letter.is_lowercase() {
buf.push(first_letter);
} else {
return Err(unexpected(ch, 0, state, Attempting::TypeVariable));
return Err(unexpected(0, state, Attempting::TypeVariable));
}
}
None => {
return Err(unexpected_eof(0, Attempting::TypeVariable, state));
}
};
let mut chars_parsed = 1;
for ch in chars {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
} else {
// This must be the end of the type. We're done!
break;
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// After the first character, only these are allowed:
//
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
if ch.is_alphabetic() || ch.is_ascii_digit() {
buf.push(ch);
} else {
// This must be the end of the type. We're done!
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
chars_parsed += 1;
}
let state = state.advance_without_indenting(chars_parsed)?;
let answer = TypeAnnotation::BoundVariable(buf.into_bump_str());
Ok((answer, state))
}
fn malformed<'a, I>(
fn malformed<'a>(
opt_bad_char: Option<char>,
arena: &'a Bump,
state: State<'a>,
chars: &mut I,
mut state: State<'a>,
parts: Vec<&'a str>,
) -> ParseResult<'a, TypeAnnotation<'a>>
where
I: Iterator<Item = char>,
{
) -> ParseResult<'a, TypeAnnotation<'a>> {
// Reconstruct the original string that we've been parsing.
let mut full_string = String::new_in(arena);
@ -423,20 +416,25 @@ where
}
// Consume the remaining chars in the identifier.
for ch in chars {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
full_string.push(ch);
} else {
break;
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, bytes_parsed)) => {
// We can't use ch.is_alphanumeric() here because that passes for
// things that are "numeric" but not ASCII digits, like `¾`
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
full_string.push(ch);
} else {
break;
}
state = state.advance_without_indenting(bytes_parsed)?;
}
Err(reason) => return state.fail(reason),
}
}
let chars_parsed = full_string.len();
Ok((
TypeAnnotation::Malformed(full_string.into_bump_str()),
state.advance_without_indenting(chars_parsed)?,
state,
))
}

View File

@ -13,7 +13,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View File

@ -918,17 +918,12 @@ mod test_parse {
let arena = Bump::new();
let arg = arena.alloc(Located::new(0, 0, 5, 6, Num("1")));
let args = bumpalo::vec![in &arena; &*arg];
let expr = Var {
module_name: "",
ident: "whee",
};
let expected = Expr::Apply(
arena.alloc(Located::new(
0,
0,
0,
4,
Var {
module_name: "",
ident: "whee",
},
)),
arena.alloc(Located::new(0, 0, 0, 4, expr)),
args,
CalledVia::Space,
);
@ -1040,16 +1035,11 @@ mod test_parse {
fn unary_negation() {
let arena = Bump::new();
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Negate);
let loc_arg1_expr = Located::new(
0,
0,
1,
4,
Var {
module_name: "",
ident: "foo",
},
);
let arg1_expr = Var {
module_name: "",
ident: "foo",
};
let loc_arg1_expr = Located::new(0, 0, 1, 4, arg1_expr);
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
let actual = parse_with(&arena, "-foo");
@ -1060,16 +1050,11 @@ mod test_parse {
fn unary_not() {
let arena = Bump::new();
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Not);
let loc_arg1_expr = Located::new(
0,
0,
1,
5,
Var {
module_name: "",
ident: "blah",
},
);
let arg1_expr = Var {
module_name: "",
ident: "blah",
};
let loc_arg1_expr = Located::new(0, 0, 1, 5, arg1_expr);
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
let actual = parse_with(&arena, "!blah");
@ -2092,7 +2077,7 @@ mod test_parse {
"#
);
let actual = interface_header()
.parse(&arena, State::new(&src, Attempting::Module))
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual);
@ -2121,7 +2106,7 @@ mod test_parse {
"#
);
let actual = interface_header()
.parse(&arena, State::new(&src, Attempting::Module))
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual);
@ -2174,7 +2159,7 @@ mod test_parse {
"#
);
let actual = module_defs()
.parse(&arena, State::new(&src, Attempting::Module))
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
assert_eq!(Ok(expected), actual);

View File

@ -91,7 +91,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View File

@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);

View File

@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
#[allow(dead_code)]
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
let state = State::new(&input, Attempting::Module);
let state = State::new(input.as_bytes(), Attempting::Module);
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
let answer = parser.parse(&arena, state);