mirror of
https://github.com/roc-lang/roc.git
synced 2024-09-22 00:09:33 +03:00
Merge pull request #432 from rtfeldman/utf8
Lazily validate UTF-8 when parsing
This commit is contained in:
commit
8b3dd6c90c
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -527,6 +527,12 @@ version = "1.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
|
||||
|
||||
[[package]]
|
||||
name = "encode_unicode"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.6.2"
|
||||
@ -2229,6 +2235,7 @@ name = "roc_parse"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bumpalo",
|
||||
"encode_unicode",
|
||||
"indoc",
|
||||
"inlinable_string",
|
||||
"pretty_assertions",
|
||||
|
@ -33,6 +33,7 @@ use roc_types::types::Type;
|
||||
use std::hash::Hash;
|
||||
use std::io::{self, Write};
|
||||
use std::path::PathBuf;
|
||||
use std::str::from_utf8_unchecked;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
pub fn main() -> io::Result<()> {
|
||||
@ -145,7 +146,7 @@ fn report_parse_error(fail: Fail) {
|
||||
}
|
||||
|
||||
fn print_output(src: &str) -> Result<String, Fail> {
|
||||
gen(src, Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
|
||||
gen(src.as_bytes(), Triple::host(), OptLevel::Normal).map(|(answer, answer_type)| {
|
||||
format!("\n{} \u{001b}[35m:\u{001b}[0m {}", answer, answer_type)
|
||||
})
|
||||
}
|
||||
@ -154,7 +155,7 @@ pub fn repl_home() -> ModuleId {
|
||||
ModuleIds::default().get_or_insert(&"REPL".into())
|
||||
}
|
||||
|
||||
pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
|
||||
pub fn gen(src: &[u8], target: Triple, opt_level: OptLevel) -> Result<(String, String), Fail> {
|
||||
use roc_reporting::report::{can_problem, type_problem, RocDocAllocator, DEFAULT_PALETTE};
|
||||
|
||||
// Look up the types and expressions of the `provided` values
|
||||
@ -169,13 +170,16 @@ pub fn gen(src: &str, target: Triple, opt_level: OptLevel) -> Result<(String, St
|
||||
interns,
|
||||
problems: can_problems,
|
||||
..
|
||||
} = can_expr(src)?;
|
||||
} = can_expr(src)?; // IMPORTANT: we must bail out here if there were UTF-8 errors!
|
||||
|
||||
let subs = Subs::new(var_store.into());
|
||||
let mut type_problems = Vec::new();
|
||||
let (content, mut subs) = infer_expr(subs, &mut type_problems, &constraint, var);
|
||||
|
||||
// SAFETY: we've already verified that this is valid UTF-8 during parsing.
|
||||
let src_lines: Vec<&str> = unsafe { from_utf8_unchecked(src).split('\n').collect() };
|
||||
|
||||
// Report problems
|
||||
let src_lines: Vec<&str> = src.split('\n').collect();
|
||||
let palette = DEFAULT_PALETTE;
|
||||
|
||||
// Report parsing and canonicalization problems
|
||||
@ -386,8 +390,11 @@ pub fn infer_expr(
|
||||
(content, solved.into_inner())
|
||||
}
|
||||
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
pub fn parse_loc_with<'a>(
|
||||
arena: &'a Bump,
|
||||
bytes: &'a [u8],
|
||||
) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&bytes, Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
@ -396,14 +403,14 @@ pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast
|
||||
.map_err(|(fail, _)| fail)
|
||||
}
|
||||
|
||||
pub fn can_expr(expr_str: &str) -> Result<CanExprOut, Fail> {
|
||||
can_expr_with(&Bump::new(), repl_home(), expr_str)
|
||||
pub fn can_expr(expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
|
||||
can_expr_with(&Bump::new(), repl_home(), expr_bytes)
|
||||
}
|
||||
|
||||
// TODO make this return a named struct instead of a big tuple
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn uniq_expr(
|
||||
expr_str: &str,
|
||||
expr_bytes: &[u8],
|
||||
) -> Result<
|
||||
(
|
||||
Located<roc_can::expr::Expr>,
|
||||
@ -419,14 +426,14 @@ pub fn uniq_expr(
|
||||
> {
|
||||
let declared_idents: &ImMap<Ident, (Symbol, Region)> = &ImMap::default();
|
||||
|
||||
uniq_expr_with(&Bump::new(), expr_str, declared_idents)
|
||||
uniq_expr_with(&Bump::new(), expr_bytes, declared_idents)
|
||||
}
|
||||
|
||||
// TODO make this return a named struct instead of a big tuple
|
||||
#[allow(clippy::type_complexity)]
|
||||
pub fn uniq_expr_with(
|
||||
arena: &Bump,
|
||||
expr_str: &str,
|
||||
expr_bytes: &[u8],
|
||||
declared_idents: &ImMap<Ident, (Symbol, Region)>,
|
||||
) -> Result<
|
||||
(
|
||||
@ -450,7 +457,7 @@ pub fn uniq_expr_with(
|
||||
var,
|
||||
interns,
|
||||
..
|
||||
} = can_expr_with(arena, home, expr_str)?;
|
||||
} = can_expr_with(arena, home, expr_bytes)?;
|
||||
|
||||
// double check
|
||||
let mut var_store = VarStore::new(old_var_store.fresh());
|
||||
@ -505,8 +512,8 @@ pub struct CanExprOut {
|
||||
pub constraint: Constraint,
|
||||
}
|
||||
|
||||
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_str: &str) -> Result<CanExprOut, Fail> {
|
||||
let loc_expr = parse_loc_with(&arena, expr_str)?;
|
||||
pub fn can_expr_with(arena: &Bump, home: ModuleId, expr_bytes: &[u8]) -> Result<CanExprOut, Fail> {
|
||||
let loc_expr = parse_loc_with(&arena, expr_bytes)?;
|
||||
let mut var_store = VarStore::default();
|
||||
let var = var_store.fresh();
|
||||
let expected = Expected::NoExpectation(Type::Variable(var));
|
||||
|
@ -27,7 +27,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -20,7 +20,7 @@ mod test_fmt {
|
||||
use roc_parse::parser::{Fail, Parser, State};
|
||||
|
||||
fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Expr<'a>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc!(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
@ -55,7 +55,7 @@ mod test_fmt {
|
||||
let src = src.trim_end();
|
||||
let expected = expected.trim_end();
|
||||
|
||||
match module::header().parse(&arena, State::new(&src, Attempting::Module)) {
|
||||
match module::header().parse(&arena, State::new(src.as_bytes(), Attempting::Module)) {
|
||||
Ok((actual, state)) => {
|
||||
let mut buf = String::new_in(&arena);
|
||||
|
||||
|
@ -87,7 +87,7 @@ pub fn infer_expr(
|
||||
}
|
||||
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -19,9 +19,10 @@ use roc_solve::solve;
|
||||
use roc_types::solved_types::Solved;
|
||||
use roc_types::subs::{Subs, VarStore, Variable};
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::fs::read_to_string;
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::from_utf8_unchecked;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use tokio::sync::mpsc;
|
||||
use tokio::task::spawn_blocking;
|
||||
@ -63,7 +64,7 @@ struct ModuleHeader {
|
||||
imported_modules: MutSet<ModuleId>,
|
||||
exposes: Vec<Symbol>,
|
||||
exposed_imports: MutMap<Ident, (Symbol, Region)>,
|
||||
src: Box<str>,
|
||||
src: Box<[u8]>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -526,58 +527,70 @@ fn load_module(
|
||||
load_filename(filename, msg_tx, module_ids)
|
||||
}
|
||||
|
||||
fn parse_src(
|
||||
filename: PathBuf,
|
||||
msg_tx: MsgSender,
|
||||
module_ids: SharedModules<'_, '_>,
|
||||
src_bytes: &[u8],
|
||||
) -> Result<ModuleId, LoadingProblem> {
|
||||
let state = State::new(src_bytes, Attempting::Module);
|
||||
let arena = Bump::new();
|
||||
|
||||
// TODO figure out if there's a way to address this clippy error
|
||||
// without introducing a borrow error. ("let and return" is literally
|
||||
// what the borrow checker suggested using here to fix the problem, so...)
|
||||
#[allow(clippy::let_and_return)]
|
||||
let answer = match roc_parse::module::header().parse(&arena, state) {
|
||||
Ok((ast::Module::Interface { header }, state)) => {
|
||||
let module_id = send_header(
|
||||
header.name,
|
||||
header.exposes.into_bump_slice(),
|
||||
header.imports.into_bump_slice(),
|
||||
state,
|
||||
module_ids,
|
||||
msg_tx,
|
||||
);
|
||||
|
||||
Ok(module_id)
|
||||
}
|
||||
Ok((ast::Module::App { header }, state)) => match module_ids {
|
||||
MaybeShared::Shared(_, _) => {
|
||||
// If this is Shared, it means we're trying to import
|
||||
// an app module which is not the root. Not allowed!
|
||||
Err(LoadingProblem::TriedToImportAppModule)
|
||||
}
|
||||
unique_modules @ MaybeShared::Unique(_, _) => {
|
||||
let module_id = send_header(
|
||||
header.name,
|
||||
header.provides.into_bump_slice(),
|
||||
header.imports.into_bump_slice(),
|
||||
state,
|
||||
unique_modules,
|
||||
msg_tx,
|
||||
);
|
||||
|
||||
Ok(module_id)
|
||||
}
|
||||
},
|
||||
Err((fail, _)) => Err(LoadingProblem::ParsingFailed { filename, fail }),
|
||||
};
|
||||
|
||||
answer
|
||||
}
|
||||
|
||||
/// Load a module by its filename
|
||||
///
|
||||
/// This has two unsafe calls:
|
||||
///
|
||||
/// * memory map the filename instead of doing a buffered read
|
||||
/// * assume the contents of the file are valid UTF-8
|
||||
fn load_filename(
|
||||
filename: PathBuf,
|
||||
msg_tx: MsgSender,
|
||||
module_ids: SharedModules<'_, '_>,
|
||||
) -> Result<ModuleId, LoadingProblem> {
|
||||
match read_to_string(&filename) {
|
||||
Ok(src) => {
|
||||
let arena = Bump::new();
|
||||
let state = State::new(&src, Attempting::Module);
|
||||
|
||||
// TODO figure out if there's a way to address this clippy error
|
||||
// without introducing a borrow error. ("let and return" is literally
|
||||
// what the borrow checker suggested using here to fix the problem, so...)
|
||||
#[allow(clippy::let_and_return)]
|
||||
let answer = match roc_parse::module::header().parse(&arena, state) {
|
||||
Ok((ast::Module::Interface { header }, state)) => {
|
||||
let module_id = send_header(
|
||||
header.name,
|
||||
header.exposes.into_bump_slice(),
|
||||
header.imports.into_bump_slice(),
|
||||
state,
|
||||
module_ids,
|
||||
msg_tx,
|
||||
);
|
||||
|
||||
Ok(module_id)
|
||||
}
|
||||
Ok((ast::Module::App { header }, state)) => match module_ids {
|
||||
MaybeShared::Shared(_, _) => {
|
||||
// If this is Shared, it means we're trying to import
|
||||
// an app module which is not the root. Not allowed!
|
||||
Err(LoadingProblem::TriedToImportAppModule)
|
||||
}
|
||||
unique_modules @ MaybeShared::Unique(_, _) => {
|
||||
let module_id = send_header(
|
||||
header.name,
|
||||
header.provides.into_bump_slice(),
|
||||
header.imports.into_bump_slice(),
|
||||
state,
|
||||
unique_modules,
|
||||
msg_tx,
|
||||
);
|
||||
|
||||
Ok(module_id)
|
||||
}
|
||||
},
|
||||
Err((fail, _)) => Err(LoadingProblem::ParsingFailed { filename, fail }),
|
||||
};
|
||||
|
||||
answer
|
||||
}
|
||||
match fs::read(&filename) {
|
||||
Ok(bytes) => parse_src(filename, msg_tx, module_ids, bytes.as_ref()),
|
||||
Err(err) => Err(LoadingProblem::FileProblem {
|
||||
filename,
|
||||
error: err.kind(),
|
||||
@ -746,7 +759,7 @@ fn send_header<'a>(
|
||||
|
||||
// Box up the input &str for transfer over the wire.
|
||||
// We'll need this in order to continue parsing later.
|
||||
let src: Box<str> = state.input.to_string().into();
|
||||
let src: Box<[u8]> = state.bytes.into();
|
||||
|
||||
// Send the deps to the coordinator thread for processing,
|
||||
// then continue on to parsing and canonicalizing defs.
|
||||
@ -961,7 +974,7 @@ fn parse_and_constrain(
|
||||
|
||||
let (parsed_defs, _) = module_defs()
|
||||
.parse(&arena, state)
|
||||
.expect("TODO gracefully handle parse error on module defs");
|
||||
.expect("TODO gracefully handle parse error on module defs. IMPORTANT: Bail out entirely if there are any BadUtf8 problems! That means the whole source file is not valid UTF-8 and any other errors we report may get mis-reported. We rely on this for safety in an `unsafe` block later on in this function.");
|
||||
|
||||
let (module, declarations, ident_ids, constraint, problems) = match canonicalize_module_defs(
|
||||
&arena,
|
||||
@ -1001,9 +1014,13 @@ fn parse_and_constrain(
|
||||
}
|
||||
};
|
||||
|
||||
let src = header.src;
|
||||
let imported_modules = header.imported_modules;
|
||||
|
||||
// SAFETY: By this point we've already incrementally verified that there
|
||||
// are no UTF-8 errors in these bytes. If there had been any UTF-8 errors,
|
||||
// we'd have bailed out before now.
|
||||
let src: Box<str> = unsafe { from_utf8_unchecked(header.src.as_ref()).to_string().into() };
|
||||
|
||||
tokio::spawn(async move {
|
||||
let mut tx = msg_tx;
|
||||
|
||||
|
@ -92,7 +92,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -53,7 +53,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -11,6 +11,7 @@ roc_region = { path = "../region" }
|
||||
roc_module = { path = "../module" }
|
||||
bumpalo = { version = "3.2", features = ["collections"] }
|
||||
inlinable_string = "0.1"
|
||||
encode_unicode = "0.3"
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = "0.5.1"
|
||||
|
@ -1,6 +1,8 @@
|
||||
use crate::ast::CommentOrNewline::{self, *};
|
||||
use crate::ast::Spaceable;
|
||||
use crate::parser::{self, and, unexpected, unexpected_eof, Parser, State};
|
||||
use crate::parser::{
|
||||
self, and, peek_utf8_char, unexpected, unexpected_eof, FailReason, Parser, State,
|
||||
};
|
||||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
@ -216,147 +218,179 @@ fn spaces<'a>(
|
||||
) -> impl Parser<'a, &'a [CommentOrNewline<'a>]> {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let original_state = state.clone();
|
||||
let chars = state.input.chars().peekable();
|
||||
let mut space_list = Vec::new_in(arena);
|
||||
let mut chars_parsed = 0;
|
||||
let mut bytes_parsed = 0;
|
||||
let mut comment_line_buf = String::new_in(arena);
|
||||
let mut line_state = LineState::Normal;
|
||||
let mut state = state;
|
||||
let mut any_newlines = false;
|
||||
|
||||
for ch in chars {
|
||||
chars_parsed += 1;
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, utf8_len)) => {
|
||||
bytes_parsed += utf8_len;
|
||||
|
||||
match line_state {
|
||||
LineState::Normal => {
|
||||
match ch {
|
||||
' ' => {
|
||||
// Don't check indentation here; it might not be enough
|
||||
// indentation yet, but maybe it will be after more spaces happen!
|
||||
state = state.advance_spaces(1)?;
|
||||
}
|
||||
'\r' => {
|
||||
// Ignore carriage returns.
|
||||
state = state.advance_spaces(1)?;
|
||||
}
|
||||
'\n' => {
|
||||
// No need to check indentation because we're about to reset it anyway.
|
||||
state = state.newline()?;
|
||||
match line_state {
|
||||
LineState::Normal => {
|
||||
match ch {
|
||||
' ' => {
|
||||
// Don't check indentation here; it might not be enough
|
||||
// indentation yet, but maybe it will be after more spaces happen!
|
||||
state = state.advance_spaces(1)?;
|
||||
}
|
||||
'\r' => {
|
||||
// Ignore carriage returns.
|
||||
state = state.advance_spaces(1)?;
|
||||
}
|
||||
'\n' => {
|
||||
// No need to check indentation because we're about to reset it anyway.
|
||||
state = state.newline()?;
|
||||
|
||||
// Newlines only get added to the list when they're outside comments.
|
||||
space_list.push(Newline);
|
||||
// Newlines only get added to the list when they're outside comments.
|
||||
space_list.push(Newline);
|
||||
|
||||
any_newlines = true;
|
||||
}
|
||||
'#' => {
|
||||
// Check indentation to make sure we were indented enough
|
||||
// before this comment began.
|
||||
state = state
|
||||
.check_indent(min_indent)
|
||||
.map_err(|(fail, _)| (fail, original_state.clone()))?
|
||||
.advance_without_indenting(1)?;
|
||||
|
||||
// We're now parsing a line comment!
|
||||
line_state = LineState::Comment;
|
||||
}
|
||||
nonblank => {
|
||||
return if require_at_least_one && chars_parsed <= 1 {
|
||||
// We've parsed 1 char and it was not a space,
|
||||
// but we require parsing at least one space!
|
||||
Err(unexpected(nonblank, 0, state.clone(), state.attempting))
|
||||
} else {
|
||||
// First make sure we were indented enough!
|
||||
//
|
||||
// (We only do this if we've encountered any newlines.
|
||||
// Otherwise, we assume indentation is already correct.
|
||||
// It's actively important for correctness that we skip
|
||||
// this check if there are no newlines, because otherwise
|
||||
// we would have false positives for single-line defs.)
|
||||
if any_newlines {
|
||||
any_newlines = true;
|
||||
}
|
||||
'#' => {
|
||||
// Check indentation to make sure we were indented enough
|
||||
// before this comment began.
|
||||
state = state
|
||||
.check_indent(min_indent)
|
||||
.map_err(|(fail, _)| (fail, original_state))?;
|
||||
.map_err(|(fail, _)| (fail, original_state.clone()))?
|
||||
.advance_without_indenting(1)?;
|
||||
|
||||
// We're now parsing a line comment!
|
||||
line_state = LineState::Comment;
|
||||
}
|
||||
|
||||
Ok((space_list.into_bump_slice(), state))
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
LineState::Comment => {
|
||||
match ch {
|
||||
' ' => {
|
||||
// If we're in a line comment, this won't affect indentation anyway.
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
if comment_line_buf.len() == 1 {
|
||||
match comment_line_buf.chars().next() {
|
||||
Some('#') => {
|
||||
// This is a comment beginning with `## ` - that is,
|
||||
// a doc comment.
|
||||
_ => {
|
||||
return if require_at_least_one && bytes_parsed <= 1 {
|
||||
// We've parsed 1 char and it was not a space,
|
||||
// but we require parsing at least one space!
|
||||
Err(unexpected(0, state.clone(), state.attempting))
|
||||
} else {
|
||||
// First make sure we were indented enough!
|
||||
//
|
||||
// (The space is important; otherwise, this is not
|
||||
// a doc comment, but rather something like a
|
||||
// big separator block, e.g. ############)
|
||||
line_state = LineState::DocComment;
|
||||
// (We only do this if we've encountered any newlines.
|
||||
// Otherwise, we assume indentation is already correct.
|
||||
// It's actively important for correctness that we skip
|
||||
// this check if there are no newlines, because otherwise
|
||||
// we would have false positives for single-line defs.)
|
||||
if any_newlines {
|
||||
state = state
|
||||
.check_indent(min_indent)
|
||||
.map_err(|(fail, _)| (fail, original_state))?;
|
||||
}
|
||||
|
||||
// This is now the beginning of the doc comment.
|
||||
comment_line_buf.clear();
|
||||
}
|
||||
_ => {
|
||||
Ok((space_list.into_bump_slice(), state))
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
LineState::Comment => {
|
||||
match ch {
|
||||
' ' => {
|
||||
// If we're in a line comment, this won't affect indentation anyway.
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
if comment_line_buf.len() == 1 {
|
||||
match comment_line_buf.chars().next() {
|
||||
Some('#') => {
|
||||
// This is a comment beginning with `## ` - that is,
|
||||
// a doc comment.
|
||||
//
|
||||
// (The space is important; otherwise, this is not
|
||||
// a doc comment, but rather something like a
|
||||
// big separator block, e.g. ############)
|
||||
line_state = LineState::DocComment;
|
||||
|
||||
// This is now the beginning of the doc comment.
|
||||
comment_line_buf.clear();
|
||||
}
|
||||
_ => {
|
||||
comment_line_buf.push(ch);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
comment_line_buf.push(ch);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
comment_line_buf.push(ch);
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
|
||||
// This was a newline, so end this line comment.
|
||||
space_list.push(LineComment(comment_line_buf.into_bump_str()));
|
||||
comment_line_buf = String::new_in(arena);
|
||||
|
||||
line_state = LineState::Normal;
|
||||
}
|
||||
nonblank => {
|
||||
// Chars can have byte lengths of more than 1!
|
||||
state = state.advance_without_indenting(nonblank.len_utf8())?;
|
||||
|
||||
comment_line_buf.push(nonblank);
|
||||
}
|
||||
}
|
||||
}
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
LineState::DocComment => {
|
||||
match ch {
|
||||
' ' => {
|
||||
// If we're in a doc comment, this won't affect indentation anyway.
|
||||
state = state.advance_without_indenting(1)?;
|
||||
|
||||
// This was a newline, so end this line comment.
|
||||
space_list.push(LineComment(comment_line_buf.into_bump_str()));
|
||||
comment_line_buf = String::new_in(arena);
|
||||
comment_line_buf.push(ch);
|
||||
}
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
|
||||
line_state = LineState::Normal;
|
||||
}
|
||||
nonblank => {
|
||||
// Chars can have byte lengths of more than 1!
|
||||
state = state.advance_without_indenting(nonblank.len_utf8())?;
|
||||
// This was a newline, so end this doc comment.
|
||||
space_list.push(DocComment(comment_line_buf.into_bump_str()));
|
||||
comment_line_buf = String::new_in(arena);
|
||||
|
||||
comment_line_buf.push(nonblank);
|
||||
line_state = LineState::Normal;
|
||||
}
|
||||
nonblank => {
|
||||
state = state.advance_without_indenting(utf8_len)?;
|
||||
|
||||
comment_line_buf.push(nonblank);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
LineState::DocComment => {
|
||||
match ch {
|
||||
' ' => {
|
||||
// If we're in a doc comment, this won't affect indentation anyway.
|
||||
state = state.advance_without_indenting(1)?;
|
||||
Err(FailReason::BadUtf8) => {
|
||||
// If we hit an invalid UTF-8 character, bail out immediately.
|
||||
return state.fail(FailReason::BadUtf8);
|
||||
}
|
||||
Err(_) => {
|
||||
if require_at_least_one && bytes_parsed == 0 {
|
||||
return Err(unexpected_eof(0, state.attempting, state));
|
||||
} else {
|
||||
let space_slice = space_list.into_bump_slice();
|
||||
|
||||
comment_line_buf.push(ch);
|
||||
// First make sure we were indented enough!
|
||||
//
|
||||
// (We only do this if we've encountered any newlines.
|
||||
// Otherwise, we assume indentation is already correct.
|
||||
// It's actively important for correctness that we skip
|
||||
// this check if there are no newlines, because otherwise
|
||||
// we would have false positives for single-line defs.)
|
||||
if any_newlines {
|
||||
return Ok((
|
||||
space_slice,
|
||||
state
|
||||
.check_indent(min_indent)
|
||||
.map_err(|(fail, _)| (fail, original_state))?,
|
||||
));
|
||||
}
|
||||
'\n' => {
|
||||
state = state.newline()?;
|
||||
|
||||
// This was a newline, so end this doc comment.
|
||||
space_list.push(DocComment(comment_line_buf.into_bump_str()));
|
||||
comment_line_buf = String::new_in(arena);
|
||||
|
||||
line_state = LineState::Normal;
|
||||
}
|
||||
nonblank => {
|
||||
// Chars can have byte lengths of more than 1!
|
||||
state = state.advance_without_indenting(nonblank.len_utf8())?;
|
||||
|
||||
comment_line_buf.push(nonblank);
|
||||
}
|
||||
return Ok((space_slice, state));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
if require_at_least_one && chars_parsed == 0 {
|
||||
// If we didn't parse anything, return unexpected EOF
|
||||
if require_at_least_one && original_state.bytes.len() == state.bytes.len() {
|
||||
Err(unexpected_eof(0, state.attempting, state))
|
||||
} else {
|
||||
// First make sure we were indented enough!
|
||||
|
@ -8,8 +8,8 @@ use crate::ident::{global_tag_or_ident, ident, lowercase_ident, Ident};
|
||||
use crate::keyword;
|
||||
use crate::number_literal::number_literal;
|
||||
use crate::parser::{
|
||||
self, allocated, char, fail, not, not_followed_by, optional, sep_by1, string, then, unexpected,
|
||||
unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
|
||||
self, allocated, ascii_char, ascii_string, fail, not, not_followed_by, optional, sep_by1, then,
|
||||
unexpected, unexpected_eof, Either, Fail, FailReason, ParseResult, Parser, State,
|
||||
};
|
||||
use crate::type_annotation;
|
||||
use bumpalo::collections::string::String;
|
||||
@ -22,7 +22,7 @@ pub fn expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
// Recursive parsers must not directly invoke functions which return (impl Parser),
|
||||
// as this causes rustc to stack overflow. Thus, parse_expr must be a
|
||||
// separate function which recurses by calling itself directly.
|
||||
move |arena, state| parse_expr(min_indent, arena, state)
|
||||
move |arena, state: State<'a>| parse_expr(min_indent, arena, state)
|
||||
}
|
||||
|
||||
macro_rules! loc_parenthetical_expr {
|
||||
@ -30,7 +30,7 @@ macro_rules! loc_parenthetical_expr {
|
||||
then(
|
||||
loc!(and!(
|
||||
between!(
|
||||
char('('),
|
||||
ascii_char('(' ),
|
||||
map_with_arena!(
|
||||
space0_around(
|
||||
loc!(move |arena, state| parse_expr($min_indent, arena, state)),
|
||||
@ -43,7 +43,7 @@ macro_rules! loc_parenthetical_expr {
|
||||
}
|
||||
}
|
||||
),
|
||||
char(')')
|
||||
ascii_char(')' )
|
||||
),
|
||||
optional(either!(
|
||||
// There may optionally be function args after the ')'
|
||||
@ -59,7 +59,7 @@ macro_rules! loc_parenthetical_expr {
|
||||
// as if there were any args they'd have consumed it anyway
|
||||
// e.g. in `((foo bar) baz.blah)` the `.blah` will be consumed by the `baz` parser
|
||||
either!(
|
||||
one_or_more!(skip_first!(char('.'), lowercase_ident())),
|
||||
one_or_more!(skip_first!(ascii_char('.' ), lowercase_ident())),
|
||||
and!(space0($min_indent), equals_with_indent())
|
||||
)
|
||||
))
|
||||
@ -170,7 +170,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
one_of!(
|
||||
map_with_arena!(
|
||||
and!(
|
||||
loc!(char('!')),
|
||||
loc!(ascii_char('!')),
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state))
|
||||
),
|
||||
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
|
||||
@ -179,7 +179,7 @@ pub fn unary_op<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
),
|
||||
map_with_arena!(
|
||||
and!(
|
||||
loc!(char('-')),
|
||||
loc!(ascii_char('-')),
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state))
|
||||
),
|
||||
|arena: &'a Bump, (loc_op, loc_expr): (Located<()>, Located<Expr<'a>>)| {
|
||||
@ -450,9 +450,9 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
|
||||
let (loc_tuple, state) = loc!(and!(
|
||||
space0_after(
|
||||
between!(
|
||||
char('('),
|
||||
ascii_char('('),
|
||||
space0_around(loc_pattern(min_indent), min_indent),
|
||||
char(')')
|
||||
ascii_char(')')
|
||||
),
|
||||
min_indent,
|
||||
),
|
||||
@ -482,7 +482,7 @@ pub fn loc_parenthetical_def<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
|
||||
/// The '=' used in a def can't be followed by another '=' (or else it's actually
|
||||
/// an "==") and also it can't be followed by '>' (or else it's actually an "=>")
|
||||
fn equals_for_def<'a>() -> impl Parser<'a, ()> {
|
||||
not_followed_by(char('='), one_of!(char('='), char('>')))
|
||||
not_followed_by(ascii_char('='), one_of!(ascii_char('='), ascii_char('>')))
|
||||
}
|
||||
|
||||
/// A definition, consisting of one of these:
|
||||
@ -513,7 +513,7 @@ pub fn def<'a>(min_indent: u16) -> impl Parser<'a, Def<'a>> {
|
||||
),
|
||||
// Annotation
|
||||
skip_first!(
|
||||
char(':'),
|
||||
ascii_char(':'),
|
||||
// Spaces after the ':' (at a normal indentation level) and then the type.
|
||||
// The type itself must be indented more than the pattern and ':'
|
||||
space0_before(type_annotation::located(indented_more), indented_more)
|
||||
@ -811,12 +811,12 @@ fn loc_parse_function_arg<'a>(
|
||||
|
||||
fn reserved_keyword<'a>() -> impl Parser<'a, ()> {
|
||||
one_of!(
|
||||
string(keyword::IF),
|
||||
string(keyword::THEN),
|
||||
string(keyword::ELSE),
|
||||
string(keyword::WHEN),
|
||||
string(keyword::IS),
|
||||
string(keyword::AS)
|
||||
ascii_string(keyword::IF),
|
||||
ascii_string(keyword::THEN),
|
||||
ascii_string(keyword::ELSE),
|
||||
ascii_string(keyword::WHEN),
|
||||
ascii_string(keyword::IS),
|
||||
ascii_string(keyword::AS)
|
||||
)
|
||||
}
|
||||
|
||||
@ -824,7 +824,7 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
map_with_arena!(
|
||||
skip_first!(
|
||||
// All closures start with a '\' - e.g. (\x -> x + 1)
|
||||
char('\\'),
|
||||
ascii_char('\\'),
|
||||
// Once we see the '\', we're committed to parsing this as a closure.
|
||||
// It may turn out to be malformed, but it is definitely a closure.
|
||||
optional(and!(
|
||||
@ -833,13 +833,13 @@ fn closure<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
Attempting::ClosureParams,
|
||||
// Params are comma-separated
|
||||
sep_by1(
|
||||
char(','),
|
||||
ascii_char(','),
|
||||
space0_around(loc_closure_param(min_indent), min_indent)
|
||||
)
|
||||
),
|
||||
skip_first!(
|
||||
// Parse the -> which separates params from body
|
||||
string("->"),
|
||||
ascii_string("->"),
|
||||
// Parse the body
|
||||
attempt!(
|
||||
Attempting::ClosureBody,
|
||||
@ -877,9 +877,9 @@ fn parse_closure_param<'a>(
|
||||
// If you wrap it in parens, you can match any arbitrary pattern at all.
|
||||
// e.g. \User.UserId userId -> ...
|
||||
between!(
|
||||
char('('),
|
||||
ascii_char('('),
|
||||
space0_around(loc_pattern(min_indent), min_indent),
|
||||
char(')')
|
||||
ascii_char(')')
|
||||
)
|
||||
)
|
||||
.parse(arena, state)
|
||||
@ -903,9 +903,9 @@ fn loc_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
|
||||
|
||||
fn loc_parenthetical_pattern<'a>(min_indent: u16) -> impl Parser<'a, Located<Pattern<'a>>> {
|
||||
between!(
|
||||
char('('),
|
||||
ascii_char('('),
|
||||
move |arena, state| loc_pattern(min_indent).parse(arena, state),
|
||||
char(')')
|
||||
ascii_char(')')
|
||||
)
|
||||
}
|
||||
|
||||
@ -923,13 +923,13 @@ fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
|
||||
}
|
||||
|
||||
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
|
||||
map!(char('_'), |_| Pattern::Underscore)
|
||||
map!(ascii_char('_'), |_| Pattern::Underscore)
|
||||
}
|
||||
|
||||
fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
|
||||
then(
|
||||
collection!(
|
||||
char('{'),
|
||||
ascii_char('{'),
|
||||
move |arena: &'a bumpalo::Bump,
|
||||
state: crate::parser::State<'a>|
|
||||
-> crate::parser::ParseResult<'a, Located<crate::ast::Pattern<'a>>> {
|
||||
@ -947,10 +947,13 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
|
||||
// (This is true in both literals and types.)
|
||||
let (opt_loc_val, state) = crate::parser::optional(either!(
|
||||
skip_first!(
|
||||
char(':'),
|
||||
ascii_char(':'),
|
||||
space0_before(loc_pattern(min_indent), min_indent)
|
||||
),
|
||||
skip_first!(char('?'), space0_before(loc!(expr(min_indent)), min_indent))
|
||||
skip_first!(
|
||||
ascii_char('?'),
|
||||
space0_before(loc!(expr(min_indent)), min_indent)
|
||||
)
|
||||
))
|
||||
.parse(arena, state)?;
|
||||
|
||||
@ -987,8 +990,8 @@ fn record_destructure<'a>(min_indent: u16) -> impl Parser<'a, Pattern<'a>> {
|
||||
|
||||
Ok((answer, state))
|
||||
},
|
||||
char(','),
|
||||
char('}'),
|
||||
ascii_char(','),
|
||||
ascii_char('}'),
|
||||
min_indent
|
||||
),
|
||||
move |_arena, state, loc_patterns| {
|
||||
@ -1109,7 +1112,7 @@ mod when {
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
|
||||
min_indent,
|
||||
),
|
||||
string(keyword::IS)
|
||||
ascii_string(keyword::IS)
|
||||
)
|
||||
)
|
||||
),
|
||||
@ -1132,7 +1135,7 @@ mod when {
|
||||
/// Parsing when with indentation.
|
||||
fn when_with_indent<'a>() -> impl Parser<'a, u16> {
|
||||
move |arena, state: State<'a>| {
|
||||
string(keyword::WHEN)
|
||||
ascii_string(keyword::WHEN)
|
||||
.parse(arena, state)
|
||||
.map(|((), state)| (state.indent_col, state))
|
||||
}
|
||||
@ -1185,7 +1188,7 @@ mod when {
|
||||
}
|
||||
);
|
||||
|
||||
loop {
|
||||
while !state.bytes.is_empty() {
|
||||
match branch_parser.parse(arena, state) {
|
||||
Ok((next_output, next_state)) => {
|
||||
state = next_state;
|
||||
@ -1210,11 +1213,11 @@ mod when {
|
||||
) -> impl Parser<'a, (Vec<'a, Located<Pattern<'a>>>, Option<Located<Expr<'a>>>)> {
|
||||
and!(
|
||||
sep_by1(
|
||||
char('|'),
|
||||
ascii_char('|'),
|
||||
space0_around(loc_pattern(min_indent), min_indent),
|
||||
),
|
||||
optional(skip_first!(
|
||||
string(keyword::IF),
|
||||
ascii_string(keyword::IF),
|
||||
// TODO we should require space before the expression but not after
|
||||
space1_around(
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
|
||||
@ -1240,7 +1243,7 @@ mod when {
|
||||
/// Parsing the right-hand side of a branch in a when conditional.
|
||||
fn branch_result<'a>(indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
|
||||
skip_first!(
|
||||
string("->"),
|
||||
ascii_string("->"),
|
||||
space0_before(
|
||||
loc!(move |arena, state| parse_expr(indent, arena, state)),
|
||||
indent,
|
||||
@ -1253,7 +1256,7 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
map_with_arena!(
|
||||
and!(
|
||||
skip_first!(
|
||||
string(keyword::IF),
|
||||
ascii_string(keyword::IF),
|
||||
space1_around(
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
|
||||
min_indent,
|
||||
@ -1261,14 +1264,14 @@ pub fn if_expr<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
),
|
||||
and!(
|
||||
skip_first!(
|
||||
string(keyword::THEN),
|
||||
ascii_string(keyword::THEN),
|
||||
space1_around(
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
|
||||
min_indent,
|
||||
)
|
||||
),
|
||||
skip_first!(
|
||||
string(keyword::ELSE),
|
||||
ascii_string(keyword::ELSE),
|
||||
space1_before(
|
||||
loc!(move |arena, state| parse_expr(min_indent, arena, state)),
|
||||
min_indent,
|
||||
@ -1310,10 +1313,15 @@ fn unary_negate_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<Exp
|
||||
// Try to parse a number literal *before* trying to parse unary negate,
|
||||
// because otherwise (foo -1) will parse as (foo (Num.neg 1))
|
||||
loc!(number_literal()),
|
||||
loc!(char('-'))
|
||||
loc!(ascii_char('-'))
|
||||
)
|
||||
),
|
||||
one_of!(char(' '), char('#'), char('\n'), char('>')),
|
||||
one_of!(
|
||||
ascii_char(' '),
|
||||
ascii_char('#'),
|
||||
ascii_char('\n'),
|
||||
ascii_char('>')
|
||||
),
|
||||
),
|
||||
move |arena, state, (spaces, num_or_minus_char)| {
|
||||
match num_or_minus_char {
|
||||
@ -1530,17 +1538,15 @@ pub fn ident_without_apply<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||
/// Like equals_for_def(), except it produces the indent_col of the state rather than ()
|
||||
pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
|
||||
move |_arena, state: State<'a>| {
|
||||
let mut iter = state.input.chars();
|
||||
|
||||
match iter.next() {
|
||||
Some(ch) if ch == '=' => {
|
||||
match iter.peekable().peek() {
|
||||
match state.bytes.first() {
|
||||
Some(&byte) if byte == b'=' => {
|
||||
match state.bytes.get(1) {
|
||||
// The '=' must not be followed by another `=` or `>`
|
||||
// (See equals_for_def() for explanation)
|
||||
Some(next_ch) if next_ch != &'=' && next_ch != &'>' => {
|
||||
Some(&next_byte) if next_byte != b'=' && next_byte != b'>' => {
|
||||
Ok((state.indent_col, state.advance_without_indenting(1)?))
|
||||
}
|
||||
Some(next_ch) => Err(unexpected(*next_ch, 0, state, Attempting::Def)),
|
||||
Some(_) => Err(unexpected(0, state, Attempting::Def)),
|
||||
None => Err(unexpected_eof(
|
||||
1,
|
||||
Attempting::Def,
|
||||
@ -1548,21 +1554,17 @@ pub fn equals_with_indent<'a>() -> impl Parser<'a, u16> {
|
||||
)),
|
||||
}
|
||||
}
|
||||
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
|
||||
Some(_) => Err(unexpected(0, state, Attempting::Def)),
|
||||
None => Err(unexpected_eof(0, Attempting::Def, state)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn colon_with_indent<'a>() -> impl Parser<'a, u16> {
|
||||
move |_arena, state: State<'a>| {
|
||||
let mut iter = state.input.chars();
|
||||
|
||||
match iter.next() {
|
||||
Some(ch) if ch == ':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
|
||||
Some(ch) => Err(unexpected(ch, 0, state, Attempting::Def)),
|
||||
None => Err(unexpected_eof(0, Attempting::Def, state)),
|
||||
}
|
||||
move |_arena, state: State<'a>| match state.bytes.first() {
|
||||
Some(&byte) if byte == b':' => Ok((state.indent_col, state.advance_without_indenting(1)?)),
|
||||
Some(_) => Err(unexpected(0, state, Attempting::Def)),
|
||||
None => Err(unexpected_eof(0, Attempting::Def, state)),
|
||||
}
|
||||
}
|
||||
|
||||
@ -1606,32 +1608,32 @@ fn binop<'a>() -> impl Parser<'a, BinOp> {
|
||||
// with other valid operators (e.g. "<=" begins with "<") must
|
||||
// come before the shorter ones; otherwise, they will never
|
||||
// be reached because the shorter one will pass and consume!
|
||||
map!(string("|>"), |_| BinOp::Pizza),
|
||||
map!(string("=="), |_| BinOp::Equals),
|
||||
map!(string("!="), |_| BinOp::NotEquals),
|
||||
map!(string("&&"), |_| BinOp::And),
|
||||
map!(string("||"), |_| BinOp::Or),
|
||||
map!(char('+'), |_| BinOp::Plus),
|
||||
map!(char('*'), |_| BinOp::Star),
|
||||
map!(char('-'), |_| BinOp::Minus),
|
||||
map!(string("//"), |_| BinOp::DoubleSlash),
|
||||
map!(char('/'), |_| BinOp::Slash),
|
||||
map!(string("<="), |_| BinOp::LessThanOrEq),
|
||||
map!(char('<'), |_| BinOp::LessThan),
|
||||
map!(string(">="), |_| BinOp::GreaterThanOrEq),
|
||||
map!(char('>'), |_| BinOp::GreaterThan),
|
||||
map!(char('^'), |_| BinOp::Caret),
|
||||
map!(string("%%"), |_| BinOp::DoublePercent),
|
||||
map!(char('%'), |_| BinOp::Percent)
|
||||
map!(ascii_string("|>"), |_| BinOp::Pizza),
|
||||
map!(ascii_string("=="), |_| BinOp::Equals),
|
||||
map!(ascii_string("!="), |_| BinOp::NotEquals),
|
||||
map!(ascii_string("&&"), |_| BinOp::And),
|
||||
map!(ascii_string("||"), |_| BinOp::Or),
|
||||
map!(ascii_char('+'), |_| BinOp::Plus),
|
||||
map!(ascii_char('*'), |_| BinOp::Star),
|
||||
map!(ascii_char('-'), |_| BinOp::Minus),
|
||||
map!(ascii_string("//"), |_| BinOp::DoubleSlash),
|
||||
map!(ascii_char('/'), |_| BinOp::Slash),
|
||||
map!(ascii_string("<="), |_| BinOp::LessThanOrEq),
|
||||
map!(ascii_char('<'), |_| BinOp::LessThan),
|
||||
map!(ascii_string(">="), |_| BinOp::GreaterThanOrEq),
|
||||
map!(ascii_char('>'), |_| BinOp::GreaterThan),
|
||||
map!(ascii_char('^'), |_| BinOp::Caret),
|
||||
map!(ascii_string("%%"), |_| BinOp::DoublePercent),
|
||||
map!(ascii_char('%'), |_| BinOp::Percent)
|
||||
)
|
||||
}
|
||||
|
||||
pub fn list_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
let elems = collection!(
|
||||
char('['),
|
||||
ascii_char('['),
|
||||
loc!(expr(min_indent)),
|
||||
char(','),
|
||||
char(']'),
|
||||
ascii_char(','),
|
||||
ascii_char(']'),
|
||||
min_indent
|
||||
);
|
||||
|
||||
@ -1673,9 +1675,11 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
};
|
||||
|
||||
// there can be field access, e.g. `{ x : 4 }.x`
|
||||
let (accesses, state) =
|
||||
optional(one_or_more!(skip_first!(char('.'), lowercase_ident())))
|
||||
.parse(arena, state)?;
|
||||
let (accesses, state) = optional(one_or_more!(skip_first!(
|
||||
ascii_char('.'),
|
||||
lowercase_ident()
|
||||
)))
|
||||
.parse(arena, state)?;
|
||||
|
||||
if let Some(fields) = accesses {
|
||||
for field in fields {
|
||||
@ -1768,7 +1772,7 @@ pub fn record_literal<'a>(min_indent: u16) -> impl Parser<'a, Expr<'a>> {
|
||||
/// This is mainly for matching tags in closure params, e.g. \@Foo -> ...
|
||||
pub fn private_tag<'a>() -> impl Parser<'a, &'a str> {
|
||||
map_with_arena!(
|
||||
skip_first!(char('@'), global_tag()),
|
||||
skip_first!(ascii_char('@'), global_tag()),
|
||||
|arena: &'a Bump, name: &'a str| {
|
||||
let mut buf = String::with_capacity_in(1 + name.len(), arena);
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
use crate::ast::Attempting;
|
||||
use crate::keyword;
|
||||
use crate::parser::{unexpected, unexpected_eof, Fail, FailReason, ParseResult, Parser, State};
|
||||
use crate::parser::{peek_utf8_char, unexpected, Fail, FailReason, ParseResult, Parser, State};
|
||||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
@ -67,129 +67,126 @@ impl<'a> Ident<'a> {
|
||||
/// Sometimes we may want to check for those later in the process, and give
|
||||
/// more contextually-aware error messages than "unexpected `if`" or the like.
|
||||
#[inline(always)]
|
||||
pub fn parse_ident<'a, I>(
|
||||
pub fn parse_ident<'a>(
|
||||
arena: &'a Bump,
|
||||
chars: &mut I,
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
|
||||
where
|
||||
I: Iterator<Item = char>,
|
||||
{
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
|
||||
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
|
||||
let mut capitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||
let mut noncapitalized_parts: Vec<&'a str> = Vec::new_in(arena);
|
||||
let mut is_capitalized;
|
||||
let is_accessor_fn;
|
||||
let mut is_private_tag = false;
|
||||
let mut chars_parsed;
|
||||
|
||||
// Identifiers and accessor functions must start with either a letter or a dot.
|
||||
// If this starts with neither, it must be something else!
|
||||
match chars.next() {
|
||||
Some(ch) => {
|
||||
if ch == '@' {
|
||||
// '@' must always be followed by a capital letter!
|
||||
match chars.next() {
|
||||
Some(ch) if ch.is_uppercase() => {
|
||||
part_buf.push('@');
|
||||
part_buf.push(ch);
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((first_ch, bytes_parsed)) => {
|
||||
if first_ch.is_alphabetic() {
|
||||
part_buf.push(first_ch);
|
||||
|
||||
is_private_tag = true;
|
||||
is_capitalized = true;
|
||||
is_accessor_fn = false;
|
||||
|
||||
chars_parsed = 2;
|
||||
}
|
||||
Some(ch) => {
|
||||
return Err(unexpected(ch, 0, state, Attempting::Identifier));
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::Identifier, state));
|
||||
}
|
||||
}
|
||||
} else if ch.is_alphabetic() {
|
||||
part_buf.push(ch);
|
||||
|
||||
is_capitalized = ch.is_uppercase();
|
||||
is_capitalized = first_ch.is_uppercase();
|
||||
is_accessor_fn = false;
|
||||
|
||||
chars_parsed = 1;
|
||||
} else if ch == '.' {
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
} else if first_ch == '.' {
|
||||
is_capitalized = false;
|
||||
is_accessor_fn = true;
|
||||
|
||||
chars_parsed = 1;
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
} else if first_ch == '@' {
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
|
||||
// '@' must always be followed by a capital letter!
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((next_ch, next_bytes_parsed)) => {
|
||||
if next_ch.is_uppercase() {
|
||||
state = state.advance_without_indenting(next_bytes_parsed)?;
|
||||
|
||||
part_buf.push('@');
|
||||
part_buf.push(next_ch);
|
||||
|
||||
is_private_tag = true;
|
||||
is_capitalized = true;
|
||||
is_accessor_fn = false;
|
||||
} else {
|
||||
return Err(unexpected(
|
||||
bytes_parsed + next_bytes_parsed,
|
||||
state,
|
||||
Attempting::Identifier,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
} else {
|
||||
return Err(unexpected(ch, 0, state, Attempting::Identifier));
|
||||
return Err(unexpected(0, state, Attempting::Identifier));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::Identifier, state));
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A dot ('.')
|
||||
if ch.is_alphabetic() {
|
||||
if part_buf.is_empty() {
|
||||
// Capitalization is determined by the first character in the part.
|
||||
is_capitalized = ch.is_uppercase();
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch.is_ascii_digit() {
|
||||
// Parts may not start with numbers!
|
||||
if part_buf.is_empty() {
|
||||
return malformed(
|
||||
Some(ch),
|
||||
arena,
|
||||
state,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch == '.' {
|
||||
// There are two possible errors here:
|
||||
//
|
||||
// 1. Having two consecutive dots is an error.
|
||||
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
|
||||
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
|
||||
return malformed(
|
||||
Some(ch),
|
||||
arena,
|
||||
state,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
if is_capitalized {
|
||||
capitalized_parts.push(part_buf.into_bump_str());
|
||||
} else {
|
||||
noncapitalized_parts.push(part_buf.into_bump_str());
|
||||
}
|
||||
|
||||
// Now that we've recorded the contents of the current buffer, reset it.
|
||||
part_buf = String::new_in(arena);
|
||||
} else {
|
||||
// This must be the end of the identifier. We're done!
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
};
|
||||
|
||||
let mut next_char = None;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A dot ('.')
|
||||
if ch.is_alphabetic() {
|
||||
if part_buf.is_empty() {
|
||||
// Capitalization is determined by the first character in the part.
|
||||
is_capitalized = ch.is_uppercase();
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch.is_ascii_digit() {
|
||||
// Parts may not start with numbers!
|
||||
if part_buf.is_empty() {
|
||||
return malformed(
|
||||
Some(ch),
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch == '.' {
|
||||
// There are two possible errors here:
|
||||
//
|
||||
// 1. Having two consecutive dots is an error.
|
||||
// 2. Having capitalized parts after noncapitalized (e.g. `foo.Bar`) is an error.
|
||||
if part_buf.is_empty() || (is_capitalized && !noncapitalized_parts.is_empty()) {
|
||||
return malformed(
|
||||
Some(ch),
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
}
|
||||
|
||||
if is_capitalized {
|
||||
capitalized_parts.push(part_buf.into_bump_str());
|
||||
} else {
|
||||
noncapitalized_parts.push(part_buf.into_bump_str());
|
||||
}
|
||||
|
||||
// Now that we've recorded the contents of the current buffer, reset it.
|
||||
part_buf = String::new_in(arena);
|
||||
} else {
|
||||
// This must be the end of the identifier. We're done!
|
||||
|
||||
next_char = Some(ch);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
chars_parsed += 1;
|
||||
}
|
||||
|
||||
if part_buf.is_empty() {
|
||||
@ -200,10 +197,9 @@ where
|
||||
// If we made it this far and don't have a next_char, then necessarily
|
||||
// we have consumed a '.' char previously.
|
||||
return malformed(
|
||||
next_char.or_else(|| Some('.')),
|
||||
Some('.'),
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
@ -224,14 +220,7 @@ where
|
||||
|
||||
Ident::AccessorFunction(value)
|
||||
} else {
|
||||
return malformed(
|
||||
None,
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
|
||||
}
|
||||
} else if noncapitalized_parts.is_empty() {
|
||||
// We have capitalized parts only, so this must be a tag.
|
||||
@ -245,33 +234,19 @@ where
|
||||
}
|
||||
} else {
|
||||
// This is a qualified tag, which is not allowed!
|
||||
return malformed(
|
||||
None,
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// We had neither capitalized nor noncapitalized parts,
|
||||
// yet we made it this far. The only explanation is that this was
|
||||
// a stray '.' drifting through the cosmos.
|
||||
return Err(unexpected('.', 1, state, Attempting::Identifier));
|
||||
return Err(unexpected(1, state, Attempting::Identifier));
|
||||
}
|
||||
}
|
||||
} else if is_private_tag {
|
||||
// This is qualified field access with an '@' in front, which does not make sense!
|
||||
return malformed(
|
||||
None,
|
||||
arena,
|
||||
state,
|
||||
chars,
|
||||
capitalized_parts,
|
||||
noncapitalized_parts,
|
||||
);
|
||||
return malformed(None, arena, state, capitalized_parts, noncapitalized_parts);
|
||||
} else {
|
||||
// We have multiple noncapitalized parts, so this must be field access.
|
||||
Ident::Access {
|
||||
@ -280,22 +255,16 @@ where
|
||||
}
|
||||
};
|
||||
|
||||
let state = state.advance_without_indenting(chars_parsed)?;
|
||||
|
||||
Ok(((answer, next_char), state))
|
||||
Ok(((answer, None), state))
|
||||
}
|
||||
|
||||
fn malformed<'a, I>(
|
||||
fn malformed<'a>(
|
||||
opt_bad_char: Option<char>,
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
chars: &mut I,
|
||||
mut state: State<'a>,
|
||||
capitalized_parts: Vec<&'a str>,
|
||||
noncapitalized_parts: Vec<&'a str>,
|
||||
) -> ParseResult<'a, (Ident<'a>, Option<char>)>
|
||||
where
|
||||
I: Iterator<Item = char>,
|
||||
{
|
||||
) -> ParseResult<'a, (Ident<'a>, Option<char>)> {
|
||||
// Reconstruct the original string that we've been parsing.
|
||||
let mut full_string = String::new_in(arena);
|
||||
|
||||
@ -311,30 +280,35 @@ where
|
||||
// Consume the remaining chars in the identifier.
|
||||
let mut next_char = None;
|
||||
|
||||
for ch in chars {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
full_string.push(ch);
|
||||
} else {
|
||||
next_char = Some(ch);
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
full_string.push(ch);
|
||||
} else {
|
||||
next_char = Some(ch);
|
||||
|
||||
break;
|
||||
break;
|
||||
}
|
||||
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
}
|
||||
|
||||
let chars_parsed = full_string.len();
|
||||
|
||||
Ok((
|
||||
(Ident::Malformed(full_string.into_bump_str()), next_char),
|
||||
state.advance_without_indenting(chars_parsed)?,
|
||||
state,
|
||||
))
|
||||
}
|
||||
|
||||
pub fn ident<'a>() -> impl Parser<'a, Ident<'a>> {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
// Discard next_char; we don't need it.
|
||||
let ((string, _), state) = parse_ident(arena, &mut state.input.chars(), state)?;
|
||||
let ((string, _), state) = parse_ident(arena, state)?;
|
||||
|
||||
Ok((string, state))
|
||||
}
|
||||
@ -344,52 +318,47 @@ pub fn global_tag_or_ident<'a, F>(pred: F) -> impl Parser<'a, &'a str>
|
||||
where
|
||||
F: Fn(char) -> bool,
|
||||
{
|
||||
move |arena, state: State<'a>| {
|
||||
let mut chars = state.input.chars();
|
||||
|
||||
move |arena, mut state: State<'a>| {
|
||||
// pred will determine if this is a tag or ident (based on capitalization)
|
||||
let first_letter = match chars.next() {
|
||||
Some(first_char) => {
|
||||
if pred(first_char) {
|
||||
first_char
|
||||
} else {
|
||||
return Err(unexpected(
|
||||
first_char,
|
||||
0,
|
||||
state,
|
||||
Attempting::RecordFieldLabel,
|
||||
));
|
||||
let (first_letter, bytes_parsed) = match peek_utf8_char(&state) {
|
||||
Ok((first_letter, bytes_parsed)) => {
|
||||
if !pred(first_letter) {
|
||||
return Err(unexpected(0, state, Attempting::RecordFieldLabel));
|
||||
}
|
||||
|
||||
(first_letter, bytes_parsed)
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::RecordFieldLabel, state));
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
};
|
||||
|
||||
let mut buf = String::with_capacity_in(1, arena);
|
||||
|
||||
buf.push(first_letter);
|
||||
|
||||
for ch in chars {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A ':' indicating the end of the field
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
buf.push(ch);
|
||||
} else {
|
||||
// This is the end of the field. We're done!
|
||||
break;
|
||||
}
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A ':' indicating the end of the field
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
buf.push(ch);
|
||||
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
} else {
|
||||
// This is the end of the field. We're done!
|
||||
break;
|
||||
}
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
};
|
||||
}
|
||||
|
||||
let chars_parsed = buf.len();
|
||||
|
||||
Ok((
|
||||
buf.into_bump_str(),
|
||||
state.advance_without_indenting(chars_parsed)?,
|
||||
))
|
||||
Ok((buf.into_bump_str(), state))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6,7 +6,10 @@ use crate::blankspace::{space0_around, space1};
|
||||
use crate::expr::def;
|
||||
use crate::header::ModuleName;
|
||||
use crate::ident::unqualified_ident;
|
||||
use crate::parser::{self, char, loc, optional, string, unexpected, unexpected_eof, Parser, State};
|
||||
use crate::parser::{
|
||||
self, ascii_char, ascii_string, loc, optional, peek_utf8_char, peek_utf8_char_at, unexpected,
|
||||
Parser, State,
|
||||
};
|
||||
use bumpalo::collections::{String, Vec};
|
||||
use roc_region::all::Located;
|
||||
|
||||
@ -30,7 +33,10 @@ pub fn app_module<'a>() -> impl Parser<'a, Module<'a>> {
|
||||
pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
|
||||
parser::map(
|
||||
and!(
|
||||
skip_first!(string("interface"), and!(space1(1), loc!(module_name()))),
|
||||
skip_first!(
|
||||
ascii_string("interface"),
|
||||
and!(space1(1), loc!(module_name()))
|
||||
),
|
||||
and!(exposes(), imports())
|
||||
),
|
||||
|(
|
||||
@ -56,72 +62,68 @@ pub fn interface_header<'a>() -> impl Parser<'a, InterfaceHeader<'a>> {
|
||||
|
||||
#[inline(always)]
|
||||
pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
|
||||
move |arena, state: State<'a>| {
|
||||
let mut chars = state.input.chars();
|
||||
move |arena, mut state: State<'a>| {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((first_letter, bytes_parsed)) => {
|
||||
if !first_letter.is_uppercase() {
|
||||
return Err(unexpected(0, state, Attempting::Module));
|
||||
};
|
||||
|
||||
let first_letter = match chars.next() {
|
||||
Some(first_char) => {
|
||||
// Module names must start with an uppercase letter
|
||||
if first_char.is_uppercase() {
|
||||
first_char
|
||||
} else {
|
||||
return Err(unexpected(
|
||||
first_char,
|
||||
0,
|
||||
state,
|
||||
Attempting::RecordFieldLabel,
|
||||
));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::Identifier, state));
|
||||
}
|
||||
};
|
||||
let mut buf = String::with_capacity_in(4, arena);
|
||||
|
||||
let mut buf = String::with_capacity_in(1, arena);
|
||||
buf.push(first_letter);
|
||||
|
||||
buf.push(first_letter);
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A '.' separating module parts
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
buf.push(ch);
|
||||
} else if ch == '.' {
|
||||
match chars.next() {
|
||||
Some(next) => {
|
||||
if next.is_uppercase() {
|
||||
// If we hit another uppercase letter, keep going!
|
||||
buf.push('.');
|
||||
buf.push(next);
|
||||
} else {
|
||||
// We have finished parsing the module name.
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// There may be an identifier after this '.',
|
||||
// e.g. "baz" in `Foo.Bar.baz`
|
||||
break;
|
||||
// * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A '.' separating module parts
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
|
||||
buf.push(ch);
|
||||
} else if ch == '.' {
|
||||
match peek_utf8_char_at(&state, 1) {
|
||||
Ok((next, next_bytes_parsed)) => {
|
||||
if next.is_uppercase() {
|
||||
// If we hit another uppercase letter, keep going!
|
||||
buf.push('.');
|
||||
buf.push(next);
|
||||
|
||||
state = state.advance_without_indenting(
|
||||
bytes_parsed + next_bytes_parsed,
|
||||
)?;
|
||||
} else {
|
||||
// We have finished parsing the module name.
|
||||
//
|
||||
// There may be an identifier after this '.',
|
||||
// e.g. "baz" in `Foo.Bar.baz`
|
||||
return Ok((
|
||||
ModuleName::new(buf.into_bump_str()),
|
||||
state,
|
||||
));
|
||||
}
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
} else {
|
||||
// This is the end of the module name. We're done!
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
// A module name can't end with a '.'
|
||||
return Err(unexpected_eof(0, Attempting::Identifier, state));
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// This is the end of the module name. We're done!
|
||||
break;
|
||||
|
||||
Ok((ModuleName::new(buf.into_bump_str()), state))
|
||||
}
|
||||
Err(reason) => state.fail(reason),
|
||||
}
|
||||
|
||||
let chars_parsed = buf.len();
|
||||
|
||||
Ok((
|
||||
ModuleName::new(buf.into_bump_str()),
|
||||
state.advance_without_indenting(chars_parsed)?,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,7 +131,7 @@ pub fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>> {
|
||||
fn app_header<'a>() -> impl Parser<'a, AppHeader<'a>> {
|
||||
parser::map(
|
||||
and!(
|
||||
skip_first!(string("app"), and!(space1(1), loc!(module_name()))),
|
||||
skip_first!(ascii_string("app"), and!(space1(1), loc!(module_name()))),
|
||||
and!(provides(), imports())
|
||||
),
|
||||
|(
|
||||
@ -167,8 +169,14 @@ fn provides<'a>() -> impl Parser<
|
||||
),
|
||||
> {
|
||||
and!(
|
||||
and!(skip_second!(space1(1), string("provides")), space1(1)),
|
||||
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1)
|
||||
and!(skip_second!(space1(1), ascii_string("provides")), space1(1)),
|
||||
collection!(
|
||||
ascii_char('['),
|
||||
loc!(exposes_entry()),
|
||||
ascii_char(','),
|
||||
ascii_char(']'),
|
||||
1
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -181,8 +189,14 @@ fn exposes<'a>() -> impl Parser<
|
||||
),
|
||||
> {
|
||||
and!(
|
||||
and!(skip_second!(space1(1), string("exposes")), space1(1)),
|
||||
collection!(char('['), loc!(exposes_entry()), char(','), char(']'), 1)
|
||||
and!(skip_second!(space1(1), ascii_string("exposes")), space1(1)),
|
||||
collection!(
|
||||
ascii_char('['),
|
||||
loc!(exposes_entry()),
|
||||
ascii_char(','),
|
||||
ascii_char(']'),
|
||||
1
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -195,8 +209,14 @@ fn imports<'a>() -> impl Parser<
|
||||
),
|
||||
> {
|
||||
and!(
|
||||
and!(skip_second!(space1(1), string("imports")), space1(1)),
|
||||
collection!(char('['), loc!(imports_entry()), char(','), char(']'), 1)
|
||||
and!(skip_second!(space1(1), ascii_string("imports")), space1(1)),
|
||||
collection!(
|
||||
ascii_char('['),
|
||||
loc!(imports_entry()),
|
||||
ascii_char(','),
|
||||
ascii_char(']'),
|
||||
1
|
||||
)
|
||||
)
|
||||
}
|
||||
|
||||
@ -213,8 +233,14 @@ fn imports_entry<'a>() -> impl Parser<'a, ImportsEntry<'a>> {
|
||||
module_name(),
|
||||
// e.g. `.{ Task, after}`
|
||||
optional(skip_first!(
|
||||
char('.'),
|
||||
collection!(char('{'), loc!(exposes_entry()), char(','), char('}'), 1)
|
||||
ascii_char('.'),
|
||||
collection!(
|
||||
ascii_char('{'),
|
||||
loc!(exposes_entry()),
|
||||
ascii_char(','),
|
||||
ascii_char('}'),
|
||||
1
|
||||
)
|
||||
))
|
||||
),
|
||||
|arena,
|
||||
|
@ -1,23 +1,19 @@
|
||||
use crate::ast::{Attempting, Base, Expr};
|
||||
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||
use std::char;
|
||||
use std::str::from_utf8_unchecked;
|
||||
|
||||
pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||
move |_arena, state: State<'a>| {
|
||||
let mut chars = state.input.chars();
|
||||
let bytes = &mut state.bytes.iter();
|
||||
|
||||
match chars.next() {
|
||||
Some(first_ch) => {
|
||||
match bytes.next() {
|
||||
Some(&first_byte) => {
|
||||
// Number literals must start with either an '-' or a digit.
|
||||
if first_ch == '-' || first_ch.is_ascii_digit() {
|
||||
parse_number_literal(first_ch, &mut chars, state)
|
||||
if first_byte == b'-' || (first_byte as char).is_ascii_digit() {
|
||||
parse_number_literal(first_byte as char, bytes, state)
|
||||
} else {
|
||||
Err(unexpected(
|
||||
first_ch,
|
||||
first_ch.len_utf8(),
|
||||
state,
|
||||
Attempting::NumberLiteral,
|
||||
))
|
||||
Err(unexpected(1, state, Attempting::NumberLiteral))
|
||||
}
|
||||
}
|
||||
None => Err(unexpected_eof(0, state.attempting, state)),
|
||||
@ -28,11 +24,11 @@ pub fn number_literal<'a>() -> impl Parser<'a, Expr<'a>> {
|
||||
#[inline(always)]
|
||||
fn parse_number_literal<'a, I>(
|
||||
first_ch: char,
|
||||
chars: &mut I,
|
||||
bytes: &mut I,
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, Expr<'a>>
|
||||
where
|
||||
I: Iterator<Item = char>,
|
||||
I: Iterator<Item = &'a u8>,
|
||||
{
|
||||
use self::LiteralType::*;
|
||||
|
||||
@ -40,13 +36,12 @@ where
|
||||
|
||||
// We already parsed 1 character (which may have been a minus sign).
|
||||
let mut bytes_parsed = 1;
|
||||
let mut prev_ch = first_ch;
|
||||
let mut prev_byte = first_ch as u8;
|
||||
let mut has_parsed_digits = first_ch.is_ascii_digit();
|
||||
|
||||
for next_ch in chars {
|
||||
for &next_byte in bytes {
|
||||
let err_unexpected = || {
|
||||
Err(unexpected(
|
||||
next_ch,
|
||||
bytes_parsed,
|
||||
state.clone(),
|
||||
Attempting::NumberLiteral,
|
||||
@ -55,91 +50,91 @@ where
|
||||
|
||||
let is_potentially_non_base10 = || {
|
||||
(bytes_parsed == 1 && first_ch == '0')
|
||||
|| (bytes_parsed == 2 && first_ch == '-' && prev_ch == '0')
|
||||
|| (bytes_parsed == 2 && first_ch == '-' && prev_byte == b'0')
|
||||
};
|
||||
|
||||
if next_ch == '.' {
|
||||
if typ == Float {
|
||||
// You only get one decimal point!
|
||||
return err_unexpected();
|
||||
} else {
|
||||
typ = Float;
|
||||
match next_byte as char {
|
||||
'.' => {
|
||||
if typ == Float {
|
||||
// You only get one decimal point!
|
||||
return err_unexpected();
|
||||
} else {
|
||||
typ = Float;
|
||||
}
|
||||
}
|
||||
} else if next_ch == 'x' {
|
||||
if is_potentially_non_base10() {
|
||||
typ = Hex;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
'x' => {
|
||||
if is_potentially_non_base10() {
|
||||
typ = Hex;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
}
|
||||
}
|
||||
} else if next_ch == 'b' && typ == Num {
|
||||
// We have to check for typ == Num because otherwise we get a false
|
||||
// positive here when parsing a hex literal that happens to have
|
||||
// a 'b' in it, e.g. 0xbbbb
|
||||
if is_potentially_non_base10() {
|
||||
typ = Binary;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
'b' if typ == Num => {
|
||||
// We have to check for typ == Num because otherwise we get a false
|
||||
// positive here when parsing a hex literal that happens to have
|
||||
// a 'b' in it, e.g. 0xbbbb
|
||||
if is_potentially_non_base10() {
|
||||
typ = Binary;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
}
|
||||
}
|
||||
} else if next_ch == 'o' {
|
||||
if is_potentially_non_base10() {
|
||||
typ = Octal;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
'o' => {
|
||||
if is_potentially_non_base10() {
|
||||
typ = Octal;
|
||||
} else {
|
||||
return err_unexpected();
|
||||
}
|
||||
}
|
||||
} else if next_ch.is_ascii_digit() {
|
||||
has_parsed_digits = true;
|
||||
} else if next_ch != '_' &&
|
||||
next_ch if next_ch.is_ascii_digit() => {
|
||||
has_parsed_digits = true;
|
||||
}
|
||||
next_ch
|
||||
if next_ch != '_' &&
|
||||
// ASCII alphabetic chars (like 'a' and 'f') are allowed in Hex int literals.
|
||||
// We parse them in any int literal, so we can give a more helpful error
|
||||
// in canonicalization (e.g. "the character 'f' is not allowed in Octal literals"
|
||||
// or "the character 'g' is outside the range of valid Hex literals")
|
||||
!next_ch.is_ascii_alphabetic()
|
||||
{
|
||||
if has_parsed_digits {
|
||||
// We hit an invalid number literal character; we're done!
|
||||
break;
|
||||
} else {
|
||||
// No digits! We likely parsed a minus sign that's actually an operator.
|
||||
return err_unexpected();
|
||||
!next_ch.is_ascii_alphabetic() =>
|
||||
{
|
||||
if has_parsed_digits {
|
||||
// We hit an invalid number literal character; we're done!
|
||||
break;
|
||||
} else {
|
||||
// No digits! We likely parsed a minus sign that's actually an operator.
|
||||
return err_unexpected();
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Since we only consume characters in the ASCII range for number literals,
|
||||
// this will always be exactly 1. There's no need to call next_ch.len_utf8().
|
||||
bytes_parsed += 1;
|
||||
prev_ch = next_ch;
|
||||
prev_byte = next_byte;
|
||||
}
|
||||
|
||||
let from_base = |base| {
|
||||
let is_negative = first_ch == '-';
|
||||
let string = if is_negative {
|
||||
&state.input[3..bytes_parsed]
|
||||
} else {
|
||||
&state.input[2..bytes_parsed]
|
||||
};
|
||||
|
||||
Expr::NonBase10Int {
|
||||
is_negative,
|
||||
string,
|
||||
base,
|
||||
}
|
||||
};
|
||||
|
||||
// At this point we have a number, and will definitely succeed.
|
||||
// If the number is malformed (outside the supported range),
|
||||
// we'll succeed with an appropriate Expr which records that.
|
||||
let expr = match typ {
|
||||
Num => Expr::Num(&state.input[0..bytes_parsed]),
|
||||
Float => Expr::Float(&state.input[0..bytes_parsed]),
|
||||
match typ {
|
||||
Num => Ok((
|
||||
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
|
||||
// already validated that this range contains only ASCII characters
|
||||
Expr::Num(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
|
||||
state.advance_without_indenting(bytes_parsed)?,
|
||||
)),
|
||||
Float => Ok((
|
||||
// SAFETY: it's safe to use from_utf8_unchecked here, because we've
|
||||
// already validated that this range contains only ASCII characters
|
||||
Expr::Float(unsafe { from_utf8_unchecked(&state.bytes[0..bytes_parsed]) }),
|
||||
state.advance_without_indenting(bytes_parsed)?,
|
||||
)),
|
||||
// For these we trim off the 0x/0o/0b part
|
||||
Hex => from_base(Base::Hex),
|
||||
Octal => from_base(Base::Octal),
|
||||
Binary => from_base(Base::Binary),
|
||||
};
|
||||
|
||||
let next_state = state.advance_without_indenting(bytes_parsed)?;
|
||||
|
||||
Ok((expr, next_state))
|
||||
Hex => from_base(Base::Hex, first_ch, bytes_parsed, state),
|
||||
Octal => from_base(Base::Octal, first_ch, bytes_parsed, state),
|
||||
Binary => from_base(Base::Binary, first_ch, bytes_parsed, state),
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
@ -150,3 +145,29 @@ enum LiteralType {
|
||||
Octal,
|
||||
Binary,
|
||||
}
|
||||
|
||||
fn from_base(
|
||||
base: Base,
|
||||
first_ch: char,
|
||||
bytes_parsed: usize,
|
||||
state: State<'_>,
|
||||
) -> ParseResult<'_, Expr<'_>> {
|
||||
let is_negative = first_ch == '-';
|
||||
let bytes = if is_negative {
|
||||
&state.bytes[3..bytes_parsed]
|
||||
} else {
|
||||
&state.bytes[2..bytes_parsed]
|
||||
};
|
||||
|
||||
match parse_utf8(bytes) {
|
||||
Ok(string) => Ok((
|
||||
Expr::NonBase10Int {
|
||||
is_negative,
|
||||
string,
|
||||
base,
|
||||
},
|
||||
state.advance_without_indenting(bytes_parsed)?,
|
||||
)),
|
||||
Err(reason) => state.fail(reason),
|
||||
}
|
||||
}
|
||||
|
@ -1,14 +1,17 @@
|
||||
use crate::ast::Attempting;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use encode_unicode::CharExt;
|
||||
use roc_region::all::{Located, Region};
|
||||
use std::fmt;
|
||||
use std::str::from_utf8;
|
||||
use std::{char, u16};
|
||||
|
||||
/// A position in a source file.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
pub struct State<'a> {
|
||||
/// The raw input string.
|
||||
pub input: &'a str,
|
||||
/// The raw input bytes from the file.
|
||||
pub bytes: &'a [u8],
|
||||
|
||||
/// Current line of the input
|
||||
pub line: u32,
|
||||
@ -39,15 +42,15 @@ pub enum Either<First, Second> {
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
pub fn new(input: &'a str, attempting: Attempting) -> State<'a> {
|
||||
pub fn new(bytes: &'a [u8], attempting: Attempting) -> State<'a> {
|
||||
State {
|
||||
input,
|
||||
bytes,
|
||||
line: 0,
|
||||
column: 0,
|
||||
indent_col: 0,
|
||||
is_indenting: true,
|
||||
attempting,
|
||||
original_len: input.len(),
|
||||
original_len: bytes.len(),
|
||||
}
|
||||
}
|
||||
|
||||
@ -69,7 +72,7 @@ impl<'a> State<'a> {
|
||||
///
|
||||
/// So if the parser has consumed 8 bytes, this function will return 8.
|
||||
pub fn bytes_consumed(&self) -> usize {
|
||||
self.original_len - self.input.len()
|
||||
self.original_len - self.bytes.len()
|
||||
}
|
||||
|
||||
/// Increments the line, then resets column, indent_col, and is_indenting.
|
||||
@ -77,7 +80,7 @@ impl<'a> State<'a> {
|
||||
pub fn newline(&self) -> Result<Self, (Fail, Self)> {
|
||||
match self.line.checked_add(1) {
|
||||
Some(line) => Ok(State {
|
||||
input: &self.input[1..],
|
||||
bytes: &self.bytes[1..],
|
||||
line,
|
||||
column: 0,
|
||||
indent_col: 0,
|
||||
@ -99,11 +102,11 @@ impl<'a> State<'a> {
|
||||
/// This assumes we are *not* advancing with spaces, or at least that
|
||||
/// any spaces on the line were preceded by non-spaces - which would mean
|
||||
/// they weren't eligible to indent anyway.
|
||||
pub fn advance_without_indenting(&self, quantity: usize) -> Result<Self, (Fail, Self)> {
|
||||
pub fn advance_without_indenting(self, quantity: usize) -> Result<Self, (Fail, Self)> {
|
||||
match (self.column as usize).checked_add(quantity) {
|
||||
Some(column_usize) if column_usize <= u16::MAX as usize => {
|
||||
Ok(State {
|
||||
input: &self.input[quantity..],
|
||||
bytes: &self.bytes[quantity..],
|
||||
line: self.line,
|
||||
column: column_usize as u16,
|
||||
indent_col: self.indent_col,
|
||||
@ -141,7 +144,7 @@ impl<'a> State<'a> {
|
||||
};
|
||||
|
||||
Ok(State {
|
||||
input: &self.input[spaces..],
|
||||
bytes: &self.bytes[spaces..],
|
||||
line: self.line,
|
||||
column: column_usize as u16,
|
||||
indent_col,
|
||||
@ -169,6 +172,35 @@ impl<'a> State<'a> {
|
||||
end_line: self.line,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a failing ParseResult for the given FailReason
|
||||
pub fn fail<T>(self, reason: FailReason) -> Result<(T, Self), (Fail, Self)> {
|
||||
Err((
|
||||
Fail {
|
||||
reason,
|
||||
attempting: self.attempting,
|
||||
},
|
||||
self,
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Debug for State<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "State {{")?;
|
||||
|
||||
match from_utf8(self.bytes) {
|
||||
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
|
||||
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
|
||||
}
|
||||
|
||||
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
|
||||
write!(f, "\n\tindent_col: {}", self.indent_col)?;
|
||||
write!(f, "\n\tis_indenting: {:?}", self.is_indenting)?;
|
||||
write!(f, "\n\tattempting: {:?}", self.attempting)?;
|
||||
write!(f, "\n\toriginal_len: {}", self.original_len)?;
|
||||
write!(f, "\n}}")
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -182,13 +214,14 @@ pub type ParseResult<'a, Output> = Result<(Output, State<'a>), (Fail, State<'a>)
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FailReason {
|
||||
Unexpected(char, Region),
|
||||
Unexpected(Region),
|
||||
OutdentedTooFar,
|
||||
ConditionFailed,
|
||||
LineTooLong(u32 /* which line was too long */),
|
||||
TooManyLines,
|
||||
Eof(Region),
|
||||
InvalidPattern,
|
||||
BadUtf8,
|
||||
ReservedKeyword(Region),
|
||||
ArgumentsBeforeEquals(Region),
|
||||
}
|
||||
@ -332,13 +365,12 @@ pub fn unexpected_eof(
|
||||
}
|
||||
|
||||
pub fn unexpected(
|
||||
ch: char,
|
||||
chars_consumed: usize,
|
||||
state: State<'_>,
|
||||
attempting: Attempting,
|
||||
) -> (Fail, State<'_>) {
|
||||
checked_unexpected(chars_consumed, state, |region| Fail {
|
||||
reason: FailReason::Unexpected(ch, region),
|
||||
reason: FailReason::Unexpected(region),
|
||||
attempting,
|
||||
})
|
||||
}
|
||||
@ -385,9 +417,9 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
|
||||
// (for example) the LineTooLong initially occurs in the middle of
|
||||
// a one_of chain, which would otherwise prevent it from propagating.
|
||||
let column = u16::MAX;
|
||||
let input = state.input.get(0..state.input.len()).unwrap();
|
||||
let bytes = state.bytes.get(0..state.bytes.len()).unwrap();
|
||||
let state = State {
|
||||
input,
|
||||
bytes,
|
||||
line: state.line,
|
||||
indent_col: state.indent_col,
|
||||
is_indenting: state.is_indenting,
|
||||
@ -399,29 +431,90 @@ fn line_too_long(attempting: Attempting, state: State<'_>) -> (Fail, State<'_>)
|
||||
(fail, state)
|
||||
}
|
||||
|
||||
/// A single char.
|
||||
pub fn char<'a>(expected: char) -> impl Parser<'a, ()> {
|
||||
move |_arena, state: State<'a>| match state.input.chars().next() {
|
||||
Some(actual) if expected == actual => Ok(((), state.advance_without_indenting(1)?)),
|
||||
Some(other_ch) => Err(unexpected(other_ch, 0, state, Attempting::Keyword)),
|
||||
/// A single ASCII char.
|
||||
pub fn ascii_char<'a>(expected: char) -> impl Parser<'a, ()> {
|
||||
// Make sure this really is an ASCII char!
|
||||
debug_assert!(expected.len_utf8() == 1);
|
||||
|
||||
move |_arena, state: State<'a>| match state.bytes.first() {
|
||||
Some(&actual) if expected == actual as char => {
|
||||
Ok(((), state.advance_without_indenting(1)?))
|
||||
}
|
||||
Some(_) => Err(unexpected(0, state, Attempting::Keyword)),
|
||||
_ => Err(unexpected_eof(0, Attempting::Keyword, state)),
|
||||
}
|
||||
}
|
||||
|
||||
/// A hardcoded keyword string with no newlines in it.
|
||||
pub fn string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
|
||||
// We can't have newlines because we don't attempt to advance the row
|
||||
// in the state, only the column.
|
||||
debug_assert!(!keyword.contains('\n'));
|
||||
/// A single UTF-8-encoded char. This will both parse *and* validate that the
|
||||
/// char is valid UTF-8.
|
||||
pub fn utf8_char2<'a>() -> impl Parser<'a, char> {
|
||||
move |_arena, state: State<'a>| {
|
||||
if !state.bytes.is_empty() {
|
||||
match char::from_utf8_slice_start(state.bytes) {
|
||||
Ok((ch, bytes_parsed)) => Ok((ch, state.advance_without_indenting(bytes_parsed)?)),
|
||||
Err(_) => state.fail(FailReason::BadUtf8),
|
||||
}
|
||||
} else {
|
||||
Err(unexpected_eof(0, state.attempting, state))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single UTF-8-encoded char. This will both parse *and* validate that the
|
||||
/// char is valid UTF-8, but it will *not* advance the state.
|
||||
pub fn peek_utf8_char<'a>(state: &State<'a>) -> Result<(char, usize), FailReason> {
|
||||
if !state.bytes.is_empty() {
|
||||
match char::from_utf8_slice_start(state.bytes) {
|
||||
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
|
||||
Err(_) => Err(FailReason::BadUtf8),
|
||||
}
|
||||
} else {
|
||||
Err(FailReason::Eof(
|
||||
Region::zero(), /* TODO get a better region */
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// A single UTF-8-encoded char, with an offset. This will both parse *and*
|
||||
/// validate that the char is valid UTF-8, but it will *not* advance the state.
|
||||
pub fn peek_utf8_char_at<'a>(
|
||||
state: &State<'a>,
|
||||
offset: usize,
|
||||
) -> Result<(char, usize), FailReason> {
|
||||
if state.bytes.len() > offset {
|
||||
let bytes = &state.bytes[offset..];
|
||||
|
||||
match char::from_utf8_slice_start(bytes) {
|
||||
Ok((ch, len_utf8)) => Ok((ch, len_utf8)),
|
||||
Err(_) => Err(FailReason::BadUtf8),
|
||||
}
|
||||
} else {
|
||||
Err(FailReason::Eof(
|
||||
Region::zero(), /* TODO get a better region */
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
/// A hardcoded string with no newlines, consisting only of ASCII characters
|
||||
pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, ()> {
|
||||
// Verify that this really is exclusively ASCII characters.
|
||||
// The `unsafe` block in this function relies upon this assumption!
|
||||
//
|
||||
// Also, this can't have newlines because we don't attempt to advance
|
||||
// the row in the state, only the column.
|
||||
debug_assert!(keyword.chars().all(|ch| ch.len_utf8() == 1 && ch != '\n'));
|
||||
|
||||
move |_arena, state: State<'a>| {
|
||||
let input = state.input;
|
||||
let len = keyword.len();
|
||||
|
||||
// TODO do this comparison in one SIMD instruction (on supported systems)
|
||||
match input.get(0..len) {
|
||||
Some(next_str) if next_str == keyword => {
|
||||
Ok(((), state.advance_without_indenting(len)?))
|
||||
match state.bytes.get(0..len) {
|
||||
Some(next_str) => {
|
||||
if next_str == keyword.as_bytes() {
|
||||
Ok(((), state.advance_without_indenting(len)?))
|
||||
} else {
|
||||
Err(unexpected(len, state, Attempting::Keyword))
|
||||
}
|
||||
}
|
||||
_ => Err(unexpected_eof(0, Attempting::Keyword, state)),
|
||||
}
|
||||
@ -686,7 +779,7 @@ macro_rules! collection {
|
||||
// We could change the AST to add extra storage specifically to
|
||||
// support empty literals containing newlines or comments, but this
|
||||
// does not seem worth even the tiniest regression in compiler performance.
|
||||
zero_or_more!($crate::parser::char(' ')),
|
||||
zero_or_more!($crate::parser::ascii_char(' ')),
|
||||
skip_second!(
|
||||
$crate::parser::sep_by0(
|
||||
$delimiter,
|
||||
@ -912,6 +1005,7 @@ macro_rules! record_field {
|
||||
use $crate::ast::AssignedField::*;
|
||||
use $crate::blankspace::{space0, space0_before};
|
||||
use $crate::ident::lowercase_ident;
|
||||
use $crate::parser::ascii_char;
|
||||
use $crate::parser::Either::*;
|
||||
|
||||
// You must have a field name, e.g. "email"
|
||||
@ -922,8 +1016,8 @@ macro_rules! record_field {
|
||||
// Having a value is optional; both `{ email }` and `{ email: blah }` work.
|
||||
// (This is true in both literals and types.)
|
||||
let (opt_loc_val, state) = $crate::parser::optional(either!(
|
||||
skip_first!(char(':'), space0_before($val_parser, $min_indent)),
|
||||
skip_first!(char('?'), space0_before($val_parser, $min_indent))
|
||||
skip_first!(ascii_char(':'), space0_before($val_parser, $min_indent)),
|
||||
skip_first!(ascii_char('?'), space0_before($val_parser, $min_indent))
|
||||
))
|
||||
.parse(arena, state)?;
|
||||
|
||||
@ -952,10 +1046,10 @@ macro_rules! record_field {
|
||||
macro_rules! record_without_update {
|
||||
($val_parser:expr, $min_indent:expr) => {
|
||||
collection!(
|
||||
char('{'),
|
||||
ascii_char('{'),
|
||||
loc!(record_field!($val_parser, $min_indent)),
|
||||
char(','),
|
||||
char('}'),
|
||||
ascii_char(','),
|
||||
ascii_char('}'),
|
||||
$min_indent
|
||||
)
|
||||
};
|
||||
@ -965,7 +1059,7 @@ macro_rules! record_without_update {
|
||||
macro_rules! record {
|
||||
($val_parser:expr, $min_indent:expr) => {
|
||||
skip_first!(
|
||||
$crate::parser::char('{'),
|
||||
$crate::parser::ascii_char('{'),
|
||||
and!(
|
||||
// You can optionally have an identifier followed by an '&' to
|
||||
// make this a record update, e.g. { Foo.user & username: "blah" }.
|
||||
@ -981,7 +1075,7 @@ macro_rules! record {
|
||||
)),
|
||||
$min_indent
|
||||
),
|
||||
$crate::parser::char('&')
|
||||
$crate::parser::ascii_char('&')
|
||||
)),
|
||||
loc!(skip_first!(
|
||||
// We specifically allow space characters inside here, so that
|
||||
@ -995,16 +1089,16 @@ macro_rules! record {
|
||||
// We could change the AST to add extra storage specifically to
|
||||
// support empty literals containing newlines or comments, but this
|
||||
// does not seem worth even the tiniest regression in compiler performance.
|
||||
zero_or_more!($crate::parser::char(' ')),
|
||||
zero_or_more!($crate::parser::ascii_char(' ')),
|
||||
skip_second!(
|
||||
$crate::parser::sep_by0(
|
||||
$crate::parser::char(','),
|
||||
$crate::parser::ascii_char(','),
|
||||
$crate::blankspace::space0_around(
|
||||
loc!(record_field!($val_parser, $min_indent)),
|
||||
$min_indent
|
||||
)
|
||||
),
|
||||
$crate::parser::char('}')
|
||||
$crate::parser::ascii_char('}')
|
||||
)
|
||||
))
|
||||
)
|
||||
@ -1067,3 +1161,10 @@ where
|
||||
{
|
||||
attempt!(attempting, parser)
|
||||
}
|
||||
|
||||
pub fn parse_utf8(bytes: &[u8]) -> Result<&str, FailReason> {
|
||||
match from_utf8(bytes) {
|
||||
Ok(string) => Ok(string),
|
||||
Err(_) => Err(FailReason::BadUtf8),
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,7 @@
|
||||
use crate::ast::Attempting;
|
||||
use crate::parser::{unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use std::char;
|
||||
|
||||
pub enum StringLiteral<'a> {
|
||||
Line(&'a str),
|
||||
@ -11,14 +10,15 @@ pub enum StringLiteral<'a> {
|
||||
|
||||
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
|
||||
move |arena: &'a Bump, state: State<'a>| {
|
||||
let mut chars = state.input.chars();
|
||||
let mut bytes = state.bytes.iter();
|
||||
|
||||
// String literals must start with a quote.
|
||||
// If this doesn't, it must not be a string literal!
|
||||
match chars.next() {
|
||||
Some('"') => (),
|
||||
Some(other_char) => {
|
||||
return Err(unexpected(other_char, 0, state, Attempting::StringLiteral));
|
||||
match bytes.next() {
|
||||
Some(&byte) => {
|
||||
if byte != b'"' {
|
||||
return Err(unexpected(0, state, Attempting::StringLiteral));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::StringLiteral, state));
|
||||
@ -35,44 +35,49 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
|
||||
// Since we're keeping the entire raw string, all we need to track is
|
||||
// how many characters we've parsed. So far, that's 1 (the opening `"`).
|
||||
let mut parsed_chars = 1;
|
||||
let mut prev_ch = '"';
|
||||
let mut prev_byte = b'"';
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
while let Some(&byte) = bytes.next() {
|
||||
parsed_chars += 1;
|
||||
|
||||
// Potentially end the string (unless this is an escaped `"`!)
|
||||
if ch == '"' && prev_ch != '\\' {
|
||||
let string = if parsed_chars == 2 {
|
||||
if let Some('"') = chars.next() {
|
||||
// If the first three chars were all `"`, then this
|
||||
// literal begins with `"""` and is a block string.
|
||||
return parse_block_string(arena, state, &mut chars);
|
||||
} else {
|
||||
""
|
||||
if byte == b'"' && prev_byte != b'\\' {
|
||||
let (string, state) = if parsed_chars == 2 {
|
||||
match bytes.next() {
|
||||
Some(byte) if *byte == b'"' => {
|
||||
// If the first three chars were all `"`, then this
|
||||
// literal begins with `"""` and is a block string.
|
||||
return parse_block_string(arena, state, &mut bytes);
|
||||
}
|
||||
_ => ("", state.advance_without_indenting(2)?),
|
||||
}
|
||||
} else {
|
||||
// Start at 1 so we omit the opening `"`.
|
||||
// Subtract 1 from parsed_chars so we omit the closing `"`.
|
||||
&state.input[1..(parsed_chars - 1)]
|
||||
let string_bytes = &state.bytes[1..(parsed_chars - 1)];
|
||||
|
||||
match parse_utf8(string_bytes) {
|
||||
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?),
|
||||
Err(reason) => {
|
||||
return state.fail(reason);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let next_state = state.advance_without_indenting(parsed_chars)?;
|
||||
|
||||
return Ok((StringLiteral::Line(string), next_state));
|
||||
} else if ch == '\n' {
|
||||
return Ok((StringLiteral::Line(string), state));
|
||||
} else if byte == b'\n' {
|
||||
// This is a single-line string, which cannot have newlines!
|
||||
// Treat this as an unclosed string literal, and consume
|
||||
// all remaining chars. This will mask all other errors, but
|
||||
// it should make it easiest to debug; the file will be a giant
|
||||
// error starting from where the open quote appeared.
|
||||
return Err(unexpected(
|
||||
'\n',
|
||||
state.input.len() - 1,
|
||||
state.bytes.len() - 1,
|
||||
state,
|
||||
Attempting::StringLiteral,
|
||||
));
|
||||
} else {
|
||||
prev_ch = ch;
|
||||
prev_byte = byte;
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,48 +93,64 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
|
||||
fn parse_block_string<'a, I>(
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
chars: &mut I,
|
||||
bytes: &mut I,
|
||||
) -> ParseResult<'a, StringLiteral<'a>>
|
||||
where
|
||||
I: Iterator<Item = char>,
|
||||
I: Iterator<Item = &'a u8>,
|
||||
{
|
||||
// So far we have consumed the `"""` and that's it.
|
||||
let mut parsed_chars = 3;
|
||||
let mut prev_ch = '"';
|
||||
let mut prev_byte = b'"';
|
||||
let mut quotes_seen = 0;
|
||||
|
||||
// start at 3 to omit the opening `"""`.
|
||||
let mut line_start = 3;
|
||||
|
||||
let mut lines = Vec::new_in(arena);
|
||||
let mut lines: Vec<'a, &'a str> = Vec::new_in(arena);
|
||||
|
||||
for ch in chars {
|
||||
for byte in bytes {
|
||||
parsed_chars += 1;
|
||||
|
||||
// Potentially end the string (unless this is an escaped `"`!)
|
||||
if ch == '"' && prev_ch != '\\' {
|
||||
if *byte == b'"' && prev_byte != b'\\' {
|
||||
if quotes_seen == 2 {
|
||||
// three consecutive quotes, end string
|
||||
|
||||
// Subtract 3 from parsed_chars so we omit the closing `"""`.
|
||||
let string = &state.input[line_start..(parsed_chars - 3)];
|
||||
lines.push(string);
|
||||
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
|
||||
|
||||
let next_state = state.advance_without_indenting(parsed_chars)?;
|
||||
return match parse_utf8(line_bytes) {
|
||||
Ok(line) => {
|
||||
let state = state.advance_without_indenting(parsed_chars)?;
|
||||
|
||||
return Ok((StringLiteral::Block(arena.alloc(lines)), next_state));
|
||||
lines.push(line);
|
||||
|
||||
Ok((StringLiteral::Block(arena.alloc(lines)), state))
|
||||
}
|
||||
Err(reason) => state.fail(reason),
|
||||
};
|
||||
}
|
||||
quotes_seen += 1;
|
||||
} else if ch == '\n' {
|
||||
} else if *byte == b'\n' {
|
||||
// note this includes the newline
|
||||
let string = &state.input[line_start..parsed_chars];
|
||||
lines.push(string);
|
||||
quotes_seen = 0;
|
||||
line_start = parsed_chars;
|
||||
let line_bytes = &state.bytes[line_start..parsed_chars];
|
||||
|
||||
match parse_utf8(line_bytes) {
|
||||
Ok(line) => {
|
||||
lines.push(line);
|
||||
|
||||
quotes_seen = 0;
|
||||
line_start = parsed_chars;
|
||||
}
|
||||
Err(reason) => {
|
||||
return state.fail(reason);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
quotes_seen = 0;
|
||||
}
|
||||
prev_ch = ch;
|
||||
|
||||
prev_byte = *byte;
|
||||
}
|
||||
|
||||
// We ran out of characters before finding 3 closing quotes
|
||||
@ -137,6 +158,6 @@ where
|
||||
parsed_chars,
|
||||
// TODO custom BlockStringLiteral?
|
||||
Attempting::StringLiteral,
|
||||
state.clone(),
|
||||
state,
|
||||
))
|
||||
}
|
||||
|
@ -4,8 +4,8 @@ use crate::expr::{global_tag, private_tag};
|
||||
use crate::ident::join_module_parts;
|
||||
use crate::keyword;
|
||||
use crate::parser::{
|
||||
allocated, char, not, optional, string, unexpected, unexpected_eof, Either, ParseResult,
|
||||
Parser, State,
|
||||
allocated, ascii_char, ascii_string, not, optional, peek_utf8_char, unexpected, Either,
|
||||
ParseResult, Parser, State,
|
||||
};
|
||||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
@ -22,10 +22,10 @@ macro_rules! tag_union {
|
||||
map!(
|
||||
and!(
|
||||
collection!(
|
||||
char('['),
|
||||
ascii_char('['),
|
||||
loc!(tag_type($min_indent)),
|
||||
char(','),
|
||||
char(']'),
|
||||
ascii_char(','),
|
||||
ascii_char(']'),
|
||||
$min_indent
|
||||
),
|
||||
optional(
|
||||
@ -61,7 +61,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
|
||||
and!(
|
||||
space1(min_indent),
|
||||
skip_first!(
|
||||
string(keyword::AS),
|
||||
ascii_string(keyword::AS),
|
||||
space1_before(term(min_indent), min_indent)
|
||||
)
|
||||
)
|
||||
@ -89,7 +89,7 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>>
|
||||
|
||||
/// The `*` type variable, e.g. in (List *) Wildcard,
|
||||
fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
|
||||
map!(loc!(char('*')), |loc_val: Located<()>| {
|
||||
map!(loc!(ascii_char('*')), |loc_val: Located<()>| {
|
||||
loc_val.map(|_| TypeAnnotation::Wildcard)
|
||||
})
|
||||
}
|
||||
@ -97,7 +97,7 @@ fn loc_wildcard<'a>() -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
|
||||
pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
|
||||
skip_first!(
|
||||
// Once we hit an "as", stop parsing args
|
||||
not(string(keyword::AS)),
|
||||
not(ascii_string(keyword::AS)),
|
||||
one_of!(
|
||||
loc_wildcard(),
|
||||
loc_parenthetical_type(min_indent),
|
||||
@ -112,12 +112,12 @@ pub fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnot
|
||||
#[inline(always)]
|
||||
fn loc_parenthetical_type<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>>> {
|
||||
between!(
|
||||
char('('),
|
||||
ascii_char('('),
|
||||
space0_around(
|
||||
move |arena, state| expression(min_indent).parse(arena, state),
|
||||
min_indent,
|
||||
),
|
||||
char(')')
|
||||
ascii_char(')')
|
||||
)
|
||||
}
|
||||
|
||||
@ -208,7 +208,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
|
||||
move |arena, state: State<'a>| {
|
||||
let (first, state) = space0_before(term(min_indent), min_indent).parse(arena, state)?;
|
||||
let (rest, state) = zero_or_more!(skip_first!(
|
||||
char(','),
|
||||
ascii_char(','),
|
||||
space0_around(term(min_indent), min_indent)
|
||||
))
|
||||
.parse(arena, state)?;
|
||||
@ -216,7 +216,7 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
|
||||
// TODO this space0 is dropped, so newlines just before the function arrow when there
|
||||
// is only one argument are not seen by the formatter. Can we do better?
|
||||
let (is_function, state) =
|
||||
optional(skip_first!(space0(min_indent), string("->"))).parse(arena, state)?;
|
||||
optional(skip_first!(space0(min_indent), ascii_string("->"))).parse(arena, state)?;
|
||||
|
||||
if is_function.is_some() {
|
||||
let (return_type, state) =
|
||||
@ -263,67 +263,70 @@ fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located<TypeAnnotation<'a>
|
||||
|
||||
fn parse_concrete_type<'a>(
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, TypeAnnotation<'a>> {
|
||||
let mut chars = state.input.chars();
|
||||
let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.)
|
||||
let mut parts: Vec<&'a str> = Vec::new_in(arena);
|
||||
|
||||
// Qualified types must start with a capitalized letter.
|
||||
match chars.next() {
|
||||
Some(ch) => {
|
||||
if ch.is_alphabetic() && ch.is_uppercase() {
|
||||
part_buf.push(ch);
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((first_letter, bytes_parsed)) => {
|
||||
if first_letter.is_alphabetic() && first_letter.is_uppercase() {
|
||||
part_buf.push(first_letter);
|
||||
} else {
|
||||
return Err(unexpected(ch, 0, state, Attempting::ConcreteType));
|
||||
return Err(unexpected(0, state, Attempting::ConcreteType));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::ConcreteType, state));
|
||||
}
|
||||
};
|
||||
|
||||
let mut chars_parsed = 1;
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
|
||||
let mut next_char = None;
|
||||
|
||||
while let Some(ch) = chars.next() {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A dot ('.')
|
||||
if ch.is_alphabetic() {
|
||||
if part_buf.is_empty() && !ch.is_uppercase() {
|
||||
// Each part must begin with a capital letter.
|
||||
return malformed(Some(ch), arena, state, &mut chars, parts);
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
// * A dot ('.')
|
||||
if ch.is_alphabetic() {
|
||||
if part_buf.is_empty() && !ch.is_uppercase() {
|
||||
// Each part must begin with a capital letter.
|
||||
return malformed(Some(ch), arena, state, parts);
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch.is_ascii_digit() {
|
||||
// Parts may not start with numbers!
|
||||
if part_buf.is_empty() {
|
||||
return malformed(Some(ch), arena, state, parts);
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch == '.' {
|
||||
// Having two consecutive dots is an error.
|
||||
if part_buf.is_empty() {
|
||||
return malformed(Some(ch), arena, state, parts);
|
||||
}
|
||||
|
||||
parts.push(part_buf.into_bump_str());
|
||||
|
||||
// Now that we've recorded the contents of the current buffer, reset it.
|
||||
part_buf = String::new_in(arena);
|
||||
} else {
|
||||
// This must be the end of the type. We're done!
|
||||
next_char = Some(ch);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch.is_ascii_digit() {
|
||||
// Parts may not start with numbers!
|
||||
if part_buf.is_empty() {
|
||||
return malformed(Some(ch), arena, state, &mut chars, parts);
|
||||
}
|
||||
|
||||
part_buf.push(ch);
|
||||
} else if ch == '.' {
|
||||
// Having two consecutive dots is an error.
|
||||
if part_buf.is_empty() {
|
||||
return malformed(Some(ch), arena, state, &mut chars, parts);
|
||||
}
|
||||
|
||||
parts.push(part_buf.into_bump_str());
|
||||
|
||||
// Now that we've recorded the contents of the current buffer, reset it.
|
||||
part_buf = String::new_in(arena);
|
||||
} else {
|
||||
// This must be the end of the type. We're done!
|
||||
next_char = Some(ch);
|
||||
|
||||
break;
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
|
||||
chars_parsed += 1;
|
||||
}
|
||||
|
||||
if part_buf.is_empty() {
|
||||
@ -333,23 +336,16 @@ fn parse_concrete_type<'a>(
|
||||
//
|
||||
// If we made it this far and don't have a next_char, then necessarily
|
||||
// we have consumed a '.' char previously.
|
||||
return malformed(
|
||||
next_char.or_else(|| Some('.')),
|
||||
arena,
|
||||
state,
|
||||
&mut chars,
|
||||
parts,
|
||||
);
|
||||
return malformed(next_char.or_else(|| Some('.')), arena, state, parts);
|
||||
}
|
||||
|
||||
if part_buf.is_empty() {
|
||||
// We had neither capitalized nor noncapitalized parts,
|
||||
// yet we made it this far. The only explanation is that this was
|
||||
// a stray '.' drifting through the cosmos.
|
||||
return Err(unexpected('.', 1, state, Attempting::Identifier));
|
||||
return Err(unexpected(1, state, Attempting::Identifier));
|
||||
}
|
||||
|
||||
let state = state.advance_without_indenting(chars_parsed)?;
|
||||
let answer = TypeAnnotation::Apply(
|
||||
join_module_parts(arena, parts.into_bump_slice()),
|
||||
part_buf.into_bump_str(),
|
||||
@ -361,58 +357,55 @@ fn parse_concrete_type<'a>(
|
||||
|
||||
fn parse_type_variable<'a>(
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, TypeAnnotation<'a>> {
|
||||
let mut chars = state.input.chars();
|
||||
let mut buf = String::new_in(arena);
|
||||
|
||||
// Type variables must start with a lowercase letter.
|
||||
match chars.next() {
|
||||
Some(ch) => {
|
||||
if ch.is_alphabetic() && ch.is_lowercase() {
|
||||
buf.push(ch);
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((first_letter, bytes_parsed)) => {
|
||||
// Type variables must start with a lowercase letter.
|
||||
if first_letter.is_alphabetic() && first_letter.is_lowercase() {
|
||||
buf.push(first_letter);
|
||||
} else {
|
||||
return Err(unexpected(ch, 0, state, Attempting::TypeVariable));
|
||||
return Err(unexpected(0, state, Attempting::TypeVariable));
|
||||
}
|
||||
}
|
||||
None => {
|
||||
return Err(unexpected_eof(0, Attempting::TypeVariable, state));
|
||||
}
|
||||
};
|
||||
|
||||
let mut chars_parsed = 1;
|
||||
|
||||
for ch in chars {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
buf.push(ch);
|
||||
} else {
|
||||
// This must be the end of the type. We're done!
|
||||
break;
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// After the first character, only these are allowed:
|
||||
//
|
||||
// * Unicode alphabetic chars - you might name a variable `鹏` if that's clear to your readers
|
||||
// * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric()
|
||||
if ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
buf.push(ch);
|
||||
} else {
|
||||
// This must be the end of the type. We're done!
|
||||
break;
|
||||
}
|
||||
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
|
||||
chars_parsed += 1;
|
||||
}
|
||||
|
||||
let state = state.advance_without_indenting(chars_parsed)?;
|
||||
let answer = TypeAnnotation::BoundVariable(buf.into_bump_str());
|
||||
|
||||
Ok((answer, state))
|
||||
}
|
||||
|
||||
fn malformed<'a, I>(
|
||||
fn malformed<'a>(
|
||||
opt_bad_char: Option<char>,
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
chars: &mut I,
|
||||
mut state: State<'a>,
|
||||
parts: Vec<&'a str>,
|
||||
) -> ParseResult<'a, TypeAnnotation<'a>>
|
||||
where
|
||||
I: Iterator<Item = char>,
|
||||
{
|
||||
) -> ParseResult<'a, TypeAnnotation<'a>> {
|
||||
// Reconstruct the original string that we've been parsing.
|
||||
let mut full_string = String::new_in(arena);
|
||||
|
||||
@ -423,20 +416,25 @@ where
|
||||
}
|
||||
|
||||
// Consume the remaining chars in the identifier.
|
||||
for ch in chars {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
full_string.push(ch);
|
||||
} else {
|
||||
break;
|
||||
while !state.bytes.is_empty() {
|
||||
match peek_utf8_char(&state) {
|
||||
Ok((ch, bytes_parsed)) => {
|
||||
// We can't use ch.is_alphanumeric() here because that passes for
|
||||
// things that are "numeric" but not ASCII digits, like `¾`
|
||||
if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() {
|
||||
full_string.push(ch);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
state = state.advance_without_indenting(bytes_parsed)?;
|
||||
}
|
||||
Err(reason) => return state.fail(reason),
|
||||
}
|
||||
}
|
||||
|
||||
let chars_parsed = full_string.len();
|
||||
|
||||
Ok((
|
||||
TypeAnnotation::Malformed(full_string.into_bump_str()),
|
||||
state.advance_without_indenting(chars_parsed)?,
|
||||
state,
|
||||
))
|
||||
}
|
||||
|
@ -13,7 +13,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -918,17 +918,12 @@ mod test_parse {
|
||||
let arena = Bump::new();
|
||||
let arg = arena.alloc(Located::new(0, 0, 5, 6, Num("1")));
|
||||
let args = bumpalo::vec![in &arena; &*arg];
|
||||
let expr = Var {
|
||||
module_name: "",
|
||||
ident: "whee",
|
||||
};
|
||||
let expected = Expr::Apply(
|
||||
arena.alloc(Located::new(
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
4,
|
||||
Var {
|
||||
module_name: "",
|
||||
ident: "whee",
|
||||
},
|
||||
)),
|
||||
arena.alloc(Located::new(0, 0, 0, 4, expr)),
|
||||
args,
|
||||
CalledVia::Space,
|
||||
);
|
||||
@ -1040,16 +1035,11 @@ mod test_parse {
|
||||
fn unary_negation() {
|
||||
let arena = Bump::new();
|
||||
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Negate);
|
||||
let loc_arg1_expr = Located::new(
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
4,
|
||||
Var {
|
||||
module_name: "",
|
||||
ident: "foo",
|
||||
},
|
||||
);
|
||||
let arg1_expr = Var {
|
||||
module_name: "",
|
||||
ident: "foo",
|
||||
};
|
||||
let loc_arg1_expr = Located::new(0, 0, 1, 4, arg1_expr);
|
||||
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
|
||||
let actual = parse_with(&arena, "-foo");
|
||||
|
||||
@ -1060,16 +1050,11 @@ mod test_parse {
|
||||
fn unary_not() {
|
||||
let arena = Bump::new();
|
||||
let loc_op = Located::new(0, 0, 0, 1, UnaryOp::Not);
|
||||
let loc_arg1_expr = Located::new(
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
5,
|
||||
Var {
|
||||
module_name: "",
|
||||
ident: "blah",
|
||||
},
|
||||
);
|
||||
let arg1_expr = Var {
|
||||
module_name: "",
|
||||
ident: "blah",
|
||||
};
|
||||
let loc_arg1_expr = Located::new(0, 0, 1, 5, arg1_expr);
|
||||
let expected = UnaryOp(arena.alloc(loc_arg1_expr), loc_op);
|
||||
let actual = parse_with(&arena, "!blah");
|
||||
|
||||
@ -2092,7 +2077,7 @@ mod test_parse {
|
||||
"#
|
||||
);
|
||||
let actual = interface_header()
|
||||
.parse(&arena, State::new(&src, Attempting::Module))
|
||||
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
|
||||
.map(|tuple| tuple.0);
|
||||
|
||||
assert_eq!(Ok(expected), actual);
|
||||
@ -2121,7 +2106,7 @@ mod test_parse {
|
||||
"#
|
||||
);
|
||||
let actual = interface_header()
|
||||
.parse(&arena, State::new(&src, Attempting::Module))
|
||||
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
|
||||
.map(|tuple| tuple.0);
|
||||
|
||||
assert_eq!(Ok(expected), actual);
|
||||
@ -2174,7 +2159,7 @@ mod test_parse {
|
||||
"#
|
||||
);
|
||||
let actual = module_defs()
|
||||
.parse(&arena, State::new(&src, Attempting::Module))
|
||||
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
|
||||
.map(|tuple| tuple.0);
|
||||
|
||||
assert_eq!(Ok(expected), actual);
|
||||
|
@ -91,7 +91,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
@ -93,7 +93,7 @@ pub fn parse_with<'a>(arena: &'a Bump, input: &'a str) -> Result<ast::Expr<'a>,
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn parse_loc_with<'a>(arena: &'a Bump, input: &'a str) -> Result<Located<ast::Expr<'a>>, Fail> {
|
||||
let state = State::new(&input, Attempting::Module);
|
||||
let state = State::new(input.as_bytes(), Attempting::Module);
|
||||
let parser = space0_before(loc(roc_parse::expr::expr(0)), 0);
|
||||
let answer = parser.parse(&arena, state);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user