mirror of
https://github.com/roc-lang/roc.git
synced 2024-09-21 07:49:17 +03:00
extract state
This commit is contained in:
parent
33277144d6
commit
49818343dd
@ -17,7 +17,8 @@ use roc_parse::header::{
|
||||
use roc_parse::{
|
||||
ast::{Def, Module},
|
||||
module::{self, module_defs},
|
||||
parser::{Parser, State, SyntaxError},
|
||||
parser::{Parser, SyntaxError},
|
||||
state::State,
|
||||
};
|
||||
use roc_region::all::Located;
|
||||
use roc_reporting::{internal_error, user_error};
|
||||
|
@ -92,7 +92,7 @@ impl Validator for InputValidator {
|
||||
Ok(ValidationResult::Incomplete)
|
||||
} else {
|
||||
let arena = bumpalo::Bump::new();
|
||||
let state = roc_parse::parser::State::new(ctx.input().trim().as_bytes());
|
||||
let state = roc_parse::state::State::new(ctx.input().trim().as_bytes());
|
||||
|
||||
match roc_parse::expr::parse_loc_expr(0, &arena, state) {
|
||||
// Special case some syntax errors to allow for multi-line inputs
|
||||
|
@ -11,7 +11,8 @@ mod test_fmt {
|
||||
use roc_fmt::module::fmt_module;
|
||||
use roc_fmt::Buf;
|
||||
use roc_parse::module::{self, module_defs};
|
||||
use roc_parse::parser::{Parser, State};
|
||||
use roc_parse::parser::Parser;
|
||||
use roc_parse::state::State;
|
||||
use roc_test_utils::assert_multiline_str_eq;
|
||||
|
||||
// Not intended to be used directly in tests; please use expr_formats_to or expr_formats_same
|
||||
|
@ -28,7 +28,7 @@ use roc_parse::header::{
|
||||
ExposedName, ImportsEntry, PackageEntry, PackageOrPath, PlatformHeader, To, TypedIdent,
|
||||
};
|
||||
use roc_parse::module::module_defs;
|
||||
use roc_parse::parser::{self, ParseProblem, Parser, SyntaxError};
|
||||
use roc_parse::parser::{ParseProblem, Parser, SyntaxError};
|
||||
use roc_region::all::{Located, Region};
|
||||
use roc_solve::module::SolvedModule;
|
||||
use roc_solve::solve;
|
||||
@ -674,7 +674,7 @@ struct ModuleHeader<'a> {
|
||||
exposes: Vec<Symbol>,
|
||||
exposed_imports: MutMap<Ident, (Symbol, Region)>,
|
||||
header_src: &'a str,
|
||||
parse_state: roc_parse::parser::State<'a>,
|
||||
parse_state: roc_parse::state::State<'a>,
|
||||
module_timing: ModuleTiming,
|
||||
}
|
||||
|
||||
@ -2356,7 +2356,7 @@ fn load_pkg_config<'a>(
|
||||
Ok(bytes_vec) => {
|
||||
let parse_start = SystemTime::now();
|
||||
let bytes = arena.alloc(bytes_vec);
|
||||
let parse_state = parser::State::new(bytes);
|
||||
let parse_state = roc_parse::state::State::new(bytes);
|
||||
let parsed = roc_parse::module::parse_header(arena, parse_state);
|
||||
let parse_header_duration = parse_start.elapsed().unwrap();
|
||||
|
||||
@ -2384,7 +2384,7 @@ fn load_pkg_config<'a>(
|
||||
)))
|
||||
}
|
||||
Ok((ast::Module::Platform { header }, parser_state)) => {
|
||||
let delta = bytes.len() - parser_state.bytes.len();
|
||||
let delta = bytes.len() - parser_state.bytes().len();
|
||||
let chomped = &bytes[..delta];
|
||||
let header_src = unsafe { std::str::from_utf8_unchecked(chomped) };
|
||||
|
||||
@ -2526,7 +2526,7 @@ fn parse_header<'a>(
|
||||
start_time: SystemTime,
|
||||
) -> Result<(ModuleId, Msg<'a>), LoadingProblem<'a>> {
|
||||
let parse_start = SystemTime::now();
|
||||
let parse_state = parser::State::new(src_bytes);
|
||||
let parse_state = roc_parse::state::State::new(src_bytes);
|
||||
let parsed = roc_parse::module::parse_header(arena, parse_state);
|
||||
let parse_header_duration = parse_start.elapsed().unwrap();
|
||||
|
||||
@ -2539,7 +2539,7 @@ fn parse_header<'a>(
|
||||
match parsed {
|
||||
Ok((ast::Module::Interface { header }, parse_state)) => {
|
||||
let header_src = unsafe {
|
||||
let chomped = src_bytes.len() - parse_state.bytes.len();
|
||||
let chomped = src_bytes.len() - parse_state.bytes().len();
|
||||
std::str::from_utf8_unchecked(&src_bytes[..chomped])
|
||||
};
|
||||
|
||||
@ -2571,7 +2571,7 @@ fn parse_header<'a>(
|
||||
pkg_config_dir.pop();
|
||||
|
||||
let header_src = unsafe {
|
||||
let chomped = src_bytes.len() - parse_state.bytes.len();
|
||||
let chomped = src_bytes.len() - parse_state.bytes().len();
|
||||
std::str::from_utf8_unchecked(&src_bytes[..chomped])
|
||||
};
|
||||
|
||||
@ -2767,7 +2767,7 @@ struct HeaderInfo<'a> {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn send_header<'a>(
|
||||
info: HeaderInfo<'a>,
|
||||
parse_state: parser::State<'a>,
|
||||
parse_state: roc_parse::state::State<'a>,
|
||||
module_ids: Arc<Mutex<PackageModuleIds<'a>>>,
|
||||
ident_ids_by_module: Arc<Mutex<MutMap<ModuleId, IdentIds>>>,
|
||||
module_timing: ModuleTiming,
|
||||
@ -2980,7 +2980,7 @@ struct PlatformHeaderInfo<'a> {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn send_header_two<'a>(
|
||||
info: PlatformHeaderInfo<'a>,
|
||||
parse_state: parser::State<'a>,
|
||||
parse_state: roc_parse::state::State<'a>,
|
||||
module_ids: Arc<Mutex<PackageModuleIds<'a>>>,
|
||||
ident_ids_by_module: Arc<Mutex<MutMap<ModuleId, IdentIds>>>,
|
||||
module_timing: ModuleTiming,
|
||||
@ -3327,7 +3327,7 @@ fn fabricate_pkg_config_module<'a>(
|
||||
shorthand: &'a str,
|
||||
app_module_id: ModuleId,
|
||||
filename: PathBuf,
|
||||
parse_state: parser::State<'a>,
|
||||
parse_state: roc_parse::state::State<'a>,
|
||||
module_ids: Arc<Mutex<PackageModuleIds<'a>>>,
|
||||
ident_ids_by_module: Arc<Mutex<MutMap<ModuleId, IdentIds>>>,
|
||||
header: &PlatformHeader<'a>,
|
||||
@ -3724,7 +3724,7 @@ where
|
||||
fn parse<'a>(arena: &'a Bump, header: ModuleHeader<'a>) -> Result<Msg<'a>, LoadingProblem<'a>> {
|
||||
let mut module_timing = header.module_timing;
|
||||
let parse_start = SystemTime::now();
|
||||
let source = header.parse_state.bytes;
|
||||
let source = header.parse_state.bytes();
|
||||
let parse_state = header.parse_state;
|
||||
let parsed_defs = match module_defs().parse(arena, parse_state) {
|
||||
Ok((_, success, _state)) => success,
|
||||
|
@ -1,8 +1,7 @@
|
||||
use crate::ast::CommentOrNewline;
|
||||
use crate::ast::Spaceable;
|
||||
use crate::parser::{
|
||||
self, and, backtrackable, BadInputError, Col, Parser, Progress::*, Row, State,
|
||||
};
|
||||
use crate::parser::{self, and, backtrackable, BadInputError, Col, Parser, Progress::*, Row};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::Located;
|
||||
@ -193,17 +192,25 @@ where
|
||||
move |arena, mut state: State<'a>| {
|
||||
let comments_and_newlines = Vec::new_in(arena);
|
||||
|
||||
match eat_spaces(state.bytes, state.line, state.column, comments_and_newlines) {
|
||||
match eat_spaces(
|
||||
state.bytes(),
|
||||
state.line,
|
||||
state.column,
|
||||
comments_and_newlines,
|
||||
) {
|
||||
HasTab { row, col } => {
|
||||
// there was a tab character
|
||||
let mut state = state;
|
||||
state.line = row;
|
||||
state.column = col;
|
||||
// TODO: it _seems_ like if we're changing the line/column, we should also be
|
||||
// advancing the state by the corresponding number of bytes.
|
||||
// Not doing this is likely a bug!
|
||||
// state = state.advance(<something>);
|
||||
Err((
|
||||
MadeProgress,
|
||||
space_problem(BadInputError::HasTab, row, col),
|
||||
State {
|
||||
line: row,
|
||||
column: col,
|
||||
..state
|
||||
},
|
||||
state,
|
||||
))
|
||||
}
|
||||
Good {
|
||||
@ -212,7 +219,7 @@ where
|
||||
bytes,
|
||||
comments_and_newlines,
|
||||
} => {
|
||||
if bytes == state.bytes {
|
||||
if bytes == state.bytes() {
|
||||
Ok((NoProgress, &[] as &[_], state))
|
||||
} else if state.line != row {
|
||||
// we parsed at least one newline
|
||||
@ -222,7 +229,7 @@ where
|
||||
if col >= min_indent {
|
||||
state.line = row;
|
||||
state.column = col;
|
||||
state.bytes = bytes;
|
||||
state = state.advance(state.bytes().len() - bytes.len());
|
||||
|
||||
Ok((MadeProgress, comments_and_newlines.into_bump_slice(), state))
|
||||
} else {
|
||||
@ -234,7 +241,7 @@ where
|
||||
}
|
||||
} else {
|
||||
state.column = col;
|
||||
state.bytes = bytes;
|
||||
state = state.advance(state.bytes().len() - bytes.len());
|
||||
|
||||
Ok((MadeProgress, comments_and_newlines.into_bump_slice(), state))
|
||||
}
|
||||
|
@ -7,9 +7,10 @@ use crate::keyword;
|
||||
use crate::parser::{
|
||||
self, backtrackable, optional, sep_by1, sep_by1_e, specialize, specialize_ref, then,
|
||||
trailing_sep_by0, word1, word2, EExpect, EExpr, EIf, EInParens, ELambda, EList, ENumber,
|
||||
EPattern, ERecord, EString, EType, EWhen, Either, ParseResult, Parser, State,
|
||||
EPattern, ERecord, EString, EType, EWhen, Either, ParseResult, Parser,
|
||||
};
|
||||
use crate::pattern::loc_closure_param;
|
||||
use crate::state::State;
|
||||
use crate::type_annotation;
|
||||
use bumpalo::collections::Vec;
|
||||
use bumpalo::Bump;
|
||||
@ -304,22 +305,16 @@ fn unary_negate<'a>() -> impl Parser<'a, (), EExpr<'a>> {
|
||||
// - it is preceded by whitespace (spaces, newlines, comments)
|
||||
// - it is not followed by whitespace
|
||||
let followed_by_whitespace = state
|
||||
.bytes
|
||||
.bytes()
|
||||
.get(1)
|
||||
.map(|c| c.is_ascii_whitespace() || *c == b'#')
|
||||
.unwrap_or(false);
|
||||
|
||||
if state.bytes.starts_with(b"-") && !followed_by_whitespace {
|
||||
if state.bytes().starts_with(b"-") && !followed_by_whitespace {
|
||||
// the negate is only unary if it is not followed by whitespace
|
||||
Ok((
|
||||
MadeProgress,
|
||||
(),
|
||||
State {
|
||||
bytes: &state.bytes[1..],
|
||||
column: state.column + 1,
|
||||
..state
|
||||
},
|
||||
))
|
||||
let mut state = state.advance(1);
|
||||
state.column += 1;
|
||||
Ok((MadeProgress, (), state))
|
||||
} else {
|
||||
// this is not a negated expression
|
||||
Err((NoProgress, EExpr::UnaryNot(state.line, state.column), state))
|
||||
@ -515,7 +510,7 @@ fn numeric_negate_expression<'a, T>(
|
||||
expr: Located<Expr<'a>>,
|
||||
spaces: &'a [CommentOrNewline<'a>],
|
||||
) -> Located<Expr<'a>> {
|
||||
debug_assert_eq!(state.bytes.get(0), Some(&b'-'));
|
||||
debug_assert_eq!(state.bytes().get(0), Some(&b'-'));
|
||||
// for overflow reasons, we must make the unary minus part of the number literal.
|
||||
let mut region = expr.region;
|
||||
region.start_col -= 1;
|
||||
@ -523,13 +518,13 @@ fn numeric_negate_expression<'a, T>(
|
||||
let new_expr = match &expr.value {
|
||||
Expr::Num(string) => {
|
||||
let new_string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes[..string.len() + 1]) };
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
|
||||
|
||||
Expr::Num(new_string)
|
||||
}
|
||||
Expr::Float(string) => {
|
||||
let new_string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes[..string.len() + 1]) };
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
|
||||
|
||||
Expr::Float(new_string)
|
||||
}
|
||||
@ -1283,8 +1278,8 @@ fn parse_expr_end<'a>(
|
||||
}
|
||||
Err((NoProgress, _, mut state)) => {
|
||||
// try multi-backpassing
|
||||
if options.accept_multi_backpassing && state.bytes.starts_with(b",") {
|
||||
state.bytes = &state.bytes[1..];
|
||||
if options.accept_multi_backpassing && state.bytes().starts_with(b",") {
|
||||
state = state.advance(1);
|
||||
state.column += 1;
|
||||
|
||||
let (_, mut patterns, state) = specialize_ref(
|
||||
@ -1344,7 +1339,7 @@ fn parse_expr_end<'a>(
|
||||
Ok((MadeProgress, ret, state))
|
||||
}
|
||||
}
|
||||
} else if options.check_for_arrow && state.bytes.starts_with(b"->") {
|
||||
} else if options.check_for_arrow && state.bytes().starts_with(b"->") {
|
||||
Err((
|
||||
MadeProgress,
|
||||
EExpr::BadOperator(&[b'-', b'>'], state.line, state.column),
|
||||
@ -1755,7 +1750,7 @@ mod when {
|
||||
}
|
||||
);
|
||||
|
||||
while !state.bytes.is_empty() {
|
||||
while !state.bytes().is_empty() {
|
||||
match branch_parser.parse(arena, state) {
|
||||
Ok((_, next_output, next_state)) => {
|
||||
state = next_state;
|
||||
@ -1773,14 +1768,10 @@ mod when {
|
||||
}
|
||||
}
|
||||
|
||||
Ok((
|
||||
MadeProgress,
|
||||
branches,
|
||||
State {
|
||||
indent_col: when_indent,
|
||||
..state
|
||||
},
|
||||
))
|
||||
let mut state = state;
|
||||
state.indent_col = when_indent;
|
||||
|
||||
Ok((MadeProgress, branches, state))
|
||||
}
|
||||
}
|
||||
|
||||
@ -2396,12 +2387,12 @@ where
|
||||
G: Fn(&'a [u8], Row, Col) -> E,
|
||||
E: 'a,
|
||||
{
|
||||
let chomped = chomp_ops(state.bytes);
|
||||
let chomped = chomp_ops(state.bytes());
|
||||
|
||||
macro_rules! good {
|
||||
($op:expr, $width:expr) => {{
|
||||
state.column += $width;
|
||||
state.bytes = &state.bytes[$width..];
|
||||
state = state.advance($width);
|
||||
|
||||
Ok((MadeProgress, $op, state))
|
||||
}};
|
||||
@ -2416,7 +2407,7 @@ where
|
||||
match chomped {
|
||||
0 => Err((NoProgress, to_expectation(state.line, state.column), state)),
|
||||
1 => {
|
||||
let op = state.bytes[0];
|
||||
let op = state.bytes()[0];
|
||||
match op {
|
||||
b'+' => good!(BinOp::Plus, 1),
|
||||
b'-' => good!(BinOp::Minus, 1),
|
||||
@ -2432,12 +2423,12 @@ where
|
||||
}
|
||||
b'=' => good!(BinOp::Assignment, 1),
|
||||
b':' => good!(BinOp::HasType, 1),
|
||||
_ => bad_made_progress!(&state.bytes[0..1]),
|
||||
_ => bad_made_progress!(&state.bytes()[0..1]),
|
||||
}
|
||||
}
|
||||
2 => {
|
||||
let op0 = state.bytes[0];
|
||||
let op1 = state.bytes[1];
|
||||
let op0 = state.bytes()[0];
|
||||
let op1 = state.bytes()[1];
|
||||
|
||||
match (op0, op1) {
|
||||
(b'|', b'>') => good!(BinOp::Pizza, 2),
|
||||
@ -2454,10 +2445,10 @@ where
|
||||
Err((NoProgress, to_error(b"->", state.line, state.column), state))
|
||||
}
|
||||
(b'<', b'-') => good!(BinOp::Backpassing, 2),
|
||||
_ => bad_made_progress!(&state.bytes[0..2]),
|
||||
_ => bad_made_progress!(&state.bytes()[0..2]),
|
||||
}
|
||||
}
|
||||
_ => bad_made_progress!(&state.bytes[0..chomped]),
|
||||
_ => bad_made_progress!(&state.bytes()[0..chomped]),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2,9 +2,8 @@ use crate::ast::{Collection, CommentOrNewline, Spaced, StrLiteral, TypeAnnotatio
|
||||
use crate::blankspace::space0_e;
|
||||
use crate::ident::lowercase_ident;
|
||||
use crate::parser::Progress::{self, *};
|
||||
use crate::parser::{
|
||||
specialize, word1, EPackageEntry, EPackageName, EPackageOrPath, Parser, State,
|
||||
};
|
||||
use crate::parser::{specialize, word1, EPackageEntry, EPackageName, EPackageOrPath, Parser};
|
||||
use crate::state::State;
|
||||
use crate::string_literal;
|
||||
use bumpalo::collections::Vec;
|
||||
use roc_region::all::Loc;
|
||||
@ -282,8 +281,8 @@ where
|
||||
T: 'a,
|
||||
{
|
||||
|_, mut state: State<'a>| {
|
||||
let mut chomped = 0;
|
||||
let mut it = state.bytes.iter();
|
||||
let mut chomped = 0usize;
|
||||
let mut it = state.bytes().iter();
|
||||
|
||||
while let Some(b' ') = it.next() {
|
||||
chomped += 1;
|
||||
@ -292,8 +291,8 @@ where
|
||||
if chomped == 0 {
|
||||
Ok((NoProgress, (), state))
|
||||
} else {
|
||||
state.column += chomped;
|
||||
state.bytes = it.as_slice();
|
||||
state.column += chomped as u16;
|
||||
state = state.advance(chomped);
|
||||
|
||||
Ok((MadeProgress, (), state))
|
||||
}
|
||||
@ -316,7 +315,7 @@ pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
|
||||
// They cannot contain underscores or other special characters.
|
||||
// They must be ASCII.
|
||||
|
||||
|_, mut state: State<'a>| match chomp_package_part(state.bytes) {
|
||||
|_, mut state: State<'a>| match chomp_package_part(state.bytes()) {
|
||||
Err(progress) => Err((
|
||||
progress,
|
||||
EPackageName::Account(state.line, state.column),
|
||||
@ -324,9 +323,9 @@ pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
|
||||
)),
|
||||
Ok(account) => {
|
||||
let mut chomped = account.len();
|
||||
if let Ok(('/', width)) = char::from_utf8_slice_start(&state.bytes[chomped..]) {
|
||||
if let Ok(('/', width)) = char::from_utf8_slice_start(&state.bytes()[chomped..]) {
|
||||
chomped += width;
|
||||
match chomp_package_part(&state.bytes[chomped..]) {
|
||||
match chomp_package_part(&state.bytes()[chomped..]) {
|
||||
Err(progress) => Err((
|
||||
progress,
|
||||
EPackageName::Pkg(state.line, state.column + chomped as u16),
|
||||
@ -336,7 +335,7 @@ pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
|
||||
chomped += pkg.len();
|
||||
|
||||
state.column += chomped as u16;
|
||||
state.bytes = &state.bytes[chomped..];
|
||||
state = state.advance(chomped);
|
||||
|
||||
let value = PackageName { account, pkg };
|
||||
Ok((MadeProgress, value, state))
|
||||
|
@ -1,5 +1,6 @@
|
||||
use crate::parser::Progress::{self, *};
|
||||
use crate::parser::{BadInputError, Col, EExpr, ParseResult, Parser, Row, State};
|
||||
use crate::parser::{BadInputError, Col, EExpr, ParseResult, Parser, Row};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
|
||||
@ -59,7 +60,7 @@ impl<'a> Ident<'a> {
|
||||
/// * A record field, e.g. "email" in `.email` or in `email:`
|
||||
/// * A named pattern match, e.g. "foo" in `foo =` or `foo ->` or `\foo ->`
|
||||
pub fn lowercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |_, state: State<'a>| match chomp_lowercase_part(state.bytes) {
|
||||
move |_, state: State<'a>| match chomp_lowercase_part(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) {
|
||||
@ -77,8 +78,8 @@ pub fn lowercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
|
||||
pub fn tag_name<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |arena, state: State<'a>| {
|
||||
if state.bytes.starts_with(b"@") {
|
||||
match chomp_private_tag(state.bytes, state.line, state.column) {
|
||||
if state.bytes().starts_with(b"@") {
|
||||
match chomp_private_tag(state.bytes(), state.line, state.column) {
|
||||
Err(BadIdent::Start(_, _)) => Err((NoProgress, (), state)),
|
||||
Err(_) => Err((MadeProgress, (), state)),
|
||||
Ok(ident) => {
|
||||
@ -101,7 +102,7 @@ pub fn tag_name<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
/// * A type name
|
||||
/// * A global tag
|
||||
pub fn uppercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |_, state: State<'a>| match chomp_uppercase_part(state.bytes) {
|
||||
move |_, state: State<'a>| match chomp_uppercase_part(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
let width = ident.len();
|
||||
@ -114,7 +115,7 @@ pub fn uppercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
}
|
||||
|
||||
pub fn unqualified_ident<'a>() -> impl Parser<'a, &'a str, ()> {
|
||||
move |_, state: State<'a>| match chomp_part(|c| c.is_alphabetic(), state.bytes) {
|
||||
move |_, state: State<'a>| match chomp_part(|c| c.is_alphabetic(), state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok(ident) => {
|
||||
if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) {
|
||||
@ -167,7 +168,7 @@ pub fn parse_ident<'a>(arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Ide
|
||||
Err((MadeProgress, fail, state)) => match fail {
|
||||
BadIdent::Start(r, c) => Err((NoProgress, EExpr::Start(r, c), state)),
|
||||
BadIdent::Space(e, r, c) => Err((NoProgress, EExpr::Space(e, r, c), state)),
|
||||
_ => malformed_identifier(initial.bytes, fail, state),
|
||||
_ => malformed_identifier(initial.bytes(), fail, state),
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -177,8 +178,8 @@ fn malformed_identifier<'a>(
|
||||
problem: BadIdent,
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, Ident<'a>, EExpr<'a>> {
|
||||
let chomped = chomp_malformed(state.bytes);
|
||||
let delta = initial_bytes.len() - state.bytes.len();
|
||||
let chomped = chomp_malformed(state.bytes());
|
||||
let delta = initial_bytes.len() - state.bytes().len();
|
||||
let parsed_str = unsafe { std::str::from_utf8_unchecked(&initial_bytes[..chomped + delta]) };
|
||||
|
||||
state = state.advance_without_indenting_ee(chomped, |r, c| {
|
||||
@ -448,7 +449,7 @@ fn chomp_module_chain(buffer: &[u8]) -> Result<u16, Progress> {
|
||||
}
|
||||
|
||||
pub fn concrete_type<'a>() -> impl Parser<'a, (&'a str, &'a str), ()> {
|
||||
move |_, state: State<'a>| match chomp_concrete_type(state.bytes) {
|
||||
move |_, state: State<'a>| match chomp_concrete_type(state.bytes()) {
|
||||
Err(progress) => Err((progress, (), state)),
|
||||
Ok((module_name, type_name, width)) => {
|
||||
match state.advance_without_indenting_ee(width, |_, _| ()) {
|
||||
@ -527,7 +528,7 @@ fn parse_ident_help<'a>(
|
||||
arena: &'a Bump,
|
||||
mut state: State<'a>,
|
||||
) -> ParseResult<'a, Ident<'a>, BadIdent> {
|
||||
match chomp_identifier_chain(arena, state.bytes, state.line, state.column) {
|
||||
match chomp_identifier_chain(arena, state.bytes(), state.line, state.column) {
|
||||
Ok((width, ident)) => {
|
||||
state = advance_state!(state, width as usize)?;
|
||||
Ok((MadeProgress, ident, state))
|
||||
|
@ -14,6 +14,7 @@ pub mod module;
|
||||
pub mod number_literal;
|
||||
pub mod pattern;
|
||||
pub mod problems;
|
||||
pub mod state;
|
||||
pub mod string_literal;
|
||||
pub mod test_helpers;
|
||||
pub mod type_annotation;
|
||||
|
@ -9,8 +9,9 @@ use crate::ident::{lowercase_ident, unqualified_ident, uppercase_ident};
|
||||
use crate::parser::Progress::{self, *};
|
||||
use crate::parser::{
|
||||
backtrackable, specialize, word1, word2, Col, EEffects, EExposes, EHeader, EImports, EPackages,
|
||||
EProvides, ERequires, ETypedIdent, Parser, Row, State, SyntaxError,
|
||||
EProvides, ERequires, ETypedIdent, Parser, Row, SyntaxError,
|
||||
};
|
||||
use crate::state::State;
|
||||
use crate::string_literal;
|
||||
use crate::type_annotation;
|
||||
use bumpalo::collections::Vec;
|
||||
@ -171,11 +172,11 @@ fn chomp_module_name(buffer: &[u8]) -> Result<&str, Progress> {
|
||||
|
||||
#[inline(always)]
|
||||
fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>, ()> {
|
||||
|_, mut state: State<'a>| match chomp_module_name(state.bytes) {
|
||||
|_, mut state: State<'a>| match chomp_module_name(state.bytes()) {
|
||||
Ok(name) => {
|
||||
let width = name.len();
|
||||
state.column += width as u16;
|
||||
state.bytes = &state.bytes[width..];
|
||||
state = state.advance(width);
|
||||
|
||||
Ok((MadeProgress, ModuleName::new(name), state))
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use crate::ast::Base;
|
||||
use crate::parser::{ENumber, ParseResult, Parser, Progress, State};
|
||||
use crate::parser::{ENumber, ParseResult, Parser, Progress};
|
||||
use crate::state::State;
|
||||
|
||||
pub enum NumLiteral<'a> {
|
||||
Float(&'a str),
|
||||
@ -13,9 +14,9 @@ pub enum NumLiteral<'a> {
|
||||
|
||||
pub fn positive_number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, ENumber> {
|
||||
move |_arena, state: State<'a>| {
|
||||
match state.bytes.get(0) {
|
||||
match state.bytes().get(0) {
|
||||
Some(first_byte) if (*first_byte as char).is_ascii_digit() => {
|
||||
parse_number_base(false, state.bytes, state)
|
||||
parse_number_base(false, state.bytes(), state)
|
||||
}
|
||||
_ => {
|
||||
// this is not a number at all
|
||||
@ -27,13 +28,13 @@ pub fn positive_number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, ENumber>
|
||||
|
||||
pub fn number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, ENumber> {
|
||||
move |_arena, state: State<'a>| {
|
||||
match state.bytes.get(0) {
|
||||
match state.bytes().get(0) {
|
||||
Some(first_byte) if *first_byte == b'-' => {
|
||||
// drop the minus
|
||||
parse_number_base(true, &state.bytes[1..], state)
|
||||
parse_number_base(true, &state.bytes()[1..], state)
|
||||
}
|
||||
Some(first_byte) if (*first_byte as char).is_ascii_digit() => {
|
||||
parse_number_base(false, state.bytes, state)
|
||||
parse_number_base(false, state.bytes(), state)
|
||||
}
|
||||
_ => {
|
||||
// this is not a number at all
|
||||
@ -99,7 +100,7 @@ fn chomp_number_dec<'a>(
|
||||
}
|
||||
|
||||
let string =
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes[0..chomped + is_negative as usize]) };
|
||||
unsafe { std::str::from_utf8_unchecked(&state.bytes()[0..chomped + is_negative as usize]) };
|
||||
|
||||
let new = state.advance_without_indenting_ee(chomped + is_negative as usize, |_, _| {
|
||||
ENumber::LineTooLong
|
||||
|
@ -1,143 +1,15 @@
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::{Located, Position, Region};
|
||||
use std::fmt;
|
||||
use roc_region::all::{Located, Region};
|
||||
use Progress::*;
|
||||
|
||||
/// The parser's cursor over the source: the input that is still left to parse,
/// together with the line, column and indentation column reached so far.
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
pub struct State<'a> {
|
||||
/// The input bytes that remain to be parsed. The advance methods re-slice
/// this from the front, so it shrinks as parsing proceeds.
|
||||
pub bytes: &'a [u8],
|
||||
|
||||
/// Current line of the input (`State::new` starts at 0)
|
||||
pub line: u32,
|
||||
/// Current column of the input (`State::new` starts at 0)
|
||||
pub column: u16,
|
||||
|
||||
/// Current indentation level, in columns
|
||||
/// (so no indent is col 1 - this saves an arithmetic operation.)
/// NOTE(review): `State::new` initializes this to 0, not 1 - confirm which is intended.
|
||||
pub indent_col: u16,
|
||||
}
|
||||
|
||||
/// A value that is exactly one of two alternatives, each with its own type.
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Either<First, Second> {
|
||||
/// The first alternative.
First(First),
|
||||
/// The second alternative.
Second(Second),
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
/// Creates a state positioned at the start of `bytes`:
/// line 0, column 0 and an indentation column of 0.
pub fn new(bytes: &'a [u8]) -> State<'a> {
|
||||
State {
|
||||
bytes,
|
||||
line: 0,
|
||||
column: 0,
|
||||
indent_col: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current position in the input as a `Position`
/// (`row` is the current line, `col` is the current column).
|
||||
pub const fn get_position(&self) -> Position {
|
||||
Position {
|
||||
row: self.line,
|
||||
col: self.column,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the parser has reached the end of the input,
/// i.e. whether there are no bytes left to parse.
|
||||
pub const fn has_reached_end(&self) -> bool {
|
||||
self.bytes.is_empty()
|
||||
}
|
||||
|
||||
/// Advances the state by `quantity` bytes on the current line.
///
/// Use advance_spaces to advance with indenting.
|
||||
/// This assumes we are *not* advancing with spaces, or at least that
|
||||
/// any spaces on the line were preceded by non-spaces - which would mean
|
||||
/// they weren't eligible to indent anyway.
///
/// This is a thin wrapper around `advance_without_indenting_ee`: when that
/// fails, the error value is built by calling `to_error` with
/// `BadInputError::LineTooLong` and the row/column it reports.
|
||||
pub fn advance_without_indenting_e<TE, E>(
|
||||
self,
|
||||
quantity: usize,
|
||||
to_error: TE,
|
||||
) -> Result<Self, (Progress, E, Self)>
|
||||
where
|
||||
TE: Fn(BadInputError, Row, Col) -> E,
|
||||
{
|
||||
self.advance_without_indenting_ee(quantity, |r, c| {
|
||||
to_error(BadInputError::LineTooLong, r, c)
|
||||
})
|
||||
}
|
||||
|
||||
/// Advances the state by `quantity` bytes on the current line: the first
/// `quantity` bytes are dropped from `bytes` and `column` grows by `quantity`.
/// `line` and `indent_col` are carried over unchanged.
///
/// # Errors
///
/// If the new column would not fit in a `u16`, returns `NoProgress`, the
/// error built by `to_error(line, column)` and the unchanged state.
///
/// # Panics
///
/// Panics if `quantity` is larger than the number of remaining bytes,
/// because the remaining input is sliced with `[quantity..]`.
pub fn advance_without_indenting_ee<TE, E>(
|
||||
self,
|
||||
quantity: usize,
|
||||
to_error: TE,
|
||||
) -> Result<Self, (Progress, E, Self)>
|
||||
where
|
||||
TE: Fn(Row, Col) -> E,
|
||||
{
|
||||
// Do the addition in usize so an oversized `quantity` is detected
// instead of wrapping the u16 column.
match (self.column as usize).checked_add(quantity) {
|
||||
Some(column_usize) if column_usize <= u16::MAX as usize => {
|
||||
Ok(State {
|
||||
bytes: &self.bytes[quantity..],
|
||||
column: column_usize as u16,
|
||||
// Once we hit a nonspace character, we are no longer indenting.
// NOTE(review): no indentation-related field is updated here;
// `indent_col` is carried over unchanged via `..self`.
|
||||
..self
|
||||
})
|
||||
}
|
||||
// Either the addition overflowed usize or the column exceeds u16::MAX.
_ => Err((NoProgress, to_error(self.line, self.column), self)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a Region corresponding to the current state, but
|
||||
/// with the end_col advanced by the given amount. This is
|
||||
/// useful when parsing something "manually" (using input.chars())
|
||||
/// and thus wanting a Region while not having access to loc().
///
/// The region starts and ends on the current line.
///
/// # Panics
///
/// Panics with "len_region overflowed" if `column + length` does not fit in a `u16`.
|
||||
pub fn len_region(&self, length: u16) -> Region {
|
||||
Region {
|
||||
start_col: self.column,
|
||||
start_line: self.line,
|
||||
end_col: self
|
||||
.column
|
||||
.checked_add(length)
|
||||
.unwrap_or_else(|| panic!("len_region overflowed")),
|
||||
end_line: self.line,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a failing parse result: an `Err` carrying the given `progress`,
/// the given `reason` and this state, unchanged.
///
/// The `_arena` argument is not used.
|
||||
pub fn fail<T, X>(
|
||||
self,
|
||||
_arena: &'a Bump,
|
||||
progress: Progress,
|
||||
reason: X,
|
||||
) -> Result<(Progress, T, Self), (Progress, X, Self)> {
|
||||
Err((progress, reason, self))
|
||||
}
|
||||
}
|
||||
|
||||
/// Multi-line debug output for `State`: the remaining bytes, then the
/// (line, col) pair, then `indent_col`.
impl<'a> fmt::Debug for State<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "State {{")?;
|
||||
|
||||
// Show the remaining input as text when it is valid UTF-8,
// otherwise fall back to printing the raw byte slice.
match std::str::from_utf8(self.bytes) {
|
||||
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
|
||||
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
|
||||
}
|
||||
|
||||
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
|
||||
write!(f, "\n\tindent_col: {}", self.indent_col)?;
|
||||
write!(f, "\n}}")
|
||||
}
|
||||
}
|
||||
|
||||
/// Guards against `State` growing: its size must not exceed 8 machine words.
#[test]
|
||||
fn state_size() {
|
||||
// State should always be at most 8 machine words, so it fits in a typical
|
||||
// cache line.
|
||||
let state_size = std::mem::size_of::<State>();
|
||||
let maximum = std::mem::size_of::<usize>() * 8;
|
||||
assert!(state_size <= maximum, "{:?} <= {:?}", state_size, maximum);
|
||||
}
|
||||
|
||||
/// The result of running a parser. Both variants carry a `Progress` and a
/// `State`; `Ok` additionally carries the parsed `Output`, `Err` the `Error`.
pub type ParseResult<'a, Output, Error> =
|
||||
Result<(Progress, Output, State<'a>), (Progress, Error, State<'a>)>;
|
||||
|
||||
@ -776,21 +648,21 @@ where
|
||||
move |_, mut state: State<'a>| {
|
||||
let width = keyword.len();
|
||||
|
||||
if !state.bytes.starts_with(keyword.as_bytes()) {
|
||||
if !state.bytes().starts_with(keyword.as_bytes()) {
|
||||
return Err((NoProgress, if_error(state.line, state.column), state));
|
||||
}
|
||||
|
||||
// the next character should not be an identifier character
|
||||
// to prevent treating `whence` or `iffy` as keywords
|
||||
match state.bytes.get(width) {
|
||||
match state.bytes().get(width) {
|
||||
Some(next) if *next == b' ' || *next == b'#' || *next == b'\n' => {
|
||||
state.column += width as u16;
|
||||
state.bytes = &state.bytes[width..];
|
||||
state = state.advance(width);
|
||||
Ok((MadeProgress, (), state))
|
||||
}
|
||||
None => {
|
||||
state.column += width as u16;
|
||||
state.bytes = &state.bytes[width..];
|
||||
state = state.advance(width);
|
||||
Ok((MadeProgress, (), state))
|
||||
}
|
||||
Some(_) => Err((NoProgress, if_error(state.line, state.column), state)),
|
||||
@ -810,7 +682,7 @@ where
|
||||
Error: 'a,
|
||||
{
|
||||
move |arena, state: State<'a>| {
|
||||
let start_bytes_len = state.bytes.len();
|
||||
let start_bytes_len = state.bytes().len();
|
||||
|
||||
match parser.parse(arena, state) {
|
||||
Ok((elem_progress, first_output, next_state)) => {
|
||||
@ -837,8 +709,10 @@ where
|
||||
Err((_, fail, state)) => {
|
||||
// If the delimiter parsed, but the following
|
||||
// element did not, that's a fatal error.
|
||||
let progress =
|
||||
Progress::from_lengths(start_bytes_len, state.bytes.len());
|
||||
let progress = Progress::from_lengths(
|
||||
start_bytes_len,
|
||||
state.bytes().len(),
|
||||
);
|
||||
|
||||
return Err((progress, fail, state));
|
||||
}
|
||||
@ -871,7 +745,7 @@ where
|
||||
Error: 'a,
|
||||
{
|
||||
move |arena, state: State<'a>| {
|
||||
let start_bytes_len = state.bytes.len();
|
||||
let start_bytes_len = state.bytes().len();
|
||||
|
||||
match parser.parse(arena, state) {
|
||||
Ok((progress, first_output, next_state)) => {
|
||||
@ -899,7 +773,7 @@ where
|
||||
// element did not, that means we saw a trailing comma
|
||||
let progress = Progress::from_lengths(
|
||||
start_bytes_len,
|
||||
old_state.bytes.len(),
|
||||
old_state.bytes().len(),
|
||||
);
|
||||
return Ok((progress, buf, old_state));
|
||||
}
|
||||
@ -932,7 +806,7 @@ where
|
||||
Error: 'a,
|
||||
{
|
||||
move |arena, state: State<'a>| {
|
||||
let start_bytes_len = state.bytes.len();
|
||||
let start_bytes_len = state.bytes().len();
|
||||
|
||||
match parser.parse(arena, state) {
|
||||
Ok((progress, first_output, next_state)) => {
|
||||
@ -965,7 +839,7 @@ where
|
||||
NoProgress => {
|
||||
let progress = Progress::from_lengths(
|
||||
start_bytes_len,
|
||||
old_state.bytes.len(),
|
||||
old_state.bytes().len(),
|
||||
);
|
||||
return Ok((progress, buf, old_state));
|
||||
}
|
||||
@ -993,7 +867,7 @@ where
|
||||
Error: 'a,
|
||||
{
|
||||
move |arena, state: State<'a>| {
|
||||
let start_bytes_len = state.bytes.len();
|
||||
let start_bytes_len = state.bytes().len();
|
||||
|
||||
match parser.parse(arena, state) {
|
||||
Ok((progress, first_output, next_state)) => {
|
||||
@ -1033,7 +907,7 @@ where
|
||||
NoProgress => {
|
||||
let progress = Progress::from_lengths(
|
||||
start_bytes_len,
|
||||
old_state.bytes.len(),
|
||||
old_state.bytes().len(),
|
||||
);
|
||||
return Ok((progress, buf, old_state));
|
||||
}
|
||||
@ -1094,7 +968,7 @@ where
|
||||
#[macro_export]
|
||||
macro_rules! loc {
|
||||
($parser:expr) => {
|
||||
move |arena, state: $crate::parser::State<'a>| {
|
||||
move |arena, state: $crate::state::State<'a>| {
|
||||
use roc_region::all::{Located, Region};
|
||||
|
||||
let start_col = state.column;
|
||||
@ -1123,7 +997,7 @@ macro_rules! loc {
|
||||
#[macro_export]
|
||||
macro_rules! skip_first {
|
||||
($p1:expr, $p2:expr) => {
|
||||
move |arena, state: $crate::parser::State<'a>| {
|
||||
move |arena, state: $crate::state::State<'a>| {
|
||||
let original_state = state.clone();
|
||||
|
||||
match $p1.parse(arena, state) {
|
||||
@ -1142,7 +1016,7 @@ macro_rules! skip_first {
|
||||
#[macro_export]
|
||||
macro_rules! skip_second {
|
||||
($p1:expr, $p2:expr) => {
|
||||
move |arena, state: $crate::parser::State<'a>| {
|
||||
move |arena, state: $crate::state::State<'a>| {
|
||||
let original_state = state.clone();
|
||||
|
||||
match $p1.parse(arena, state) {
|
||||
@ -1243,7 +1117,7 @@ macro_rules! collection_trailing_sep_e {
|
||||
#[macro_export]
|
||||
macro_rules! succeed {
|
||||
($value:expr) => {
|
||||
move |_arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
|
||||
move |_arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
|
||||
Ok((NoProgress, $value, state))
|
||||
}
|
||||
};
|
||||
@ -1252,7 +1126,7 @@ macro_rules! succeed {
|
||||
#[macro_export]
|
||||
macro_rules! and {
|
||||
($p1:expr, $p2:expr) => {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
|
||||
// We have to clone this because if the first parser passes and then
|
||||
// the second one fails, we need to revert back to the original state.
|
||||
let original_state = state.clone();
|
||||
@ -1271,7 +1145,7 @@ macro_rules! and {
|
||||
#[macro_export]
|
||||
macro_rules! one_of {
|
||||
($p1:expr, $p2:expr) => {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
|
||||
|
||||
match $p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
@ -1292,7 +1166,7 @@ macro_rules! one_of {
|
||||
#[macro_export]
|
||||
macro_rules! maybe {
|
||||
($p1:expr) => {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| match $p1
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| match $p1
|
||||
.parse(arena, state)
|
||||
{
|
||||
Ok((progress, value, state)) => Ok((progress, Some(value), state)),
|
||||
@ -1305,7 +1179,7 @@ macro_rules! maybe {
|
||||
#[macro_export]
|
||||
macro_rules! one_of_with_error {
|
||||
($toerror:expr; $p1:expr) => {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
|
||||
|
||||
match $p1.parse(arena, state) {
|
||||
valid @ Ok(_) => valid,
|
||||
@ -1352,16 +1226,12 @@ where
|
||||
{
|
||||
debug_assert_ne!(word, b'\n');
|
||||
|
||||
move |_arena: &'a Bump, state: State<'a>| match state.bytes.get(0) {
|
||||
Some(x) if *x == word => Ok((
|
||||
MadeProgress,
|
||||
(),
|
||||
State {
|
||||
bytes: &state.bytes[1..],
|
||||
column: state.column + 1,
|
||||
..state
|
||||
},
|
||||
)),
|
||||
move |_arena: &'a Bump, state: State<'a>| match state.bytes().get(0) {
|
||||
Some(x) if *x == word => {
|
||||
let mut state = state.advance(1);
|
||||
state.column += 1;
|
||||
Ok((MadeProgress, (), state))
|
||||
}
|
||||
_ => Err((NoProgress, to_error(state.line, state.column), state)),
|
||||
}
|
||||
}
|
||||
@ -1377,16 +1247,10 @@ where
|
||||
let needle = [word_1, word_2];
|
||||
|
||||
move |_arena: &'a Bump, state: State<'a>| {
|
||||
if state.bytes.starts_with(&needle) {
|
||||
Ok((
|
||||
MadeProgress,
|
||||
(),
|
||||
State {
|
||||
bytes: &state.bytes[2..],
|
||||
column: state.column + 2,
|
||||
..state
|
||||
},
|
||||
))
|
||||
if state.bytes().starts_with(&needle) {
|
||||
let mut state = state.advance(2);
|
||||
state.column += 2;
|
||||
Ok((MadeProgress, (), state))
|
||||
} else {
|
||||
Err((NoProgress, to_error(state.line, state.column), state))
|
||||
}
|
||||
@ -1448,7 +1312,7 @@ macro_rules! zero_or_more {
|
||||
move |arena, state: State<'a>| {
|
||||
use bumpalo::collections::Vec;
|
||||
|
||||
let start_bytes_len = state.bytes.len();
|
||||
let start_bytes_len = state.bytes().len();
|
||||
|
||||
match $parser.parse(arena, state) {
|
||||
Ok((_, first_output, next_state)) => {
|
||||
@ -1472,7 +1336,7 @@ macro_rules! zero_or_more {
|
||||
NoProgress => {
|
||||
// the next element failed with no progress
|
||||
// report whether we made progress before
|
||||
let progress = Progress::from_lengths(start_bytes_len, old_state.bytes.len());
|
||||
let progress = Progress::from_lengths(start_bytes_len, old_state.bytes().len());
|
||||
return Ok((progress, buf, old_state));
|
||||
}
|
||||
}
|
||||
@ -1539,14 +1403,14 @@ macro_rules! one_or_more {
|
||||
#[macro_export]
|
||||
macro_rules! debug {
|
||||
($parser:expr) => {
|
||||
move |arena, state: $crate::parser::State<'a>| dbg!($parser.parse(arena, state))
|
||||
move |arena, state: $crate::state::State<'a>| dbg!($parser.parse(arena, state))
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! either {
|
||||
($p1:expr, $p2:expr) => {
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| match $p1
|
||||
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| match $p1
|
||||
.parse(arena, state)
|
||||
{
|
||||
Ok((progress, output, state)) => {
|
||||
|
@ -4,8 +4,9 @@ use crate::ident::{lowercase_ident, parse_ident, Ident};
|
||||
use crate::parser::Progress::{self, *};
|
||||
use crate::parser::{
|
||||
backtrackable, optional, specialize, specialize_ref, word1, EPattern, PInParens, PRecord,
|
||||
ParseResult, Parser, State,
|
||||
ParseResult, Parser,
|
||||
};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::string::String;
|
||||
use bumpalo::collections::Vec;
|
||||
use bumpalo::Bump;
|
||||
|
144
compiler/parse/src/state.rs
Normal file
144
compiler/parse/src/state.rs
Normal file
@ -0,0 +1,144 @@
|
||||
use crate::parser::Progress::*;
|
||||
use crate::parser::{BadInputError, Col, Progress, Row};
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::{Position, Region};
|
||||
use std::fmt;
|
||||
|
||||
/// The parser's state: the input bytes being parsed, plus the current line, column, and indentation.
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
pub struct State<'a> {
|
||||
/// The raw input bytes from the file.
|
||||
bytes: &'a [u8],
|
||||
|
||||
/// Current line of the input
|
||||
pub line: u32,
|
||||
/// Current column of the input
|
||||
pub column: u16,
|
||||
|
||||
/// Current indentation level, in columns
|
||||
/// (so no indent is col 1 - this saves an arithmetic operation.)
|
||||
pub indent_col: u16,
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
pub fn new(bytes: &'a [u8]) -> State<'a> {
|
||||
State {
|
||||
bytes,
|
||||
line: 0,
|
||||
column: 0,
|
||||
indent_col: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn bytes(&self) -> &'a [u8] {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn advance(&self, offset: usize) -> State<'a> {
|
||||
let mut state = *self;
|
||||
state.bytes = &state.bytes[offset..];
|
||||
state
|
||||
}
|
||||
|
||||
/// Returns the current position (line and column) in the input
|
||||
pub const fn get_position(&self) -> Position {
|
||||
Position {
|
||||
row: self.line,
|
||||
col: self.column,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the parser has reached the end of the input
|
||||
pub const fn has_reached_end(&self) -> bool {
|
||||
self.bytes.is_empty()
|
||||
}
|
||||
|
||||
/// Use advance_spaces to advance with indenting.
|
||||
/// This assumes we are *not* advancing with spaces, or at least that
|
||||
/// any spaces on the line were preceded by non-spaces - which would mean
|
||||
/// they weren't eligible to indent anyway.
|
||||
pub fn advance_without_indenting_e<TE, E>(
|
||||
self,
|
||||
quantity: usize,
|
||||
to_error: TE,
|
||||
) -> Result<Self, (Progress, E, Self)>
|
||||
where
|
||||
TE: Fn(BadInputError, Row, Col) -> E,
|
||||
{
|
||||
self.advance_without_indenting_ee(quantity, |r, c| {
|
||||
to_error(BadInputError::LineTooLong, r, c)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn advance_without_indenting_ee<TE, E>(
|
||||
self,
|
||||
quantity: usize,
|
||||
to_error: TE,
|
||||
) -> Result<Self, (Progress, E, Self)>
|
||||
where
|
||||
TE: Fn(Row, Col) -> E,
|
||||
{
|
||||
match (self.column as usize).checked_add(quantity) {
|
||||
Some(column_usize) if column_usize <= u16::MAX as usize => {
|
||||
Ok(State {
|
||||
bytes: &self.bytes[quantity..],
|
||||
column: column_usize as u16,
|
||||
// Once we hit a nonspace character, we are no longer indenting.
|
||||
..self
|
||||
})
|
||||
}
|
||||
_ => Err((NoProgress, to_error(self.line, self.column), self)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a Region corresponding to the current state, but
|
||||
/// with the end_col advanced by the given amount. This is
|
||||
/// useful when parsing something "manually" (using input.chars())
|
||||
/// and thus wanting a Region while not having access to loc().
|
||||
pub fn len_region(&self, length: u16) -> Region {
|
||||
Region {
|
||||
start_col: self.column,
|
||||
start_line: self.line,
|
||||
end_col: self
|
||||
.column
|
||||
.checked_add(length)
|
||||
.unwrap_or_else(|| panic!("len_region overflowed")),
|
||||
end_line: self.line,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a failing parse result (`Err`) carrying the given progress, the given reason, and this state
|
||||
pub fn fail<T, X>(
|
||||
self,
|
||||
_arena: &'a Bump,
|
||||
progress: Progress,
|
||||
reason: X,
|
||||
) -> Result<(Progress, T, Self), (Progress, X, Self)> {
|
||||
Err((progress, reason, self))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> fmt::Debug for State<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "State {{")?;
|
||||
|
||||
match std::str::from_utf8(self.bytes) {
|
||||
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
|
||||
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
|
||||
}
|
||||
|
||||
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
|
||||
write!(f, "\n\tindent_col: {}", self.indent_col)?;
|
||||
write!(f, "\n}}")
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn state_size() {
|
||||
// State should always be at most 8 machine words, so it fits in a typical
|
||||
// cache line.
|
||||
let state_size = std::mem::size_of::<State>();
|
||||
let maximum = std::mem::size_of::<usize>() * 8;
|
||||
assert!(state_size <= maximum, "{:?} <= {:?}", state_size, maximum);
|
||||
}
|
@ -1,7 +1,8 @@
|
||||
use crate::ast::{EscapedChar, StrLiteral, StrSegment};
|
||||
use crate::expr;
|
||||
use crate::parser::Progress::*;
|
||||
use crate::parser::{allocated, loc, specialize_ref, word1, BadInputError, EString, Parser, State};
|
||||
use crate::parser::{allocated, loc, specialize_ref, word1, BadInputError, EString, Parser};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
|
||||
@ -11,7 +12,7 @@ fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
|
||||
move |arena, state: State<'a>| {
|
||||
let mut buf = bumpalo::collections::String::new_in(arena);
|
||||
|
||||
for &byte in state.bytes.iter() {
|
||||
for &byte in state.bytes().iter() {
|
||||
if (byte as char).is_ascii_hexdigit() {
|
||||
buf.push(byte as char);
|
||||
} else if buf.is_empty() {
|
||||
@ -53,15 +54,15 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
let is_multiline;
|
||||
let mut bytes;
|
||||
|
||||
if state.bytes.starts_with(b"\"\"\"") {
|
||||
if state.bytes().starts_with(b"\"\"\"") {
|
||||
// we will be parsing a multi-line string
|
||||
is_multiline = true;
|
||||
bytes = state.bytes[3..].iter();
|
||||
bytes = state.bytes()[3..].iter();
|
||||
state = advance_state!(state, 3)?;
|
||||
} else if state.bytes.starts_with(b"\"") {
|
||||
} else if state.bytes().starts_with(b"\"") {
|
||||
// we will be parsing a single-line string
|
||||
is_multiline = false;
|
||||
bytes = state.bytes[1..].iter();
|
||||
bytes = state.bytes()[1..].iter();
|
||||
state = advance_state!(state, 1)?;
|
||||
} else {
|
||||
return Err((NoProgress, EString::Open(state.line, state.column), state));
|
||||
@ -97,7 +98,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
// something which signalled that we should end the
|
||||
// current segment - so use segment_parsed_bytes - 1 here,
|
||||
// to exclude that char we just parsed.
|
||||
let string_bytes = &state.bytes[0..(segment_parsed_bytes - 1)];
|
||||
let string_bytes = &state.bytes()[0..(segment_parsed_bytes - 1)];
|
||||
|
||||
match std::str::from_utf8(string_bytes) {
|
||||
Ok(string) => {
|
||||
@ -224,7 +225,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
// Advance past the `\(` before using the expr parser
|
||||
state = advance_state!(state, 2)?;
|
||||
|
||||
let original_byte_count = state.bytes.len();
|
||||
let original_byte_count = state.bytes().len();
|
||||
|
||||
// This is an interpolated variable.
|
||||
// Parse an arbitrary expression, then give a
|
||||
@ -237,7 +238,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
.parse(arena, state)?;
|
||||
|
||||
// Advance the iterator past the expr we just parsed.
|
||||
for _ in 0..(original_byte_count - new_state.bytes.len()) {
|
||||
for _ in 0..(original_byte_count - new_state.bytes().len()) {
|
||||
bytes.next();
|
||||
}
|
||||
|
||||
@ -251,7 +252,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
// Advance past the `\u` before using the expr parser
|
||||
state = advance_state!(state, 2)?;
|
||||
|
||||
let original_byte_count = state.bytes.len();
|
||||
let original_byte_count = state.bytes().len();
|
||||
|
||||
// Parse the hex digits, surrounded by parens, then
|
||||
// give a canonicalization error if the digits form
|
||||
@ -264,7 +265,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
|
||||
.parse(arena, state)?;
|
||||
|
||||
// Advance the iterator past the expr we just parsed.
|
||||
for _ in 0..(original_byte_count - new_state.bytes.len()) {
|
||||
for _ in 0..(original_byte_count - new_state.bytes().len()) {
|
||||
bytes.next();
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,8 @@ use crate::ast;
|
||||
use crate::module::module_defs;
|
||||
// use crate::module::module_defs;
|
||||
use crate::parser::Parser;
|
||||
use crate::parser::{State, SyntaxError};
|
||||
use crate::parser::SyntaxError;
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::Vec as BumpVec;
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::Located;
|
||||
|
@ -5,8 +5,8 @@ use crate::parser::{
|
||||
allocated, backtrackable, optional, specialize, specialize_ref, word1, word2, EType,
|
||||
ETypeApply, ETypeInParens, ETypeRecord, ETypeTagUnion, ParseResult, Parser,
|
||||
Progress::{self, *},
|
||||
State,
|
||||
};
|
||||
use crate::state::State;
|
||||
use bumpalo::collections::vec::Vec;
|
||||
use bumpalo::Bump;
|
||||
use roc_region::all::{Located, Region};
|
||||
@ -430,7 +430,7 @@ fn parse_concrete_type<'a>(
|
||||
arena: &'a Bump,
|
||||
state: State<'a>,
|
||||
) -> ParseResult<'a, TypeAnnotation<'a>, ETypeApply> {
|
||||
let initial_bytes = state.bytes;
|
||||
let initial_bytes = state.bytes();
|
||||
|
||||
match crate::ident::concrete_type().parse(arena, state) {
|
||||
Ok((_, (module_name, type_name), state)) => {
|
||||
@ -444,8 +444,8 @@ fn parse_concrete_type<'a>(
|
||||
Err((MadeProgress, _, mut state)) => {
|
||||
// we made some progress, but ultimately failed.
|
||||
// that means a malformed type name
|
||||
let chomped = crate::ident::chomp_malformed(state.bytes);
|
||||
let delta = initial_bytes.len() - state.bytes.len();
|
||||
let chomped = crate::ident::chomp_malformed(state.bytes());
|
||||
let delta = initial_bytes.len() - state.bytes().len();
|
||||
let parsed_str =
|
||||
unsafe { std::str::from_utf8_unchecked(&initial_bytes[..chomped + delta]) };
|
||||
|
||||
|
@ -20,7 +20,8 @@ mod test_parse {
|
||||
use roc_parse::ast::StrSegment::*;
|
||||
use roc_parse::ast::{self, EscapedChar};
|
||||
use roc_parse::module::module_defs;
|
||||
use roc_parse::parser::{Parser, State, SyntaxError};
|
||||
use roc_parse::parser::{Parser, SyntaxError};
|
||||
use roc_parse::state::State;
|
||||
use roc_parse::test_helpers::parse_expr_with;
|
||||
use roc_region::all::{Located, Region};
|
||||
use roc_test_utils::assert_multiline_str_eq;
|
||||
|
@ -14,7 +14,8 @@ use roc_load::docs::{ModuleDocumentation, RecordField};
|
||||
use roc_load::file::{LoadedModule, LoadingProblem};
|
||||
use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds};
|
||||
use roc_parse::ident::{parse_ident, Ident};
|
||||
use roc_parse::parser::{State, SyntaxError};
|
||||
use roc_parse::parser::SyntaxError;
|
||||
use roc_parse::state::State;
|
||||
use roc_region::all::Region;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
@ -186,7 +186,7 @@ mod test_reporting {
|
||||
{
|
||||
use ven_pretty::DocAllocator;
|
||||
|
||||
use roc_parse::parser::State;
|
||||
use roc_parse::state::State;
|
||||
|
||||
let state = State::new(src.as_bytes());
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user