extract state

This commit is contained in:
Joshua Warner 2021-12-16 17:13:53 -08:00
parent 33277144d6
commit 49818343dd
20 changed files with 304 additions and 288 deletions

View File

@ -17,7 +17,8 @@ use roc_parse::header::{
use roc_parse::{
ast::{Def, Module},
module::{self, module_defs},
parser::{Parser, State, SyntaxError},
parser::{Parser, SyntaxError},
state::State,
};
use roc_region::all::Located;
use roc_reporting::{internal_error, user_error};

View File

@ -92,7 +92,7 @@ impl Validator for InputValidator {
Ok(ValidationResult::Incomplete)
} else {
let arena = bumpalo::Bump::new();
let state = roc_parse::parser::State::new(ctx.input().trim().as_bytes());
let state = roc_parse::state::State::new(ctx.input().trim().as_bytes());
match roc_parse::expr::parse_loc_expr(0, &arena, state) {
// Special case some syntax errors to allow for multi-line inputs

View File

@ -11,7 +11,8 @@ mod test_fmt {
use roc_fmt::module::fmt_module;
use roc_fmt::Buf;
use roc_parse::module::{self, module_defs};
use roc_parse::parser::{Parser, State};
use roc_parse::parser::Parser;
use roc_parse::state::State;
use roc_test_utils::assert_multiline_str_eq;
// Not intended to be used directly in tests; please use expr_formats_to or expr_formats_same

View File

@ -28,7 +28,7 @@ use roc_parse::header::{
ExposedName, ImportsEntry, PackageEntry, PackageOrPath, PlatformHeader, To, TypedIdent,
};
use roc_parse::module::module_defs;
use roc_parse::parser::{self, ParseProblem, Parser, SyntaxError};
use roc_parse::parser::{ParseProblem, Parser, SyntaxError};
use roc_region::all::{Located, Region};
use roc_solve::module::SolvedModule;
use roc_solve::solve;
@ -674,7 +674,7 @@ struct ModuleHeader<'a> {
exposes: Vec<Symbol>,
exposed_imports: MutMap<Ident, (Symbol, Region)>,
header_src: &'a str,
parse_state: roc_parse::parser::State<'a>,
parse_state: roc_parse::state::State<'a>,
module_timing: ModuleTiming,
}
@ -2356,7 +2356,7 @@ fn load_pkg_config<'a>(
Ok(bytes_vec) => {
let parse_start = SystemTime::now();
let bytes = arena.alloc(bytes_vec);
let parse_state = parser::State::new(bytes);
let parse_state = roc_parse::state::State::new(bytes);
let parsed = roc_parse::module::parse_header(arena, parse_state);
let parse_header_duration = parse_start.elapsed().unwrap();
@ -2384,7 +2384,7 @@ fn load_pkg_config<'a>(
)))
}
Ok((ast::Module::Platform { header }, parser_state)) => {
let delta = bytes.len() - parser_state.bytes.len();
let delta = bytes.len() - parser_state.bytes().len();
let chomped = &bytes[..delta];
let header_src = unsafe { std::str::from_utf8_unchecked(chomped) };
@ -2526,7 +2526,7 @@ fn parse_header<'a>(
start_time: SystemTime,
) -> Result<(ModuleId, Msg<'a>), LoadingProblem<'a>> {
let parse_start = SystemTime::now();
let parse_state = parser::State::new(src_bytes);
let parse_state = roc_parse::state::State::new(src_bytes);
let parsed = roc_parse::module::parse_header(arena, parse_state);
let parse_header_duration = parse_start.elapsed().unwrap();
@ -2539,7 +2539,7 @@ fn parse_header<'a>(
match parsed {
Ok((ast::Module::Interface { header }, parse_state)) => {
let header_src = unsafe {
let chomped = src_bytes.len() - parse_state.bytes.len();
let chomped = src_bytes.len() - parse_state.bytes().len();
std::str::from_utf8_unchecked(&src_bytes[..chomped])
};
@ -2571,7 +2571,7 @@ fn parse_header<'a>(
pkg_config_dir.pop();
let header_src = unsafe {
let chomped = src_bytes.len() - parse_state.bytes.len();
let chomped = src_bytes.len() - parse_state.bytes().len();
std::str::from_utf8_unchecked(&src_bytes[..chomped])
};
@ -2767,7 +2767,7 @@ struct HeaderInfo<'a> {
#[allow(clippy::too_many_arguments)]
fn send_header<'a>(
info: HeaderInfo<'a>,
parse_state: parser::State<'a>,
parse_state: roc_parse::state::State<'a>,
module_ids: Arc<Mutex<PackageModuleIds<'a>>>,
ident_ids_by_module: Arc<Mutex<MutMap<ModuleId, IdentIds>>>,
module_timing: ModuleTiming,
@ -2980,7 +2980,7 @@ struct PlatformHeaderInfo<'a> {
#[allow(clippy::too_many_arguments)]
fn send_header_two<'a>(
info: PlatformHeaderInfo<'a>,
parse_state: parser::State<'a>,
parse_state: roc_parse::state::State<'a>,
module_ids: Arc<Mutex<PackageModuleIds<'a>>>,
ident_ids_by_module: Arc<Mutex<MutMap<ModuleId, IdentIds>>>,
module_timing: ModuleTiming,
@ -3327,7 +3327,7 @@ fn fabricate_pkg_config_module<'a>(
shorthand: &'a str,
app_module_id: ModuleId,
filename: PathBuf,
parse_state: parser::State<'a>,
parse_state: roc_parse::state::State<'a>,
module_ids: Arc<Mutex<PackageModuleIds<'a>>>,
ident_ids_by_module: Arc<Mutex<MutMap<ModuleId, IdentIds>>>,
header: &PlatformHeader<'a>,
@ -3724,7 +3724,7 @@ where
fn parse<'a>(arena: &'a Bump, header: ModuleHeader<'a>) -> Result<Msg<'a>, LoadingProblem<'a>> {
let mut module_timing = header.module_timing;
let parse_start = SystemTime::now();
let source = header.parse_state.bytes;
let source = header.parse_state.bytes();
let parse_state = header.parse_state;
let parsed_defs = match module_defs().parse(arena, parse_state) {
Ok((_, success, _state)) => success,

View File

@ -1,8 +1,7 @@
use crate::ast::CommentOrNewline;
use crate::ast::Spaceable;
use crate::parser::{
self, and, backtrackable, BadInputError, Col, Parser, Progress::*, Row, State,
};
use crate::parser::{self, and, backtrackable, BadInputError, Col, Parser, Progress::*, Row};
use crate::state::State;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_region::all::Located;
@ -193,17 +192,25 @@ where
move |arena, mut state: State<'a>| {
let comments_and_newlines = Vec::new_in(arena);
match eat_spaces(state.bytes, state.line, state.column, comments_and_newlines) {
match eat_spaces(
state.bytes(),
state.line,
state.column,
comments_and_newlines,
) {
HasTab { row, col } => {
// there was a tab character
let mut state = state;
state.line = row;
state.column = col;
// TODO: it _seems_ like if we're changing the line/column, we should also be
// advancing the state by the corresponding number of bytes.
// Not doing this is likely a bug!
// state = state.advance(<something>);
Err((
MadeProgress,
space_problem(BadInputError::HasTab, row, col),
State {
line: row,
column: col,
..state
},
state,
))
}
Good {
@ -212,7 +219,7 @@ where
bytes,
comments_and_newlines,
} => {
if bytes == state.bytes {
if bytes == state.bytes() {
Ok((NoProgress, &[] as &[_], state))
} else if state.line != row {
// we parsed at least one newline
@ -222,7 +229,7 @@ where
if col >= min_indent {
state.line = row;
state.column = col;
state.bytes = bytes;
state = state.advance(state.bytes().len() - bytes.len());
Ok((MadeProgress, comments_and_newlines.into_bump_slice(), state))
} else {
@ -234,7 +241,7 @@ where
}
} else {
state.column = col;
state.bytes = bytes;
state = state.advance(state.bytes().len() - bytes.len());
Ok((MadeProgress, comments_and_newlines.into_bump_slice(), state))
}

View File

@ -7,9 +7,10 @@ use crate::keyword;
use crate::parser::{
self, backtrackable, optional, sep_by1, sep_by1_e, specialize, specialize_ref, then,
trailing_sep_by0, word1, word2, EExpect, EExpr, EIf, EInParens, ELambda, EList, ENumber,
EPattern, ERecord, EString, EType, EWhen, Either, ParseResult, Parser, State,
EPattern, ERecord, EString, EType, EWhen, Either, ParseResult, Parser,
};
use crate::pattern::loc_closure_param;
use crate::state::State;
use crate::type_annotation;
use bumpalo::collections::Vec;
use bumpalo::Bump;
@ -304,22 +305,16 @@ fn unary_negate<'a>() -> impl Parser<'a, (), EExpr<'a>> {
// - it is preceded by whitespace (spaces, newlines, comments)
// - it is not followed by whitespace
let followed_by_whitespace = state
.bytes
.bytes()
.get(1)
.map(|c| c.is_ascii_whitespace() || *c == b'#')
.unwrap_or(false);
if state.bytes.starts_with(b"-") && !followed_by_whitespace {
if state.bytes().starts_with(b"-") && !followed_by_whitespace {
// the negate is only unary if it is not followed by whitespace
Ok((
MadeProgress,
(),
State {
bytes: &state.bytes[1..],
column: state.column + 1,
..state
},
))
let mut state = state.advance(1);
state.column += 1;
Ok((MadeProgress, (), state))
} else {
// this is not a negated expression
Err((NoProgress, EExpr::UnaryNot(state.line, state.column), state))
@ -515,7 +510,7 @@ fn numeric_negate_expression<'a, T>(
expr: Located<Expr<'a>>,
spaces: &'a [CommentOrNewline<'a>],
) -> Located<Expr<'a>> {
debug_assert_eq!(state.bytes.get(0), Some(&b'-'));
debug_assert_eq!(state.bytes().get(0), Some(&b'-'));
// for overflow reasons, we must make the unary minus part of the number literal.
let mut region = expr.region;
region.start_col -= 1;
@ -523,13 +518,13 @@ fn numeric_negate_expression<'a, T>(
let new_expr = match &expr.value {
Expr::Num(string) => {
let new_string =
unsafe { std::str::from_utf8_unchecked(&state.bytes[..string.len() + 1]) };
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
Expr::Num(new_string)
}
Expr::Float(string) => {
let new_string =
unsafe { std::str::from_utf8_unchecked(&state.bytes[..string.len() + 1]) };
unsafe { std::str::from_utf8_unchecked(&state.bytes()[..string.len() + 1]) };
Expr::Float(new_string)
}
@ -1283,8 +1278,8 @@ fn parse_expr_end<'a>(
}
Err((NoProgress, _, mut state)) => {
// try multi-backpassing
if options.accept_multi_backpassing && state.bytes.starts_with(b",") {
state.bytes = &state.bytes[1..];
if options.accept_multi_backpassing && state.bytes().starts_with(b",") {
state = state.advance(1);
state.column += 1;
let (_, mut patterns, state) = specialize_ref(
@ -1344,7 +1339,7 @@ fn parse_expr_end<'a>(
Ok((MadeProgress, ret, state))
}
}
} else if options.check_for_arrow && state.bytes.starts_with(b"->") {
} else if options.check_for_arrow && state.bytes().starts_with(b"->") {
Err((
MadeProgress,
EExpr::BadOperator(&[b'-', b'>'], state.line, state.column),
@ -1755,7 +1750,7 @@ mod when {
}
);
while !state.bytes.is_empty() {
while !state.bytes().is_empty() {
match branch_parser.parse(arena, state) {
Ok((_, next_output, next_state)) => {
state = next_state;
@ -1773,14 +1768,10 @@ mod when {
}
}
Ok((
MadeProgress,
branches,
State {
indent_col: when_indent,
..state
},
))
let mut state = state;
state.indent_col = when_indent;
Ok((MadeProgress, branches, state))
}
}
@ -2396,12 +2387,12 @@ where
G: Fn(&'a [u8], Row, Col) -> E,
E: 'a,
{
let chomped = chomp_ops(state.bytes);
let chomped = chomp_ops(state.bytes());
macro_rules! good {
($op:expr, $width:expr) => {{
state.column += $width;
state.bytes = &state.bytes[$width..];
state = state.advance($width);
Ok((MadeProgress, $op, state))
}};
@ -2416,7 +2407,7 @@ where
match chomped {
0 => Err((NoProgress, to_expectation(state.line, state.column), state)),
1 => {
let op = state.bytes[0];
let op = state.bytes()[0];
match op {
b'+' => good!(BinOp::Plus, 1),
b'-' => good!(BinOp::Minus, 1),
@ -2432,12 +2423,12 @@ where
}
b'=' => good!(BinOp::Assignment, 1),
b':' => good!(BinOp::HasType, 1),
_ => bad_made_progress!(&state.bytes[0..1]),
_ => bad_made_progress!(&state.bytes()[0..1]),
}
}
2 => {
let op0 = state.bytes[0];
let op1 = state.bytes[1];
let op0 = state.bytes()[0];
let op1 = state.bytes()[1];
match (op0, op1) {
(b'|', b'>') => good!(BinOp::Pizza, 2),
@ -2454,10 +2445,10 @@ where
Err((NoProgress, to_error(b"->", state.line, state.column), state))
}
(b'<', b'-') => good!(BinOp::Backpassing, 2),
_ => bad_made_progress!(&state.bytes[0..2]),
_ => bad_made_progress!(&state.bytes()[0..2]),
}
}
_ => bad_made_progress!(&state.bytes[0..chomped]),
_ => bad_made_progress!(&state.bytes()[0..chomped]),
}
}

View File

@ -2,9 +2,8 @@ use crate::ast::{Collection, CommentOrNewline, Spaced, StrLiteral, TypeAnnotatio
use crate::blankspace::space0_e;
use crate::ident::lowercase_ident;
use crate::parser::Progress::{self, *};
use crate::parser::{
specialize, word1, EPackageEntry, EPackageName, EPackageOrPath, Parser, State,
};
use crate::parser::{specialize, word1, EPackageEntry, EPackageName, EPackageOrPath, Parser};
use crate::state::State;
use crate::string_literal;
use bumpalo::collections::Vec;
use roc_region::all::Loc;
@ -282,8 +281,8 @@ where
T: 'a,
{
|_, mut state: State<'a>| {
let mut chomped = 0;
let mut it = state.bytes.iter();
let mut chomped = 0usize;
let mut it = state.bytes().iter();
while let Some(b' ') = it.next() {
chomped += 1;
@ -292,8 +291,8 @@ where
if chomped == 0 {
Ok((NoProgress, (), state))
} else {
state.column += chomped;
state.bytes = it.as_slice();
state.column += chomped as u16;
state = state.advance(chomped);
Ok((MadeProgress, (), state))
}
@ -316,7 +315,7 @@ pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
// They cannot contain underscores or other special characters.
// They must be ASCII.
|_, mut state: State<'a>| match chomp_package_part(state.bytes) {
|_, mut state: State<'a>| match chomp_package_part(state.bytes()) {
Err(progress) => Err((
progress,
EPackageName::Account(state.line, state.column),
@ -324,9 +323,9 @@ pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
)),
Ok(account) => {
let mut chomped = account.len();
if let Ok(('/', width)) = char::from_utf8_slice_start(&state.bytes[chomped..]) {
if let Ok(('/', width)) = char::from_utf8_slice_start(&state.bytes()[chomped..]) {
chomped += width;
match chomp_package_part(&state.bytes[chomped..]) {
match chomp_package_part(&state.bytes()[chomped..]) {
Err(progress) => Err((
progress,
EPackageName::Pkg(state.line, state.column + chomped as u16),
@ -336,7 +335,7 @@ pub fn package_name<'a>() -> impl Parser<'a, PackageName<'a>, EPackageName> {
chomped += pkg.len();
state.column += chomped as u16;
state.bytes = &state.bytes[chomped..];
state = state.advance(chomped);
let value = PackageName { account, pkg };
Ok((MadeProgress, value, state))

View File

@ -1,5 +1,6 @@
use crate::parser::Progress::{self, *};
use crate::parser::{BadInputError, Col, EExpr, ParseResult, Parser, Row, State};
use crate::parser::{BadInputError, Col, EExpr, ParseResult, Parser, Row};
use crate::state::State;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
@ -59,7 +60,7 @@ impl<'a> Ident<'a> {
/// * A record field, e.g. "email" in `.email` or in `email:`
/// * A named pattern match, e.g. "foo" in `foo =` or `foo ->` or `\foo ->`
pub fn lowercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
move |_, state: State<'a>| match chomp_lowercase_part(state.bytes) {
move |_, state: State<'a>| match chomp_lowercase_part(state.bytes()) {
Err(progress) => Err((progress, (), state)),
Ok(ident) => {
if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) {
@ -77,8 +78,8 @@ pub fn lowercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
pub fn tag_name<'a>() -> impl Parser<'a, &'a str, ()> {
move |arena, state: State<'a>| {
if state.bytes.starts_with(b"@") {
match chomp_private_tag(state.bytes, state.line, state.column) {
if state.bytes().starts_with(b"@") {
match chomp_private_tag(state.bytes(), state.line, state.column) {
Err(BadIdent::Start(_, _)) => Err((NoProgress, (), state)),
Err(_) => Err((MadeProgress, (), state)),
Ok(ident) => {
@ -101,7 +102,7 @@ pub fn tag_name<'a>() -> impl Parser<'a, &'a str, ()> {
/// * A type name
/// * A global tag
pub fn uppercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
move |_, state: State<'a>| match chomp_uppercase_part(state.bytes) {
move |_, state: State<'a>| match chomp_uppercase_part(state.bytes()) {
Err(progress) => Err((progress, (), state)),
Ok(ident) => {
let width = ident.len();
@ -114,7 +115,7 @@ pub fn uppercase_ident<'a>() -> impl Parser<'a, &'a str, ()> {
}
pub fn unqualified_ident<'a>() -> impl Parser<'a, &'a str, ()> {
move |_, state: State<'a>| match chomp_part(|c| c.is_alphabetic(), state.bytes) {
move |_, state: State<'a>| match chomp_part(|c| c.is_alphabetic(), state.bytes()) {
Err(progress) => Err((progress, (), state)),
Ok(ident) => {
if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) {
@ -167,7 +168,7 @@ pub fn parse_ident<'a>(arena: &'a Bump, state: State<'a>) -> ParseResult<'a, Ide
Err((MadeProgress, fail, state)) => match fail {
BadIdent::Start(r, c) => Err((NoProgress, EExpr::Start(r, c), state)),
BadIdent::Space(e, r, c) => Err((NoProgress, EExpr::Space(e, r, c), state)),
_ => malformed_identifier(initial.bytes, fail, state),
_ => malformed_identifier(initial.bytes(), fail, state),
},
}
}
@ -177,8 +178,8 @@ fn malformed_identifier<'a>(
problem: BadIdent,
mut state: State<'a>,
) -> ParseResult<'a, Ident<'a>, EExpr<'a>> {
let chomped = chomp_malformed(state.bytes);
let delta = initial_bytes.len() - state.bytes.len();
let chomped = chomp_malformed(state.bytes());
let delta = initial_bytes.len() - state.bytes().len();
let parsed_str = unsafe { std::str::from_utf8_unchecked(&initial_bytes[..chomped + delta]) };
state = state.advance_without_indenting_ee(chomped, |r, c| {
@ -448,7 +449,7 @@ fn chomp_module_chain(buffer: &[u8]) -> Result<u16, Progress> {
}
pub fn concrete_type<'a>() -> impl Parser<'a, (&'a str, &'a str), ()> {
move |_, state: State<'a>| match chomp_concrete_type(state.bytes) {
move |_, state: State<'a>| match chomp_concrete_type(state.bytes()) {
Err(progress) => Err((progress, (), state)),
Ok((module_name, type_name, width)) => {
match state.advance_without_indenting_ee(width, |_, _| ()) {
@ -527,7 +528,7 @@ fn parse_ident_help<'a>(
arena: &'a Bump,
mut state: State<'a>,
) -> ParseResult<'a, Ident<'a>, BadIdent> {
match chomp_identifier_chain(arena, state.bytes, state.line, state.column) {
match chomp_identifier_chain(arena, state.bytes(), state.line, state.column) {
Ok((width, ident)) => {
state = advance_state!(state, width as usize)?;
Ok((MadeProgress, ident, state))

View File

@ -14,6 +14,7 @@ pub mod module;
pub mod number_literal;
pub mod pattern;
pub mod problems;
pub mod state;
pub mod string_literal;
pub mod test_helpers;
pub mod type_annotation;

View File

@ -9,8 +9,9 @@ use crate::ident::{lowercase_ident, unqualified_ident, uppercase_ident};
use crate::parser::Progress::{self, *};
use crate::parser::{
backtrackable, specialize, word1, word2, Col, EEffects, EExposes, EHeader, EImports, EPackages,
EProvides, ERequires, ETypedIdent, Parser, Row, State, SyntaxError,
EProvides, ERequires, ETypedIdent, Parser, Row, SyntaxError,
};
use crate::state::State;
use crate::string_literal;
use crate::type_annotation;
use bumpalo::collections::Vec;
@ -171,11 +172,11 @@ fn chomp_module_name(buffer: &[u8]) -> Result<&str, Progress> {
#[inline(always)]
fn module_name<'a>() -> impl Parser<'a, ModuleName<'a>, ()> {
|_, mut state: State<'a>| match chomp_module_name(state.bytes) {
|_, mut state: State<'a>| match chomp_module_name(state.bytes()) {
Ok(name) => {
let width = name.len();
state.column += width as u16;
state.bytes = &state.bytes[width..];
state = state.advance(width);
Ok((MadeProgress, ModuleName::new(name), state))
}

View File

@ -1,5 +1,6 @@
use crate::ast::Base;
use crate::parser::{ENumber, ParseResult, Parser, Progress, State};
use crate::parser::{ENumber, ParseResult, Parser, Progress};
use crate::state::State;
pub enum NumLiteral<'a> {
Float(&'a str),
@ -13,9 +14,9 @@ pub enum NumLiteral<'a> {
pub fn positive_number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, ENumber> {
move |_arena, state: State<'a>| {
match state.bytes.get(0) {
match state.bytes().get(0) {
Some(first_byte) if (*first_byte as char).is_ascii_digit() => {
parse_number_base(false, state.bytes, state)
parse_number_base(false, state.bytes(), state)
}
_ => {
// this is not a number at all
@ -27,13 +28,13 @@ pub fn positive_number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, ENumber>
pub fn number_literal<'a>() -> impl Parser<'a, NumLiteral<'a>, ENumber> {
move |_arena, state: State<'a>| {
match state.bytes.get(0) {
match state.bytes().get(0) {
Some(first_byte) if *first_byte == b'-' => {
// drop the minus
parse_number_base(true, &state.bytes[1..], state)
parse_number_base(true, &state.bytes()[1..], state)
}
Some(first_byte) if (*first_byte as char).is_ascii_digit() => {
parse_number_base(false, state.bytes, state)
parse_number_base(false, state.bytes(), state)
}
_ => {
// this is not a number at all
@ -99,7 +100,7 @@ fn chomp_number_dec<'a>(
}
let string =
unsafe { std::str::from_utf8_unchecked(&state.bytes[0..chomped + is_negative as usize]) };
unsafe { std::str::from_utf8_unchecked(&state.bytes()[0..chomped + is_negative as usize]) };
let new = state.advance_without_indenting_ee(chomped + is_negative as usize, |_, _| {
ENumber::LineTooLong

View File

@ -1,143 +1,15 @@
use crate::state::State;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_region::all::{Located, Position, Region};
use std::fmt;
use roc_region::all::{Located, Region};
use Progress::*;
/// A position in a source file.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct State<'a> {
/// The raw input bytes from the file.
pub bytes: &'a [u8],
/// Current line of the input
pub line: u32,
/// Current column of the input
pub column: u16,
/// Current indentation level, in columns
/// (so no indent is col 1 - this saves an arithmetic operation.)
pub indent_col: u16,
}
#[derive(Debug, PartialEq, Eq)]
pub enum Either<First, Second> {
First(First),
Second(Second),
}
impl<'a> State<'a> {
pub fn new(bytes: &'a [u8]) -> State<'a> {
State {
bytes,
line: 0,
column: 0,
indent_col: 0,
}
}
/// Returns whether the parser has reached the end of the input
pub const fn get_position(&self) -> Position {
Position {
row: self.line,
col: self.column,
}
}
/// Returns whether the parser has reached the end of the input
pub const fn has_reached_end(&self) -> bool {
self.bytes.is_empty()
}
/// Use advance_spaces to advance with indenting.
/// This assumes we are *not* advancing with spaces, or at least that
/// any spaces on the line were preceded by non-spaces - which would mean
/// they weren't eligible to indent anyway.
pub fn advance_without_indenting_e<TE, E>(
self,
quantity: usize,
to_error: TE,
) -> Result<Self, (Progress, E, Self)>
where
TE: Fn(BadInputError, Row, Col) -> E,
{
self.advance_without_indenting_ee(quantity, |r, c| {
to_error(BadInputError::LineTooLong, r, c)
})
}
pub fn advance_without_indenting_ee<TE, E>(
self,
quantity: usize,
to_error: TE,
) -> Result<Self, (Progress, E, Self)>
where
TE: Fn(Row, Col) -> E,
{
match (self.column as usize).checked_add(quantity) {
Some(column_usize) if column_usize <= u16::MAX as usize => {
Ok(State {
bytes: &self.bytes[quantity..],
column: column_usize as u16,
// Once we hit a nonspace character, we are no longer indenting.
..self
})
}
_ => Err((NoProgress, to_error(self.line, self.column), self)),
}
}
/// Returns a Region corresponding to the current state, but
/// with the end_col advanced by the given amount. This is
/// useful when parsing something "manually" (using input.chars())
/// and thus wanting a Region while not having access to loc().
pub fn len_region(&self, length: u16) -> Region {
Region {
start_col: self.column,
start_line: self.line,
end_col: self
.column
.checked_add(length)
.unwrap_or_else(|| panic!("len_region overflowed")),
end_line: self.line,
}
}
/// Return a failing ParseResult for the given FailReason
pub fn fail<T, X>(
self,
_arena: &'a Bump,
progress: Progress,
reason: X,
) -> Result<(Progress, T, Self), (Progress, X, Self)> {
Err((progress, reason, self))
}
}
impl<'a> fmt::Debug for State<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "State {{")?;
match std::str::from_utf8(self.bytes) {
Ok(string) => write!(f, "\n\tbytes: [utf8] {:?}", string)?,
Err(_) => write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?,
}
write!(f, "\n\t(line, col): ({}, {}),", self.line, self.column)?;
write!(f, "\n\tindent_col: {}", self.indent_col)?;
write!(f, "\n}}")
}
}
#[test]
fn state_size() {
// State should always be under 8 machine words, so it fits in a typical
// cache line.
let state_size = std::mem::size_of::<State>();
let maximum = std::mem::size_of::<usize>() * 8;
assert!(state_size <= maximum, "{:?} <= {:?}", state_size, maximum);
}
pub type ParseResult<'a, Output, Error> =
Result<(Progress, Output, State<'a>), (Progress, Error, State<'a>)>;
@ -776,21 +648,21 @@ where
move |_, mut state: State<'a>| {
let width = keyword.len();
if !state.bytes.starts_with(keyword.as_bytes()) {
if !state.bytes().starts_with(keyword.as_bytes()) {
return Err((NoProgress, if_error(state.line, state.column), state));
}
// the next character should not be an identifier character
// to prevent treating `whence` or `iffy` as keywords
match state.bytes.get(width) {
match state.bytes().get(width) {
Some(next) if *next == b' ' || *next == b'#' || *next == b'\n' => {
state.column += width as u16;
state.bytes = &state.bytes[width..];
state = state.advance(width);
Ok((MadeProgress, (), state))
}
None => {
state.column += width as u16;
state.bytes = &state.bytes[width..];
state = state.advance(width);
Ok((MadeProgress, (), state))
}
Some(_) => Err((NoProgress, if_error(state.line, state.column), state)),
@ -810,7 +682,7 @@ where
Error: 'a,
{
move |arena, state: State<'a>| {
let start_bytes_len = state.bytes.len();
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
Ok((elem_progress, first_output, next_state)) => {
@ -837,8 +709,10 @@ where
Err((_, fail, state)) => {
// If the delimiter parsed, but the following
// element did not, that's a fatal error.
let progress =
Progress::from_lengths(start_bytes_len, state.bytes.len());
let progress = Progress::from_lengths(
start_bytes_len,
state.bytes().len(),
);
return Err((progress, fail, state));
}
@ -871,7 +745,7 @@ where
Error: 'a,
{
move |arena, state: State<'a>| {
let start_bytes_len = state.bytes.len();
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
Ok((progress, first_output, next_state)) => {
@ -899,7 +773,7 @@ where
// element did not, that means we saw a trailing comma
let progress = Progress::from_lengths(
start_bytes_len,
old_state.bytes.len(),
old_state.bytes().len(),
);
return Ok((progress, buf, old_state));
}
@ -932,7 +806,7 @@ where
Error: 'a,
{
move |arena, state: State<'a>| {
let start_bytes_len = state.bytes.len();
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
Ok((progress, first_output, next_state)) => {
@ -965,7 +839,7 @@ where
NoProgress => {
let progress = Progress::from_lengths(
start_bytes_len,
old_state.bytes.len(),
old_state.bytes().len(),
);
return Ok((progress, buf, old_state));
}
@ -993,7 +867,7 @@ where
Error: 'a,
{
move |arena, state: State<'a>| {
let start_bytes_len = state.bytes.len();
let start_bytes_len = state.bytes().len();
match parser.parse(arena, state) {
Ok((progress, first_output, next_state)) => {
@ -1033,7 +907,7 @@ where
NoProgress => {
let progress = Progress::from_lengths(
start_bytes_len,
old_state.bytes.len(),
old_state.bytes().len(),
);
return Ok((progress, buf, old_state));
}
@ -1094,7 +968,7 @@ where
#[macro_export]
macro_rules! loc {
($parser:expr) => {
move |arena, state: $crate::parser::State<'a>| {
move |arena, state: $crate::state::State<'a>| {
use roc_region::all::{Located, Region};
let start_col = state.column;
@ -1123,7 +997,7 @@ macro_rules! loc {
#[macro_export]
macro_rules! skip_first {
($p1:expr, $p2:expr) => {
move |arena, state: $crate::parser::State<'a>| {
move |arena, state: $crate::state::State<'a>| {
let original_state = state.clone();
match $p1.parse(arena, state) {
@ -1142,7 +1016,7 @@ macro_rules! skip_first {
#[macro_export]
macro_rules! skip_second {
($p1:expr, $p2:expr) => {
move |arena, state: $crate::parser::State<'a>| {
move |arena, state: $crate::state::State<'a>| {
let original_state = state.clone();
match $p1.parse(arena, state) {
@ -1243,7 +1117,7 @@ macro_rules! collection_trailing_sep_e {
#[macro_export]
macro_rules! succeed {
($value:expr) => {
move |_arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
move |_arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
Ok((NoProgress, $value, state))
}
};
@ -1252,7 +1126,7 @@ macro_rules! succeed {
#[macro_export]
macro_rules! and {
($p1:expr, $p2:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
// We have to clone this because if the first parser passes and then
// the second one fails, we need to revert back to the original state.
let original_state = state.clone();
@ -1271,7 +1145,7 @@ macro_rules! and {
#[macro_export]
macro_rules! one_of {
($p1:expr, $p2:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
match $p1.parse(arena, state) {
valid @ Ok(_) => valid,
@ -1292,7 +1166,7 @@ macro_rules! one_of {
#[macro_export]
macro_rules! maybe {
($p1:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| match $p1
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| match $p1
.parse(arena, state)
{
Ok((progress, value, state)) => Ok((progress, Some(value), state)),
@ -1305,7 +1179,7 @@ macro_rules! maybe {
#[macro_export]
macro_rules! one_of_with_error {
($toerror:expr; $p1:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| {
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| {
match $p1.parse(arena, state) {
valid @ Ok(_) => valid,
@ -1352,16 +1226,12 @@ where
{
debug_assert_ne!(word, b'\n');
move |_arena: &'a Bump, state: State<'a>| match state.bytes.get(0) {
Some(x) if *x == word => Ok((
MadeProgress,
(),
State {
bytes: &state.bytes[1..],
column: state.column + 1,
..state
},
)),
move |_arena: &'a Bump, state: State<'a>| match state.bytes().get(0) {
Some(x) if *x == word => {
let mut state = state.advance(1);
state.column += 1;
Ok((MadeProgress, (), state))
}
_ => Err((NoProgress, to_error(state.line, state.column), state)),
}
}
@ -1377,16 +1247,10 @@ where
let needle = [word_1, word_2];
move |_arena: &'a Bump, state: State<'a>| {
if state.bytes.starts_with(&needle) {
Ok((
MadeProgress,
(),
State {
bytes: &state.bytes[2..],
column: state.column + 2,
..state
},
))
if state.bytes().starts_with(&needle) {
let mut state = state.advance(2);
state.column += 2;
Ok((MadeProgress, (), state))
} else {
Err((NoProgress, to_error(state.line, state.column), state))
}
@ -1448,7 +1312,7 @@ macro_rules! zero_or_more {
move |arena, state: State<'a>| {
use bumpalo::collections::Vec;
let start_bytes_len = state.bytes.len();
let start_bytes_len = state.bytes().len();
match $parser.parse(arena, state) {
Ok((_, first_output, next_state)) => {
@ -1472,7 +1336,7 @@ macro_rules! zero_or_more {
NoProgress => {
// the next element failed with no progress
// report whether we made progress before
let progress = Progress::from_lengths(start_bytes_len, old_state.bytes.len());
let progress = Progress::from_lengths(start_bytes_len, old_state.bytes().len());
return Ok((progress, buf, old_state));
}
}
@ -1539,14 +1403,14 @@ macro_rules! one_or_more {
#[macro_export]
macro_rules! debug {
($parser:expr) => {
move |arena, state: $crate::parser::State<'a>| dbg!($parser.parse(arena, state))
move |arena, state: $crate::state::State<'a>| dbg!($parser.parse(arena, state))
};
}
#[macro_export]
macro_rules! either {
($p1:expr, $p2:expr) => {
move |arena: &'a bumpalo::Bump, state: $crate::parser::State<'a>| match $p1
move |arena: &'a bumpalo::Bump, state: $crate::state::State<'a>| match $p1
.parse(arena, state)
{
Ok((progress, output, state)) => {

View File

@ -4,8 +4,9 @@ use crate::ident::{lowercase_ident, parse_ident, Ident};
use crate::parser::Progress::{self, *};
use crate::parser::{
backtrackable, optional, specialize, specialize_ref, word1, EPattern, PInParens, PRecord,
ParseResult, Parser, State,
ParseResult, Parser,
};
use crate::state::State;
use bumpalo::collections::string::String;
use bumpalo::collections::Vec;
use bumpalo::Bump;

144
compiler/parse/src/state.rs Normal file
View File

@ -0,0 +1,144 @@
use crate::parser::Progress::*;
use crate::parser::{BadInputError, Col, Progress, Row};
use bumpalo::Bump;
use roc_region::all::{Position, Region};
use std::fmt;
/// Parser state: the input that has not been consumed yet, together with the
/// current line, column, and indentation column.
///
/// The type is `Copy`, so it is passed around and returned by value.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct State<'a> {
/// The input bytes that have not been consumed yet. `advance` slices
/// consumed bytes off the front. The field is private; read it via `bytes()`.
bytes: &'a [u8],
/// Current line of the input (`State::new` starts it at 0)
pub line: u32,
/// Current column of the input (`State::new` starts it at 0)
pub column: u16,
/// Current indentation level, in columns
/// (so no indent is col 1 - this saves an arithmetic operation.)
/// NOTE(review): `State::new` initializes this to 0, which seems at odds
/// with "no indent is col 1" - confirm which convention is intended.
pub indent_col: u16,
}
impl<'a> State<'a> {
    /// Creates a state positioned at the very start of `bytes`:
    /// line 0, column 0, indentation column 0.
    pub fn new(bytes: &'a [u8]) -> State<'a> {
        State {
            bytes,
            line: 0,
            column: 0,
            indent_col: 0,
        }
    }

    /// Returns the input bytes that have not been consumed yet.
    pub fn bytes(&self) -> &'a [u8] {
        self.bytes
    }

    /// Returns a copy of this state with the first `offset` bytes dropped.
    ///
    /// Only the byte slice changes: `line`, `column` and `indent_col` are
    /// copied over as they are.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is greater than the number of remaining bytes.
    #[must_use]
    pub fn advance(&self, offset: usize) -> State<'a> {
        let mut state = *self;
        state.bytes = &state.bytes[offset..];
        state
    }

    /// Returns the current line and column as a `Position`.
    pub const fn get_position(&self) -> Position {
        Position {
            row: self.line,
            col: self.column,
        }
    }

    /// Returns whether the parser has reached the end of the input
    pub const fn has_reached_end(&self) -> bool {
        self.bytes.is_empty()
    }

    /// Use advance_spaces to advance with indenting.
    /// This assumes we are *not* advancing with spaces, or at least that
    /// any spaces on the line were preceded by non-spaces - which would mean
    /// they weren't eligible to indent anyway.
    ///
    /// Thin wrapper around `advance_without_indenting_ee` that reports a
    /// column overflow to `to_error` as `BadInputError::LineTooLong`.
    ///
    /// # Errors
    ///
    /// Returns `NoProgress`, the error built by `to_error`, and the unchanged
    /// state when `column + quantity` does not fit in a `u16`.
    ///
    /// # Panics
    ///
    /// Panics if `quantity` is greater than the number of remaining bytes.
    pub fn advance_without_indenting_e<TE, E>(
        self,
        quantity: usize,
        to_error: TE,
    ) -> Result<Self, (Progress, E, Self)>
    where
        TE: Fn(BadInputError, Row, Col) -> E,
    {
        self.advance_without_indenting_ee(quantity, |r, c| {
            to_error(BadInputError::LineTooLong, r, c)
        })
    }

    /// Consumes `quantity` bytes and moves `column` forward by the same
    /// amount, leaving `line` and `indent_col` untouched.
    ///
    /// # Errors
    ///
    /// Returns `NoProgress`, the error built by `to_error` from the current
    /// line and column, and the unchanged state when `column + quantity`
    /// does not fit in a `u16`.
    ///
    /// # Panics
    ///
    /// Panics if `quantity` is greater than the number of remaining bytes.
    pub fn advance_without_indenting_ee<TE, E>(
        self,
        quantity: usize,
        to_error: TE,
    ) -> Result<Self, (Progress, E, Self)>
    where
        TE: Fn(Row, Col) -> E,
    {
        // Do the arithmetic in usize so that neither the addition nor the
        // range check can silently wrap.
        match usize::from(self.column).checked_add(quantity) {
            Some(column_usize) if column_usize <= usize::from(u16::MAX) => Ok(State {
                bytes: &self.bytes[quantity..],
                // Lossless: the guard above proved the value fits in a u16.
                column: column_usize as u16,
                // `line` and `indent_col` are carried over untouched.
                ..self
            }),
            _ => Err((NoProgress, to_error(self.line, self.column), self)),
        }
    }

    /// Returns a Region corresponding to the current state, but
    /// with the end_col advanced by the given amount. This is
    /// useful when parsing something "manually" (using input.chars())
    /// and thus wanting a Region while not having access to loc().
    ///
    /// # Panics
    ///
    /// Panics with "len_region overflowed" if `column + length` does not
    /// fit in a `u16`.
    pub fn len_region(&self, length: u16) -> Region {
        let end_col = self
            .column
            .checked_add(length)
            .expect("len_region overflowed");

        Region {
            start_col: self.column,
            start_line: self.line,
            end_col,
            end_line: self.line,
        }
    }

    /// Wraps `reason` in a failing parse result that carries `progress` and
    /// this state. The arena argument is accepted but not used.
    pub fn fail<T, X>(
        self,
        _arena: &'a Bump,
        progress: Progress,
        reason: X,
    ) -> Result<(Progress, T, Self), (Progress, X, Self)> {
        Err((progress, reason, self))
    }
}
/// Multi-line debug rendering: the remaining input (shown as text when it is
/// valid UTF-8, as raw bytes otherwise), then the position and indentation.
impl<'a> fmt::Debug for State<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "State {{")?;

        // Prefer a readable string; fall back to the byte values when the
        // remaining input is not valid UTF-8.
        if let Ok(text) = std::str::from_utf8(self.bytes) {
            write!(f, "\n\tbytes: [utf8] {:?}", text)?;
        } else {
            write!(f, "\n\tbytes: [invalid utf8] {:?}", self.bytes)?;
        }

        let (line, column) = (self.line, self.column);
        write!(f, "\n\t(line, col): ({}, {}),", line, column)?;
        write!(f, "\n\tindent_col: {}", self.indent_col)?;

        write!(f, "\n}}")
    }
}
#[test]
fn state_size() {
    // Guard against State growing: it must occupy at most 8 machine words
    // so that it still fits in a typical cache line.
    let word = std::mem::size_of::<usize>();
    let limit = word * 8;
    let actual = std::mem::size_of::<State>();

    assert!(actual <= limit, "{:?} <= {:?}", actual, limit);
}

View File

@ -1,7 +1,8 @@
use crate::ast::{EscapedChar, StrLiteral, StrSegment};
use crate::expr;
use crate::parser::Progress::*;
use crate::parser::{allocated, loc, specialize_ref, word1, BadInputError, EString, Parser, State};
use crate::parser::{allocated, loc, specialize_ref, word1, BadInputError, EString, Parser};
use crate::state::State;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
@ -11,7 +12,7 @@ fn ascii_hex_digits<'a>() -> impl Parser<'a, &'a str, EString<'a>> {
move |arena, state: State<'a>| {
let mut buf = bumpalo::collections::String::new_in(arena);
for &byte in state.bytes.iter() {
for &byte in state.bytes().iter() {
if (byte as char).is_ascii_hexdigit() {
buf.push(byte as char);
} else if buf.is_empty() {
@ -53,15 +54,15 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
let is_multiline;
let mut bytes;
if state.bytes.starts_with(b"\"\"\"") {
if state.bytes().starts_with(b"\"\"\"") {
// we will be parsing a multi-string
is_multiline = true;
bytes = state.bytes[3..].iter();
bytes = state.bytes()[3..].iter();
state = advance_state!(state, 3)?;
} else if state.bytes.starts_with(b"\"") {
} else if state.bytes().starts_with(b"\"") {
// we will be parsing a single-string
is_multiline = false;
bytes = state.bytes[1..].iter();
bytes = state.bytes()[1..].iter();
state = advance_state!(state, 1)?;
} else {
return Err((NoProgress, EString::Open(state.line, state.column), state));
@ -97,7 +98,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// something which signalled that we should end the
// current segment - so use segment_parsed_bytes - 1 here,
// to exclude that char we just parsed.
let string_bytes = &state.bytes[0..(segment_parsed_bytes - 1)];
let string_bytes = &state.bytes()[0..(segment_parsed_bytes - 1)];
match std::str::from_utf8(string_bytes) {
Ok(string) => {
@ -224,7 +225,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// Advance past the `\(` before using the expr parser
state = advance_state!(state, 2)?;
let original_byte_count = state.bytes.len();
let original_byte_count = state.bytes().len();
// This is an interpolated variable.
// Parse an arbitrary expression, then give a
@ -237,7 +238,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
.parse(arena, state)?;
// Advance the iterator past the expr we just parsed.
for _ in 0..(original_byte_count - new_state.bytes.len()) {
for _ in 0..(original_byte_count - new_state.bytes().len()) {
bytes.next();
}
@ -251,7 +252,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
// Advance past the `\u` before using the expr parser
state = advance_state!(state, 2)?;
let original_byte_count = state.bytes.len();
let original_byte_count = state.bytes().len();
// Parse the hex digits, surrounded by parens, then
// give a canonicalization error if the digits form
@ -264,7 +265,7 @@ pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>, EString<'a>> {
.parse(arena, state)?;
// Advance the iterator past the expr we just parsed.
for _ in 0..(original_byte_count - new_state.bytes.len()) {
for _ in 0..(original_byte_count - new_state.bytes().len()) {
bytes.next();
}

View File

@ -2,7 +2,8 @@ use crate::ast;
use crate::module::module_defs;
// use crate::module::module_defs;
use crate::parser::Parser;
use crate::parser::{State, SyntaxError};
use crate::parser::SyntaxError;
use crate::state::State;
use bumpalo::collections::Vec as BumpVec;
use bumpalo::Bump;
use roc_region::all::Located;

View File

@ -5,8 +5,8 @@ use crate::parser::{
allocated, backtrackable, optional, specialize, specialize_ref, word1, word2, EType,
ETypeApply, ETypeInParens, ETypeRecord, ETypeTagUnion, ParseResult, Parser,
Progress::{self, *},
State,
};
use crate::state::State;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
use roc_region::all::{Located, Region};
@ -430,7 +430,7 @@ fn parse_concrete_type<'a>(
arena: &'a Bump,
state: State<'a>,
) -> ParseResult<'a, TypeAnnotation<'a>, ETypeApply> {
let initial_bytes = state.bytes;
let initial_bytes = state.bytes();
match crate::ident::concrete_type().parse(arena, state) {
Ok((_, (module_name, type_name), state)) => {
@ -444,8 +444,8 @@ fn parse_concrete_type<'a>(
Err((MadeProgress, _, mut state)) => {
// we made some progress, but ultimately failed.
// that means a malformed type name
let chomped = crate::ident::chomp_malformed(state.bytes);
let delta = initial_bytes.len() - state.bytes.len();
let chomped = crate::ident::chomp_malformed(state.bytes());
let delta = initial_bytes.len() - state.bytes().len();
let parsed_str =
unsafe { std::str::from_utf8_unchecked(&initial_bytes[..chomped + delta]) };

View File

@ -20,7 +20,8 @@ mod test_parse {
use roc_parse::ast::StrSegment::*;
use roc_parse::ast::{self, EscapedChar};
use roc_parse::module::module_defs;
use roc_parse::parser::{Parser, State, SyntaxError};
use roc_parse::parser::{Parser, SyntaxError};
use roc_parse::state::State;
use roc_parse::test_helpers::parse_expr_with;
use roc_region::all::{Located, Region};
use roc_test_utils::assert_multiline_str_eq;

View File

@ -14,7 +14,8 @@ use roc_load::docs::{ModuleDocumentation, RecordField};
use roc_load::file::{LoadedModule, LoadingProblem};
use roc_module::symbol::{IdentIds, Interns, ModuleId, ModuleIds};
use roc_parse::ident::{parse_ident, Ident};
use roc_parse::parser::{State, SyntaxError};
use roc_parse::parser::SyntaxError;
use roc_parse::state::State;
use roc_region::all::Region;
use std::fs;
use std::path::{Path, PathBuf};

View File

@ -186,7 +186,7 @@ mod test_reporting {
{
use ven_pretty::DocAllocator;
use roc_parse::parser::State;
use roc_parse::state::State;
let state = State::new(src.as_bytes());