This commit is contained in:
Folkert 2021-02-08 22:47:22 +01:00
parent 0ccf17007e
commit 8f83bb4518
6 changed files with 26 additions and 407 deletions

View File

@ -191,7 +191,7 @@ pub fn main() -> io::Result<()> {
Ok(())
}
fn report_parse_error<'a>(fail: SyntaxError<'a>) {
fn report_parse_error(fail: SyntaxError) {
println!("TODO Gracefully report parse error in repl: {:?}", fail);
}

View File

@ -301,7 +301,9 @@ pub fn space0_e<'a, E>(
where
E: 'a,
{
spaces0_help(min_indent, space_problem, indent_problem)
spaces_help(false, min_indent, space_problem, indent_problem, |_, _| {
unreachable!("no spaces are required, so this is unreachable")
})
}
/// One or more (spaces/comments/newlines).
@ -422,253 +424,6 @@ fn spaces<'a>(
)
}
/// Builds a parser for zero or more blankspace items: spaces, carriage returns,
/// newlines and `#` line comments.
///
/// On success the parser yields an arena-allocated slice holding one entry per
/// newline seen outside a comment (`Newline`) and one entry per newline-terminated
/// comment (`LineComment`, or `DocComment` for `## ...` and bare `##` lines).
///
/// * `min_indent` - indentation passed to `check_indent_e`, both right before a
///   comment starts and (only if at least one newline was seen outside a comment)
///   when the blankspace run ends.
/// * `space_problem` - builds the error value for bad input; it is called directly
///   with `BadInputError::BadUtf8` here and is also handed to the state-advancing
///   helpers.
/// * `indent_problem` - builds the error value handed to `check_indent_e`.
///
/// A failed indentation check is reported together with a clone of the state the
/// parser was entered with.
#[inline(always)]
fn spaces0_help<'a, E>(
min_indent: u16,
space_problem: fn(BadInputError, Row, Col) -> E,
indent_problem: fn(Row, Col) -> E,
) -> impl Parser<'a, &'a [CommentOrNewline<'a>], E>
where
E: 'a,
{
move |arena: &'a Bump, state: State<'a>| {
// Kept so that indentation failures can be reported against the starting state.
let original_state = state.clone();
let mut space_list = Vec::new_in(arena);
// NOTE(review): accumulated in the loop below but never read in this function.
let mut bytes_parsed = 0;
// Text of the comment currently being read (without the leading `#`).
let mut comment_line_buf = String::new_in(arena);
let mut line_state = LineState::Normal;
let mut state = state;
// Only set by newlines seen in `LineState::Normal`; a newline that terminates
// a comment does not set it.
let mut any_newlines = false;
// Used with the current remaining length to compute `Progress`.
let start_bytes_len = state.bytes.len();
while !state.bytes.is_empty() {
match peek_utf8_char(&state) {
Ok((ch, utf8_len)) => {
bytes_parsed += utf8_len;
match line_state {
LineState::Normal => {
match ch {
' ' => {
// Don't check indentation here; it might not be enough
// indentation yet, but maybe it will be after more spaces happen!
state =
state.advance_spaces_e(arena, 1, space_problem.clone())?;
}
'\r' => {
// Ignore carriage returns.
state = state.advance_spaces_e(arena, 1, space_problem)?;
}
'\n' => {
// don't need to check the indent here since we'll reset it
// anyway
state = state.newline_e(arena, space_problem)?;
// Newlines only get added to the list when they're outside comments.
space_list.push(Newline);
any_newlines = true;
}
'#' => {
// Check indentation to make sure we were indented enough
// before this comment began.
let progress =
Progress::from_lengths(start_bytes_len, state.bytes.len());
state = state
.check_indent_e(arena, min_indent, indent_problem)
.map_err(|(fail, _)| {
(progress, fail, original_state.clone())
})?
.advance_without_indenting_e(arena, 1, space_problem)?;
// We're now parsing a line comment!
line_state = LineState::Comment;
}
_ => {
// Any other character ends the blankspace run.
return {
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
let progress = Progress::from_lengths(
start_bytes_len,
state.bytes.len(),
);
if any_newlines {
state = state
.check_indent_e(arena, min_indent, indent_problem)
.map_err(|(fail, _)| {
(progress, fail, original_state.clone())
})?;
}
Ok((progress, space_list.into_bump_slice(), state))
};
}
}
}
LineState::Comment => {
match ch {
' ' => {
// If we're in a line comment, this won't affect indentation anyway.
state = state.advance_without_indenting_e(
arena,
1,
space_problem,
)?;
if comment_line_buf.len() == 1 {
match comment_line_buf.chars().next() {
Some('#') => {
// This is a comment beginning with `## ` - that is,
// a doc comment.
//
// (The space is important; otherwise, this is not
// a doc comment, but rather something like a
// big separator block, e.g. ############)
line_state = LineState::DocComment;
// This is now the beginning of the doc comment.
comment_line_buf.clear();
}
_ => {
comment_line_buf.push(ch);
}
}
} else {
comment_line_buf.push(ch);
}
}
'\n' => {
state = state.newline_e(arena, space_problem)?;
match (comment_line_buf.len(), comment_line_buf.chars().next())
{
(1, Some('#')) => {
// This is a line with `##` - that is,
// a doc comment new line.
space_list.push(DocComment(""));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
_ => {
// This was a newline, so end this line comment.
space_list.push(LineComment(
comment_line_buf.into_bump_str(),
));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
}
}
nonblank => {
// Chars can have byte lengths of more than 1!
state = state.advance_without_indenting_e(
arena,
nonblank.len_utf8(),
space_problem,
)?;
comment_line_buf.push(nonblank);
}
}
}
LineState::DocComment => {
match ch {
' ' => {
// If we're in a doc comment, this won't affect indentation anyway.
state = state.advance_without_indenting_e(
arena,
1,
space_problem,
)?;
comment_line_buf.push(ch);
}
'\n' => {
state = state.newline_e(arena, space_problem)?;
// This was a newline, so end this doc comment.
space_list.push(DocComment(comment_line_buf.into_bump_str()));
comment_line_buf = String::new_in(arena);
line_state = LineState::Normal;
}
nonblank => {
// Advance by the full encoded length of the character.
state = state.advance_without_indenting_e(
arena,
utf8_len,
space_problem,
)?;
comment_line_buf.push(nonblank);
}
}
}
}
}
Err(SyntaxError::BadUtf8) => {
// If we hit an invalid UTF-8 character, bail out immediately.
let progress = Progress::from_lengths(start_bytes_len, state.bytes.len());
let row = state.line;
let col = state.column;
return state.fail(
arena,
progress,
space_problem(BadInputError::BadUtf8, row, col),
);
}
Err(_) => {
// Any other peek error is not reported: the blankspace run simply ends
// here and whatever was collected so far is returned.
let space_slice = space_list.into_bump_slice();
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
let progress = Progress::from_lengths(start_bytes_len, state.bytes.len());
if any_newlines {
return Ok((
progress,
space_slice,
state
.check_indent_e(arena, min_indent, indent_problem)
.map_err(|(fail, _)| (progress, fail, original_state))?,
));
}
return Ok((progress, space_slice, state));
}
};
}
// The input is exhausted.
// NOTE(review): if the input ends while still inside a comment (no trailing
// newline), the text buffered in `comment_line_buf` is never pushed to
// `space_list` — confirm this is intended.
//
// First make sure we were indented enough!
//
// (We only do this if we've encountered any newlines.
// Otherwise, we assume indentation is already correct.
// It's actively important for correctness that we skip
// this check if there are no newlines, because otherwise
// we would have false positives for single-line defs.)
let progress = Progress::from_lengths(start_bytes_len, state.bytes.len());
if any_newlines {
state = state
.check_indent_e(arena, min_indent, indent_problem)
.map_err(|(fail, _)| (progress, fail, original_state))?;
}
Ok((progress, space_list.into_bump_slice(), state))
}
}
#[inline(always)]
fn spaces_help<'a, E>(
require_at_least_one: bool,

View File

@ -1,9 +1,7 @@
use crate::ast::Attempting;
use crate::keyword;
use crate::parser::Progress::{self, *};
use crate::parser::{
backtrackable, peek_utf8_char, unexpected, ParseResult, Parser, State, SyntaxError,
};
use crate::parser::{peek_utf8_char, unexpected, ParseResult, Parser, State, SyntaxError};
use bumpalo::collections::string::String;
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;

View File

@ -57,7 +57,7 @@ impl<'a> State<'a> {
pub fn check_indent(
self,
arena: &'a Bump,
_arena: &'a Bump,
min_indent: u16,
) -> Result<Self, (SyntaxError<'a>, Self)> {
if self.indent_col < min_indent {
@ -69,7 +69,7 @@ impl<'a> State<'a> {
pub fn check_indent_e<TE, E>(
self,
arena: &'a Bump,
_arena: &'a Bump,
min_indent: u16,
to_error: TE,
) -> Result<Self, (E, Self)>
@ -235,7 +235,7 @@ impl<'a> State<'a> {
/// Return a failing ParseResult for the given FailReason
pub fn fail<T, X>(
self,
arena: &'a Bump,
_arena: &'a Bump,
progress: Progress,
reason: X,
) -> Result<(Progress, T, Self), (Progress, X, Self)> {
@ -1000,12 +1000,12 @@ where
}
}
pub fn fail_when_progress<'a, T, E>(
pub fn fail_when_progress<T, E>(
progress: Progress,
fail: E,
value: T,
state: State<'a>,
) -> ParseResult<'a, T, E> {
state: State<'_>,
) -> ParseResult<'_, T, E> {
match progress {
MadeProgress => Err((MadeProgress, fail, state)),
NoProgress => Ok((NoProgress, value, state)),
@ -1316,139 +1316,6 @@ where
}
}
/// Builds a parser that skips blankspace (as scanned by `eat_spaces`: spaces,
/// carriage returns, newlines and `#` line comments) and then checks the
/// indentation of the position that was reached.
///
/// The parser succeeds with `()` when the scan met no tab and the column reached
/// is greater than both the state's `indent_col` and `1`. The returned state then
/// carries the new line, the new column and the remaining input bytes, so the
/// skipped blankspace is actually consumed. In every other case the parser fails
/// with `NoProgress` and an error built by `to_indent_error`.
///
/// * `to_space_error` - not used by this function at present.
///   NOTE(review): the tab case reports through `to_indent_error`; it presumably
///   was meant to go through `to_space_error` — confirm before changing which
///   error is reported.
/// * `to_indent_error` - builds the error value from a row and column.
pub fn chomp_and_check_indent<'a, E1, E2, X>(
    to_space_error: E1,
    to_indent_error: E2,
) -> impl Parser<'a, (), X>
where
    E1: Fn(BadInputError, Row, Col) -> X,
    E2: Fn(Row, Col) -> X,
    X: 'a,
{
    move |_arena, state: State<'a>| {
        let SpaceState {
            status,
            row,
            col,
            bytes,
        } = eat_spaces(state.bytes, state.line, state.column);

        let indent_col = state.indent_col;

        // Any change of position means blankspace was skipped.
        let progress = if state.column != col || state.line != row {
            MadeProgress
        } else {
            NoProgress
        };

        match status {
            Status::Good => {
                if col > indent_col && col > 1 {
                    // Keep `bytes` in sync with the advanced line/column; without
                    // it the returned state would still point at the blankspace
                    // that was just skipped.
                    let new_state = State {
                        line: row,
                        column: col,
                        bytes,
                        ..state
                    };

                    Ok((progress, (), new_state))
                } else {
                    // Not indented enough: report at the position we started from.
                    Err((NoProgress, to_indent_error(state.line, state.column), state))
                }
            }
            Status::HasTab => {
                // Report at the tab itself, with a state positioned on it.
                let new_state = State {
                    line: row,
                    column: col,
                    bytes,
                    ..state
                };

                Err((NoProgress, to_indent_error(row, col), new_state))
            }
        }
    }
}
/// Outcome of a blankspace scan performed by `eat_spaces` / `eat_line_comment`.
enum Status {
/// The scan stopped at the end of input or at a byte that is not blankspace.
Good,
/// The scan stopped because `eat_spaces` met a tab byte (`\t`).
HasTab,
}
/// Result of a blankspace scan: why it stopped and where.
struct SpaceState<'a> {
/// Whether the scan ended normally or on a tab.
status: Status,
/// Row at which the scan stopped.
row: Row,
/// Column at which the scan stopped.
col: Col,
/// Input that was not consumed by the scan (empty when the input ran out).
bytes: &'a [u8],
}
/// Scans blankspace from the front of `bytes`, tracking the position as it goes.
///
/// A space moves one column to the right, a newline moves to column 1 of the next
/// row, and a carriage return is skipped without touching the position. A `#`
/// hands the rest of the input over to `eat_line_comment`. The scan stops at a
/// tab (`Status::HasTab`), at any other byte, or at the end of input (both
/// `Status::Good`); the bytes that were not consumed are returned in the result.
fn eat_spaces<'a>(mut bytes: &'a [u8], mut row: Row, mut col: Col) -> SpaceState<'a> {
    while let Some((&byte, rest)) = bytes.split_first() {
        match byte {
            b' ' => {
                col += 1;
                bytes = rest;
            }
            b'\n' => {
                row += 1;
                col = 1;
                bytes = rest;
            }
            b'\r' => {
                // Carriage returns are dropped and do not count as a column.
                bytes = rest;
            }
            b'#' => {
                // The `#` itself occupies one column.
                return eat_line_comment(rest, row, col + 1);
            }
            b'\t' => {
                // The tab is left unconsumed so the caller can point at it.
                return SpaceState {
                    status: Status::HasTab,
                    row,
                    col,
                    bytes,
                };
            }
            _ => {
                // First byte that is not blankspace: stop in front of it.
                return SpaceState {
                    status: Status::Good,
                    row,
                    col,
                    bytes,
                };
            }
        }
    }

    // Ran out of input.
    SpaceState {
        status: Status::Good,
        row,
        col,
        bytes: &[],
    }
}
/// Skips the remainder of a line comment; `bytes` starts right after the `#`.
///
/// Every byte before the newline advances the column by one. When the newline is
/// reached, scanning continues with `eat_spaces` at column 1 of the next row. If
/// the input ends first, the scan finishes with `Status::Good` and no bytes left.
fn eat_line_comment<'a>(mut bytes: &'a [u8], row: Row, mut col: Col) -> SpaceState<'a> {
    while let Some((&byte, rest)) = bytes.split_first() {
        if byte == b'\n' {
            return eat_spaces(rest, row + 1, 1);
        }

        // NOTE: the column is advanced once per byte here; elm checks the character
        // width at this point, presumably to deal with unicode.
        col += 1;
        bytes = rest;
    }

    // The comment ran up to the end of the input.
    SpaceState {
        status: Status::Good,
        row,
        col,
        bytes: &[],
    }
}
#[macro_export]
macro_rules! map {
($parser:expr, $transform:expr) => {
@ -1592,7 +1459,6 @@ macro_rules! attempt {
.map(|(progress, answer, mut state)| {
// If the parser succeeded, go back to what we were originally attempting.
// (If it failed, that's exactly where we care what we were attempting!)
// debug_assert_eq!(!state.context_stack.is_empty());
match state.context_stack.uncons() {
Some((_item, rest)) => {
state.context_stack = rest;
@ -1813,7 +1679,7 @@ pub fn parse_utf8<'a>(bytes: &[u8]) -> Result<&str, SyntaxError<'a>> {
}
pub fn end_of_file<'a>() -> impl Parser<'a, (), SyntaxError<'a>> {
|arena: &'a Bump, state: State<'a>| {
|_arena: &'a Bump, state: State<'a>| {
if state.has_reached_end() {
Ok((NoProgress, (), state))
} else {

View File

@ -1,11 +1,11 @@
use crate::ast::{AssignedField, Attempting, CommentOrNewline, Tag, TypeAnnotation};
use crate::ast::{Attempting, CommentOrNewline, Tag, TypeAnnotation};
use crate::blankspace::{space0_around, space0_before, space1, space1_before};
use crate::expr::{global_tag, private_tag};
use crate::ident::join_module_parts;
use crate::keyword;
use crate::parser::{
allocated, ascii_char, ascii_string, chomp_and_check_indent, not, optional, peek_utf8_char,
specialize, unexpected, word1, BadInputError, Either, ParseResult, Parser,
allocated, ascii_char, ascii_string, not, optional, peek_utf8_char, specialize, unexpected,
word1, BadInputError, Either, ParseResult, Parser,
Progress::{self, *},
State, SyntaxError, TRecord, Type,
};

View File

@ -115,7 +115,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "UNFINISHED RECORD TYPE".to_string(),
}
@ -135,7 +135,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "UNFINISHED RECORD TYPE".to_string(),
}
@ -158,7 +158,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "UNFINISHED RECORD TYPE".to_string(),
}
@ -180,7 +180,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "UNFINISHED RECORD TYPE".to_string(),
}
@ -204,7 +204,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "PROBLEM IN RECORD TYPE".to_string(),
}
@ -229,7 +229,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "NEED MORE INDENTATION".to_string(),
}
@ -252,7 +252,7 @@ fn to_trecord_report<'a>(
]);
Report {
filename: filename.clone(),
filename,
doc,
title: "UNFINISHED RECORD TYPE".to_string(),
}
@ -284,7 +284,7 @@ fn what_is_next<'a>(source_lines: &'a [&'a str], row: Row, col: Col) -> Next<'a>
.find(|keyword| starts_with_keyword(chars, keyword))
{
Some(keyword) => Next::Keyword(keyword),
None => match chars.chars().nth(0) {
None => match chars.chars().next() {
None => Next::Other(None),
Some(c) => match c {
')' => Next::Close("parenthesis", ')'),
@ -300,8 +300,8 @@ fn what_is_next<'a>(source_lines: &'a [&'a str], row: Row, col: Col) -> Next<'a>
}
fn starts_with_keyword(rest_of_line: &str, keyword: &str) -> bool {
if rest_of_line.starts_with(keyword) {
match (&rest_of_line[keyword.len()..]).chars().nth(0) {
if let Some(stripped) = rest_of_line.strip_prefix(keyword) {
match stripped.chars().next() {
None => true,
Some(c) => !c.is_alphanumeric(),
}
@ -316,7 +316,7 @@ fn next_line_starts_with_close_curly(source_lines: &[&str], row: Row) -> Option<
Some(line) => {
let spaces_dropped = line.trim_start_matches(' ');
match spaces_dropped.chars().nth(0) {
match spaces_dropped.chars().next() {
Some('}') => Some((row + 1, (line.len() - spaces_dropped.len()) as u16)),
_ => None,
}