mirror of
https://github.com/roc-lang/roc.git
synced 2024-11-11 05:34:11 +03:00
Merge pull request #3071 from rtfeldman/faster-comment-parsing
still faster comment parsing
This commit is contained in:
commit
e6abc8eb77
@ -180,56 +180,106 @@ fn spaces_help_help<'a, E>(
|
||||
where
|
||||
E: 'a + SpaceProblem,
|
||||
{
|
||||
use SpaceState::*;
|
||||
move |arena, state: State<'a>| match fast_eat_spaces(&state) {
|
||||
FastSpaceState::HasTab(position) => Err((
|
||||
MadeProgress,
|
||||
E::space_problem(BadInputError::HasTab, position),
|
||||
state,
|
||||
)),
|
||||
FastSpaceState::Good {
|
||||
newlines,
|
||||
consumed,
|
||||
column,
|
||||
} => {
|
||||
if consumed == 0 {
|
||||
Ok((NoProgress, &[] as &[_], state))
|
||||
} else if column < min_indent {
|
||||
Err((MadeProgress, indent_problem(state.pos()), state))
|
||||
} else {
|
||||
let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
|
||||
let mut spaces = eat_spaces(state, false, comments_and_newlines);
|
||||
|
||||
move |arena, state: State<'a>| {
|
||||
let comments_and_newlines = Vec::new_in(arena);
|
||||
match eat_spaces(state.clone(), false, comments_and_newlines) {
|
||||
HasTab(state) => Err((
|
||||
MadeProgress,
|
||||
E::space_problem(BadInputError::HasTab, state.pos()),
|
||||
state,
|
||||
)),
|
||||
Good {
|
||||
state: mut new_state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
} => {
|
||||
if new_state.bytes() == state.bytes() {
|
||||
Ok((NoProgress, &[] as &[_], state))
|
||||
} else if multiline {
|
||||
// we parsed at least one newline
|
||||
|
||||
new_state.indent_column = new_state.column();
|
||||
|
||||
if new_state.column() >= min_indent {
|
||||
Ok((
|
||||
MadeProgress,
|
||||
comments_and_newlines.into_bump_slice(),
|
||||
new_state,
|
||||
))
|
||||
} else {
|
||||
Err((MadeProgress, indent_problem(state.pos()), state))
|
||||
}
|
||||
} else {
|
||||
Ok((
|
||||
MadeProgress,
|
||||
comments_and_newlines.into_bump_slice(),
|
||||
new_state,
|
||||
))
|
||||
if spaces.multiline {
|
||||
spaces.state.indent_column = spaces.state.column();
|
||||
}
|
||||
|
||||
Ok((
|
||||
MadeProgress,
|
||||
spaces.comments_and_newlines.into_bump_slice(),
|
||||
spaces.state,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
enum SpaceState<'a> {
|
||||
enum FastSpaceState {
|
||||
Good {
|
||||
state: State<'a>,
|
||||
multiline: bool,
|
||||
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
newlines: usize,
|
||||
consumed: usize,
|
||||
column: u32,
|
||||
},
|
||||
HasTab(State<'a>),
|
||||
HasTab(Position),
|
||||
}
|
||||
|
||||
/// Scans the leading run of spaces, line breaks and `#` line comments in
/// `state.bytes()` without building any output.
///
/// Returns `Good` with the number of `\n` bytes seen (`newlines`), the number
/// of bytes scanned (`consumed`), and the byte distance from the most recent
/// line start to the point where scanning stopped (`column`).
/// Returns `HasTab` with the position of the first tab byte reached, including
/// a tab inside a comment.
fn fast_eat_spaces(state: &State) -> FastSpaceState {
|
||||
use FastSpaceState::*;
|
||||
|
||||
let mut newlines = 0;
|
||||
// Offset into `bytes` of the next byte to inspect.
let mut index = 0;
|
||||
// Absolute offset of the start of the line that `index` is currently on.
let mut line_start = state.line_start.offset as usize;
|
||||
// Absolute offset of `bytes[0]`; added to `index` to get absolute offsets.
let base_offset = state.pos().offset as usize;
|
||||
|
||||
let bytes = state.bytes();
|
||||
let length = bytes.len();
|
||||
|
||||
'outer: while index < length {
|
||||
match bytes[index] {
|
||||
b' ' => {
|
||||
index += 1;
|
||||
}
|
||||
b'\n' => {
|
||||
newlines += 1;
|
||||
index += 1;
|
||||
// The next line starts right after the newline byte.
line_start = base_offset + index;
|
||||
}
|
||||
// A carriage return resets the line start as well, but is not counted in `newlines`.
b'\r' => {
|
||||
index += 1;
|
||||
line_start = base_offset + index;
|
||||
}
|
||||
b'\t' => {
|
||||
return HasTab(Position::new((base_offset + index) as u32));
|
||||
}
|
||||
b'#' => {
|
||||
index += 1;
|
||||
|
||||
// Skip the comment body. The terminating byte is not consumed here: control
// goes back to the outer match, which handles `\n`, `\r` and `\t` itself.
// If the input ends inside the comment, both loops end on `index == length`.
while index < length {
|
||||
match bytes[index] {
|
||||
b'\n' | b'\t' | b'\r' => {
|
||||
continue 'outer;
|
||||
}
|
||||
|
||||
_ => {
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Any other byte ends the scan; it is left unconsumed.
_ => break,
|
||||
}
|
||||
}
|
||||
|
||||
Good {
|
||||
newlines,
|
||||
consumed: index,
|
||||
column: ((base_offset + index) - line_start) as u32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Value returned by `eat_spaces` and `eat_line_comment`.
struct SpaceState<'a> {
|
||||
/// Parser state handed back to the caller alongside the collected items.
state: State<'a>,
|
||||
/// When set, the caller resets `state.indent_column` to the current column.
/// NOTE(review): presumably means at least one newline was consumed — confirm.
multiline: bool,
|
||||
/// Comments and newlines collected so far; the caller converts this into a bump slice.
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
}
|
||||
|
||||
fn eat_spaces<'a>(
|
||||
@ -237,8 +287,6 @@ fn eat_spaces<'a>(
|
||||
mut multiline: bool,
|
||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
) -> SpaceState<'a> {
|
||||
use SpaceState::*;
|
||||
|
||||
for c in state.bytes() {
|
||||
match c {
|
||||
b' ' => {
|
||||
@ -252,9 +300,8 @@ fn eat_spaces<'a>(
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => {
|
||||
return HasTab(state);
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
return eat_line_comment(state, multiline, comments_and_newlines);
|
||||
@ -263,7 +310,7 @@ fn eat_spaces<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
Good {
|
||||
SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
@ -275,8 +322,6 @@ fn eat_line_comment<'a>(
|
||||
mut multiline: bool,
|
||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||
) -> SpaceState<'a> {
|
||||
use SpaceState::*;
|
||||
|
||||
let mut index = 0;
|
||||
let bytes = state.bytes();
|
||||
let length = bytes.len();
|
||||
@ -313,9 +358,7 @@ fn eat_line_comment<'a>(
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => {
|
||||
return HasTab(state);
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
@ -327,7 +370,7 @@ fn eat_line_comment<'a>(
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return Good {
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
@ -337,7 +380,7 @@ fn eat_line_comment<'a>(
|
||||
// consume the second #
|
||||
state = state.advance(1);
|
||||
|
||||
return Good {
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
@ -354,7 +397,7 @@ fn eat_line_comment<'a>(
|
||||
|
||||
while index < length {
|
||||
match bytes[index] {
|
||||
b'\t' => return HasTab(state),
|
||||
b'\t' => unreachable!(),
|
||||
b'\n' => {
|
||||
let comment =
|
||||
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||
@ -381,9 +424,7 @@ fn eat_line_comment<'a>(
|
||||
b'\r' => {
|
||||
state = state.advance_newline();
|
||||
}
|
||||
b'\t' => {
|
||||
return HasTab(state);
|
||||
}
|
||||
b'\t' => unreachable!(),
|
||||
b'#' => {
|
||||
state = state.advance(1);
|
||||
index += 1;
|
||||
@ -395,7 +436,7 @@ fn eat_line_comment<'a>(
|
||||
index += 1;
|
||||
}
|
||||
|
||||
return Good {
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
@ -421,7 +462,7 @@ fn eat_line_comment<'a>(
|
||||
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||
}
|
||||
|
||||
return Good {
|
||||
return SpaceState {
|
||||
state,
|
||||
multiline,
|
||||
comments_and_newlines,
|
||||
|
@ -2705,6 +2705,21 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
|
||||
|
||||
/// Bytes that may appear in a binary operator.
const BINOP_CHAR_SET: &[u8] = b"+-/*=.<>:&|^?%!";
|
||||
|
||||
/// Lookup table indexed by byte value: entry `b` is `true` exactly when `b`
/// occurs in `BINOP_CHAR_SET`.
///
/// The table has 125 entries because the largest byte in the set is `|` (124).
/// Larger byte values fall outside the table, so lookups must be bounds-checked
/// (e.g. via `.get`). Adding a byte above 124 to the set requires growing the table.
const BINOP_CHAR_MASK: [bool; 125] = {
|
||||
let mut result = [false; 125];
|
||||
|
||||
// Index-based `while` loop: this initializer is evaluated in a const context.
let mut i = 0;
|
||||
while i < BINOP_CHAR_SET.len() {
|
||||
let index = BINOP_CHAR_SET[i] as usize;
|
||||
|
||||
result[index] = true;
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
result
|
||||
};
|
||||
|
||||
/// Returns a parser that yields a `BinOp`.
///
/// The returned closure ignores its first argument and forwards the parser
/// state to `operator_help`, passing `EExpr::Start` and `EExpr::BadOperator`.
/// NOTE(review): presumably those two are the error constructors for
/// "no operator here" and "malformed operator" — confirm against `operator_help`.
fn operator<'a>() -> impl Parser<'a, BinOp, EExpr<'a>> {
|
||||
|_, state| operator_help(EExpr::Start, EExpr::BadOperator, state)
|
||||
}
|
||||
@ -2774,10 +2789,11 @@ fn chomp_ops(bytes: &[u8]) -> &str {
|
||||
let mut chomped = 0;
|
||||
|
||||
for c in bytes.iter() {
|
||||
if !BINOP_CHAR_SET.contains(c) {
|
||||
if let Some(true) = BINOP_CHAR_MASK.get(*c as usize) {
|
||||
chomped += 1;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
chomped += 1;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
|
@ -13,11 +13,11 @@ pub struct State<'a> {
|
||||
offset: usize,
|
||||
|
||||
/// Position of the start of the current line
|
||||
line_start: Position,
|
||||
pub(crate) line_start: Position,
|
||||
|
||||
/// Current indentation level, in columns
|
||||
/// (so no indent is col 1 - this saves an arithmetic operation.)
|
||||
pub indent_column: u32,
|
||||
pub(crate) indent_column: u32,
|
||||
}
|
||||
|
||||
impl<'a> State<'a> {
|
||||
|
Loading…
Reference in New Issue
Block a user