Merge pull request #3071 from rtfeldman/faster-comment-parsing

still faster comment parsing
This commit is contained in:
Richard Feldman 2022-05-16 17:20:13 -04:00 committed by GitHub
commit e6abc8eb77
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 64 deletions

View File

@ -180,56 +180,106 @@ fn spaces_help_help<'a, E>(
where
E: 'a + SpaceProblem,
{
use SpaceState::*;
move |arena, state: State<'a>| match fast_eat_spaces(&state) {
FastSpaceState::HasTab(position) => Err((
MadeProgress,
E::space_problem(BadInputError::HasTab, position),
state,
)),
FastSpaceState::Good {
newlines,
consumed,
column,
} => {
if consumed == 0 {
Ok((NoProgress, &[] as &[_], state))
} else if column < min_indent {
Err((MadeProgress, indent_problem(state.pos()), state))
} else {
let comments_and_newlines = Vec::with_capacity_in(newlines, arena);
let mut spaces = eat_spaces(state, false, comments_and_newlines);
move |arena, state: State<'a>| {
let comments_and_newlines = Vec::new_in(arena);
match eat_spaces(state.clone(), false, comments_and_newlines) {
HasTab(state) => Err((
MadeProgress,
E::space_problem(BadInputError::HasTab, state.pos()),
state,
)),
Good {
state: mut new_state,
multiline,
comments_and_newlines,
} => {
if new_state.bytes() == state.bytes() {
Ok((NoProgress, &[] as &[_], state))
} else if multiline {
// we parsed at least one newline
new_state.indent_column = new_state.column();
if new_state.column() >= min_indent {
Ok((
MadeProgress,
comments_and_newlines.into_bump_slice(),
new_state,
))
} else {
Err((MadeProgress, indent_problem(state.pos()), state))
}
} else {
Ok((
MadeProgress,
comments_and_newlines.into_bump_slice(),
new_state,
))
if spaces.multiline {
spaces.state.indent_column = spaces.state.column();
}
Ok((
MadeProgress,
spaces.comments_and_newlines.into_bump_slice(),
spaces.state,
))
}
}
}
}
enum SpaceState<'a> {
enum FastSpaceState {
Good {
state: State<'a>,
multiline: bool,
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
newlines: usize,
consumed: usize,
column: u32,
},
HasTab(State<'a>),
HasTab(Position),
}
fn fast_eat_spaces(state: &State) -> FastSpaceState {
use FastSpaceState::*;
let mut newlines = 0;
let mut index = 0;
let mut line_start = state.line_start.offset as usize;
let base_offset = state.pos().offset as usize;
let bytes = state.bytes();
let length = bytes.len();
'outer: while index < length {
match bytes[index] {
b' ' => {
index += 1;
}
b'\n' => {
newlines += 1;
index += 1;
line_start = base_offset + index;
}
b'\r' => {
index += 1;
line_start = base_offset + index;
}
b'\t' => {
return HasTab(Position::new((base_offset + index) as u32));
}
b'#' => {
index += 1;
while index < length {
match bytes[index] {
b'\n' | b'\t' | b'\r' => {
continue 'outer;
}
_ => {
index += 1;
}
}
}
}
_ => break,
}
}
Good {
newlines,
consumed: index,
column: ((base_offset + index) - line_start) as u32,
}
}
/// Result of consuming whitespace and comments, as returned by `eat_spaces`
/// and `eat_line_comment`.
struct SpaceState<'a> {
/// Parser state after the consumed input; the caller hands this back as the
/// resulting state of the spaces parser.
state: State<'a>,
/// When true, the caller resets `state.indent_column` to the current column.
/// NOTE(review): presumably set once at least one newline was consumed — confirm
/// against the parts of `eat_spaces` not shown here.
multiline: bool,
/// Comments and newlines collected along the way; the caller converts this
/// into a bump-allocated slice for the parse result.
comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
}
fn eat_spaces<'a>(
@ -237,8 +287,6 @@ fn eat_spaces<'a>(
mut multiline: bool,
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
) -> SpaceState<'a> {
use SpaceState::*;
for c in state.bytes() {
match c {
b' ' => {
@ -252,9 +300,8 @@ fn eat_spaces<'a>(
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'\t' => unreachable!(),
b'#' => {
state = state.advance(1);
return eat_line_comment(state, multiline, comments_and_newlines);
@ -263,7 +310,7 @@ fn eat_spaces<'a>(
}
}
Good {
SpaceState {
state,
multiline,
comments_and_newlines,
@ -275,8 +322,6 @@ fn eat_line_comment<'a>(
mut multiline: bool,
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
) -> SpaceState<'a> {
use SpaceState::*;
let mut index = 0;
let bytes = state.bytes();
let length = bytes.len();
@ -313,9 +358,7 @@ fn eat_line_comment<'a>(
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'\t' => unreachable!(),
b'#' => {
state = state.advance(1);
index += 1;
@ -327,7 +370,7 @@ fn eat_line_comment<'a>(
index += 1;
}
return Good {
return SpaceState {
state,
multiline,
comments_and_newlines,
@ -337,7 +380,7 @@ fn eat_line_comment<'a>(
// consume the second #
state = state.advance(1);
return Good {
return SpaceState {
state,
multiline,
comments_and_newlines,
@ -354,7 +397,7 @@ fn eat_line_comment<'a>(
while index < length {
match bytes[index] {
b'\t' => return HasTab(state),
b'\t' => unreachable!(),
b'\n' => {
let comment =
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
@ -381,9 +424,7 @@ fn eat_line_comment<'a>(
b'\r' => {
state = state.advance_newline();
}
b'\t' => {
return HasTab(state);
}
b'\t' => unreachable!(),
b'#' => {
state = state.advance(1);
index += 1;
@ -395,7 +436,7 @@ fn eat_line_comment<'a>(
index += 1;
}
return Good {
return SpaceState {
state,
multiline,
comments_and_newlines,
@ -421,7 +462,7 @@ fn eat_line_comment<'a>(
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
}
return Good {
return SpaceState {
state,
multiline,
comments_and_newlines,

View File

@ -2705,6 +2705,21 @@ fn number_literal_help<'a>() -> impl Parser<'a, Expr<'a>, ENumber> {
/// Every byte that may appear inside a binary operator.
const BINOP_CHAR_SET: &[u8] = b"+-/*=.<>:&|^?%!";

/// Lookup table built at compile time from `BINOP_CHAR_SET`: entry `b` is `true`
/// exactly when byte `b` is in the set.
///
/// The table has 125 entries, which is just enough for the largest byte in the
/// set (`|`, value 124). Bytes at or above 125 have no entry, so look them up
/// with `.get(..)` rather than by direct indexing.
const BINOP_CHAR_MASK: [bool; 125] = {
    let mut mask = [false; 125];

    // `for` loops are not available in const evaluation, so walk the set with a
    // countdown instead.
    let mut remaining = BINOP_CHAR_SET.len();
    while remaining > 0 {
        remaining -= 1;
        mask[BINOP_CHAR_SET[remaining] as usize] = true;
    }

    mask
};
/// Returns the binary-operator parser.
///
/// The returned parser ignores its first argument and forwards the state to
/// `operator_help`, passing `EExpr::Start` and `EExpr::BadOperator` as the two
/// error constructors.
fn operator<'a>() -> impl Parser<'a, BinOp, EExpr<'a>> {
    move |_arena, state| {
        let to_start_error = EExpr::Start;
        let to_bad_operator_error = EExpr::BadOperator;
        operator_help(to_start_error, to_bad_operator_error, state)
    }
}
@ -2774,10 +2789,11 @@ fn chomp_ops(bytes: &[u8]) -> &str {
let mut chomped = 0;
for c in bytes.iter() {
if !BINOP_CHAR_SET.contains(c) {
if let Some(true) = BINOP_CHAR_MASK.get(*c as usize) {
chomped += 1;
} else {
break;
}
chomped += 1;
}
unsafe {

View File

@ -13,11 +13,11 @@ pub struct State<'a> {
offset: usize,
/// Position of the start of the current line
line_start: Position,
pub(crate) line_start: Position,
/// Current indentation level, in columns
/// (so no indent is col 1 - this saves an arithmetic operation.)
pub indent_column: u32,
pub(crate) indent_column: u32,
}
impl<'a> State<'a> {