mirror of
https://github.com/roc-lang/roc.git
synced 2024-11-10 10:02:38 +03:00
Merge pull request #3088 from rtfeldman/fast-comment-simd
use simd in actual comment parsing
This commit is contained in:
commit
5baf1ac8b3
@ -278,7 +278,7 @@ fn fast_eat_spaces(state: &State) -> FastSpaceState {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// we've made `first_special_char` characters of progress
|
// we've made `first_special_char` characters of progress
|
||||||
index += first_special_char as usize;
|
index += usize::min(first_special_char as usize, remaining);
|
||||||
|
|
||||||
// if we found a special char, let the outer loop handle it
|
// if we found a special char, let the outer loop handle it
|
||||||
if first_special_char != 16 {
|
if first_special_char != 16 {
|
||||||
@ -359,8 +359,8 @@ fn eat_line_comment<'a>(
|
|||||||
mut multiline: bool,
|
mut multiline: bool,
|
||||||
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
mut comments_and_newlines: Vec<'a, CommentOrNewline<'a>>,
|
||||||
) -> SpaceState<'a> {
|
) -> SpaceState<'a> {
|
||||||
let mut index = 0;
|
let mut index = state.pos().offset as usize;
|
||||||
let bytes = state.bytes();
|
let bytes = state.original_bytes();
|
||||||
let length = bytes.len();
|
let length = bytes.len();
|
||||||
|
|
||||||
'outer: loop {
|
'outer: loop {
|
||||||
@ -431,6 +431,95 @@ fn eat_line_comment<'a>(
|
|||||||
|
|
||||||
let loop_start = index;
|
let loop_start = index;
|
||||||
|
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
{
|
||||||
|
use std::arch::x86_64::*;
|
||||||
|
|
||||||
|
// a bytestring with the three characters we're looking for (the rest is ignored)
|
||||||
|
let needle = b"\r\n\t=============";
|
||||||
|
let needle = unsafe { _mm_loadu_si128(needle.as_ptr() as *const _) };
|
||||||
|
|
||||||
|
while index < length {
|
||||||
|
let remaining = length - index;
|
||||||
|
let chunk = if remaining < 16 { remaining as i32 } else { 16 };
|
||||||
|
|
||||||
|
// the source bytes we'll be looking at
|
||||||
|
let haystack = unsafe { _mm_loadu_si128(bytes.as_ptr().add(index) as *const _) };
|
||||||
|
|
||||||
|
// use first 3 characters of needle, first chunk` characters of haystack
|
||||||
|
// finds the first index where one of the `needle` characters occurs
|
||||||
|
// or 16 when none of the needle characters occur
|
||||||
|
let first_special_char =
|
||||||
|
unsafe { _mm_cmpestri(needle, 3, haystack, chunk, _SIDD_CMP_EQUAL_ANY) };
|
||||||
|
|
||||||
|
// we've made `first_special_char` characters of progress
|
||||||
|
let progress = usize::min(first_special_char as usize, remaining);
|
||||||
|
index += progress;
|
||||||
|
state = state.advance(progress);
|
||||||
|
|
||||||
|
if first_special_char != 16 {
|
||||||
|
match bytes[index] {
|
||||||
|
b'\t' => unreachable!(),
|
||||||
|
b'\n' => {
|
||||||
|
let comment =
|
||||||
|
unsafe { std::str::from_utf8_unchecked(&bytes[loop_start..index]) };
|
||||||
|
|
||||||
|
if is_doc_comment {
|
||||||
|
comments_and_newlines.push(CommentOrNewline::DocComment(comment));
|
||||||
|
} else {
|
||||||
|
comments_and_newlines.push(CommentOrNewline::LineComment(comment));
|
||||||
|
}
|
||||||
|
state = state.advance_newline();
|
||||||
|
multiline = true;
|
||||||
|
|
||||||
|
index += 1;
|
||||||
|
while index < length {
|
||||||
|
match bytes[index] {
|
||||||
|
b' ' => {
|
||||||
|
state = state.advance(1);
|
||||||
|
}
|
||||||
|
b'\n' => {
|
||||||
|
state = state.advance_newline();
|
||||||
|
multiline = true;
|
||||||
|
comments_and_newlines.push(CommentOrNewline::Newline);
|
||||||
|
}
|
||||||
|
b'\r' => {
|
||||||
|
state = state.advance_newline();
|
||||||
|
}
|
||||||
|
b'\t' => unreachable!(),
|
||||||
|
b'#' => {
|
||||||
|
state = state.advance(1);
|
||||||
|
index += 1;
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return SpaceState {
|
||||||
|
state,
|
||||||
|
multiline,
|
||||||
|
comments_and_newlines,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
b'\r' => {
|
||||||
|
state = state.advance_newline();
|
||||||
|
index += 1;
|
||||||
|
}
|
||||||
|
odd_character => {
|
||||||
|
unreachable!(
|
||||||
|
"unexpected_character {} {}",
|
||||||
|
odd_character, odd_character as char
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(target_arch = "x86_64"))]
|
||||||
while index < length {
|
while index < length {
|
||||||
match bytes[index] {
|
match bytes[index] {
|
||||||
b'\t' => unreachable!(),
|
b'\t' => unreachable!(),
|
||||||
|
Loading…
Reference in New Issue
Block a user