perf(es/lexer): Use jump table for skip_space (#7073)

This commit is contained in:
Donny/강동윤 2023-03-13 17:32:49 +09:00 committed by GitHub
parent 9c29666402
commit f854d51343
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 121 additions and 33 deletions

View File

@ -4,4 +4,4 @@ set -eu
export RUST_LOG=off
export MIMALLOC_SHOW_STATS=1
cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color
cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color $@

View File

@ -34,6 +34,7 @@ mod table;
#[cfg(test)]
mod tests;
pub mod util;
mod whitespace;
pub(crate) type LexResult<T> = Result<T, Error>;

View File

@ -12,7 +12,10 @@ use swc_common::{
use swc_ecma_ast::Ident;
use tracing::warn;
use super::{comments_buffer::BufferedComment, input::Input, Char, LexResult, Lexer};
use super::{
comments_buffer::BufferedComment, input::Input, whitespace::SkipWhitespace, Char, LexResult,
Lexer,
};
use crate::{
error::{Error, SyntaxError},
lexer::comments_buffer::BufferedCommentKind,
@ -184,18 +187,20 @@ impl<'a> Lexer<'a> {
/// See https://tc39.github.io/ecma262/#sec-white-space
pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) -> LexResult<()> {
loop {
let cur_b = self.input.cur_as_ascii();
let (offset, newline) = {
let mut skip = SkipWhitespace {
input: self.input.as_str(),
newline: false,
offset: 0,
};
if matches!(cur_b, Some(b'\n' | b'\r')) {
self.input.bump();
self.state.had_line_break = true;
continue;
}
skip.scan();
if matches!(cur_b, Some(b'\x09' | b'\x0b' | b'\x0c' | b'\x20' | b'\xa0')) {
self.input.bump();
continue;
}
(skip.offset, skip.newline)
};
self.input.bump_bytes(offset);
self.state.had_line_break |= newline;
if LEX_COMMENTS && self.input.is_byte(b'/') {
if self.peek() == Some('/') {
@ -205,34 +210,15 @@ impl<'a> Lexer<'a> {
self.skip_block_comment()?;
continue;
}
break;
}
let c = self.cur();
let c = match c {
Some(v) => v,
None => break,
};
match c {
// white spaces
'\u{feff}' => {}
// line breaks
'\u{2028}' | '\u{2029}' => {
self.state.had_line_break = true;
}
_ if c.is_whitespace() => {}
_ => break,
}
self.bump();
break;
}
Ok(())
}
#[inline(never)]
pub(super) fn skip_line_comment(&mut self, start_skip: usize) {
let start = self.cur_pos();
self.input.bump_bytes(start_skip);
@ -282,6 +268,7 @@ impl<'a> Lexer<'a> {
}
/// Expects current char to be '/' and next char to be '*'.
#[inline(never)]
pub(super) fn skip_block_comment(&mut self) -> LexResult<()> {
let start = self.cur_pos();

View File

@ -0,0 +1,100 @@
/// Handler invoked by `SkipWhitespace::scan` for the byte at the cursor.
///
/// `None` means the byte has no handler and scanning stops there. A `Some`
/// handler returns the number of bytes to skip; returning `0` also stops
/// the scan.
pub(super) type ByteHandler = Option<for<'aa> fn(&mut SkipWhitespace<'aa>) -> usize>;
/// Lookup table for whitespace, indexed by the value of the byte at the
/// cursor. Each row below covers 16 byte values; the trailing comment is the
/// high hex digit and the header row gives the low hex digit.
///
/// - `___`: no handler, scanning stops.
/// - `SPC`: single-byte space (0x09, 0x0B, 0x0C, 0x20).
/// - `NLN`: single-byte line break (0x0A, 0x0D).
/// - `UNI`: every byte in 0x80..=0xFF; the character at the cursor is
///   decoded and classified by the `UNI` handler.
static BYTE_HANDLERS: [ByteHandler; 256] = [
// 0 1 2 3 4 5 6 7 8 9 A B C D E F //
___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, SPC, SPC, NLN, ___, ___, // 0
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1
SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 6
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 7
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
];
/// Stop: the byte has no handler, so the scan ends at it.
const ___: ByteHandler = None;
/// Newline: records that a line break was seen and consumes one byte.
const NLN: ByteHandler = Some(|skip| {
skip.newline = true;
1
});
/// Space: consumes one byte without touching any other state.
const SPC: ByteHandler = Some(|_| 1);
/// Unicode
///
/// Decodes the character that starts at the cursor. A byte-order mark
/// (U+FEFF) or any Unicode whitespace is consumed; U+2028 and U+2029 are
/// additionally recorded as line breaks. Any other character yields `0`,
/// which stops the scan.
const UNI: ByteHandler = Some(|state| {
    // SAFETY: `scan` only dispatches to a handler after reading a byte at
    // `state.offset`, so the offset is within `state.input`.
    // NOTE(review): this also relies on `offset` sitting on a char boundary;
    // confirm no caller sets `offset` to the middle of a character.
    let rest = unsafe { state.input.get_unchecked(state.offset..) };
    // SAFETY: handlers run only while at least one byte remains, so `rest`
    // is non-empty and has a first character.
    let ch = unsafe { rest.chars().next().unwrap_unchecked() };

    if matches!(ch, '\u{2028}' | '\u{2029}') {
        // Line separator / paragraph separator count as line breaks.
        state.newline = true;
    } else if ch != '\u{feff}' && !ch.is_whitespace() {
        // Neither a byte-order mark nor whitespace: leave it for the lexer.
        return 0;
    }

    ch.len_utf8()
});
/// Cursor state used while skipping whitespace at the start of a string.
///
/// API is taken from oxc by Boshen (https://github.com/Boshen/oxc/pull/26)
pub(super) struct SkipWhitespace<'a> {
/// Text being scanned.
pub input: &'a str,
/// Total offset, in bytes, that has been skipped in `input` so far.
pub offset: usize,
/// Found newline: set to `true` once a line break has been skipped.
pub newline: bool,
}
impl SkipWhitespace<'_> {
    /// Advances `offset` over the run of whitespace that starts at the
    /// current `offset` in `input`.
    ///
    /// The byte at the cursor selects an entry of `BYTE_HANDLERS`. Scanning
    /// stops at the end of the input, at a byte that has no handler, or when
    /// a handler reports that it consumed `0` bytes. Handlers may set
    /// `newline` as a side effect.
    #[inline(always)]
    pub fn scan(&mut self) {
        while let Some(&byte) = self.input.as_bytes().get(self.offset) {
            // A `u8` can only take values 0..=255 and the table has exactly
            // 256 entries, so plain indexing can never go out of bounds; no
            // raw-pointer arithmetic or `unsafe` is needed for the lookup.
            let delta = match BYTE_HANDLERS[usize::from(byte)] {
                Some(handler) => handler(self),
                None => return,
            };

            // A handler returning zero means "not whitespace after all".
            if delta == 0 {
                return;
            }
            self.offset += delta;
        }
    }
}