mirror of
https://github.com/swc-project/swc.git
synced 2024-12-25 06:36:08 +03:00
perf(es/lexer): Use jump table for skip_space
(#7073)
This commit is contained in:
parent
9c29666402
commit
f854d51343
@ -4,4 +4,4 @@ set -eu
|
||||
export RUST_LOG=off
|
||||
export MIMALLOC_SHOW_STATS=1
|
||||
|
||||
cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color
|
||||
# Forward any extra command-line arguments to the benchmark binary.
# "$@" is quoted so that each argument is passed through intact, without
# word splitting or glob expansion.
cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color "$@"
|
||||
|
@ -34,6 +34,7 @@ mod table;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
pub mod util;
|
||||
mod whitespace;
|
||||
|
||||
pub(crate) type LexResult<T> = Result<T, Error>;
|
||||
|
||||
|
@ -12,7 +12,10 @@ use swc_common::{
|
||||
use swc_ecma_ast::Ident;
|
||||
use tracing::warn;
|
||||
|
||||
use super::{comments_buffer::BufferedComment, input::Input, Char, LexResult, Lexer};
|
||||
use super::{
|
||||
comments_buffer::BufferedComment, input::Input, whitespace::SkipWhitespace, Char, LexResult,
|
||||
Lexer,
|
||||
};
|
||||
use crate::{
|
||||
error::{Error, SyntaxError},
|
||||
lexer::comments_buffer::BufferedCommentKind,
|
||||
@ -184,18 +187,20 @@ impl<'a> Lexer<'a> {
|
||||
/// See https://tc39.github.io/ecma262/#sec-white-space
|
||||
pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) -> LexResult<()> {
|
||||
loop {
|
||||
let cur_b = self.input.cur_as_ascii();
|
||||
let (offset, newline) = {
|
||||
let mut skip = SkipWhitespace {
|
||||
input: self.input.as_str(),
|
||||
newline: false,
|
||||
offset: 0,
|
||||
};
|
||||
|
||||
if matches!(cur_b, Some(b'\n' | b'\r')) {
|
||||
self.input.bump();
|
||||
self.state.had_line_break = true;
|
||||
continue;
|
||||
}
|
||||
skip.scan();
|
||||
|
||||
if matches!(cur_b, Some(b'\x09' | b'\x0b' | b'\x0c' | b'\x20' | b'\xa0')) {
|
||||
self.input.bump();
|
||||
continue;
|
||||
}
|
||||
(skip.offset, skip.newline)
|
||||
};
|
||||
|
||||
self.input.bump_bytes(offset);
|
||||
self.state.had_line_break |= newline;
|
||||
|
||||
if LEX_COMMENTS && self.input.is_byte(b'/') {
|
||||
if self.peek() == Some('/') {
|
||||
@ -205,34 +210,15 @@ impl<'a> Lexer<'a> {
|
||||
self.skip_block_comment()?;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
let c = self.cur();
|
||||
let c = match c {
|
||||
Some(v) => v,
|
||||
None => break,
|
||||
};
|
||||
|
||||
match c {
|
||||
// white spaces
|
||||
'\u{feff}' => {}
|
||||
// line breaks
|
||||
'\u{2028}' | '\u{2029}' => {
|
||||
self.state.had_line_break = true;
|
||||
}
|
||||
|
||||
_ if c.is_whitespace() => {}
|
||||
|
||||
_ => break,
|
||||
}
|
||||
|
||||
self.bump();
|
||||
break;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub(super) fn skip_line_comment(&mut self, start_skip: usize) {
|
||||
let start = self.cur_pos();
|
||||
self.input.bump_bytes(start_skip);
|
||||
@ -282,6 +268,7 @@ impl<'a> Lexer<'a> {
|
||||
}
|
||||
|
||||
/// Expects current char to be '/' and next char to be '*'.
|
||||
#[inline(never)]
|
||||
pub(super) fn skip_block_comment(&mut self) -> LexResult<()> {
|
||||
let start = self.cur_pos();
|
||||
|
||||
|
100
crates/swc_ecma_parser/src/lexer/whitespace.rs
Normal file
100
crates/swc_ecma_parser/src/lexer/whitespace.rs
Normal file
@ -0,0 +1,100 @@
|
||||
/// Returns true if it's done
|
||||
pub(super) type ByteHandler = Option<for<'aa> fn(&mut SkipWhitespace<'aa>) -> usize>;
|
||||
|
||||
/// Lookup table for whitespace
|
||||
static BYTE_HANDLERS: [ByteHandler; 256] = [
|
||||
// 0 1 2 3 4 5 6 7 8 9 A B C D E F //
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, SPC, SPC, NLN, ___, ___, // 0
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1
|
||||
SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 6
|
||||
___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 7
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E
|
||||
UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
|
||||
];
|
||||
|
||||
/// Stop
|
||||
const ___: ByteHandler = None;
|
||||
|
||||
/// Newline
|
||||
const NLN: ByteHandler = Some(|skip| {
|
||||
skip.newline = true;
|
||||
|
||||
1
|
||||
});
|
||||
|
||||
/// Space
|
||||
const SPC: ByteHandler = Some(|_| 1);
|
||||
|
||||
/// Unicode
|
||||
const UNI: ByteHandler = Some(|skip| {
|
||||
let s = unsafe {
|
||||
// Safety: `skip.offset` is always valid
|
||||
skip.input.get_unchecked(skip.offset..)
|
||||
};
|
||||
|
||||
let c = unsafe {
|
||||
// Safety: Byte handlers are called only when `skip.input` is not empty
|
||||
s.chars().next().unwrap_unchecked()
|
||||
};
|
||||
|
||||
match c {
|
||||
// white spaces
|
||||
'\u{feff}' => {}
|
||||
// line breaks
|
||||
'\u{2028}' | '\u{2029}' => {
|
||||
skip.newline = true;
|
||||
}
|
||||
|
||||
_ if c.is_whitespace() => {}
|
||||
|
||||
_ => return 0,
|
||||
}
|
||||
|
||||
c.len_utf8()
|
||||
});
|
||||
|
||||
/// API is taked from oxc by Boshen (https://github.com/Boshen/oxc/pull/26)
|
||||
pub(super) struct SkipWhitespace<'a> {
|
||||
pub input: &'a str,
|
||||
|
||||
/// Total offset
|
||||
pub offset: usize,
|
||||
|
||||
/// Found newline
|
||||
pub newline: bool,
|
||||
}
|
||||
|
||||
impl SkipWhitespace<'_> {
|
||||
#[inline(always)]
|
||||
pub fn scan(&mut self) {
|
||||
let mut byte;
|
||||
loop {
|
||||
byte = match self.input.as_bytes().get(self.offset).copied() {
|
||||
Some(v) => v,
|
||||
None => return,
|
||||
};
|
||||
|
||||
let handler = unsafe { *(&BYTE_HANDLERS as *const ByteHandler).offset(byte as isize) };
|
||||
|
||||
if let Some(handler) = handler {
|
||||
let delta = handler(self);
|
||||
if delta == 0 {
|
||||
return;
|
||||
}
|
||||
self.offset += delta;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user