diff --git a/crates/swc_ecma_parser/scripts/instrument/bench.sh b/crates/swc_ecma_parser/scripts/instrument/bench.sh
index 1f8c08c4977..e0b4d34c490 100755
--- a/crates/swc_ecma_parser/scripts/instrument/bench.sh
+++ b/crates/swc_ecma_parser/scripts/instrument/bench.sh
@@ -4,4 +4,4 @@ set -eu
 export RUST_LOG=off
 export MIMALLOC_SHOW_STATS=1
 
-cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color
+cargo profile instruments --release -t time --features tracing/release_max_level_info --features swc_common/concurrent --features swc_common/parking_lot --bench parser -- --bench --color "$@"
diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs
index 05b855f18a2..06e8cefa1a7 100644
--- a/crates/swc_ecma_parser/src/lexer/mod.rs
+++ b/crates/swc_ecma_parser/src/lexer/mod.rs
@@ -34,6 +34,7 @@ mod table;
 #[cfg(test)]
 mod tests;
 pub mod util;
+mod whitespace;
 
 pub(crate) type LexResult<T> = Result<T, Error>;
 
diff --git a/crates/swc_ecma_parser/src/lexer/util.rs b/crates/swc_ecma_parser/src/lexer/util.rs
index 7365d4b7243..d31fddc594b 100644
--- a/crates/swc_ecma_parser/src/lexer/util.rs
+++ b/crates/swc_ecma_parser/src/lexer/util.rs
@@ -12,7 +12,10 @@ use swc_common::{
 use swc_ecma_ast::Ident;
 use tracing::warn;
 
-use super::{comments_buffer::BufferedComment, input::Input, Char, LexResult, Lexer};
+use super::{
+    comments_buffer::BufferedComment, input::Input, whitespace::SkipWhitespace, Char, LexResult,
+    Lexer,
+};
 use crate::{
     error::{Error, SyntaxError},
     lexer::comments_buffer::BufferedCommentKind,
@@ -184,18 +187,20 @@ impl<'a> Lexer<'a> {
     /// See https://tc39.github.io/ecma262/#sec-white-space
     pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) -> LexResult<()> {
         loop {
-            let cur_b = self.input.cur_as_ascii();
+            let (offset, newline) = {
+                let mut skip = SkipWhitespace {
+                    input: self.input.as_str(),
+                    newline: false,
+                    offset: 0,
+                };
 
-            if matches!(cur_b, Some(b'\n' | b'\r')) {
-                self.input.bump();
-                self.state.had_line_break = true;
-                continue;
-            }
+                skip.scan();
 
-            if matches!(cur_b, Some(b'\x09' | b'\x0b' | b'\x0c' | b'\x20' | b'\xa0')) {
-                self.input.bump();
-                continue;
-            }
+                (skip.offset, skip.newline)
+            };
+
+            self.input.bump_bytes(offset);
+            self.state.had_line_break |= newline;
 
             if LEX_COMMENTS && self.input.is_byte(b'/') {
                 if self.peek() == Some('/') {
@@ -205,34 +210,15 @@ impl<'a> Lexer<'a> {
                     self.skip_block_comment()?;
                     continue;
                 }
-                break;
             }
 
-            let c = self.cur();
-            let c = match c {
-                Some(v) => v,
-                None => break,
-            };
-
-            match c {
-                // white spaces
-                '\u{feff}' => {}
-                // line breaks
-                '\u{2028}' | '\u{2029}' => {
-                    self.state.had_line_break = true;
-                }
-
-                _ if c.is_whitespace() => {}
-
-                _ => break,
-            }
-
-            self.bump();
+            break;
         }
 
         Ok(())
     }
 
+    #[inline(never)]
     pub(super) fn skip_line_comment(&mut self, start_skip: usize) {
         let start = self.cur_pos();
         self.input.bump_bytes(start_skip);
@@ -282,6 +268,7 @@ impl<'a> Lexer<'a> {
     }
 
     /// Expects current char to be '/' and next char to be '*'.
+    #[inline(never)]
     pub(super) fn skip_block_comment(&mut self) -> LexResult<()> {
         let start = self.cur_pos();
 
diff --git a/crates/swc_ecma_parser/src/lexer/whitespace.rs b/crates/swc_ecma_parser/src/lexer/whitespace.rs
new file mode 100644
index 00000000000..a38cffa05a0
--- /dev/null
+++ b/crates/swc_ecma_parser/src/lexer/whitespace.rs
@@ -0,0 +1,101 @@
+/// Handler invoked by [`SkipWhitespace::scan`] for the byte at the current offset.
+///
+/// A handler returns the number of bytes to skip. Returning `0`, or a `None`
+/// entry in the table, stops the scan.
+pub(super) type ByteHandler = Option<for<'aa> fn(&mut SkipWhitespace<'aa>) -> usize>;
+
+/// Lookup table for whitespace, indexed by the byte being inspected.
+static BYTE_HANDLERS: [ByteHandler; 256] = [
+    //   0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F   //
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, SPC, SPC, NLN, ___, ___, // 0
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 1
+    SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 2
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 3
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 4
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 5
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 6
+    ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, // 7
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 8
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // 9
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // A
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // B
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // C
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // D
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // E
+    UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, // F
+];
+
+/// Stop: no handler, the scan ends at this byte.
+const ___: ByteHandler = None;
+
+/// Newline: records the line break and skips one byte.
+const NLN: ByteHandler = Some(|skip| {
+    skip.newline = true;
+
+    1
+});
+
+/// Space: skips one byte.
+const SPC: ByteHandler = Some(|_| 1);
+
+/// Unicode: decodes the char at the offset; skips it if whitespace, else stops.
+const UNI: ByteHandler = Some(|skip| {
+    let s = unsafe {
+        // SAFETY: `scan` advances `skip.offset` only by whole characters, so when
+        // the caller starts on a char boundary it stays in bounds and on one.
+        skip.input.get_unchecked(skip.offset..)
+    };
+
+    let c = unsafe {
+        // SAFETY: handlers are called only after `scan` read a byte at
+        // `skip.offset`, so `s` is not empty.
+        s.chars().next().unwrap_unchecked()
+    };
+
+    match c {
+        // white spaces
+        '\u{feff}' => {}
+        // line breaks
+        '\u{2028}' | '\u{2029}' => {
+            skip.newline = true;
+        }
+
+        _ if c.is_whitespace() => {}
+
+        _ => return 0,
+    }
+
+    c.len_utf8()
+});
+
+/// API is taken from oxc by Boshen (https://github.com/Boshen/oxc/pull/26)
+pub(super) struct SkipWhitespace<'a> {
+    pub input: &'a str,
+
+    /// Total offset in bytes; must lie on a char boundary of `input`
+    pub offset: usize,
+
+    /// Found newline
+    pub newline: bool,
+}
+
+impl SkipWhitespace<'_> {
+    /// Advances `offset` past every whitespace character found at the current
+    /// offset and sets `newline` when one of them is a line break.
+    #[inline(always)]
+    pub fn scan(&mut self) {
+        while let Some(&byte) = self.input.as_bytes().get(self.offset) {
+            // A `u8` can never be out of range for a 256-entry table, so plain
+            // indexing needs no `unsafe` and the bounds check can be elided.
+            let delta = match BYTE_HANDLERS[usize::from(byte)] {
+                Some(handler) => handler(self),
+                None => return,
+            };
+
+            if delta == 0 {
+                return;
+            }
+            self.offset += delta;
+        }
+    }
+}