perf(es/lexer): Make lexer faster (#7052)

This commit is contained in:
Donny/강동윤 2023-03-11 12:43:06 +09:00 committed by GitHub
parent d868434845
commit d1d0607158
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 44 additions and 32 deletions

View File

@ -77,20 +77,6 @@ impl<'a> Input for StringInput<'a> {
self.iter.clone().nth(2).map(|i| i.1)
}
/// Returns `true` if the unconsumed input starts with `s`.
///
/// Walks the pattern's bytes and the input's chars in lock-step; `self` is
/// not advanced because the char iterator is cloned first.
///
/// NOTE(review): each pattern *byte* is cast to `char` and compared against
/// an input *char*, so this only matches correctly when `s` is ASCII —
/// presumably every call site passes an ASCII keyword/operator; confirm.
#[inline]
fn is_str(&self, s: &str) -> bool {
// Bytes of the pattern being searched for.
let mut s_iter = s.as_bytes().iter();
// Chars of the remaining input (clone: peeking must not consume).
let mut p_iter = self.iter.clone().map(|i| i.1);
// Bail out at the first mismatch, or when either side is exhausted.
while let (Some(expected), Some(actual)) = (s_iter.next(), p_iter.next()) {
if *expected as char != actual {
return false;
}
}
// A match requires the *entire* pattern to have been consumed; the input
// running out first (pattern longer than input) is a non-match.
s_iter.next().is_none()
}
#[inline]
fn bump(&mut self) {
if let Some((i, c)) = self.iter.next() {
@ -102,6 +88,16 @@ impl<'a> Input for StringInput<'a> {
}
}
/// Returns the current byte if it is ASCII (`<= 0x7f`), without advancing.
///
/// Fast path for the lexer: an ASCII byte can be dispatched on directly,
/// skipping full `char` decoding.
#[inline]
fn cur_as_ascii(&mut self) -> Option<u8> {
// `?` returns `None` when the remaining input is empty.
let first_byte = *self.as_str().as_bytes().first()?;
// In UTF-8 a leading byte <= 0x7f is always a complete one-byte char,
// so it is safe to hand it back as the current character.
if first_byte <= 0x7f {
Some(first_byte)
} else {
None
}
}
#[inline]
fn is_at_start(&self) -> bool {
self.orig_start == self.last_pos
@ -120,15 +116,17 @@ impl<'a> Input for StringInput<'a> {
#[inline]
fn slice(&mut self, start: BytePos, end: BytePos) -> &str {
assert!(start <= end, "Cannot slice {:?}..{:?}", start, end);
debug_assert!(start <= end, "Cannot slice {:?}..{:?}", start, end);
let s = self.orig;
let start_idx = (start - self.orig_start).0 as usize;
let end_idx = (end - self.orig_start).0 as usize;
let ret = &s[start_idx..end_idx];
debug_assert!(end_idx <= s.len());
self.iter = s[end_idx..].char_indices();
let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
self.iter = unsafe { s.get_unchecked(end_idx..) }.char_indices();
self.last_pos = end;
self.start_pos_of_iter = end;
@ -149,11 +147,12 @@ impl<'a> Input for StringInput<'a> {
break;
}
}
let ret = &s[..last];
debug_assert!(last <= s.len());
let ret = unsafe { s.get_unchecked(..last) };
self.last_pos = self.last_pos + BytePos(last as _);
self.start_pos_of_iter = self.last_pos;
self.iter = s[last..].char_indices();
self.iter = unsafe { s.get_unchecked(last..) }.char_indices();
ret
}
@ -175,9 +174,11 @@ impl<'a> Input for StringInput<'a> {
return None;
}
debug_assert!(last <= s.len());
self.last_pos = self.last_pos + BytePos(last as _);
self.start_pos_of_iter = self.last_pos;
self.iter = s[last..].char_indices();
self.iter = unsafe { s.get_unchecked(last..) }.char_indices();
Some(self.last_pos)
}
@ -187,7 +188,8 @@ impl<'a> Input for StringInput<'a> {
let orig = self.orig;
let idx = (to - self.orig_start).0 as usize;
let s = &orig[idx..];
debug_assert!(idx <= orig.len());
let s = unsafe { orig.get_unchecked(idx..) };
self.iter = s.char_indices();
self.start_pos_of_iter = to;
self.last_pos = to;
@ -203,6 +205,11 @@ impl<'a> Input for StringInput<'a> {
}
}
/// Returns `true` if the unconsumed input starts with `s`; does not advance.
///
/// Delegates to `str::starts_with`, which performs a byte-wise prefix
/// comparison on the remaining slice.
#[inline]
fn is_str(&self, s: &str) -> bool {
self.as_str().starts_with(s)
}
#[inline]
fn eat_byte(&mut self, c: u8) -> bool {
if self.is_byte(c) {

View File

@ -26,7 +26,7 @@ impl<'a> Lexer<'a> {
self.emit_error_span(span, SyntaxError::TS1185);
self.skip_line_comment(6);
self.skip_space(true)?;
self.skip_space::<true>()?;
return self.read_token();
}
'<' | '{' => {

View File

@ -243,7 +243,7 @@ impl<'a> Lexer<'a> {
SyntaxError::LegacyCommentInModule,
);
self.skip_line_comment(0);
self.skip_space(true)?;
self.skip_space::<true>()?;
return self.read_token();
}
@ -282,7 +282,7 @@ impl<'a> Lexer<'a> {
SyntaxError::TS1185,
);
self.skip_line_comment(4);
self.skip_space(true)?;
self.skip_space::<true>()?;
return self.read_token();
}
@ -303,6 +303,11 @@ impl<'a> Lexer<'a> {
AssignOp(Assign)
}));
}
b'a'..=b'z' | b'A'..=b'Z' | b'$' | b'_' | b'\\' => {
// Fast path for ascii identifiers.
return self.read_ident_or_keyword().map(Some);
}
_ => {}
}
}
@ -504,7 +509,7 @@ impl<'a> Lexer<'a> {
let span = fixed_len_span(start, 7);
self.emit_error_span(span, SyntaxError::TS1185);
self.skip_line_comment(5);
self.skip_space(true)?;
self.skip_space::<true>()?;
return self.error_span(span, SyntaxError::TS1185);
}
@ -719,7 +724,7 @@ impl<'a> Lexer<'a> {
// XML style comment. `<!--`
if c == '<' && self.is(b'!') && self.peek() == Some('-') && self.peek_ahead() == Some('-') {
self.skip_line_comment(3);
self.skip_space(true)?;
self.skip_space::<true>()?;
self.emit_module_mode_error(start, SyntaxError::LegacyCommentInModule);
return self.read_token();
@ -767,7 +772,7 @@ impl<'a> Lexer<'a> {
{
self.emit_error_span(fixed_len_span(start, 7), SyntaxError::TS1185);
self.skip_line_comment(5);
self.skip_space(true)?;
self.skip_space::<true>()?;
return self.read_token();
}

View File

@ -214,7 +214,7 @@ impl<'a> Iterator for Lexer<'a> {
// skip spaces before getting next character, if we are allowed to.
if self.state.can_skip_space() {
self.skip_space(true)?;
self.skip_space::<true>()?;
start = self.input.cur_pos();
};
@ -309,7 +309,7 @@ impl<'a> Iterator for Lexer<'a> {
self.emit_error_span(span, SyntaxError::TS1185);
self.skip_line_comment(6);
self.skip_space(true)?;
self.skip_space::<true>()?;
return self.read_token();
}

View File

@ -182,7 +182,7 @@ impl<'a> Lexer<'a> {
/// Skip comments or whitespaces.
///
/// See https://tc39.github.io/ecma262/#sec-white-space
pub(super) fn skip_space(&mut self, lex_comments: bool) -> LexResult<()> {
pub(super) fn skip_space<const LEX_COMMENTS: bool>(&mut self) -> LexResult<()> {
loop {
let cur_b = self.input.cur_as_ascii();
@ -197,7 +197,7 @@ impl<'a> Lexer<'a> {
continue;
}
if lex_comments && self.input.is_byte(b'/') {
if LEX_COMMENTS && self.input.is_byte(b'/') {
if self.peek() == Some('/') {
self.skip_line_comment(2);
continue;
@ -308,7 +308,7 @@ impl<'a> Lexer<'a> {
let end = self.cur_pos();
self.skip_space(false)?;
self.skip_space::<false>()?;
if self.input.is_byte(b';') {
is_for_next = false;