From d1d0607158ab40463d1b123fed52cc526eba8385 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Sat, 11 Mar 2023 12:43:06 +0900 Subject: [PATCH] perf(es/lexer): Make lexer faster (#7052) --- crates/swc_common/src/input.rs | 49 +++++++++++++---------- crates/swc_ecma_parser/src/lexer/jsx.rs | 2 +- crates/swc_ecma_parser/src/lexer/mod.rs | 15 ++++--- crates/swc_ecma_parser/src/lexer/state.rs | 4 +- crates/swc_ecma_parser/src/lexer/util.rs | 6 +-- 5 files changed, 44 insertions(+), 32 deletions(-) diff --git a/crates/swc_common/src/input.rs b/crates/swc_common/src/input.rs index 47949744728..246a98b1d94 100644 --- a/crates/swc_common/src/input.rs +++ b/crates/swc_common/src/input.rs @@ -77,20 +77,6 @@ impl<'a> Input for StringInput<'a> { self.iter.clone().nth(2).map(|i| i.1) } - #[inline] - fn is_str(&self, s: &str) -> bool { - let mut s_iter = s.as_bytes().iter(); - let mut p_iter = self.iter.clone().map(|i| i.1); - - while let (Some(expected), Some(actual)) = (s_iter.next(), p_iter.next()) { - if *expected as char != actual { - return false; - } - } - - s_iter.next().is_none() - } - #[inline] fn bump(&mut self) { if let Some((i, c)) = self.iter.next() { @@ -102,6 +88,16 @@ impl<'a> Input for StringInput<'a> { } } + #[inline] + fn cur_as_ascii(&mut self) -> Option { + let first_byte = *self.as_str().as_bytes().first()?; + if first_byte <= 0x7f { + Some(first_byte) + } else { + None + } + } + #[inline] fn is_at_start(&self) -> bool { self.orig_start == self.last_pos @@ -120,15 +116,17 @@ impl<'a> Input for StringInput<'a> { #[inline] fn slice(&mut self, start: BytePos, end: BytePos) -> &str { - assert!(start <= end, "Cannot slice {:?}..{:?}", start, end); + debug_assert!(start <= end, "Cannot slice {:?}..{:?}", start, end); let s = self.orig; let start_idx = (start - self.orig_start).0 as usize; let end_idx = (end - self.orig_start).0 as usize; - let ret = &s[start_idx..end_idx]; + debug_assert!(end_idx <= s.len()); - self.iter = s[end_idx..].char_indices(); + let ret = unsafe { s.get_unchecked(start_idx..end_idx) }; + + self.iter = unsafe { s.get_unchecked(end_idx..) }.char_indices(); self.last_pos = end; self.start_pos_of_iter = end; @@ -149,11 +147,12 @@ impl<'a> Input for StringInput<'a> { break; } } - let ret = &s[..last]; + debug_assert!(last <= s.len()); + let ret = unsafe { s.get_unchecked(..last) }; self.last_pos = self.last_pos + BytePos(last as _); self.start_pos_of_iter = self.last_pos; - self.iter = s[last..].char_indices(); + self.iter = unsafe { s.get_unchecked(last..) }.char_indices(); ret } @@ -175,9 +174,11 @@ impl<'a> Input for StringInput<'a> { return None; } + debug_assert!(last <= s.len()); + self.last_pos = self.last_pos + BytePos(last as _); self.start_pos_of_iter = self.last_pos; - self.iter = s[last..].char_indices(); + self.iter = unsafe { s.get_unchecked(last..) }.char_indices(); Some(self.last_pos) } @@ -187,7 +188,8 @@ impl<'a> Input for StringInput<'a> { let orig = self.orig; let idx = (to - self.orig_start).0 as usize; - let s = &orig[idx..]; + debug_assert!(idx <= orig.len()); + let s = unsafe { orig.get_unchecked(idx..) }; self.iter = s.char_indices(); self.start_pos_of_iter = to; self.last_pos = to; @@ -203,6 +205,11 @@ impl<'a> Input for StringInput<'a> { } } + #[inline] + fn is_str(&self, s: &str) -> bool { + self.as_str().starts_with(s) + } + #[inline] fn eat_byte(&mut self, c: u8) -> bool { if self.is_byte(c) { diff --git a/crates/swc_ecma_parser/src/lexer/jsx.rs b/crates/swc_ecma_parser/src/lexer/jsx.rs index aab1ae833d0..ea64b0c188b 100644 --- a/crates/swc_ecma_parser/src/lexer/jsx.rs +++ b/crates/swc_ecma_parser/src/lexer/jsx.rs @@ -26,7 +26,7 @@ impl<'a> Lexer<'a> { self.emit_error_span(span, SyntaxError::TS1185); self.skip_line_comment(6); - self.skip_space(true)?; + self.skip_space::()?; return self.read_token(); } '<' | '{' => { diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 4341e74dda8..11efbc5d313 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -243,7 +243,7 @@ impl<'a> Lexer<'a> { SyntaxError::LegacyCommentInModule, ); self.skip_line_comment(0); - self.skip_space(true)?; + self.skip_space::()?; return self.read_token(); } @@ -282,7 +282,7 @@ impl<'a> Lexer<'a> { SyntaxError::TS1185, ); self.skip_line_comment(4); - self.skip_space(true)?; + self.skip_space::()?; return self.read_token(); } @@ -303,6 +303,11 @@ impl<'a> Lexer<'a> { AssignOp(Assign) })); } + + b'a'..=b'z' | b'A'..=b'Z' | b'$' | b'_' | b'\\' => { + // Fast path for ascii identifiers. + return self.read_ident_or_keyword().map(Some); + } _ => {} } } @@ -504,7 +509,7 @@ impl<'a> Lexer<'a> { let span = fixed_len_span(start, 7); self.emit_error_span(span, SyntaxError::TS1185); self.skip_line_comment(5); - self.skip_space(true)?; + self.skip_space::()?; return self.error_span(span, SyntaxError::TS1185); } @@ -719,7 +724,7 @@ impl<'a> Lexer<'a> { // XML style comment. `