From c1d12d6e2a0ad1633652817ebea435eca404bbb8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Donny/=EA=B0=95=EB=8F=99=EC=9C=A4?= Date: Mon, 11 Nov 2024 21:22:46 +0900 Subject: [PATCH] perf(es/plugin): Make `analyze_source_file` lazy, again (#9732) **Description:** This reverts commit 772f023fd2f8bbcb336b0561a81621f0f1163622. **Related issue:** - Reverts https://github.com/swc-project/swc/pull/9717 --- .changeset/calm-crabs-lie.md | 5 + crates/swc_common/src/cache.rs | 116 +++++++++++++++++++++ crates/swc_common/src/lib.rs | 5 +- crates/swc_common/src/source_map.rs | 26 +++-- crates/swc_common/src/syntax_pos.rs | 57 +++++----- crates/swc_error_reporters/src/lib.rs | 2 +- crates/swc_estree_compat/src/swcify/ctx.rs | 2 +- 7 files changed, 173 insertions(+), 40 deletions(-) create mode 100644 .changeset/calm-crabs-lie.md create mode 100644 crates/swc_common/src/cache.rs diff --git a/.changeset/calm-crabs-lie.md b/.changeset/calm-crabs-lie.md new file mode 100644 index 00000000000..084e5bde5dd --- /dev/null +++ b/.changeset/calm-crabs-lie.md @@ -0,0 +1,5 @@ +--- +swc_common: major +--- + +perf(es/plugin): Make `analyze_source_file` lazy, again diff --git a/crates/swc_common/src/cache.rs b/crates/swc_common/src/cache.rs new file mode 100644 index 00000000000..d943cb3479d --- /dev/null +++ b/crates/swc_common/src/cache.rs @@ -0,0 +1,116 @@ +use std::ops::Deref; + +use once_cell::sync::OnceCell; + +/// Wrapper for [OnceCell] with support for [rkyv]. +#[derive(Clone, Debug)] +pub struct CacheCell(OnceCell); + +impl Deref for CacheCell { + type Target = OnceCell; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl CacheCell { + pub fn new() -> Self { + Self(OnceCell::new()) + } +} + +impl From for CacheCell { + fn from(value: T) -> Self { + Self(OnceCell::from(value)) + } +} + +impl Default for CacheCell { + fn default() -> Self { + Self::new() + } +} + +#[cfg(feature = "rkyv-impl")] +mod rkyv_impl { + use std::hint::unreachable_unchecked; + + use rancor::Fallible; + use rkyv::{ + munge::munge, option::ArchivedOption, traits::NoUndef, Archive, Deserialize, Place, + Serialize, + }; + + use super::*; + + #[allow(dead_code)] + #[repr(u8)] + enum ArchivedOptionTag { + None, + Some, + } + + // SAFETY: `ArchivedOptionTag` is `repr(u8)` and so always consists of a single + // well-defined byte. + unsafe impl NoUndef for ArchivedOptionTag {} + + #[repr(C)] + struct ArchivedOptionVariantNone(ArchivedOptionTag); + + #[repr(C)] + struct ArchivedOptionVariantSome(ArchivedOptionTag, T); + + impl Archive for CacheCell { + type Archived = ArchivedOption; + type Resolver = Option; + + fn resolve(&self, resolver: Self::Resolver, out: Place) { + match resolver { + None => { + let out = unsafe { out.cast_unchecked::() }; + munge!(let ArchivedOptionVariantNone(tag) = out); + tag.write(ArchivedOptionTag::None); + } + Some(resolver) => { + let out = + unsafe { out.cast_unchecked::>() }; + munge!(let ArchivedOptionVariantSome(tag, out_value) = out); + tag.write(ArchivedOptionTag::Some); + + let value = if let Some(value) = self.get() { + value + } else { + unsafe { + unreachable_unchecked(); + } + }; + + value.resolve(resolver, out_value); + } + } + } + } + + impl, S: Fallible + ?Sized> Serialize for CacheCell { + fn serialize(&self, serializer: &mut S) -> Result { + self.get() + .map(|value| value.serialize(serializer)) + .transpose() + } + } + + impl Deserialize, D> for ArchivedOption + where + T: Archive, + T::Archived: Deserialize, + D: Fallible + ?Sized, + { + fn deserialize(&self, deserializer: &mut D) -> Result, D::Error> { + Ok(match self { + ArchivedOption::Some(value) => CacheCell::from(value.deserialize(deserializer)?), + ArchivedOption::None => CacheCell::new(), + }) + } + } +} diff --git a/crates/swc_common/src/lib.rs b/crates/swc_common/src/lib.rs index ba0b27e7a86..7f8e8e07fa9 100644 --- a/crates/swc_common/src/lib.rs +++ b/crates/swc_common/src/lib.rs @@ -51,14 +51,13 @@ pub use self::{ source_map::{FileLines, FileLoader, FilePathMapping, SourceMap, SpanSnippetError}, syntax_pos::LineCol, }; -#[doc(hidden)] -pub mod private; /// A trait for ast nodes. pub trait AstNode: Debug + PartialEq + Clone + Spanned { const TYPE: &'static str; } +pub mod cache; pub mod collections; pub mod comments; mod eq; @@ -68,6 +67,8 @@ pub mod iter; pub mod pass; pub mod plugin; mod pos; +#[doc(hidden)] +pub mod private; mod rustc_data_structures; pub mod serializer; pub mod source_map; diff --git a/crates/swc_common/src/source_map.rs b/crates/swc_common/src/source_map.rs index 12b38bf261c..ef89f797b84 100644 --- a/crates/swc_common/src/source_map.rs +++ b/crates/swc_common/src/source_map.rs @@ -316,10 +316,11 @@ impl SourceMap { let line_info = self.lookup_line_with(fm, pos); match line_info { Ok(SourceFileAndLine { sf: f, line: a }) => { + let analysis = f.analyze(); let chpos = self.bytepos_to_file_charpos_with(&f, pos); let line = a + 1; // Line numbers start at 1 - let linebpos = f.lines[a]; + let linebpos = f.analyze().lines[a]; assert!( pos >= linebpos, "{}: bpos = {:?}; linebpos = {:?};", @@ -332,16 +333,17 @@ impl SourceMap { let col = chpos - linechpos; let col_display = { - let start_width_idx = f + let start_width_idx = analysis .non_narrow_chars .binary_search_by_key(&linebpos, |x| x.pos()) .unwrap_or_else(|x| x); - let end_width_idx = f + let end_width_idx = analysis .non_narrow_chars .binary_search_by_key(&pos, |x| x.pos()) .unwrap_or_else(|x| x); let special_chars = end_width_idx - start_width_idx; - let non_narrow: usize = f.non_narrow_chars[start_width_idx..end_width_idx] + let non_narrow: usize = analysis.non_narrow_chars + [start_width_idx..end_width_idx] .iter() .map(|x| x.width()) .sum(); @@ -367,14 +369,15 @@ impl SourceMap { }) } Err(f) => { + let analysis = f.analyze(); let chpos = self.bytepos_to_file_charpos(pos)?; let col_display = { - let end_width_idx = f + let end_width_idx = analysis .non_narrow_chars .binary_search_by_key(&pos, |x| x.pos()) .unwrap_or_else(|x| x); - let non_narrow: usize = f.non_narrow_chars[0..end_width_idx] + let non_narrow: usize = analysis.non_narrow_chars[0..end_width_idx] .iter() .map(|x| x.width()) .sum(); @@ -1028,11 +1031,11 @@ impl SourceMap { ) -> u32 { let mut total_extra_bytes = state.total_extra_bytes; let mut index = state.mbc_index; - + let analysis = file.analyze(); if bpos >= state.pos { - let range = index..file.multibyte_chars.len(); + let range = index..analysis.multibyte_chars.len(); for i in range { - let mbc = &file.multibyte_chars[i]; + let mbc = &analysis.multibyte_chars[i]; debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); if mbc.pos >= bpos { break; @@ -1052,7 +1055,7 @@ impl SourceMap { } else { let range = 0..index; for i in range.rev() { - let mbc = &file.multibyte_chars[i]; + let mbc = &analysis.multibyte_chars[i]; debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos); if mbc.pos < bpos { break; @@ -1322,7 +1325,8 @@ impl SourceMap { None => continue, }; - let linebpos = f.lines[line as usize]; + let analysis = f.analyze(); + let linebpos = analysis.lines[line as usize]; debug_assert!( pos >= linebpos, "{}: bpos = {:?}; linebpos = {:?};", diff --git a/crates/swc_common/src/syntax_pos.rs b/crates/swc_common/src/syntax_pos.rs index 54ec1f5f269..4039f5a15cc 100644 --- a/crates/swc_common/src/syntax_pos.rs +++ b/crates/swc_common/src/syntax_pos.rs @@ -16,7 +16,7 @@ use url::Url; use self::hygiene::MarkData; pub use self::hygiene::{Mark, SyntaxContext}; -use crate::{rustc_data_structures::stable_hasher::StableHasher, sync::Lrc}; +use crate::{cache::CacheCell, rustc_data_structures::stable_hasher::StableHasher, sync::Lrc}; mod analyze_source_file; pub mod hygiene; @@ -825,14 +825,10 @@ pub struct SourceFile { pub start_pos: BytePos, /// The end position of this source in the `SourceMap` pub end_pos: BytePos, - /// Locations of lines beginnings in the source code - pub lines: Vec, - /// Locations of multi-byte characters in the source code - pub multibyte_chars: Vec, - /// Width of characters that are not narrow in the source code - pub non_narrow_chars: Vec, /// A hash of the filename, used for speeding up the incr. comp. hashing. pub name_hash: u128, + + lazy: CacheCell, } #[cfg_attr( @@ -849,8 +845,6 @@ pub struct SourceFileAnalysis { pub multibyte_chars: Vec, /// Width of characters that are not narrow in the source code pub non_narrow_chars: Vec, - /// A hash of the filename, used for speeding up the incr. comp. hashing. - pub name_hash: u128, } impl fmt::Debug for SourceFile { @@ -904,9 +898,6 @@ impl SourceFile { }; let end_pos = start_pos.to_usize() + src.len(); - let (lines, multibyte_chars, non_narrow_chars) = - analyze_source_file::analyze_source_file(&src[..], start_pos); - SourceFile { name, name_was_remapped, @@ -916,17 +907,16 @@ impl SourceFile { src_hash, start_pos, end_pos: SmallPos::from_usize(end_pos), - lines, - multibyte_chars, - non_narrow_chars, name_hash, + lazy: CacheCell::new(), } } /// Return the BytePos of the beginning of the current line. pub fn line_begin_pos(&self, pos: BytePos) -> BytePos { let line_index = self.lookup_line(pos).unwrap(); - self.lines[line_index] + let analysis = self.analyze(); + analysis.lines[line_index] } /// Get a line from the list of pre-computed line-beginnings. @@ -944,7 +934,8 @@ impl SourceFile { } let begin = { - let line = self.lines.get(line_number)?; + let analysis = self.analyze(); + let line = analysis.lines.get(line_number)?; let begin: BytePos = *line - self.start_pos; begin.to_usize() }; @@ -961,7 +952,8 @@ impl SourceFile { } pub fn count_lines(&self) -> usize { - self.lines.len() + let analysis = self.analyze(); + analysis.lines.len() } /// Find the line containing the given position. The return value is the @@ -969,12 +961,13 @@ impl SourceFile { /// number. If the `source_file` is empty or the position is located before /// the first line, `None` is returned. pub fn lookup_line(&self, pos: BytePos) -> Option { - if self.lines.is_empty() { + let analysis = self.analyze(); + if analysis.lines.is_empty() { return None; } - let line_index = lookup_line(&self.lines[..], pos); - assert!(line_index < self.lines.len() as isize); + let line_index = lookup_line(&analysis.lines, pos); + assert!(line_index < analysis.lines.len() as isize); if line_index >= 0 { Some(line_index as usize) } else { @@ -987,11 +980,13 @@ impl SourceFile { return (self.start_pos, self.end_pos); } - assert!(line_index < self.lines.len()); - if line_index == (self.lines.len() - 1) { - (self.lines[line_index], self.end_pos) + let analysis = self.analyze(); + + assert!(line_index < analysis.lines.len()); + if line_index == (analysis.lines.len() - 1) { + (analysis.lines[line_index], self.end_pos) } else { - (self.lines[line_index], self.lines[line_index + 1]) + (analysis.lines[line_index], analysis.lines[line_index + 1]) } } @@ -999,6 +994,18 @@ impl SourceFile { pub fn contains(&self, byte_pos: BytePos) -> bool { byte_pos >= self.start_pos && byte_pos <= self.end_pos } + + pub fn analyze(&self) -> &SourceFileAnalysis { + self.lazy.get_or_init(|| { + let (lines, multibyte_chars, non_narrow_chars) = + analyze_source_file::analyze_source_file(&self.src[..], self.start_pos); + SourceFileAnalysis { + lines, + multibyte_chars, + non_narrow_chars, + } + }) + } } /// Remove utf-8 BOM if any. diff --git a/crates/swc_error_reporters/src/lib.rs b/crates/swc_error_reporters/src/lib.rs index 4297233473c..ff378249eb5 100644 --- a/crates/swc_error_reporters/src/lib.rs +++ b/crates/swc_error_reporters/src/lib.rs @@ -131,7 +131,7 @@ impl SourceCode for MietteSourceCode<'_> { } let loc = self.0.lookup_char_pos(span.lo()); - let line_count = loc.file.lines.len(); + let line_count = loc.file.analyze().lines.len(); let name = if self.1.skip_filename { None diff --git a/crates/swc_estree_compat/src/swcify/ctx.rs b/crates/swc_estree_compat/src/swcify/ctx.rs index b707b2c9806..af9c9b8f3da 100644 --- a/crates/swc_estree_compat/src/swcify/ctx.rs +++ b/crates/swc_estree_compat/src/swcify/ctx.rs @@ -12,7 +12,7 @@ pub struct Context { impl Context { fn locate_line_col(&self, loc: LineCol) -> BytePos { - if let Some(&line_start) = self.fm.lines.get(loc.line) { + if let Some(&line_start) = self.fm.analyze().lines.get(loc.line) { line_start + BytePos(loc.column as _) } else { BytePos(0)