perf(es/plugin): Make analyze_source_file lazy, again (#9732)
Some checks are pending
CI / Cargo fmt (push) Waiting to run
CI / Cargo clippy (push) Waiting to run
CI / Check license of dependencies (push) Waiting to run
CI / Check (macos-latest) (push) Waiting to run
CI / Check (ubuntu-latest) (push) Waiting to run
CI / Check (windows-latest) (push) Waiting to run
CI / Test wasm (binding_core_wasm) (push) Waiting to run
CI / Test wasm (binding_minifier_wasm) (push) Waiting to run
CI / Test wasm (binding_typescript_wasm) (push) Waiting to run
CI / List crates (push) Waiting to run
CI / Test - ${{ matrix.settings.crate }} - ${{ matrix.settings.os }} (push) Blocked by required conditions
CI / Test node bindings - ${{ matrix.os }} (macos-latest) (push) Waiting to run
CI / Test node bindings - ${{ matrix.os }} (windows-latest) (push) Waiting to run
CI / Test with @swc/cli (push) Waiting to run
CI / Miri (better_scoped_tls) (push) Waiting to run
CI / Miri (string_enum) (push) Waiting to run
CI / Miri (swc) (push) Waiting to run
CI / Miri (swc_bundler) (push) Waiting to run
CI / Done (push) Blocked by required conditions
Benchmark / Bench everything (push) Waiting to run
Publish crates (auto) / Publish cargo crates (push) Waiting to run

**Description:**

This reverts commit 772f023fd2 (which itself reverted #9717), restoring the lazy, on-demand computation of `analyze_source_file` for `SourceFile`.

**Related issue:**

 - Reverts https://github.com/swc-project/swc/pull/9717
This commit is contained in:
Donny/강동윤 2024-11-11 21:22:46 +09:00 committed by GitHub
parent 9b74ccd92c
commit c1d12d6e2a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 173 additions and 40 deletions

View File

@ -0,0 +1,5 @@
---
swc_common: major
---
perf(es/plugin): Make `analyze_source_file` lazy, again

View File

@ -0,0 +1,116 @@
use std::ops::Deref;
use once_cell::sync::OnceCell;
/// Wrapper for [OnceCell] with support for [rkyv].
///
/// Dereferences to the inner [`OnceCell`], so all of its methods
/// (`get`, `set`, `get_or_init`, …) are available directly on the cell.
#[derive(Clone, Debug)]
pub struct CacheCell<T>(OnceCell<T>);

impl<T> CacheCell<T> {
    /// Creates an empty, uninitialized cell.
    pub fn new() -> Self {
        CacheCell(OnceCell::new())
    }
}

impl<T> Default for CacheCell<T> {
    /// Equivalent to [`CacheCell::new`]: an empty cell.
    fn default() -> Self {
        CacheCell(OnceCell::new())
    }
}

impl<T> From<T> for CacheCell<T> {
    /// Creates a cell that is already initialized with `value`.
    fn from(value: T) -> Self {
        CacheCell(OnceCell::from(value))
    }
}

impl<T> Deref for CacheCell<T> {
    type Target = OnceCell<T>;

    fn deref(&self) -> &OnceCell<T> {
        &self.0
    }
}
#[cfg(feature = "rkyv-impl")]
mod rkyv_impl {
//! [rkyv] support for [`CacheCell`].
//!
//! A cell is archived as an [`ArchivedOption`]: an initialized cell
//! serializes like `Some(value)`, an empty cell like `None`.
use std::hint::unreachable_unchecked;
use rancor::Fallible;
use rkyv::{
munge::munge, option::ArchivedOption, traits::NoUndef, Archive, Deserialize, Place,
Serialize,
};
use super::*;
// Discriminant mirroring `ArchivedOption`'s tag byte. The variants are never
// constructed by name (only written via `tag.write(..)`), hence `dead_code`.
#[allow(dead_code)]
#[repr(u8)]
enum ArchivedOptionTag {
None,
Some,
}
// SAFETY: `ArchivedOptionTag` is `repr(u8)` and so always consists of a single
// well-defined byte.
unsafe impl NoUndef for ArchivedOptionTag {}
// In-memory shapes of the two archived variants; `repr(C)` pins the field
// order so the tag is always the first byte. These are assumed to match
// rkyv's own layout for `ArchivedOption` — mirrors rkyv's internal
// `Option` archiving.
#[repr(C)]
struct ArchivedOptionVariantNone(ArchivedOptionTag);
#[repr(C)]
struct ArchivedOptionVariantSome<T>(ArchivedOptionTag, T);
impl<T: Archive> Archive for CacheCell<T> {
type Archived = ArchivedOption<T::Archived>;
// `None` resolver <=> the cell was empty when serialized.
type Resolver = Option<T::Resolver>;
fn resolve(&self, resolver: Self::Resolver, out: Place<Self::Archived>) {
match resolver {
None => {
// Empty cell: only the `None` tag byte is written.
// SAFETY: assumes `ArchivedOptionVariantNone` has the same layout
// as the `None` variant of `ArchivedOption` being written here.
let out = unsafe { out.cast_unchecked::<ArchivedOptionVariantNone>() };
munge!(let ArchivedOptionVariantNone(tag) = out);
tag.write(ArchivedOptionTag::None);
}
Some(resolver) => {
// SAFETY: assumes `ArchivedOptionVariantSome` has the same layout
// as the `Some` variant of `ArchivedOption` being written here.
let out =
unsafe { out.cast_unchecked::<ArchivedOptionVariantSome<T::Archived>>() };
munge!(let ArchivedOptionVariantSome(tag, out_value) = out);
tag.write(ArchivedOptionTag::Some);
let value = if let Some(value) = self.get() {
value
} else {
// SAFETY: `resolver` is `Some` only when the `Serialize` impl
// below observed an initialized cell, and a `OnceCell` never
// transitions back to empty, so `self.get()` must be `Some`
// here. (Also relies on rkyv's contract that `resolve` is
// called with the resolver produced for this same value.)
unsafe {
unreachable_unchecked();
}
};
value.resolve(resolver, out_value);
}
}
}
}
impl<T: Serialize<S>, S: Fallible + ?Sized> Serialize<S> for CacheCell<T> {
// Serializes the inner value only if the cell is initialized; an empty
// cell yields `Ok(None)`, which `resolve` turns into an archived `None`.
fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
self.get()
.map(|value| value.serialize(serializer))
.transpose()
}
}
impl<T, D> Deserialize<CacheCell<T>, D> for ArchivedOption<T::Archived>
where
T: Archive,
T::Archived: Deserialize<T, D>,
D: Fallible + ?Sized,
{
// An archived `Some` deserializes into an already-initialized cell;
// `None` into a fresh, empty one.
fn deserialize(&self, deserializer: &mut D) -> Result<CacheCell<T>, D::Error> {
Ok(match self {
ArchivedOption::Some(value) => CacheCell::from(value.deserialize(deserializer)?),
ArchivedOption::None => CacheCell::new(),
})
}
}
}

View File

@ -51,14 +51,13 @@ pub use self::{
source_map::{FileLines, FileLoader, FilePathMapping, SourceMap, SpanSnippetError},
syntax_pos::LineCol,
};
#[doc(hidden)]
pub mod private;
/// A trait for ast nodes.
pub trait AstNode: Debug + PartialEq + Clone + Spanned {
const TYPE: &'static str;
}
pub mod cache;
pub mod collections;
pub mod comments;
mod eq;
@ -68,6 +67,8 @@ pub mod iter;
pub mod pass;
pub mod plugin;
mod pos;
#[doc(hidden)]
pub mod private;
mod rustc_data_structures;
pub mod serializer;
pub mod source_map;

View File

@ -316,10 +316,11 @@ impl SourceMap {
let line_info = self.lookup_line_with(fm, pos);
match line_info {
Ok(SourceFileAndLine { sf: f, line: a }) => {
let analysis = f.analyze();
let chpos = self.bytepos_to_file_charpos_with(&f, pos);
let line = a + 1; // Line numbers start at 1
let linebpos = f.lines[a];
let linebpos = f.analyze().lines[a];
assert!(
pos >= linebpos,
"{}: bpos = {:?}; linebpos = {:?};",
@ -332,16 +333,17 @@ impl SourceMap {
let col = chpos - linechpos;
let col_display = {
let start_width_idx = f
let start_width_idx = analysis
.non_narrow_chars
.binary_search_by_key(&linebpos, |x| x.pos())
.unwrap_or_else(|x| x);
let end_width_idx = f
let end_width_idx = analysis
.non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let special_chars = end_width_idx - start_width_idx;
let non_narrow: usize = f.non_narrow_chars[start_width_idx..end_width_idx]
let non_narrow: usize = analysis.non_narrow_chars
[start_width_idx..end_width_idx]
.iter()
.map(|x| x.width())
.sum();
@ -367,14 +369,15 @@ impl SourceMap {
})
}
Err(f) => {
let analysis = f.analyze();
let chpos = self.bytepos_to_file_charpos(pos)?;
let col_display = {
let end_width_idx = f
let end_width_idx = analysis
.non_narrow_chars
.binary_search_by_key(&pos, |x| x.pos())
.unwrap_or_else(|x| x);
let non_narrow: usize = f.non_narrow_chars[0..end_width_idx]
let non_narrow: usize = analysis.non_narrow_chars[0..end_width_idx]
.iter()
.map(|x| x.width())
.sum();
@ -1028,11 +1031,11 @@ impl SourceMap {
) -> u32 {
let mut total_extra_bytes = state.total_extra_bytes;
let mut index = state.mbc_index;
let analysis = file.analyze();
if bpos >= state.pos {
let range = index..file.multibyte_chars.len();
let range = index..analysis.multibyte_chars.len();
for i in range {
let mbc = &file.multibyte_chars[i];
let mbc = &analysis.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos >= bpos {
break;
@ -1052,7 +1055,7 @@ impl SourceMap {
} else {
let range = 0..index;
for i in range.rev() {
let mbc = &file.multibyte_chars[i];
let mbc = &analysis.multibyte_chars[i];
debug!("{}-byte char at {:?}", mbc.bytes, mbc.pos);
if mbc.pos < bpos {
break;
@ -1322,7 +1325,8 @@ impl SourceMap {
None => continue,
};
let linebpos = f.lines[line as usize];
let analysis = f.analyze();
let linebpos = analysis.lines[line as usize];
debug_assert!(
pos >= linebpos,
"{}: bpos = {:?}; linebpos = {:?};",

View File

@ -16,7 +16,7 @@ use url::Url;
use self::hygiene::MarkData;
pub use self::hygiene::{Mark, SyntaxContext};
use crate::{rustc_data_structures::stable_hasher::StableHasher, sync::Lrc};
use crate::{cache::CacheCell, rustc_data_structures::stable_hasher::StableHasher, sync::Lrc};
mod analyze_source_file;
pub mod hygiene;
@ -825,14 +825,10 @@ pub struct SourceFile {
pub start_pos: BytePos,
/// The end position of this source in the `SourceMap`
pub end_pos: BytePos,
/// Locations of lines beginnings in the source code
pub lines: Vec<BytePos>,
/// Locations of multi-byte characters in the source code
pub multibyte_chars: Vec<MultiByteChar>,
/// Width of characters that are not narrow in the source code
pub non_narrow_chars: Vec<NonNarrowChar>,
/// A hash of the filename, used for speeding up the incr. comp. hashing.
pub name_hash: u128,
lazy: CacheCell<SourceFileAnalysis>,
}
#[cfg_attr(
@ -849,8 +845,6 @@ pub struct SourceFileAnalysis {
pub multibyte_chars: Vec<MultiByteChar>,
/// Width of characters that are not narrow in the source code
pub non_narrow_chars: Vec<NonNarrowChar>,
/// A hash of the filename, used for speeding up the incr. comp. hashing.
pub name_hash: u128,
}
impl fmt::Debug for SourceFile {
@ -904,9 +898,6 @@ impl SourceFile {
};
let end_pos = start_pos.to_usize() + src.len();
let (lines, multibyte_chars, non_narrow_chars) =
analyze_source_file::analyze_source_file(&src[..], start_pos);
SourceFile {
name,
name_was_remapped,
@ -916,17 +907,16 @@ impl SourceFile {
src_hash,
start_pos,
end_pos: SmallPos::from_usize(end_pos),
lines,
multibyte_chars,
non_narrow_chars,
name_hash,
lazy: CacheCell::new(),
}
}
/// Return the BytePos of the beginning of the current line.
pub fn line_begin_pos(&self, pos: BytePos) -> BytePos {
let line_index = self.lookup_line(pos).unwrap();
self.lines[line_index]
let analysis = self.analyze();
analysis.lines[line_index]
}
/// Get a line from the list of pre-computed line-beginnings.
@ -944,7 +934,8 @@ impl SourceFile {
}
let begin = {
let line = self.lines.get(line_number)?;
let analysis = self.analyze();
let line = analysis.lines.get(line_number)?;
let begin: BytePos = *line - self.start_pos;
begin.to_usize()
};
@ -961,7 +952,8 @@ impl SourceFile {
}
pub fn count_lines(&self) -> usize {
self.lines.len()
let analysis = self.analyze();
analysis.lines.len()
}
/// Find the line containing the given position. The return value is the
@ -969,12 +961,13 @@ impl SourceFile {
/// number. If the `source_file` is empty or the position is located before
/// the first line, `None` is returned.
pub fn lookup_line(&self, pos: BytePos) -> Option<usize> {
if self.lines.is_empty() {
let analysis = self.analyze();
if analysis.lines.is_empty() {
return None;
}
let line_index = lookup_line(&self.lines[..], pos);
assert!(line_index < self.lines.len() as isize);
let line_index = lookup_line(&analysis.lines, pos);
assert!(line_index < analysis.lines.len() as isize);
if line_index >= 0 {
Some(line_index as usize)
} else {
@ -987,11 +980,13 @@ impl SourceFile {
return (self.start_pos, self.end_pos);
}
assert!(line_index < self.lines.len());
if line_index == (self.lines.len() - 1) {
(self.lines[line_index], self.end_pos)
let analysis = self.analyze();
assert!(line_index < analysis.lines.len());
if line_index == (analysis.lines.len() - 1) {
(analysis.lines[line_index], self.end_pos)
} else {
(self.lines[line_index], self.lines[line_index + 1])
(analysis.lines[line_index], analysis.lines[line_index + 1])
}
}
@ -999,6 +994,18 @@ impl SourceFile {
/// Returns `true` if `byte_pos` lies within this file's span, i.e. in the
/// inclusive range `start_pos..=end_pos`.
pub fn contains(&self, byte_pos: BytePos) -> bool {
    (self.start_pos..=self.end_pos).contains(&byte_pos)
}
/// Returns the line/character analysis of this file, computing it lazily
/// on the first call and caching it in `self.lazy` thereafter.
///
/// The cached [`SourceFileAnalysis`] holds the line-start positions and
/// the multibyte / non-narrow character tables produced by scanning
/// `self.src` once. Positions are recorded relative to `self.start_pos`,
/// so they are comparable with `BytePos` values from the `SourceMap`.
pub fn analyze(&self) -> &SourceFileAnalysis {
self.lazy.get_or_init(|| {
// Single scan of the source; only runs once per `SourceFile`.
let (lines, multibyte_chars, non_narrow_chars) =
analyze_source_file::analyze_source_file(&self.src[..], self.start_pos);
SourceFileAnalysis {
lines,
multibyte_chars,
non_narrow_chars,
}
})
}
}
/// Remove utf-8 BOM if any.

View File

@ -131,7 +131,7 @@ impl SourceCode for MietteSourceCode<'_> {
}
let loc = self.0.lookup_char_pos(span.lo());
let line_count = loc.file.lines.len();
let line_count = loc.file.analyze().lines.len();
let name = if self.1.skip_filename {
None

View File

@ -12,7 +12,7 @@ pub struct Context {
impl Context {
fn locate_line_col(&self, loc: LineCol) -> BytePos {
if let Some(&line_start) = self.fm.lines.get(loc.line) {
if let Some(&line_start) = self.fm.analyze().lines.get(loc.line) {
line_start + BytePos(loc.column as _)
} else {
BytePos(0)