perf(atoms): Replace string-cache with hstr (#8126)

**Description:**

`hstr` is an alternative to `string-cache`. Unlike `string-cache`, it does not support static strings and does not use a global mutex.
 
**Related issue:**

 - Closes #4946.
 - Closes #7974.
This commit is contained in:
Donny/강동윤 2023-11-07 09:51:43 +09:00 committed by GitHub
parent d1c6d6ea7b
commit aa22746d03
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 138 additions and 140 deletions

38
Cargo.lock generated
View File

@ -1037,12 +1037,12 @@ dependencies = [
[[package]]
name = "dashmap"
version = "5.4.0"
version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "907076dfda823b0b36d2a1bb5f90c96660a5bbcd7729e10727f07858f22c4edc"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if",
"hashbrown 0.12.3",
"hashbrown 0.14.1",
"lock_api",
"once_cell",
"parking_lot_core",
@ -1634,6 +1634,20 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70"
[[package]]
name = "hstr"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5700a2810cdc52f9a306f40c88031fac8d480f3d51ef38b9ca9ff5b8d4f3814"
dependencies = [
"dashmap",
"new_debug_unreachable",
"once_cell",
"phf 0.11.2",
"rustc-hash",
"smallvec",
]
[[package]]
name = "http"
version = "0.2.9"
@ -3471,9 +3485,9 @@ checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec"
[[package]]
name = "smallvec"
version = "1.10.0"
version = "1.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a507befe795404456341dfab10cef66ead4c041f62b8b11bbb92bffe5d0953e0"
checksum = "942b4a808e05215192e39f4ab80813e599068285906cc91aa64f923db842bd5a"
[[package]]
name = "smartstring"
@ -3771,13 +3785,11 @@ name = "swc_atoms"
version = "0.6.0"
dependencies = [
"bytecheck",
"hstr",
"once_cell",
"rkyv",
"rustc-hash",
"serde",
"string_cache",
"string_cache_codegen",
"triomphe",
]
[[package]]
@ -5870,16 +5882,6 @@ dependencies = [
"tracing-log",
]
[[package]]
name = "triomphe"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1ee9bd9239c339d714d657fac840c6d2a4f9c45f4f9ec7b0975113458be78db"
dependencies = [
"serde",
"stable_deref_trait",
]
[[package]]
name = "try-lock"
version = "0.2.4"

View File

@ -1,6 +1,5 @@
[package]
authors = ["강동윤 <kdy1997.dev@gmail.com>"]
build = "build.rs"
description = "Atoms for the swc project."
documentation = "https://rustdoc.swc.rs/swc_atoms/"
edition = "2021"
@ -19,6 +18,7 @@ rkyv-impl = ["__rkyv", "rkyv", "bytecheck"]
[dependencies]
# bytecheck version should be in sync with rkyv version. Do not bump individually.
bytecheck = { version = "0.6.10", optional = true }
hstr = "0.2.3"
once_cell = "1"
rkyv = { package = "rkyv", version = "=0.7.42", optional = true, features = [
"strict",
@ -26,9 +26,3 @@ rkyv = { package = "rkyv", version = "=0.7.42", optional = true, features = [
] }
rustc-hash = "1.1.0"
serde = "1"
string_cache = "0.8.7"
triomphe = "0.1.8"
[build-dependencies]
string_cache_codegen = "0.5.2"

View File

@ -1,29 +0,0 @@
use std::{env, path::Path};
fn main() {
let strs = include_str!("words.txt")
.lines()
.map(|l| l.trim())
.collect::<Vec<_>>();
gen("internal_word", "InternalWord", &strs);
}
fn gen(mac_name: &str, type_name: &str, atoms: &[&str]) {
string_cache_codegen::AtomType::new(type_name, &format!("{}!", mac_name))
.atoms(atoms)
.with_atom_doc(
"
[JsWord] is an interned string.
This type should be used instead of [String] for values, because lots of
values are duplicated. For example, if an identifier is named `myVariable`,
there will be lots of identifier usages with the value `myVariable`.
This type
- makes equality comparison faster.
- reduces memory usage.
",
)
.write_to_file(&Path::new(&env::var("OUT_DIR").unwrap()).join(format!("{}.rs", mac_name)))
.unwrap();
}

View File

@ -26,7 +26,7 @@ pub use self::{atom as js_word, Atom as JsWord};
#[derive(Clone, Default)]
#[cfg_attr(feature = "rkyv-impl", derive(rkyv::bytecheck::CheckBytes))]
#[cfg_attr(feature = "rkyv-impl", repr(C))]
pub struct Atom(string_cache::Atom<InternalWordStaticSet>);
pub struct Atom(hstr::Atom);
/// Safety: We do not perform slicing of single [Atom] from multiple threads.
/// In other words, typically [Atom] is created in a single thread (and in the
@ -39,11 +39,11 @@ unsafe impl Sync for Atom {}
impl Atom {
/// Creates a new [Atom] from a string.
pub fn new<S>(s: S) -> Self
pub fn new<'i, S>(s: S) -> Self
where
S: AsRef<str>,
S: Into<Cow<'i, str>>,
{
Atom(s.as_ref().into())
Atom(hstr::Atom::from(s.into()))
}
#[inline]
@ -52,9 +52,6 @@ impl Atom {
}
}
/// API wrappers for [tendril].
impl Atom {}
impl Deref for Atom {
type Target = str;
@ -236,9 +233,17 @@ where
#[doc(hidden)]
pub type CahcedAtom = Lazy<Atom>;
include!(concat!(env!("OUT_DIR"), "/internal_word.rs"));
/// This should be used as a key for hash maps and hash sets.
///
/// This will be replaced with [Atom] in the future.
pub type StaticString = String;
pub type StaticString = Atom;
#[derive(Default)]
pub struct AtomStore(hstr::AtomStore);
impl AtomStore {
#[inline]
pub fn atom<'a>(&mut self, s: impl Into<Cow<'a, str>>) -> Atom {
Atom(self.0.atom(s))
}
}

View File

@ -351,12 +351,12 @@ impl Compressor {
value,
span,
..
})) => match &*value.to_ascii_lowercase() {
"transparent" => {
})) => match value.to_ascii_lowercase() {
ref s if *s == "transparent" => {
*color = make_color!(*span, 0.0_f64, 0.0_f64, 0.0_f64, 0.0_f64);
}
name => {
if let Some(value) = NAMED_COLORS.get(name) {
if let Some(value) = NAMED_COLORS.get(&name) {
*color = make_color!(
*span,
value.rgb[0] as f64,

View File

@ -5,7 +5,7 @@ use std::mem::take;
use once_cell::sync::Lazy;
use preset_env_base::{query::targets_to_versions, version::Version, BrowserData, Versions};
use swc_atoms::{JsWord, StaticString};
use swc_atoms::JsWord;
use swc_common::{collections::AHashMap, EqIgnoreSpan, DUMMY_SP};
use swc_css_ast::*;
use swc_css_utils::{
@ -16,9 +16,9 @@ use swc_css_visit::{VisitMut, VisitMutWith};
use crate::options::Options;
static PREFIXES_AND_BROWSERS: Lazy<AHashMap<StaticString, [BrowserData<Option<Version>>; 2]>> =
static PREFIXES_AND_BROWSERS: Lazy<AHashMap<String, [BrowserData<Option<Version>>; 2]>> =
Lazy::new(|| {
let map: AHashMap<StaticString, [BrowserData<Option<Version>>; 2]> =
let map: AHashMap<String, [BrowserData<Option<Version>>; 2]> =
serde_json::from_str(include_str!("../data/prefixes_and_browsers.json"))
.expect("failed to parse json");

View File

@ -1,5 +1,4 @@
use either::Either;
use swc_atoms::Atom;
use super::*;
use crate::token::Token;
@ -48,7 +47,7 @@ impl<'a> Lexer<'a> {
});
return Ok(Some(Token::JSXText {
raw: Atom::new(out),
raw: self.atoms.borrow_mut().atom(out),
}));
}
'>' => {
@ -323,9 +322,10 @@ impl<'a> Lexer<'a> {
raw.push(quote);
let mut b = self.atoms.borrow_mut();
Ok(Token::Str {
value: out.into(),
raw: Atom::new(raw),
value: b.atom(out),
raw: b.atom(raw),
})
}
@ -350,7 +350,9 @@ impl<'a> Lexer<'a> {
}
});
Ok(Token::JSXName { name: slice.into() })
Ok(Token::JSXName {
name: self.atoms.borrow_mut().atom(slice),
})
}
}

View File

@ -5,7 +5,7 @@ use std::{cell::RefCell, char, iter::FusedIterator, rc::Rc};
use either::Either::{Left, Right};
use smallvec::{smallvec, SmallVec};
use smartstring::SmartString;
use swc_atoms::Atom;
use swc_atoms::{Atom, AtomStore};
use swc_common::{comments::Comments, input::StringInput, BytePos, Span};
use swc_ecma_ast::{op, AssignOp, EsVersion};
@ -132,6 +132,8 @@ pub struct Lexer<'a> {
module_errors: Rc<RefCell<Vec<Error>>>,
buf: Rc<RefCell<String>>,
atoms: Rc<RefCell<AtomStore>>,
}
impl FusedIterator for Lexer<'_> {}
@ -157,6 +159,7 @@ impl<'a> Lexer<'a> {
errors: Default::default(),
module_errors: Default::default(),
buf: Rc::new(RefCell::new(String::with_capacity(256))),
atoms: Default::default(),
}
}
@ -764,8 +767,10 @@ impl<'a> Lexer<'a> {
fn read_ident_unknown(&mut self) -> LexResult<Token> {
debug_assert!(self.cur().is_some());
let (word, _) =
self.read_word_as_str_with(|s, _, _| Word::Ident(IdentLike::Other(s.into())))?;
let atoms = self.atoms.clone();
let (word, _) = self.read_word_as_str_with(|s, _, _| {
Word::Ident(IdentLike::Other(atoms.borrow_mut().atom(s)))
})?;
Ok(Word(word))
}
@ -778,6 +783,8 @@ impl<'a> Lexer<'a> {
) -> LexResult<Option<Token>> {
debug_assert!(self.cur().is_some());
let atoms = self.atoms.clone();
let start = self.cur_pos();
let (word, has_escape) = self.read_word_as_str_with(|s, _, can_be_known| {
if can_be_known {
@ -786,7 +793,7 @@ impl<'a> Lexer<'a> {
}
}
Word::Ident(IdentLike::Other(s.into()))
Word::Ident(IdentLike::Other(atoms.borrow_mut().atom(s)))
})?;
// Note: ctx is store in lexer because of this error.
@ -1000,6 +1007,7 @@ impl<'a> Lexer<'a> {
self.bump(); // '"'
let atoms = self.atoms.clone();
self.with_buf(|l, out| {
while let Some(c) = {
// Optimization
@ -1018,9 +1026,10 @@ impl<'a> Lexer<'a> {
l.bump();
let mut b = atoms.borrow_mut();
return Ok(Token::Str {
value: (&**out).into(),
raw: raw.into(),
value: b.atom(&*out),
raw: b.atom(raw),
});
}
'\\' => {
@ -1052,9 +1061,10 @@ impl<'a> Lexer<'a> {
l.emit_error(start, SyntaxError::UnterminatedStrLit);
let mut b = atoms.borrow_mut();
Ok(Token::Str {
value: (&**out).into(),
raw: raw.into(),
value: b.atom(&*out),
raw: b.atom(raw),
})
})
}
@ -1074,6 +1084,8 @@ impl<'a> Lexer<'a> {
let (mut escaped, mut in_class) = (false, false);
let atoms = self.atoms.clone();
let content = self.with_buf(|l, buf| {
while let Some(c) = l.cur() {
// This is ported from babel.
@ -1102,7 +1114,7 @@ impl<'a> Lexer<'a> {
buf.push(c);
}
Ok(Atom::new(&**buf))
Ok(atoms.borrow_mut().atom(&**buf))
})?;
// input is terminated without following `/`
@ -1122,9 +1134,9 @@ impl<'a> Lexer<'a> {
// let flags_start = self.cur_pos();
let flags = {
match self.cur() {
Some(c) if c.is_ident_start() => {
self.read_word_as_str_with(|s, _, _| s.into()).map(Some)
}
Some(c) if c.is_ident_start() => self
.read_word_as_str_with(|s, _, _| atoms.borrow_mut().atom(s))
.map(Some),
_ => Ok(None),
}
}?
@ -1145,7 +1157,7 @@ impl<'a> Lexer<'a> {
self.input.bump();
}
let s = self.input.uncons_while(|c| !c.is_line_terminator());
Ok(Some(Atom::new(s)))
Ok(Some(self.atoms.borrow_mut().atom(s)))
}
fn read_tmpl_token(&mut self, start_of_tpl: BytePos) -> LexResult<Token> {
@ -1170,7 +1182,7 @@ impl<'a> Lexer<'a> {
// TODO: Handle error
return Ok(Token::Template {
cooked: cooked.map(Atom::from),
raw: Atom::new(&*raw),
raw: self.atoms.borrow_mut().atom(&*raw),
});
}

View File

@ -62,7 +62,10 @@ impl<'a> Lexer<'a> {
if self.eat(b'n') {
raw.push('n');
return Ok(Either::Right((Box::new(s.into_value()), (&*raw).into())));
return Ok(Either::Right((
Box::new(s.into_value()),
self.atoms.borrow_mut().atom(&*raw),
)));
}
write!(raw_val, "{}", &s.value).unwrap();
@ -81,9 +84,9 @@ impl<'a> Lexer<'a> {
// e.g. `000` is octal
if start.0 != self.last_pos().0 - 1 {
// `-1` is utf 8 length of `0`
return self
.make_legacy_octal(start, 0f64)
.map(|value| Either::Left((value, (&*raw).into())));
return self.make_legacy_octal(start, 0f64).map(|value| {
Either::Left((value, self.atoms.borrow_mut().atom(&*raw)))
});
}
} else {
// strict mode hates non-zero decimals starting with zero.
@ -110,9 +113,9 @@ impl<'a> Lexer<'a> {
panic!("failed to parse {} into float using BigInt", val_str)
});
return self
.make_legacy_octal(start, val)
.map(|value| Either::Left((value, (&*raw).into())));
return self.make_legacy_octal(start, val).map(|value| {
Either::Left((value, self.atoms.borrow_mut().atom(&*raw)))
});
}
}
}
@ -224,7 +227,7 @@ impl<'a> Lexer<'a> {
self.ensure_not_ident()?;
Ok(Either::Left((val, (&*raw_str).into())))
Ok(Either::Left((val, self.atoms.borrow_mut().atom(&*raw_str))))
}
/// Returns `Left(value)` or `Right(BigInt)`
@ -238,6 +241,7 @@ impl<'a> Lexer<'a> {
);
debug_assert_eq!(self.cur(), Some('0'));
let atoms = self.atoms.clone();
self.with_buf(|l, buf| {
l.bump();
@ -263,12 +267,15 @@ impl<'a> Lexer<'a> {
if l.eat(b'n') {
buf.push('n');
return Ok(Either::Right((Box::new(s.into_value()), (&**buf).into())));
return Ok(Either::Right((
Box::new(s.into_value()),
atoms.borrow_mut().atom(&**buf),
)));
}
l.ensure_not_ident()?;
Ok(Either::Left((val, (&**buf).into())))
Ok(Either::Left((val, atoms.borrow_mut().atom(&**buf))))
})
}

View File

@ -257,7 +257,7 @@ impl<'a> Lexer<'a> {
let cmt = Comment {
kind: CommentKind::Line,
span: Span::new(start, end, SyntaxContext::empty()),
text: s.into(),
text: self.atoms.borrow_mut().atom(s),
};
if is_for_next {
@ -343,7 +343,7 @@ impl<'a> Lexer<'a> {
let cmt = Comment {
kind: CommentKind::Block,
span: Span::new(start, end, SyntaxContext::empty()),
text: s.into(),
text: self.atoms.borrow_mut().atom(s),
};
let _ = self.input.peek();

View File

@ -7,7 +7,7 @@ use std::{
};
use num_bigint::BigInt as BigIntValue;
use swc_atoms::{atom, Atom, JsWord};
use swc_atoms::{atom, Atom, AtomStore, JsWord};
use swc_common::{Span, Spanned};
use swc_ecma_ast::{AssignOp, BinaryOp};
@ -504,38 +504,25 @@ pub enum IdentLike {
Other(JsWord),
}
impl From<&'_ str> for IdentLike {
fn from(s: &str) -> Self {
s.parse::<KnownIdent>()
.map(Self::Known)
.unwrap_or_else(|_| Self::Other(s.into()))
}
}
impl IdentLike {
pub(crate) fn from_str(atoms: &mut AtomStore, s: &str) -> IdentLike {
s.parse::<KnownIdent>()
.map(Self::Known)
.unwrap_or_else(|_| Self::Other(atoms.atom(s)))
}
}
impl Word {
pub(crate) fn kind(&self) -> WordKind {
match self {
Word::Keyword(k) => WordKind::Keyword(*k),
Word::Null => WordKind::Null,
Word::True => WordKind::True,
Word::False => WordKind::False,
Word::Ident(IdentLike::Known(i)) => WordKind::Ident(IdentKind::Known(*i)),
Word::Ident(IdentLike::Other(..)) => WordKind::Ident(IdentKind::Other),
}
}
}
impl WordKind {
pub(crate) const fn before_expr(self) -> bool {
match self {
Self::Keyword(k) => k.before_expr(),
_ => false,
}
}
pub(crate) const fn starts_expr(self) -> bool {
match self {
Self::Keyword(k) => k.starts_expr(),
_ => true,
}
}
}
impl From<&'_ str> for Word {
fn from(i: &str) -> Self {
match i {
pub fn from_str(atoms: &mut AtomStore, s: &str) -> Self {
match s {
"null" => Word::Null,
"true" => Word::True,
"false" => Word::False,
@ -574,17 +561,35 @@ impl From<&'_ str> for Word {
"typeof" => TypeOf.into(),
"void" => Void.into(),
"delete" => Delete.into(),
_ => Word::Ident(i.into()),
_ => Word::Ident(IdentLike::from_str(atoms, s)),
}
}
pub(crate) fn kind(&self) -> WordKind {
match self {
Word::Keyword(k) => WordKind::Keyword(*k),
Word::Null => WordKind::Null,
Word::True => WordKind::True,
Word::False => WordKind::False,
Word::Ident(IdentLike::Known(i)) => WordKind::Ident(IdentKind::Known(*i)),
Word::Ident(IdentLike::Other(..)) => WordKind::Ident(IdentKind::Other),
}
}
}
impl From<&'_ str> for IdentLike {
#[inline]
fn from(s: &str) -> Self {
s.parse::<KnownIdent>()
.map(Self::Known)
.unwrap_or_else(|_| Self::Other(s.into()))
impl WordKind {
pub(crate) const fn before_expr(self) -> bool {
match self {
Self::Keyword(k) => k.before_expr(),
_ => false,
}
}
pub(crate) const fn starts_expr(self) -> bool {
match self {
Self::Keyword(k) => k.starts_expr(),
_ => true,
}
}
}