perf(html/minifier): Improve performance (#5983)

This commit is contained in:
Alexander Akait 2022-09-29 05:58:09 +03:00 committed by GitHub
parent 18123fb7b0
commit 6c08971a53
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 102 additions and 53 deletions

1
Cargo.lock generated
View File

@ -4090,6 +4090,7 @@ dependencies = [
"once_cell",
"serde",
"serde_json",
"swc_atoms",
"swc_common",
]

View File

@ -54,7 +54,7 @@ pub enum Child {
}
#[ast_node("DocumentType")]
#[derive(Eq, Hash, EqIgnoreSpan)]
#[derive(Eq, Hash)]
pub struct DocumentType {
pub span: Span,
#[cfg_attr(feature = "rkyv", with(swc_atoms::EncodeJsWord))]
@ -67,6 +67,14 @@ pub struct DocumentType {
pub raw: Option<JsWord>,
}
impl EqIgnoreSpan for DocumentType {
    /// Returns `true` when both doctypes have equal `name`, `public_id` and
    /// `system_id`.
    ///
    /// No other field of the struct (such as `span` or `raw`) is consulted.
    fn eq_ignore_span(&self, other: &Self) -> bool {
        // Tuple equality compares element by element, in order.
        let lhs = (&self.name, &self.public_id, &self.system_id);
        let rhs = (&other.name, &other.public_id, &other.system_id);

        lhs == rhs
    }
}
#[derive(StringEnum, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, EqIgnoreSpan)]
#[cfg_attr(
feature = "rkyv",
@ -110,7 +118,7 @@ pub struct Element {
}
#[ast_node("Attribute")]
#[derive(Eq, Hash, EqIgnoreSpan)]
#[derive(Eq, Hash)]
pub struct Attribute {
pub span: Span,
pub namespace: Option<Namespace>,
@ -126,8 +134,17 @@ pub struct Attribute {
pub raw_value: Option<JsWord>,
}
impl EqIgnoreSpan for Attribute {
    /// Returns `true` when both attributes have equal `namespace`, `prefix`,
    /// `name` and `value`.
    ///
    /// No other field of the struct (such as `span` or `raw_value`) is
    /// consulted.
    fn eq_ignore_span(&self, other: &Self) -> bool {
        // Tuple equality compares element by element, in order.
        let lhs = (&self.namespace, &self.prefix, &self.name, &self.value);
        let rhs = (&other.namespace, &other.prefix, &other.name, &other.value);

        lhs == rhs
    }
}
#[ast_node("Text")]
#[derive(Eq, Hash, EqIgnoreSpan)]
#[derive(Eq, Hash)]
pub struct Text {
pub span: Span,
#[cfg_attr(feature = "rkyv", with(swc_atoms::EncodeJsWord))]
@ -136,8 +153,14 @@ pub struct Text {
pub raw: Option<JsWord>,
}
impl EqIgnoreSpan for Text {
    /// Returns `true` when both text nodes carry equal `data`.
    ///
    /// Only `data` is compared; `span` and `raw` are not consulted.
    fn eq_ignore_span(&self, other: &Self) -> bool {
        let Self { data: lhs, .. } = self;
        let Self { data: rhs, .. } = other;

        lhs == rhs
    }
}
#[ast_node("Comment")]
#[derive(Eq, Hash, EqIgnoreSpan)]
#[derive(Eq, Hash)]
pub struct Comment {
pub span: Span,
#[cfg_attr(feature = "rkyv", with(swc_atoms::EncodeJsWord))]
@ -145,3 +168,9 @@ pub struct Comment {
#[cfg_attr(feature = "rkyv", with(swc_atoms::EncodeJsWord))]
pub raw: Option<JsWord>,
}
impl EqIgnoreSpan for Comment {
    /// Returns `true` when both comments carry equal `data`.
    ///
    /// Only `data` is compared; `span` and `raw` are not consulted.
    fn eq_ignore_span(&self, other: &Self) -> bool {
        let (mine, theirs) = (&self.data, &other.data);

        mine == theirs
    }
}

View File

@ -1,17 +1,15 @@
#![deny(clippy::all)]
#![feature(box_patterns)]
use std::cmp::Ordering;
use std::{cmp::Ordering, mem::take};
use once_cell::sync::Lazy;
use serde_json::Value;
use swc_atoms::{js_word, JsWord};
use swc_cached::regex::CachedRegex;
use swc_common::{
collections::{AHashMap, AHashSet},
comments::SingleThreadedComments,
sync::Lrc,
FileName, FilePathMapping, Mark, SourceMap,
collections::AHashMap, comments::SingleThreadedComments, sync::Lrc, FileName, FilePathMapping,
Mark, SourceMap,
};
use swc_html_ast::*;
use swc_html_parser::parser::ParserConfig;
@ -22,6 +20,7 @@ use crate::option::{
CollapseWhitespaces, CssOptions, JsOptions, JsParserOptions, JsonOptions, MinifierType,
MinifyCssOption, MinifyJsOption, MinifyJsonOption, MinifyOptions,
};
pub mod option;
// Global attributes
@ -370,8 +369,8 @@ impl Minifier<'_> {
EVENT_HANDLER_ATTRIBUTES.contains(&&**name)
}
fn is_boolean_attribute(&self, element: &Element, name: &str) -> bool {
if let Some(global_pseudo_element) = HTML_ELEMENTS_AND_ATTRIBUTES.get("*") {
fn is_boolean_attribute(&self, element: &Element, name: &JsWord) -> bool {
if let Some(global_pseudo_element) = HTML_ELEMENTS_AND_ATTRIBUTES.get(&js_word!("*")) {
if let Some(element) = global_pseudo_element.other.get(name) {
if element.boolean.is_some() && element.boolean.unwrap() {
return true;
@ -379,7 +378,7 @@ impl Minifier<'_> {
}
}
if let Some(element) = HTML_ELEMENTS_AND_ATTRIBUTES.get(&*element.tag_name) {
if let Some(element) = HTML_ELEMENTS_AND_ATTRIBUTES.get(&element.tag_name) {
if let Some(element) = element.other.get(name) {
if element.boolean.is_some() && element.boolean.unwrap() {
return true;
@ -479,7 +478,7 @@ impl Minifier<'_> {
}
}
fn is_semicolon_separated_attribute(&self, element: &Element, attribute_name: &str) -> bool {
fn is_semicolon_separated_attribute(&self, element: &Element, attribute_name: &JsWord) -> bool {
match element.namespace {
Namespace::SVG => {
SEMICOLON_SEPARATED_SVG_ATTRIBUTES.contains(&(&element.tag_name, attribute_name))
@ -612,13 +611,13 @@ impl Minifier<'_> {
with_namespace.push(':');
with_namespace.push_str(&attribute.name);
with_namespace
with_namespace.into()
} else {
attribute.name.to_string()
attribute.name.clone()
};
let normalized_value = attribute_value.trim();
let attributes = match default_attributes.get(&**tag_name) {
let attributes = match default_attributes.get(tag_name) {
Some(element) => element,
None => return false,
};
@ -2137,39 +2136,56 @@ impl VisitMut for Minifier<'_> {
self.descendant_of_pre = old_descendant_of_pre;
}
let mut already_seen: AHashSet<JsWord> = Default::default();
let mut remove_list = vec![];
n.attributes.retain(|attribute| {
if already_seen.contains(&attribute.name) {
return false;
}
already_seen.insert(attribute.name.clone());
if attribute.value.is_none() {
return true;
}
if self.options.remove_redundant_attributes
&& self.is_default_attribute_value(n.namespace, &n.tag_name, attribute)
{
return false;
}
if self.options.remove_empty_attributes {
let value = attribute.value.as_ref().unwrap();
if (matches!(attribute.name, js_word!("id")) && value.is_empty())
|| (matches!(attribute.name, js_word!("class") | js_word!("style"))
&& value.is_empty())
|| self.is_event_handler_attribute(&attribute.name) && value.is_empty()
for (i, i1) in n.attributes.iter().enumerate() {
if i1.value.is_some() {
if self.options.remove_redundant_attributes
&& self.is_default_attribute_value(n.namespace, &n.tag_name, i1)
{
return false;
remove_list.push(i);
continue;
}
if self.options.remove_empty_attributes {
let value = i1.value.as_ref().unwrap();
if (matches!(i1.name, js_word!("id")) && value.is_empty())
|| (matches!(i1.name, js_word!("class") | js_word!("style"))
&& value.is_empty())
|| self.is_event_handler_attribute(&i1.name) && value.is_empty()
{
remove_list.push(i);
continue;
}
}
}
true
});
for (j, j1) in n.attributes.iter().enumerate() {
if i < j && i1.name == j1.name {
remove_list.push(j);
}
}
}
// Fast path. We don't face real duplicates in most cases.
if !remove_list.is_empty() {
let new = take(&mut n.attributes)
.into_iter()
.enumerate()
.filter_map(|(idx, value)| {
if remove_list.contains(&idx) {
None
} else {
Some(value)
}
})
.collect::<Vec<_>>();
n.attributes = new;
}
if let Some(attribute_name_counter) = &self.attribute_name_counter {
n.attributes.sort_by(|a, b| {

View File

@ -1,6 +1,7 @@
extern crate swc_node_base;
use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion};
use swc_atoms::js_word;
use swc_common::{input::StringInput, FileName, Span, SyntaxContext, DUMMY_SP};
use swc_html_ast::{Document, DocumentFragment, DocumentMode, Element, Namespace};
use swc_html_parser::{lexer::Lexer, parser::Parser};
@ -43,7 +44,7 @@ where
.parse_document_fragment(
Element {
span: Default::default(),
tag_name: "template".into(),
tag_name: js_word!("template"),
namespace: Namespace::HTML,
attributes: vec![],
is_self_closing: false,

View File

@ -1,7 +1,7 @@
[package]
authors = [
"강동윤 <kdy1997.dev@gmail.com>",
"Alexander Akait <sheo13666q@gmail.com>",
"강동윤 <kdy1997.dev@gmail.com>",
"Alexander Akait <sheo13666q@gmail.com>",
]
description = "Utils for HTML"
documentation = "https://rustdoc.swc.rs/swc_html_utils/"
@ -19,4 +19,5 @@ bench = false
once_cell = "1.10.0"
serde = { version = "1.0.118", features = ["derive"] }
serde_json = "1.0.61"
swc_atoms = { version = "0.4.17", path = "../swc_atoms" }
swc_common = { version = "0.29.3", path = "../swc_common" }

View File

@ -1,5 +1,6 @@
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use swc_atoms::JsWord;
use swc_common::collections::AHashMap;
#[derive(Serialize, Deserialize, Debug)]
@ -28,21 +29,21 @@ pub struct AttributeInfo {
#[derive(Serialize, Deserialize, Debug)]
pub struct Element {
_extends: Option<Vec<String>>,
_extends: Option<Vec<JsWord>>,
#[serde(flatten)]
pub other: AHashMap<String, AttributeInfo>,
pub other: AHashMap<JsWord, AttributeInfo>,
}
pub static HTML_ELEMENTS_AND_ATTRIBUTES: Lazy<AHashMap<String, Element>> = Lazy::new(|| {
let default_attributes: AHashMap<String, Element> =
pub static HTML_ELEMENTS_AND_ATTRIBUTES: Lazy<AHashMap<JsWord, Element>> = Lazy::new(|| {
let default_attributes: AHashMap<JsWord, Element> =
serde_json::from_str(include_str!("../data/html_elements_and_attributes.json"))
.expect("failed to parse html_elements_and_attributes.json for default attributes");
default_attributes
});
pub static SVG_ELEMENTS_AND_ATTRIBUTES: Lazy<AHashMap<String, Element>> = Lazy::new(|| {
let svg_elements_and_attributes: AHashMap<String, Element> =
pub static SVG_ELEMENTS_AND_ATTRIBUTES: Lazy<AHashMap<JsWord, Element>> = Lazy::new(|| {
let svg_elements_and_attributes: AHashMap<JsWord, Element> =
serde_json::from_str(include_str!("../data/svg_elements_and_attributes.json"))
.expect("failed to parse svg_elements_and_attributes.json for default attributes");