feat(html/minifier): Add normalize_attributes for disabling normalization (#5045)

This commit is contained in:
Alexander Akait 2022-06-28 07:37:04 +03:00 committed by GitHub
parent 70770e0fd2
commit 0836d29913
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 135 additions and 104 deletions

View File

@ -330,6 +330,7 @@ struct Minifier {
remove_empty_attributes: bool,
remove_redundant_attributes: bool,
collapse_boolean_attributes: bool,
normalize_attributes: bool,
minify_json: bool,
minify_js: bool,
minify_css: bool,
@ -1374,6 +1375,7 @@ impl Minifier {
// Every option is forwarded from the `Minifier` field of the same name so a
// nested minifier behaves exactly like its parent.
remove_empty_attributes: self.remove_empty_attributes,
remove_redundant_attributes: self.remove_redundant_attributes,
collapse_boolean_attributes: self.collapse_boolean_attributes,
normalize_attributes: self.normalize_attributes,
minify_js: self.minify_js,
minify_json: self.minify_json,
minify_css: self.minify_css,
@ -1547,6 +1549,10 @@ impl VisitMut for Minifier {
}
};
if value.is_empty() {
return;
}
let current_element = self.current_element.as_ref().unwrap();
if self.collapse_boolean_attributes
@ -1556,7 +1562,88 @@ impl VisitMut for Minifier {
n.value = None;
return;
} else if self.is_space_separated_attribute(current_element, &n.name) {
} else if self.normalize_attributes {
if self.is_space_separated_attribute(current_element, &n.name) {
value = value.split_whitespace().collect::<Vec<_>>().join(" ");
} else if self.is_comma_separated_attribute(current_element, &n.name) {
let is_sizes = matches!(&*n.name, "sizes" | "imagesizes");
let mut new_values = vec![];
for value in value.trim().split(',') {
if is_sizes {
let trimmed = value.trim();
match self.minify_sizes(trimmed) {
Some(minified) => {
new_values.push(minified);
}
_ => {
new_values.push(trimmed.to_string());
}
};
} else {
new_values.push(value.trim().to_string());
}
}
value = new_values.join(",");
} else if self.is_trimable_separated_attribute(current_element, &n.name) {
value = value.trim().to_string();
} else if current_element.namespace == Namespace::HTML
&& &n.name == "contenteditable"
&& value == "true"
{
n.value = Some(js_word!(""));
return;
} else if &n.name == "content"
&& self.element_has_attribute_with_value(
current_element,
"http-equiv",
&["content-security-policy"],
)
{
let values = value.trim().split(';');
let mut new_values = vec![];
for value in values {
new_values.push(
value
.trim()
.split(' ')
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(" "),
);
}
value = new_values.join(";");
if value.ends_with(';') {
value.pop();
}
} else if self.is_event_handler_attribute(&n.name) {
value = value.trim().into();
if value.trim().to_lowercase().starts_with("javascript:") {
value = value.chars().skip(11).collect();
}
} else if current_element.namespace == Namespace::HTML
&& &*current_element.tag_name == "iframe"
&& &n.name == "srcdoc"
{
value = match self
.minify_html(value.clone(), HtmlMinificationMode::DocumentIframeSrcdoc)
{
Some(minified) => minified,
_ => value,
};
}
}
if &*n.name == "class" {
let mut values = value.split_whitespace().collect::<Vec<_>>();
if &*n.name == "class" {
@ -1564,101 +1651,23 @@ impl VisitMut for Minifier {
}
value = values.join(" ");
} else if self.is_comma_separated_attribute(current_element, &n.name) {
let is_sizes = matches!(&*n.name, "sizes" | "imagesizes");
let mut new_values = vec![];
for value in value.trim().split(',') {
if is_sizes {
let trimmed = value.trim();
match self.minify_sizes(trimmed) {
Some(minified) => {
new_values.push(minified);
}
_ => {
new_values.push(trimmed.to_string());
}
};
} else {
new_values.push(value.trim().to_string());
}
}
value = new_values.join(",");
if self.minify_css && &*n.name == "media" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::MediaQueryList)
{
value = minified;
}
}
} else if self.is_trimable_separated_attribute(current_element, &n.name) {
value = value.trim().to_string();
if self.minify_css && &*n.name == "style" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::ListOfDeclarations)
{
value = minified;
}
}
} else if current_element.namespace == Namespace::HTML
&& &n.name == "contenteditable"
&& value == "true"
{
n.value = Some(js_word!(""));
return;
} else if &n.name == "content"
&& self.element_has_attribute_with_value(
current_element,
"http-equiv",
&["content-security-policy"],
)
{
let values = value.trim().split(';');
let mut new_values = vec![];
for value in values {
new_values.push(
value
.trim()
.split(' ')
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(" "),
);
}
value = new_values.join(";");
if value.ends_with(';') {
value.pop();
}
} else if self.is_event_handler_attribute(&n.name) {
value = value.trim().into();
if value.trim().to_lowercase().starts_with("javascript:") {
value = value.chars().skip(11).collect();
}
} else if self.minify_js && self.is_event_handler_attribute(&n.name) {
value = match self.minify_js(value.clone(), false) {
Some(minified) => minified,
_ => value,
};
} else if current_element.namespace == Namespace::HTML
&& &*current_element.tag_name == "iframe"
&& &n.name == "srcdoc"
{
value =
match self.minify_html(value.clone(), HtmlMinificationMode::DocumentIframeSrcdoc) {
Some(minified) => minified,
_ => value,
};
} else if self.minify_css && &*n.name == "media" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::MediaQueryList)
{
value = minified;
}
} else if self.minify_css && &*n.name == "style" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::ListOfDeclarations)
{
value = minified;
}
}
if self.minify_additional_attributes.is_some() {
@ -1893,6 +1902,7 @@ fn create_minifier(context_element: Option<&Element>, options: &MinifyOptions) -
remove_empty_attributes: options.remove_empty_attributes,
remove_redundant_attributes: options.remove_redundant_attributes,
collapse_boolean_attributes: options.collapse_boolean_attributes,
normalize_attributes: options.normalize_attributes,
minify_js: options.minify_js,
minify_json: options.minify_json,

View File

@ -19,6 +19,12 @@ pub struct MinifyOptions {
pub force_set_html5_doctype: bool,
#[serde(default)]
pub collapse_whitespaces: Option<CollapseWhitespaces>,
#[serde(default = "true_by_default")]
pub remove_comments: bool,
#[serde(default = "default_preserve_comments")]
pub preserve_comments: Option<Vec<CachedRegex>>,
#[serde(default = "true_by_default")]
pub minify_conditional_comments: bool,
/// Controls removal of empty attributes; by default we only remove attributes
/// that are safe to remove (for example, an empty `style` attribute),
/// but in edge cases it can be unsafe because some libraries can
@ -26,29 +32,28 @@ pub struct MinifyOptions {
/// case strings will be different, which can break the work of
/// libraries
#[serde(default = "true_by_default")]
pub remove_comments: bool,
#[serde(default = "default_preserve_comments")]
pub preserve_comments: Option<Vec<CachedRegex>>,
#[serde(default = "true_by_default")]
pub minify_conditional_comments: bool,
#[serde(default = "true_by_default")]
pub remove_empty_attributes: bool,
#[serde(default = "true_by_default")]
pub remove_redundant_attributes: bool,
#[serde(default = "true_by_default")]
pub collapse_boolean_attributes: bool,
/// Remove extra whitespace in space and comma separated attribute values
/// (where it is safe) and remove `javascript:` prefix for event handler
/// attributes
#[serde(default = "true_by_default")]
pub normalize_attributes: bool,
#[serde(default = "true_by_default")]
pub minify_js: bool,
#[serde(default = "true_by_default")]
pub minify_json: bool,
#[serde(default = "true_by_default")]
pub minify_css: bool,
// Allow to compress value of custom attributes,
// i.e. `<div data-js="myFunction(100 * 2, 'foo' + 'bar')"></div>`
//
// The first item is tag_name
// The second is attribute name
// The third is type of minifier
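/// Allows compressing the values of custom attributes,
/// e.g. `<div data-js="myFunction(100 * 2, 'foo' + 'bar')"></div>`
///
/// The first item is tag_name
/// The second is attribute name
/// The third is type of minifier
// NOTE(review): each entry is declared as a two-element
// `(CachedRegex, MinifierType)` tuple, so the three-item list above looks
// out of date — confirm which items actually apply.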
#[serde(default)]
pub minify_additional_attributes: Option<Vec<(CachedRegex, MinifierType)>>,
}

View File

@ -0,0 +1,3 @@
{
"normalizeAttributes": false
}

View File

@ -0,0 +1,11 @@
<!doctype html>
<html lang="en">
<head>
<title>Document</title>
<link rel="stylesheet" href="test.css" media="screen and (min-width: 1024px)">
</head>
<body>
<a rel="foo bar baz"></a>
<div onclick="javascript:alert( 'test' ) ;"></div>
</body>
</html>

View File

@ -0,0 +1,2 @@
<!doctype html><html lang=en><title>Document</title><link rel=stylesheet href=test.css media="screen and (min-width:1024px)"><a rel="foo bar baz"></a>
<div onclick='javascript:alert("test")'></div>