feat(html/codegen): Improve entity compression (#4889)

This commit is contained in:
Alexander Akait 2022-06-07 07:33:33 +03:00 committed by GitHub
parent 44e606a240
commit da09c1ca5b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
20 changed files with 841 additions and 79 deletions

13
Cargo.lock generated
View File

@ -3919,6 +3919,7 @@ dependencies = [
"swc_html_ast",
"swc_html_codegen_macros",
"swc_html_parser",
"swc_html_utils",
"swc_html_visit",
"testing",
]
@ -3955,17 +3956,27 @@ dependencies = [
"bitflags",
"criterion",
"lexical",
"once_cell",
"serde",
"serde_json",
"swc_atoms",
"swc_common",
"swc_html_ast",
"swc_html_utils",
"swc_html_visit",
"swc_node_base",
"testing",
]
[[package]]
name = "swc_html_utils"
version = "0.1.0"
dependencies = [
"once_cell",
"serde",
"serde_json",
"swc_common",
]
[[package]]
name = "swc_html_visit"
version = "0.8.0"

View File

@ -19,6 +19,7 @@ swc_atoms = {version = "0.2.7", path = "../swc_atoms"}
swc_common = { version = "0.18.0", path = "../swc_common"}
swc_html_ast = {version = "0.8.0", path = "../swc_html_ast"}
swc_html_codegen_macros = {version = "0.1.0", path = "../swc_html_codegen_macros"}
swc_html_utils = { version = "0.1.0", path = "../swc_html_utils" }
[dev-dependencies]
swc_common = { version = "0.18.0", path = "../swc_common", features = [

View File

@ -2,10 +2,12 @@
#![allow(clippy::needless_update)]
pub use std::fmt::Result;
use std::{iter::Peekable, str::Chars};
use swc_common::Spanned;
use swc_html_ast::*;
use swc_html_codegen_macros::emitter;
use swc_html_utils::HTML_ENTITIES;
use writer::HtmlWriter;
pub use self::emit::*;
@ -820,10 +822,12 @@ fn minify_attribute_value(value: &str) -> String {
let mut dq = 0;
let mut sq = 0;
for c in value.chars() {
let mut chars = value.chars().peekable();
while let Some(c) = chars.next() {
match c {
'&' => {
minified.push_str("&");
minified.push_str(&minify_amp(&mut chars));
continue;
}
@ -875,11 +879,12 @@ fn normalize_attribute_value(value: &str) -> String {
fn minify_text(value: &str) -> String {
let mut result = String::with_capacity(value.len());
let mut chars = value.chars().peekable();
for c in value.chars() {
while let Some(c) = chars.next() {
match c {
'&' => {
result.push_str("&");
result.push_str(&minify_amp(&mut chars));
}
'<' => {
result.push_str("&lt;");
@ -891,6 +896,94 @@ fn minify_text(value: &str) -> String {
result
}
/// Serializes an ampersand whose `&` has already been consumed by the caller,
/// escaping it as `&amp;` only when leaving it bare would let the following
/// characters be re-parsed as a character reference.
///
/// `chars` must be positioned right after the `&`. Only characters that can
/// belong to a character reference are consumed from it: `#`, the first
/// decimal digit, the `x`/`X` hex marker, ASCII alphanumerics and a
/// terminating `;`. Any other character is left in the iterator so that the
/// caller applies its own handling to it (escaping, quote counting, or
/// starting another ampersand), instead of it being copied through raw.
///
/// Returns `&` or `&amp;` followed by every character that was consumed.
fn minify_amp(chars: &mut Peekable<Chars>) -> String {
    let mut result = String::with_capacity(7);

    match chars.peek().copied() {
        Some(hash @ '#') => {
            chars.next();

            match chars.peek().copied() {
                // HTML CODE
                // Prevent `&amp;#38;` -> `&#38`
                Some(number @ '0'..='9') => {
                    chars.next();

                    result.push_str("&amp;");
                    result.push(hash);
                    result.push(number);
                }
                Some(x @ ('x' | 'X')) => {
                    chars.next();

                    // HEX CODE
                    // Prevent `&amp;#x38;` -> `&#x38`
                    //
                    // The hex digit itself is only inspected, the caller emits it.
                    let starts_hex_code = matches!(chars.peek(), Some(c) if c.is_ascii_hexdigit());

                    result.push_str(if starts_hex_code { "&amp;" } else { "&" });
                    result.push(hash);
                    result.push(x);
                }
                // `&#` followed by anything else (or by the end of input) cannot
                // start a numeric reference; whatever follows stays in `chars`.
                _ => {
                    result.push('&');
                    result.push(hash);
                }
            }
        }
        // Named entity
        // Prevent `&amp;current` -> `&current`
        Some(c @ ('a'..='z' | 'A'..='Z')) => {
            chars.next();

            let mut entity_temporary_buffer = String::with_capacity(33);

            entity_temporary_buffer.push('&');
            entity_temporary_buffer.push(c);

            let mut found_entity = false;

            while let Some(&c) = chars.peek() {
                // Entity names consist of ASCII alphanumerics with an optional
                // trailing `;`, so any other character ends the candidate name
                // and must not be consumed here.
                if !c.is_ascii_alphanumeric() && c != ';' {
                    break;
                }

                chars.next();
                entity_temporary_buffer.push(c);

                if HTML_ENTITIES.contains_key(&entity_temporary_buffer) {
                    found_entity = true;

                    break;
                }

                // We stop when:
                //
                // - the name was terminated by `;` without matching an entity
                // - we consume more characters than the longest entity
                if c == ';' || entity_temporary_buffer.len() > 32 {
                    break;
                }
            }

            result.push_str(if found_entity { "&amp;" } else { "&" });
            // Skip the leading `&` that was only needed for the table lookup.
            result.push_str(&entity_temporary_buffer[1..]);
        }
        // End of input or a character that cannot start a reference: a bare `&`
        // is unambiguous and the next character stays in `chars`.
        _ => result.push('&'),
    }

    result
}
// Escaping a string (for the purposes of the algorithm above) consists of
// running the following steps:
//

View File

@ -1,5 +1,5 @@
<div>Test &amp;</div>
<div>Test &amp;</div>
<div>Test &</div>
<div>Test &</div>
<style>
a::before {
content: "&";

View File

@ -89,8 +89,8 @@ foo
<div data-test=\\foo class=bar>test</div>
<span title='test "with" &amp;quot;'>test</span>
<span title='test "with" &amp; quot'>test</span>
<span title='test "with" &amp;test'>test</span>
<span title='test "with" & quot'>test</span>
<span title='test "with" &test'>test</span>
<span title='test "with" &amp;amptest'>test</span>
<span title='test "with" <'>test</span>
<span title='test "with" >'>test</span>
@ -116,9 +116,9 @@ foo
</style>
<div>
foo &amp; bar
foo&amp;<i>bar</i>
foo&amp;&amp;&amp; bar
foo & bar
foo&<i>bar</i>
foo&&& bar
</div>
<pre><code>Label current;
@ -128,20 +128,20 @@ __ bind(&amp;current);
</code></pre>
<div>
&amp;xxx; &amp;xxx &amp;thorn; &amp;thorn &amp;curren;t &amp;current &amp;current; &amp;&amp;
&xxx; &xxx &amp;thorn; &amp;thorn &amp;curren;t &amp;current &amp;current; &&
&amp;gt
&amp;unknown;
&unknown;
&amp;current
&amp;current;
&amp;current
&amp;current;
ø &amp;osLash Ø
&amp;ø &amp;&amp;osLash; &amp;Ø
&amp;ø &amp;&amp;osLash; &amp;Ø
ø &osLash Ø
&ø &&osLash; &Ø
&ø &&osLash; &Ø
&amp;oslash; &amp;osLash; &amp;Oslash;
&amp;oslash; &amp;osLash; &amp;Oslash;
&amp;oslash; &osLash; &amp;Oslash;
&amp;oslash; &osLash; &amp;Oslash;
</div>

View File

@ -35,6 +35,38 @@
kablammo!
</a>
<div>&amp;#38;</div>
<div>&amp#38;</div>
<div>&amp#38</div>
<div>&amp#x26;</div>
<div>&amp#x26</div>
<div>&#38;amp</div>
<div>&amp#</div>
<div>&amp#1</div>
<div>&#8</div>
<div>&8</div>
<div>&#38;#a</div>
<div>&#38;#xb</div>
<div>&#38;#xj</div>
<div data-test="&amp;amp;"></div>
<div data-test="&amp;amp"></div>
<div data-test="&amp;#36"></div>
<div data-test="&amp;#x36"></div>
<div data-test="&unknown">&unknown</div>
<div data-test="&unknown;">&unknown;</div>
<div data-test="&u;">&u;</div>
<div data-test="&ampCounterClockwiseContourIntegral">&ampCounterClockwiseContourIntegral</div>
<div data-test="&amp;CounterClockwiseContourIntegral">&amp;CounterClockwiseContourIntegral</div>
<div data-test="&ampCounterClockwiseContourIntegral;">&ampCounterClockwiseContourIntegral;</div>
<div data-test="&amp;CounterClockwiseContourIntegral;">&amp;CounterClockwiseContourIntegral;</div>
<div data-test="&amp;amp">&amp;amp</div>
<div data-test="&amp;am">&amp;am</div>
<div data-test="&amp;;">&amp;;</div>
<div data-test="&amp;">&amp;</div>
<div data-test="&amp;#x">&amp;#x</div>
<div data-test="&amp;#x1">&amp;#x1</div>
<div data-test="&amp;#1">&amp;#1</div>
</body>
</html>

View File

@ -35,6 +35,38 @@
kablammo!
</a>
<div>&amp;#38;</div>
<div>&amp;#38;</div>
<div>&amp;#38</div>
<div>&amp;#x26;</div>
<div>&amp;#x26</div>
<div>&amp;amp</div>
<div>&amp;#</div>
<div>&amp;#1</div>
<div></div>
<div>&amp;8</div>
<div>&amp;#a</div>
<div>&amp;#xb</div>
<div>&amp;#xj</div>
<div data-test="&amp;amp;"></div>
<div data-test="&amp;amp"></div>
<div data-test="&amp;#36"></div>
<div data-test="&amp;#x36"></div>
<div data-test="&amp;unknown">&amp;unknown</div>
<div data-test="&amp;unknown;">&amp;unknown;</div>
<div data-test="&amp;u;">&amp;u;</div>
<div data-test="&amp;ampCounterClockwiseContourIntegral">&amp;CounterClockwiseContourIntegral</div>
<div data-test="&amp;CounterClockwiseContourIntegral">&amp;CounterClockwiseContourIntegral</div>
<div data-test="&amp;ampCounterClockwiseContourIntegral;">&amp;CounterClockwiseContourIntegral;</div>
<div data-test="&amp;CounterClockwiseContourIntegral;">&amp;CounterClockwiseContourIntegral;</div>
<div data-test="&amp;amp">&amp;amp</div>
<div data-test="&amp;am">&amp;am</div>
<div data-test="&amp;;">&amp;;</div>
<div data-test="&amp;">&amp;</div>
<div data-test="&amp;#x">&amp;#x</div>
<div data-test="&amp;#x1">&amp;#x1</div>
<div data-test="&amp;#1">&amp;#1</div>

View File

@ -2,8 +2,8 @@
<h1>HTML Entity Example</h1>
<div>A space character: &amp;</div>
<div>A space character: &amp;</div>
<div>A space character: &</div>
<div>A space character: &</div>
<div>A space character:  </div>
<div>The less-than sign: &lt;</div>
@ -25,14 +25,46 @@
<div>An o with a circumflex accent: ô</div>
<div>An o with a tilde: õ</div>
<div>A space character: &amp;&amp;</div>
<div>A space character: &&</div>
<div>I'm ∉ I tell you</div>
<a href="http://lmgtfy.com/?l=1&amp;q=rick+roll">tired meme</a>
<a href=# onclick="window.location='?l=1&amp;q=rick+roll';return false">
<a href="http://lmgtfy.com/?l=1&q=rick+roll">tired meme</a>
<a href=# onclick="window.location='?l=1&q=rick+roll';return false">
kablammo!
</a>
<div>&amp;#38;</div>
<div>&amp;#38;</div>
<div>&amp;#38</div>
<div>&amp;#x26;</div>
<div>&amp;#x26</div>
<div>&amp;amp</div>
<div>&#</div>
<div>&amp;#1</div>
<div></div>
<div>&8</div>
<div>&#a</div>
<div>&amp;#xb</div>
<div>&#xj</div>
<div data-test=&amp;amp;></div>
<div data-test=&amp;amp></div>
<div data-test=&amp;#36></div>
<div data-test=&amp;#x36></div>
<div data-test=&unknown>&unknown</div>
<div data-test=&unknown;>&unknown;</div>
<div data-test=&u;>&u;</div>
<div data-test=&amp;ampCounterClockwiseContourIntegral>&CounterClockwiseContourIntegral</div>
<div data-test=&CounterClockwiseContourIntegral>&CounterClockwiseContourIntegral</div>
<div data-test=&amp;ampCounterClockwiseContourIntegral;>&amp;CounterClockwiseContourIntegral;</div>
<div data-test=&amp;CounterClockwiseContourIntegral;>&amp;CounterClockwiseContourIntegral;</div>
<div data-test=&amp;amp>&amp;amp</div>
<div data-test=&am>&am</div>
<div data-test=&;>&;</div>
<div data-test=&>&</div>
<div data-test=&#x>&#x</div>
<div data-test=&amp;#x1>&amp;#x1</div>
<div data-test=&amp;#1>&amp;#1</div>

View File

@ -23,7 +23,7 @@
<line x1=55.1 y1=30.4 x2=100 y2=30.4 class=stroke-primary style="stroke-width: 2;stroke-miterlimit: 10;"></line>
<line x1=45.1 y1=30.4 x2=0 y2=30.4 class=stroke-primary style="stroke-width: 2;stroke-miterlimit: 10;"></line>
</svg>
<p class="font-light leading-relaxed mx-auto pb-2 text-gray-500 text-xl">Save time managing advertising &amp; Content for your business.</p>
<p class="font-light leading-relaxed mx-auto pb-2 text-gray-500 text-xl">Save time managing advertising & Content for your business.</p>
</header>
@ -96,7 +96,7 @@
<path d="M12.136.326A1.5 1.5 0 0 1 14 1.78V3h.5A1.5 1.5 0 0 1 16 4.5v9a1.5 1.5 0 0 1-1.5 1.5h-13A1.5 1.5 0 0 1 0 13.5v-9a1.5 1.5 0 0 1 1.432-1.499L12.136.326zM5.562 3H13V1.78a.5.5 0 0 0-.621-.484L5.562 3zM1.5 4a.5.5 0 0 0-.5.5v9a.5.5 0 0 0 .5.5h13a.5.5 0 0 0 .5-.5v-9a.5.5 0 0 0-.5-.5h-13z"></path>
</svg>
</div>
<h3 class="font-semibold leading-normal mb-2 text-black text-lg">Budget &amp; Marketing</h3>
<h3 class="font-semibold leading-normal mb-2 text-black text-lg">Budget & Marketing</h3>
<p class=text-gray-500>This is a wider card with supporting text below as a natural content.</p>
</div>

View File

@ -1,7 +1,7 @@
<!doctype html><html lang=en><title>Document</title><body>
<div id='John"&amp;Harry'>Test</div>
<div id='John"&amp;HarryOther'>Test</div>
<div id='John"&Harry'>Test</div>
<div id='John"&HarryOther'>Test</div>
<div id='John"<HarryOtherOther'>Test</div>
<div id=John HarryOtherOtherOther>Test</div>
<div id="John<test>">Test</div>

View File

@ -21,12 +21,10 @@ debug = []
[dependencies]
bitflags = "1.2.1"
lexical = "6.1.0"
once_cell = "1.10.0"
serde = { version = "1.0.118", features = ["derive"] }
serde_json = "1.0.61"
swc_atoms = { version = "0.2.7", path = "../swc_atoms" }
swc_common = { version = "0.18.0", path = "../swc_common" }
swc_html_ast = { version = "0.8.0", path = "../swc_html_ast" }
swc_html_utils = { version = "0.1.0", path = "../swc_html_utils" }
[dev-dependencies]
criterion = "0.3"

View File

@ -1,32 +1,15 @@
use std::{char::REPLACEMENT_CHARACTER, mem::take};
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use swc_atoms::JsWord;
use swc_common::{
collections::{AHashMap, AHashSet},
input::Input,
BytePos, Span,
};
use swc_common::{collections::AHashSet, input::Input, BytePos, Span};
use swc_html_ast::{AttributeToken, Token, TokenAndSpan};
use swc_html_utils::{Entity, HTML_ENTITIES};
use crate::{
error::{Error, ErrorKind},
parser::input::ParserInput,
};
#[derive(Serialize, Deserialize, Debug)]
pub struct Entity {
characters: String,
}
pub static HTML_ENTITIES: Lazy<AHashMap<String, Entity>> = Lazy::new(|| {
let entities: AHashMap<String, Entity> = serde_json::from_str(include_str!("./entities.json"))
.expect("failed to parse entities.json for html entities");
entities
});
#[derive(Debug, Clone)]
pub enum State {
Data,
@ -4035,7 +4018,7 @@ where
//
// - not ascii alphanumeric
// - we consume more characters than the longest entity
if !c.is_ascii_alphanumeric() || self.temporary_buffer.len() > 32 {
if !c.is_ascii_alphanumeric() || entity_temporary_buffer.len() > 32 {
break;
}
}

View File

@ -81,6 +81,34 @@ FOOºR
FOO䆺R
FOOAZOO
"
| <div>
| data-test="&ampamp;"
| "
"
| <div>
| data-test="&ampamp"
| "
"
| <div>
| data-test="&amp;"
| "
"
| <div>
| data-test="&amp"
| "
"
| <div>
| data-test="&#36"
| "
"
| <div>
| data-test="&#x36"
| "
"
| <div>
| data-test="6"
| "

View File

@ -49,6 +49,13 @@ FOO&#xBAR
FOO&#x41BAR
FOO&#x41ZOO
<div data-test="&ampamp;"></div>
<div data-test="&ampamp"></div>
<div data-test="&amp;amp;"></div>
<div data-test="&amp;amp"></div>
<div data-test="&amp;#36"></div>
<div data-test="&amp;#x36"></div>
<div data-test="&#x36"></div>
</body>
</html>

View File

@ -2,7 +2,7 @@
"type": "Document",
"span": {
"start": 1,
"end": 1612,
"end": 1841,
"ctxt": 0
},
"mode": "no-quirks",
@ -22,7 +22,7 @@
"type": "Element",
"span": {
"start": 17,
"end": 1612,
"end": 1841,
"ctxt": 0
},
"tagName": "html",
@ -46,7 +46,7 @@
"type": "Element",
"span": {
"start": 24,
"end": 1612,
"end": 1841,
"ctxt": 0
},
"tagName": "body",
@ -377,10 +377,255 @@
"type": "Text",
"span": {
"start": 1558,
"end": 1612,
"end": 1595,
"ctxt": 0
},
"value": "\n\nFOOºR\nFOO䆺R\nFOOAZOO\n\n\n\n\n"
"value": "\n\nFOOºR\nFOO䆺R\nFOOAZOO\n\n"
},
{
"type": "Element",
"span": {
"start": 1595,
"end": 1627,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1600,
"end": 1620,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "&ampamp;"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1627,
"end": 1628,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1628,
"end": 1659,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1633,
"end": 1652,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "&ampamp"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1659,
"end": 1660,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1660,
"end": 1693,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1665,
"end": 1686,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "&amp;"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1693,
"end": 1694,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1694,
"end": 1726,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1699,
"end": 1719,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "&amp"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1726,
"end": 1727,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1727,
"end": 1759,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1732,
"end": 1752,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "&#36"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1759,
"end": 1760,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1760,
"end": 1793,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1765,
"end": 1786,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "&#x36"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1793,
"end": 1794,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1794,
"end": 1823,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1799,
"end": 1816,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "data-test",
"value": "6"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 1823,
"end": 1841,
"ctxt": 0
},
"value": "\n\n\n\n"
}
],
"content": null

View File

@ -220,3 +220,9 @@
50 | FOO&#x41ZOO
: ^
`----
x Missing semicolon after character reference
,-[$DIR/tests/recovery/text/entity/input.html:58:1]
58 | <div data-test="&#x36"></div>
: ^
`----

View File

@ -52,9 +52,16 @@
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
52 | | <div data-test="&ampamp;"></div>
53 | | <div data-test="&ampamp"></div>
54 | | <div data-test="&amp;amp;"></div>
55 | | <div data-test="&amp;amp"></div>
56 | | <div data-test="&amp;#36"></div>
57 | | <div data-test="&amp;#x36"></div>
58 | | <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----
x Child
@ -121,9 +128,16 @@
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
52 | | <div data-test="&ampamp;"></div>
53 | | <div data-test="&ampamp"></div>
54 | | <div data-test="&amp;amp;"></div>
55 | | <div data-test="&amp;amp"></div>
56 | | <div data-test="&amp;#36"></div>
57 | | <div data-test="&amp;#x36"></div>
58 | | <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----
x Element
@ -178,9 +192,16 @@
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
52 | | <div data-test="&ampamp;"></div>
53 | | <div data-test="&ampamp"></div>
54 | | <div data-test="&amp;amp;"></div>
55 | | <div data-test="&amp;amp"></div>
56 | | <div data-test="&amp;#36"></div>
57 | | <div data-test="&amp;#x36"></div>
58 | | <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----
x Child
@ -238,9 +259,16 @@
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
52 | | <div data-test="&ampamp;"></div>
53 | | <div data-test="&ampamp"></div>
54 | | <div data-test="&amp;amp;"></div>
55 | | <div data-test="&amp;amp"></div>
56 | | <div data-test="&amp;#36"></div>
57 | | <div data-test="&amp;#x36"></div>
58 | | <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----
x Element
@ -294,9 +322,16 @@
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
52 | | <div data-test="&ampamp;"></div>
53 | | <div data-test="&ampamp"></div>
54 | | <div data-test="&amp;amp;"></div>
55 | | <div data-test="&amp;amp"></div>
56 | | <div data-test="&amp;#36"></div>
57 | | <div data-test="&amp;#x36"></div>
58 | | <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----
x Child
@ -794,10 +829,8 @@
48 | | FOO&#xBAR
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
51 | `->
52 | <div data-test="&ampamp;"></div>
`----
x Text
@ -807,8 +840,232 @@
48 | | FOO&#xBAR
49 | | FOO&#x41BAR
50 | | FOO&#x41ZOO
51 | |
52 | | </body>
53 | | </html>
54 | `->
51 | `->
52 | <div data-test="&ampamp;"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:52:1]
52 | <div data-test="&ampamp;"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:52:1]
52 | <div data-test="&ampamp;"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:52:1]
52 | <div data-test="&ampamp;"></div>
: ^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:52:1]
52 | <div data-test="&ampamp;"></div>
: ^
53 | <div data-test="&ampamp"></div>
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:52:1]
52 | <div data-test="&ampamp;"></div>
: ^
53 | <div data-test="&ampamp"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:53:1]
53 | <div data-test="&ampamp"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:53:1]
53 | <div data-test="&ampamp"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:53:1]
53 | <div data-test="&ampamp"></div>
: ^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:53:1]
53 | <div data-test="&ampamp"></div>
: ^
54 | <div data-test="&amp;amp;"></div>
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:53:1]
53 | <div data-test="&ampamp"></div>
: ^
54 | <div data-test="&amp;amp;"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:54:1]
54 | <div data-test="&amp;amp;"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:54:1]
54 | <div data-test="&amp;amp;"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:54:1]
54 | <div data-test="&amp;amp;"></div>
: ^^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:54:1]
54 | <div data-test="&amp;amp;"></div>
: ^
55 | <div data-test="&amp;amp"></div>
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:54:1]
54 | <div data-test="&amp;amp;"></div>
: ^
55 | <div data-test="&amp;amp"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:55:1]
55 | <div data-test="&amp;amp"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:55:1]
55 | <div data-test="&amp;amp"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:55:1]
55 | <div data-test="&amp;amp"></div>
: ^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:55:1]
55 | <div data-test="&amp;amp"></div>
: ^
56 | <div data-test="&amp;#36"></div>
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:55:1]
55 | <div data-test="&amp;amp"></div>
: ^
56 | <div data-test="&amp;#36"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:56:1]
56 | <div data-test="&amp;#36"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:56:1]
56 | <div data-test="&amp;#36"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:56:1]
56 | <div data-test="&amp;#36"></div>
: ^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:56:1]
56 | <div data-test="&amp;#36"></div>
: ^
57 | <div data-test="&amp;#x36"></div>
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:56:1]
56 | <div data-test="&amp;#36"></div>
: ^
57 | <div data-test="&amp;#x36"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:57:1]
57 | <div data-test="&amp;#x36"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:57:1]
57 | <div data-test="&amp;#x36"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:57:1]
57 | <div data-test="&amp;#x36"></div>
: ^^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:57:1]
57 | <div data-test="&amp;#x36"></div>
: ^
58 | <div data-test="&#x36"></div>
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:57:1]
57 | <div data-test="&amp;#x36"></div>
: ^
58 | <div data-test="&#x36"></div>
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:58:1]
58 | <div data-test="&#x36"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/recovery/text/entity/input.html:58:1]
58 | <div data-test="&#x36"></div>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/recovery/text/entity/input.html:58:1]
58 | <div data-test="&#x36"></div>
: ^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/recovery/text/entity/input.html:58:1]
58 | ,-> <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----
x Text
,-[$DIR/tests/recovery/text/entity/input.html:58:1]
58 | ,-> <div data-test="&#x36"></div>
59 | | </body>
60 | | </html>
61 | `->
`----

View File

@ -0,0 +1,22 @@
[package]
authors = [
"강동윤 <kdy1997.dev@gmail.com>",
"Alexander Akait <sheo13666q@gmail.com>",
]
description = "Utils for HTML"
documentation = "https://rustdoc.swc.rs/swc_html_utils/"
edition = "2021"
include = ["Cargo.toml", "src/**/*.rs", "src/**/*.json"]
license = "Apache-2.0"
name = "swc_html_utils"
repository = "https://github.com/swc-project/swc.git"
version = "0.1.0"
[lib]
bench = false
[dependencies]
once_cell = "1.10.0"
serde = { version = "1.0.118", features = ["derive"] }
serde_json = "1.0.61"
swc_common = { version = "0.18.0", path = "../swc_common" }

View File

@ -0,0 +1,15 @@
use once_cell::sync::Lazy;
use serde::{Deserialize, Serialize};
use swc_common::collections::AHashMap;
/// Value type of the [`HTML_ENTITIES`] table, deserialized from `entities.json`.
#[derive(Serialize, Deserialize, Debug)]
pub struct Entity {
// NOTE(review): presumably the text the character reference expands to —
// confirm against the contents of `entities.json`.
pub characters: String,
}
/// Table of HTML entities keyed by `String`, parsed on first access from the
/// `entities.json` file embedded at compile time.
///
/// # Panics
///
/// The first access panics if the embedded JSON cannot be deserialized.
pub static HTML_ENTITIES: Lazy<AHashMap<String, Entity>> = Lazy::new(|| {
    serde_json::from_str::<AHashMap<String, Entity>>(include_str!("./entities.json"))
        .expect("failed to parse entities.json for html entities")
});