From 5abe847c2358ee9876140772f16d303c388cbbb5 Mon Sep 17 00:00:00 2001 From: Alexander Akait <4567934+alexander-akait@users.noreply.github.com> Date: Sun, 5 Jun 2022 23:16:27 +0300 Subject: [PATCH] perf(html/parser): Improve performance (#4881) --- crates/swc_html_parser/src/lexer/mod.rs | 35 ++- .../tests/fixture/text/entity/dom.rust-debug | 26 ++ .../tests/fixture/text/entity/input.html | 7 + .../tests/fixture/text/entity/output.json | 220 ++++++++++++- .../tests/fixture/text/entity/span.rust-debug | 295 ++++++++++++++++-- 5 files changed, 539 insertions(+), 44 deletions(-) diff --git a/crates/swc_html_parser/src/lexer/mod.rs b/crates/swc_html_parser/src/lexer/mod.rs index 685f2a59d4e..ab7dd7c6bbc 100644 --- a/crates/swc_html_parser/src/lexer/mod.rs +++ b/crates/swc_html_parser/src/lexer/mod.rs @@ -200,7 +200,7 @@ where attribute_start_position: None, character_reference_code: None, // Do this without a new allocation. - temporary_buffer: String::with_capacity(8), + temporary_buffer: String::with_capacity(33), is_adjusted_current_node_is_element_in_html_namespace: None, doctype_keyword: None, }; @@ -4010,40 +4010,41 @@ where // The shortest entity - `>` // The longest entity - `∳` let initial_cur_pos = self.input.cur_pos(); - let initial_buffer = self.temporary_buffer.clone(); + let mut entity: Option<&Entity> = None; let mut entity_cur_pos: Option = None; - let mut entity_temporary_buffer = None; + let mut entity_temporary_buffer = + String::with_capacity(self.temporary_buffer.capacity()); + + entity_temporary_buffer.push_str(&self.temporary_buffer); // No need to validate input, because we reset position if nothing was found while let Some(c) = &self.consume_next_char() { - self.temporary_buffer.push(*c); + entity_temporary_buffer.push(*c); - let found_entity = HTML_ENTITIES.get(&self.temporary_buffer); - - if let Some(found_entity) = found_entity { + if let Some(found_entity) = HTML_ENTITIES.get(&entity_temporary_buffer) { entity = Some(found_entity); entity_cur_pos = Some(self.input.cur_pos()); - entity_temporary_buffer = Some(self.temporary_buffer.clone()); - } - // We stop when: - // - // - not ascii alphanumeric - // - we consume more characters than the longest entity - if !c.is_ascii_alphanumeric() || self.temporary_buffer.len() > 32 { - break; + self.temporary_buffer + .replace_range(1.., &entity_temporary_buffer[1..]); + } else { + // We stop when: + // + // - not ascii alphanumeric + // - we consume more characters than the longest entity + if !c.is_ascii_alphanumeric() || self.temporary_buffer.len() > 32 { + break; + } } } if entity.is_some() { self.cur_pos = entity_cur_pos.unwrap(); self.input.reset_to(entity_cur_pos.unwrap()); - self.temporary_buffer = entity_temporary_buffer.unwrap(); } else { self.cur_pos = initial_cur_pos; self.input.reset_to(initial_cur_pos); - self.temporary_buffer = initial_buffer; } let is_last_semicolon = self.temporary_buffer.ends_with(';'); diff --git a/crates/swc_html_parser/tests/fixture/text/entity/dom.rust-debug b/crates/swc_html_parser/tests/fixture/text/entity/dom.rust-debug index e1e3de606a4..3b6893910cc 100644 --- a/crates/swc_html_parser/tests/fixture/text/entity/dom.rust-debug +++ b/crates/swc_html_parser/tests/fixture/text/entity/dom.rust-debug @@ -193,6 +193,32 @@ | "I'm ∉ I tell you" | " +" +|
+| "⋹̸" +| " + +" +| +| href="test⋹̸test" +| "test" +| " +" +| +| href="test¬inEtest" +| "test" +| " +" +| +| href="test¬inEtest" +| "test" +| " +" +| +| href="test&" +| "test" +| " + diff --git a/crates/swc_html_parser/tests/fixture/text/entity/input.html b/crates/swc_html_parser/tests/fixture/text/entity/input.html index 108554dc9b0..4c7aeeb4f65 100644 --- a/crates/swc_html_parser/tests/fixture/text/entity/input.html +++ b/crates/swc_html_parser/tests/fixture/text/entity/input.html @@ -59,6 +59,13 @@
I'm ∉ I tell you
+
⋹̸
+ +
test +test +test +test + diff --git a/crates/swc_html_parser/tests/fixture/text/entity/output.json b/crates/swc_html_parser/tests/fixture/text/entity/output.json index 0ef87e04999..5ea2819a4e8 100644 --- a/crates/swc_html_parser/tests/fixture/text/entity/output.json +++ b/crates/swc_html_parser/tests/fixture/text/entity/output.json @@ -2,7 +2,7 @@ "type": "Document", "span": { "start": 1, - "end": 1537, + "end": 1694, "ctxt": 0 }, "mode": "no-quirks", @@ -22,7 +22,7 @@ "type": "Element", "span": { "start": 17, - "end": 1537, + "end": 1694, "ctxt": 0 }, "tagName": "html", @@ -46,7 +46,7 @@ "type": "Element", "span": { "start": 24, - "end": 1537, + "end": 1694, "ctxt": 0 }, "tagName": "body", @@ -1471,7 +1471,219 @@ "type": "Text", "span": { "start": 1518, - "end": 1537, + "end": 1520, + "ctxt": 0 + }, + "value": "\n\n" + }, + { + "type": "Element", + "span": { + "start": 1520, + "end": 1539, + "ctxt": 0 + }, + "tagName": "div", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 1525, + "end": 1533, + "ctxt": 0 + }, + "value": "⋹̸" + } + ], + "content": null + }, + { + "type": "Text", + "span": { + "start": 1539, + "end": 1541, + "ctxt": 0 + }, + "value": "\n\n" + }, + { + "type": "Element", + "span": { + "start": 1541, + "end": 1576, + "ctxt": 0 + }, + "tagName": "a", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 1544, + "end": 1567, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "href", + "value": "test⋹̸test" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 1568, + "end": 1572, + "ctxt": 0 + }, + "value": "test" + } + ], + "content": null + }, + { + "type": "Text", + "span": { + "start": 1576, + "end": 1577, + "ctxt": 0 + }, + "value": "\n" + }, + { + "type": "Element", + "span": { + "start": 1577, + "end": 1611, + "ctxt": 0 + }, + "tagName": "a", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 1580, + "end": 1602, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "href", + "value": "test¬inEtest" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 1603, + "end": 1607, + "ctxt": 0 + }, + "value": "test" + } + ], + "content": null + }, + { + "type": "Text", + "span": { + "start": 1611, + "end": 1612, + "ctxt": 0 + }, + "value": "\n" + }, + { + "type": "Element", + "span": { + "start": 1612, + "end": 1646, + "ctxt": 0 + }, + "tagName": "a", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 1615, + "end": 1637, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "href", + "value": "test¬inEtest" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 1638, + "end": 1642, + "ctxt": 0 + }, + "value": "test" + } + ], + "content": null + }, + { + "type": "Text", + "span": { + "start": 1646, + "end": 1647, + "ctxt": 0 + }, + "value": "\n" + }, + { + "type": "Element", + "span": { + "start": 1647, + "end": 1675, + "ctxt": 0 + }, + "tagName": "a", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 1650, + "end": 1666, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "href", + "value": "test&" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 1667, + "end": 1671, + "ctxt": 0 + }, + "value": "test" + } + ], + "content": null + }, + { + "type": "Text", + "span": { + "start": 1675, + "end": 1694, "ctxt": 0 }, "value": "\n\n\n\n\n" diff --git a/crates/swc_html_parser/tests/fixture/text/entity/span.rust-debug b/crates/swc_html_parser/tests/fixture/text/entity/span.rust-debug index a1130dbf2fb..057a6cde633 100644 --- a/crates/swc_html_parser/tests/fixture/text/entity/span.rust-debug +++ b/crates/swc_html_parser/tests/fixture/text/entity/span.rust-debug @@ -62,9 +62,16 @@ 59 | | 60 | |
I'm ∉ I tell you
61 | | - 62 | | - 63 | | - 64 | `-> + 62 | |
⋹̸
+ 63 | | + 64 | | test + 65 | | test + 66 | | test + 67 | | test + 68 | | + 69 | | + 70 | | + 71 | `-> `---- x Child @@ -141,9 +148,16 @@ 59 | | 60 | |
I'm ∉ I tell you
61 | | - 62 | | - 63 | | - 64 | `-> + 62 | |
⋹̸
+ 63 | | + 64 | | test + 65 | | test + 66 | | test + 67 | | test + 68 | | + 69 | | + 70 | | + 71 | `-> `---- x Element @@ -208,9 +222,16 @@ 59 | | 60 | |
I'm ∉ I tell you
61 | | - 62 | | - 63 | | - 64 | `-> + 62 | |
⋹̸
+ 63 | | + 64 | | test + 65 | | test + 66 | | test + 67 | | test + 68 | | + 69 | | + 70 | | + 71 | `-> `---- x Child @@ -278,9 +299,16 @@ 59 | | 60 | |
I'm ∉ I tell you
61 | | - 62 | | - 63 | | - 64 | `-> + 62 | |
⋹̸
+ 63 | | + 64 | | test + 65 | | test + 66 | | test + 67 | | test + 68 | | + 69 | | + 70 | | + 71 | `-> `---- x Element @@ -344,9 +372,16 @@ 59 | | 60 | |
I'm ∉ I tell you
61 | | - 62 | | - 63 | | - 64 | `-> + 62 | |
⋹̸
+ 63 | | + 64 | | test + 65 | | test + 66 | | test + 67 | | test + 68 | | + 69 | | + 70 | | + 71 | `-> `---- x Child @@ -2008,17 +2043,231 @@ x Child ,-[$DIR/tests/fixture/text/entity/input.html:60:1] 60 | ,->
I'm ∉ I tell you
- 61 | | - 62 | | - 63 | | - 64 | `-> + 61 | `-> + 62 |
⋹̸
`---- x Text ,-[$DIR/tests/fixture/text/entity/input.html:60:1] 60 | ,->
I'm ∉ I tell you
- 61 | | - 62 | | - 63 | | - 64 | `-> + 61 | `-> + 62 |
⋹̸
+ `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:62:1] + 62 |
⋹̸
+ : ^^^^^^^^^^^^^^^^^^^ + `---- + + x Element + ,-[$DIR/tests/fixture/text/entity/input.html:62:1] + 62 |
⋹̸
+ : ^^^^^^^^^^^^^^^^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:62:1] + 62 |
⋹̸
+ : ^^^^^^^^ + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:62:1] + 62 |
⋹̸
+ : ^^^^^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:62:1] + 62 | ,->
⋹̸
+ 63 | `-> + 64 | test + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:62:1] + 62 | ,->
⋹̸
+ 63 | `-> + 64 | test + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Element + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Attribute + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^^^^ + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^ + 65 | test + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:64:1] + 64 | test + : ^ + 65 | test + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Element + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Attribute + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^^^^ + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^ + 66 | test + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:65:1] + 65 | test + : ^ + 66 | test + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Element + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Attribute + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^^^^ + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^ + 67 | test + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:66:1] + 66 | test + : ^ + 67 | test + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Element + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | test + : ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + `---- + + x Attribute + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | test + : ^^^^^^^^^^^^^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | test + : ^^^^ + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | test + : ^^^^ + `---- + + x Child + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | ,-> test + 68 | | + 69 | | + 70 | | + 71 | `-> + `---- + + x Text + ,-[$DIR/tests/fixture/text/entity/input.html:67:1] + 67 | ,-> test + 68 | | + 69 | | + 70 | | + 71 | `-> `----