perf(html/parser): Improve performance (#4881)

This commit is contained in:
Alexander Akait 2022-06-05 23:16:27 +03:00 committed by GitHub
parent eb0acc3859
commit 5abe847c23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 539 additions and 44 deletions

View File

@ -200,7 +200,7 @@ where
attribute_start_position: None,
character_reference_code: None,
// Do this without a new allocation.
temporary_buffer: String::with_capacity(8),
temporary_buffer: String::with_capacity(33),
is_adjusted_current_node_is_element_in_html_namespace: None,
doctype_keyword: None,
};
@ -4010,40 +4010,41 @@ where
// The shortest entity - `&GT`
// The longest entity - `&CounterClockwiseContourIntegral;`
let initial_cur_pos = self.input.cur_pos();
let initial_buffer = self.temporary_buffer.clone();
let mut entity: Option<&Entity> = None;
let mut entity_cur_pos: Option<BytePos> = None;
let mut entity_temporary_buffer = None;
let mut entity_temporary_buffer =
String::with_capacity(self.temporary_buffer.capacity());
entity_temporary_buffer.push_str(&self.temporary_buffer);
// No need to validate input, because we reset position if nothing was found
while let Some(c) = &self.consume_next_char() {
self.temporary_buffer.push(*c);
entity_temporary_buffer.push(*c);
let found_entity = HTML_ENTITIES.get(&self.temporary_buffer);
if let Some(found_entity) = found_entity {
if let Some(found_entity) = HTML_ENTITIES.get(&entity_temporary_buffer) {
entity = Some(found_entity);
entity_cur_pos = Some(self.input.cur_pos());
entity_temporary_buffer = Some(self.temporary_buffer.clone());
}
// We stop when:
//
// - not ascii alphanumeric
// - we consume more characters than the longest entity
if !c.is_ascii_alphanumeric() || self.temporary_buffer.len() > 32 {
break;
self.temporary_buffer
.replace_range(1.., &entity_temporary_buffer[1..]);
} else {
// We stop when:
//
// - not ascii alphanumeric
// - we consume more characters than the longest entity
if !c.is_ascii_alphanumeric() || self.temporary_buffer.len() > 32 {
break;
}
}
}
if entity.is_some() {
self.cur_pos = entity_cur_pos.unwrap();
self.input.reset_to(entity_cur_pos.unwrap());
self.temporary_buffer = entity_temporary_buffer.unwrap();
} else {
self.cur_pos = initial_cur_pos;
self.input.reset_to(initial_cur_pos);
self.temporary_buffer = initial_buffer;
}
let is_last_semicolon = self.temporary_buffer.ends_with(';');

View File

@ -193,6 +193,32 @@
| "I'm ∉ I tell you"
| "
"
| <div>
| "⋹̸"
| "
"
| <a>
| href="test⋹̸test"
| "test"
| "
"
| <a>
| href="test&notinEtest"
| "test"
| "
"
| <a>
| href="test&notinEtest"
| "test"
| "
"
| <a>
| href="test&"
| "test"
| "

View File

@ -59,6 +59,13 @@
<div>I'm &notin; I tell you</div>
<div>&notinE;</div>
<a href="test&notinE;test">test</a>
<a href="test&notinEtest">test</a>
<a href="test&notinEtest">test</a>
<a href="test&amp;">test</a>
</body>
</html>

View File

@ -2,7 +2,7 @@
"type": "Document",
"span": {
"start": 1,
"end": 1537,
"end": 1694,
"ctxt": 0
},
"mode": "no-quirks",
@ -22,7 +22,7 @@
"type": "Element",
"span": {
"start": 17,
"end": 1537,
"end": 1694,
"ctxt": 0
},
"tagName": "html",
@ -46,7 +46,7 @@
"type": "Element",
"span": {
"start": 24,
"end": 1537,
"end": 1694,
"ctxt": 0
},
"tagName": "body",
@ -1471,7 +1471,219 @@
"type": "Text",
"span": {
"start": 1518,
"end": 1537,
"end": 1520,
"ctxt": 0
},
"value": "\n\n"
},
{
"type": "Element",
"span": {
"start": 1520,
"end": 1539,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 1525,
"end": 1533,
"ctxt": 0
},
"value": "⋹̸"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 1539,
"end": 1541,
"ctxt": 0
},
"value": "\n\n"
},
{
"type": "Element",
"span": {
"start": 1541,
"end": 1576,
"ctxt": 0
},
"tagName": "a",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1544,
"end": 1567,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "href",
"value": "test⋹̸test"
}
],
"children": [
{
"type": "Text",
"span": {
"start": 1568,
"end": 1572,
"ctxt": 0
},
"value": "test"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 1576,
"end": 1577,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1577,
"end": 1611,
"ctxt": 0
},
"tagName": "a",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1580,
"end": 1602,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "href",
"value": "test&notinEtest"
}
],
"children": [
{
"type": "Text",
"span": {
"start": 1603,
"end": 1607,
"ctxt": 0
},
"value": "test"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 1611,
"end": 1612,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1612,
"end": 1646,
"ctxt": 0
},
"tagName": "a",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1615,
"end": 1637,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "href",
"value": "test&notinEtest"
}
],
"children": [
{
"type": "Text",
"span": {
"start": 1638,
"end": 1642,
"ctxt": 0
},
"value": "test"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 1646,
"end": 1647,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 1647,
"end": 1675,
"ctxt": 0
},
"tagName": "a",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 1650,
"end": 1666,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "href",
"value": "test&"
}
],
"children": [
{
"type": "Text",
"span": {
"start": 1667,
"end": 1671,
"ctxt": 0
},
"value": "test"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 1675,
"end": 1694,
"ctxt": 0
},
"value": "\n\n\n\n\n"

View File

@ -62,9 +62,16 @@
59 | |
60 | | <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
62 | | <div>&notinE;</div>
63 | |
64 | | <a href="test&notinE;test">test</a>
65 | | <a href="test&notinEtest">test</a>
66 | | <a href="test&notinEtest">test</a>
67 | | <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----
x Child
@ -141,9 +148,16 @@
59 | |
60 | | <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
62 | | <div>&notinE;</div>
63 | |
64 | | <a href="test&notinE;test">test</a>
65 | | <a href="test&notinEtest">test</a>
66 | | <a href="test&notinEtest">test</a>
67 | | <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----
x Element
@ -208,9 +222,16 @@
59 | |
60 | | <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
62 | | <div>&notinE;</div>
63 | |
64 | | <a href="test&notinE;test">test</a>
65 | | <a href="test&notinEtest">test</a>
66 | | <a href="test&notinEtest">test</a>
67 | | <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----
x Child
@ -278,9 +299,16 @@
59 | |
60 | | <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
62 | | <div>&notinE;</div>
63 | |
64 | | <a href="test&notinE;test">test</a>
65 | | <a href="test&notinEtest">test</a>
66 | | <a href="test&notinEtest">test</a>
67 | | <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----
x Element
@ -344,9 +372,16 @@
59 | |
60 | | <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
62 | | <div>&notinE;</div>
63 | |
64 | | <a href="test&notinE;test">test</a>
65 | | <a href="test&notinEtest">test</a>
66 | | <a href="test&notinEtest">test</a>
67 | | <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----
x Child
@ -2008,17 +2043,231 @@
x Child
,-[$DIR/tests/fixture/text/entity/input.html:60:1]
60 | ,-> <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
61 | `->
62 | <div>&notinE;</div>
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:60:1]
60 | ,-> <div>I'm &notin; I tell you</div>
61 | |
62 | | </body>
63 | | </html>
64 | `->
61 | `->
62 | <div>&notinE;</div>
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:62:1]
62 | <div>&notinE;</div>
: ^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/entity/input.html:62:1]
62 | <div>&notinE;</div>
: ^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:62:1]
62 | <div>&notinE;</div>
: ^^^^^^^^
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:62:1]
62 | <div>&notinE;</div>
: ^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:62:1]
62 | ,-> <div>&notinE;</div>
63 | `->
64 | <a href="test&notinE;test">test</a>
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:62:1]
62 | ,-> <div>&notinE;</div>
63 | `->
64 | <a href="test&notinE;test">test</a>
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^^^^
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^
65 | <a href="test&notinEtest">test</a>
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:64:1]
64 | <a href="test&notinE;test">test</a>
: ^
65 | <a href="test&notinEtest">test</a>
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^^^^
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^
66 | <a href="test&notinEtest">test</a>
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:65:1]
65 | <a href="test&notinEtest">test</a>
: ^
66 | <a href="test&notinEtest">test</a>
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^^^^
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^
67 | <a href="test&amp;">test</a>
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:66:1]
66 | <a href="test&notinEtest">test</a>
: ^
67 | <a href="test&amp;">test</a>
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | <a href="test&amp;">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | <a href="test&amp;">test</a>
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | <a href="test&amp;">test</a>
: ^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | <a href="test&amp;">test</a>
: ^^^^
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | <a href="test&amp;">test</a>
: ^^^^
`----
x Child
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | ,-> <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----
x Text
,-[$DIR/tests/fixture/text/entity/input.html:67:1]
67 | ,-> <a href="test&amp;">test</a>
68 | |
69 | | </body>
70 | | </html>
71 | `->
`----