fix(html/parser): Handle BOM (#4843)

This commit is contained in:
Alexander Akait 2022-05-30 09:20:02 +03:00 committed by GitHub
parent c523832c80
commit 20fee8abee
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 676 additions and 1 deletions

View File

@ -186,7 +186,7 @@ where
pub fn new(input: I) -> Self { pub fn new(input: I) -> Self {
let start_pos = input.last_pos(); let start_pos = input.last_pos();
Lexer { let mut lexer = Lexer {
input, input,
cur: None, cur: None,
cur_pos: start_pos, cur_pos: start_pos,
@ -208,7 +208,15 @@ where
is_adjusted_current_node_is_element_in_html_namespace: None, is_adjusted_current_node_is_element_in_html_namespace: None,
doctype_keyword: None, doctype_keyword: None,
last_emitted_error_pos: None, last_emitted_error_pos: None,
};
// A leading Byte Order Mark (BOM) causes the character encoding argument to be
// ignored and will itself be skipped.
if lexer.input.is_at_start() && lexer.input.cur() == Some('\u{feff}') {
lexer.input.bump();
} }
lexer
} }
} }

View File

@ -0,0 +1,34 @@
| <!DOCTYPE html>
| <html>
| lang="en"
| <head>
| "
"
| <meta>
| charset="UTF-8"
| "
"
| <meta>
| content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0"
| name="viewport"
| "
"
| <meta>
| content="ie=edge"
| http-equiv="X-UA-Compatible"
| "
"
| <title>
| "Document"
| "
"
| "
"
| <body>
| "
"
| <div>
| "Test"
| "
"

View File

@ -0,0 +1,13 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport"
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<meta http-equiv="X-UA-Compatible" content="ie=edge">
<title>Document</title>
</head>
<body>
<div>Test</div>
</body>
</html>

View File

@ -0,0 +1,297 @@
{
"type": "Document",
"span": {
"start": 1,
"end": 341,
"ctxt": 0
},
"mode": "no-quirks",
"children": [
{
"type": "DocumentType",
"span": {
"start": 1,
"end": 16,
"ctxt": 0
},
"name": "html",
"publicId": null,
"systemId": null
},
{
"type": "Element",
"span": {
"start": 17,
"end": 334,
"ctxt": 0
},
"tagName": "html",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 23,
"end": 32,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "lang",
"value": "en"
}
],
"children": [
{
"type": "Element",
"span": {
"start": 34,
"end": 295,
"ctxt": 0
},
"tagName": "head",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 40,
"end": 45,
"ctxt": 0
},
"value": "\n "
},
{
"type": "Element",
"span": {
"start": 45,
"end": 67,
"ctxt": 0
},
"tagName": "meta",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 51,
"end": 66,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "charset",
"value": "UTF-8"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 67,
"end": 72,
"ctxt": 0
},
"value": "\n "
},
{
"type": "Element",
"span": {
"start": 72,
"end": 208,
"ctxt": 0
},
"tagName": "meta",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 78,
"end": 93,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "name",
"value": "viewport"
},
{
"type": "Attribute",
"span": {
"start": 104,
"end": 207,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "content",
"value": "width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 208,
"end": 213,
"ctxt": 0
},
"value": "\n "
},
{
"type": "Element",
"span": {
"start": 213,
"end": 266,
"ctxt": 0
},
"tagName": "meta",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [
{
"type": "Attribute",
"span": {
"start": 219,
"end": 247,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "http-equiv",
"value": "X-UA-Compatible"
},
{
"type": "Attribute",
"span": {
"start": 248,
"end": 265,
"ctxt": 0
},
"namespace": null,
"prefix": null,
"name": "content",
"value": "ie=edge"
}
],
"children": [],
"content": null
},
{
"type": "Text",
"span": {
"start": 266,
"end": 271,
"ctxt": 0
},
"value": "\n "
},
{
"type": "Element",
"span": {
"start": 271,
"end": 286,
"ctxt": 0
},
"tagName": "title",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 278,
"end": 286,
"ctxt": 0
},
"value": "Document"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 294,
"end": 295,
"ctxt": 0
},
"value": "\n"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 302,
"end": 303,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 303,
"end": 334,
"ctxt": 0
},
"tagName": "body",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 309,
"end": 310,
"ctxt": 0
},
"value": "\n"
},
{
"type": "Element",
"span": {
"start": 310,
"end": 319,
"ctxt": 0
},
"tagName": "div",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 315,
"end": 319,
"ctxt": 0
},
"value": "Test"
}
],
"content": null
},
{
"type": "Text",
"span": {
"start": 325,
"end": 334,
"ctxt": 0
},
"value": "\n\n"
}
],
"content": null
}
],
"content": null
}
]
}

View File

@ -0,0 +1,323 @@
x Document
,-[$DIR/tests/fixture/text/bom/input.html:1:1]
1 | ,-> <!doctype html>
2 | | <html lang="en">
3 | | <head>
4 | | <meta charset="UTF-8">
5 | | <meta name="viewport"
6 | | content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | | <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | | <title>Document</title>
9 | | </head>
10 | | <body>
11 | | <div>Test</div>
12 | | </body>
13 | `-> </html>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:1:1]
1 | <!doctype html>
: ^^^^^^^^^^^^^^^
`----
x DocumentType
,-[$DIR/tests/fixture/text/bom/input.html:1:1]
1 | <!doctype html>
: ^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:2:1]
2 | ,-> <html lang="en">
3 | | <head>
4 | | <meta charset="UTF-8">
5 | | <meta name="viewport"
6 | | content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | | <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | | <title>Document</title>
9 | | </head>
10 | | <body>
11 | | <div>Test</div>
12 | `-> </body>
13 | </html>
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:2:1]
2 | ,-> <html lang="en">
3 | | <head>
4 | | <meta charset="UTF-8">
5 | | <meta name="viewport"
6 | | content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | | <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | | <title>Document</title>
9 | | </head>
10 | | <body>
11 | | <div>Test</div>
12 | `-> </body>
13 | </html>
`----
x Attribute
,-[$DIR/tests/fixture/text/bom/input.html:2:1]
2 | <html lang="en">
: ^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:3:1]
3 | ,-> <head>
4 | | <meta charset="UTF-8">
5 | | <meta name="viewport"
6 | | content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | | <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | `-> <title>Document</title>
9 | </head>
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:3:1]
3 | ,-> <head>
4 | | <meta charset="UTF-8">
5 | | <meta name="viewport"
6 | | content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | | <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | `-> <title>Document</title>
9 | </head>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:3:1]
3 | ,-> <head>
4 | `-> <meta charset="UTF-8">
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:3:1]
3 | ,-> <head>
4 | `-> <meta charset="UTF-8">
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:4:5]
4 | <meta charset="UTF-8">
: ^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:4:5]
4 | <meta charset="UTF-8">
: ^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/bom/input.html:4:5]
4 | <meta charset="UTF-8">
: ^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:4:5]
4 | ,-> <meta charset="UTF-8">
5 | `-> <meta name="viewport"
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:4:5]
4 | ,-> <meta charset="UTF-8">
5 | `-> <meta name="viewport"
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:5:5]
5 | ,-> <meta name="viewport"
6 | `-> content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:5:5]
5 | ,-> <meta name="viewport"
6 | `-> content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
`----
x Attribute
,-[$DIR/tests/fixture/text/bom/input.html:5:5]
5 | <meta name="viewport"
: ^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/bom/input.html:6:11]
6 | content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:6:11]
6 | ,-> content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | `-> <meta http-equiv="X-UA-Compatible" content="ie=edge">
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:6:11]
6 | ,-> content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
7 | `-> <meta http-equiv="X-UA-Compatible" content="ie=edge">
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:7:5]
7 | <meta http-equiv="X-UA-Compatible" content="ie=edge">
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:7:5]
7 | <meta http-equiv="X-UA-Compatible" content="ie=edge">
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/bom/input.html:7:5]
7 | <meta http-equiv="X-UA-Compatible" content="ie=edge">
: ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
`----
x Attribute
,-[$DIR/tests/fixture/text/bom/input.html:7:5]
7 | <meta http-equiv="X-UA-Compatible" content="ie=edge">
: ^^^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:7:5]
7 | ,-> <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | `-> <title>Document</title>
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:7:5]
7 | ,-> <meta http-equiv="X-UA-Compatible" content="ie=edge">
8 | `-> <title>Document</title>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:8:5]
8 | <title>Document</title>
: ^^^^^^^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:8:5]
8 | <title>Document</title>
: ^^^^^^^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:8:5]
8 | <title>Document</title>
: ^^^^^^^^
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:8:5]
8 | <title>Document</title>
: ^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:8:5]
8 | <title>Document</title>
: ^
9 | </head>
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:8:5]
8 | <title>Document</title>
: ^
9 | </head>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:9:1]
9 | </head>
: ^
10 | <body>
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:9:1]
9 | </head>
: ^
10 | <body>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:10:1]
10 | ,-> <body>
11 | | <div>Test</div>
12 | `-> </body>
13 | </html>
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:10:1]
10 | ,-> <body>
11 | | <div>Test</div>
12 | `-> </body>
13 | </html>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:10:1]
10 | <body>
: ^
11 | <div>Test</div>
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:10:1]
10 | <body>
: ^
11 | <div>Test</div>
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:11:1]
11 | <div>Test</div>
: ^^^^^^^^^
`----
x Element
,-[$DIR/tests/fixture/text/bom/input.html:11:1]
11 | <div>Test</div>
: ^^^^^^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:11:1]
11 | <div>Test</div>
: ^^^^
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:11:1]
11 | <div>Test</div>
: ^^^^
`----
x Child
,-[$DIR/tests/fixture/text/bom/input.html:11:1]
11 | ,-> <div>Test</div>
12 | `-> </body>
13 | </html>
`----
x Text
,-[$DIR/tests/fixture/text/bom/input.html:11:1]
11 | ,-> <div>Test</div>
12 | `-> </body>
13 | </html>
`----