From e3cbe7e9a98149bfc05421a264d4e421fb543f7e Mon Sep 17 00:00:00 2001 From: Alexander Akait <4567934+alexander-akait@users.noreply.github.com> Date: Tue, 29 Nov 2022 05:39:48 +0300 Subject: [PATCH] fix(html/parser): Fix parsing of cdata (#6534) --- .../tests/fixture/cdata/input.html | 11 + .../tests/fixture/cdata/output.html | 11 + .../tests/fixture/cdata/output.min.html | 8 + crates/swc_html_parser/src/lexer/mod.rs | 60 +- crates/swc_html_parser/src/parser/mod.rs | 12 +- .../fixture/text/cdata-svg/dom.rust-debug | 112 ++ .../tests/fixture/text/cdata-svg/input.html | 32 + .../tests/fixture/text/cdata-svg/output.json | 1046 +++++++++++++ .../fixture/text/cdata-svg/span.rust-debug | 1364 +++++++++++++++++ .../recovery/comment/cdata-1/output.stderr | 4 +- .../tests/recovery/text/cdata/dom.rust-debug | 80 + .../tests/recovery/text/cdata/input.html | 24 + .../tests/recovery/text/cdata/output.json | 707 +++++++++ .../tests/recovery/text/cdata/output.stderr | 112 ++ .../tests/recovery/text/cdata/span.rust-debug | 1048 +++++++++++++ 15 files changed, 4592 insertions(+), 39 deletions(-) create mode 100644 crates/swc_html_codegen/tests/fixture/cdata/input.html create mode 100644 crates/swc_html_codegen/tests/fixture/cdata/output.html create mode 100644 crates/swc_html_codegen/tests/fixture/cdata/output.min.html create mode 100644 crates/swc_html_parser/tests/fixture/text/cdata-svg/dom.rust-debug create mode 100644 crates/swc_html_parser/tests/fixture/text/cdata-svg/input.html create mode 100644 crates/swc_html_parser/tests/fixture/text/cdata-svg/output.json create mode 100644 crates/swc_html_parser/tests/fixture/text/cdata-svg/span.rust-debug create mode 100644 crates/swc_html_parser/tests/recovery/text/cdata/dom.rust-debug create mode 100644 crates/swc_html_parser/tests/recovery/text/cdata/input.html create mode 100644 crates/swc_html_parser/tests/recovery/text/cdata/output.json create mode 100644 crates/swc_html_parser/tests/recovery/text/cdata/output.stderr create mode 100644 crates/swc_html_parser/tests/recovery/text/cdata/span.rust-debug diff --git a/crates/swc_html_codegen/tests/fixture/cdata/input.html b/crates/swc_html_codegen/tests/fixture/cdata/input.html new file mode 100644 index 00000000000..90d38e9e0d1 --- /dev/null +++ b/crates/swc_html_codegen/tests/fixture/cdata/input.html @@ -0,0 +1,11 @@ + + +
+| +| " +" +|
+| +| "text]]>" +| " +" +|
+| +| "]]>" +| " +" +|
+| +| +| " +" +|
+| " + " +| +| " +" +| " +" +|
+| +| " +" +| +| viewBox="0 0 100 100" +| " + " +| +| height="100px" +| width="100px" +| +| "a" +| " + " +| +| +| " + " +| +| +| " +" +| " + +" diff --git a/crates/swc_html_parser/tests/recovery/text/cdata/input.html b/crates/swc_html_parser/tests/recovery/text/cdata/input.html new file mode 100644 index 00000000000..25422d4fb2a --- /dev/null +++ b/crates/swc_html_parser/tests/recovery/text/cdata/input.html @@ -0,0 +1,24 @@ + + + + Document + + + + + + +text]]> +]]> + + + + + + + a + + + + + \ No newline at end of file diff --git a/crates/swc_html_parser/tests/recovery/text/cdata/output.json b/crates/swc_html_parser/tests/recovery/text/cdata/output.json new file mode 100644 index 00000000000..c330cfa4297 --- /dev/null +++ b/crates/swc_html_parser/tests/recovery/text/cdata/output.json @@ -0,0 +1,707 @@ +{ + "type": "Document", + "span": { + "start": 1, + "end": 620, + "ctxt": 0 + }, + "mode": "no-quirks", + "children": [ + { + "type": "DocumentType", + "span": { + "start": 1, + "end": 16, + "ctxt": 0 + }, + "name": "html", + "publicId": null, + "systemId": null, + "raw": "" + }, + { + "type": "Element", + "span": { + "start": 17, + "end": 620, + "ctxt": 0 + }, + "tagName": "html", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 23, + "end": 32, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "lang", + "rawName": "lang", + "value": "en", + "rawValue": "\"en\"" + } + ], + "children": [ + { + "type": "Element", + "span": { + "start": 34, + "end": 76, + "ctxt": 0 + }, + "tagName": "head", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 40, + "end": 45, + "ctxt": 0 + }, + "data": "\n ", + "raw": "\n " + }, + { + "type": "Element", + "span": { + "start": 45, + "end": 68, + "ctxt": 0 + }, + "tagName": "title", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 52, + "end": 60, + "ctxt": 0 + }, + "data": "Document", + "raw": "Document" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 68, + "end": 69, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 76, + "end": 77, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 77, + "end": 613, + "ctxt": 0 + }, + "tagName": "body", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 83, + "end": 84, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 84, + "end": 110, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 87, + "end": 106, + "ctxt": 0 + }, + "data": "[CDATA[content]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 110, + "end": 111, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 111, + "end": 137, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 114, + "end": 133, + "ctxt": 0 + }, + "data": "[CDATA[&ing]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 137, + "end": 138, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 138, + "end": 166, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 141, + "end": 162, + "ctxt": 0 + }, + "data": "[CDATA[&ing ]]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 166, + "end": 167, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 167, + "end": 196, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 170, + "end": 192, + "ctxt": 0 + }, + "data": "[CDATA[&ing]] ]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 196, + "end": 197, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 197, + "end": 239, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 200, + "end": 218, + "ctxt": 0 + }, + "data": "[CDATA[" + }, + { + "type": "Text", + "span": { + "start": 218, + "end": 235, + "ctxt": 0 + }, + "data": "text]]>", + "raw": "text]]>" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 239, + "end": 240, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 240, + "end": 310, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 243, + "end": 303, + "ctxt": 0 + }, + "data": "[CDATA[" + }, + { + "type": "Text", + "span": { + "start": 303, + "end": 306, + "ctxt": 0 + }, + "data": "]]>", + "raw": "]]>" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 310, + "end": 311, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 311, + "end": 344, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 314, + "end": 327, + "ctxt": 0 + }, + "data": "[CDATA[1]]", + "raw": "" + }, + { + "type": "Comment", + "span": { + "start": 327, + "end": 340, + "ctxt": 0 + }, + "data": "[CDATA[2]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 344, + "end": 345, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 345, + "end": 374, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Text", + "span": { + "start": 348, + "end": 353, + "ctxt": 0 + }, + "data": "\n ", + "raw": "\n " + }, + { + "type": "Comment", + "span": { + "start": 353, + "end": 369, + "ctxt": 0 + }, + "data": "[CDATA[data]]", + "raw": "" + }, + { + "type": "Text", + "span": { + "start": 369, + "end": 370, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 374, + "end": 375, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 375, + "end": 408, + "ctxt": 0 + }, + "tagName": "p", + "namespace": "http://www.w3.org/1999/xhtml", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 378, + "end": 404, + "ctxt": 0 + }, + "data": "[CDATA[bracket ]after]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 408, + "end": 409, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + }, + { + "type": "Element", + "span": { + "start": 409, + "end": 604, + "ctxt": 0 + }, + "tagName": "svg", + "namespace": "http://www.w3.org/2000/svg", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 414, + "end": 435, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "viewBox", + "rawName": "viewBox", + "value": "0 0 100 100", + "rawValue": "\"0 0 100 100\"" + } + ], + "children": [ + { + "type": "Text", + "span": { + "start": 436, + "end": 441, + "ctxt": 0 + }, + "data": "\n ", + "raw": "\n " + }, + { + "type": "Element", + "span": { + "start": 441, + "end": 521, + "ctxt": 0 + }, + "tagName": "foreignObject", + "namespace": "http://www.w3.org/2000/svg", + "attributes": [ + { + "type": "Attribute", + "span": { + "start": 456, + "end": 469, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "width", + "rawName": "width", + "value": "100px", + "rawValue": "\"100px\"" + }, + { + "type": "Attribute", + "span": { + "start": 470, + "end": 484, + "ctxt": 0 + }, + "namespace": null, + "prefix": null, + "name": "height", + "rawName": "height", + "value": "100px", + "rawValue": "\"100px\"" + } + ], + "children": [ + { + "type": "Comment", + "span": { + "start": 485, + "end": 504, + "ctxt": 0 + }, + "data": "[CDATA[content]]", + "raw": "" + }, + { + "type": "Text", + "span": { + "start": 504, + "end": 505, + "ctxt": 0 + }, + "data": "a", + "raw": "a" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 521, + "end": 526, + "ctxt": 0 + }, + "data": "\n ", + "raw": "\n " + }, + { + "type": "Element", + "span": { + "start": 526, + "end": 558, + "ctxt": 0 + }, + "tagName": "desc", + "namespace": "http://www.w3.org/2000/svg", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 532, + "end": 551, + "ctxt": 0 + }, + "data": "[CDATA[content]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 558, + "end": 563, + "ctxt": 0 + }, + "data": "\n ", + "raw": "\n " + }, + { + "type": "Element", + "span": { + "start": 563, + "end": 597, + "ctxt": 0 + }, + "tagName": "title", + "namespace": "http://www.w3.org/2000/svg", + "attributes": [], + "children": [ + { + "type": "Comment", + "span": { + "start": 570, + "end": 589, + "ctxt": 0 + }, + "data": "[CDATA[content]]", + "raw": "" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 597, + "end": 598, + "ctxt": 0 + }, + "data": "\n", + "raw": "\n" + } + ], + "content": null, + "isSelfClosing": false + }, + { + "type": "Text", + "span": { + "start": 604, + "end": 613, + "ctxt": 0 + }, + "data": "\n\n", + "raw": "\n\n" + } + ], + "content": null, + "isSelfClosing": false + } + ], + "content": null, + "isSelfClosing": false + } + ] +} diff --git a/crates/swc_html_parser/tests/recovery/text/cdata/output.stderr b/crates/swc_html_parser/tests/recovery/text/cdata/output.stderr new file mode 100644 index 00000000000..cbd42e5932f --- /dev/null +++ b/crates/swc_html_parser/tests/recovery/text/cdata/output.stderr @@ -0,0 +1,112 @@ + + x Cdata in html content + ,-[$DIR/tests/recovery/text/cdata/input.html:6:1] + 6 | + 7 | + : ^ + 8 | + `---- + + x Cdata in html content + ,-[$DIR/tests/recovery/text/cdata/input.html:7:1] + 7 | + 8 | + : ^ + 9 | + `---- + + x Cdata in html content + ,-[$DIR/tests/recovery/text/cdata/input.html:8:1] + 8 | + 9 | + : ^ + 10 | + `---- + + x Cdata in html content + ,-[$DIR/tests/recovery/text/cdata/input.html:9:1] + 9 | + 10 | + : ^ + 11 | text]]>
text]]>
]]>
+ +
text
+ `---- + + x Cdata in html content + ,-[$DIR/tests/recovery/text/cdata/input.html:12:1] + 12 |
+ `---- + + x Cdata in html content + ,-[$DIR/tests/recovery/text/cdata/input.html:14:1] + 14 |
+ 15 | + : ^ + 16 |
+ 15 | | + 16 | |
+ `---- + + x Text + ,-[$DIR/tests/recovery/text/cdata/input.html:11:1] + 11 |
+ `---- + + x Child + ,-[$DIR/tests/recovery/text/cdata/input.html:12:1] + 12 |
+ `---- + + x Element + ,-[$DIR/tests/recovery/text/cdata/input.html:12:1] + 12 |
+ `---- + + x Comment + ,-[$DIR/tests/recovery/text/cdata/input.html:12:1] + 12 |
+ 15 | + `---- + + x Text + ,-[$DIR/tests/recovery/text/cdata/input.html:12:1] + 12 |
+ 15 | + `---- + + x Child + ,-[$DIR/tests/recovery/text/cdata/input.html:13:1] + 13 |
+ 15 | | + 16 | `->
+ 15 | `-> + 16 |
+ 15 | + : ^^^^^^^^^^^^^^^^ + 16 |