fix(html/parser): Fix parsing of cdata in foreign context (#4531)

This commit is contained in:
Alexander Akait 2022-05-05 05:03:27 +03:00 committed by GitHub
parent 98b6727c9c
commit 5dc9376a4b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 159 additions and 14 deletions

View File

@ -27,12 +27,12 @@ where
state: State,
return_state: State,
errors: Vec<Error>,
in_foreign_node: bool,
pub last_start_tag_token: Option<Token>,
pending_tokens: Vec<TokenAndSpan>,
cur_token: Option<Token>,
character_reference_code: Option<Vec<(u8, u32)>>,
temporary_buffer: Option<String>,
is_adjusted_current_node_is_element_in_html_namespace: Option<bool>,
doctype_keyword: Option<String>,
last_emitted_error_pos: Option<BytePos>,
}
@ -54,12 +54,12 @@ where
state: State::Data,
return_state: State::Data,
errors: vec![],
in_foreign_node: false,
last_start_tag_token: None,
pending_tokens: vec![],
cur_token: None,
character_reference_code: None,
temporary_buffer: None,
is_adjusted_current_node_is_element_in_html_namespace: None,
doctype_keyword: None,
last_emitted_error_pos: None,
}
@ -205,13 +205,17 @@ where
self.input.reset_to(state.pos);
}
/// Overwrites `self.state` with the caller-supplied `state`.
///
/// No other field is touched here; in particular `return_state` is left as is.
fn set_input_state(&mut self, state: State) {
self.state = state;
}
/// Hands the contents of `self.errors` over to the caller.
///
/// NOTE(review): `take` is presumably `std::mem::take`, which would leave an
/// empty `Vec` behind in `self.errors` — confirm against the file's imports.
fn take_errors(&mut self) -> Vec<Error> {
take(&mut self.errors)
}
/// Records whether the adjusted current node is an element in the HTML
/// namespace, as reported by the caller.
///
/// The flag is stored as `Some(value)`; the field is initialised to `None`,
/// so `None` means no caller has reported a value yet. The `<![CDATA[`
/// handling in this file only switches to `State::CdataSection` when the
/// stored flag is `Some(false)`.
fn set_adjusted_current_node_to_html_namespace(&mut self, value: bool) {
self.is_adjusted_current_node_is_element_in_html_namespace = Some(value);
}
/// Overwrites `self.state` with the caller-supplied `state`.
///
/// No other field is touched here; in particular `return_state` is left as is.
fn set_input_state(&mut self, state: State) {
self.state = state;
}
}
impl<I> Lexer<I>
@ -3108,7 +3112,7 @@ where
Some(a2 @ 'a' | a2 @ 'A') => {
match self.consume_next_char() {
Some('[') => {
if self.in_foreign_node {
if let Some(false) = self.is_adjusted_current_node_is_element_in_html_namespace {
self.state = State::CdataSection;
} else {
self.emit_error(

View File

@ -20,9 +20,11 @@ pub trait ParserInput {
fn reset(&mut self, state: &Self::State);
fn take_errors(&mut self) -> Vec<Error>;
fn set_input_state(&mut self, state: State);
fn take_errors(&mut self) -> Vec<Error>;
fn set_adjusted_current_node_to_html_namespace(&mut self, value: bool);
}
#[derive(Debug)]
@ -120,4 +122,9 @@ where
/// Forwards `state` unchanged to `set_input_state` on the wrapped `self.input`.
pub(super) fn set_input_state(&mut self, state: State) {
self.input.set_input_state(state);
}
/// Forwards `value` unchanged to
/// `set_adjusted_current_node_to_html_namespace` on the wrapped `self.input`.
///
/// `value` states whether the adjusted current node is an element in the
/// HTML namespace, per the method name; this wrapper does not interpret it.
pub(super) fn set_adjusted_current_node_to_html_namespace(&mut self, value: bool) {
self.input
.set_adjusted_current_node_to_html_namespace(value);
}
}

View File

@ -278,17 +278,26 @@ where
// to the current insertion mode in HTML content.
let adjusted_current_node = self.get_adjusted_current_node();
let is_element_in_html_namespace = is_element_in_html_namespace(adjusted_current_node);
let is_mathml_text_integration_point =
is_mathml_text_integration_point(adjusted_current_node);
let is_mathml_annotation_xml = is_mathml_annotation_xml(adjusted_current_node);
let is_html_integration_point = is_html_integration_point(adjusted_current_node);
self.input
.set_adjusted_current_node_to_html_namespace(is_element_in_html_namespace);
if self.open_elements_stack.items.is_empty()
|| is_element_in_html_namespace(adjusted_current_node)
|| (is_mathml_text_integration_point(adjusted_current_node)
|| is_element_in_html_namespace
|| (is_mathml_text_integration_point
&& matches!(&token_and_info.token, Token::StartTag { tag_name, .. } if &*tag_name != "mglyph" && &*tag_name != "malignmark"))
|| (is_mathml_text_integration_point(adjusted_current_node)
|| (is_mathml_text_integration_point
&& matches!(&token_and_info.token, Token::Character { .. }))
|| (is_mathml_annotation_xml(adjusted_current_node)
|| (is_mathml_annotation_xml
&& matches!(&token_and_info.token, Token::StartTag { tag_name, .. } if &*tag_name == "svg"))
|| (is_html_integration_point(adjusted_current_node)
|| (is_html_integration_point
&& matches!(&token_and_info.token, Token::StartTag { .. }))
|| (is_html_integration_point(adjusted_current_node)
|| (is_html_integration_point
&& matches!(&token_and_info.token, Token::Character { .. }))
|| matches!(&token_and_info.token, Token::Eof)
{

View File

@ -0,0 +1,2 @@
<svg><![CDATA[foo
bar]]>

After

Width:  |  Height:  |  Size: 24 B

View File

@ -0,0 +1,75 @@
{
"type": "Document",
"span": {
"start": 0,
"end": 21,
"ctxt": 0
},
"mode": "no-quirks",
"children": [
{
"type": "Element",
"span": {
"start": 0,
"end": 21,
"ctxt": 0
},
"tagName": "html",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Element",
"span": {
"start": 0,
"end": 5,
"ctxt": 0
},
"tagName": "head",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [],
"content": null
},
{
"type": "Element",
"span": {
"start": 0,
"end": 21,
"ctxt": 0
},
"tagName": "body",
"namespace": "http://www.w3.org/1999/xhtml",
"attributes": [],
"children": [
{
"type": "Element",
"span": {
"start": 0,
"end": 21,
"ctxt": 0
},
"tagName": "svg",
"namespace": "http://www.w3.org/2000/svg",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 5,
"end": 21,
"ctxt": 0
},
"value": "foo\nbar"
}
],
"content": null
}
],
"content": null
}
],
"content": null
}
]
}

View File

@ -0,0 +1,6 @@
x Unexpected token
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | ,-> <svg><![CDATA[foo
2 | `-> bar]]>
`----

View File

@ -0,0 +1,42 @@
x Document
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | ,-> <svg><![CDATA[foo
2 | `-> bar]]>
`----
x Child
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | ,-> <svg><![CDATA[foo
2 | `-> bar]]>
`----
x Element
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | ,-> <svg><![CDATA[foo
2 | `-> bar]]>
`----
x Child
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | <svg><![CDATA[foo
: ^^^^^
`----
x Element
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | <svg><![CDATA[foo
: ^^^^^
`----
x Child
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | ,-> <svg><![CDATA[foo
2 | `-> bar]]>
`----
x Text
,-[$DIR/tests/recovery/element/svg-1/input.html:1:1]
1 | ,-> <svg><![CDATA[foo
2 | `-> bar]]>
`----