perf(html/parser): Improve performance (#5884)

This commit is contained in:
Alexander Akait 2022-09-17 03:10:08 +03:00 committed by GitHub
parent 6b684ca162
commit 5645e810df
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,4 +1,4 @@
use std::{char::REPLACEMENT_CHARACTER, collections::VecDeque, mem::take};
use std::{cell::RefCell, char::REPLACEMENT_CHARACTER, collections::VecDeque, mem::take, rc::Rc};
use swc_atoms::JsWord;
use swc_common::{collections::AHashSet, input::Input, BytePos, Span};
@ -158,6 +158,7 @@ where
character_reference_code: Option<Vec<(u8, u32, Option<char>)>>,
temporary_buffer: String,
is_adjusted_current_node_is_element_in_html_namespace: Option<bool>,
char_buf: Rc<RefCell<String>>,
}
impl<I> Lexer<I>
@ -187,6 +188,7 @@ where
// Do this without a new allocation.
temporary_buffer: String::with_capacity(33),
is_adjusted_current_node_is_element_in_html_namespace: None,
char_buf: Rc::new(RefCell::new(String::with_capacity(2))),
};
// A leading Byte Order Mark (BOM) causes the character encoding argument to be
@ -359,7 +361,7 @@ where
#[inline(always)]
fn emit_temporary_buffer_as_character_tokens(&mut self) {
for c in self.temporary_buffer.clone().chars() {
for c in take(&mut self.temporary_buffer).chars() {
self.emit_token(Token::Character {
value: c,
raw: Some(String::from(c).into()),
@ -376,7 +378,7 @@ where
let mut once_raw = raw;
let mut once_emitted = false;
for c in self.temporary_buffer.clone().chars() {
for c in take(&mut self.temporary_buffer).chars() {
if let Some(old_value) = &mut attribute.value {
old_value.push(c);
} else {
@ -421,7 +423,7 @@ where
let mut once_raw = raw;
let mut once_emitted = false;
for c in self.temporary_buffer.clone().chars() {
for c in take(&mut self.temporary_buffer).chars() {
self.emit_token(Token::Character {
value: c,
raw: match once_raw {
@ -828,38 +830,64 @@ where
});
}
fn handle_raw_and_emit_character_token(&mut self, c: char) {
fn with_char_buf<F, Ret>(&mut self, op: F) -> LexResult<Ret>
where
F: for<'any> FnOnce(&mut Lexer<I>, &mut String) -> LexResult<Ret>,
{
let b = self.char_buf.clone();
let mut buf = b.borrow_mut();
buf.clear();
op(self, &mut buf)
}
/// Emits a `Token::Character` for `c`, storing the consumed source text in
/// the token's `raw` field.
///
/// When `c` is `'\r'` the emitted token's value is `'\n'`; if the current
/// input character is `'\n'` it is bumped as well and appended to the raw
/// text. Any other `c` is emitted unchanged, with `c` itself as the raw text.
///
/// NOTE(review): this span is a rendered diff. Lines of the previous
/// implementation (a local `raw` string and `self.…` calls) are interleaved
/// with lines of the replacement (a `with_char_buf` closure and `l.…` calls),
/// so it does not read as one coherent function body.
fn handle_raw_and_emit_character_token(&mut self, c: char) -> LexResult<()> {
let is_cr = c == '\r';
// A carriage return is emitted as a line feed (`value: '\n'` below).
if is_cr {
let mut raw = String::with_capacity(2);
self.with_char_buf(|l, buf| {
buf.push(c);
raw.push(c);
// Consume a directly following line feed so it lands in the same token's raw text.
if l.input.cur() == Some('\n') {
l.input.bump();
if self.input.cur() == Some('\n') {
self.input.bump();
buf.push('\n');
}
raw.push('\n');
}
l.emit_token(Token::Character {
value: '\n',
raw: Some((&**buf).into()),
});
self.emit_token(Token::Character {
value: '\n',
raw: Some(raw.into()),
});
Ok(())
})
} else {
// Every other character is emitted as-is, with itself as the raw text.
self.emit_token(Token::Character {
value: c,
raw: Some(String::from(c).into()),
});
self.with_char_buf(|l, buf| {
buf.push(c);
l.emit_token(Token::Character {
value: c,
raw: Some((&**buf).into()),
});
Ok(())
})
}
}
/// Emits a `Token::Character` whose value is `value.0` and whose `raw` text
/// is built from `value.1`.
///
/// NOTE(review): this span is a rendered diff. The previous signature and
/// body (a direct `self.emit_token` call with `String::from(value.1)`) are
/// followed by the replacement, which returns `LexResult<()>` and builds the
/// raw text in the buffer supplied by `with_char_buf`.
#[inline(always)]
fn emit_character_token(&mut self, value: (char, char)) {
self.emit_token(Token::Character {
value: value.0,
raw: Some(String::from(value.1).into()),
});
fn emit_character_token(&mut self, value: (char, char)) -> LexResult<()> {
self.with_char_buf(|l, buf| {
// `buf` was cleared by `with_char_buf`, so it holds exactly `value.1` after this push.
buf.push(value.1);
l.emit_token(Token::Character {
value: value.0,
raw: Some((&**buf).into()),
});
Ok(())
})
}
fn read_token_and_span(&mut self) -> LexResult<TokenAndSpan> {
@ -908,7 +936,7 @@ where
// character as a character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// EOF
// Emit an end-of-file token.
@ -921,7 +949,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -946,7 +974,7 @@ where
// REPLACEMENT CHARACTER character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// Emit an end-of-file token.
@ -959,7 +987,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -975,7 +1003,7 @@ where
// REPLACEMENT CHARACTER character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// Emit an end-of-file token.
@ -988,7 +1016,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1004,7 +1032,7 @@ where
// REPLACEMENT CHARACTER character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// Emit an end-of-file token.
@ -1017,7 +1045,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1030,7 +1058,7 @@ where
// REPLACEMENT CHARACTER character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// Emit an end-of-file token.
@ -1043,7 +1071,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1082,7 +1110,7 @@ where
// character token and an end-of-file token.
None => {
self.emit_error(ErrorKind::EofBeforeTagName);
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.emit_token(Token::Eof);
return Ok(());
@ -1092,7 +1120,7 @@ where
// LESS-THAN SIGN character token. Reconsume in the data state.
_ => {
self.emit_error(ErrorKind::InvalidFirstCharacterOfTagName);
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.reconsume_in_state(State::Data);
}
}
@ -1120,8 +1148,8 @@ where
// token.
None => {
self.emit_error(ErrorKind::EofBeforeTagName);
self.emit_character_token(('<', '<'));
self.emit_character_token(('/', '/'));
self.emit_character_token(('<', '<'))?;
self.emit_character_token(('/', '/'))?;
self.emit_token(Token::Eof);
return Ok(());
@ -1205,7 +1233,7 @@ where
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RCDATA
// state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.reconsume_in_state(State::Rcdata);
}
}
@ -1225,19 +1253,21 @@ where
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
// character token. Reconsume in the RCDATA state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('/', '/'));
self.emit_character_token(('<', '<'))?;
self.emit_character_token(('/', '/'))?;
self.reconsume_in_state(State::Rcdata);
}
}
}
// https://html.spec.whatwg.org/multipage/parsing.html#rcdata-end-tag-name-state
State::RcdataEndTagName => {
let anything_else = |lexer: &mut Lexer<I>| {
lexer.emit_character_token(('<', '<'));
lexer.emit_character_token(('/', '/'));
let anything_else = |lexer: &mut Lexer<I>| -> LexResult<()> {
lexer.emit_character_token(('<', '<'))?;
lexer.emit_character_token(('/', '/'))?;
lexer.emit_temporary_buffer_as_character_tokens();
lexer.reconsume_in_state(State::Rcdata);
Ok(())
};
// Consume the next input character:
@ -1255,7 +1285,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::BeforeAttributeName;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+002F SOLIDUS (/)
@ -1266,7 +1296,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::SelfClosingStartTag;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+003E GREATER-THAN SIGN (>)
@ -1278,7 +1308,7 @@ where
self.state = State::Data;
self.emit_tag_token();
} else {
anything_else(self);
anything_else(self)?;
}
}
// ASCII upper alpha
@ -1302,7 +1332,7 @@ where
// buffer (in the order they were added to the buffer). Reconsume in the
// RCDATA state.
_ => {
anything_else(self);
anything_else(self)?;
}
}
}
@ -1321,7 +1351,7 @@ where
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the RAWTEXT
// state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.reconsume_in_state(State::Rawtext);
}
}
@ -1341,19 +1371,21 @@ where
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
// character token. Reconsume in the RAWTEXT state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('/', '/'));
self.emit_character_token(('<', '<'))?;
self.emit_character_token(('/', '/'))?;
self.reconsume_in_state(State::Rawtext);
}
}
}
// https://html.spec.whatwg.org/multipage/parsing.html#rawtext-end-tag-name-state
State::RawtextEndTagName => {
let anything_else = |lexer: &mut Lexer<I>| {
lexer.emit_character_token(('<', '<'));
lexer.emit_character_token(('/', '/'));
let anything_else = |lexer: &mut Lexer<I>| -> LexResult<()> {
lexer.emit_character_token(('<', '<'))?;
lexer.emit_character_token(('/', '/'))?;
lexer.emit_temporary_buffer_as_character_tokens();
lexer.reconsume_in_state(State::Rawtext);
Ok(())
};
// Consume the next input character:
@ -1371,7 +1403,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::BeforeAttributeName;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+002F SOLIDUS (/)
@ -1382,7 +1414,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::SelfClosingStartTag;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+003E GREATER-THAN SIGN (>)
@ -1394,7 +1426,7 @@ where
self.state = State::Data;
self.emit_tag_token();
} else {
anything_else(self);
anything_else(self)?;
}
}
// ASCII upper alpha
@ -1418,7 +1450,7 @@ where
// buffer (in the order they were added to the buffer). Reconsume in the
// RAWTEXT state.
_ => {
anything_else(self);
anything_else(self)?;
}
}
}
@ -1438,14 +1470,14 @@ where
// SIGN character token and a U+0021 EXCLAMATION MARK character token.
Some('!') => {
self.state = State::ScriptDataEscapeStart;
self.emit_character_token(('<', '<'));
self.emit_character_token(('!', '!'));
self.emit_character_token(('<', '<'))?;
self.emit_character_token(('!', '!'))?;
}
// Anything else
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
// data state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.reconsume_in_state(State::ScriptData);
}
}
@ -1465,19 +1497,21 @@ where
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
// character token. Reconsume in the script data state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('/', '/'));
self.emit_character_token(('<', '<'))?;
self.emit_character_token(('/', '/'))?;
self.reconsume_in_state(State::ScriptData);
}
}
}
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-end-tag-name-state
State::ScriptDataEndTagName => {
let anything_else = |lexer: &mut Lexer<I>| {
lexer.emit_character_token(('<', '<'));
lexer.emit_character_token(('/', '/'));
let anything_else = |lexer: &mut Lexer<I>| -> LexResult<()> {
lexer.emit_character_token(('<', '<'))?;
lexer.emit_character_token(('/', '/'))?;
lexer.emit_temporary_buffer_as_character_tokens();
lexer.reconsume_in_state(State::ScriptData);
Ok(())
};
// Consume the next input character:
@ -1495,7 +1529,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::BeforeAttributeName;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+002F SOLIDUS (/)
@ -1506,7 +1540,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::SelfClosingStartTag;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+003E GREATER-THAN SIGN (>)
@ -1518,7 +1552,7 @@ where
self.state = State::Data;
self.emit_tag_token();
} else {
anything_else(self);
anything_else(self)?;
}
}
// ASCII upper alpha
@ -1542,7 +1576,7 @@ where
// buffer (in the order they were added to the buffer). Reconsume in the
// script data state.
_ => {
anything_else(self);
anything_else(self)?;
}
}
}
@ -1555,7 +1589,7 @@ where
// HYPHEN-MINUS character token.
Some(c @ '-') => {
self.state = State::ScriptDataEscapeStartDash;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// Anything else
// Reconsume in the script data state.
@ -1573,7 +1607,7 @@ where
// HYPHEN-MINUS character token.
Some(c @ '-') => {
self.state = State::ScriptDataEscapedDashDash;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// Anything else
// Reconsume in the script data state.
@ -1591,7 +1625,7 @@ where
// character token.
Some(c @ '-') => {
self.state = State::ScriptDataEscapedDash;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003C LESS-THAN SIGN (<)
// Switch to the script data escaped less-than sign state.
@ -1603,7 +1637,7 @@ where
// REPLACEMENT CHARACTER character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// This is an eof-in-script-html-comment-like-text parse error. Emit an
@ -1618,7 +1652,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1631,7 +1665,7 @@ where
// HYPHEN-MINUS character token.
Some(c @ '-') => {
self.state = State::ScriptDataEscapedDashDash;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003C LESS-THAN SIGN (<)
// Switch to the script data escaped less-than sign state.
@ -1644,7 +1678,7 @@ where
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.state = State::ScriptDataEscaped;
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// This is an eof-in-script-html-comment-like-text parse error. Emit an
@ -1661,7 +1695,7 @@ where
Some(c) => {
self.validate_input_stream_character(c);
self.state = State::ScriptDataEscaped;
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1672,7 +1706,7 @@ where
// U+002D HYPHEN-MINUS (-)
// Emit a U+002D HYPHEN-MINUS character token.
Some(c @ '-') => {
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003C LESS-THAN SIGN (<)
// Switch to the script data escaped less-than sign state.
@ -1684,7 +1718,7 @@ where
// character token.
Some(c @ '>') => {
self.state = State::ScriptData;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+0000 NULL
// This is an unexpected-null-character parse error. Switch to the script
@ -1692,7 +1726,7 @@ where
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.state = State::ScriptDataEscaped;
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// This is an eof-in-script-html-comment-like-text parse error. Emit an
@ -1709,7 +1743,7 @@ where
Some(c) => {
self.validate_input_stream_character(c);
self.state = State::ScriptDataEscaped;
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1730,14 +1764,14 @@ where
// state.
Some(c) if is_ascii_alpha(c) => {
self.temporary_buffer.clear();
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.reconsume_in_state(State::ScriptDataDoubleEscapeStart);
}
// Anything else
// Emit a U+003C LESS-THAN SIGN character token. Reconsume in the script
// data escaped state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('<', '<'))?;
self.reconsume_in_state(State::ScriptDataEscaped);
}
}
@ -1757,19 +1791,21 @@ where
// Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS
// character token. Reconsume in the script data escaped state.
_ => {
self.emit_character_token(('<', '<'));
self.emit_character_token(('/', '/'));
self.emit_character_token(('<', '<'))?;
self.emit_character_token(('/', '/'))?;
self.reconsume_in_state(State::ScriptDataEscaped);
}
}
}
// https://html.spec.whatwg.org/multipage/parsing.html#script-data-escaped-end-tag-name-state
State::ScriptDataEscapedEndTagName => {
let anything_else = |lexer: &mut Lexer<I>| {
lexer.emit_character_token(('<', '<'));
lexer.emit_character_token(('/', '/'));
let anything_else = |lexer: &mut Lexer<I>| -> LexResult<()> {
lexer.emit_character_token(('<', '<'))?;
lexer.emit_character_token(('/', '/'))?;
lexer.emit_temporary_buffer_as_character_tokens();
lexer.reconsume_in_state(State::ScriptDataEscaped);
Ok(())
};
// Consume the next input character:
@ -1787,7 +1823,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::BeforeAttributeName;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+002F SOLIDUS (/)
@ -1798,7 +1834,7 @@ where
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
self.state = State::SelfClosingStartTag;
} else {
anything_else(self);
anything_else(self)?;
}
}
// U+003E GREATER-THAN SIGN (>)
@ -1810,7 +1846,7 @@ where
self.state = State::Data;
self.emit_tag_token();
} else {
anything_else(self);
anything_else(self)?
}
}
// ASCII upper alpha
@ -1835,7 +1871,7 @@ where
// buffer (in the order they were added to the buffer). Reconsume in the
// script data escaped state.
_ => {
anything_else(self);
anything_else(self)?;
}
}
}
@ -1861,7 +1897,7 @@ where
self.state = State::ScriptDataEscaped;
}
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
Some(c @ '/' | c @ '>') => {
let is_script = self.temporary_buffer == "script";
@ -1872,7 +1908,7 @@ where
self.state = State::ScriptDataEscaped;
}
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// ASCII upper alpha
// Append the lowercase version of the current input character (add 0x0020
@ -1880,14 +1916,14 @@ where
// input character as a character token.
Some(c) if is_ascii_upper_alpha(c) => {
self.temporary_buffer.push(c.to_ascii_lowercase());
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// ASCII lower alpha
// Append the current input character to the temporary buffer. Emit the
// current input character as a character token.
Some(c) if is_ascii_lower_alpha(c) => {
self.temporary_buffer.push(c);
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// Anything else
// Reconsume in the script data escaped state.
@ -1905,21 +1941,21 @@ where
// HYPHEN-MINUS character token.
Some(c @ '-') => {
self.state = State::ScriptDataDoubleEscapedDash;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003C LESS-THAN SIGN (<)
// Switch to the script data double escaped less-than sign state. Emit a
// U+003C LESS-THAN SIGN character token.
Some(c @ '<') => {
self.state = State::ScriptDataDoubleEscapedLessThanSign;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+0000 NULL
// This is an unexpected-null-character parse error. Emit a U+FFFD
// REPLACEMENT CHARACTER character token.
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// This is an eof-in-script-html-comment-like-text parse error. Emit an
@ -1934,7 +1970,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1947,14 +1983,14 @@ where
// HYPHEN-MINUS character token.
Some(c @ '-') => {
self.state = State::ScriptDataDoubleEscapedDashDash;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003C LESS-THAN SIGN (<)
// Switch to the script data double escaped less-than sign state. Emit a
// U+003C LESS-THAN SIGN character token.
Some(c @ '<') => {
self.state = State::ScriptDataDoubleEscapedLessThanSign;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+0000 NULL
// This is an unexpected-null-character parse error. Switch to the script
@ -1963,7 +1999,7 @@ where
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.state = State::ScriptDataDoubleEscaped;
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// This is an eof-in-script-html-comment-like-text parse error. Emit an
@ -1980,7 +2016,7 @@ where
Some(c) => {
self.validate_input_stream_character(c);
self.state = State::ScriptDataDoubleEscaped;
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -1991,21 +2027,21 @@ where
// U+002D HYPHEN-MINUS (-)
// Emit a U+002D HYPHEN-MINUS character token.
Some(c @ '-') => {
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003C LESS-THAN SIGN (<)
// Switch to the script data double escaped less-than sign state. Emit a
// U+003C LESS-THAN SIGN character token.
Some(c @ '<') => {
self.state = State::ScriptDataDoubleEscapedLessThanSign;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+003E GREATER-THAN SIGN (>)
// Switch to the script data state. Emit a U+003E GREATER-THAN SIGN
// character token.
Some(c @ '>') => {
self.state = State::ScriptData;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// U+0000 NULL
// This is an unexpected-null-character parse error. Switch to the script
@ -2014,7 +2050,7 @@ where
Some(c @ '\x00') => {
self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.state = State::ScriptDataDoubleEscaped;
self.emit_character_token((REPLACEMENT_CHARACTER, c));
self.emit_character_token((REPLACEMENT_CHARACTER, c))?;
}
// EOF
// This is an eof-in-script-html-comment-like-text parse error. Emit an
@ -2031,7 +2067,7 @@ where
Some(c) => {
self.validate_input_stream_character(c);
self.state = State::ScriptDataDoubleEscaped;
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -2045,7 +2081,7 @@ where
Some(c @ '/') => {
self.temporary_buffer.clear();
self.state = State::ScriptDataDoubleEscapeEnd;
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// Anything else
// Reconsume in the script data double escaped state.
@ -2076,7 +2112,7 @@ where
self.state = State::ScriptDataDoubleEscaped;
}
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
Some(c @ '/' | c @ '>') => {
let is_script = self.temporary_buffer == "script";
@ -2087,7 +2123,7 @@ where
self.state = State::ScriptDataDoubleEscaped;
}
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// ASCII upper alpha
// Append the lowercase version of the current input character (add 0x0020
@ -2095,7 +2131,7 @@ where
// input character as a character token.
Some(c) if is_ascii_upper_alpha(c) => {
self.temporary_buffer.push(c.to_ascii_lowercase());
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// ASCII lower alpha
// Append the current input character to the temporary buffer. Emit the
@ -2103,7 +2139,7 @@ where
Some(c) if is_ascii_lower_alpha(c) => {
self.temporary_buffer.push(c);
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
// Anything else
// Reconsume in the script data double escaped state.
@ -3962,7 +3998,7 @@ where
// Emit the current input character as a character token.
Some(c) => {
self.validate_input_stream_character(c);
self.handle_raw_and_emit_character_token(c);
self.handle_raw_and_emit_character_token(c)?;
}
}
}
@ -3979,7 +4015,7 @@ where
// Emit a U+005D RIGHT SQUARE BRACKET character token. Reconsume in the
// CDATA section state.
_ => {
self.emit_character_token((']', ']'));
self.emit_character_token((']', ']'))?;
self.reconsume_in_state(State::CdataSection);
}
}
@ -3991,7 +4027,7 @@ where
// U+005D RIGHT SQUARE BRACKET (])
// Emit a U+005D RIGHT SQUARE BRACKET character token.
Some(c @ ']') => {
self.emit_character_token((']', c));
self.emit_character_token((']', c))?;
}
// U+003E GREATER-THAN SIGN character
// Switch to the data state.
@ -4002,8 +4038,8 @@ where
// Emit two U+005D RIGHT SQUARE BRACKET character tokens. Reconsume in the
// CDATA section state.
_ => {
self.emit_character_token((']', ']'));
self.emit_character_token((']', ']'));
self.emit_character_token((']', ']'))?;
self.emit_character_token((']', ']'))?;
self.reconsume_in_state(State::CdataSection);
}
}
@ -4163,7 +4199,7 @@ where
if self.is_consumed_as_part_of_an_attribute() {
self.append_to_attribute(None, Some((false, Some(c), Some(c))));
} else {
self.emit_character_token((c, c));
self.emit_character_token((c, c))?;
}
}
// U+003B SEMICOLON (;)