feat(html/ast): Add raw to doctype (#5198)

This commit is contained in:
Alexander Akait 2022-07-17 15:48:21 +03:00 committed by GitHub
parent 71ea5d25e4
commit 945510a695
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
834 changed files with 1847 additions and 1059 deletions

View File

@ -48,6 +48,7 @@ pub struct DocumentType {
pub name: Option<JsWord>, pub name: Option<JsWord>,
pub public_id: Option<JsWord>, pub public_id: Option<JsWord>,
pub system_id: Option<JsWord>, pub system_id: Option<JsWord>,
pub raw: Option<JsWord>,
} }
#[derive(StringEnum, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, EqIgnoreSpan)] #[derive(StringEnum, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, EqIgnoreSpan)]

View File

@ -23,29 +23,16 @@ pub struct AttributeToken {
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, EqIgnoreSpan)] #[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize, EqIgnoreSpan)]
pub enum Token { pub enum Token {
Doctype { Doctype {
// DOCTYPE keyword
raw_keyword: Option<JsWord>,
// Name // Name
name: Option<JsWord>, name: Option<JsWord>,
raw_name: Option<JsWord>,
// Is force quirks? // Is force quirks?
force_quirks: bool, force_quirks: bool,
// PUBLIC keyword
raw_public_keyword: Option<JsWord>,
// Quotes around public identifier
public_quote: Option<char>,
// Public identifier // Public identifier
public_id: Option<JsWord>, public_id: Option<JsWord>,
// SYSTEM keyword
raw_system_keyword: Option<JsWord>,
// Quotes around system identifier
system_quote: Option<char>,
// System identifier // System identifier
system_id: Option<JsWord>, system_id: Option<JsWord>,
// Raw value
raw: Option<JsWord>,
}, },
StartTag { StartTag {
tag_name: JsWord, tag_name: JsWord,

View File

@ -326,6 +326,12 @@ fn verify_document_fragment(
struct DropSpan; struct DropSpan;
impl VisitMut for DropSpan { impl VisitMut for DropSpan {
// Visits a `DocumentType` node: first recurses into the node's children with
// this same visitor, then clears the node's `raw` field.
// NOTE(review): presumably `raw` is reset so that tree comparisons performed
// by the surrounding verification helper do not depend on the original source
// text of the doctype — confirm against the callers of `DropSpan`.
fn visit_mut_document_type(&mut self, n: &mut DocumentType) {
n.visit_mut_children_with(self);
// Drop the raw doctype text after the children have been processed.
n.raw = None;
}
fn visit_mut_comment(&mut self, n: &mut Comment) { fn visit_mut_comment(&mut self, n: &mut Comment) {
n.visit_mut_children_with(self); n.visit_mut_children_with(self);

View File

@ -96,15 +96,9 @@ pub enum State {
#[derive(PartialEq, Eq, Clone, Debug)] #[derive(PartialEq, Eq, Clone, Debug)]
struct Doctype { struct Doctype {
raw_keyword: Option<String>,
name: Option<String>, name: Option<String>,
raw_name: Option<String>,
force_quirks: bool, force_quirks: bool,
raw_public_keyword: Option<String>,
public_quote: Option<char>,
public_id: Option<String>, public_id: Option<String>,
raw_system_keyword: Option<String>,
system_quote: Option<char>,
system_id: Option<String>, system_id: Option<String>,
} }
@ -158,12 +152,12 @@ where
pending_tokens: VecDeque<TokenAndSpan>, pending_tokens: VecDeque<TokenAndSpan>,
current_doctype_token: Option<Doctype>, current_doctype_token: Option<Doctype>,
current_comment_token: Option<Comment>, current_comment_token: Option<Comment>,
doctype_raw: Option<String>,
current_tag_token: Option<Tag>, current_tag_token: Option<Tag>,
attribute_start_position: Option<BytePos>, attribute_start_position: Option<BytePos>,
character_reference_code: Option<Vec<(u8, u32, Option<char>)>>, character_reference_code: Option<Vec<(u8, u32, Option<char>)>>,
temporary_buffer: String, temporary_buffer: String,
is_adjusted_current_node_is_element_in_html_namespace: Option<bool>, is_adjusted_current_node_is_element_in_html_namespace: Option<bool>,
doctype_keyword: Option<String>,
} }
impl<I> Lexer<I> impl<I> Lexer<I>
@ -185,6 +179,7 @@ where
last_start_tag_name: None, last_start_tag_name: None,
pending_tokens: VecDeque::new(), pending_tokens: VecDeque::new(),
current_doctype_token: None, current_doctype_token: None,
doctype_raw: None,
current_comment_token: None, current_comment_token: None,
current_tag_token: None, current_tag_token: None,
attribute_start_position: None, attribute_start_position: None,
@ -192,7 +187,6 @@ where
// Do this without a new allocation. // Do this without a new allocation.
temporary_buffer: String::with_capacity(33), temporary_buffer: String::with_capacity(33),
is_adjusted_current_node_is_element_in_html_namespace: None, is_adjusted_current_node_is_element_in_html_namespace: None,
doctype_keyword: None,
}; };
// A leading Byte Order Mark (BOM) causes the character encoding argument to be // A leading Byte Order Mark (BOM) causes the character encoding argument to be
@ -448,62 +442,60 @@ where
} }
} }
fn create_doctype_token(&mut self, keyword: Option<String>, name_c: Option<(char, char)>) { fn create_doctype_token(&mut self, name_c: Option<char>) {
let mut new_name = None; let mut new_name = None;
let mut new_raw_name = None;
if let Some(name_c) = name_c { if let Some(name_c) = name_c {
let mut name = String::with_capacity(4); let mut name = String::with_capacity(4);
let mut raw_name = String::with_capacity(4);
name.push(name_c.0);
raw_name.push(name_c.1);
name.push(name_c);
new_name = Some(name); new_name = Some(name);
new_raw_name = Some(raw_name);
} }
self.current_doctype_token = Some(Doctype { self.current_doctype_token = Some(Doctype {
raw_keyword: keyword,
name: new_name, name: new_name,
raw_name: new_raw_name,
force_quirks: false, force_quirks: false,
public_quote: None,
raw_public_keyword: None,
public_id: None, public_id: None,
system_quote: None,
raw_system_keyword: None,
system_id: None, system_id: None,
}); });
} }
// Appends the just-consumed character `c` to the raw DOCTYPE text that is
// currently being collected in `self.doctype_raw`.
//
// When `c` is a carriage return that is immediately followed by a line feed
// in the input, the line feed is consumed as well and both characters are
// recorded, so the raw text keeps the `\r\n` pair exactly as written.
//
// If no raw buffer is active (`self.doctype_raw` is `None`), nothing is
// recorded and no extra input is consumed.
fn append_raw_to_doctype_token(&mut self, c: char) {
    if let Some(doctype_raw) = &mut self.doctype_raw {
        // Push straight into the buffer; no temporary `String` is needed.
        doctype_raw.push(c);

        // Treat CRLF as a single unit: swallow the LF that follows a CR.
        if c == '\r' && self.input.cur() == Some('\n') {
            self.input.bump();
            doctype_raw.push('\n');
        }
    }
}
fn append_to_doctype_token( fn append_to_doctype_token(
&mut self, &mut self,
raw_keyword: Option<String>, name: Option<char>,
name: Option<(char, char)>, public_id: Option<char>,
public_id: Option<(char, char)>, system_id: Option<char>,
system_id: Option<(char, char)>,
) { ) {
if let Some(ref mut token) = self.current_doctype_token { if let Some(ref mut token) = self.current_doctype_token {
if let Some(raw_keyword) = raw_keyword {
if let Doctype {
raw_keyword: Some(old_raw_keyword),
..
} = token
{
*old_raw_keyword = raw_keyword;
}
}
if let Some(name) = name { if let Some(name) = name {
if let Doctype { if let Doctype {
name: Some(old_name), name: Some(old_name),
raw_name: Some(old_raw_name),
.. ..
} = token } = token
{ {
old_name.push(name.0); old_name.push(name);
old_raw_name.push(name.1);
} }
} }
@ -513,7 +505,7 @@ where
.. ..
} = token } = token
{ {
old_public_id.push(public_id.0); old_public_id.push(public_id);
} }
} }
@ -523,7 +515,7 @@ where
.. ..
} = token } = token
{ {
old_system_id.push(system_id.0); old_system_id.push(system_id);
} }
} }
} }
@ -535,46 +527,37 @@ where
} }
} }
fn set_doctype_token_public_id(&mut self, quote: char) { fn set_doctype_token_public_id(&mut self) {
if let Some(Doctype { if let Some(Doctype { public_id, .. }) = &mut self.current_doctype_token {
public_id,
public_quote,
..
}) = &mut self.current_doctype_token
{
// The Longest public id is `-//softquad software//dtd hotmetal pro // The Longest public id is `-//softquad software//dtd hotmetal pro
// 6.0::19990601::extensions to html 4.0//` // 6.0::19990601::extensions to html 4.0//`
*public_id = Some(String::with_capacity(78)); *public_id = Some(String::with_capacity(78));
*public_quote = Some(quote);
} }
} }
fn set_doctype_token_system_id(&mut self, quote: char) { fn set_doctype_token_system_id(&mut self) {
if let Some(Doctype { if let Some(Doctype { system_id, .. }) = &mut self.current_doctype_token {
system_id,
system_quote,
..
}) = &mut self.current_doctype_token
{
// The Longest system id is `http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd` // The Longest system id is `http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd`
*system_id = Some(String::with_capacity(58)); *system_id = Some(String::with_capacity(58));
*system_quote = Some(quote);
} }
} }
fn emit_doctype_token(&mut self) { fn emit_doctype_token(&mut self) {
let current_doctype_token = self.current_doctype_token.take().unwrap(); let current_doctype_token = self.current_doctype_token.take().unwrap();
let raw = match self.doctype_raw.take() {
Some(raw) => raw,
_ => {
unreachable!();
}
};
let token = Token::Doctype { let token = Token::Doctype {
raw_keyword: current_doctype_token.raw_keyword.map(JsWord::from),
name: current_doctype_token.name.map(JsWord::from), name: current_doctype_token.name.map(JsWord::from),
raw_name: current_doctype_token.raw_name.map(JsWord::from),
force_quirks: current_doctype_token.force_quirks, force_quirks: current_doctype_token.force_quirks,
raw_public_keyword: current_doctype_token.raw_public_keyword.map(JsWord::from),
public_quote: current_doctype_token.public_quote,
public_id: current_doctype_token.public_id.map(JsWord::from), public_id: current_doctype_token.public_id.map(JsWord::from),
raw_system_keyword: current_doctype_token.raw_system_keyword.map(JsWord::from),
system_quote: current_doctype_token.system_quote,
system_id: current_doctype_token.system_id.map(JsWord::from), system_id: current_doctype_token.system_id.map(JsWord::from),
raw: Some(JsWord::from(raw)),
}; };
self.emit_token(token); self.emit_token(token);
@ -864,7 +847,7 @@ where
value: c, value: c,
raw: Some(String::from(c).into()), raw: Some(String::from(c).into()),
}); });
}; }
} }
#[inline(always)] #[inline(always)]
@ -2631,8 +2614,10 @@ where
Some(e @ 'e' | e @ 'E') => { Some(e @ 'e' | e @ 'E') => {
self.state = State::Doctype; self.state = State::Doctype;
let mut raw_keyword = String::with_capacity(7); let mut raw_keyword = String::with_capacity(9);
raw_keyword.push('<');
raw_keyword.push('!');
raw_keyword.push(d); raw_keyword.push(d);
raw_keyword.push(o); raw_keyword.push(o);
raw_keyword.push(c); raw_keyword.push(c);
@ -2641,7 +2626,7 @@ where
raw_keyword.push(p); raw_keyword.push(p);
raw_keyword.push(e); raw_keyword.push(e);
self.doctype_keyword = Some(raw_keyword); self.doctype_raw = Some(raw_keyword);
} }
_ => { _ => {
anything_else(self); anything_else(self);
@ -3032,8 +3017,7 @@ where
// U+0020 SPACE // U+0020 SPACE
// Switch to the before DOCTYPE name state. // Switch to the before DOCTYPE name state.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
self.state = State::BeforeDoctypeName; self.state = State::BeforeDoctypeName;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
@ -3047,10 +3031,7 @@ where
// token. // token.
None => { None => {
self.emit_error(ErrorKind::EofInDoctype); self.emit_error(ErrorKind::EofInDoctype);
self.create_doctype_token(None);
let doctype_keyword = self.doctype_keyword.take();
self.create_doctype_token(doctype_keyword, None);
self.set_force_quirks(); self.set_force_quirks();
self.emit_doctype_token(); self.emit_doctype_token();
self.emit_token(Token::Eof); self.emit_token(Token::Eof);
@ -3076,19 +3057,15 @@ where
// U+0020 SPACE // U+0020 SPACE
// Ignore the character. // Ignore the character.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
} }
// ASCII upper alpha // ASCII upper alpha
// Create a new DOCTYPE token. Set the token's name to the lowercase version // Create a new DOCTYPE token. Set the token's name to the lowercase version
// of the current input character (add 0x0020 to the character's code // of the current input character (add 0x0020 to the character's code
// point). Switch to the DOCTYPE name state. // point). Switch to the DOCTYPE name state.
Some(c) if is_ascii_upper_alpha(c) => { Some(c) if is_ascii_upper_alpha(c) => {
let doctype_keyword = self.doctype_keyword.take(); self.append_raw_to_doctype_token(c);
self.create_doctype_token(Some(c.to_ascii_lowercase()));
self.create_doctype_token(
doctype_keyword,
Some((c.to_ascii_lowercase(), c)),
);
self.state = State::DoctypeName; self.state = State::DoctypeName;
} }
// U+0000 NULL // U+0000 NULL
@ -3096,23 +3073,19 @@ where
// token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character. // token. Set the token's name to a U+FFFD REPLACEMENT CHARACTER character.
// Switch to the DOCTYPE name state. // Switch to the DOCTYPE name state.
Some(c @ '\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.create_doctype_token(Some(REPLACEMENT_CHARACTER));
let doctype_keyword = self.doctype_keyword.take();
self.create_doctype_token(
doctype_keyword,
Some((REPLACEMENT_CHARACTER, c)),
);
self.state = State::DoctypeName; self.state = State::DoctypeName;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is a missing-doctype-name parse error. Create a new DOCTYPE token. // This is a missing-doctype-name parse error. Create a new DOCTYPE token.
// Set its force-quirks flag to on. Switch to the data state. Emit the // Set its force-quirks flag to on. Switch to the data state. Emit the
// current token. // current token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingDoctypeName); self.emit_error(ErrorKind::MissingDoctypeName);
self.create_doctype_token(None, None); self.create_doctype_token(None);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
@ -3123,7 +3096,7 @@ where
// token. // token.
None => { None => {
self.emit_error(ErrorKind::EofInDoctype); self.emit_error(ErrorKind::EofInDoctype);
self.create_doctype_token(None, None); self.create_doctype_token(None);
self.set_force_quirks(); self.set_force_quirks();
self.emit_doctype_token(); self.emit_doctype_token();
self.emit_token(Token::Eof); self.emit_token(Token::Eof);
@ -3135,10 +3108,8 @@ where
// character. Switch to the DOCTYPE name state. // character. Switch to the DOCTYPE name state.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_raw_to_doctype_token(c);
let doctype_keyword = self.doctype_keyword.take(); self.create_doctype_token(Some(c));
self.create_doctype_token(doctype_keyword, Some((c, c)));
self.state = State::DoctypeName; self.state = State::DoctypeName;
} }
} }
@ -3153,12 +3124,13 @@ where
// U+0020 SPACE // U+0020 SPACE
// Switch to the after DOCTYPE name state. // Switch to the after DOCTYPE name state.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
self.state = State::AfterDoctypeName; self.state = State::AfterDoctypeName;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// Switch to the data state. Emit the current DOCTYPE token. // Switch to the data state. Emit the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
} }
@ -3166,24 +3138,16 @@ where
// Append the lowercase version of the current input character (add 0x0020 // Append the lowercase version of the current input character (add 0x0020
// to the character's code point) to the current DOCTYPE token's name. // to the character's code point) to the current DOCTYPE token's name.
Some(c) if is_ascii_upper_alpha(c) => { Some(c) if is_ascii_upper_alpha(c) => {
self.append_to_doctype_token( self.append_raw_to_doctype_token(c);
None, self.append_to_doctype_token(Some(c.to_ascii_lowercase()), None, None);
Some((c.to_ascii_lowercase(), c)),
None,
None,
);
} }
// U+0000 NULL // U+0000 NULL
// This is an unexpected-null-character parse error. Append a U+FFFD // This is an unexpected-null-character parse error. Append a U+FFFD
// REPLACEMENT CHARACTER character to the current DOCTYPE token's name. // REPLACEMENT CHARACTER character to the current DOCTYPE token's name.
Some(c @ '\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.append_to_doctype_token( self.append_to_doctype_token(Some(REPLACEMENT_CHARACTER), None, None);
None,
Some((REPLACEMENT_CHARACTER, c)),
None,
None,
);
} }
// EOF // EOF
// This is an eof-in-doctype parse error. Set the current DOCTYPE token's // This is an eof-in-doctype parse error. Set the current DOCTYPE token's
@ -3201,7 +3165,8 @@ where
// Append the current input character to the current DOCTYPE token's name. // Append the current input character to the current DOCTYPE token's name.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_to_doctype_token(None, Some((c, c)), None, None); self.append_raw_to_doctype_token(c);
self.append_to_doctype_token(Some(c), None, None);
} }
} }
} }
@ -3217,11 +3182,12 @@ where
// U+0020 SPACE // U+0020 SPACE
// Ignore the character. // Ignore the character.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// Switch to the data state. Emit the current DOCTYPE token. // Switch to the data state. Emit the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
} }
@ -3269,21 +3235,17 @@ where
match &*first_six_chars.to_lowercase() { match &*first_six_chars.to_lowercase() {
"public" => { "public" => {
self.state = State::AfterDoctypePublicKeyword; self.state = State::AfterDoctypePublicKeyword;
self.append_to_doctype_token(
Some(first_six_chars), if let Some(doctype_raw) = &mut self.doctype_raw {
None, doctype_raw.push_str(&first_six_chars);
None, }
None,
);
} }
"system" => { "system" => {
self.state = State::AfterDoctypeSystemKeyword; self.state = State::AfterDoctypeSystemKeyword;
self.append_to_doctype_token(
Some(first_six_chars), if let Some(doctype_raw) = &mut self.doctype_raw {
None, doctype_raw.push_str(&first_six_chars);
None, }
None,
);
} }
_ => { _ => {
self.cur_pos = cur_pos; self.cur_pos = cur_pos;
@ -3308,7 +3270,7 @@ where
// U+0020 SPACE // U+0020 SPACE
// Switch to the before DOCTYPE public identifier state. // Switch to the before DOCTYPE public identifier state.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
self.state = State::BeforeDoctypePublicIdentifier; self.state = State::BeforeDoctypePublicIdentifier;
} }
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
@ -3316,9 +3278,10 @@ where
// Set the current DOCTYPE token's public identifier to the empty string // Set the current DOCTYPE token's public identifier to the empty string
// (not missing), then switch to the DOCTYPE public identifier // (not missing), then switch to the DOCTYPE public identifier
// (double-quoted) state. // (double-quoted) state.
Some('"') => { Some(c @ '"') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword);
self.set_doctype_token_public_id('"'); self.set_doctype_token_public_id();
self.state = State::DoctypePublicIdentifierDoubleQuoted; self.state = State::DoctypePublicIdentifierDoubleQuoted;
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
@ -3326,16 +3289,18 @@ where
// Set the current DOCTYPE token's public identifier to the empty string // Set the current DOCTYPE token's public identifier to the empty string
// (not missing), then switch to the DOCTYPE public identifier // (not missing), then switch to the DOCTYPE public identifier
// (single-quoted) state. // (single-quoted) state.
Some('\'') => { Some(c @ '\'') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword);
self.set_doctype_token_public_id('\''); self.set_doctype_token_public_id();
self.state = State::DoctypePublicIdentifierSingleQuoted; self.state = State::DoctypePublicIdentifierSingleQuoted;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is a missing-doctype-public-identifier parse error. Set the current // This is a missing-doctype-public-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingDoctypePublicIdentifier); self.emit_error(ErrorKind::MissingDoctypePublicIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3374,29 +3339,32 @@ where
// U+0020 SPACE // U+0020 SPACE
// Ignore the character. // Ignore the character.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
} }
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
// Set the current DOCTYPE token's public identifier to the empty string // Set the current DOCTYPE token's public identifier to the empty string
// (not missing), then switch to the DOCTYPE public identifier // (not missing), then switch to the DOCTYPE public identifier
// (double-quoted) state. // (double-quoted) state.
Some('"') => { Some(c @ '"') => {
self.set_doctype_token_public_id('"'); self.append_raw_to_doctype_token(c);
self.set_doctype_token_public_id();
self.state = State::DoctypePublicIdentifierDoubleQuoted; self.state = State::DoctypePublicIdentifierDoubleQuoted;
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
// Set the current DOCTYPE token's public identifier to the empty string // Set the current DOCTYPE token's public identifier to the empty string
// (not missing), then switch to the DOCTYPE public identifier // (not missing), then switch to the DOCTYPE public identifier
// (single-quoted) state. // (single-quoted) state.
Some('\'') => { Some(c @ '\'') => {
self.set_doctype_token_public_id('\''); self.append_raw_to_doctype_token(c);
self.set_doctype_token_public_id();
self.state = State::DoctypePublicIdentifierSingleQuoted; self.state = State::DoctypePublicIdentifierSingleQuoted;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is a missing-doctype-public-identifier parse error. Set the current // This is a missing-doctype-public-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingDoctypePublicIdentifier); self.emit_error(ErrorKind::MissingDoctypePublicIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3431,7 +3399,8 @@ where
match self.consume_next_char() { match self.consume_next_char() {
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
// Switch to the after DOCTYPE public identifier state. // Switch to the after DOCTYPE public identifier state.
Some('"') => { Some(c @ '"') => {
self.append_raw_to_doctype_token(c);
self.state = State::AfterDoctypePublicIdentifier; self.state = State::AfterDoctypePublicIdentifier;
} }
// U+0000 NULL // U+0000 NULL
@ -3439,19 +3408,16 @@ where
// REPLACEMENT CHARACTER character to the current DOCTYPE token's public // REPLACEMENT CHARACTER character to the current DOCTYPE token's public
// identifier. // identifier.
Some(c @ '\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.append_to_doctype_token( self.append_to_doctype_token(None, Some(REPLACEMENT_CHARACTER), None);
None,
None,
Some((REPLACEMENT_CHARACTER, c)),
None,
);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is an abrupt-doctype-public-identifier parse error. Set the current // This is an abrupt-doctype-public-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::AbruptDoctypePublicIdentifier); self.emit_error(ErrorKind::AbruptDoctypePublicIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3474,7 +3440,8 @@ where
// identifier. // identifier.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_to_doctype_token(None, None, Some((c, c)), None); self.append_raw_to_doctype_token(c);
self.append_to_doctype_token(None, Some(c), None);
} }
} }
} }
@ -3484,7 +3451,8 @@ where
match self.consume_next_char() { match self.consume_next_char() {
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
// Switch to the after DOCTYPE public identifier state. // Switch to the after DOCTYPE public identifier state.
Some('\'') => { Some(c @ '\'') => {
self.append_raw_to_doctype_token(c);
self.state = State::AfterDoctypePublicIdentifier; self.state = State::AfterDoctypePublicIdentifier;
} }
// U+0000 NULL // U+0000 NULL
@ -3492,19 +3460,16 @@ where
// REPLACEMENT CHARACTER character to the current DOCTYPE token's public // REPLACEMENT CHARACTER character to the current DOCTYPE token's public
// identifier. // identifier.
Some(c @ '\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.append_to_doctype_token( self.append_to_doctype_token(None, Some(REPLACEMENT_CHARACTER), None);
None,
None,
Some((REPLACEMENT_CHARACTER, c)),
None,
);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is an abrupt-doctype-public-identifier parse error. Set the current // This is an abrupt-doctype-public-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::AbruptDoctypePublicIdentifier); self.emit_error(ErrorKind::AbruptDoctypePublicIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3527,7 +3492,8 @@ where
// identifier. // identifier.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_to_doctype_token(None, None, Some((c, c)), None); self.append_raw_to_doctype_token(c);
self.append_to_doctype_token(None, Some(c), None);
} }
} }
} }
@ -3541,12 +3507,13 @@ where
// U+0020 SPACE // U+0020 SPACE
// Switch to the between DOCTYPE public and system identifiers state. // Switch to the between DOCTYPE public and system identifiers state.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
self.state = State::BetweenDoctypePublicAndSystemIdentifiers; self.state = State::BetweenDoctypePublicAndSystemIdentifiers;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// Switch to the data state. Emit the current DOCTYPE token. // Switch to the data state. Emit the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
} }
@ -3555,11 +3522,12 @@ where
// parse error. Set the current DOCTYPE token's system // parse error. Set the current DOCTYPE token's system
// identifier to the empty string (not missing), then switch // identifier to the empty string (not missing), then switch
// to the DOCTYPE system identifier (double-quoted) state. // to the DOCTYPE system identifier (double-quoted) state.
Some('"') => { Some(c @ '"') => {
self.append_raw_to_doctype_token(c);
self.emit_error( self.emit_error(
ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers, ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers,
); );
self.set_doctype_token_system_id('"'); self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierDoubleQuoted; self.state = State::DoctypeSystemIdentifierDoubleQuoted;
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
@ -3567,11 +3535,12 @@ where
// parse error. Set the current DOCTYPE token's system // parse error. Set the current DOCTYPE token's system
// identifier to the empty string (not missing), then switch // identifier to the empty string (not missing), then switch
// to the DOCTYPE system identifier (single-quoted) state. // to the DOCTYPE system identifier (single-quoted) state.
Some('\'') => { Some(c @ '\'') => {
self.append_raw_to_doctype_token(c);
self.emit_error( self.emit_error(
ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers, ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers,
); );
self.set_doctype_token_system_id('\''); self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierSingleQuoted; self.state = State::DoctypeSystemIdentifierSingleQuoted;
} }
// EOF // EOF
@ -3607,11 +3576,12 @@ where
// U+0020 SPACE // U+0020 SPACE
// Ignore the character. // Ignore the character.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// Switch to the data state. Emit the current DOCTYPE token. // Switch to the data state. Emit the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
} }
@ -3619,16 +3589,18 @@ where
// Set the current DOCTYPE token's system identifier to the empty string // Set the current DOCTYPE token's system identifier to the empty string
// (not missing), then switch to the DOCTYPE system identifier // (not missing), then switch to the DOCTYPE system identifier
// (double-quoted) state. // (double-quoted) state.
Some('"') => { Some(c @ '"') => {
self.set_doctype_token_system_id('"'); self.append_raw_to_doctype_token(c);
self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierDoubleQuoted; self.state = State::DoctypeSystemIdentifierDoubleQuoted;
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
// Set the current DOCTYPE token's system identifier to the empty string // Set the current DOCTYPE token's system identifier to the empty string
// (not missing), then switch to the DOCTYPE system identifier // (not missing), then switch to the DOCTYPE system identifier
// (single-quoted) state. // (single-quoted) state.
Some('\'') => { Some(c @ '\'') => {
self.set_doctype_token_system_id('\''); self.append_raw_to_doctype_token(c);
self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierSingleQuoted; self.state = State::DoctypeSystemIdentifierSingleQuoted;
} }
// EOF // EOF
@ -3664,7 +3636,7 @@ where
// U+0020 SPACE // U+0020 SPACE
// Switch to the before DOCTYPE system identifier state. // Switch to the before DOCTYPE system identifier state.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
self.state = State::BeforeDoctypeSystemIdentifier; self.state = State::BeforeDoctypeSystemIdentifier;
} }
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
@ -3672,9 +3644,10 @@ where
// Set the current DOCTYPE token's system identifier to the empty string // Set the current DOCTYPE token's system identifier to the empty string
// (not missing), then switch to the DOCTYPE system identifier // (not missing), then switch to the DOCTYPE system identifier
// (double-quoted) state. // (double-quoted) state.
Some('"') => { Some(c @ '"') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword);
self.set_doctype_token_system_id('"'); self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierDoubleQuoted; self.state = State::DoctypeSystemIdentifierDoubleQuoted;
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
@ -3682,16 +3655,18 @@ where
// Set the current DOCTYPE token's system identifier to the empty string // Set the current DOCTYPE token's system identifier to the empty string
// (not missing), then switch to the DOCTYPE system identifier // (not missing), then switch to the DOCTYPE system identifier
// (single-quoted) state. // (single-quoted) state.
Some('\'') => { Some(c @ '\'') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword); self.emit_error(ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword);
self.set_doctype_token_system_id('\''); self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierSingleQuoted; self.state = State::DoctypeSystemIdentifierSingleQuoted;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is a missing-doctype-system-identifier parse error. Set the current // This is a missing-doctype-system-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::MissingDoctypeSystemIdentifier); self.emit_error(ErrorKind::MissingDoctypeSystemIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3730,29 +3705,32 @@ where
// U+0020 SPACE // U+0020 SPACE
// Ignore the character. // Ignore the character.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
} }
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
// Set the current DOCTYPE token's system identifier to the empty string // Set the current DOCTYPE token's system identifier to the empty string
// (not missing), then switch to the DOCTYPE system identifier // (not missing), then switch to the DOCTYPE system identifier
// (double-quoted) state. // (double-quoted) state.
Some('"') => { Some(c @ '"') => {
self.set_doctype_token_system_id('"'); self.append_raw_to_doctype_token(c);
self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierDoubleQuoted; self.state = State::DoctypeSystemIdentifierDoubleQuoted;
} }
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
// Set the current DOCTYPE token's system identifier to the empty string // Set the current DOCTYPE token's system identifier to the empty string
// (not missing), then switch to the DOCTYPE system identifier // (not missing), then switch to the DOCTYPE system identifier
// (single-quoted) state. // (single-quoted) state.
Some('\'') => { Some(c @ '\'') => {
self.set_doctype_token_system_id('\''); self.append_raw_to_doctype_token(c);
self.set_doctype_token_system_id();
self.state = State::DoctypeSystemIdentifierSingleQuoted; self.state = State::DoctypeSystemIdentifierSingleQuoted;
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is a missing-doctype-system-identifier parse error. Set the current // This is a missing-doctype-system-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::EofInDoctype); self.emit_error(ErrorKind::EofInDoctype);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3787,7 +3765,8 @@ where
match self.consume_next_char() { match self.consume_next_char() {
// U+0022 QUOTATION MARK (") // U+0022 QUOTATION MARK (")
// Switch to the after DOCTYPE system identifier state. // Switch to the after DOCTYPE system identifier state.
Some('"') => { Some(c @ '"') => {
self.append_raw_to_doctype_token(c);
self.state = State::AfterDoctypeSystemIdentifier; self.state = State::AfterDoctypeSystemIdentifier;
} }
// U+0000 NULL // U+0000 NULL
@ -3795,19 +3774,16 @@ where
// REPLACEMENT CHARACTER character to the current DOCTYPE token's system // REPLACEMENT CHARACTER character to the current DOCTYPE token's system
// identifier. // identifier.
Some(c @ '\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.append_to_doctype_token( self.append_to_doctype_token(None, None, Some(REPLACEMENT_CHARACTER));
None,
None,
None,
Some((REPLACEMENT_CHARACTER, c)),
);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is an abrupt-doctype-system-identifier parse error. Set the current // This is an abrupt-doctype-system-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::AbruptDoctypeSystemIdentifier); self.emit_error(ErrorKind::AbruptDoctypeSystemIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3830,7 +3806,8 @@ where
// identifier. // identifier.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_to_doctype_token(None, None, None, Some((c, c))); self.append_raw_to_doctype_token(c);
self.append_to_doctype_token(None, None, Some(c));
} }
} }
} }
@ -3840,7 +3817,8 @@ where
match self.consume_next_char() { match self.consume_next_char() {
// U+0027 APOSTROPHE (') // U+0027 APOSTROPHE (')
// Switch to the after DOCTYPE system identifier state. // Switch to the after DOCTYPE system identifier state.
Some('\'') => { Some(c @ '\'') => {
self.append_raw_to_doctype_token(c);
self.state = State::AfterDoctypeSystemIdentifier; self.state = State::AfterDoctypeSystemIdentifier;
} }
// U+0000 NULL // U+0000 NULL
@ -3848,19 +3826,16 @@ where
// REPLACEMENT CHARACTER character to the current DOCTYPE token's system // REPLACEMENT CHARACTER character to the current DOCTYPE token's system
// identifier. // identifier.
Some(c @ '\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
self.append_to_doctype_token( self.append_to_doctype_token(None, None, Some(REPLACEMENT_CHARACTER));
None,
None,
None,
Some((REPLACEMENT_CHARACTER, c)),
);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// This is an abrupt-doctype-system-identifier parse error. Set the current // This is an abrupt-doctype-system-identifier parse error. Set the current
// DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit // DOCTYPE token's force-quirks flag to on. Switch to the data state. Emit
// the current DOCTYPE token. // the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::AbruptDoctypeSystemIdentifier); self.emit_error(ErrorKind::AbruptDoctypeSystemIdentifier);
self.set_force_quirks(); self.set_force_quirks();
self.state = State::Data; self.state = State::Data;
@ -3883,7 +3858,8 @@ where
// identifier. // identifier.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_to_doctype_token(None, None, None, Some((c, c))); self.append_raw_to_doctype_token(c);
self.append_to_doctype_token(None, None, Some(c));
} }
} }
} }
@ -3897,11 +3873,12 @@ where
// U+0020 SPACE // U+0020 SPACE
// Ignore the character. // Ignore the character.
Some(c) if is_spacy(c) => { Some(c) if is_spacy(c) => {
self.skip_next_lf(c); self.append_raw_to_doctype_token(c);
} }
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// Switch to the data state. Emit the current DOCTYPE token. // Switch to the data state. Emit the current DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
} }
@ -3933,13 +3910,15 @@ where
match self.consume_next_char() { match self.consume_next_char() {
// U+003E GREATER-THAN SIGN (>) // U+003E GREATER-THAN SIGN (>)
// Switch to the data state. Emit the DOCTYPE token. // Switch to the data state. Emit the DOCTYPE token.
Some('>') => { Some(c @ '>') => {
self.append_raw_to_doctype_token(c);
self.state = State::Data; self.state = State::Data;
self.emit_doctype_token(); self.emit_doctype_token();
} }
// U+0000 NULL // U+0000 NULL
// This is an unexpected-null-character parse error. Ignore the character. // This is an unexpected-null-character parse error. Ignore the character.
Some('\x00') => { Some(c @ '\x00') => {
self.append_raw_to_doctype_token(c);
self.emit_error(ErrorKind::UnexpectedNullCharacter); self.emit_error(ErrorKind::UnexpectedNullCharacter);
} }
// EOF // EOF
@ -3954,6 +3933,7 @@ where
// Ignore the character. // Ignore the character.
Some(c) => { Some(c) => {
self.validate_input_stream_character(c); self.validate_input_stream_character(c);
self.append_raw_to_doctype_token(c);
} }
} }
} }

View File

@ -401,11 +401,13 @@ where
name, name,
public_id, public_id,
system_id, system_id,
raw,
} => Child::DocumentType(DocumentType { } => Child::DocumentType(DocumentType {
span: start_span, span: start_span,
name, name,
public_id, public_id,
system_id, system_id,
raw,
}), }),
Data::Element { Data::Element {
namespace, namespace,
@ -1320,6 +1322,7 @@ where
public_id, public_id,
system_id, system_id,
force_quirks, force_quirks,
raw,
.. ..
} => { } => {
let is_html_name = matches!(name, Some(name) if name.as_ref().eq_ignore_ascii_case("html")); let is_html_name = matches!(name, Some(name) if name.as_ref().eq_ignore_ascii_case("html"));
@ -1340,6 +1343,7 @@ where
name: name.clone(), name: name.clone(),
public_id: public_id.clone(), public_id: public_id.clone(),
system_id: system_id.clone(), system_id: system_id.clone(),
raw: raw.clone(),
}, },
token_and_info.span, token_and_info.span,
); );

View File

@ -24,6 +24,7 @@ pub enum Data {
name: Option<JsWord>, name: Option<JsWord>,
public_id: Option<JsWord>, public_id: Option<JsWord>,
system_id: Option<JsWord>, system_id: Option<JsWord>,
raw: Option<JsWord>,
}, },
Element { Element {
namespace: Namespace, namespace: Namespace,

View File

@ -586,21 +586,8 @@ fn html5lib_test_tokenizer(input: PathBuf) {
let mut new_token = token_and_span.unwrap().token.clone(); let mut new_token = token_and_span.unwrap().token.clone();
match new_token { match new_token {
Token::Doctype { Token::Doctype { ref mut raw, .. } => {
ref mut raw_keyword, *raw = None;
ref mut raw_name,
ref mut public_quote,
ref mut raw_public_keyword,
ref mut system_quote,
ref mut raw_system_keyword,
..
} => {
*raw_keyword = None;
*raw_name = None;
*public_quote = None;
*raw_public_keyword = None;
*system_quote = None;
*raw_system_keyword = None;
} }
Token::StartTag { Token::StartTag {
ref mut raw_tag_name, ref mut raw_tag_name,
@ -678,16 +665,11 @@ fn html5lib_test_tokenizer(input: PathBuf) {
.expect("failed to deserialize"); .expect("failed to deserialize");
vec![Token::Doctype { vec![Token::Doctype {
raw_keyword: None,
name: name.map(|v| v.into()), name: name.map(|v| v.into()),
raw_name: None,
force_quirks: !correctness, force_quirks: !correctness,
raw_public_keyword: None,
public_quote: None,
public_id: public_id.map(|v| v.into()), public_id: public_id.map(|v| v.into()),
raw_system_keyword: None,
system_quote: None,
system_id: system_id.map(|v| v.into()), system_id: system_id.map(|v| v.into()),
raw: None,
}] }]
} }
"StartTag" => { "StartTag" => {

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": "about:legacy-compat" "systemId": "about:legacy-compat",
"raw": "<!DOCTYPE html SYSTEM \"about:legacy-compat\">"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE HTML>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype\rhtml>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype\nhtml>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!DOCTYPE html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

View File

@ -16,7 +16,8 @@
}, },
"name": "html", "name": "html",
"publicId": null, "publicId": null,
"systemId": null "systemId": null,
"raw": "<!doctype html>"
}, },
{ {
"type": "Element", "type": "Element",

Some files were not shown because too many files have changed in this diff Show More