mirror of
https://github.com/swc-project/swc.git
synced 2024-10-04 20:28:43 +03:00
perf(html/parser): Optimize usage of buffers (#6590)
This commit is contained in:
parent
d9c1c3a9bf
commit
d6e961368b
@ -199,7 +199,7 @@ where
|
|||||||
|
|
||||||
doctype.push('>');
|
doctype.push('>');
|
||||||
|
|
||||||
write_raw!(self, n.span, &doctype);
|
write_multiline_raw!(self, n.span, &doctype);
|
||||||
formatting_newline!(self);
|
formatting_newline!(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4,7 +4,7 @@ use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion};
|
|||||||
use swc_common::{input::StringInput, FileName};
|
use swc_common::{input::StringInput, FileName};
|
||||||
use swc_html_parser::lexer::Lexer;
|
use swc_html_parser::lexer::Lexer;
|
||||||
|
|
||||||
fn bench_lexer(b: &mut Bencher, src: &'static str) {
|
fn bench_document(b: &mut Bencher, src: &'static str) {
|
||||||
let _ = ::testing::run_test(false, |cm, _| {
|
let _ = ::testing::run_test(false, |cm, _| {
|
||||||
let fm = cm.new_source_file(FileName::Anon, src.into());
|
let fm = cm.new_source_file(FileName::Anon, src.into());
|
||||||
|
|
||||||
@ -20,18 +20,30 @@ fn bench_lexer(b: &mut Bencher, src: &'static str) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Registers a single lexer benchmark with Criterion.
///
/// The benchmark is named `html/lexer/<id>`, and every measurement forwards
/// `src` unchanged to `bench_document`.
fn run(c: &mut Criterion, id: &str, src: &'static str) {
    let bench_name = format!("html/lexer/{}", id);

    c.bench_function(&bench_name, |bencher| bench_document(bencher, src));
}
|
||||||
|
|
||||||
fn bench_files(c: &mut Criterion) {
|
fn bench_files(c: &mut Criterion) {
|
||||||
c.bench_function("html/lexer/css_2021_spec", |b| {
|
run(
|
||||||
bench_lexer(b, include_str!("./files/css_2021_spec.html"))
|
c,
|
||||||
});
|
"css_2021_spec",
|
||||||
|
include_str!("./files/css_2021_spec.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function("html/lexer/github_com_17_05_2022", |b| {
|
run(
|
||||||
bench_lexer(b, include_str!("./files/github_com_17_05_2022.html"))
|
c,
|
||||||
});
|
"github_com_17_05_2022",
|
||||||
|
include_str!("./files/github_com_17_05_2022.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function("html/lexer/stackoverflow_com_17_05_2022", |b| {
|
run(
|
||||||
bench_lexer(b, include_str!("./files/stackoverflow_com_17_05_2022.html"))
|
c,
|
||||||
});
|
"stackoverflow_com_17_05_2022",
|
||||||
|
include_str!("./files/stackoverflow_com_17_05_2022.html"),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(benches, bench_files);
|
criterion_group!(benches, bench_files);
|
||||||
|
@ -51,30 +51,52 @@ fn bench_document_fragment(b: &mut Bencher, src: &'static str) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Registers a whole-document parser benchmark with Criterion.
///
/// The benchmark is named `html/parser/<id>`, and every measurement forwards
/// `src` unchanged to `bench_document`.
fn run_document(c: &mut Criterion, id: &str, src: &'static str) {
    let bench_name = format!("html/parser/{}", id);

    c.bench_function(&bench_name, |bencher| bench_document(bencher, src));
}
|
||||||
|
/// Registers a document-fragment parser benchmark with Criterion.
///
/// The benchmark is named `html/parser/<id>`, and every measurement forwards
/// `src` unchanged to `bench_document_fragment`.
fn run_document_fragment(c: &mut Criterion, id: &str, src: &'static str) {
    let bench_name = format!("html/parser/{}", id);

    c.bench_function(&bench_name, |bencher| bench_document_fragment(bencher, src));
}
|
||||||
|
|
||||||
fn bench_files(c: &mut Criterion) {
|
fn bench_files(c: &mut Criterion) {
|
||||||
c.bench_function("html/parser_document/css_2021_spec", |b| {
|
run_document(
|
||||||
bench_document(b, include_str!("./files/css_2021_spec.html"))
|
c,
|
||||||
});
|
"parser_document/css_2021_spec",
|
||||||
|
include_str!("./files/css_2021_spec.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function("html/parser_document/github_com_17_05_2022", |b| {
|
run_document(
|
||||||
bench_document(b, include_str!("./files/github_com_17_05_2022.html"))
|
c,
|
||||||
});
|
"parser_document/github_com_17_05_2022",
|
||||||
|
include_str!("./files/github_com_17_05_2022.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function("html/parser_document/stackoverflow_com_17_05_2022", |b| {
|
run_document(
|
||||||
bench_document(b, include_str!("./files/stackoverflow_com_17_05_2022.html"))
|
c,
|
||||||
});
|
"parser_document/stackoverflow_com_17_05_2022",
|
||||||
|
include_str!("./files/stackoverflow_com_17_05_2022.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function("html/parser_document_fragment/css_2021_spec", |b| {
|
run_document_fragment(
|
||||||
bench_document_fragment(b, include_str!("./files/css_2021_spec.html"))
|
c,
|
||||||
});
|
"parser_document_fragment/css_2021_spec",
|
||||||
|
include_str!("./files/css_2021_spec.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function("html/parser_document_fragment/github_com_17_05_2022", |b| {
|
run_document_fragment(
|
||||||
bench_document_fragment(b, include_str!("./files/github_com_17_05_2022.html"))
|
c,
|
||||||
});
|
"parser_document_fragment/github_com_17_05_2022",
|
||||||
|
include_str!("./files/github_com_17_05_2022.html"),
|
||||||
|
);
|
||||||
|
|
||||||
c.bench_function(
|
run_document_fragment(
|
||||||
"html/parser_document_fragment/stackoverflow_com_17_05_2022",
|
c,
|
||||||
|b| bench_document_fragment(b, include_str!("./files/stackoverflow_com_17_05_2022.html")),
|
"parser_document_fragment/stackoverflow_com_17_05_2022",
|
||||||
|
include_str!("./files/stackoverflow_com_17_05_2022.html"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -225,9 +225,9 @@ where
|
|||||||
fn validate_input_stream_character(&mut self, c: char) {
|
fn validate_input_stream_character(&mut self, c: char) {
|
||||||
let code = c as u32;
|
let code = c as u32;
|
||||||
|
|
||||||
if (0xd800..=0xdfff).contains(&code) {
|
if is_surrogate(code) {
|
||||||
self.emit_error(ErrorKind::SurrogateInInputStream);
|
self.emit_error(ErrorKind::SurrogateInInputStream);
|
||||||
} else if code != 0x00 && is_control(code) {
|
} else if is_allowed_control_character(code) {
|
||||||
self.emit_error(ErrorKind::ControlCharacterInInputStream);
|
self.emit_error(ErrorKind::ControlCharacterInInputStream);
|
||||||
} else if is_noncharacter(code) {
|
} else if is_noncharacter(code) {
|
||||||
self.emit_error(ErrorKind::NoncharacterInInputStream);
|
self.emit_error(ErrorKind::NoncharacterInInputStream);
|
||||||
@ -432,6 +432,86 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_doctype_token_name<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
buf.push(c.to_ascii_lowercase());
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(&value.to_ascii_lowercase());
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_doctype_token_public_id<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
let is_cr = c == '\r';
|
||||||
|
|
||||||
|
if is_cr {
|
||||||
|
buf.push('\n');
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
if self.input.cur() == Some('\n') {
|
||||||
|
self.input.bump();
|
||||||
|
|
||||||
|
sub_buf.push('\n');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buf.push(c);
|
||||||
|
sub_buf.push(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(value);
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_doctype_token_system_id<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
let is_cr = c == '\r';
|
||||||
|
|
||||||
|
if is_cr {
|
||||||
|
buf.push('\n');
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
if self.input.cur() == Some('\n') {
|
||||||
|
self.input.bump();
|
||||||
|
|
||||||
|
sub_buf.push('\n');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buf.push(c);
|
||||||
|
sub_buf.push(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(value);
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn set_doctype_token_force_quirks(&mut self) {
|
fn set_doctype_token_force_quirks(&mut self) {
|
||||||
if let Some(Token::Doctype { force_quirks, .. }) = &mut self.current_token {
|
if let Some(Token::Doctype { force_quirks, .. }) = &mut self.current_token {
|
||||||
@ -550,6 +630,24 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_tag_token_name<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
buf.push(c.to_ascii_lowercase());
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(&value.to_ascii_lowercase());
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
fn finish_tag_token_name(&mut self) {
|
fn finish_tag_token_name(&mut self) {
|
||||||
if let Some(
|
if let Some(
|
||||||
Token::StartTag {
|
Token::StartTag {
|
||||||
@ -603,6 +701,46 @@ where
|
|||||||
sub_buf.push(raw_c);
|
sub_buf.push(raw_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_attribute_token_name<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: FnMut(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
buf.push(c.to_ascii_lowercase());
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(&value.to_ascii_lowercase());
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_attribute_token_name_and_temp_buf<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: FnMut(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
buf.push(c.to_ascii_lowercase());
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
self.temporary_buffer.push(c);
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(&value.to_ascii_lowercase());
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
|
||||||
|
self.temporary_buffer.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
fn finish_attribute_token_name(&mut self) {
|
fn finish_attribute_token_name(&mut self) {
|
||||||
if let Some(attribute_start_position) = self.attribute_start_position {
|
if let Some(attribute_start_position) = self.attribute_start_position {
|
||||||
if let Some(
|
if let Some(
|
||||||
@ -672,6 +810,37 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn consume_and_append_to_attribute_token_value<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: FnMut(char) -> bool,
|
||||||
|
{
|
||||||
|
let b = self.buf.clone();
|
||||||
|
let mut buf = b.borrow_mut();
|
||||||
|
let b = self.sub_buf.clone();
|
||||||
|
let mut sub_buf = b.borrow_mut();
|
||||||
|
|
||||||
|
let is_cr = c == '\r';
|
||||||
|
|
||||||
|
if is_cr {
|
||||||
|
buf.push('\n');
|
||||||
|
sub_buf.push(c);
|
||||||
|
|
||||||
|
if self.input.cur() == Some('\n') {
|
||||||
|
self.input.bump();
|
||||||
|
|
||||||
|
sub_buf.push('\n');
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
buf.push(c);
|
||||||
|
sub_buf.push(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(value);
|
||||||
|
sub_buf.push_str(value);
|
||||||
|
}
|
||||||
|
|
||||||
fn finish_attribute_token_value(&mut self) {
|
fn finish_attribute_token_value(&mut self) {
|
||||||
if let Some(attribute_start_position) = self.attribute_start_position {
|
if let Some(attribute_start_position) = self.attribute_start_position {
|
||||||
if let Some(
|
if let Some(
|
||||||
@ -770,7 +939,10 @@ where
|
|||||||
sub_buf.push(raw_c);
|
sub_buf.push(raw_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn handle_raw_and_append_to_comment_token(&mut self, c: char) {
|
fn consume_and_append_to_comment_token<F>(&mut self, c: char, f: F)
|
||||||
|
where
|
||||||
|
F: Fn(char) -> bool,
|
||||||
|
{
|
||||||
let b = self.buf.clone();
|
let b = self.buf.clone();
|
||||||
let mut buf = b.borrow_mut();
|
let mut buf = b.borrow_mut();
|
||||||
let b = self.sub_buf.clone();
|
let b = self.sub_buf.clone();
|
||||||
@ -791,6 +963,11 @@ where
|
|||||||
buf.push(c);
|
buf.push(c);
|
||||||
sub_buf.push(c);
|
sub_buf.push(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let value = self.input.uncons_while(f);
|
||||||
|
|
||||||
|
buf.push_str(value);
|
||||||
|
sub_buf.push_str(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn emit_comment_token(&mut self, raw_end: Option<&str>) {
|
fn emit_comment_token(&mut self, raw_end: Option<&str>) {
|
||||||
@ -1150,7 +1327,7 @@ where
|
|||||||
// Switch to the before attribute name state.
|
// Switch to the before attribute name state.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.finish_tag_token_name();
|
self.finish_tag_token_name();
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
self.state = State::BeforeAttributeName;
|
self.state = State::BeforeAttributeName;
|
||||||
}
|
}
|
||||||
// U+002F SOLIDUS (/)
|
// U+002F SOLIDUS (/)
|
||||||
@ -1170,7 +1347,7 @@ where
|
|||||||
// Append the lowercase version of the current input character (add 0x0020
|
// Append the lowercase version of the current input character (add 0x0020
|
||||||
// to the character's code point) to the current tag token's tag name.
|
// to the character's code point) to the current tag token's tag name.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c.to_ascii_lowercase(), c);
|
self.consume_and_append_to_tag_token_name(c, is_ascii_upper_alpha);
|
||||||
}
|
}
|
||||||
// U+0000 NULL
|
// U+0000 NULL
|
||||||
// This is an unexpected-null-character parse error. Append a U+FFFD
|
// This is an unexpected-null-character parse error. Append a U+FFFD
|
||||||
@ -1192,7 +1369,17 @@ where
|
|||||||
// Append the current input character to the current tag token's tag name.
|
// Append the current input character to the current tag token's tag name.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_to_tag_token_name(c, c);
|
self.consume_and_append_to_tag_token_name(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control
|
||||||
|
!is_spacy(c)
|
||||||
|
&& !matches!(c, '/' | '>' | '\x00')
|
||||||
|
&& !is_ascii_upper_alpha(c)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1257,7 +1444,7 @@ where
|
|||||||
// to the before attribute name state. Otherwise, treat it as per the
|
// to the before attribute name state. Otherwise, treat it as per the
|
||||||
// "anything else" entry below.
|
// "anything else" entry below.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
|
|
||||||
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
||||||
self.finish_tag_token_name();
|
self.finish_tag_token_name();
|
||||||
@ -1296,15 +1483,19 @@ where
|
|||||||
// to the character's code point) to the current tag token's tag name.
|
// to the character's code point) to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c.to_ascii_lowercase(), c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_upper_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// ASCII lower alpha
|
// ASCII lower alpha
|
||||||
// Append the current input character to the current tag token's tag name.
|
// Append the current input character to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_lower_alpha(c) => {
|
Some(c) if is_ascii_lower_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c, c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_lower_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// Anything else
|
// Anything else
|
||||||
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
||||||
@ -1377,7 +1568,7 @@ where
|
|||||||
// to the before attribute name state. Otherwise, treat it as per the
|
// to the before attribute name state. Otherwise, treat it as per the
|
||||||
// "anything else" entry below.
|
// "anything else" entry below.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
|
|
||||||
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
||||||
self.finish_tag_token_name();
|
self.finish_tag_token_name();
|
||||||
@ -1416,15 +1607,19 @@ where
|
|||||||
// to the character's code point) to the current tag token's tag name.
|
// to the character's code point) to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c.to_ascii_lowercase(), c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_upper_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// ASCII lower alpha
|
// ASCII lower alpha
|
||||||
// Append the current input character to the current tag token's tag name.
|
// Append the current input character to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_lower_alpha(c) => {
|
Some(c) if is_ascii_lower_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c, c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_lower_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// Anything else
|
// Anything else
|
||||||
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
||||||
@ -1505,7 +1700,7 @@ where
|
|||||||
// to the before attribute name state. Otherwise, treat it as per the
|
// to the before attribute name state. Otherwise, treat it as per the
|
||||||
// "anything else" entry below.
|
// "anything else" entry below.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
|
|
||||||
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
||||||
self.finish_tag_token_name();
|
self.finish_tag_token_name();
|
||||||
@ -1544,15 +1739,19 @@ where
|
|||||||
// to the character's code point) to the current tag token's tag name.
|
// to the character's code point) to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c.to_ascii_lowercase(), c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_upper_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// ASCII lower alpha
|
// ASCII lower alpha
|
||||||
// Append the current input character to the current tag token's tag name.
|
// Append the current input character to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_lower_alpha(c) => {
|
Some(c) if is_ascii_lower_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c, c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_lower_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// Anything else
|
// Anything else
|
||||||
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
||||||
@ -1801,7 +2000,7 @@ where
|
|||||||
// to the before attribute name state. Otherwise, treat it as per the
|
// to the before attribute name state. Otherwise, treat it as per the
|
||||||
// "anything else" entry below.
|
// "anything else" entry below.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
|
|
||||||
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
if self.current_end_tag_token_is_an_appropriate_end_tag_token() {
|
||||||
self.finish_tag_token_name();
|
self.finish_tag_token_name();
|
||||||
@ -1840,15 +2039,19 @@ where
|
|||||||
// to the character's code point) to the current tag token's tag name.
|
// to the character's code point) to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c.to_ascii_lowercase(), c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_upper_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// ASCII lower alpha
|
// ASCII lower alpha
|
||||||
// Append the current input character to the current tag token's tag name.
|
// Append the current input character to the current tag token's tag name.
|
||||||
// Append the current input character to the temporary buffer.
|
// Append the current input character to the temporary buffer.
|
||||||
Some(c) if is_ascii_lower_alpha(c) => {
|
Some(c) if is_ascii_lower_alpha(c) => {
|
||||||
self.append_to_tag_token_name(c, c);
|
self.consume_and_append_to_attribute_token_name_and_temp_buf(
|
||||||
self.temporary_buffer.push(c);
|
c,
|
||||||
|
is_ascii_lower_alpha,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
// Anything else
|
// Anything else
|
||||||
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
// Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character
|
||||||
@ -2143,7 +2346,7 @@ where
|
|||||||
// U+0020 SPACE
|
// U+0020 SPACE
|
||||||
// Ignore the character.
|
// Ignore the character.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
}
|
}
|
||||||
// U+002F SOLIDUS (/)
|
// U+002F SOLIDUS (/)
|
||||||
// U+003E GREATER-THAN SIGN (>)
|
// U+003E GREATER-THAN SIGN (>)
|
||||||
@ -2192,7 +2395,7 @@ where
|
|||||||
// Reconsume in the after attribute name state.
|
// Reconsume in the after attribute name state.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.finish_attribute_token_name();
|
self.finish_attribute_token_name();
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
self.reconsume_in_state(State::AfterAttributeName);
|
self.reconsume_in_state(State::AfterAttributeName);
|
||||||
}
|
}
|
||||||
Some('/' | '>') | None => {
|
Some('/' | '>') | None => {
|
||||||
@ -2209,7 +2412,9 @@ where
|
|||||||
// Append the lowercase version of the current input character (add 0x0020
|
// Append the lowercase version of the current input character (add 0x0020
|
||||||
// to the character's code point) to the current attribute's name.
|
// to the character's code point) to the current attribute's name.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_to_attribute_token_name(c.to_ascii_lowercase(), c);
|
self.consume_and_append_to_attribute_token_name(c, |c| {
|
||||||
|
is_ascii_upper_alpha(c)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
// U+0000 NULL
|
// U+0000 NULL
|
||||||
// This is an unexpected-null-character parse error. Append a U+FFFD
|
// This is an unexpected-null-character parse error. Append a U+FFFD
|
||||||
@ -2232,8 +2437,17 @@ where
|
|||||||
// Append the current input character to the current attribute's name.
|
// Append the current input character to the current attribute's name.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
|
self.consume_and_append_to_attribute_token_name(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
anything_else(self, c);
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control
|
||||||
|
!is_spacy(c)
|
||||||
|
&& !matches!(c, '/' | '>' | '=' | '\x00' | '"' | '\'' | '<')
|
||||||
|
&& !is_ascii_upper_alpha(c)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2257,7 +2471,7 @@ where
|
|||||||
// U+0020 SPACE
|
// U+0020 SPACE
|
||||||
// Ignore the character.
|
// Ignore the character.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
}
|
}
|
||||||
// U+002F SOLIDUS (/)
|
// U+002F SOLIDUS (/)
|
||||||
// Switch to the self-closing start tag state.
|
// Switch to the self-closing start tag state.
|
||||||
@ -2303,7 +2517,7 @@ where
|
|||||||
// U+0020 SPACE
|
// U+0020 SPACE
|
||||||
// Ignore the character.
|
// Ignore the character.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
}
|
}
|
||||||
// U+0022 QUOTATION MARK (")
|
// U+0022 QUOTATION MARK (")
|
||||||
// Switch to the attribute value (double-quoted) state.
|
// Switch to the attribute value (double-quoted) state.
|
||||||
@ -2369,7 +2583,15 @@ where
|
|||||||
// Append the current input character to the current attribute's value.
|
// Append the current input character to the current attribute's value.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_to_attribute_token_value(Some(c), Some(c));
|
self.consume_and_append_to_attribute_token_value(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control, `\r` is in list because of newline normalization
|
||||||
|
!matches!(c, '"' | '&' | '\x00' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2410,7 +2632,15 @@ where
|
|||||||
// Append the current input character to the current attribute's value.
|
// Append the current input character to the current attribute's value.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_to_attribute_token_value(Some(c), Some(c));
|
self.consume_and_append_to_attribute_token_value(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control, `\r` is in list because of newline normalization
|
||||||
|
!matches!(c, '\'' | '&' | '\x00' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2429,7 +2659,7 @@ where
|
|||||||
// Switch to the before attribute name state.
|
// Switch to the before attribute name state.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.finish_attribute_token_value();
|
self.finish_attribute_token_value();
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
self.state = State::BeforeAttributeName;
|
self.state = State::BeforeAttributeName;
|
||||||
}
|
}
|
||||||
// U+0026 AMPERSAND (&)
|
// U+0026 AMPERSAND (&)
|
||||||
@ -2479,8 +2709,19 @@ where
|
|||||||
// Append the current input character to the current attribute's value.
|
// Append the current input character to the current attribute's value.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
|
self.consume_and_append_to_attribute_token_value(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
anything_else(self, c);
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control, `\r` is in list because of newline normalization
|
||||||
|
!is_spacy(c)
|
||||||
|
&& !matches!(
|
||||||
|
c,
|
||||||
|
'&' | '>' | '\x00' | '"' | '\'' | '<' | '=' | '`' | '\r'
|
||||||
|
)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2495,7 +2736,7 @@ where
|
|||||||
// Switch to the before attribute name state.
|
// Switch to the before attribute name state.
|
||||||
Some(c) if is_spacy(c) => {
|
Some(c) if is_spacy(c) => {
|
||||||
self.finish_attribute_token_value();
|
self.finish_attribute_token_value();
|
||||||
self.skip_next_lf(c);
|
self.skip_whitespaces(c);
|
||||||
self.state = State::BeforeAttributeName;
|
self.state = State::BeforeAttributeName;
|
||||||
}
|
}
|
||||||
// U+002F SOLIDUS (/)
|
// U+002F SOLIDUS (/)
|
||||||
@ -2599,7 +2840,15 @@ where
|
|||||||
// Append the current input character to the comment token's data.
|
// Append the current input character to the comment token's data.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.handle_raw_and_append_to_comment_token(c);
|
self.consume_and_append_to_comment_token(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control, `\r` is in list because of newline normalization
|
||||||
|
!matches!(c, '>' | '\x00' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2832,7 +3081,15 @@ where
|
|||||||
// Append the current input character to the comment token's data.
|
// Append the current input character to the comment token's data.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.handle_raw_and_append_to_comment_token(c);
|
self.consume_and_append_to_comment_token(c, |c| {
|
||||||
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// List of characters from above to stop consumption and a certain
|
||||||
|
// branch took control, `\r` is in list because of newline normalization
|
||||||
|
!matches!(c, '<' | '-' | '\x00' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3157,8 +3414,7 @@ where
|
|||||||
// Append the lowercase version of the current input character (add 0x0020
|
// Append the lowercase version of the current input character (add 0x0020
|
||||||
// to the character's code point) to the current DOCTYPE token's name.
|
// to the character's code point) to the current DOCTYPE token's name.
|
||||||
Some(c) if is_ascii_upper_alpha(c) => {
|
Some(c) if is_ascii_upper_alpha(c) => {
|
||||||
self.append_raw_to_doctype_token(c);
|
self.consume_and_append_to_doctype_token_name(c, is_ascii_upper_alpha);
|
||||||
self.append_to_doctype_token(Some(c.to_ascii_lowercase()), None, None);
|
|
||||||
}
|
}
|
||||||
// U+0000 NULL
|
// U+0000 NULL
|
||||||
// This is an unexpected-null-character parse error. Append a U+FFFD
|
// This is an unexpected-null-character parse error. Append a U+FFFD
|
||||||
@ -3185,8 +3441,13 @@ where
|
|||||||
// Append the current input character to the current DOCTYPE token's name.
|
// Append the current input character to the current DOCTYPE token's name.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_raw_to_doctype_token(c);
|
self.consume_and_append_to_doctype_token_name(c, |c| {
|
||||||
self.append_to_doctype_token(Some(c), None, None);
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
!is_spacy(c) && !matches!(c, '>' | '\x00') && !is_ascii_upper_alpha(c)
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3469,8 +3730,13 @@ where
|
|||||||
// identifier.
|
// identifier.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_raw_to_doctype_token(c);
|
self.consume_and_append_to_doctype_token_public_id(c, |c| {
|
||||||
self.append_to_doctype_token(None, Some(c), None);
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
!matches!(c, '"' | '\x00' | '>' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3524,8 +3790,13 @@ where
|
|||||||
// identifier.
|
// identifier.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_raw_to_doctype_token(c);
|
self.consume_and_append_to_doctype_token_public_id(c, |c| {
|
||||||
self.append_to_doctype_token(None, Some(c), None);
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
!matches!(c, '\'' | '\x00' | '>' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3841,8 +4112,13 @@ where
|
|||||||
// identifier.
|
// identifier.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_raw_to_doctype_token(c);
|
self.consume_and_append_to_doctype_token_system_id(c, |c| {
|
||||||
self.append_to_doctype_token(None, None, Some(c));
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
!matches!(c, '"' | '\x00' | '>' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3896,8 +4172,13 @@ where
|
|||||||
// identifier.
|
// identifier.
|
||||||
Some(c) => {
|
Some(c) => {
|
||||||
self.validate_input_stream_character(c);
|
self.validate_input_stream_character(c);
|
||||||
self.append_raw_to_doctype_token(c);
|
self.consume_and_append_to_doctype_token_system_id(c, |c| {
|
||||||
self.append_to_doctype_token(None, None, Some(c));
|
if !is_allowed_character(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
!matches!(c, '\'' | '\x00' | '>' | '\r')
|
||||||
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4061,7 +4342,6 @@ where
|
|||||||
// numeric character reference state.
|
// numeric character reference state.
|
||||||
Some(c @ '#') => {
|
Some(c @ '#') => {
|
||||||
self.temporary_buffer.push(c);
|
self.temporary_buffer.push(c);
|
||||||
|
|
||||||
self.state = State::NumericCharacterReference;
|
self.state = State::NumericCharacterReference;
|
||||||
}
|
}
|
||||||
// Anything else
|
// Anything else
|
||||||
@ -4532,7 +4812,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn skip_next_lf(&mut self, c: char) {
|
fn skip_whitespaces(&mut self, c: char) {
|
||||||
if c == '\r' && self.input.cur() == Some('\n') {
|
if c == '\r' && self.input.cur() == Some('\n') {
|
||||||
self.input.bump();
|
self.input.bump();
|
||||||
}
|
}
|
||||||
@ -4634,3 +4914,19 @@ fn is_ascii_lower_alpha(c: char) -> bool {
|
|||||||
fn is_ascii_alpha(c: char) -> bool {
|
fn is_ascii_alpha(c: char) -> bool {
|
||||||
is_ascii_upper_alpha(c) || is_ascii_lower_alpha(c)
|
is_ascii_upper_alpha(c) || is_ascii_lower_alpha(c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_allowed_control_character(c: u32) -> bool {
|
||||||
|
c != 0x00 && is_control(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn is_allowed_character(c: char) -> bool {
|
||||||
|
let c = c as u32;
|
||||||
|
|
||||||
|
if is_surrogate(c) || is_allowed_control_character(c) || is_noncharacter(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
@ -0,0 +1,6 @@
|
|||||||
|
| <!DOCTYPE html "-//W3C//DTD HTML 4.01
|
||||||
|
|
||||||
|
Transitional//EN" "">
|
||||||
|
| <html>
|
||||||
|
| <head>
|
||||||
|
| <body>
|
@ -0,0 +1,3 @@
|
|||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
|
||||||
|
|
||||||
|
Transitional//EN">
|
@ -0,0 +1,66 @@
|
|||||||
|
{
|
||||||
|
"type": "Document",
|
||||||
|
"span": {
|
||||||
|
"start": 1,
|
||||||
|
"end": 65,
|
||||||
|
"ctxt": 0
|
||||||
|
},
|
||||||
|
"mode": "no-quirks",
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"type": "DocumentType",
|
||||||
|
"span": {
|
||||||
|
"start": 1,
|
||||||
|
"end": 65,
|
||||||
|
"ctxt": 0
|
||||||
|
},
|
||||||
|
"name": "html",
|
||||||
|
"publicId": "-//W3C//DTD HTML 4.01\n\nTransitional//EN",
|
||||||
|
"systemId": null,
|
||||||
|
"raw": "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01\n\nTransitional//EN\">"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "Element",
|
||||||
|
"span": {
|
||||||
|
"start": 0,
|
||||||
|
"end": 0,
|
||||||
|
"ctxt": 0
|
||||||
|
},
|
||||||
|
"tagName": "html",
|
||||||
|
"namespace": "http://www.w3.org/1999/xhtml",
|
||||||
|
"attributes": [],
|
||||||
|
"children": [
|
||||||
|
{
|
||||||
|
"type": "Element",
|
||||||
|
"span": {
|
||||||
|
"start": 0,
|
||||||
|
"end": 0,
|
||||||
|
"ctxt": 0
|
||||||
|
},
|
||||||
|
"tagName": "head",
|
||||||
|
"namespace": "http://www.w3.org/1999/xhtml",
|
||||||
|
"attributes": [],
|
||||||
|
"children": [],
|
||||||
|
"content": null,
|
||||||
|
"isSelfClosing": false
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "Element",
|
||||||
|
"span": {
|
||||||
|
"start": 0,
|
||||||
|
"end": 0,
|
||||||
|
"ctxt": 0
|
||||||
|
},
|
||||||
|
"tagName": "body",
|
||||||
|
"namespace": "http://www.w3.org/1999/xhtml",
|
||||||
|
"attributes": [],
|
||||||
|
"children": [],
|
||||||
|
"content": null,
|
||||||
|
"isSelfClosing": false
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"content": null,
|
||||||
|
"isSelfClosing": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
@ -0,0 +1,7 @@
|
|||||||
|
|
||||||
|
x Non conforming doctype
|
||||||
|
,-[$DIR/tests/recovery/doctype/newline/input.html:1:1]
|
||||||
|
1 | ,-> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
|
||||||
|
2 | |
|
||||||
|
3 | `-> Transitional//EN">
|
||||||
|
`----
|
@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
x Document
|
||||||
|
,-[$DIR/tests/recovery/doctype/newline/input.html:1:1]
|
||||||
|
1 | ,-> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
|
||||||
|
2 | |
|
||||||
|
3 | `-> Transitional//EN">
|
||||||
|
`----
|
||||||
|
|
||||||
|
x Child
|
||||||
|
,-[$DIR/tests/recovery/doctype/newline/input.html:1:1]
|
||||||
|
1 | ,-> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
|
||||||
|
2 | |
|
||||||
|
3 | `-> Transitional//EN">
|
||||||
|
`----
|
||||||
|
|
||||||
|
x DocumentType
|
||||||
|
,-[$DIR/tests/recovery/doctype/newline/input.html:1:1]
|
||||||
|
1 | ,-> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01
|
||||||
|
2 | |
|
||||||
|
3 | `-> Transitional//EN">
|
||||||
|
`----
|
||||||
|
|
||||||
|
x Child
|
||||||
|
|
||||||
|
x Element
|
Loading…
Reference in New Issue
Block a user