#![deny(warnings)]
#![allow(clippy::if_same_then_else)]
#![allow(clippy::needless_update)]
#![allow(clippy::redundant_clone)]
#![allow(clippy::while_let_on_iterator)]

use std::{fs, mem::take, path::PathBuf};

use serde_json::Value;
use swc_atoms::{js_word, JsWord};
use swc_common::{
    collections::AHashSet,
    errors::Handler,
    input::{SourceFileInput, StringInput},
    BytePos, Spanned,
};
use swc_html_ast::*;
use swc_html_parser::{
    error::ErrorKind,
    lexer::{Lexer, State},
    parser::{input::ParserInput, PResult, Parser, ParserConfig},
};
use swc_html_visit::{Visit, VisitMut, VisitMutWith, VisitWith};
use testing::NormalizedOutput;
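
/// Parses a fixture as a complete document and compares the serialized AST
/// against the `output.json` snapshot stored next to the input file.
/// Any parser error fails the test.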
fn document_test(input: PathBuf, config: ParserConfig) {
    testing::run_test2(false, |cm, handler| {
        let json_path = input.parent().unwrap().join("output.json");
        let fm = cm.load_file(&input).unwrap();
        let lexer = Lexer::new(SourceFileInput::from(&*fm));
        let mut parser = Parser::new(lexer, config);
        let document: PResult<Document> = parser.parse_document();
        let errors = parser.take_errors();

        for err in &errors {
            err.to_diagnostics(&handler).emit();
        }

        if !errors.is_empty() {
            return Err(());
        }

        match document {
            Ok(document) => {
                let actual_json = serde_json::to_string_pretty(&document)
                    .map(NormalizedOutput::from)
                    .expect("failed to serialize document");

                actual_json.compare_to_file(&json_path).unwrap();

                Ok(())
            }
            Err(err) => {
                let mut d = err.to_diagnostics(&handler);

                d.note(&format!("current token = {}", parser.dump_cur()));
                d.emit();

                Err(())
            }
        }
    })
    .unwrap();
}
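
/// Like `document_test`, but for inputs that are expected to produce
/// recoverable errors: the parser must report at least one error, and the
/// emitted diagnostics are compared against the `output.stderr` snapshot.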
fn document_recovery_test(input: PathBuf, config: ParserConfig) {
    let stderr_path = input.parent().unwrap().join("output.stderr");
    let mut recovered = false;

    let stderr = testing::run_test2(false, |cm, handler| {
        // Type annotation
        if false {
            return Ok(());
        }

        let json_path = input.parent().unwrap().join("output.json");
        let fm = cm.load_file(&input).unwrap();
        let lexer = Lexer::new(SourceFileInput::from(&*fm));
        let mut parser = Parser::new(lexer, config);
        let document: PResult<Document> = parser.parse_document();
        let errors = parser.take_errors();

        for err in &errors {
            err.to_diagnostics(&handler).emit();
        }

        if !errors.is_empty() {
            recovered = true;
        }

        match document {
            Ok(document) => {
                let actual_json = serde_json::to_string_pretty(&document)
                    .map(NormalizedOutput::from)
                    .expect("failed to serialize document");

                actual_json.compare_to_file(&json_path).unwrap();

                Err(())
            }
            Err(err) => {
                let mut d = err.to_diagnostics(&handler);

                d.note(&format!("current token = {}", parser.dump_cur()));
                d.emit();

                Err(())
            }
        }
    })
    .unwrap_err();

    if !recovered {
        panic!(
            "Parser should emit errors (recover mode), but parser parsed everything successfully \
             {}",
            stderr
        );
    }

    stderr.compare_to_file(&stderr_path).unwrap();
}
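
/// Parses the input and reports the span of every AST node as a diagnostic
/// via `SpanVisualizer`; the rendered output is compared against the
/// `span.rust-debug` snapshot.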
fn document_span_visualizer(input: PathBuf, config: ParserConfig) {
    let dir = input.parent().unwrap().to_path_buf();

    let output = testing::run_test2(false, |cm, handler| {
        // Type annotation
        if false {
            return Ok(());
        }

        let fm = cm.load_file(&input).unwrap();
        let lexer = Lexer::new(SourceFileInput::from(&*fm));
        let mut parser = Parser::new(lexer, config);

        let document: PResult<Document> = parser.parse_document();

        match document {
            Ok(document) => {
                document.visit_with(&mut SpanVisualizer { handler: &handler });

                Err(())
            }
            Err(err) => {
                let mut d = err.to_diagnostics(&handler);

                d.note(&format!("current token = {}", parser.dump_cur()));
                d.emit();

                panic!();
            }
        }
    })
    .unwrap_err();

    output
        .compare_to_file(&dir.join("span.rust-debug"))
        .unwrap();
}
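
/// Parses the input and renders it with `DomVisualizer`, comparing the
/// resulting DOM dump against the `dom.rust-debug` snapshot.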
fn document_dom_visualizer(input: PathBuf, config: ParserConfig) {
    let dir = input.parent().unwrap().to_path_buf();

    testing::run_test2(false, |cm, handler| {
        // Type annotation
        if false {
            return Ok(());
        }

        let fm = cm.load_file(&input).unwrap();
        let lexer = Lexer::new(SourceFileInput::from(&*fm));
        let mut parser = Parser::new(lexer, config);

        let document: PResult<Document> = parser.parse_document();

        match document {
            Ok(mut document) => {
                let mut dom_buf = String::new();

                document.visit_mut_with(&mut DomVisualizer {
                    dom_buf: &mut dom_buf,
                    indent: 0,
                });

                NormalizedOutput::from(dom_buf)
                    .compare_to_file(&dir.join("dom.rust-debug"))
                    .unwrap();

                Ok(())
            }
            Err(err) => {
                let mut d = err.to_diagnostics(&handler);

                d.note(&format!("current token = {}", parser.dump_cur()));
                d.emit();

                panic!();
            }
        }
    })
    .unwrap();
}
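
/// Visitor that emits one diagnostic per visited node, labelled with the
/// node type, so span coverage can be snapshot-tested.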
struct SpanVisualizer<'a> {
    handler: &'a Handler,
}

macro_rules! mtd {
    ($T:ty,$name:ident) => {
        fn $name(&mut self, n: &$T) {
            let span = n.span();

            self.handler.struct_span_err(span, stringify!($T)).emit();

            n.visit_children_with(self);
        }
    };
}

impl Visit for SpanVisualizer<'_> {
    mtd!(Document, visit_document);

    mtd!(DocumentFragment, visit_document_fragment);

    mtd!(Child, visit_child);

    mtd!(DocumentType, visit_document_type);

    mtd!(Element, visit_element);

    mtd!(Attribute, visit_attribute);

    mtd!(Text, visit_text);

    mtd!(Comment, visit_comment);
}
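
/// Visitor that renders the DOM as indented, `| `-prefixed lines, matching
/// the tree format used by the html5lib tree-construction expectations.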
struct DomVisualizer<'a> {
    dom_buf: &'a mut String,
    indent: usize,
}

impl DomVisualizer<'_> {
    fn get_ident(&self) -> String {
        let mut indent = String::new();

        indent.push_str("| ");
        indent.push_str(&"  ".repeat(self.indent));

        indent
    }
}

impl VisitMut for DomVisualizer<'_> {
    fn visit_mut_document_type(&mut self, n: &mut DocumentType) {
        let mut document_type = String::new();

        document_type.push_str(&self.get_ident());
        document_type.push_str("<!DOCTYPE ");

        if let Some(name) = &n.name {
            document_type.push_str(name);
        }

        if let Some(public_id) = &n.public_id {
            document_type.push(' ');
            document_type.push('"');
            document_type.push_str(public_id);
            document_type.push('"');

            if let Some(system_id) = &n.system_id {
                document_type.push(' ');
                document_type.push('"');
                document_type.push_str(system_id);
                document_type.push('"');
            } else {
                document_type.push(' ');
                document_type.push('"');
                document_type.push('"');
            }
        } else if let Some(system_id) = &n.system_id {
            document_type.push(' ');
            document_type.push('"');
            document_type.push('"');
            document_type.push(' ');
            document_type.push('"');
            document_type.push_str(system_id);
            document_type.push('"');
        }

        document_type.push('>');
        document_type.push('\n');

        self.dom_buf.push_str(&document_type);

        n.visit_mut_children_with(self);
    }

    fn visit_mut_element(&mut self, n: &mut Element) {
        let mut element = String::new();

        element.push_str(&self.get_ident());
        element.push('<');

        match n.namespace {
            Namespace::SVG => {
                element.push_str("svg ");
            }
            Namespace::MATHML => {
                element.push_str("math ");
            }
            _ => {}
        }

        element.push_str(&n.tag_name);
        element.push('>');
        element.push('\n');

        let is_template = n.namespace == Namespace::HTML && &*n.tag_name == "template";

        if is_template {
            self.indent += 1;

            element.push_str(&self.get_ident());
            element.push_str("content");
            element.push('\n');
        }

        n.attributes
            .sort_by(|a, b| a.name.partial_cmp(&b.name).unwrap());

        self.dom_buf.push_str(&element);

        let old_indent = self.indent;

        self.indent += 1;

        n.visit_mut_children_with(self);

        self.indent = old_indent;

        if is_template {
            self.indent -= 1;
        }
    }

    fn visit_mut_attribute(&mut self, n: &mut Attribute) {
        let mut attribute = String::new();

        attribute.push_str(&self.get_ident());

        if let Some(prefix) = &n.prefix {
            attribute.push_str(prefix);
            attribute.push(' ');
        }

        attribute.push_str(&n.name);
        attribute.push('=');
        attribute.push('"');

        if let Some(value) = &n.value {
            attribute.push_str(value);
        }

        attribute.push('"');
        attribute.push('\n');

        self.dom_buf.push_str(&attribute);

        n.visit_mut_children_with(self);
    }

    fn visit_mut_text(&mut self, n: &mut Text) {
        let mut text = String::new();

        text.push_str(&self.get_ident());
        text.push('"');
        text.push_str(&n.data);
        text.push('"');
        text.push('\n');

        self.dom_buf.push_str(&text);

        n.visit_mut_children_with(self);
    }

    fn visit_mut_comment(&mut self, n: &mut Comment) {
        let mut comment = String::new();

        comment.push_str(&self.get_ident());
        comment.push_str("<!-- ");
        comment.push_str(&n.data);
        comment.push_str(" -->");
        comment.push('\n');

        self.dom_buf.push_str(&comment);

        n.visit_mut_children_with(self);
    }
}

#[testing::fixture("tests/fixture/**/*.html")]
fn pass(input: PathBuf) {
    document_test(
        input,
        ParserConfig {
            ..Default::default()
        },
    )
}

#[testing::fixture("tests/iframe_srcdoc/**/*.html")]
fn pass_iframe_srcdoc(input: PathBuf) {
    document_test(
        input,
        ParserConfig {
            iframe_srcdoc: true,
            ..Default::default()
        },
    )
}

#[testing::fixture("tests/recovery/**/*.html")]
fn recovery(input: PathBuf) {
    document_recovery_test(
        input,
        ParserConfig {
            ..Default::default()
        },
    )
}

#[testing::fixture("tests/fixture/**/*.html")]
#[testing::fixture("tests/recovery/**/*.html")]
#[testing::fixture("tests/iframe_srcdoc/**/*.html")]
#[testing::fixture("tests/html5lib-tests-fixture/**/*.html")]
fn span_visualizer(input: PathBuf) {
    document_span_visualizer(
        input,
        ParserConfig {
            ..Default::default()
        },
    )
}

#[testing::fixture("tests/fixture/**/*.html")]
#[testing::fixture("tests/recovery/**/*.html")]
#[testing::fixture("tests/iframe_srcdoc/**/*.html")]
fn dom_visualizer(input: PathBuf) {
    document_dom_visualizer(
        input,
        ParserConfig {
            ..Default::default()
        },
    )
}
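
/// Decodes the `\uXXXX` escapes used by "doubleEscaped" html5lib tokenizer
/// tests. Returns `None` when an escape does not form a valid `char`
/// (e.g. a lone surrogate); callers skip such tests.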
fn unescape(s: &str) -> Option<String> {
    let mut out = String::with_capacity(s.len());
    let mut it = s.chars().peekable();

    loop {
        match it.next() {
            None => return Some(out),
            Some('\\') => {
                if it.peek() != Some(&'u') {
                    panic!("can't understand escape");
                }

                // Skip the `u` marker so that exactly the four hex digits are collected.
                it.next();

                let hex: String = it.by_ref().take(4).collect();

                match u32::from_str_radix(&hex, 16).ok().and_then(char::from_u32) {
                    // TODO fix me surrogate pairs
                    // Some of the tests use lone surrogates, but we have no
                    // way to represent them in the UTF-8 input to our parser.
                    // Since these can only come from script, we will catch
                    // them there.
                    None => return None,
                    Some(c) => out.push(c),
                }
            }
            Some(c) => out.push(c),
        }
    }
}
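
/// Runs the html5lib tokenizer suite: for each test and initial lexer state,
/// tokenizes the input, normalizes raw text/spans on both the actual and the
/// expected side, and compares the token streams and reported lexer errors
/// against the `.test` JSON expectations.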
// TODO we need to enable `preserve_order` for serde, but we can't https://github.com/tkaitchuck/aHash/issues/95, so we sort attributes
#[testing::fixture("tests/html5lib-tests/tokenizer/**/*.test")]
fn html5lib_test_tokenizer(input: PathBuf) {
    let filename = input.to_str().expect("failed to parse path");
    let contents = fs::read_to_string(filename).expect("Something went wrong reading the file");
    let obj: Value = serde_json::from_str(&contents).expect("json parse error");

    let tests = match obj.get(&"tests".to_string()) {
        Some(&Value::Array(ref tests)) => tests,
        _ => return,
    };

    for test in tests.iter() {
        let description = test
            .get("description")
            .expect("failed to get input in test");

        let states = if let Some(initial_states) = test.get("initialStates") {
            let mut states = vec![];
            let json_states: Vec<String> = serde_json::from_value(initial_states.clone())
                .expect("failed to get input in test");

            for json_state in json_states {
                match &*json_state {
                    "Data state" => {
                        states.push(State::Data);
                    }
                    "PLAINTEXT state" => {
                        states.push(State::PlainText);
                    }
                    "RCDATA state" => {
                        states.push(State::Rcdata);
                    }
                    "RAWTEXT state" => {
                        states.push(State::Rawtext);
                    }
                    "Script data state" => {
                        states.push(State::ScriptData);
                    }
                    "CDATA section state" => {
                        states.push(State::CdataSection);
                    }
                    _ => {
                        unreachable!()
                    }
                }
            }

            states
        } else {
            vec![State::Data]
        };

        for state in states.iter() {
            eprintln!("==== ==== Description ==== ====\n{}\n", description);

            let json_input = test["input"].clone();
            let mut input: String =
                serde_json::from_value(json_input).expect("failed to get input in test");

            let need_double_escaped = test.get("doubleEscaped").is_some();

            if need_double_escaped {
                input = match unescape(&input) {
                    Some(unescaped) => unescaped,
                    _ => {
                        continue;
                    }
                };
            }

            eprintln!("==== ==== Input ==== ====\n{}\n", input);

            let json_output = test["output"].clone();
            let output = json_output.to_string();

            eprintln!("==== ==== Output ==== ====\n{}\n", output);

            let lexer_str_input = StringInput::new(&input, BytePos(0), BytePos(input.len() as u32));
            let mut lexer = Lexer::new(lexer_str_input);

            lexer.set_input_state(state.clone());

            if let Some(last_start_tag) = test.get("lastStartTag") {
                let last_start_tag: String = serde_json::from_value(last_start_tag.clone())
                    .expect("failed to get lastStartTag in test");

                lexer.set_last_start_tag_name(&last_start_tag);
            }

            let mut actual_tokens = vec![];

            loop {
                let token_and_span = lexer.next();

                if token_and_span.is_none() {
                    break;
                }

                let mut new_token = token_and_span.unwrap().token.clone();

                match new_token {
                    Token::Doctype { ref mut raw, .. } => {
                        *raw = None;
                    }
                    Token::StartTag {
                        ref mut raw_tag_name,
                        ref mut attributes,
                        ..
                    } => {
                        *raw_tag_name = None;

                        let mut new_attributes = vec![];
                        let mut already_seen: AHashSet<JsWord> = Default::default();

                        for mut attribute in take(attributes) {
                            if already_seen.contains(&attribute.name) {
                                continue;
                            }

                            already_seen.insert(attribute.name.clone());

                            if attribute.value.is_none() {
                                attribute.value = Some("".into());
                            }

                            attribute.span = Default::default();
                            attribute.raw_name = None;
                            attribute.raw_value = None;

                            new_attributes.push(attribute);
                        }

                        new_attributes.sort_by(|a, b| a.name.partial_cmp(&b.name).unwrap());

                        *attributes = new_attributes;
                    }
                    Token::EndTag {
                        ref mut raw_tag_name,
                        ref mut attributes,
                        ref mut is_self_closing,
                        ..
                    } => {
                        *raw_tag_name = None;
                        *is_self_closing = false;
                        *attributes = vec![];
                    }
                    Token::Character { ref mut raw, .. } => {
                        *raw = None;
                    }
                    Token::Comment { ref mut raw, .. } => {
                        *raw = js_word!("");
                    }
                    _ => {}
                }

                actual_tokens.push(new_token);
            }

            let mut expected_tokens: Vec<Token> = vec![];

            if let Some(output_tokens) = json_output.as_array() {
                for output_token in output_tokens {
                    match output_token {
                        Value::Array(token_parts) => {
                            let tokens = match token_parts[0].as_str().expect("failed") {
                                "DOCTYPE" => {
                                    let name: Option<String> =
                                        serde_json::from_value(token_parts[1].clone())
                                            .expect("failed to deserialize");
                                    let public_id: Option<String> =
                                        serde_json::from_value(token_parts[2].clone())
                                            .expect("failed to deserialize");
                                    let system_id: Option<String> =
                                        serde_json::from_value(token_parts[3].clone())
                                            .expect("failed to deserialize");
                                    let correctness: bool =
                                        serde_json::from_value(token_parts[4].clone())
                                            .expect("failed to deserialize");

                                    vec![Token::Doctype {
                                        name: name.map(|v| v.into()),
                                        force_quirks: !correctness,
                                        public_id: public_id.map(|v| v.into()),
                                        system_id: system_id.map(|v| v.into()),
                                        raw: None,
                                    }]
                                }
                                "StartTag" => {
                                    let tag_name: String =
                                        serde_json::from_value(token_parts[1].clone())
                                            .expect("failed to deserialize");
                                    let mut attributes = vec![];

                                    if let Some(json_attributes) = token_parts.get(2) {
                                        let obj_attributes: Value =
                                            serde_json::from_value(json_attributes.clone())
                                                .expect("failed to deserialize");

                                        match obj_attributes {
                                            Value::Object(obj) => {
                                                for key in obj.keys() {
                                                    let json_value = obj.get(key).expect(
                                                        "failed to get value for attribute",
                                                    );
                                                    let value: Option<String> =
                                                        serde_json::from_value(json_value.clone())
                                                            .expect("failed to deserialize");

                                                    attributes.push(AttributeToken {
                                                        span: Default::default(),
                                                        name: key.clone().into(),
                                                        raw_name: None,
                                                        value: value.map(|v| v.into()),
                                                        raw_value: None,
                                                    })
                                                }
                                            }
                                            _ => {
                                                unreachable!();
                                            }
                                        }
                                    }

                                    let mut is_self_closing = false;

                                    if let Some(json_is_self_closing) = token_parts.get(3) {
                                        let value: bool =
                                            serde_json::from_value(json_is_self_closing.clone())
                                                .expect("failed to deserialize");

                                        is_self_closing = value;
                                    }

                                    attributes.sort_by(|a, b| a.name.partial_cmp(&b.name).unwrap());

                                    vec![Token::StartTag {
                                        tag_name: tag_name.into(),
                                        raw_tag_name: None,
                                        is_self_closing,
                                        attributes,
                                    }]
                                }
                                "EndTag" => {
                                    let tag_name: String =
                                        serde_json::from_value(token_parts[1].clone())
                                            .expect("failed to deserialize");

                                    vec![Token::EndTag {
                                        tag_name: tag_name.into(),
                                        raw_tag_name: None,
                                        is_self_closing: false,
                                        attributes: vec![],
                                    }]
                                }
                                "Character" => {
                                    let mut data: String =
                                        serde_json::from_value(token_parts[1].clone())
                                            .expect("failed to deserialize");

                                    if need_double_escaped {
                                        data = match unescape(&data) {
                                            Some(v) => v,
                                            _ => {
                                                continue;
                                            }
                                        };
                                    }

                                    let mut tokens = vec![];

                                    for c in data.chars() {
                                        tokens.push(Token::Character {
                                            value: c,
                                            raw: None,
                                        })
                                    }

                                    tokens
                                }
                                "Comment" => {
                                    let mut data: String =
                                        serde_json::from_value(token_parts[1].clone())
                                            .expect("failed to deserialize");

                                    if need_double_escaped {
                                        data = match unescape(&data) {
                                            Some(v) => v,
                                            _ => {
                                                continue;
                                            }
                                        };
                                    }

                                    vec![Token::Comment {
                                        data: data.into(),
                                        raw: js_word!(""),
                                    }]
                                }
                                _ => {
                                    unreachable!("unknown token {}", token_parts[0])
                                }
                            };

                            expected_tokens.extend(tokens);
                        }
                        _ => {
                            unreachable!();
                        }
                    }
                }
            }

            let actual =
                serde_json::to_string(&actual_tokens).expect("failed to serialize actual tokens");
            let expected = serde_json::to_string(&expected_tokens)
                .expect("failed to serialize expected tokens");

            if let Some(json_errors) = test.get("errors") {
                let expected_errors = json_errors.as_array().expect("failed to deserialize error");
                let actual_errors = lexer.take_errors();

                eprintln!("==== ==== Errors ==== ====\n{:?}\n", actual_errors);

                assert_eq!(actual_errors.len(), expected_errors.len());

                for expected_error in expected_errors.iter() {
                    let obj_expected_code =
                        expected_error.as_object().expect("failed to get error");
                    let expected_code = match obj_expected_code.get("code") {
                        Some(expected_code) => match expected_code.as_str() {
                            Some("eof-in-doctype") => ErrorKind::EofInDoctype,
                            Some("eof-in-comment") => ErrorKind::EofInComment,
                            Some("eof-in-cdata") => ErrorKind::EofInCdata,
                            Some("eof-in-tag") => ErrorKind::EofInTag,
                            Some("eof-before-tag-name") => ErrorKind::EofBeforeTagName,
                            Some("eof-in-script-html-comment-like-text") => {
                                ErrorKind::EofInScriptHtmlCommentLikeText
                            }
                            Some("unknown-named-character-reference") => {
                                ErrorKind::UnknownNamedCharacterReference
                            }
                            Some("incorrectly-opened-comment") => {
                                ErrorKind::IncorrectlyOpenedComment
                            }
                            Some("abrupt-closing-of-empty-comment") => {
                                ErrorKind::AbruptClosingOfEmptyComment
                            }
                            Some("abrupt-doctype-public-identifier") => {
                                ErrorKind::AbruptDoctypePublicIdentifier
                            }
                            Some("abrupt-doctype-system-identifier") => {
                                ErrorKind::AbruptDoctypeSystemIdentifier
                            }
                            Some("absence-of-digits-in-numeric-character-reference") => {
                                ErrorKind::AbsenceOfDigitsInNumericCharacterReference
                            }
                            Some("surrogate-character-reference") => {
                                ErrorKind::SurrogateCharacterReference
                            }
                            Some("nested-comment") => ErrorKind::NestedComment,
                            Some("end-tag-with-trailing-solidus") => {
                                ErrorKind::EndTagWithTrailingSolidus
                            }
                            Some("null-character-reference") => ErrorKind::NullCharacterReference,
                            Some("cdata-in-html-content") => ErrorKind::CdataInHtmlContent,
                            Some("character-reference-outside-unicode-range") => {
                                ErrorKind::CharacterReferenceOutsideUnicodeRange
                            }
                            Some("control-character-in-input-stream") => {
                                ErrorKind::ControlCharacterInInputStream
                            }
                            Some("control-character-reference") => {
                                ErrorKind::ControlCharacterReference
                            }
                            Some("noncharacter-in-input-stream") => {
                                ErrorKind::NoncharacterInInputStream
                            }
                            Some("noncharacter-character-reference") => {
                                ErrorKind::NoncharacterCharacterReference
                            }
                            Some("unexpected-equals-sign-before-attribute-name") => {
                                ErrorKind::UnexpectedEqualsSignBeforeAttributeName
                            }
                            Some("unexpected-question-mark-instead-of-tag-name") => {
                                ErrorKind::UnexpectedQuestionMarkInsteadOfTagName
                            }
                            Some("unexpected-character-after-doctype-system-identifier") => {
                                ErrorKind::UnexpectedCharacterAfterDoctypeSystemIdentifier
                            }
                            Some("unexpected-null-character") => ErrorKind::UnexpectedNullCharacter,
                            Some("unexpected-solidus-in-tag") => ErrorKind::UnexpectedSolidusInTag,
                            Some("unexpected-character-in-attribute-name") => {
                                ErrorKind::UnexpectedCharacterInAttributeName
                            }
                            Some("unexpected-character-in-unquoted-attribute-value") => {
                                ErrorKind::UnexpectedCharacterInUnquotedAttributeValue
                            }
                            Some("duplicate-attribute") => ErrorKind::DuplicateAttribute,
                            Some("end-tag-with-attributes") => ErrorKind::EndTagWithAttributes,
                            Some("missing-whitespace-before-doctype-name") => {
                                ErrorKind::MissingWhitespaceBeforeDoctypeName
                            }
                            Some("missing-attribute-value") => ErrorKind::MissingAttributeValue,
                            Some("missing-doctype-public-identifier") => {
                                ErrorKind::MissingDoctypePublicIdentifier
                            }
                            Some("missing-end-tag-name") => ErrorKind::MissingEndTagName,
                            Some("missing-doctype-name") => ErrorKind::MissingDoctypeName,
                            Some("missing-doctype-system-identifier") => {
                                ErrorKind::MissingDoctypeSystemIdentifier
                            }
                            Some("missing-whitespace-after-doctype-system-keyword") => {
                                ErrorKind::MissingWhitespaceAfterDoctypeSystemKeyword
                            }
                            Some("missing-whitespace-after-doctype-public-keyword") => {
                                ErrorKind::MissingWhitespaceAfterDoctypePublicKeyword
                            }
                            Some("missing-quote-before-doctype-public-identifier") => {
                                ErrorKind::MissingQuoteBeforeDoctypePublicIdentifier
                            }
                            Some("missing-quote-before-doctype-system-identifier") => {
                                ErrorKind::MissingQuoteBeforeDoctypeSystemIdentifier
                            }
                            Some("incorrectly-closed-comment") => {
                                ErrorKind::IncorrectlyClosedComment
                            }
                            Some("invalid-character-sequence-after-doctype-name") => {
                                ErrorKind::InvalidCharacterSequenceAfterDoctypeName
                            }
                            Some(
                                "missing-whitespace-between-doctype-public-and-system-identifiers",
                            ) => {
                                ErrorKind::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers
                            }
                            Some("missing-whitespace-between-attributes") => {
                                ErrorKind::MissingWhitespaceBetweenAttributes
                            }
                            Some("missing-semicolon-after-character-reference") => {
                                ErrorKind::MissingSemicolonAfterCharacterReference
                            }
                            Some("invalid-first-character-of-tag-name") => {
                                ErrorKind::InvalidFirstCharacterOfTagName
                            }
                            _ => {
                                unreachable!("unknown error {:?}", expected_code);
                            }
                        },
                        _ => {
                            unreachable!();
                        }
                    };

                    assert!(actual_errors
                        .iter()
                        .any(|error| *error.kind() == expected_code));
                }
            } else {
                let errors = lexer.take_errors();

                assert_eq!(errors.len(), 0);
            }

            assert_eq!(actual, expected);
        }
    }
}
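
/// Section of an html5lib `.dat` tree-construction test that is currently
/// being collected.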
enum TestState {
    Data,
    Document,
    DocumentFragment,
    Errors,
    NewErrors,
}

enum DocumentOrDocumentFragment {
    Document(PResult<Document>),
    DocumentFragment(PResult<DocumentFragment>),
}
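
/// Runs the html5lib tree-construction suite. For `.dat` inputs this only
/// splits each file into per-test fixtures under `html5lib-tests-fixture/`;
/// the generated `.html` fixtures are then parsed (as a document, or as a
/// document fragment for `fragment_*` cases) and compared against the JSON
/// AST, the `dom.rust-debug` dump, and the expected error count.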
#[testing::fixture("tests/html5lib-tests/tree-construction/**/*.dat")]
#[testing::fixture("tests/html5lib-tests-fixture/**/*.html")]
fn html5lib_test_tree_construction(input: PathBuf) {
    if input.extension().unwrap() == "dat" {
        let mut tree_construction_base = None;
        let mut tests_base = None;
        let mut path_buf = input.to_path_buf();

        while path_buf.pop() {
            if path_buf.ends_with("tree-construction") {
                tree_construction_base = Some(path_buf.clone());
            }

            if path_buf.ends_with("tests") {
                tests_base = Some(path_buf.clone());

                break;
            }
        }

        let tree_construction_base = tree_construction_base.unwrap();
        let relative_path_to_test = input
            .strip_prefix(tree_construction_base)
            .expect("failed to get relative filename")
            .to_str()
            .unwrap()
            .replace('/', "_")
            .replace('.', "_");
        let tests_base = tests_base.unwrap();

        let dir = tests_base
            .join("html5lib-tests-fixture")
            .join(&relative_path_to_test);

        fs::create_dir_all(dir.clone()).expect("failed to create directory for fixtures");

        let tests_file = fs::read_to_string(input).expect("Something went wrong reading the file");
        let mut tests = tests_file.split("#data\n");

        tests.next();

        let mut counter = 0;

        while let Some(test) = tests.next() {
            let mut data: Vec<&str> = vec![];
            let mut document: Vec<&str> = vec![];
            let mut document_fragment: Vec<&str> = vec![];
            let mut errors: Vec<&str> = vec![];
            let mut new_errors: Vec<&str> = vec![];
            let mut scripting_enabled = false;

            let mut state = Some(TestState::Data);
            let lines = test.lines();

            for line in lines {
                match line {
                    "#data" => {
                        state = Some(TestState::Data);

                        continue;
                    }
                    "#errors" => {
                        state = Some(TestState::Errors);

                        continue;
                    }
                    "#new-errors" => {
                        state = Some(TestState::NewErrors);

                        continue;
                    }
                    "#document" => {
                        state = Some(TestState::Document);

                        continue;
                    }
                    "#document-fragment" => {
                        state = Some(TestState::DocumentFragment);

                        continue;
                    }
                    "#script-on" => {
                        scripting_enabled = true;

                        state = None;

                        continue;
                    }
                    "#script-off" => {
                        scripting_enabled = false;

                        state = None;

                        continue;
                    }
                    _ => {}
                }

                match &state {
                    Some(TestState::Data) => {
                        data.push(line);
                    }
                    Some(TestState::Document) => {
                        document.push(line);
                    }
                    Some(TestState::DocumentFragment) => {
                        document_fragment.push(line);
                    }
                    Some(TestState::Errors) => {
                        errors.push(line);
                    }
                    Some(TestState::NewErrors) => {
                        new_errors.push(line);
                    }
                    _ => {
                        unreachable!();
                    }
                }
            }

            let mut file_stem = counter.to_string();

            if !document_fragment.is_empty() {
                file_stem += ".fragment_";
                file_stem += &document_fragment.join("").replace(' ', "_");
            }

            if scripting_enabled {
                file_stem += ".script_on";
            }

            let test_case_dir = dir.join(file_stem);

            fs::create_dir_all(test_case_dir.clone())
                .expect("failed to create directory for fixtures");

            let html_path = test_case_dir.join("input.html");

            fs::write(html_path, data.join("\n"))
                .expect("Something went wrong when writing to the file");

            let dom_snapshot_path = test_case_dir.join("dom.rust-debug");

            let mut dom = document.join("\n");

            if !dom.ends_with('\n') {
                dom.push('\n');
            }

            fs::write(dom_snapshot_path, dom)
                .expect("Something went wrong when writing to the file");

            let errors = errors.join("\n");
            let errors_snapshot_path = test_case_dir.join("output.stderr");

            fs::write(errors_snapshot_path, errors)
                .expect("Something went wrong when writing to the file");

            counter += 1;
        }

        return;
    }

    testing::run_test2(false, |cm, handler| {
        // Type annotation
        if false {
            return Ok(());
        }

        let parent = input.parent().unwrap();
        let parent_str = parent.to_string_lossy();

        let scripting_enabled = parent_str.contains("script_on");
        let json_path = parent.join("output.json");
        let fm = cm.load_file(&input).unwrap();

        let lexer = Lexer::new(SourceFileInput::from(&*fm));
        let config = ParserConfig {
            scripting_enabled,
            iframe_srcdoc: false,
        };
        let mut parser = Parser::new(lexer, config);
        let document_or_document_fragment = if parent_str.contains("fragment") {
            let mut context_element_namespace = Namespace::HTML;
            let mut context_element_tag_name = "unknown";

            let context_element = parent_str
                .split('.')
                .last()
                .expect("failed to get context element from filename")
                .replace("fragment_", "");

            if context_element.contains('_') {
                let mut splited = context_element.split('_');

                if let Some(namespace) = splited.next() {
                    context_element_namespace = match namespace {
                        "math" => Namespace::MATHML,
                        "svg" => Namespace::SVG,
                        _ => {
                            unreachable!();
                        }
                    };
                }

                if let Some(tag_name) = splited.next() {
                    context_element_tag_name = tag_name;
                }
            } else {
                context_element_tag_name = &context_element;
            }

            let context_element = Element {
                span: Default::default(),
                namespace: context_element_namespace,
                tag_name: context_element_tag_name.into(),
                attributes: vec![],
                is_self_closing: false,
                children: vec![],
                content: None,
            };

            DocumentOrDocumentFragment::DocumentFragment(parser.parse_document_fragment(
                context_element,
                DocumentMode::NoQuirks,
                None,
            ))
        } else {
            DocumentOrDocumentFragment::Document(parser.parse_document())
        };

        let parent_parent = parent.parent().unwrap().to_string_lossy();
        // `scripted` for browser tests with JS
        // `search` proposed, but not merged in spec
        let need_skip_tests =
            parent_parent.contains("scripted") || parent_parent.contains("search");

        if !need_skip_tests {
            let errors = parser.take_errors();
            let errors_path = input.parent().unwrap().join("output.stderr");
            let contents =
                fs::read_to_string(errors_path).expect("Something went wrong reading the file");

            // TODO bug in tests - https://github.com/html5lib/html5lib-tests/issues/138
            let actual_number_of_errors =
                if parent_parent.contains("tests19_dat") && parent_str.contains("84") {
                    errors.len() + 1
                } else if (parent_parent.contains("math_dat") || parent_parent.contains("svg_dat"))
                    && (parent_str.contains("5.fragment_tbody")
                        || parent_str.contains("6.fragment_tbody")
                        || parent_str.contains("7.fragment_tbody"))
                {
                    errors.len() - 1
                } else if parent_parent.contains("foreign-fragment_dat")
                    && parent_str.contains("3.fragment_svg_path")
                {
                    errors.len() - 1
                } else {
                    errors.len()
                };
            let expected_number_of_errors = contents.lines().count();

            assert_eq!(actual_number_of_errors, expected_number_of_errors);
        }

        match document_or_document_fragment {
            DocumentOrDocumentFragment::Document(Ok(mut document)) => {
                let actual_json = serde_json::to_string_pretty(&document)
                    .map(NormalizedOutput::from)
                    .expect("failed to serialize document");

                actual_json.compare_to_file(&json_path).unwrap();

                if parent_parent.contains("scripted") || parent_parent.contains("search") {
                    return Ok(());
                }

                let mut dom_buf = String::new();

                document.visit_mut_with(&mut DomVisualizer {
                    dom_buf: &mut dom_buf,
                    indent: 0,
                });

                NormalizedOutput::from(dom_buf)
                    .compare_to_file(&parent.join("dom.rust-debug"))
                    .unwrap();

                Ok(())
            }
            DocumentOrDocumentFragment::DocumentFragment(Ok(mut document_fragment)) => {
                let actual_json = serde_json::to_string_pretty(&document_fragment)
                    .map(NormalizedOutput::from)
                    .expect("failed to serialize document");

                actual_json.compare_to_file(&json_path).unwrap();

                if need_skip_tests {
                    return Ok(());
                }

                let mut dom_buf = String::new();

                document_fragment.visit_mut_with(&mut DomVisualizer {
                    dom_buf: &mut dom_buf,
                    indent: 0,
                });

                NormalizedOutput::from(dom_buf)
                    .compare_to_file(&parent.join("dom.rust-debug"))
                    .unwrap();

                Ok(())
            }
            DocumentOrDocumentFragment::Document(Err(err))
            | DocumentOrDocumentFragment::DocumentFragment(Err(err)) => {
                let mut d = err.to_diagnostics(&handler);

                d.note(&format!("current token = {}", parser.dump_cur()));
                d.emit();

                panic!();
            }
        }
    })
    .unwrap();
}