feat(html/codegen): Support context element (#4887)

This commit is contained in:
Alexander Akait 2022-06-05 23:43:56 +03:00 committed by GitHub
parent 3812fb2eb0
commit ae1ff1e55b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 138 additions and 131 deletions

View File

@ -22,7 +22,7 @@ where
/// Flags carried by the emitter that control how text nodes are written.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Ctx {
/// When set, `emit_text` writes the text value verbatim.
pub skip_escape_text: bool,
/// When set, `emit_text` builds a processed copy of the text before
/// writing it; otherwise the raw value is written as-is.
pub need_escape_text: bool,
/// Set for HTML `textarea` and `pre` elements; `emit_text` checks it
/// together with whether the text contains a `\n`.
pub need_extra_newline_in_text: bool,
}

View File

@ -18,10 +18,18 @@ mod emit;
mod list;
pub mod writer;
/// Options that control HTML code generation.
#[derive(Debug, Clone, Copy, Default)]
#[derive(Debug, Clone, Default)]
pub struct CodegenConfig {
/// When `true`, children of documents, fragments and elements are emitted
/// through the tag-omission path instead of the plain list path.
pub minify: bool,
/// When `true`, text inside `noscript` is written without escaping.
pub scripting_enabled: bool,
/// Should be used only for `DocumentFragment` code generation
pub context_element: Option<Element>,
}
/// Node whose children are emitted by `emit_list_for_tag_omission`.
///
/// Only the `Element` variant is forwarded as the parent element of each
/// child; the other variants result in no parent element being passed.
enum TagOmissionParent<'a> {
Document(&'a Document),
DocumentFragment(&'a DocumentFragment),
Element(&'a Element),
}
#[derive(Debug)]
@ -52,23 +60,7 @@ where
#[emitter]
fn emit_document(&mut self, n: &Document) -> Result {
if self.config.minify {
for (idx, node) in n.children.iter().enumerate() {
match node {
Child::Element(element) => {
let prev = if idx > 0 {
n.children.get(idx - 1)
} else {
None
};
let next = n.children.get(idx + 1);
self.basic_emit_element(element, None, prev, next)?;
}
_ => {
emit!(self, node)
}
}
}
self.emit_list_for_tag_omission(TagOmissionParent::Document(n))?;
} else {
self.emit_list(&n.children, ListFormat::NotDelimited)?;
}
@ -76,26 +68,18 @@ where
#[emitter]
fn emit_document_fragment(&mut self, n: &DocumentFragment) -> Result {
if self.config.minify {
for (idx, node) in n.children.iter().enumerate() {
match node {
Child::Element(element) => {
let prev = if idx > 0 {
n.children.get(idx - 1)
} else {
None
};
let next = n.children.get(idx + 1);
self.basic_emit_element(element, None, prev, next)?;
}
_ => {
emit!(self, node)
}
}
}
let ctx = if let Some(context_element) = &self.config.context_element {
self.create_context_for_element(context_element)
} else {
self.emit_list(&n.children, ListFormat::NotDelimited)?;
Default::default()
};
if self.config.minify {
self.with_ctx(ctx)
.emit_list_for_tag_omission(TagOmissionParent::DocumentFragment(n))?;
} else {
self.with_ctx(ctx)
.emit_list(&n.children, ListFormat::NotDelimited)?;
}
}
@ -183,6 +167,10 @@ where
prev: Option<&Child>,
next: Option<&Child>,
) -> Result {
if self.is_plaintext {
return Ok(());
}
let has_attributes = !n.attributes.is_empty();
let can_omit_start_tag = self.config.minify
&& !has_attributes
@ -342,29 +330,18 @@ where
return Ok(());
}
self.is_plaintext = matches!(&*n.tag_name, "plaintext");
if !self.is_plaintext {
self.is_plaintext = matches!(&*n.tag_name, "plaintext");
}
if let Some(content) = &n.content {
emit!(self, content);
} else if !n.children.is_empty() {
let skip_escape_text = match &*n.tag_name {
"style" | "script" | "xmp" | "iframe" | "noembed" | "noframes" => true,
"noscript" => self.config.scripting_enabled,
_ if self.is_plaintext => true,
_ => false,
};
let need_extra_newline_in_text =
n.namespace == Namespace::HTML && matches!(&*n.tag_name, "textarea" | "pre");
let ctx = Ctx {
skip_escape_text,
need_extra_newline_in_text,
..self.ctx
};
let ctx = self.create_context_for_element(n);
if self.config.minify {
self.with_ctx(ctx)
.emit_list_for_tag_omission(n, &n.children)?;
.emit_list_for_tag_omission(TagOmissionParent::Element(n))?;
} else {
self.with_ctx(ctx)
.emit_list(&n.children, ListFormat::NotDelimited)?;
@ -713,9 +690,7 @@ where
#[emitter]
fn emit_text(&mut self, n: &Text) -> Result {
if self.ctx.skip_escape_text {
write_str!(self, n.span, &n.value);
} else {
if self.ctx.need_escape_text {
let mut data = String::with_capacity(n.value.len());
if self.ctx.need_extra_newline_in_text && n.value.contains('\n') {
@ -729,6 +704,8 @@ where
}
write_str!(self, n.span, &data);
} else {
write_str!(self, n.span, &n.value);
}
}
@ -743,6 +720,23 @@ where
write_str!(self, n.span, &comment);
}
/// Builds the text-emission context used for the children of element `n`.
///
/// The returned `Ctx` starts from the emitter's current context and overrides
/// `need_escape_text` and `need_extra_newline_in_text` based on `n`.
fn create_context_for_element(&self, n: &Element) -> Ctx {
    let tag_name = &*n.tag_name;

    // The listed containers never get escaped text, `noscript` follows the
    // scripting flag, and every other element is escaped unless
    // `self.is_plaintext` is set.
    let need_escape_text = if matches!(
        tag_name,
        "style" | "script" | "xmp" | "iframe" | "noembed" | "noframes" | "plaintext"
    ) {
        false
    } else if tag_name == "noscript" {
        !self.config.scripting_enabled
    } else {
        !self.is_plaintext
    };

    // Only HTML-namespace `textarea` and `pre` request the extra newline.
    let need_extra_newline_in_text =
        matches!(tag_name, "textarea" | "pre") && n.namespace == Namespace::HTML;

    Ctx {
        need_extra_newline_in_text,
        need_escape_text,
        ..self.ctx
    }
}
#[emitter]
fn emit_token_and_span(&mut self, n: &TokenAndSpan) -> Result {
let span = n.span;
@ -1002,6 +996,34 @@ where
}
}
/// Emits every child of `parent`, giving element children their neighbours.
///
/// Element children go through `basic_emit_element` together with the parent
/// element (only when `parent` is itself an element) and their previous and
/// next siblings. All other children are emitted with `emit!`.
fn emit_list_for_tag_omission(&mut self, parent: TagOmissionParent) -> Result {
    // Resolve the child list and the optional parent element in one pass.
    let (children, parent_element) = match parent {
        TagOmissionParent::Document(document) => (&document.children, None),
        TagOmissionParent::DocumentFragment(document_fragment) => {
            (&document_fragment.children, None)
        }
        TagOmissionParent::Element(element) => (&element.children, Some(element)),
    };

    for (position, child) in children.iter().enumerate() {
        if let Child::Element(element) = child {
            // `checked_sub` yields `None` for the first child, so it has no
            // previous sibling.
            let previous = position.checked_sub(1).and_then(|i| children.get(i));
            let following = children.get(position + 1);

            self.basic_emit_element(element, parent_element, previous, following)?;
        } else {
            emit!(self, child);
        }
    }

    Ok(())
}
fn emit_list<N>(&mut self, nodes: &[N], format: ListFormat) -> Result
where
Self: Emit<N>,
@ -1022,24 +1044,6 @@ where
Ok(())
}
/// Emits `nodes` as children of `parent`.
///
/// Element children go through `basic_emit_element` with `parent` and their
/// previous and next siblings; all other children are emitted with `emit!`.
fn emit_list_for_tag_omission(&mut self, parent: &Element, nodes: &[Child]) -> Result {
    // Sibling seen on the previous iteration; `None` for the first child.
    let mut previous: Option<&Child> = None;

    for (position, child) in nodes.iter().enumerate() {
        if let Child::Element(element) = child {
            let following = nodes.get(position + 1);

            self.basic_emit_element(element, Some(parent), previous, following)?;
        } else {
            emit!(self, child);
        }

        previous = Some(child);
    }

    Ok(())
}
fn write_delim(&mut self, f: ListFormat) -> Result {
match f & ListFormat::DelimitersMask {
ListFormat::None => {}

View File

@ -254,11 +254,13 @@ fn verify_document_fragment(
Some(writer_config) => writer_config,
_ => BasicHtmlWriterConfig::default(),
};
let codegen_config = match codegen_config {
let mut codegen_config = match codegen_config {
Some(codegen_config) => codegen_config,
_ => CodegenConfig::default(),
};
codegen_config.context_element = Some(context_element.clone());
testing::run_test2(false, |cm, handler| {
let fm = cm.load_file(input).unwrap();
let mut errors = vec![];
@ -330,6 +332,7 @@ fn test_document(input: PathBuf) {
Some(CodegenConfig {
scripting_enabled: false,
minify: false,
..Default::default()
}),
);
print_document(
@ -339,44 +342,42 @@ fn test_document(input: PathBuf) {
Some(CodegenConfig {
scripting_enabled: false,
minify: true,
..Default::default()
}),
);
}
// Runs once per `tests/document_fragment/**/input.html` fixture and prints the
// fragment twice via `print_document_fragment`: first with `minify: false`,
// then with `minify: true`. Both runs use a `template` element in the HTML
// namespace as the context element and keep scripting disabled.
#[testing::fixture("tests/document_fragment/**/input.html")]
fn test_document_fragment(input: PathBuf) {
let context_element = Element {
span: Default::default(),
tag_name: "template".into(),
namespace: Namespace::HTML,
attributes: vec![],
children: vec![],
content: None,
};
print_document_fragment(
&input,
Element {
span: Default::default(),
tag_name: "template".into(),
namespace: Namespace::HTML,
attributes: vec![],
children: vec![],
content: None,
},
context_element.clone(),
None,
None,
Some(CodegenConfig {
scripting_enabled: false,
minify: false,
..Default::default()
}),
);
print_document_fragment(
&input,
Element {
span: Default::default(),
tag_name: "template".into(),
namespace: Namespace::HTML,
attributes: vec![],
children: vec![],
content: None,
},
context_element,
None,
None,
Some(CodegenConfig {
scripting_enabled: false,
minify: true,
..Default::default()
}),
);
}
@ -405,6 +406,7 @@ fn parser_verify(input: PathBuf) {
Some(CodegenConfig {
scripting_enabled: false,
minify: true,
..Default::default()
}),
false,
);
@ -419,7 +421,6 @@ fn parser_verify(input: PathBuf) {
"element/foreign-context/input.html",
"element/a-4/input.html",
"element/b-3/input.html",
"element/template-1/input.html",
)
)]
fn parser_recovery_verify(input: PathBuf) {
@ -430,41 +431,37 @@ fn parser_recovery_verify(input: PathBuf) {
Some(CodegenConfig {
scripting_enabled: false,
minify: true,
..Default::default()
}),
true,
);
}
// Non-conforming documents (i.e. broken HTML) cannot be restored to how they
// were originally.
//
// Example - `<!DOCTYPE html><html><body><!-- Test`
//
// Here we have an unclosed comment, so the serialization will not be the same
//
// TODO There are only bugs for `fragment` because we should allow
// passing a context element to codegen too
// Tag omission only works for valid HTML documents (i.e. without errors)
//
// Path fragments of fixtures for which the minified `verify_document` run is
// skipped: an input is ignored when its normalized path (`-` replaced by `_`,
// `\` replaced by `/`) contains one of these entries.
static IGNORE_TAG_OMISSION: &[&str] = &[
"adoption01_dat/5/input.html",
"adoption01_dat/6/input.html",
"adoption01_dat/7/input.html",
"adoption01_dat/8/input.html",
"adoption02_dat/0/input.html",
"tests1_dat/68/input.html",
"tests1_dat/69/input.html",
"tests1_dat/70/input.html",
"tests1_dat/71/input.html",
"tests15_dat/0/input.html",
"tests15_dat/1/input.html",
"template_dat/68/input.html",
"tricky01_dat/6/input.html",
];
#[testing::fixture(
"../swc_html_parser/tests/html5lib-tests-fixture/**/*.html",
exclude(
"adoption01_dat/5/input.html",
"adoption01_dat/6/input.html",
"adoption01_dat/7/input.html",
"adoption01_dat/8/input.html",
"adoption02_dat/0/input.html",
"tests1_dat/30/input.html",
"tests1_dat/68/input.html",
"tests1_dat/69/input.html",
"tests1_dat/70/input.html",
"tests1_dat/71/input.html",
"tests1_dat/77/input.html",
"tests1_dat/90/input.html",
"tests1_dat/103/input.html",
"tests2_dat/12/input.html",
"tests4_dat/3.fragment_style/input.html",
"tests4_dat/4.fragment_plaintext/input.html",
"tests15_dat/0/input.html",
"tests15_dat/1/input.html",
"tests16_dat/31/input.html",
"tests16_dat/32/input.html",
"tests16_dat/33/input.html",
@ -494,14 +491,11 @@ fn parser_recovery_verify(input: PathBuf) {
"tests18_dat/8/input.html",
"tests18_dat/9/input.html",
"tests18_dat/12/input.html",
"tests18_dat/15/input.html",
"tests18_dat/21/input.html",
"tests19_dat/103/input.html",
"tests20_dat/41/input.html",
"tests26_dat/2/input.html",
"tricky01_dat/6/input.html",
"plain-text-unsafe_dat/0/input.html",
"template_dat/68/input.html",
"template_dat/107/input.html",
)
)]
@ -515,10 +509,12 @@ fn html5lib_tests_verify(input: PathBuf) {
let codegen_config = CodegenConfig {
minify: false,
scripting_enabled,
..Default::default()
};
let minified_codegen_config = CodegenConfig {
minify: true,
scripting_enabled,
..Default::default()
};
if parent.contains("fragment") {
@ -583,12 +579,20 @@ fn html5lib_tests_verify(input: PathBuf) {
Some(codegen_config),
true,
);
verify_document(
&input,
Some(parser_config),
None,
Some(minified_codegen_config),
true,
);
let relative_path = input.to_string_lossy().replace('-', "_").replace('\\', "/");
if !IGNORE_TAG_OMISSION
.iter()
.any(|ignored| relative_path.contains(&**ignored))
{
verify_document(
&input,
Some(parser_config),
None,
Some(minified_codegen_config),
true,
);
}
}
}

View File

@ -56,8 +56,7 @@
<p x='x"'>x</p>
<a href="#"><p>Click me</p></a>
<span><button>Hit me</button></span>
<!--TODO FIX ME -->
<!--<object type="image/svg+xml" data="image.svg"><div>[fallback image]</div></object>-->
<object type="image/svg+xml" data="image.svg"><div>[fallback image]</div></object>
<img class="user-image" src="http:&#47;&#47;adasdasdasd.cloudfront.net&#47;users&#47;2011&#47;05&#47;24&#47;4asdasd&#47;asdasd.jpg" />

View File

@ -52,8 +52,7 @@
<p x="x&quot;">x</p>
<a href="#"><p>Click me</p></a>
<span><button>Hit me</button></span>
<!--TODO FIX ME -->
<!--<object type="image/svg+xml" data="image.svg"><div>[fallback image]</div></object>-->
<object type="image/svg+xml" data="image.svg"><div>[fallback image]</div></object>
<img class="user-image" src="http://adasdasdasd.cloudfront.net/users/2011/05/24/4asdasd/asdasd.jpg">

View File

@ -50,8 +50,7 @@
<p x='x"'>x</p>
<a href=#><p>Click me</p></a>
<span><button>Hit me</button></span>
<!--TODO FIX ME -->
<!--<object type="image/svg+xml" data="image.svg"><div>[fallback image]</div></object>-->
<object type=image/svg+xml data=image.svg><div>[fallback image]</div></object>
<img class=user-image src=http://adasdasdasd.cloudfront.net/users/2011/05/24/4asdasd/asdasd.jpg>

View File

@ -47,6 +47,7 @@ fn minify_fixtures(input: PathBuf) {
CodegenConfig {
scripting_enabled: false,
minify: true,
..Default::default()
},
);
@ -99,6 +100,7 @@ fn minify_recovery(input: PathBuf) {
CodegenConfig {
scripting_enabled: false,
minify: true,
..Default::default()
},
);