rustdoc_to_markdown: Add helper methods for checking HTML attributes (#12496)

This PR adds some helper methods to `HtmlElement` to make it easier to
interact with the element's attributes.

This cleans up a bunch of the code by a fair amount.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-05-30 14:15:08 -04:00 committed by GitHub
parent c83d1c23d7
commit 4dc98026c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -23,6 +23,50 @@ struct HtmlElement {
attrs: RefCell<Vec<Attribute>>,
}
impl HtmlElement {
/// Returns the attribute with the specified name.
pub fn attr(&self, name: &str) -> Option<String> {
self.attrs
.borrow()
.iter()
.find(|attr| attr.name.local.to_string() == name)
.map(|attr| attr.value.to_string())
}
/// Returns the list of classes on this [`HtmlElement`].
pub fn classes(&self) -> Vec<String> {
self.attrs
.borrow()
.iter()
.find(|attr| attr.name.local.to_string() == "class")
.map(|attr| {
attr.value
.split(' ')
.map(|class| class.trim().to_string())
.collect::<Vec<_>>()
})
.unwrap_or_default()
}
/// Returns whether this [`HtmlElement`] has the specified class.
pub fn has_class(&self, class: &str) -> bool {
self.has_any_classes(&[class])
}
/// Returns whether this [`HtmlElement`] has any of the specified classes.
pub fn has_any_classes(&self, classes: &[&str]) -> bool {
self.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class"
&& attr
.value
.split(' ')
.any(|class| classes.contains(&class.trim()))
})
}
}
const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
enum StartTagOutcome {
Continue,
Skip,
@ -145,22 +189,12 @@ impl MarkdownWriter {
"h6" => self.push_str("\n\n###### "),
"code" => {
if !self.is_inside("pre") {
self.push_str("`")
self.push_str("`");
}
}
"pre" => {
let attrs = tag.attrs.borrow();
let classes = attrs
.iter()
.find(|attr| attr.name.local.to_string() == "class")
.map(|attr| {
attr.value
.split(' ')
.map(|class| class.trim())
.collect::<Vec<_>>()
})
.unwrap_or_default();
let is_rust = classes.iter().any(|class| class == &"rust");
let classes = tag.classes();
let is_rust = classes.iter().any(|class| class == "rust");
let language = is_rust
.then(|| "rs")
.or_else(|| {
@ -174,7 +208,7 @@ impl MarkdownWriter {
})
.unwrap_or("");
self.push_str(&format!("\n\n```{language}\n"))
self.push_str(&format!("\n\n```{language}\n"));
}
"ul" | "ol" => self.push_newline(),
"li" => self.push_str("- "),
@ -198,39 +232,23 @@ impl MarkdownWriter {
self.push_str("| ");
}
"summary" => {
if tag.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class" && attr.value.to_string() == "hideme"
}) {
if tag.has_class("hideme") {
return StartTagOutcome::Skip;
}
}
"button" => {
if tag.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "id" && attr.value.to_string() == "copy-path"
}) {
if tag.attr("id").as_deref() == Some("copy-path") {
return StartTagOutcome::Skip;
}
}
"div" | "span" => {
let classes_to_skip = ["nav-container", "sidebar-elems", "out-of-band"];
if tag.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class"
&& attr
.value
.split(' ')
.any(|class| classes_to_skip.contains(&class.trim()))
}) {
if tag.has_any_classes(&classes_to_skip) {
return StartTagOutcome::Skip;
}
if self.is_inside_item_name() {
if tag.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class"
&& attr.value.split(' ').any(|class| class.trim() == "stab")
}) {
self.push_str(" [");
}
if self.is_inside_item_name() && tag.has_class("stab") {
self.push_str(" [");
}
}
_ => {}
@ -244,7 +262,7 @@ impl MarkdownWriter {
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => self.push_str("\n\n"),
"code" => {
if !self.is_inside("pre") {
self.push_str("`")
self.push_str("`");
}
}
"pre" => self.push_str("\n```\n"),
@ -269,19 +287,12 @@ impl MarkdownWriter {
self.current_table_columns = 0;
}
"div" | "span" => {
if tag.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"
}) {
if tag.has_class(RUSTDOC_ITEM_NAME_CLASS) {
self.push_str(": ");
}
if self.is_inside_item_name() {
if tag.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class"
&& attr.value.split(' ').any(|class| class.trim() == "stab")
}) {
self.push_str("]");
}
if self.is_inside_item_name() && tag.has_class("stab") {
self.push_str("]");
}
}
_ => {}
@ -309,10 +320,8 @@ impl MarkdownWriter {
/// Returns whether we're currently inside of an `.item-name` element, which
/// rustdoc uses to display Rust items in a list.
fn is_inside_item_name(&self) -> bool {
self.current_element_stack.iter().any(|element| {
element.attrs.borrow().iter().any(|attr| {
attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"
})
})
self.current_element_stack
.iter()
.any(|element| element.has_class(RUSTDOC_ITEM_NAME_CLASS))
}
}