rustdoc_to_markdown: Clean up heading spacing (#12456)

This PR cleans up the spacing around the Markdown headings in the output
so that they are consistent.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-05-29 18:39:51 -04:00 committed by GitHub
parent 08881828ce
commit abec028e58
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 28 additions and 7 deletions

1
Cargo.lock generated
View File

@ -8639,6 +8639,7 @@ dependencies = [
"html5ever",
"indoc",
"markup5ever_rcdom",
"regex",
]
[[package]]

View File

@ -15,6 +15,7 @@ path = "src/rustdoc_to_markdown.rs"
anyhow.workspace = true
html5ever.workspace = true
markup5ever_rcdom.workspace = true
regex.workspace = true
[dev-dependencies]
indoc.workspace = true

View File

@ -1,9 +1,21 @@
use std::cell::RefCell;
use std::collections::VecDeque;
use std::sync::OnceLock;
use anyhow::Result;
use html5ever::Attribute;
use markup5ever_rcdom::{Handle, NodeData};
use regex::Regex;
/// Returns the lazily-compiled regex matching lines that consist solely of
/// horizontal whitespace.
///
/// The `(?m)` flag is required: without it `^` and `$` anchor only to the
/// start and end of the whole haystack, so the pattern could never match an
/// individual line inside a larger document. Line terminators are excluded
/// from the character class so that a match never spans multiple lines and
/// the line breaks themselves are left in place.
fn empty_line_regex() -> &'static Regex {
    static REGEX: OnceLock<Regex> = OnceLock::new();
    // `[^\S\r\n]` is "any whitespace except CR/LF"; `+` skips already-empty
    // lines, for which a replacement would be a no-op anyway.
    REGEX.get_or_init(|| Regex::new(r"(?m)^[^\S\r\n]+$").unwrap())
}
/// Returns the lazily-compiled regex matching a run of consecutive newlines.
///
/// Note that, despite the function name, the pattern `\n{3,}` matches runs of
/// three *or more* newlines. The regex is compiled on first use and cached
/// for the lifetime of the process.
fn more_than_three_newlines_regex() -> &'static Regex {
    static NEWLINE_RUN: OnceLock<Regex> = OnceLock::new();
    NEWLINE_RUN.get_or_init(|| {
        let compiled = Regex::new(r"\n{3,}");
        compiled.unwrap()
    })
}
#[derive(Debug, Clone)]
struct HtmlElement {
@ -48,7 +60,14 @@ impl MarkdownWriter {
/// Converts the document rooted at `root_node` into Markdown.
///
/// Consumes the writer, visits `root_node` via `visit_node` (propagating any
/// error it returns), and yields the Markdown accumulated in `self.markdown`
/// with leading and trailing whitespace trimmed.
pub fn run(mut self, root_node: &Handle) -> Result<String> {
self.visit_node(&root_node)?;
Ok(self.markdown.trim().to_string())
Ok(Self::prettify_markdown(self.markdown))
}
/// Tidies up the whitespace of the generated Markdown.
///
/// Runs three passes in order:
/// 1. every match of `empty_line_regex` is removed,
/// 2. every match of `more_than_three_newlines_regex` is replaced by `"\n\n"`
///    (i.e. a single blank line),
/// 3. leading and trailing whitespace is trimmed from the result.
fn prettify_markdown(markdown: String) -> String {
    let without_empty_lines = empty_line_regex().replace_all(&markdown, "");
    let collapsed = more_than_three_newlines_regex().replace_all(&without_empty_lines, "\n\n");
    collapsed.trim().to_owned()
}
fn visit_node(&mut self, node: &Handle) -> Result<()> {
@ -107,12 +126,12 @@ impl MarkdownWriter {
fn start_tag(&mut self, tag: &HtmlElement) -> StartTagOutcome {
match tag.tag.as_str() {
"head" | "script" | "nav" => return StartTagOutcome::Skip,
"h1" => self.push_str("\n# "),
"h2" => self.push_str("\n## "),
"h3" => self.push_str("\n### "),
"h4" => self.push_str("\n#### "),
"h5" => self.push_str("\n##### "),
"h6" => self.push_str("\n###### "),
"h1" => self.push_str("\n\n# "),
"h2" => self.push_str("\n\n## "),
"h3" => self.push_str("\n\n### "),
"h4" => self.push_str("\n\n#### "),
"h5" => self.push_str("\n\n##### "),
"h6" => self.push_str("\n\n###### "),
"code" => {
if !self.is_inside("pre") {
self.push_str("`")