mirror of
https://github.com/zed-industries/zed.git
synced 2024-09-18 18:08:07 +03:00
Add rustdoc_to_markdown
crate (#12445)
This PR adds a new crate for converting rustdoc output to Markdown. We're leveraging Servo's `html5ever` to parse the rustdoc HTML content, and then walking the DOM nodes to convert it to a Markdown string. The Markdown output will continue to be refined, but it is already in reasonable shape. Release Notes: - N/A
This commit is contained in:
parent
a22cd95f9d
commit
5bcb9ed017
146
Cargo.lock
generated
146
Cargo.lock
generated
@ -5060,6 +5060,20 @@ version = "3.5.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4d13cdbd5dbb29f9c88095bbdc2590c9cba0d0a1269b983fef6b2cdd7e9f4db1"
|
checksum = "4d13cdbd5dbb29f9c88095bbdc2590c9cba0d0a1269b983fef6b2cdd7e9f4db1"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "html5ever"
|
||||||
|
version = "0.27.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"mac",
|
||||||
|
"markup5ever",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn 2.0.59",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http"
|
name = "http"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@ -5719,7 +5733,7 @@ dependencies = [
|
|||||||
"tree-sitter-embedded-template",
|
"tree-sitter-embedded-template",
|
||||||
"tree-sitter-heex",
|
"tree-sitter-heex",
|
||||||
"tree-sitter-html",
|
"tree-sitter-html",
|
||||||
"tree-sitter-json 0.20.2",
|
"tree-sitter-json",
|
||||||
"tree-sitter-markdown",
|
"tree-sitter-markdown",
|
||||||
"tree-sitter-ruby",
|
"tree-sitter-ruby",
|
||||||
"tree-sitter-rust",
|
"tree-sitter-rust",
|
||||||
@ -5809,7 +5823,7 @@ dependencies = [
|
|||||||
"tree-sitter-gomod",
|
"tree-sitter-gomod",
|
||||||
"tree-sitter-gowork",
|
"tree-sitter-gowork",
|
||||||
"tree-sitter-jsdoc",
|
"tree-sitter-jsdoc",
|
||||||
"tree-sitter-json 0.20.2",
|
"tree-sitter-json",
|
||||||
"tree-sitter-markdown",
|
"tree-sitter-markdown",
|
||||||
"tree-sitter-proto",
|
"tree-sitter-proto",
|
||||||
"tree-sitter-python",
|
"tree-sitter-python",
|
||||||
@ -6181,6 +6195,32 @@ dependencies = [
|
|||||||
"workspace",
|
"workspace",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "markup5ever"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"string_cache",
|
||||||
|
"string_cache_codegen",
|
||||||
|
"tendril",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "markup5ever_rcdom"
|
||||||
|
version = "0.3.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "edaa21ab3701bfee5099ade5f7e1f84553fd19228cf332f13cd6e964bf59be18"
|
||||||
|
dependencies = [
|
||||||
|
"html5ever",
|
||||||
|
"markup5ever",
|
||||||
|
"tendril",
|
||||||
|
"xml5ever",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "matchers"
|
name = "matchers"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@ -7286,7 +7326,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
|
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"phf_macros",
|
"phf_macros",
|
||||||
"phf_shared",
|
"phf_shared 0.11.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_codegen"
|
||||||
|
version = "0.11.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator 0.11.2",
|
||||||
|
"phf_shared 0.11.2",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_generator"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
|
||||||
|
dependencies = [
|
||||||
|
"phf_shared 0.10.0",
|
||||||
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@ -7295,7 +7355,7 @@ version = "0.11.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
|
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"phf_shared",
|
"phf_shared 0.11.2",
|
||||||
"rand 0.8.5",
|
"rand 0.8.5",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -7305,13 +7365,22 @@ version = "0.11.2"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
|
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"phf_generator",
|
"phf_generator 0.11.2",
|
||||||
"phf_shared",
|
"phf_shared 0.11.2",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
"syn 2.0.59",
|
"syn 2.0.59",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_shared"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
|
||||||
|
dependencies = [
|
||||||
|
"siphasher 0.3.11",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "phf_shared"
|
name = "phf_shared"
|
||||||
version = "0.11.2"
|
version = "0.11.2"
|
||||||
@ -7555,6 +7624,12 @@ version = "0.2.17"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "precomputed-hash"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "prettier"
|
name = "prettier"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
@ -8554,6 +8629,16 @@ dependencies = [
|
|||||||
"semver",
|
"semver",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustdoc_to_markdown"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"html5ever",
|
||||||
|
"indoc",
|
||||||
|
"markup5ever_rcdom",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustix"
|
name = "rustix"
|
||||||
version = "0.37.23"
|
version = "0.37.23"
|
||||||
@ -9118,7 +9203,7 @@ dependencies = [
|
|||||||
"serde_json_lenient",
|
"serde_json_lenient",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"tree-sitter",
|
"tree-sitter",
|
||||||
"tree-sitter-json 0.19.0",
|
"tree-sitter-json",
|
||||||
"unindent",
|
"unindent",
|
||||||
"util",
|
"util",
|
||||||
]
|
]
|
||||||
@ -9802,6 +9887,32 @@ dependencies = [
|
|||||||
"float-cmp",
|
"float-cmp",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "string_cache"
|
||||||
|
version = "0.8.7"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
|
||||||
|
dependencies = [
|
||||||
|
"new_debug_unreachable",
|
||||||
|
"once_cell",
|
||||||
|
"parking_lot",
|
||||||
|
"phf_shared 0.10.0",
|
||||||
|
"precomputed-hash",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "string_cache_codegen"
|
||||||
|
version = "0.5.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6bb30289b722be4ff74a408c3cc27edeaad656e06cb1fe8fa9231fa59c728988"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator 0.10.0",
|
||||||
|
"phf_shared 0.10.0",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "stringprep"
|
name = "stringprep"
|
||||||
version = "0.1.4"
|
version = "0.1.4"
|
||||||
@ -10991,16 +11102,6 @@ dependencies = [
|
|||||||
"tree-sitter",
|
"tree-sitter",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tree-sitter-json"
|
|
||||||
version = "0.19.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "90b04c4e1a92139535eb9fca4ec8fa9666cc96b618005d3ae35f3c957fa92f92"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"tree-sitter",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tree-sitter-json"
|
name = "tree-sitter-json"
|
||||||
version = "0.20.2"
|
version = "0.20.2"
|
||||||
@ -12937,6 +13038,17 @@ version = "0.2.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "054a8e68b76250b253f671d1268cb7f1ae089ec35e195b2efb2a4e9a836d0621"
|
checksum = "054a8e68b76250b253f671d1268cb7f1ae089ec35e195b2efb2a4e9a836d0621"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "xml5ever"
|
||||||
|
version = "0.18.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "7c376f76ed09df711203e20c3ef5ce556f0166fa03d39590016c0fd625437fad"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"mac",
|
||||||
|
"markup5ever",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "xmlparser"
|
name = "xmlparser"
|
||||||
version = "0.13.5"
|
version = "0.13.5"
|
||||||
|
@ -76,6 +76,7 @@ members = [
|
|||||||
"crates/rich_text",
|
"crates/rich_text",
|
||||||
"crates/rope",
|
"crates/rope",
|
||||||
"crates/rpc",
|
"crates/rpc",
|
||||||
|
"crates/rustdoc_to_markdown",
|
||||||
"crates/task",
|
"crates/task",
|
||||||
"crates/tasks_ui",
|
"crates/tasks_ui",
|
||||||
"crates/search",
|
"crates/search",
|
||||||
@ -220,6 +221,7 @@ dev_server_projects = { path = "crates/dev_server_projects" }
|
|||||||
rich_text = { path = "crates/rich_text" }
|
rich_text = { path = "crates/rich_text" }
|
||||||
rope = { path = "crates/rope" }
|
rope = { path = "crates/rope" }
|
||||||
rpc = { path = "crates/rpc" }
|
rpc = { path = "crates/rpc" }
|
||||||
|
rustdoc_to_markdown = { path = "crates/rustdoc_to_markdown" }
|
||||||
task = { path = "crates/task" }
|
task = { path = "crates/task" }
|
||||||
tasks_ui = { path = "crates/tasks_ui" }
|
tasks_ui = { path = "crates/tasks_ui" }
|
||||||
search = { path = "crates/search" }
|
search = { path = "crates/search" }
|
||||||
@ -288,6 +290,7 @@ heed = { version = "0.20.1", features = [
|
|||||||
"read-txn-no-tls",
|
"read-txn-no-tls",
|
||||||
] }
|
] }
|
||||||
hex = "0.4.3"
|
hex = "0.4.3"
|
||||||
|
html5ever = "0.27.0"
|
||||||
ignore = "0.4.22"
|
ignore = "0.4.22"
|
||||||
indoc = "1"
|
indoc = "1"
|
||||||
# We explicitly disable http2 support in isahc.
|
# We explicitly disable http2 support in isahc.
|
||||||
@ -300,6 +303,7 @@ lazy_static = "1.4.0"
|
|||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
linkify = "0.10.0"
|
linkify = "0.10.0"
|
||||||
log = { version = "0.4.16", features = ["kv_unstable_serde"] }
|
log = { version = "0.4.16", features = ["kv_unstable_serde"] }
|
||||||
|
markup5ever_rcdom = "0.3.0"
|
||||||
nanoid = "0.4"
|
nanoid = "0.4"
|
||||||
nix = "0.28"
|
nix = "0.28"
|
||||||
once_cell = "1.19.0"
|
once_cell = "1.19.0"
|
||||||
|
20
crates/rustdoc_to_markdown/Cargo.toml
Normal file
20
crates/rustdoc_to_markdown/Cargo.toml
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
[package]
|
||||||
|
name = "rustdoc_to_markdown"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
publish = false
|
||||||
|
license = "GPL-3.0-or-later"
|
||||||
|
|
||||||
|
[lints]
|
||||||
|
workspace = true
|
||||||
|
|
||||||
|
[lib]
|
||||||
|
path = "src/rustdoc_to_markdown.rs"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow.workspace = true
|
||||||
|
html5ever.workspace = true
|
||||||
|
markup5ever_rcdom.workspace = true
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
indoc.workspace = true
|
1
crates/rustdoc_to_markdown/LICENSE-GPL
Symbolic link
1
crates/rustdoc_to_markdown/LICENSE-GPL
Symbolic link
@ -0,0 +1 @@
|
|||||||
|
../../LICENSE-GPL
|
29
crates/rustdoc_to_markdown/examples/test.rs
Normal file
29
crates/rustdoc_to_markdown/examples/test.rs
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
use indoc::indoc;
|
||||||
|
use rustdoc_to_markdown::convert_rustdoc_to_markdown;
|
||||||
|
|
||||||
|
pub fn main() {
|
||||||
|
let html = indoc! {"
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<h1>Hello World</h1>
|
||||||
|
<p>
|
||||||
|
Here is some content.
|
||||||
|
</p>
|
||||||
|
<h2>Some items</h2>
|
||||||
|
<ul>
|
||||||
|
<li>One</li>
|
||||||
|
<li>Two</li>
|
||||||
|
<li>Three</li>
|
||||||
|
</ul>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"};
|
||||||
|
// To test this out with some real input, try this:
|
||||||
|
//
|
||||||
|
// ```
|
||||||
|
// let html = include_str!("/path/to/zed/target/doc/gpui/index.html");
|
||||||
|
// ```
|
||||||
|
let markdown = convert_rustdoc_to_markdown(html).unwrap();
|
||||||
|
|
||||||
|
println!("{markdown}");
|
||||||
|
}
|
201
crates/rustdoc_to_markdown/src/markdown_writer.rs
Normal file
201
crates/rustdoc_to_markdown/src/markdown_writer.rs
Normal file
@ -0,0 +1,201 @@
|
|||||||
|
use std::cell::RefCell;
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use html5ever::Attribute;
|
||||||
|
use markup5ever_rcdom::{Handle, NodeData};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
struct HtmlElement {
|
||||||
|
tag: String,
|
||||||
|
attrs: RefCell<Vec<Attribute>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// What the writer should do after handling an element's start tag.
enum StartTagOutcome {
    /// Keep going: descend into the element's children.
    Continue,
    /// Ignore the element entirely, including all of its children.
    Skip,
}
|
||||||
|
|
||||||
|
pub struct MarkdownWriter {
|
||||||
|
current_element_stack: VecDeque<HtmlElement>,
|
||||||
|
/// The Markdown output.
|
||||||
|
markdown: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MarkdownWriter {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
current_element_stack: VecDeque::new(),
|
||||||
|
markdown: String::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_inside(&self, tag: &str) -> bool {
|
||||||
|
self.current_element_stack
|
||||||
|
.iter()
|
||||||
|
.any(|parent_element| parent_element.tag == tag)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_inside_heading(&self) -> bool {
|
||||||
|
["h1", "h2", "h3", "h4", "h5", "h6"]
|
||||||
|
.into_iter()
|
||||||
|
.any(|heading| self.is_inside(heading))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends the given string slice onto the end of the Markdown output.
|
||||||
|
fn push_str(&mut self, str: &str) {
|
||||||
|
self.markdown.push_str(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends a newline to the end of the Markdown output.
|
||||||
|
fn push_newline(&mut self) {
|
||||||
|
self.push_str("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn run(mut self, root_node: &Handle) -> Result<String> {
|
||||||
|
self.visit_node(&root_node)?;
|
||||||
|
Ok(self.markdown.trim().to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_node(&mut self, node: &Handle) -> Result<()> {
|
||||||
|
let mut current_element = None;
|
||||||
|
|
||||||
|
match node.data {
|
||||||
|
NodeData::Document
|
||||||
|
| NodeData::Doctype { .. }
|
||||||
|
| NodeData::ProcessingInstruction { .. }
|
||||||
|
| NodeData::Comment { .. } => {
|
||||||
|
// Currently left unimplemented, as we're not interested in this data
|
||||||
|
// at this time.
|
||||||
|
}
|
||||||
|
NodeData::Element {
|
||||||
|
ref name,
|
||||||
|
ref attrs,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
let tag_name = name.local.to_string();
|
||||||
|
if !tag_name.is_empty() {
|
||||||
|
current_element = Some(HtmlElement {
|
||||||
|
tag: tag_name,
|
||||||
|
attrs: attrs.clone(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
NodeData::Text { ref contents } => {
|
||||||
|
let text = contents.borrow().to_string();
|
||||||
|
self.visit_text(text)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(current_element) = current_element.as_ref() {
|
||||||
|
match self.start_tag(¤t_element) {
|
||||||
|
StartTagOutcome::Continue => {}
|
||||||
|
StartTagOutcome::Skip => return Ok(()),
|
||||||
|
}
|
||||||
|
|
||||||
|
self.current_element_stack
|
||||||
|
.push_back(current_element.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
for child in node.children.borrow().iter() {
|
||||||
|
self.visit_node(child)?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.current_element_stack.pop_back();
|
||||||
|
|
||||||
|
if let Some(current_element) = current_element {
|
||||||
|
self.end_tag(¤t_element);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_tag(&mut self, tag: &HtmlElement) -> StartTagOutcome {
|
||||||
|
match tag.tag.as_str() {
|
||||||
|
"head" | "script" | "nav" => return StartTagOutcome::Skip,
|
||||||
|
"h1" => self.push_str("\n# "),
|
||||||
|
"h2" => self.push_str("\n## "),
|
||||||
|
"h3" => self.push_str("\n### "),
|
||||||
|
"h4" => self.push_str("\n#### "),
|
||||||
|
"h5" => self.push_str("\n##### "),
|
||||||
|
"h6" => self.push_str("\n###### "),
|
||||||
|
"code" => {
|
||||||
|
if !self.is_inside("pre") {
|
||||||
|
self.push_str("`")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"pre" => self.push_str("\n```\n"),
|
||||||
|
"ul" | "ol" => self.push_newline(),
|
||||||
|
"li" => self.push_str("- "),
|
||||||
|
"summary" => {
|
||||||
|
if tag.attrs.borrow().iter().any(|attr| {
|
||||||
|
attr.name.local.to_string() == "class" && attr.value.to_string() == "hideme"
|
||||||
|
}) {
|
||||||
|
return StartTagOutcome::Skip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"div" | "span" => {
|
||||||
|
if tag.attrs.borrow().iter().any(|attr| {
|
||||||
|
attr.name.local.to_string() == "class"
|
||||||
|
&& attr.value.to_string() == "sidebar-elems"
|
||||||
|
}) {
|
||||||
|
return StartTagOutcome::Skip;
|
||||||
|
}
|
||||||
|
|
||||||
|
if tag.attrs.borrow().iter().any(|attr| {
|
||||||
|
attr.name.local.to_string() == "class"
|
||||||
|
&& attr.value.to_string() == "out-of-band"
|
||||||
|
}) {
|
||||||
|
return StartTagOutcome::Skip;
|
||||||
|
}
|
||||||
|
|
||||||
|
if tag.attrs.borrow().iter().any(|attr| {
|
||||||
|
attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"
|
||||||
|
}) {
|
||||||
|
self.push_str("`");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
StartTagOutcome::Continue
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_tag(&mut self, tag: &HtmlElement) {
|
||||||
|
match tag.tag.as_str() {
|
||||||
|
"h1" | "h2" | "h3" | "h4" | "h5" | "h6" => self.push_str("\n\n"),
|
||||||
|
"code" => {
|
||||||
|
if !self.is_inside("pre") {
|
||||||
|
self.push_str("`")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"pre" => self.push_str("\n```\n"),
|
||||||
|
"ul" | "ol" => self.push_newline(),
|
||||||
|
"li" => self.push_newline(),
|
||||||
|
"div" => {
|
||||||
|
if tag.attrs.borrow().iter().any(|attr| {
|
||||||
|
attr.name.local.to_string() == "class" && attr.value.to_string() == "item-name"
|
||||||
|
}) {
|
||||||
|
self.push_str("`: ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit_text(&mut self, text: String) -> Result<()> {
|
||||||
|
if self.is_inside("pre") {
|
||||||
|
self.push_str(&text);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.is_inside_heading() && self.is_inside("a") {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let trimmed_text = text.trim_matches(|char| char == '\n' || char == '\r' || char == '§');
|
||||||
|
self.push_str(trimmed_text);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
36
crates/rustdoc_to_markdown/src/rustdoc_to_markdown.rs
Normal file
36
crates/rustdoc_to_markdown/src/rustdoc_to_markdown.rs
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
//! Provides conversion from rustdoc's HTML output to Markdown.
|
||||||
|
|
||||||
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
|
mod markdown_writer;
|
||||||
|
|
||||||
|
use anyhow::{Context, Result};
|
||||||
|
use html5ever::driver::ParseOpts;
|
||||||
|
use html5ever::parse_document;
|
||||||
|
use html5ever::tendril::TendrilSink;
|
||||||
|
use html5ever::tree_builder::TreeBuilderOpts;
|
||||||
|
use markup5ever_rcdom::RcDom;
|
||||||
|
|
||||||
|
use crate::markdown_writer::MarkdownWriter;
|
||||||
|
|
||||||
|
/// Converts the provided rustdoc HTML to Markdown.
|
||||||
|
pub fn convert_rustdoc_to_markdown(html: &str) -> Result<String> {
|
||||||
|
let parse_options = ParseOpts {
|
||||||
|
tree_builder: TreeBuilderOpts {
|
||||||
|
drop_doctype: true,
|
||||||
|
..Default::default()
|
||||||
|
},
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let dom = parse_document(RcDom::default(), parse_options)
|
||||||
|
.from_utf8()
|
||||||
|
.read_from(&mut html.as_bytes())
|
||||||
|
.context("failed to parse rustdoc HTML")?;
|
||||||
|
|
||||||
|
let markdown_writer = MarkdownWriter::new();
|
||||||
|
let markdown = markdown_writer
|
||||||
|
.run(&dom.document)
|
||||||
|
.context("failed to convert rustdoc to HTML")?;
|
||||||
|
|
||||||
|
Ok(markdown)
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user