mirror of
https://github.com/zed-industries/zed.git
synced 2024-09-19 02:17:35 +03:00
html_to_markdown: Move TableHandler
out of rustdoc
(#12697)
This PR moves the `TableHandler` out of the `rustdoc` module, as it doesn't contain anything specific to rustdoc. Release Notes: - N/A
This commit is contained in:
parent
071270fe88
commit
f3460d440c
@ -16,7 +16,9 @@ use html5ever::tendril::TendrilSink;
|
||||
use html5ever::tree_builder::TreeBuilderOpts;
|
||||
use markup5ever_rcdom::RcDom;
|
||||
|
||||
use crate::markdown::{HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler};
|
||||
use crate::markdown::{
|
||||
HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler,
|
||||
};
|
||||
use crate::markdown_writer::{HandleTag, MarkdownWriter};
|
||||
|
||||
/// Converts the provided HTML to Markdown.
|
||||
@ -27,11 +29,11 @@ pub fn convert_html_to_markdown(html: impl Read) -> Result<String> {
|
||||
Box::new(ParagraphHandler),
|
||||
Box::new(HeadingHandler),
|
||||
Box::new(ListHandler),
|
||||
Box::new(TableHandler::new()),
|
||||
Box::new(StyledTextHandler),
|
||||
Box::new(structure::rustdoc::RustdocChromeRemover),
|
||||
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
||||
Box::new(structure::rustdoc::RustdocCodeHandler),
|
||||
Box::new(structure::rustdoc::RustdocTableHandler::new()),
|
||||
Box::new(structure::rustdoc::RustdocItemHandler),
|
||||
];
|
||||
|
||||
@ -51,11 +53,11 @@ pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result<String> {
|
||||
Box::new(ParagraphHandler),
|
||||
Box::new(HeadingHandler),
|
||||
Box::new(ListHandler),
|
||||
Box::new(TableHandler::new()),
|
||||
Box::new(StyledTextHandler),
|
||||
Box::new(structure::rustdoc::RustdocChromeRemover),
|
||||
Box::new(structure::rustdoc::RustdocHeadingHandler),
|
||||
Box::new(structure::rustdoc::RustdocCodeHandler),
|
||||
Box::new(structure::rustdoc::RustdocTableHandler::new()),
|
||||
Box::new(structure::rustdoc::RustdocItemHandler),
|
||||
];
|
||||
|
||||
|
@ -101,6 +101,87 @@ impl HandleTag for ListHandler {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TableHandler {
|
||||
/// The number of columns in the current `<table>`.
|
||||
current_table_columns: usize,
|
||||
is_first_th: bool,
|
||||
is_first_td: bool,
|
||||
}
|
||||
|
||||
impl TableHandler {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
current_table_columns: 0,
|
||||
is_first_th: true,
|
||||
is_first_td: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HandleTag for TableHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag.as_str() {
|
||||
"thead" => writer.push_blank_line(),
|
||||
"tr" => writer.push_newline(),
|
||||
"th" => {
|
||||
self.current_table_columns += 1;
|
||||
if self.is_first_th {
|
||||
self.is_first_th = false;
|
||||
} else {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
writer.push_str("| ");
|
||||
}
|
||||
"td" => {
|
||||
if self.is_first_td {
|
||||
self.is_first_td = false;
|
||||
} else {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
writer.push_str("| ");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag.as_str() {
|
||||
"thead" => {
|
||||
writer.push_newline();
|
||||
for ix in 0..self.current_table_columns {
|
||||
if ix > 0 {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
writer.push_str("| ---");
|
||||
}
|
||||
writer.push_str(" |");
|
||||
self.is_first_th = true;
|
||||
}
|
||||
"tr" => {
|
||||
writer.push_str(" |");
|
||||
self.is_first_td = true;
|
||||
}
|
||||
"table" => {
|
||||
self.current_table_columns = 0;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StyledTextHandler;
|
||||
|
||||
impl HandleTag for StyledTextHandler {
|
||||
|
@ -96,87 +96,6 @@ impl HandleTag for RustdocCodeHandler {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RustdocTableHandler {
|
||||
/// The number of columns in the current `<table>`.
|
||||
current_table_columns: usize,
|
||||
is_first_th: bool,
|
||||
is_first_td: bool,
|
||||
}
|
||||
|
||||
impl RustdocTableHandler {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
current_table_columns: 0,
|
||||
is_first_th: true,
|
||||
is_first_td: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl HandleTag for RustdocTableHandler {
|
||||
fn should_handle(&self, tag: &str) -> bool {
|
||||
match tag {
|
||||
"table" | "thead" | "tbody" | "tr" | "th" | "td" => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn handle_tag_start(
|
||||
&mut self,
|
||||
tag: &HtmlElement,
|
||||
writer: &mut MarkdownWriter,
|
||||
) -> StartTagOutcome {
|
||||
match tag.tag.as_str() {
|
||||
"thead" => writer.push_blank_line(),
|
||||
"tr" => writer.push_newline(),
|
||||
"th" => {
|
||||
self.current_table_columns += 1;
|
||||
if self.is_first_th {
|
||||
self.is_first_th = false;
|
||||
} else {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
writer.push_str("| ");
|
||||
}
|
||||
"td" => {
|
||||
if self.is_first_td {
|
||||
self.is_first_td = false;
|
||||
} else {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
writer.push_str("| ");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
StartTagOutcome::Continue
|
||||
}
|
||||
|
||||
fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) {
|
||||
match tag.tag.as_str() {
|
||||
"thead" => {
|
||||
writer.push_newline();
|
||||
for ix in 0..self.current_table_columns {
|
||||
if ix > 0 {
|
||||
writer.push_str(" ");
|
||||
}
|
||||
writer.push_str("| ---");
|
||||
}
|
||||
writer.push_str(" |");
|
||||
self.is_first_th = true;
|
||||
}
|
||||
"tr" => {
|
||||
writer.push_str(" |");
|
||||
self.is_first_td = true;
|
||||
}
|
||||
"table" => {
|
||||
self.current_table_columns = 0;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name";
|
||||
|
||||
pub struct RustdocItemHandler;
|
||||
|
Loading…
Reference in New Issue
Block a user