diff --git a/crates/html_to_markdown/src/html_to_markdown.rs b/crates/html_to_markdown/src/html_to_markdown.rs index 34eec2b001..5417c51dd0 100644 --- a/crates/html_to_markdown/src/html_to_markdown.rs +++ b/crates/html_to_markdown/src/html_to_markdown.rs @@ -16,7 +16,9 @@ use html5ever::tendril::TendrilSink; use html5ever::tree_builder::TreeBuilderOpts; use markup5ever_rcdom::RcDom; -use crate::markdown::{HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler}; +use crate::markdown::{ + HeadingHandler, ListHandler, ParagraphHandler, StyledTextHandler, TableHandler, +}; use crate::markdown_writer::{HandleTag, MarkdownWriter}; /// Converts the provided HTML to Markdown. @@ -27,11 +29,11 @@ pub fn convert_html_to_markdown(html: impl Read) -> Result { Box::new(ParagraphHandler), Box::new(HeadingHandler), Box::new(ListHandler), + Box::new(TableHandler::new()), Box::new(StyledTextHandler), Box::new(structure::rustdoc::RustdocChromeRemover), Box::new(structure::rustdoc::RustdocHeadingHandler), Box::new(structure::rustdoc::RustdocCodeHandler), - Box::new(structure::rustdoc::RustdocTableHandler::new()), Box::new(structure::rustdoc::RustdocItemHandler), ]; @@ -51,11 +53,11 @@ pub fn convert_rustdoc_to_markdown(html: impl Read) -> Result { Box::new(ParagraphHandler), Box::new(HeadingHandler), Box::new(ListHandler), + Box::new(TableHandler::new()), Box::new(StyledTextHandler), Box::new(structure::rustdoc::RustdocChromeRemover), Box::new(structure::rustdoc::RustdocHeadingHandler), Box::new(structure::rustdoc::RustdocCodeHandler), - Box::new(structure::rustdoc::RustdocTableHandler::new()), Box::new(structure::rustdoc::RustdocItemHandler), ]; diff --git a/crates/html_to_markdown/src/markdown.rs b/crates/html_to_markdown/src/markdown.rs index f6af5794b5..0d45b17517 100644 --- a/crates/html_to_markdown/src/markdown.rs +++ b/crates/html_to_markdown/src/markdown.rs @@ -101,6 +101,87 @@ impl HandleTag for ListHandler { } } +pub struct TableHandler { + /// The number of 
columns in the current `<table>`. + current_table_columns: usize, + is_first_th: bool, + is_first_td: bool, +} + +impl TableHandler { + pub fn new() -> Self { + Self { + current_table_columns: 0, + is_first_th: true, + is_first_td: true, + } + } +} + +impl HandleTag for TableHandler { + fn should_handle(&self, tag: &str) -> bool { + match tag { + "table" | "thead" | "tbody" | "tr" | "th" | "td" => true, + _ => false, + } + } + + fn handle_tag_start( + &mut self, + tag: &HtmlElement, + writer: &mut MarkdownWriter, + ) -> StartTagOutcome { + match tag.tag.as_str() { + "thead" => writer.push_blank_line(), + "tr" => writer.push_newline(), + "th" => { + self.current_table_columns += 1; + if self.is_first_th { + self.is_first_th = false; + } else { + writer.push_str(" "); + } + writer.push_str("| "); + } + "td" => { + if self.is_first_td { + self.is_first_td = false; + } else { + writer.push_str(" "); + } + writer.push_str("| "); + } + _ => {} + } + + StartTagOutcome::Continue + } + + fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { + match tag.tag.as_str() { + "thead" => { + writer.push_newline(); + for ix in 0..self.current_table_columns { + if ix > 0 { + writer.push_str(" "); + } + writer.push_str("| ---"); + } + writer.push_str(" |"); + self.is_first_th = true; + } + "tr" => { + writer.push_str(" |"); + self.is_first_td = true; + } + "table" => { + self.current_table_columns = 0; + } + _ => {} + } + } +} + pub struct StyledTextHandler; impl HandleTag for StyledTextHandler { diff --git a/crates/html_to_markdown/src/structure/rustdoc.rs b/crates/html_to_markdown/src/structure/rustdoc.rs index b1ae7d2425..7d6cc2f0b3 100644 --- a/crates/html_to_markdown/src/structure/rustdoc.rs +++ b/crates/html_to_markdown/src/structure/rustdoc.rs @@ -96,87 +96,6 @@ impl HandleTag for RustdocCodeHandler { } } -pub struct RustdocTableHandler { - /// The number of columns in the current `<table>
`. - current_table_columns: usize, - is_first_th: bool, - is_first_td: bool, -} - -impl RustdocTableHandler { - pub fn new() -> Self { - Self { - current_table_columns: 0, - is_first_th: true, - is_first_td: true, - } - } -} - -impl HandleTag for RustdocTableHandler { - fn should_handle(&self, tag: &str) -> bool { - match tag { - "table" | "thead" | "tbody" | "tr" | "th" | "td" => true, - _ => false, - } - } - - fn handle_tag_start( - &mut self, - tag: &HtmlElement, - writer: &mut MarkdownWriter, - ) -> StartTagOutcome { - match tag.tag.as_str() { - "thead" => writer.push_blank_line(), - "tr" => writer.push_newline(), - "th" => { - self.current_table_columns += 1; - if self.is_first_th { - self.is_first_th = false; - } else { - writer.push_str(" "); - } - writer.push_str("| "); - } - "td" => { - if self.is_first_td { - self.is_first_td = false; - } else { - writer.push_str(" "); - } - writer.push_str("| "); - } - _ => {} - } - - StartTagOutcome::Continue - } - - fn handle_tag_end(&mut self, tag: &HtmlElement, writer: &mut MarkdownWriter) { - match tag.tag.as_str() { - "thead" => { - writer.push_newline(); - for ix in 0..self.current_table_columns { - if ix > 0 { - writer.push_str(" "); - } - writer.push_str("| ---"); - } - writer.push_str(" |"); - self.is_first_th = true; - } - "tr" => { - writer.push_str(" |"); - self.is_first_td = true; - } - "table" => { - self.current_table_columns = 0; - } - _ => {} - } - } -} - const RUSTDOC_ITEM_NAME_CLASS: &str = "item-name"; pub struct RustdocItemHandler;