From dc0af0f4c15d0ffd70d8712e8437dcbed082e0b0 Mon Sep 17 00:00:00 2001 From: "Kilu.He" <108015703+qinluhe@users.noreply.github.com> Date: Thu, 2 Nov 2023 22:13:29 +0800 Subject: [PATCH] feat: support convert external data to nested json (#3848) * feat: support convert external data to nested json * fix: add some comment * fix: code review * fix: code review * fix: code view * fix: code view * fix: update tauri cargo lock file * fix: remove reduant function * fix: parse dir attribute in element * fix: add comment about parse dir * fix: code review * fix: code review * fix: code review * fix: code review --- frontend/appflowy_tauri/src-tauri/Cargo.lock | 20 +- frontend/rust-lib/Cargo.lock | 20 +- .../src/document/document_event.rs | 17 +- .../tests/document/local_test/test.rs | 58 +- frontend/rust-lib/flowy-document2/Cargo.toml | 3 +- .../rust-lib/flowy-document2/src/entities.rs | 2 + .../flowy-document2/src/event_handler.rs | 91 ++- .../rust-lib/flowy-document2/src/event_map.rs | 54 +- .../flowy-document2/src/parser/constant.rs | 88 ++- .../src/parser/document_data_parser.rs | 203 +++---- .../src/parser/external/mod.rs | 2 + .../src/parser/external/parser.rs | 40 ++ .../src/parser/external/utils.rs | 559 ++++++++++++++++++ .../flowy-document2/src/parser/mod.rs | 1 + .../src/parser/parser_entities.rs | 254 ++++---- .../flowy-document2/src/parser/utils.rs | 89 +-- .../tests/assets/html/bulleted_list.html | 2 +- .../tests/assets/html/callout.html | 4 +- .../tests/assets/html/google_docs.html | 1 + .../tests/assets/html/notion.html | 34 ++ .../tests/assets/html/numbered_list.html | 2 +- .../tests/assets/html/todo_list.html | 2 +- .../tests/assets/html/toggle_list.html | 2 +- .../tests/assets/json/google_docs.json | 351 +++++++++++ .../tests/assets/json/notion.json | 371 ++++++++++++ .../tests/assets/json/plain_text.json | 510 ++++++++++++++++ .../tests/assets/text/plain_text.txt | 64 ++ .../flowy-document2/tests/parser/html/mod.rs | 1 + 
.../tests/parser/html/parser_test.rs | 45 ++ .../flowy-document2/tests/parser/mod.rs | 3 +- .../{html_text => parse_to_html_text}/mod.rs | 0 .../{html_text => parse_to_html_text}/test.rs | 2 +- .../utils.rs | 0 33 files changed, 2543 insertions(+), 352 deletions(-) create mode 100644 frontend/rust-lib/flowy-document2/src/parser/external/mod.rs create mode 100644 frontend/rust-lib/flowy-document2/src/parser/external/parser.rs create mode 100644 frontend/rust-lib/flowy-document2/src/parser/external/utils.rs create mode 100644 frontend/rust-lib/flowy-document2/tests/assets/html/google_docs.html create mode 100644 frontend/rust-lib/flowy-document2/tests/assets/html/notion.html create mode 100644 frontend/rust-lib/flowy-document2/tests/assets/json/google_docs.json create mode 100644 frontend/rust-lib/flowy-document2/tests/assets/json/notion.json create mode 100644 frontend/rust-lib/flowy-document2/tests/assets/json/plain_text.json create mode 100644 frontend/rust-lib/flowy-document2/tests/assets/text/plain_text.txt create mode 100644 frontend/rust-lib/flowy-document2/tests/parser/html/mod.rs create mode 100644 frontend/rust-lib/flowy-document2/tests/parser/html/parser_test.rs rename frontend/rust-lib/flowy-document2/tests/parser/{html_text => parse_to_html_text}/mod.rs (100%) rename frontend/rust-lib/flowy-document2/tests/parser/{html_text => parse_to_html_text}/test.rs (90%) rename frontend/rust-lib/flowy-document2/tests/parser/{html_text => parse_to_html_text}/utils.rs (100%) diff --git a/frontend/appflowy_tauri/src-tauri/Cargo.lock b/frontend/appflowy_tauri/src-tauri/Cargo.lock index ddfc303bd2..adc7299a62 100644 --- a/frontend/appflowy_tauri/src-tauri/Cargo.lock +++ b/frontend/appflowy_tauri/src-tauri/Cargo.lock @@ -770,7 +770,7 @@ dependencies = [ "parking_lot", "realtime-entity", "reqwest", - "scraper", + "scraper 0.17.1", "serde", "serde_json", "serde_repr", @@ -2064,6 +2064,7 @@ dependencies = [ "nanoid", "parking_lot", "protobuf", + "scraper 0.18.0", "serde", 
"serde_json", "strum_macros 0.21.1", @@ -2071,6 +2072,7 @@ dependencies = [ "tokio-stream", "tracing", "uuid", + "validator", ] [[package]] @@ -5354,6 +5356,22 @@ dependencies = [ "tendril", ] +[[package]] +name = "scraper" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3693f9a0203d49a7ba8f38aa915316b3d535c1862d03dae7009cb71a3408b36a" +dependencies = [ + "ahash 0.8.3", + "cssparser 0.31.2", + "ego-tree", + "getopts", + "html5ever 0.26.0", + "once_cell", + "selectors 0.25.0", + "tendril", +] + [[package]] name = "sct" version = "0.7.0" diff --git a/frontend/rust-lib/Cargo.lock b/frontend/rust-lib/Cargo.lock index 06dd20e105..188eb32f5b 100644 --- a/frontend/rust-lib/Cargo.lock +++ b/frontend/rust-lib/Cargo.lock @@ -668,7 +668,7 @@ dependencies = [ "parking_lot", "realtime-entity", "reqwest", - "scraper", + "scraper 0.17.1", "serde", "serde_json", "serde_repr", @@ -1885,6 +1885,7 @@ dependencies = [ "nanoid", "parking_lot", "protobuf", + "scraper 0.18.0", "serde", "serde_json", "strum_macros 0.21.1", @@ -1894,6 +1895,7 @@ dependencies = [ "tracing", "tracing-subscriber", "uuid", + "validator", ] [[package]] @@ -4701,6 +4703,22 @@ dependencies = [ "tendril", ] +[[package]] +name = "scraper" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3693f9a0203d49a7ba8f38aa915316b3d535c1862d03dae7009cb71a3408b36a" +dependencies = [ + "ahash 0.8.3", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "once_cell", + "selectors", + "tendril", +] + [[package]] name = "sct" version = "0.7.0" diff --git a/frontend/rust-lib/event-integration/src/document/document_event.rs b/frontend/rust-lib/event-integration/src/document/document_event.rs index 866ce61154..4a2b782f94 100644 --- a/frontend/rust-lib/event-integration/src/document/document_event.rs +++ b/frontend/rust-lib/event-integration/src/document/document_event.rs @@ -5,7 +5,8 @@ use serde_json::Value; use 
flowy_document2::entities::*; use flowy_document2::event_map::DocumentEvent; use flowy_document2::parser::parser_entities::{ - ConvertDocumentPayloadPB, ConvertDocumentResponsePB, + ConvertDataToJsonPayloadPB, ConvertDataToJsonResponsePB, ConvertDocumentPayloadPB, + ConvertDocumentResponsePB, }; use flowy_folder2::entities::{CreateViewPayloadPB, ViewLayoutPB, ViewPB}; use flowy_folder2::event_map::FolderEvent; @@ -124,6 +125,20 @@ impl DocumentEventTest { .parse::() } + // convert data to json for document event test + pub async fn convert_data_to_json( + &self, + payload: ConvertDataToJsonPayloadPB, + ) -> ConvertDataToJsonResponsePB { + let core = &self.inner; + EventBuilder::new(core.clone()) + .event(DocumentEvent::ConvertDataToJSON) + .payload(payload) + .async_send() + .await + .parse::() + } + pub async fn create_text(&self, payload: TextDeltaPayloadPB) { let core = &self.inner; EventBuilder::new(core.clone()) diff --git a/frontend/rust-lib/event-integration/tests/document/local_test/test.rs b/frontend/rust-lib/event-integration/tests/document/local_test/test.rs index b03320f247..86cea38259 100644 --- a/frontend/rust-lib/event-integration/tests/document/local_test/test.rs +++ b/frontend/rust-lib/event-integration/tests/document/local_test/test.rs @@ -2,7 +2,9 @@ use collab_document::blocks::json_str_to_hashmap; use event_integration::document::document_event::DocumentEventTest; use event_integration::document::utils::*; use flowy_document2::entities::*; -use flowy_document2::parser::parser_entities::{ConvertDocumentPayloadPB, ExportTypePB}; +use flowy_document2::parser::parser_entities::{ + ConvertDataToJsonPayloadPB, ConvertDocumentPayloadPB, InputType, NestedBlock, ParseTypePB, +}; use serde_json::{json, Value}; use std::collections::HashMap; @@ -125,7 +127,7 @@ async fn apply_text_delta_test() { macro_rules! 
generate_convert_document_test_cases { ($($json:ident, $text:ident, $html:ident),*) => { [ - $((ExportTypePB { json: $json, text: $text, html: $html }, ($json, $text, $html))),* + $((ParseTypePB { json: $json, text: $text, html: $html }, ($json, $text, $html))),* ] }; } @@ -145,7 +147,7 @@ async fn convert_document_test() { let copy_payload = ConvertDocumentPayloadPB { document_id: view.id.to_string(), range: None, - export_types: export_types.clone(), + parse_types: export_types.clone(), }; let result = test.convert_document(copy_payload).await; assert_eq!(result.json.is_some(), *json_assert); @@ -153,3 +155,53 @@ async fn convert_document_test() { assert_eq!(result.html.is_some(), *html_assert); } } + +/// test convert data to json +/// - input html:

Hello

World!

+/// - input plain text: Hello World! +/// - output json: { "type": "page", "data": {}, "children": [{ "type": "paragraph", "children": [], "data": { "delta": [{ "insert": "Hello" }] } }, { "type": "paragraph", "children": [], "data": { "delta": [{ "insert": " World!" }] } }] } +#[tokio::test] +async fn convert_data_to_json_test() { + let test = DocumentEventTest::new().await; + let _ = test.create_document().await; + + let html = r#"

Hello

World!

"#; + let payload = ConvertDataToJsonPayloadPB { + data: html.to_string(), + input_type: InputType::Html, + }; + let result = test.convert_data_to_json(payload).await; + let expect_json = json!({ + "type": "page", + "data": {}, + "children": [{ + "type": "paragraph", + "children": [], + "data": { + "delta": [{ "insert": "Hello" }] + } + }, { + "type": "paragraph", + "children": [], + "data": { + "delta": [{ "insert": "World!" }] + } + }] + }); + + let expect_json = serde_json::from_value::(expect_json).unwrap(); + assert!(serde_json::from_str::(&result.json) + .unwrap() + .eq(&expect_json)); + + let plain_text = "Hello\nWorld!"; + let payload = ConvertDataToJsonPayloadPB { + data: plain_text.to_string(), + input_type: InputType::PlainText, + }; + let result = test.convert_data_to_json(payload).await; + + assert!(serde_json::from_str::(&result.json) + .unwrap() + .eq(&expect_json)); +} diff --git a/frontend/rust-lib/flowy-document2/Cargo.toml b/frontend/rust-lib/flowy-document2/Cargo.toml index 1bf274ad6d..332176aed4 100644 --- a/frontend/rust-lib/flowy-document2/Cargo.toml +++ b/frontend/rust-lib/flowy-document2/Cargo.toml @@ -18,7 +18,7 @@ flowy-notification = { workspace = true } flowy-error = { path = "../flowy-error", features = ["impl_from_serde", "impl_from_sqlite", "impl_from_dispatch_error", "impl_from_collab"] } lib-dispatch = { workspace = true } lib-infra = { path = "../../../shared-lib/lib-infra" } - +validator = "0.16.0" protobuf = {version = "2.28.0"} bytes = { version = "1.5" } nanoid = "0.4.0" @@ -33,6 +33,7 @@ indexmap = {version = "1.9.2", features = ["serde"]} uuid = { version = "1.3.3", features = ["v4"] } futures = "0.3.26" tokio-stream = { version = "0.1.14", features = ["sync"] } +scraper = "0.18.0" [dev-dependencies] tempfile = "3.4.0" diff --git a/frontend/rust-lib/flowy-document2/src/entities.rs b/frontend/rust-lib/flowy-document2/src/entities.rs index 52efbed21b..8e3d68ef6d 100644 --- a/frontend/rust-lib/flowy-document2/src/entities.rs 
+++ b/frontend/rust-lib/flowy-document2/src/entities.rs @@ -319,6 +319,7 @@ pub struct ExportDataPB { #[pb(index = 2)] pub export_type: ExportType, } + #[derive(PartialEq, Eq, Debug, ProtoBuf_Enum, Clone, Default)] pub enum ConvertType { #[default] @@ -337,6 +338,7 @@ impl From for ConvertType { } } +/// for convert data to document /// for the json type /// the data is the json string #[derive(Default, ProtoBuf, Debug)] diff --git a/frontend/rust-lib/flowy-document2/src/event_handler.rs b/frontend/rust-lib/flowy-document2/src/event_handler.rs index a576caf697..4f1d3bb700 100644 --- a/frontend/rust-lib/flowy-document2/src/event_handler.rs +++ b/frontend/rust-lib/flowy-document2/src/event_handler.rs @@ -12,14 +12,18 @@ use collab_document::blocks::{ }; use flowy_error::{FlowyError, FlowyResult}; -use lib_dispatch::prelude::{data_result_ok, AFPluginData, AFPluginState, DataResult}; +use lib_dispatch::prelude::{ + data_result_ok, AFPluginData, AFPluginDataValidator, AFPluginState, DataResult, +}; use crate::entities::*; use crate::parser::document_data_parser::DocumentDataParser; use crate::parser::parser_entities::{ + ConvertDataToJsonParams, ConvertDataToJsonPayloadPB, ConvertDataToJsonResponsePB, ConvertDocumentParams, ConvertDocumentPayloadPB, ConvertDocumentResponsePB, }; +use crate::parser::external::parser::ExternalDataToNestedJSONParser; use crate::{manager::DocumentManager, parser::json::parser::JsonToDocumentParser}; fn upgrade_document( @@ -309,16 +313,46 @@ impl From<(&Vec, bool)> for DocEventPB { } } -/** -* Handler for converting a document to a JSON string, HTML string, or plain text string. - -* @param data: AFPluginData<[ConvertDocumentPayloadPB]> - -* @param manager: AFPluginState> - -* @return DataResult<[ConvertDocumentResponsePB], FlowyError> - */ -pub async fn convert_document( +/// Handler for converting a document to a JSON string, HTML string, or plain text string. +/// +/// ConvertDocumentPayloadPB is the input of this event. 
+/// ConvertDocumentResponsePB is the output of this event. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ```txt +/// // document: [{ "block_id": "1", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] } }, { "block_id": "2", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] } +/// let test = DocumentEventTest::new().await; +/// let view = test.create_document().await; +/// let payload = ConvertDocumentPayloadPB { +/// document_id: view.id, +/// range: Some(RangePB { +/// start: SelectionPB { +/// block_id: "1".to_string(), +/// index: 0, +/// length: 5, +/// }, +/// end: SelectionPB { +/// block_id: "2".to_string(), +/// index: 5, +/// length: 7, +/// } +/// }), +/// parse_types: ParseTypePB { +/// json: true, +/// text: true, +/// html: true, +/// }, +/// }; +/// let result = test.convert_document(payload).await; +/// assert_eq!(result.json, Some("[{ \"block_id\": \"1\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \"Hello\" }] } }, { \"block_id\": \"2\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \" World!\" }] } }".to_string())); +/// assert_eq!(result.text, Some("Hello\n World!".to_string())); +/// assert_eq!(result.html, Some("

Hello

World!

".to_string())); +/// ``` +/// # +pub async fn convert_document_handler( data: AFPluginData, manager: AFPluginState>, ) -> DataResult { @@ -329,7 +363,7 @@ pub async fn convert_document( let document_data = document.lock().get_document_data()?; let parser = DocumentDataParser::new(Arc::new(document_data), params.range); - if !params.export_types.any_enabled() { + if !params.parse_types.any_enabled() { return data_result_ok(ConvertDocumentResponsePB::default()); } @@ -337,16 +371,43 @@ pub async fn convert_document( data_result_ok(ConvertDocumentResponsePB { json: params - .export_types + .parse_types .json .then(|| serde_json::to_string(root).unwrap_or_default()), html: params - .export_types + .parse_types .html .then(|| parser.to_html_with_json(root)), text: params - .export_types + .parse_types .text .then(|| parser.to_text_with_json(root)), }) } + +/// Handler for converting a string to a JSON string. +/// # Examples +/// Basic usage: +/// ```txt +/// let test = DocumentEventTest::new().await; +/// let payload = ConvertDataToJsonPayloadPB { +/// data: "

Hello

World!

".to_string(), +/// input_type: InputTypePB::Html, +/// }; +/// let result: ConvertDataToJsonResponsePB = test.convert_data_to_json(payload).await; +/// let expect_json = json!({ "type": "page", "data": {}, "children": [{ "type": "paragraph", "children": [], "data": { "delta": [{ "insert": "Hello" }] } }, { "type": "paragraph", "children": [], "data": { "delta": [{ "insert": " World!" }] } }] }); +/// assert!(serde_json::from_str::(&result.json).unwrap().eq(&serde_json::from_value::(expect_json).unwrap())); +/// ``` +pub(crate) async fn convert_data_to_json_handler( + data: AFPluginData, +) -> DataResult { + let payload: ConvertDataToJsonParams = data.validate()?.into_inner().try_into()?; + let parser = ExternalDataToNestedJSONParser::new(payload.data, payload.input_type); + + let result = match parser.to_nested_block() { + Some(result) => serde_json::to_string(&result)?, + None => "".to_string(), + }; + + data_result_ok(ConvertDataToJsonResponsePB { json: result }) +} diff --git a/frontend/rust-lib/flowy-document2/src/event_map.rs b/frontend/rust-lib/flowy-document2/src/event_map.rs index e7c4dcd13f..a43967aa14 100644 --- a/frontend/rust-lib/flowy-document2/src/event_map.rs +++ b/frontend/rust-lib/flowy-document2/src/event_map.rs @@ -5,7 +5,6 @@ use strum_macros::Display; use flowy_derive::{Flowy_Event, ProtoBuf_Enum}; use lib_dispatch::prelude::AFPlugin; -use crate::event_handler::convert_document; use crate::event_handler::get_snapshot_handler; use crate::{event_handler::*, manager::DocumentManager}; @@ -28,7 +27,11 @@ pub fn init(document_manager: Weak) -> AFPlugin { .event(DocumentEvent::GetDocumentSnapshots, get_snapshot_handler) .event(DocumentEvent::CreateText, create_text_handler) .event(DocumentEvent::ApplyTextDeltaEvent, apply_text_delta_handler) - .event(DocumentEvent::ConvertDocument, convert_document) + .event(DocumentEvent::ConvertDocument, convert_document_handler) + .event( + DocumentEvent::ConvertDataToJSON, + convert_data_to_json_handler, + ) } 
#[derive(Debug, Clone, PartialEq, Eq, Hash, Display, ProtoBuf_Enum, Flowy_Event)] @@ -79,48 +82,17 @@ pub enum DocumentEvent { #[event(input = "TextDeltaPayloadPB")] ApplyTextDeltaEvent = 11, - /// Handler for converting a document to a JSON string, HTML string, or plain text string. - /// - /// ConvertDocumentPayloadPB is the input of this event. - /// ConvertDocumentResponsePB is the output of this event. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ```txt - /// // document: [{ "block_id": "1", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] } }, { "block_id": "2", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] } - /// let test = DocumentEventTest::new().await; - /// let view = test.create_document().await; - /// let payload = ConvertDocumentPayloadPB { - /// document_id: view.id, - /// range: Some(RangePB { - /// start: SelectionPB { - /// block_id: "1".to_string(), - /// index: 0, - /// length: 5, - /// }, - /// end: SelectionPB { - /// block_id: "2".to_string(), - /// index: 5, - /// length: 7, - /// } - /// }), - /// export_types: ConvertTypePB { - /// json: true, - /// text: true, - /// html: true, - /// }, - /// }; - /// let result = test.convert_document(payload).await; - /// assert_eq!(result.json, Some("[{ \"block_id\": \"1\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \"Hello\" }] } }, { \"block_id\": \"2\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \" World!\" }] } }".to_string())); - /// assert_eq!(result.text, Some("Hello\n World!".to_string())); - /// assert_eq!(result.html, Some("

Hello

World!

".to_string())); - /// ``` - /// # + // document in event_handler.rs -> convert_document #[event( input = "ConvertDocumentPayloadPB", output = "ConvertDocumentResponsePB" )] ConvertDocument = 12, + + // document in event_handler.rs -> convert_data_to_json + #[event( + input = "ConvertDataToJsonPayloadPB", + output = "ConvertDataToJsonResponsePB" + )] + ConvertDataToJSON = 13, } diff --git a/frontend/rust-lib/flowy-document2/src/parser/constant.rs b/frontend/rust-lib/flowy-document2/src/parser/constant.rs index d5c4d56e6b..c13722fcd3 100644 --- a/frontend/rust-lib/flowy-document2/src/parser/constant.rs +++ b/frontend/rust-lib/flowy-document2/src/parser/constant.rs @@ -32,6 +32,92 @@ pub const CODE: &str = "code"; pub const UNDERLINE: &str = "underline"; pub const FONT_COLOR: &str = "font_color"; pub const BG_COLOR: &str = "bg_color"; -pub const HREF: &str = "href"; + pub const FORMULA: &str = "formula"; pub const MENTION: &str = "mention"; + +pub const TEXT_DIRECTION: &str = "text_direction"; + +pub const HTML_TAG_NAME: &str = "html"; +pub const HR_TAG_NAME: &str = "hr"; +pub const META_TAG_NAME: &str = "meta"; +pub const LINK_TAG_NAME: &str = "link"; +pub const SCRIPT_TAG_NAME: &str = "script"; +pub const STYLE_TAG_NAME: &str = "style"; +pub const IFRAME_TAG_NAME: &str = "iframe"; +pub const NOSCRIPT_TAG_NAME: &str = "noscript"; +pub const HEAD_TAG_NAME: &str = "head"; +pub const H1_TAG_NAME: &str = "h1"; +pub const H2_TAG_NAME: &str = "h2"; +pub const H3_TAG_NAME: &str = "h3"; +pub const H4_TAG_NAME: &str = "h4"; +pub const H5_TAG_NAME: &str = "h5"; +pub const H6_TAG_NAME: &str = "h6"; +pub const P_TAG_NAME: &str = "p"; +pub const ASIDE_TAG_NAME: &str = "aside"; +pub const ARTICLE_TAG_NAME: &str = "article"; +pub const UL_TAG_NAME: &str = "ul"; +pub const OL_TAG_NAME: &str = "ol"; +pub const LI_TAG_NAME: &str = "li"; +pub const BLOCKQUOTE_TAG_NAME: &str = "blockquote"; +pub const PRE_TAG_NAME: &str = "pre"; +pub const IMG_TAG_NAME: &str = "img"; +pub const 
B_TAG_NAME: &str = "b"; +pub const CODE_TAG_NAME: &str = "code"; +pub const STRONG_TAG_NAME: &str = "strong"; +pub const EM_TAG_NAME: &str = "em"; +pub const U_TAG_NAME: &str = "u"; +pub const S_TAG_NAME: &str = "s"; +pub const SPAN_TAG_NAME: &str = "span"; +pub const BR_TAG_NAME: &str = "br"; + +pub const A_TAG_NAME: &str = "a"; +pub const BASE_TAG_NAME: &str = "base"; +pub const ABBR_TAG_NAME: &str = "abbr"; +pub const ADDRESS_TAG_NAME: &str = "address"; +pub const DBO_TAG_NAME: &str = "bdo"; +pub const DIR_ATTR_NAME: &str = "dir"; + +pub const RTL_ATTR_VALUE: &str = "rtl"; + +pub const CITE_TAG_NAME: &str = "cite"; + +pub const DEL_TAG_NAME: &str = "del"; + +pub const DETAILS_TAG_NAME: &str = "details"; + +pub const SUMMARY_TAG_NAME: &str = "summary"; + +pub const DFN_TAG_NAME: &str = "dfn"; + +pub const DL_TAG_NAME: &str = "dl"; + +pub const I_TAG_NAME: &str = "i"; +pub const VAR_TAG_NAME: &str = "var"; + +pub const INS_TAG_NAME: &str = "ins"; +pub const MENU_TAG_NAME: &str = "menu"; + +pub const MARK_TAG_NAME: &str = "mark"; + +pub const FONT_WEIGHT: &str = "font-weight"; +pub const FONT_STYLE: &str = "font-style"; +pub const TEXT_DECORATION: &str = "text-decoration"; + +pub const BACKGROUND_COLOR: &str = "background-color"; +pub const COLOR: &str = "color"; +pub const LINE_THROUGH: &str = "line-through"; + +pub const FONT_STYLE_ITALIC: &str = "font-style: italic;"; +pub const TEXT_DECORATION_UNDERLINE: &str = "text-decoration: underline;"; +pub const TEXT_DECORATION_LINE_THROUGH: &str = "text-decoration: line-through;"; +pub const FONT_WEIGHT_BOLD: &str = "font-weight: bold;"; +pub const FONT_FAMILY_FANTASY: &str = "font-family: fantasy;"; + +pub const SRC: &str = "src"; +pub const HREF: &str = "href"; +pub const ROLE: &str = "role"; +pub const CHECKBOX: &str = "checkbox"; +pub const ARIA_CHECKED: &str = "aria-checked"; +pub const CLASS: &str = "class"; +pub const STYLE: &str = "style"; diff --git 
a/frontend/rust-lib/flowy-document2/src/parser/document_data_parser.rs b/frontend/rust-lib/flowy-document2/src/parser/document_data_parser.rs index 5339c7eff3..d92857f7b7 100644 --- a/frontend/rust-lib/flowy-document2/src/parser/document_data_parser.rs +++ b/frontend/rust-lib/flowy-document2/src/parser/document_data_parser.rs @@ -1,10 +1,7 @@ -use crate::parser::parser_entities::{ConvertBlockToHtmlParams, NestedBlock, Range}; -use crate::parser::utils::{ - block_to_nested_json, get_delta_for_block, get_delta_for_selection, get_flat_block_ids, - ConvertBlockToJsonParams, -}; +use crate::parser::constant::DELTA; +use crate::parser::parser_entities::{ConvertBlockToHtmlParams, InsertDelta, NestedBlock, Range}; +use crate::parser::utils::{get_delta_for_block, get_delta_for_selection}; use collab_document::blocks::DocumentData; -use std::collections::HashMap; use std::sync::Arc; /// DocumentDataParser is a struct for parsing a document's data and converting it to JSON, HTML, or text. @@ -61,120 +58,94 @@ impl DocumentDataParser { /// Converts the document data to a nested JSON structure, considering the optional range. pub fn to_json(&self) -> Option { let root_id = &self.document_data.page_id; - // flatten the block id list. - let block_id_list = get_flat_block_ids(root_id, &self.document_data); - - // collect the block ids in the range. - let mut in_range_block_ids = self.collect_in_range_block_ids(&block_id_list); - // insert the root block id if it is not in the in-range block ids. - if !in_range_block_ids.contains(root_id) { - in_range_block_ids.push(root_id.to_string()); - } - - // build the parameters for converting the block to JSON with the in-range block ids. - let convert_params = self.build_convert_json_params(&in_range_block_ids); - // convert the root block to JSON. - let mut root = block_to_nested_json(root_id, &convert_params)?; - - // If the start block's parent is outside the in-range selection, we need to insert the start block. 
- if self.should_insert_start_block() { - self.insert_start_block_json(&mut root, &convert_params); - } - - Some(root) + let mut children = vec![]; + let mut start_found = false; + let mut end_found = false; + self.block_to_nested_block(root_id, &mut children, &mut start_found, &mut end_found) } - /// Collects the block ids in the range. - fn collect_in_range_block_ids(&self, block_id_list: &Vec) -> Vec { - if let Some(range) = &self.range { - // Find the positions of start and end block IDs in the list - let mut start_index = block_id_list - .iter() - .position(|id| id == &range.start.block_id) - .unwrap_or(0); - let mut end_index = block_id_list - .iter() - .position(|id| id == &range.end.block_id) - .unwrap_or(0); - - if start_index > end_index { - // Swap start and end if they are in reverse order - std::mem::swap(&mut start_index, &mut end_index); - } - - // Slice the block IDs based on the positions of start and end - block_id_list[start_index..=end_index].to_vec() - } else { - // If no range is specified, return the entire list - block_id_list.to_owned() - } - } - - /// Builds the parameters for converting the block to JSON. - /// ConvertBlockToJsonParams format: - /// { - /// blocks: HashMap>, // in-range blocks - /// relation_map: HashMap>>, // in-range blocks' children - /// delta_map: HashMap, // in-range blocks' delta - /// } - fn build_convert_json_params(&self, block_id_list: &[String]) -> ConvertBlockToJsonParams { - let mut delta_map = HashMap::new(); - let mut in_range_blocks = HashMap::new(); - let mut relation_map = HashMap::new(); - - for block_id in block_id_list { - if let Some(block) = self.document_data.blocks.get(block_id) { - // Insert the block into the in-range block map. - in_range_blocks.insert(block_id.to_string(), Arc::new(block.to_owned())); - - // If the block has children, insert the children into the relation map. 
- if let Some(children) = self.document_data.meta.children_map.get(&block.children) { - relation_map.insert(block_id.to_string(), Arc::new(children.to_owned())); - } - - let delta = match &self.range { - Some(range) if block_id == &range.start.block_id => { - get_delta_for_selection(&range.start, &self.document_data) - }, - Some(range) if block_id == &range.end.block_id => { - get_delta_for_selection(&range.end, &self.document_data) - }, - _ => get_delta_for_block(block_id, &self.document_data), - }; - - // If the delta exists, insert it into the delta map. - if let Some(delta) = delta { - delta_map.insert(block_id.to_string(), delta); - } - } - } - - ConvertBlockToJsonParams { - blocks: in_range_blocks, - relation_map, - delta_map, - } - } - - // Checks if the start block should be inserted whether the start block's parent is outside the in-range selection. - fn should_insert_start_block(&self) -> bool { - if let Some(range) = &self.range { - if let Some(start_block) = self.document_data.blocks.get(&range.start.block_id) { - return start_block.parent != self.document_data.page_id; - } - } - false - } - - // Inserts the start block JSON to the root JSON. 
- fn insert_start_block_json( + fn block_to_nested_block( &self, - root: &mut NestedBlock, - convert_params: &ConvertBlockToJsonParams, + block_id: &str, + children: &mut Vec, + start_found: &mut bool, + end_found: &mut bool, + ) -> Option { + let block = self.document_data.blocks.get(block_id)?; + let delta = self.get_delta(block_id); + + // Prepare the data, including delta if available + let mut data = block.data.clone(); + if let Some(delta) = delta { + if let Ok(delta_value) = serde_json::to_value(delta) { + data.insert(DELTA.to_string(), delta_value); + } + } + + // Get the child IDs for the current block + if let Some(block_children_ids) = self.document_data.meta.children_map.get(&block.children) { + for child_id in block_children_ids { + if let Some(range) = &self.range { + if child_id == &range.start.block_id { + *start_found = true; + } + + if child_id == &range.end.block_id { + *end_found = true; + // Process the "end" block recursively + self.process_child_block(child_id, children, start_found, end_found); + break; + } + } + + if self.range.is_some() { + if !*start_found { + // Don't insert children before the "start" block is found + self.block_to_nested_block(child_id, children, start_found, end_found); + continue; + } + if *end_found { + // Stop inserting children after the "end" block is found + break; + } + } + + // Process child blocks recursively + self.process_child_block(child_id, children, start_found, end_found); + } + } + + Some(NestedBlock { + ty: block.ty.clone(), + children: children.to_owned(), + data, + }) + } + + fn get_delta(&self, block_id: &str) -> Option> { + match &self.range { + Some(range) if block_id == range.start.block_id => { + get_delta_for_selection(&range.start, &self.document_data) + }, + Some(range) if block_id == range.end.block_id => { + get_delta_for_selection(&range.end, &self.document_data) + }, + _ => get_delta_for_block(block_id, &self.document_data), + } + } + + fn process_child_block( + &self, + child_id: &str, 
+ children: &mut Vec, + start_found: &mut bool, + end_found: &mut bool, ) { - let start = &self.range.as_ref().unwrap().start; - if let Some(start_block_json) = block_to_nested_json(&start.block_id, convert_params) { - root.children.insert(0, start_block_json); + let mut child_children = vec![]; + if let Some(child) = + self.block_to_nested_block(child_id, &mut child_children, start_found, end_found) + { + children.push(child); } } } diff --git a/frontend/rust-lib/flowy-document2/src/parser/external/mod.rs b/frontend/rust-lib/flowy-document2/src/parser/external/mod.rs new file mode 100644 index 0000000000..8a43408ba1 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/src/parser/external/mod.rs @@ -0,0 +1,2 @@ +pub mod parser; +mod utils; diff --git a/frontend/rust-lib/flowy-document2/src/parser/external/parser.rs b/frontend/rust-lib/flowy-document2/src/parser/external/parser.rs new file mode 100644 index 0000000000..4bc3618744 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/src/parser/external/parser.rs @@ -0,0 +1,40 @@ +use crate::parser::external::utils::{flatten_element_to_block, parse_plaintext_to_nested_block}; +use crate::parser::parser_entities::{InputType, NestedBlock}; +use scraper::Html; + +/// External data to nested json parser. +#[derive(Debug, Clone, Default)] +pub struct ExternalDataToNestedJSONParser { + /// External data. for example: html string, plain text string. + external_data: String, + /// External data type. for example: [InputType]::Html, [InputType]::PlainText. + input_type: InputType, +} + +impl ExternalDataToNestedJSONParser { + pub fn new(data: String, input_type: InputType) -> Self { + Self { + external_data: data, + input_type, + } + } + + /// Format to nested block. + /// + /// Example: + /// - input html:

Hello

World!

+ /// - output json: + /// ```json + /// { "type": "page", "data": {}, "children": [{ "type": "paragraph", "children": [], "data": { "delta": [{ "insert": "Hello", attributes: { "bold": true } }] } }, { "type": "paragraph", "children": [], "data": { "delta": [{ "insert": " World!", attributes: null }] } }] } + /// ``` + pub fn to_nested_block(&self) -> Option { + match self.input_type { + InputType::Html => { + let fragment = Html::parse_fragment(&self.external_data); + let root_element = fragment.root_element(); + flatten_element_to_block(root_element) + }, + InputType::PlainText => parse_plaintext_to_nested_block(&self.external_data), + } + } +} diff --git a/frontend/rust-lib/flowy-document2/src/parser/external/utils.rs b/frontend/rust-lib/flowy-document2/src/parser/external/utils.rs new file mode 100644 index 0000000000..d170706cd3 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/src/parser/external/utils.rs @@ -0,0 +1,559 @@ +use crate::parser::constant::*; +use crate::parser::parser_entities::{InsertDelta, NestedBlock}; +use scraper::node::Attrs; +use scraper::ElementRef; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use std::collections::HashMap; + +const INLINE_TAGS: [&str; 18] = [ + A_TAG_NAME, + EM_TAG_NAME, + STRONG_TAG_NAME, + U_TAG_NAME, + S_TAG_NAME, + CODE_TAG_NAME, + SPAN_TAG_NAME, + ADDRESS_TAG_NAME, + BASE_TAG_NAME, + CITE_TAG_NAME, + DFN_TAG_NAME, + I_TAG_NAME, + VAR_TAG_NAME, + ABBR_TAG_NAME, + INS_TAG_NAME, + DEL_TAG_NAME, + MARK_TAG_NAME, + "", +]; + +const LINK_TAGS: [&str; 2] = [A_TAG_NAME, BASE_TAG_NAME]; +const ITALIC_TAGS: [&str; 6] = [ + EM_TAG_NAME, + I_TAG_NAME, + VAR_TAG_NAME, + CITE_TAG_NAME, + DFN_TAG_NAME, + ADDRESS_TAG_NAME, +]; + +const BOLD_TAGS: [&str; 2] = [STRONG_TAG_NAME, B_TAG_NAME]; + +const UNDERLINE_TAGS: [&str; 3] = [U_TAG_NAME, ABBR_TAG_NAME, INS_TAG_NAME]; +const STRIKETHROUGH_TAGS: [&str; 2] = [S_TAG_NAME, DEL_TAG_NAME]; +const IGNORE_TAGS: [&str; 7] = [ + META_TAG_NAME, + HEAD_TAG_NAME, + 
LINK_TAG_NAME, + SCRIPT_TAG_NAME, + STYLE_TAG_NAME, + NOSCRIPT_TAG_NAME, + IFRAME_TAG_NAME, +]; + +const HEADING_TAGS: [&str; 6] = [ + H1_TAG_NAME, + H2_TAG_NAME, + H3_TAG_NAME, + H4_TAG_NAME, + H5_TAG_NAME, + H6_TAG_NAME, +]; + +const SHOULD_EXPAND_TAGS: [&str; 4] = [UL_TAG_NAME, OL_TAG_NAME, DL_TAG_NAME, MENU_TAG_NAME]; + +#[derive(Debug, Serialize, Deserialize)] +pub enum JSONResult { + Block(NestedBlock), + Delta(InsertDelta), + BlockArray(Vec), + DeltaArray(Vec), +} + +/// Flatten element to block +pub fn flatten_element_to_block(node: ElementRef) -> Option { + if let Some(JSONResult::Block(block)) = flatten_element_to_json(node, &None, &None) { + return Some(block); + } + + None +} + +/// Parse plaintext to nested block +pub fn parse_plaintext_to_nested_block(plaintext: &str) -> Option { + let lines: Vec<&str> = plaintext + .lines() + .filter(|line| !line.trim().is_empty()) + .collect(); + let mut current_block = NestedBlock { + ty: PAGE.to_string(), + ..Default::default() + }; + + for line in lines { + let mut data = HashMap::new(); + + // Insert plaintext into delta + if let Ok(delta) = serde_json::to_value(vec![InsertDelta { + insert: line.to_string(), + attributes: None, + }]) { + data.insert(DELTA.to_string(), delta); + } + + // Create a new block for each non-empty line + current_block.children.push(NestedBlock { + ty: PARAGRAPH.to_string(), + data, + children: Default::default(), + }); + } + + if current_block.children.is_empty() { + return None; + } + Some(current_block) +} + +fn flatten_element_to_json( + node: ElementRef, + list_type: &Option, + attributes: &Option>, +) -> Option { + let tag_name = get_tag_name(node.to_owned()); + + if IGNORE_TAGS.contains(&tag_name.as_str()) { + return None; + } + + if INLINE_TAGS.contains(&tag_name.as_str()) { + return process_inline_element(node, attributes.to_owned()); + } + + let mut data = HashMap::new(); + // insert dir into attrs when dir is rtl + // for example: Right to left -> { "attributes": { 
"text_direction": "rtl" }, "insert": "Right to left" } + if let Some(dir) = find_attribute_value(node.to_owned(), DIR_ATTR_NAME) { + data.insert(TEXT_DIRECTION.to_string(), Value::String(dir)); + } + + if HEADING_TAGS.contains(&tag_name.as_str()) { + return process_heading_element(node, data); + } + + if SHOULD_EXPAND_TAGS.contains(&tag_name.as_str()) { + return process_nested_element(node); + } + + match tag_name.as_str() { + LI_TAG_NAME => process_li_element(node, list_type.to_owned(), data), + BLOCKQUOTE_TAG_NAME | DETAILS_TAG_NAME => { + process_node_summary_and_details(QUOTE.to_string(), node, data) + }, + PRE_TAG_NAME => process_code_element(node), + IMG_TAG_NAME => process_image_element(node), + B_TAG_NAME => { + // Compatible with Google Docs, is the document top level tag, so we need to process it's children + let id = find_attribute_value(node.to_owned(), "id"); + if id.is_some() { + return process_nested_element(node); + } + process_inline_element(node, attributes.to_owned()) + }, + + _ => process_default_element(node, data), + } +} + +fn process_default_element( + node: ElementRef, + mut data: HashMap, +) -> Option { + let tag_name = get_tag_name(node.to_owned()); + + let ty = match tag_name.as_str() { + HTML_TAG_NAME => PAGE, + P_TAG_NAME => PARAGRAPH, + ASIDE_TAG_NAME | ARTICLE_TAG_NAME => CALLOUT, + HR_TAG_NAME => DIVIDER, + _ => PARAGRAPH, + }; + + let (delta, children) = process_node_children(node, &None, None); + + if !delta.is_empty() { + data.insert(DELTA.to_string(), delta_to_json(&delta)); + } + Some(JSONResult::Block(NestedBlock { + ty: ty.to_string(), + children, + data, + })) +} + +fn process_image_element(node: ElementRef) -> Option { + let mut data = HashMap::new(); + if let Some(src) = find_attribute_value(node, SRC) { + data.insert(URL.to_string(), Value::String(src)); + } + Some(JSONResult::Block(NestedBlock { + ty: IMAGE.to_string(), + children: Default::default(), + data, + })) +} + +fn process_code_element(node: ElementRef) -> 
Option { + let mut data = HashMap::new(); + + // find code element and get language and delta, then insert into data + if let Some(code_child) = find_child_node(node.to_owned(), CODE_TAG_NAME.to_string()) { + // get language + if let Some(class) = find_attribute_value(code_child.to_owned(), CLASS) { + let lang = class.split('-').last().unwrap_or_default(); + data.insert(LANGUAGE.to_string(), Value::String(lang.to_string())); + } + // get delta + let text = code_child.text().collect::(); + if let Ok(delta) = serde_json::to_value(vec![InsertDelta { + insert: text, + attributes: None, + }]) { + data.insert(DELTA.to_string(), delta); + } + } + + Some(JSONResult::Block(NestedBlock { + ty: CODE.to_string(), + children: Default::default(), + data, + })) +} + +// process "ul" | "ol" | "dl" | "menu" element +fn process_nested_element(node: ElementRef) -> Option { + let tag_name = get_tag_name(node.to_owned()); + + let ty = match tag_name.as_str() { + UL_TAG_NAME => BULLETED_LIST, + OL_TAG_NAME => NUMBERED_LIST, + _ => PARAGRAPH, + }; + let (_, children) = process_node_children(node, &Some(ty.to_string()), None); + Some(JSONResult::BlockArray(children)) +} + +// process
  • element, if it's a checkbox, then return a todo list, otherwise return a normal list. +fn process_li_element( + node: ElementRef, + list_type: Option, + mut data: HashMap, +) -> Option { + let mut ty = list_type.unwrap_or(BULLETED_LIST.to_string()); + if let Some(role) = find_attribute_value(node.to_owned(), ROLE) { + if role == CHECKBOX { + if let Some(checked_attr) = find_attribute_value(node.to_owned(), ARIA_CHECKED) { + let checked = match checked_attr.as_str() { + "true" => true, + "false" => false, + _ => false, + }; + data.insert( + CHECKED.to_string(), + serde_json::to_value(checked).unwrap_or_default(), + ); + } + data.insert( + CHECKED.to_string(), + serde_json::to_value(false).unwrap_or_default(), + ); + ty = TODO_LIST.to_string(); + } + } + process_node_summary_and_details(ty, node, data) +} + +// Process children and handle potential nesting +//
  • +//

    title

    +//

    content

    +//
  • +// Or Process children and handle potential consecutive arrangement +//
  • title

    content

  • +// li | blockquote | details +fn process_node_summary_and_details( + ty: String, + node: ElementRef, + mut data: HashMap, +) -> Option { + let (delta, children) = process_node_children(node, &Some(ty.to_string()), None); + if delta.is_empty() { + if let Some(first_child) = children.first() { + let mut data = HashMap::new(); + if let Some(first_child_delta) = first_child.data.get(DELTA) { + data.insert(DELTA.to_string(), first_child_delta.to_owned()); + let rest_children = children.iter().skip(1).cloned().collect(); + return Some(JSONResult::Block(NestedBlock { + ty, + children: rest_children, + data, + })); + } + } + } else { + data.insert(DELTA.to_string(), delta_to_json(&delta)); + } + Some(JSONResult::Block(NestedBlock { + ty, + children, + data: data.to_owned(), + })) +} + +fn process_heading_element( + node: ElementRef, + mut data: HashMap, +) -> Option { + let tag_name = get_tag_name(node.to_owned()); + let level = match tag_name.chars().last().unwrap_or_default() { + '1' => 1, + '2' => 2, + // default to h3 even if it's h4, h5, h6 + _ => 3, + }; + + data.insert( + LEVEL.to_string(), + serde_json::to_value(level).unwrap_or_default(), + ); + + let (delta, children) = process_node_children(node, &None, None); + if !delta.is_empty() { + data.insert( + DELTA.to_string(), + serde_json::to_value(delta).unwrap_or_default(), + ); + } + + Some(JSONResult::Block(NestedBlock { + ty: HEADING.to_string(), + children, + data, + })) +} + +// process
    +fn process_inline_element( + node: ElementRef, + attributes: Option>, +) -> Option { + let tag_name = get_tag_name(node.to_owned()); + + let attributes = get_delta_attributes_for(&tag_name, &get_node_attrs(node), attributes); + let (delta, children) = process_node_children(node, &None, attributes); + Some(if !delta.is_empty() { + JSONResult::DeltaArray(delta) + } else { + JSONResult::BlockArray(children) + }) +} + +fn process_node_children( + node: ElementRef, + list_type: &Option, + attributes: Option>, +) -> (Vec, Vec) { + let tag_name = get_tag_name(node.to_owned()); + let mut delta = Vec::new(); + let mut children = Vec::new(); + + for child in node.children() { + if let Some(child_element) = ElementRef::wrap(child) { + if let Some(child_json) = flatten_element_to_json(child_element, list_type, &attributes) { + match child_json { + JSONResult::Delta(op) => delta.push(op), + JSONResult::Block(block) => children.push(block), + JSONResult::BlockArray(blocks) => children.extend(blocks), + JSONResult::DeltaArray(ops) => delta.extend(ops), + } + } + } else { + // put text into delta while child is a text node + let text = child + .value() + .as_text() + .map(|text| text.text.to_string()) + .unwrap_or_default(); + + if let Some(op) = node_to_delta(&tag_name, text, &mut get_node_attrs(node), &attributes) { + delta.push(op); + } + } + } + + (delta, children) +} + +// get attributes from style +// for example: style="font-weight: bold; font-style: italic; text-decoration: underline; text-decoration: line-through;" +fn get_attributes_with_style(style: &str) -> HashMap { + let mut attributes = HashMap::new(); + + for property in style.split(';') { + let parts: Vec<&str> = property.split(':').map(|s| s.trim()).collect::>(); + + if parts.len() != 2 { + continue; + } + + let (key, value) = (parts[0], parts[1]); + + match key { + FONT_WEIGHT if value.contains(BOLD) => { + attributes.insert(BOLD.to_string(), Value::Bool(true)); + }, + FONT_STYLE if value.contains(ITALIC) 
=> { + attributes.insert(ITALIC.to_string(), Value::Bool(true)); + }, + TEXT_DECORATION if value.contains(UNDERLINE) => { + attributes.insert(UNDERLINE.to_string(), Value::Bool(true)); + }, + TEXT_DECORATION if value.contains(LINE_THROUGH) => { + attributes.insert(STRIKETHROUGH.to_string(), Value::Bool(true)); + }, + BACKGROUND_COLOR => { + attributes.insert(BG_COLOR.to_string(), Value::String(value.to_string())); + }, + COLOR => { + attributes.insert(FONT_COLOR.to_string(), Value::String(value.to_string())); + }, + _ => {}, + } + } + + attributes +} + +// get attributes from tag name +// input
    Google +// export attributes: { "href": "https://www.google.com" } +// input Italic +// export attributes: { "italic": true } +// input Bold +// export attributes: { "bold": true } +// input Underline +// export attributes: { "underline": true } +// input Strikethrough +// export attributes: { "strikethrough": true } +// input Code +// export attributes: { "code": true } +fn get_delta_attributes_for( + tag_name: &str, + attrs: &Attrs, + parent_attributes: Option>, +) -> Option> { + let href = find_attribute_value_from_attrs(attrs, HREF); + + let style = find_attribute_value_from_attrs(attrs, STYLE); + + let mut attributes = get_attributes_with_style(&style); + if let Some(parent_attributes) = parent_attributes { + parent_attributes.iter().for_each(|(k, v)| { + attributes.insert(k.to_string(), v.clone()); + }); + } + + match tag_name { + CODE_TAG_NAME => { + attributes.insert(CODE.to_string(), Value::Bool(true)); + }, + MARK_TAG_NAME => { + attributes.insert(BG_COLOR.to_string(), Value::String("#FFFF00".to_string())); + }, + _ => { + if LINK_TAGS.contains(&tag_name) { + attributes.insert(HREF.to_string(), Value::String(href)); + } + if ITALIC_TAGS.contains(&tag_name) { + attributes.insert(ITALIC.to_string(), Value::Bool(true)); + } + if BOLD_TAGS.contains(&tag_name) { + attributes.insert(BOLD.to_string(), Value::Bool(true)); + } + if UNDERLINE_TAGS.contains(&tag_name) { + attributes.insert(UNDERLINE.to_string(), Value::Bool(true)); + } + if STRIKETHROUGH_TAGS.contains(&tag_name) { + attributes.insert(STRIKETHROUGH.to_string(), Value::Bool(true)); + } + }, + } + if attributes.is_empty() { + None + } else { + Some(attributes) + } +} + +// transform text_node to delta +// input Google +// export delta: [{ "insert": "Google", "attributes": { "href": "https://www.google.com" } }] +fn node_to_delta( + tag_name: &str, + text: String, + attrs: &mut Attrs, + parent_attributes: &Option>, +) -> Option { + let attributes = get_delta_attributes_for(tag_name, attrs, 
parent_attributes.to_owned()); + if text.trim().is_empty() { + return None; + } + + Some(InsertDelta { + insert: text, + attributes, + }) +} + +// get tag name from node +fn get_tag_name(node: ElementRef) -> String { + node.value().name().to_string() +} + +fn get_node_attrs(node: ElementRef) -> Attrs { + node.value().attrs() +} +// find attribute value from node +fn find_attribute_value(node: ElementRef, attr_name: &str) -> Option { + node + .value() + .attrs() + .find(|(name, _)| *name == attr_name) + .map(|(_, value)| value.to_string()) +} + +fn find_attribute_value_from_attrs(attrs: &Attrs, attr_name: &str) -> String { + // The attrs need to be mutable, because the find method will consume the attrs + // So we clone it and use the clone one + let mut attrs = attrs.clone(); + attrs + .find(|(name, _)| *name == attr_name) + .map(|(_, value)| value.to_string()) + .unwrap_or_default() +} + +fn find_child_node(node: ElementRef, child_tag_name: String) -> Option { + node + .children() + .find(|child| { + if let Some(child_element) = ElementRef::wrap(child.to_owned()) { + return get_tag_name(child_element) == child_tag_name; + } + false + }) + .and_then(|child| ElementRef::wrap(child.to_owned())) +} + +fn delta_to_json(delta: &Vec) -> Value { + serde_json::to_value(delta).unwrap_or_default() +} diff --git a/frontend/rust-lib/flowy-document2/src/parser/mod.rs b/frontend/rust-lib/flowy-document2/src/parser/mod.rs index 0c040e6e51..305d7ee0e8 100644 --- a/frontend/rust-lib/flowy-document2/src/parser/mod.rs +++ b/frontend/rust-lib/flowy-document2/src/parser/mod.rs @@ -1,5 +1,6 @@ pub mod constant; pub mod document_data_parser; +pub mod external; pub mod json; pub mod parser_entities; pub mod utils; diff --git a/frontend/rust-lib/flowy-document2/src/parser/parser_entities.rs b/frontend/rust-lib/flowy-document2/src/parser/parser_entities.rs index 0fec927dcd..cb7bf35e27 100644 --- a/frontend/rust-lib/flowy-document2/src/parser/parser_entities.rs +++ 
b/frontend/rust-lib/flowy-document2/src/parser/parser_entities.rs @@ -1,19 +1,16 @@ use crate::parse::NotEmptyStr; -use crate::parser::constant::{ - BG_COLOR, BOLD, BULLETED_LIST, CALLOUT, CHECKED, CODE, DELTA, DIVIDER, FONT_COLOR, FORMULA, - HEADING, HREF, ICON, IMAGE, ITALIC, LANGUAGE, LEVEL, MATH_EQUATION, NUMBERED_LIST, PAGE, - PARAGRAPH, QUOTE, STRIKETHROUGH, TODO_LIST, TOGGLE_LIST, UNDERLINE, URL, -}; +use crate::parser::constant::*; use crate::parser::utils::{ convert_insert_delta_from_json, convert_nested_block_children_to_html, delta_to_html, - delta_to_text, + delta_to_text, required_not_empty_str, serialize_color_attribute, }; -use flowy_derive::ProtoBuf; +use flowy_derive::{ProtoBuf, ProtoBuf_Enum}; use flowy_error::ErrorCode; use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; use std::sync::Arc; +use validator::Validate; #[derive(Default, ProtoBuf)] pub struct SelectionPB { @@ -43,7 +40,7 @@ pub struct RangePB { * @field text: bool // export text data */ #[derive(Default, ProtoBuf, Debug, Clone)] -pub struct ExportTypePB { +pub struct ParseTypePB { #[pb(index = 1)] pub json: bool, @@ -57,7 +54,7 @@ pub struct ExportTypePB { * ConvertDocumentPayloadPB * @field document_id: String * @file range: Option - optional // if range is None, copy the whole document - * @field export_types: [ExportTypePB] + * @field parse_types: [ParseTypePB] */ #[derive(Default, ProtoBuf)] pub struct ConvertDocumentPayloadPB { @@ -68,7 +65,7 @@ pub struct ConvertDocumentPayloadPB { pub range: Option, #[pb(index = 3)] - pub export_types: ExportTypePB, + pub parse_types: ParseTypePB, } #[derive(Default, ProtoBuf, Debug)] @@ -92,7 +89,7 @@ pub struct Range { pub end: Selection, } -pub struct ExportType { +pub struct ParseType { pub json: bool, pub html: bool, pub text: bool, @@ -101,10 +98,10 @@ pub struct ExportType { pub struct ConvertDocumentParams { pub document_id: String, pub range: Option, - pub export_types: ExportType, + pub 
parse_types: ParseType, } -impl ExportType { +impl ParseType { pub fn any_enabled(&self) -> bool { self.json || self.html || self.text } @@ -129,9 +126,9 @@ impl From for Range { } } -impl From for ExportType { - fn from(data: ExportTypePB) -> Self { - ExportType { +impl From for ParseType { + fn from(data: ParseTypePB) -> Self { + ParseType { json: data.json, html: data.html, text: data.text, @@ -148,7 +145,7 @@ impl TryInto for ConvertDocumentPayloadPB { Ok(ConvertDocumentParams { document_id: document_id.0, range, - export_types: self.export_types.into(), + parse_types: self.parse_types.into(), }) } } @@ -169,88 +166,88 @@ impl InsertDelta { pub fn to_html(&self) -> String { let mut html = String::new(); let mut style = String::new(); + let mut html_attributes = String::new(); // If there are attributes, serialize them as a HashMap. if let Some(attrs) = &self.attributes { - // Serialize the font color attributes. - if let Some(color) = attrs.get(FONT_COLOR) { - style.push_str(&format!( - "color: {};", - color.to_string().replace("0x", "#").trim_matches('\"') - )); - } + // Serialize the color attributes. + style.push_str(&serialize_color_attribute(attrs, FONT_COLOR, COLOR)); // Serialize the background color attributes. - if let Some(color) = attrs.get(BG_COLOR) { - style.push_str(&format!( - "background-color: {};", - color.to_string().replace("0x", "#").trim_matches('\"') - )); - } + style.push_str(&serialize_color_attribute( + attrs, + BG_COLOR, + BACKGROUND_COLOR, + )); // Serialize the href attributes. if let Some(href) = attrs.get(HREF) { - html.push_str(&format!("", href)); + html.push_str(&format!("<{} {}={}>", A_TAG_NAME, HREF, href)); } - // Serialize the code attributes. if let Some(code) = attrs.get(CODE) { if code.as_bool().unwrap_or(false) { - html.push_str(""); + html.push_str(&format!("<{}>", CODE_TAG_NAME)); } } + // Serialize the italic, underline, strikethrough, bold, formula attributes. 
if let Some(italic) = attrs.get(ITALIC) { if italic.as_bool().unwrap_or(false) { - style.push_str("font-style: italic;"); + style.push_str(FONT_STYLE_ITALIC); } } if let Some(underline) = attrs.get(UNDERLINE) { if underline.as_bool().unwrap_or(false) { - style.push_str("text-decoration: underline;"); + style.push_str(TEXT_DECORATION_UNDERLINE); } } if let Some(strikethrough) = attrs.get(STRIKETHROUGH) { if strikethrough.as_bool().unwrap_or(false) { - style.push_str("text-decoration: line-through;"); + style.push_str(TEXT_DECORATION_LINE_THROUGH); } } if let Some(bold) = attrs.get(BOLD) { if bold.as_bool().unwrap_or(false) { - style.push_str("font-weight: bold;"); + style.push_str(FONT_WEIGHT_BOLD); } } if let Some(formula) = attrs.get(FORMULA) { if formula.as_bool().unwrap_or(false) { - style.push_str("font-family: fantasy;"); + style.push_str(FONT_FAMILY_FANTASY); } } + if let Some(direction) = attrs.get(TEXT_DIRECTION) { + html_attributes.push_str(&format!(" {}=\"{}\"", DIR_ATTR_NAME, direction)); + } } - // Serialize the attributes to style. if !style.is_empty() { - html.push_str(&format!("", style)); + html_attributes.push_str(&format!(" {}=\"{}\"", STYLE, style)); + } + + if !html_attributes.is_empty() { + html.push_str(&format!("<{}{}>", SPAN_TAG_NAME, html_attributes)); } // Serialize the insert field. html.push_str(&self.insert); // Close the style tag. - if !style.is_empty() { - html.push_str(""); + if !html_attributes.is_empty() { + html.push_str(&format!("", SPAN_TAG_NAME)); } // Close the tags: , . 
if let Some(attrs) = &self.attributes { - if attrs.contains_key(HREF) { - html.push_str(""); - } if attrs.contains_key(CODE) { - html.push_str(""); + html.push_str(&format!("", CODE_TAG_NAME)); + } + if attrs.contains_key(HREF) { + html.push_str(&format!("", A_TAG_NAME)); } } html } } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct NestedBlock { #[serde(default)] - pub id: String, #[serde(rename = "type")] pub ty: String, #[serde(default)] @@ -262,7 +259,6 @@ pub struct NestedBlock { impl Eq for NestedBlock {} impl PartialEq for NestedBlock { - // ignore the id field fn eq(&self, other: &Self) -> bool { self.ty == other.ty && self.data.iter().all(|(k, v)| { @@ -278,24 +274,9 @@ impl PartialEq for NestedBlock { } } -pub struct ConvertBlockToHtmlParams { - pub prev_block_ty: Option, - pub next_block_ty: Option, -} - impl NestedBlock { - pub fn new( - id: String, - ty: String, - data: HashMap, - children: Vec, - ) -> Self { - Self { - id, - ty, - data, - children, - } + pub fn new(ty: String, data: HashMap, children: Vec) -> Self { + Self { ty, data, children } } pub fn add_child(&mut self, child: NestedBlock) { @@ -316,115 +297,147 @@ impl NestedBlock { let next_block_ty = params.next_block_ty.unwrap_or_default(); match self.ty.as_str() { + //

    Hello

    HEADING => { let level = self.data.get(LEVEL).unwrap_or(&Value::Null); if level.as_u64().unwrap_or(0) > 6 { - html.push_str(&format!("
    {}
    ", text_html)); + html.push_str(&format!("<{}>{}", H6_TAG_NAME, text_html, H6_TAG_NAME)); } else { html.push_str(&format!("{}", level, text_html, level)); } }, + //

    Hello

    PARAGRAPH => { - html.push_str(&format!("

    {}

    ", text_html)); + html.push_str(&format!("<{}>{}", P_TAG_NAME, text_html, P_TAG_NAME)); html.push_str(&convert_nested_block_children_to_html(Arc::new( self.to_owned(), ))); }, + // CALLOUT => { html.push_str(&format!( - "

    {}{}

    ", + "<{}>{}{}", + ASIDE_TAG_NAME, self .data .get(ICON) .unwrap_or(&Value::Null) .to_string() .trim_matches('\"'), - text_html + text_html, + ASIDE_TAG_NAME )); }, + // Google Logo IMAGE => { html.push_str(&format!( - "{}", + "<{} src={} alt={} />", + IMG_TAG_NAME, self.data.get(URL).unwrap(), "AppFlowy-Image" )); }, + //
    DIVIDER => { - html.push_str("
    "); + html.push_str(&format!("<{} />", HR_TAG_NAME)); }, + //

    $$x = {-b \pm \sqrt{b^2-4ac} \over 2a}.$$

    MATH_EQUATION => { let formula = self.data.get(FORMULA).unwrap_or(&Value::Null); html.push_str(&format!( - "

    {}

    ", - formula.to_string().trim_matches('\"') + "<{}>{}", + P_TAG_NAME, + formula.to_string().trim_matches('\"'), + P_TAG_NAME )); }, + //
    console.log('Hello World!');
    CODE => { let language = self.data.get(LANGUAGE).unwrap_or(&Value::Null); html.push_str(&format!( - "
    {}
    ", + "<{}><{} {}=\"{}-{}\">{}", + PRE_TAG_NAME, + CODE_TAG_NAME, + CLASS, + LANGUAGE, language.to_string().trim_matches('\"'), - text_html + text_html, + CODE_TAG_NAME, + PRE_TAG_NAME )); }, - BULLETED_LIST | NUMBERED_LIST | TODO_LIST | TOGGLE_LIST => { - let list_type = match self.ty.as_str() { - BULLETED_LIST => "ul", - NUMBERED_LIST => "ol", - TODO_LIST => "ul", - TOGGLE_LIST => "ul", - _ => "ul", // Default to "ul" for unknown types + //
    Hello

    World!

    + TOGGLE_LIST => { + html.push_str(&format!("<{}>", DETAILS_TAG_NAME)); + html.push_str(&format!( + "<{}>{}", + SUMMARY_TAG_NAME, text_html, SUMMARY_TAG_NAME + )); + html.push_str(&convert_nested_block_children_to_html(Arc::new( + self.to_owned(), + ))); + html.push_str(&format!("", DETAILS_TAG_NAME)); + }, + //
    • Hello
    • World!
    + BULLETED_LIST | NUMBERED_LIST | TODO_LIST => { + let list_type = if self.ty == NUMBERED_LIST { + OL_TAG_NAME + } else { + UL_TAG_NAME }; if prev_block_ty != self.ty { html.push_str(&format!("<{}>", list_type)); } if self.ty == TODO_LIST { - let checked_str = if self + let checked = self .data .get(CHECKED) - .and_then(|checked| checked.as_bool()) - .unwrap_or(false) - { - "x" - } else { - " " - }; - html.push_str(&format!("
  • [{}] {}
  • ", checked_str, text_html)); + .and_then(|v| v.as_bool()) + .unwrap_or_default(); + //
  • Hello
  • + html.push_str(&format!( + "<{} {}=\"{}\" {}=\"{}\">{}", + LI_TAG_NAME, ROLE, CHECKBOX, ARIA_CHECKED, checked, text_html + )); } else { - html.push_str(&format!("
  • {}
  • ", text_html)); + html.push_str(&format!("<{}>{}", LI_TAG_NAME, text_html)); } html.push_str(&convert_nested_block_children_to_html(Arc::new( self.to_owned(), ))); + html.push_str(&format!("", LI_TAG_NAME)); if next_block_ty != self.ty { html.push_str(&format!("", list_type)); } }, + //

    Hello

    World!

    QUOTE => { if prev_block_ty != self.ty { - html.push_str("
    "); + html.push_str(&format!("<{}>", BLOCKQUOTE_TAG_NAME)); } - html.push_str(&format!("

    {}

    ", text_html)); + html.push_str(&format!("<{}>{}", P_TAG_NAME, text_html, P_TAG_NAME)); html.push_str(&convert_nested_block_children_to_html(Arc::new( self.to_owned(), ))); if next_block_ty != self.ty { - html.push_str("
    "); + html.push_str(&format!("", BLOCKQUOTE_TAG_NAME)); } }, + //

    Hello

    PAGE => { if !text_html.is_empty() { - html.push_str(&format!("

    {}

    ", text_html)); + html.push_str(&format!("<{}>{}", P_TAG_NAME, text_html, P_TAG_NAME)); } html.push_str(&convert_nested_block_children_to_html(Arc::new( self.to_owned(), ))); }, + //

    Hello

    _ => { - html.push_str(&format!("

    {}

    ", text_html)); + html.push_str(&format!("<{}>{}", P_TAG_NAME, text_html, P_TAG_NAME)); html.push_str(&convert_nested_block_children_to_html(Arc::new( self.to_owned(), ))); @@ -439,7 +452,7 @@ impl NestedBlock { let delta_text = self .data - .get("delta") + .get(DELTA) .and_then(convert_insert_delta_from_json) .map(|delta| delta_to_text(&delta)) .unwrap_or_default(); @@ -479,3 +492,46 @@ impl NestedBlock { text } } + +pub struct ConvertBlockToHtmlParams { + pub prev_block_ty: Option, + pub next_block_ty: Option, +} + +#[derive(PartialEq, Eq, Debug, ProtoBuf_Enum, Clone, Default)] +pub enum InputType { + #[default] + Html = 0, + PlainText = 1, +} + +#[derive(Default, ProtoBuf, Debug, Validate)] +pub struct ConvertDataToJsonPayloadPB { + #[pb(index = 1)] + #[validate(custom = "required_not_empty_str")] + pub data: String, + + #[pb(index = 2)] + pub input_type: InputType, +} + +pub struct ConvertDataToJsonParams { + pub data: String, + pub input_type: InputType, +} + +#[derive(Default, ProtoBuf, Debug)] +pub struct ConvertDataToJsonResponsePB { + #[pb(index = 1)] + pub json: String, +} + +impl TryInto for ConvertDataToJsonPayloadPB { + type Error = ErrorCode; + fn try_into(self) -> Result { + Ok(ConvertDataToJsonParams { + data: self.data, + input_type: self.input_type, + }) + } +} diff --git a/frontend/rust-lib/flowy-document2/src/parser/utils.rs b/frontend/rust-lib/flowy-document2/src/parser/utils.rs index 0897164e70..e5365f2227 100644 --- a/frontend/rust-lib/flowy-document2/src/parser/utils.rs +++ b/frontend/rust-lib/flowy-document2/src/parser/utils.rs @@ -1,74 +1,11 @@ -use crate::parser::constant::DELTA; use crate::parser::parser_entities::{ ConvertBlockToHtmlParams, InsertDelta, NestedBlock, Selection, }; -use collab_document::blocks::{Block, DocumentData}; +use collab_document::blocks::DocumentData; use serde_json::Value; use std::collections::HashMap; use std::sync::Arc; - -pub struct ConvertBlockToJsonParams { - pub(crate) blocks: HashMap>, - pub(crate) 
relation_map: HashMap>>, - pub(crate) delta_map: HashMap>, -} -pub fn block_to_nested_json( - block_id: &str, - convert_params: &ConvertBlockToJsonParams, -) -> Option { - let blocks = &convert_params.blocks; - let relation_map = &convert_params.relation_map; - let delta_map = &convert_params.delta_map; - // Attempt to retrieve the block using the block_id - let block = blocks.get(block_id)?; - - // Retrieve the children for this block from the relation map - let children = relation_map.get(&block.id)?; - - // Recursively convert children blocks to JSON - let children: Vec<_> = children - .iter() - .filter_map(|child_id| block_to_nested_json(child_id, convert_params)) - .collect(); - - // Clone block data - let mut data = block.data.clone(); - - // Insert delta into data if available - if let Some(delta) = delta_map.get(&block.id) { - if let Ok(delta_value) = serde_json::to_value(delta) { - data.insert(DELTA.to_string(), delta_value); - } - } - - // Create and return the NestedBlock - Some(NestedBlock { - id: block.id.to_string(), - ty: block.ty.to_string(), - children, - data, - }) -} - -pub fn get_flat_block_ids(block_id: &str, data: &DocumentData) -> Vec { - let blocks = &data.blocks; - let children_map = &data.meta.children_map; - - if let Some(block) = blocks.get(block_id) { - let mut result = vec![block.id.clone()]; - - if let Some(child_ids) = children_map.get(&block.children) { - for child_id in child_ids { - let child_blocks = get_flat_block_ids(child_id, data); - result.extend(child_blocks); - } - - return result; - } - } - - vec![] -} +use validator::ValidationError; pub fn get_delta_for_block(block_id: &str, data: &DocumentData) -> Option> { let text_map = data.meta.text_map.as_ref()?; // Retrieve the text_map reference @@ -165,3 +102,25 @@ pub fn convert_nested_block_children_to_html(block: Arc) -> String pub fn convert_insert_delta_from_json(delta_value: &Value) -> Option> { serde_json::from_value::>(delta_value.to_owned()).ok() } + +pub fn 
required_not_empty_str(s: &str) -> Result<(), ValidationError> { + if s.is_empty() { + return Err(ValidationError::new("should not be empty string")); + } + Ok(()) +} + +pub fn serialize_color_attribute( + attrs: &HashMap, + attr_name: &str, + css_property: &str, +) -> String { + if let Some(color) = attrs.get(attr_name) { + return format!( + "{}: {};", + css_property, + color.to_string().replace("0x", "#").trim_matches('\"') + ); + } + "".to_string() +} diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/bulleted_list.html b/frontend/rust-lib/flowy-document2/tests/assets/html/bulleted_list.html index ae621dac0b..bad75cfbb8 100644 --- a/frontend/rust-lib/flowy-document2/tests/assets/html/bulleted_list.html +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/bulleted_list.html @@ -1 +1 @@ -
    • Highlight
    • You can also

      • nest
    \ No newline at end of file +
    • Highlight

      You can also

      • nest
    \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/callout.html b/frontend/rust-lib/flowy-document2/tests/assets/html/callout.html index 14e7c5d4e7..09c25736c7 100644 --- a/frontend/rust-lib/flowy-document2/tests/assets/html/callout.html +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/callout.html @@ -1,6 +1,6 @@ -

    🥰 +

    \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/google_docs.html b/frontend/rust-lib/flowy-document2/tests/assets/html/google_docs.html new file mode 100644 index 0000000000..0de659e9ba --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/google_docs.html @@ -0,0 +1 @@ +

    The Notion Document

    Heading-1

    Heading - 2

    Heading - 3

    Heading - 4

    This is a paragraph

    paragraph’s child

    • This is a bulleted list - 1

      • This is a bulleted list - 1 - 1

    • This is a bulleted list - 2

    This is a paragraph

    • unticked

      This is a todo - 1

      • unticked

        This is a todo - 1-1

    This is a paragraph

    1. This is a numbered list -1

    2. This is a numbered list -2

      1. This is a numbered list-1-1

    This is a paragraph

    This is a paragraph


    This is a paragraph font-color bg-color bold italic underline strike-through inline-code inline-formula link
    \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/notion.html b/frontend/rust-lib/flowy-document2/tests/assets/html/notion.html new file mode 100644 index 0000000000..ebd0b8eb3f --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/notion.html @@ -0,0 +1,34 @@ +

    The Notion Document

    +

    Heading-1

    +

    Heading - 2

    +

    Heading - 3

    +

    This is a paragraph

    +

    paragraph’s child

    +
    • This is a bulleted list - 1
      • This is a bulleted list - 1 - 1
    • This is a bulleted list - 2
    +

    This is a paragraph

    +
    • [ ] This is a todo - 1
      • [ ] This is a paragraph - 1-1
    +
    1. This is a numbered list -1
    +

    This is a paragraph

    +
    • This is a toggle list

      This is a toggle child

    • +
    +

    This is a quote

    This is a quote child

    +

    This is a paragraph

    +
    +
    // This is the main function.
    +fn main() {
    +    // Print text to the console.
    +    **println**!("Hello World!");
    +}
    +

    This is a paragraph

    +

    <aside> + 💡 callout

    +

    </aside>

    +

    This is a paragraph font-color bg-color bold italic underline strike-through inline-code $inline-formula$ link

    +

    $$ + |x| = \begin{cases} + x, &\quad x \geq 0 \\ + -x, &\quad x < 0 + \end{cases} + $$

    +

    End

    + \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/numbered_list.html b/frontend/rust-lib/flowy-document2/tests/assets/html/numbered_list.html index 7bcc0ec06b..d4e8134c02 100644 --- a/frontend/rust-lib/flowy-document2/tests/assets/html/numbered_list.html +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/numbered_list.html @@ -1 +1 @@ -
    1. Highlight
    2. You can also

      1. nest
    \ No newline at end of file +
    1. Highlight

      You can also

      1. nest
    \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/todo_list.html b/frontend/rust-lib/flowy-document2/tests/assets/html/todo_list.html index 19f48f2410..46dcabd198 100644 --- a/frontend/rust-lib/flowy-document2/tests/assets/html/todo_list.html +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/todo_list.html @@ -1 +1 @@ -
    • [x] Highlight
    • You can also

      • [ ] nest
    \ No newline at end of file +
    • Highlight

      You can also

      • nest
    \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/html/toggle_list.html b/frontend/rust-lib/flowy-document2/tests/assets/html/toggle_list.html index a8e93bdf74..11df3f80b0 100644 --- a/frontend/rust-lib/flowy-document2/tests/assets/html/toggle_list.html +++ b/frontend/rust-lib/flowy-document2/tests/assets/html/toggle_list.html @@ -1 +1 @@ -
    • Click ? at the bottom right for help and support.
    • This is a paragraph

      • This is a toggle list
    \ No newline at end of file +
    Click ? at the bottom right for help and support.

    This is a paragraph

    This is a toggle list
    \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/json/google_docs.json b/frontend/rust-lib/flowy-document2/tests/assets/json/google_docs.json new file mode 100644 index 0000000000..27aa86f462 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/assets/json/google_docs.json @@ -0,0 +1,351 @@ +{ + "children": [ + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "The Notion Document" + } + ], + "level": 1, + "text_direction": "ltr" + }, + "type": "heading" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "Heading-1" + } + ], + "level": 1, + "text_direction": "ltr" + }, + "type": "heading" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "Heading - 2" + } + ], + "level": 2, + "text_direction": "ltr" + }, + "type": "heading" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "Heading - 3" + } + ], + "level": 3, + "text_direction": "ltr" + }, + "type": "heading" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "Heading - 4" + } + ], + "level": 3, + "text_direction": "ltr" + }, + "type": "heading" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a paragraph" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "paragraph’s child" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + }, + { 
+ "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a bulleted list - 1" + } + ] + }, + "type": "bulleted_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a bulleted list - 1 - 1" + } + ] + }, + "type": "bulleted_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a bulleted list - 2" + } + ] + }, + "type": "bulleted_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a paragraph" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + }, + { + "children": [ + { + "children": [], + "data": { + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEgAAABICAYAAABV7bNHAAAA1ElEQVR4Ae3bMQ4BURSFYY2xBuwQ7BIkTGxFRj9Oo9RdkXn5TvL3L19u+2ZmZmZmZhVbpH26pFcaJ9IrndMudb/CWadHGiden1bll9MIzqd79SUd0thY20qga4NA50qgoUGgoRJo/NL/V/N+QIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEyFeEZyXQpUGgUyXQrkGgTSVQl/qGcG5pnkq3Sn0jOMv0k3Vpm05pmNjfsGPalFyOmZmZmdkbSS9cKbtzhxMAAAAASUVORK5CYII=" + }, + "type": "image" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a todo - 1" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + } + ], + "data": { + "checked": false, + "text_direction": "ltr" + }, + "type": "todo_list" + }, + { + "children": [ + { + "children": [], + "data": { + "url": 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAEgAAABICAYAAABV7bNHAAAA1ElEQVR4Ae3bMQ4BURSFYY2xBuwQ7BIkTGxFRj9Oo9RdkXn5TvL3L19u+2ZmZmZmZhVbpH26pFcaJ9IrndMudb/CWadHGiden1bll9MIzqd79SUd0thY20qga4NA50qgoUGgoRJo/NL/V/N+QIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIECBAgQIAAAQIEyFeEZyXQpUGgUyXQrkGgTSVQl/qGcG5pnkq3Sn0jOMv0k3Vpm05pmNjfsGPalFyOmZmZmdkbSS9cKbtzhxMAAAAASUVORK5CYII=" + }, + "type": "image" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a todo - 1-1" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + } + ], + "data": { + "checked": false, + "text_direction": "ltr" + }, + "type": "todo_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a paragraph" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a numbered list -1" + } + ] + }, + "type": "numbered_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a numbered list -2" + } + ] + }, + "type": "numbered_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a numbered list-1-1" + } + ] + }, + "type": "numbered_list" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a paragraph" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": { + "bg_color": "transparent", + "font_color": "#000000" + }, + "insert": "This is a 
paragraph" + } + ], + "text_direction": "ltr" + }, + "type": "paragraph" + }, + { + "children": [], + "data": {}, + "type": "divider" + }, + { + "children": [], + "data": {}, + "type": "paragraph" + } + ], + "data": {}, + "type": "page" +} \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/json/notion.json b/frontend/rust-lib/flowy-document2/tests/assets/json/notion.json new file mode 100644 index 0000000000..0e5f83fd13 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/assets/json/notion.json @@ -0,0 +1,371 @@ +{ + "type": "page", + "data": {}, + "children": [ + { + "type": "heading", + "data": { + "delta": [ + { + "attributes": null, + "insert": "The Notion Document" + } + ], + "level": 1 + }, + "children": [] + }, + { + "type": "heading", + "data": { + "level": 1, + "delta": [ + { + "attributes": null, + "insert": "Heading-1" + } + ] + }, + "children": [] + }, + { + "type": "heading", + "data": { + "level": 2, + "delta": [ + { + "attributes": null, + "insert": "Heading - 2" + } + ] + }, + "children": [] + }, + { + "type": "heading", + "data": { + "level": 3, + "delta": [ + { + "attributes": null, + "insert": "Heading - 3" + } + ] + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "paragraph’s child" + } + ] + }, + "children": [] + }, + { + "type": "bulleted_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a bulleted list - 1" + } + ] + }, + "children": [ + { + "type": "bulleted_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a bulleted list - 1 - 1" + } + ] + }, + "children": [] + } + ] + }, + { + "type": "bulleted_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a bulleted list - 2" + } + ] + }, + "children": [] + }, 
+ { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "children": [] + }, + { + "type": "bulleted_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "[ ] This is a todo - 1" + } + ] + }, + "children": [ + { + "type": "bulleted_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "[ ] This is a paragraph - 1-1" + } + ] + }, + "children": [] + } + ] + }, + { + "type": "numbered_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a numbered list -1" + } + ] + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "children": [] + }, + { + "type": "bulleted_list", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a toggle list" + } + ] + }, + "children": [ + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a toggle child" + } + ] + }, + "children": [] + } + ] + }, + { + "type": "quote", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a quote" + } + ] + }, + "children": [ + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a quote child" + } + ] + }, + "children": [] + } + ] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "children": [] + }, + { + "type": "divider", + "data": {}, + "children": [] + }, + { + "type": "code", + "data": { + "delta": [ + { + "attributes": null, + "insert": "// This is the main function.\nfn main() {\n // Print text to the console.\n **println**!(\"Hello World!\");\n}" + } + ], + "language": "jsx" + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "children": [] + }, + { + "type": 
"paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "" + } + ] + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph font-color bg-color " + }, + { + "attributes": { + "bold": true + }, + "insert": "bold" + }, + { + "attributes": { + "italic": true + }, + "insert": "italic underline " + }, + { + "attributes": { + "italic": true, + "strikethrough": true + }, + "insert": "strike-through" + }, + { + "attributes": { + "code": true, + "italic": true + }, + "insert": "inline-code" + }, + { + "attributes": { + "italic": true + }, + "insert": " $inline-formula$ " + }, + { + "attributes": { + "href": "https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21", + "italic": true + }, + "insert": "link" + } + ] + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "$$\n |x| = \\begin{cases}\n x, &\\quad x \\geq 0 \\\\\n -x, &\\quad x < 0\n \\end{cases}\n $$" + } + ] + }, + "children": [] + }, + { + "type": "paragraph", + "data": { + "delta": [ + { + "attributes": null, + "insert": "End" + } + ] + }, + "children": [] + } + ] +} \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/json/plain_text.json b/frontend/rust-lib/flowy-document2/tests/assets/json/plain_text.json new file mode 100644 index 0000000000..33d86667e0 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/assets/json/plain_text.json @@ -0,0 +1,510 @@ +{ + "children": [ + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "# The Notion Document" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "# Heading-1" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "## Heading - 2" + } + ] + }, + "type": 
"paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "### Heading - 3" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "paragraph’s child" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "- This is a bulleted list - 1" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": " - This is a bulleted list - 1 - 1" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "- This is a bulleted list - 2" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "- [ ] This is a todo - 1" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": " - [ ] This is a paragraph - 1-1" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "1. 
This is a numbered list -1" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "- This is a toggle list" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": " This is a toggle child" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "> This is a quote" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": ">" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": ">" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "> This is a quote child" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": ">" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "---" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "```jsx" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "// This is the main function." 
+ } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "fn main() {" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": " // Print text to the console." + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": " **println**!(\"Hello World!\");" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "}" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "```" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "This is a paragraph font-color bg-color **bold** *italic underline ~~strike-through~~ `inline-code` $inline-formula$ [link](https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21)*" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "$$" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "|x| = \\begin{cases}             " + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "  x, &\\quad x \\geq 0 \\\\           " + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": " -x, &\\quad x < 0             " + } + ] + }, + "type": "paragraph" + }, + { + 
"children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "\\end{cases}" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "$$" + } + ] + }, + "type": "paragraph" + }, + { + "children": [], + "data": { + "delta": [ + { + "attributes": null, + "insert": "End" + } + ] + }, + "type": "paragraph" + } + ], + "data": {}, + "type": "page" +} \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/assets/text/plain_text.txt b/frontend/rust-lib/flowy-document2/tests/assets/text/plain_text.txt new file mode 100644 index 0000000000..71c07e6b78 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/assets/text/plain_text.txt @@ -0,0 +1,64 @@ +# The Notion Document + +# Heading-1 + +## Heading - 2 + +### Heading - 3 + +This is a paragraph + +paragraph’s child + +- This is a bulleted list - 1 + - This is a bulleted list - 1 - 1 +- This is a bulleted list - 2 + +This is a paragraph + +- [ ] This is a todo - 1 + - [ ] This is a paragraph - 1-1 +1. This is a numbered list -1 + +This is a paragraph + +- This is a toggle list + + This is a toggle child + + +> This is a quote +> +> +> This is a quote child +> + +This is a paragraph + +--- + +```jsx +// This is the main function. +fn main() { + // Print text to the console. 
+ **println**!("Hello World!"); +} +``` + +This is a paragraph + + + +This is a paragraph font-color bg-color **bold** *italic underline ~~strike-through~~ `inline-code` $inline-formula$ [link](https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21)* + +$$ +|x| = \begin{cases}              +  x, &\quad x \geq 0 \\            + -x, &\quad x < 0              +\end{cases} +$$ + +End \ No newline at end of file diff --git a/frontend/rust-lib/flowy-document2/tests/parser/html/mod.rs b/frontend/rust-lib/flowy-document2/tests/parser/html/mod.rs new file mode 100644 index 0000000000..945eb97109 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/parser/html/mod.rs @@ -0,0 +1 @@ +mod parser_test; diff --git a/frontend/rust-lib/flowy-document2/tests/parser/html/parser_test.rs b/frontend/rust-lib/flowy-document2/tests/parser/html/parser_test.rs new file mode 100644 index 0000000000..c70c38bea1 --- /dev/null +++ b/frontend/rust-lib/flowy-document2/tests/parser/html/parser_test.rs @@ -0,0 +1,45 @@ +use flowy_document2::parser::external::parser::ExternalDataToNestedJSONParser; +use flowy_document2::parser::parser_entities::{InputType, NestedBlock}; + +macro_rules! generate_test_cases { + ($($ty:ident),*) => { + [ + $( + ( + include_str!(concat!("../../assets/json/", stringify!($ty), ".json")), + include_str!(concat!("../../assets/html/", stringify!($ty), ".html")), + ) + ),* + ] + }; +} + +/// test convert data to json +/// - input html:

    Hello

    World!

    +#[tokio::test] +async fn html_to_document_test() { + let test_cases = generate_test_cases!(notion, google_docs); + + for (json, html) in test_cases.iter() { + let parser = ExternalDataToNestedJSONParser::new(html.to_string(), InputType::Html); + let block = parser.to_nested_block(); + assert!(block.is_some()); + let block = block.unwrap(); + let expect_block = serde_json::from_str::<NestedBlock>(json).unwrap(); + assert_eq!(block, expect_block); + } +} + +/// test convert data to json +/// - input plain text: Hello World! +#[tokio::test] +async fn plain_text_to_document_test() { + let plain_text = include_str!("../../assets/text/plain_text.txt"); + let parser = ExternalDataToNestedJSONParser::new(plain_text.to_string(), InputType::PlainText); + let block = parser.to_nested_block(); + assert!(block.is_some()); + let block = block.unwrap(); + let expect_json = include_str!("../../assets/json/plain_text.json"); + let expect_block = serde_json::from_str::<NestedBlock>(expect_json).unwrap(); + assert_eq!(block, expect_block); +} diff --git a/frontend/rust-lib/flowy-document2/tests/parser/mod.rs b/frontend/rust-lib/flowy-document2/tests/parser/mod.rs index 18ec9c9976..71758c3fe4 100644 --- a/frontend/rust-lib/flowy-document2/tests/parser/mod.rs +++ b/frontend/rust-lib/flowy-document2/tests/parser/mod.rs @@ -1,3 +1,4 @@ mod document_data_parser_test; -mod html_text; +mod html; mod json; +mod parse_to_html_text; diff --git a/frontend/rust-lib/flowy-document2/tests/parser/html_text/mod.rs b/frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/mod.rs similarity index 100% rename from frontend/rust-lib/flowy-document2/tests/parser/html_text/mod.rs rename to frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/mod.rs diff --git a/frontend/rust-lib/flowy-document2/tests/parser/html_text/test.rs b/frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/test.rs similarity index 90% rename from frontend/rust-lib/flowy-document2/tests/parser/html_text/test.rs rename
to frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/test.rs index 9935443a14..894d27e045 100644 --- a/frontend/rust-lib/flowy-document2/tests/parser/html_text/test.rs +++ b/frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/test.rs @@ -1,4 +1,4 @@ -use crate::parser::html_text::utils::{assert_document_html_eq, assert_document_text_eq}; +use crate::parser::parse_to_html_text::utils::{assert_document_html_eq, assert_document_text_eq}; macro_rules! generate_test_cases { ($($block_ty:ident),*) => { diff --git a/frontend/rust-lib/flowy-document2/tests/parser/html_text/utils.rs b/frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/utils.rs similarity index 100% rename from frontend/rust-lib/flowy-document2/tests/parser/html_text/utils.rs rename to frontend/rust-lib/flowy-document2/tests/parser/parse_to_html_text/utils.rs