feat: support convert external data to nested json (#3848)

* feat: support convert external data to nested json

* fix: add some comment

* fix: code review

* fix: code review

* fix: code review

* fix: code review

* fix: update tauri cargo lock file

* fix: remove redundant function

* fix: parse dir attribute in element

* fix: add comment about parse dir

* fix: code review

* fix: code review

* fix: code review

* fix: code review
This commit is contained in:
Kilu.He 2023-11-02 22:13:29 +08:00 committed by GitHub
parent 1ad85416d8
commit dc0af0f4c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
33 changed files with 2543 additions and 352 deletions

View File

@ -770,7 +770,7 @@ dependencies = [
"parking_lot",
"realtime-entity",
"reqwest",
"scraper",
"scraper 0.17.1",
"serde",
"serde_json",
"serde_repr",
@ -2064,6 +2064,7 @@ dependencies = [
"nanoid",
"parking_lot",
"protobuf",
"scraper 0.18.0",
"serde",
"serde_json",
"strum_macros 0.21.1",
@ -2071,6 +2072,7 @@ dependencies = [
"tokio-stream",
"tracing",
"uuid",
"validator",
]
[[package]]
@ -5354,6 +5356,22 @@ dependencies = [
"tendril",
]
[[package]]
name = "scraper"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3693f9a0203d49a7ba8f38aa915316b3d535c1862d03dae7009cb71a3408b36a"
dependencies = [
"ahash 0.8.3",
"cssparser 0.31.2",
"ego-tree",
"getopts",
"html5ever 0.26.0",
"once_cell",
"selectors 0.25.0",
"tendril",
]
[[package]]
name = "sct"
version = "0.7.0"

View File

@ -668,7 +668,7 @@ dependencies = [
"parking_lot",
"realtime-entity",
"reqwest",
"scraper",
"scraper 0.17.1",
"serde",
"serde_json",
"serde_repr",
@ -1885,6 +1885,7 @@ dependencies = [
"nanoid",
"parking_lot",
"protobuf",
"scraper 0.18.0",
"serde",
"serde_json",
"strum_macros 0.21.1",
@ -1894,6 +1895,7 @@ dependencies = [
"tracing",
"tracing-subscriber",
"uuid",
"validator",
]
[[package]]
@ -4701,6 +4703,22 @@ dependencies = [
"tendril",
]
[[package]]
name = "scraper"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3693f9a0203d49a7ba8f38aa915316b3d535c1862d03dae7009cb71a3408b36a"
dependencies = [
"ahash 0.8.3",
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"once_cell",
"selectors",
"tendril",
]
[[package]]
name = "sct"
version = "0.7.0"

View File

@ -5,7 +5,8 @@ use serde_json::Value;
use flowy_document2::entities::*;
use flowy_document2::event_map::DocumentEvent;
use flowy_document2::parser::parser_entities::{
ConvertDocumentPayloadPB, ConvertDocumentResponsePB,
ConvertDataToJsonPayloadPB, ConvertDataToJsonResponsePB, ConvertDocumentPayloadPB,
ConvertDocumentResponsePB,
};
use flowy_folder2::entities::{CreateViewPayloadPB, ViewLayoutPB, ViewPB};
use flowy_folder2::event_map::FolderEvent;
@ -124,6 +125,20 @@ impl DocumentEventTest {
.parse::<ConvertDocumentResponsePB>()
}
/// Sends a `DocumentEvent::ConvertDataToJSON` event carrying `payload` and
/// parses the reply into a `ConvertDataToJsonResponsePB`.
pub async fn convert_data_to_json(
  &self,
  payload: ConvertDataToJsonPayloadPB,
) -> ConvertDataToJsonResponsePB {
  let inner = &self.inner;
  let response = EventBuilder::new(inner.clone())
    .event(DocumentEvent::ConvertDataToJSON)
    .payload(payload)
    .async_send()
    .await;
  response.parse::<ConvertDataToJsonResponsePB>()
}
pub async fn create_text(&self, payload: TextDeltaPayloadPB) {
let core = &self.inner;
EventBuilder::new(core.clone())

View File

@ -2,7 +2,9 @@ use collab_document::blocks::json_str_to_hashmap;
use event_integration::document::document_event::DocumentEventTest;
use event_integration::document::utils::*;
use flowy_document2::entities::*;
use flowy_document2::parser::parser_entities::{ConvertDocumentPayloadPB, ExportTypePB};
use flowy_document2::parser::parser_entities::{
ConvertDataToJsonPayloadPB, ConvertDocumentPayloadPB, InputType, NestedBlock, ParseTypePB,
};
use serde_json::{json, Value};
use std::collections::HashMap;
@ -125,7 +127,7 @@ async fn apply_text_delta_test() {
macro_rules! generate_convert_document_test_cases {
($($json:ident, $text:ident, $html:ident),*) => {
[
$((ExportTypePB { json: $json, text: $text, html: $html }, ($json, $text, $html))),*
$((ParseTypePB { json: $json, text: $text, html: $html }, ($json, $text, $html))),*
]
};
}
@ -145,7 +147,7 @@ async fn convert_document_test() {
let copy_payload = ConvertDocumentPayloadPB {
document_id: view.id.to_string(),
range: None,
export_types: export_types.clone(),
parse_types: export_types.clone(),
};
let result = test.convert_document(copy_payload).await;
assert_eq!(result.json.is_some(), *json_assert);
@ -153,3 +155,53 @@ async fn convert_document_test() {
assert_eq!(result.html.is_some(), *html_assert);
}
}
/// Checks that HTML and plain-text input convert to the same nested JSON.
/// - input html: `<p>Hello</p><p>World!</p>`
/// - input plain text: `Hello\nWorld!` (two lines)
/// - expected json for both: a `page` block whose children are two
///   `paragraph` blocks with deltas `Hello` and `World!`
#[tokio::test]
async fn convert_data_to_json_test() {
  let test = DocumentEventTest::new().await;
  let _ = test.create_document().await;

  // Both inputs below must produce exactly this block tree.
  let expected: NestedBlock = serde_json::from_value(json!({
    "type": "page",
    "data": {},
    "children": [{
      "type": "paragraph",
      "children": [],
      "data": {
        "delta": [{ "insert": "Hello" }]
      }
    }, {
      "type": "paragraph",
      "children": [],
      "data": {
        "delta": [{ "insert": "World!" }]
      }
    }]
  }))
  .unwrap();

  let cases = [
    ("<p>Hello</p><p>World!</p>", InputType::Html),
    ("Hello\nWorld!", InputType::PlainText),
  ];
  for (data, input_type) in cases {
    let payload = ConvertDataToJsonPayloadPB {
      data: data.to_string(),
      input_type,
    };
    let result = test.convert_data_to_json(payload).await;
    let actual = serde_json::from_str::<NestedBlock>(&result.json).unwrap();
    assert!(actual.eq(&expected));
  }
}

View File

@ -18,7 +18,7 @@ flowy-notification = { workspace = true }
flowy-error = { path = "../flowy-error", features = ["impl_from_serde", "impl_from_sqlite", "impl_from_dispatch_error", "impl_from_collab"] }
lib-dispatch = { workspace = true }
lib-infra = { path = "../../../shared-lib/lib-infra" }
validator = "0.16.0"
protobuf = {version = "2.28.0"}
bytes = { version = "1.5" }
nanoid = "0.4.0"
@ -33,6 +33,7 @@ indexmap = {version = "1.9.2", features = ["serde"]}
uuid = { version = "1.3.3", features = ["v4"] }
futures = "0.3.26"
tokio-stream = { version = "0.1.14", features = ["sync"] }
scraper = "0.18.0"
[dev-dependencies]
tempfile = "3.4.0"

View File

@ -319,6 +319,7 @@ pub struct ExportDataPB {
#[pb(index = 2)]
pub export_type: ExportType,
}
#[derive(PartialEq, Eq, Debug, ProtoBuf_Enum, Clone, Default)]
pub enum ConvertType {
#[default]
@ -337,6 +338,7 @@ impl From<i32> for ConvertType {
}
}
/// for convert data to document
/// for the json type
/// the data is the json string
#[derive(Default, ProtoBuf, Debug)]

View File

@ -12,14 +12,18 @@ use collab_document::blocks::{
};
use flowy_error::{FlowyError, FlowyResult};
use lib_dispatch::prelude::{data_result_ok, AFPluginData, AFPluginState, DataResult};
use lib_dispatch::prelude::{
data_result_ok, AFPluginData, AFPluginDataValidator, AFPluginState, DataResult,
};
use crate::entities::*;
use crate::parser::document_data_parser::DocumentDataParser;
use crate::parser::parser_entities::{
ConvertDataToJsonParams, ConvertDataToJsonPayloadPB, ConvertDataToJsonResponsePB,
ConvertDocumentParams, ConvertDocumentPayloadPB, ConvertDocumentResponsePB,
};
use crate::parser::external::parser::ExternalDataToNestedJSONParser;
use crate::{manager::DocumentManager, parser::json::parser::JsonToDocumentParser};
fn upgrade_document(
@ -309,16 +313,46 @@ impl From<(&Vec<BlockEvent>, bool)> for DocEventPB {
}
}
/**
* Handler for converting a document to a JSON string, HTML string, or plain text string.
* @param data: AFPluginData<[ConvertDocumentPayloadPB]>
* @param manager: AFPluginState<Weak<DocumentManager>>
* @return DataResult<[ConvertDocumentResponsePB], FlowyError>
*/
pub async fn convert_document(
/// Handler for converting a document to a JSON string, HTML string, or plain text string.
///
/// ConvertDocumentPayloadPB is the input of this event.
/// ConvertDocumentResponsePB is the output of this event.
///
/// # Examples
///
/// Basic usage:
///
/// ```txt
/// // document: [{ "block_id": "1", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] } }, { "block_id": "2", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] }
/// let test = DocumentEventTest::new().await;
/// let view = test.create_document().await;
/// let payload = ConvertDocumentPayloadPB {
/// document_id: view.id,
/// range: Some(RangePB {
/// start: SelectionPB {
/// block_id: "1".to_string(),
/// index: 0,
/// length: 5,
/// },
/// end: SelectionPB {
/// block_id: "2".to_string(),
/// index: 5,
/// length: 7,
/// }
/// }),
/// parse_types: ParseTypePB {
/// json: true,
/// text: true,
/// html: true,
/// },
/// };
/// let result = test.convert_document(payload).await;
/// assert_eq!(result.json, Some("[{ \"block_id\": \"1\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \"Hello\" }] } }, { \"block_id\": \"2\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \" World!\" }] } }".to_string()));
/// assert_eq!(result.text, Some("Hello\n World!".to_string()));
/// assert_eq!(result.html, Some("<p>Hello</p><p> World!</p>".to_string()));
/// ```
/// #
pub async fn convert_document_handler(
data: AFPluginData<ConvertDocumentPayloadPB>,
manager: AFPluginState<Weak<DocumentManager>>,
) -> DataResult<ConvertDocumentResponsePB, FlowyError> {
@ -329,7 +363,7 @@ pub async fn convert_document(
let document_data = document.lock().get_document_data()?;
let parser = DocumentDataParser::new(Arc::new(document_data), params.range);
if !params.export_types.any_enabled() {
if !params.parse_types.any_enabled() {
return data_result_ok(ConvertDocumentResponsePB::default());
}
@ -337,16 +371,43 @@ pub async fn convert_document(
data_result_ok(ConvertDocumentResponsePB {
json: params
.export_types
.parse_types
.json
.then(|| serde_json::to_string(root).unwrap_or_default()),
html: params
.export_types
.parse_types
.html
.then(|| parser.to_html_with_json(root)),
text: params
.export_types
.parse_types
.text
.then(|| parser.to_text_with_json(root)),
})
}
/// Handler for converting external data (an HTML or plain-text string) to a
/// nested-block JSON string.
///
/// ConvertDataToJsonPayloadPB is the input of this event.
/// ConvertDataToJsonResponsePB is the output of this event; its `json` field
/// is an empty string when the parser produces no block.
///
/// # Examples
///
/// Basic usage:
///
/// ```txt
/// let test = DocumentEventTest::new().await;
/// let payload = ConvertDataToJsonPayloadPB {
///   data: "<p>Hello</p><p> World!</p>".to_string(),
///   input_type: InputType::Html,
/// };
/// let result: ConvertDataToJsonResponsePB = test.convert_data_to_json(payload).await;
/// let expect_json = json!({ "type": "page", "data": {}, "children": [{ "type": "paragraph", "children": [], "data": { "delta": [{ "insert": "Hello" }] } }, { "type": "paragraph", "children": [], "data": { "delta": [{ "insert": " World!" }] } }] });
/// assert!(serde_json::from_str::<NestedBlock>(&result.json).unwrap().eq(&serde_json::from_value::<NestedBlock>(expect_json).unwrap()));
/// ```
pub(crate) async fn convert_data_to_json_handler(
  data: AFPluginData<ConvertDataToJsonPayloadPB>,
) -> DataResult<ConvertDataToJsonResponsePB, FlowyError> {
  let params: ConvertDataToJsonParams = data.validate()?.into_inner().try_into()?;
  // Serialization errors are propagated; a missing block becomes "".
  let json = ExternalDataToNestedJSONParser::new(params.data, params.input_type)
    .to_nested_block()
    .map(|block| serde_json::to_string(&block))
    .transpose()?
    .unwrap_or_default();
  data_result_ok(ConvertDataToJsonResponsePB { json })
}

View File

@ -5,7 +5,6 @@ use strum_macros::Display;
use flowy_derive::{Flowy_Event, ProtoBuf_Enum};
use lib_dispatch::prelude::AFPlugin;
use crate::event_handler::convert_document;
use crate::event_handler::get_snapshot_handler;
use crate::{event_handler::*, manager::DocumentManager};
@ -28,7 +27,11 @@ pub fn init(document_manager: Weak<DocumentManager>) -> AFPlugin {
.event(DocumentEvent::GetDocumentSnapshots, get_snapshot_handler)
.event(DocumentEvent::CreateText, create_text_handler)
.event(DocumentEvent::ApplyTextDeltaEvent, apply_text_delta_handler)
.event(DocumentEvent::ConvertDocument, convert_document)
.event(DocumentEvent::ConvertDocument, convert_document_handler)
.event(
DocumentEvent::ConvertDataToJSON,
convert_data_to_json_handler,
)
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, Display, ProtoBuf_Enum, Flowy_Event)]
@ -79,48 +82,17 @@ pub enum DocumentEvent {
#[event(input = "TextDeltaPayloadPB")]
ApplyTextDeltaEvent = 11,
/// Handler for converting a document to a JSON string, HTML string, or plain text string.
///
/// ConvertDocumentPayloadPB is the input of this event.
/// ConvertDocumentResponsePB is the output of this event.
///
/// # Examples
///
/// Basic usage:
///
/// ```txt
/// // document: [{ "block_id": "1", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] } }, { "block_id": "2", "type": "paragraph", "data": {"delta": [{ "insert": "Hello World!" }] }
/// let test = DocumentEventTest::new().await;
/// let view = test.create_document().await;
/// let payload = ConvertDocumentPayloadPB {
/// document_id: view.id,
/// range: Some(RangePB {
/// start: SelectionPB {
/// block_id: "1".to_string(),
/// index: 0,
/// length: 5,
/// },
/// end: SelectionPB {
/// block_id: "2".to_string(),
/// index: 5,
/// length: 7,
/// }
/// }),
/// export_types: ConvertTypePB {
/// json: true,
/// text: true,
/// html: true,
/// },
/// };
/// let result = test.convert_document(payload).await;
/// assert_eq!(result.json, Some("[{ \"block_id\": \"1\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \"Hello\" }] } }, { \"block_id\": \"2\", \"type\": \"paragraph\", \"data\": {\"delta\": [{ \"insert\": \" World!\" }] } }".to_string()));
/// assert_eq!(result.text, Some("Hello\n World!".to_string()));
/// assert_eq!(result.html, Some("<p>Hello</p><p> World!</p>".to_string()));
/// ```
/// #
// documented in event_handler.rs -> convert_document_handler
#[event(
input = "ConvertDocumentPayloadPB",
output = "ConvertDocumentResponsePB"
)]
ConvertDocument = 12,
// documented in event_handler.rs -> convert_data_to_json_handler
#[event(
input = "ConvertDataToJsonPayloadPB",
output = "ConvertDataToJsonResponsePB"
)]
ConvertDataToJSON = 13,
}

View File

@ -32,6 +32,92 @@ pub const CODE: &str = "code";
pub const UNDERLINE: &str = "underline";
pub const FONT_COLOR: &str = "font_color";
pub const BG_COLOR: &str = "bg_color";
pub const HREF: &str = "href";
pub const FORMULA: &str = "formula";
pub const MENTION: &str = "mention";
pub const TEXT_DIRECTION: &str = "text_direction";
pub const HTML_TAG_NAME: &str = "html";
pub const HR_TAG_NAME: &str = "hr";
pub const META_TAG_NAME: &str = "meta";
pub const LINK_TAG_NAME: &str = "link";
pub const SCRIPT_TAG_NAME: &str = "script";
pub const STYLE_TAG_NAME: &str = "style";
pub const IFRAME_TAG_NAME: &str = "iframe";
pub const NOSCRIPT_TAG_NAME: &str = "noscript";
pub const HEAD_TAG_NAME: &str = "head";
pub const H1_TAG_NAME: &str = "h1";
pub const H2_TAG_NAME: &str = "h2";
pub const H3_TAG_NAME: &str = "h3";
pub const H4_TAG_NAME: &str = "h4";
pub const H5_TAG_NAME: &str = "h5";
pub const H6_TAG_NAME: &str = "h6";
pub const P_TAG_NAME: &str = "p";
pub const ASIDE_TAG_NAME: &str = "aside";
pub const ARTICLE_TAG_NAME: &str = "article";
pub const UL_TAG_NAME: &str = "ul";
pub const OL_TAG_NAME: &str = "ol";
pub const LI_TAG_NAME: &str = "li";
pub const BLOCKQUOTE_TAG_NAME: &str = "blockquote";
pub const PRE_TAG_NAME: &str = "pre";
pub const IMG_TAG_NAME: &str = "img";
pub const B_TAG_NAME: &str = "b";
pub const CODE_TAG_NAME: &str = "code";
pub const STRONG_TAG_NAME: &str = "strong";
pub const EM_TAG_NAME: &str = "em";
pub const U_TAG_NAME: &str = "u";
pub const S_TAG_NAME: &str = "s";
pub const SPAN_TAG_NAME: &str = "span";
pub const BR_TAG_NAME: &str = "br";
pub const A_TAG_NAME: &str = "a";
pub const BASE_TAG_NAME: &str = "base";
pub const ABBR_TAG_NAME: &str = "abbr";
pub const ADDRESS_TAG_NAME: &str = "address";
pub const DBO_TAG_NAME: &str = "bdo";
pub const DIR_ATTR_NAME: &str = "dir";
pub const RTL_ATTR_VALUE: &str = "rtl";
pub const CITE_TAG_NAME: &str = "cite";
pub const DEL_TAG_NAME: &str = "del";
pub const DETAILS_TAG_NAME: &str = "details";
pub const SUMMARY_TAG_NAME: &str = "summary";
pub const DFN_TAG_NAME: &str = "dfn";
pub const DL_TAG_NAME: &str = "dl";
pub const I_TAG_NAME: &str = "i";
pub const VAR_TAG_NAME: &str = "var";
pub const INS_TAG_NAME: &str = "ins";
pub const MENU_TAG_NAME: &str = "menu";
pub const MARK_TAG_NAME: &str = "mark";
pub const FONT_WEIGHT: &str = "font-weight";
pub const FONT_STYLE: &str = "font-style";
pub const TEXT_DECORATION: &str = "text-decoration";
pub const BACKGROUND_COLOR: &str = "background-color";
pub const COLOR: &str = "color";
pub const LINE_THROUGH: &str = "line-through";
pub const FONT_STYLE_ITALIC: &str = "font-style: italic;";
pub const TEXT_DECORATION_UNDERLINE: &str = "text-decoration: underline;";
pub const TEXT_DECORATION_LINE_THROUGH: &str = "text-decoration: line-through;";
pub const FONT_WEIGHT_BOLD: &str = "font-weight: bold;";
pub const FONT_FAMILY_FANTASY: &str = "font-family: fantasy;";
pub const SRC: &str = "src";
pub const HREF: &str = "href";
pub const ROLE: &str = "role";
pub const CHECKBOX: &str = "checkbox";
pub const ARIA_CHECKED: &str = "aria-checked";
pub const CLASS: &str = "class";
pub const STYLE: &str = "style";

View File

@ -1,10 +1,7 @@
use crate::parser::parser_entities::{ConvertBlockToHtmlParams, NestedBlock, Range};
use crate::parser::utils::{
block_to_nested_json, get_delta_for_block, get_delta_for_selection, get_flat_block_ids,
ConvertBlockToJsonParams,
};
use crate::parser::constant::DELTA;
use crate::parser::parser_entities::{ConvertBlockToHtmlParams, InsertDelta, NestedBlock, Range};
use crate::parser::utils::{get_delta_for_block, get_delta_for_selection};
use collab_document::blocks::DocumentData;
use std::collections::HashMap;
use std::sync::Arc;
/// DocumentDataParser is a struct for parsing a document's data and converting it to JSON, HTML, or text.
@ -61,120 +58,94 @@ impl DocumentDataParser {
/// Converts the document data to a nested JSON structure, considering the optional range.
pub fn to_json(&self) -> Option<NestedBlock> {
let root_id = &self.document_data.page_id;
// flatten the block id list.
let block_id_list = get_flat_block_ids(root_id, &self.document_data);
// collect the block ids in the range.
let mut in_range_block_ids = self.collect_in_range_block_ids(&block_id_list);
// insert the root block id if it is not in the in-range block ids.
if !in_range_block_ids.contains(root_id) {
in_range_block_ids.push(root_id.to_string());
}
// build the parameters for converting the block to JSON with the in-range block ids.
let convert_params = self.build_convert_json_params(&in_range_block_ids);
// convert the root block to JSON.
let mut root = block_to_nested_json(root_id, &convert_params)?;
// If the start block's parent is outside the in-range selection, we need to insert the start block.
if self.should_insert_start_block() {
self.insert_start_block_json(&mut root, &convert_params);
}
Some(root)
let mut children = vec![];
let mut start_found = false;
let mut end_found = false;
self.block_to_nested_block(root_id, &mut children, &mut start_found, &mut end_found)
}
/// Collects the block ids in the range.
fn collect_in_range_block_ids(&self, block_id_list: &Vec<String>) -> Vec<String> {
if let Some(range) = &self.range {
// Find the positions of start and end block IDs in the list
let mut start_index = block_id_list
.iter()
.position(|id| id == &range.start.block_id)
.unwrap_or(0);
let mut end_index = block_id_list
.iter()
.position(|id| id == &range.end.block_id)
.unwrap_or(0);
if start_index > end_index {
// Swap start and end if they are in reverse order
std::mem::swap(&mut start_index, &mut end_index);
}
// Slice the block IDs based on the positions of start and end
block_id_list[start_index..=end_index].to_vec()
} else {
// If no range is specified, return the entire list
block_id_list.to_owned()
}
}
/// Builds the parameters for converting the block to JSON.
/// ConvertBlockToJsonParams format:
/// {
/// blocks: HashMap<String, Arc<Block>>, // in-range blocks
/// relation_map: HashMap<String, Arc<Vec<String>>>, // in-range blocks' children
/// delta_map: HashMap<String, String>, // in-range blocks' delta
/// }
fn build_convert_json_params(&self, block_id_list: &[String]) -> ConvertBlockToJsonParams {
let mut delta_map = HashMap::new();
let mut in_range_blocks = HashMap::new();
let mut relation_map = HashMap::new();
for block_id in block_id_list {
if let Some(block) = self.document_data.blocks.get(block_id) {
// Insert the block into the in-range block map.
in_range_blocks.insert(block_id.to_string(), Arc::new(block.to_owned()));
// If the block has children, insert the children into the relation map.
if let Some(children) = self.document_data.meta.children_map.get(&block.children) {
relation_map.insert(block_id.to_string(), Arc::new(children.to_owned()));
}
let delta = match &self.range {
Some(range) if block_id == &range.start.block_id => {
get_delta_for_selection(&range.start, &self.document_data)
},
Some(range) if block_id == &range.end.block_id => {
get_delta_for_selection(&range.end, &self.document_data)
},
_ => get_delta_for_block(block_id, &self.document_data),
};
// If the delta exists, insert it into the delta map.
if let Some(delta) = delta {
delta_map.insert(block_id.to_string(), delta);
}
}
}
ConvertBlockToJsonParams {
blocks: in_range_blocks,
relation_map,
delta_map,
}
}
// Checks if the start block should be inserted whether the start block's parent is outside the in-range selection.
fn should_insert_start_block(&self) -> bool {
if let Some(range) = &self.range {
if let Some(start_block) = self.document_data.blocks.get(&range.start.block_id) {
return start_block.parent != self.document_data.page_id;
}
}
false
}
// Inserts the start block JSON to the root JSON.
fn insert_start_block_json(
fn block_to_nested_block(
&self,
root: &mut NestedBlock,
convert_params: &ConvertBlockToJsonParams,
block_id: &str,
children: &mut Vec<NestedBlock>,
start_found: &mut bool,
end_found: &mut bool,
) -> Option<NestedBlock> {
let block = self.document_data.blocks.get(block_id)?;
let delta = self.get_delta(block_id);
// Prepare the data, including delta if available
let mut data = block.data.clone();
if let Some(delta) = delta {
if let Ok(delta_value) = serde_json::to_value(delta) {
data.insert(DELTA.to_string(), delta_value);
}
}
// Get the child IDs for the current block
if let Some(block_children_ids) = self.document_data.meta.children_map.get(&block.children) {
for child_id in block_children_ids {
if let Some(range) = &self.range {
if child_id == &range.start.block_id {
*start_found = true;
}
if child_id == &range.end.block_id {
*end_found = true;
// Process the "end" block recursively
self.process_child_block(child_id, children, start_found, end_found);
break;
}
}
if self.range.is_some() {
if !*start_found {
// Don't insert children before the "start" block is found
self.block_to_nested_block(child_id, children, start_found, end_found);
continue;
}
if *end_found {
// Stop inserting children after the "end" block is found
break;
}
}
// Process child blocks recursively
self.process_child_block(child_id, children, start_found, end_found);
}
}
Some(NestedBlock {
ty: block.ty.clone(),
children: children.to_owned(),
data,
})
}
/// Returns the delta to emit for `block_id`.
///
/// When a range is set and `block_id` is its start block (checked first) or
/// its end block, only the selected part of that block is returned via
/// `get_delta_for_selection`; every other block yields its full delta via
/// `get_delta_for_block`.
fn get_delta(&self, block_id: &str) -> Option<Vec<InsertDelta>> {
  if let Some(range) = &self.range {
    if block_id == range.start.block_id {
      return get_delta_for_selection(&range.start, &self.document_data);
    }
    if block_id == range.end.block_id {
      return get_delta_for_selection(&range.end, &self.document_data);
    }
  }
  get_delta_for_block(block_id, &self.document_data)
}
fn process_child_block(
&self,
child_id: &str,
children: &mut Vec<NestedBlock>,
start_found: &mut bool,
end_found: &mut bool,
) {
let start = &self.range.as_ref().unwrap().start;
if let Some(start_block_json) = block_to_nested_json(&start.block_id, convert_params) {
root.children.insert(0, start_block_json);
let mut child_children = vec![];
if let Some(child) =
self.block_to_nested_block(child_id, &mut child_children, start_found, end_found)
{
children.push(child);
}
}
}

View File

@ -0,0 +1,2 @@
pub mod parser;
mod utils;

View File

@ -0,0 +1,40 @@
use crate::parser::external::utils::{flatten_element_to_block, parse_plaintext_to_nested_block};
use crate::parser::parser_entities::{InputType, NestedBlock};
use scraper::Html;
/// Parser that turns external data into a nested JSON block tree.
#[derive(Debug, Clone, Default)]
pub struct ExternalDataToNestedJSONParser {
  /// The raw external input, e.g. an HTML string or a plain-text string.
  external_data: String,
  /// How `external_data` is interpreted: `InputType::Html` or `InputType::PlainText`.
  input_type: InputType,
}
impl ExternalDataToNestedJSONParser {
  /// Creates a parser for `data`, interpreted according to `input_type`.
  pub fn new(data: String, input_type: InputType) -> Self {
    let external_data = data;
    Self {
      external_data,
      input_type,
    }
  }

  /// Converts the stored data into a nested block tree.
  ///
  /// HTML input is parsed as a fragment and its root element is flattened;
  /// plain text is handed to `parse_plaintext_to_nested_block`.
  ///
  /// Example:
  /// - input html: `<p><strong>Hello</strong></p><p> World!</p>`
  /// - output json:
  /// ```json
  /// { "type": "page", "data": {}, "children": [{ "type": "paragraph", "children": [], "data": { "delta": [{ "insert": "Hello", "attributes": { "bold": true } }] } }, { "type": "paragraph", "children": [], "data": { "delta": [{ "insert": " World!", "attributes": null }] } }] }
  /// ```
  pub fn to_nested_block(&self) -> Option<NestedBlock> {
    let source = &self.external_data;
    match self.input_type {
      InputType::PlainText => parse_plaintext_to_nested_block(source),
      InputType::Html => {
        let document = Html::parse_fragment(source);
        flatten_element_to_block(document.root_element())
      },
    }
  }
}

View File

@ -0,0 +1,559 @@
use crate::parser::constant::*;
use crate::parser::parser_entities::{InsertDelta, NestedBlock};
use scraper::node::Attrs;
use scraper::ElementRef;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
// Tags that `flatten_element_to_json` hands to `process_inline_element`.
// `B_TAG_NAME` is intentionally absent here: `<b>` gets its own arm in
// `flatten_element_to_json` because `<b id=...>` is treated as a container.
// NOTE(review): the trailing "" entry presumably matches nodes for which
// `get_tag_name` yields an empty name — confirm against `get_tag_name`.
const INLINE_TAGS: [&str; 18] = [
A_TAG_NAME,
EM_TAG_NAME,
STRONG_TAG_NAME,
U_TAG_NAME,
S_TAG_NAME,
CODE_TAG_NAME,
SPAN_TAG_NAME,
ADDRESS_TAG_NAME,
BASE_TAG_NAME,
CITE_TAG_NAME,
DFN_TAG_NAME,
I_TAG_NAME,
VAR_TAG_NAME,
ABBR_TAG_NAME,
INS_TAG_NAME,
DEL_TAG_NAME,
MARK_TAG_NAME,
"",
];
// The five groups below are not referenced in the visible part of this file;
// presumably they map inline tags to delta attributes (link, italic, bold,
// underline, strikethrough) — TODO confirm at their use sites.
const LINK_TAGS: [&str; 2] = [A_TAG_NAME, BASE_TAG_NAME];
const ITALIC_TAGS: [&str; 6] = [
EM_TAG_NAME,
I_TAG_NAME,
VAR_TAG_NAME,
CITE_TAG_NAME,
DFN_TAG_NAME,
ADDRESS_TAG_NAME,
];
const BOLD_TAGS: [&str; 2] = [STRONG_TAG_NAME, B_TAG_NAME];
const UNDERLINE_TAGS: [&str; 3] = [U_TAG_NAME, ABBR_TAG_NAME, INS_TAG_NAME];
const STRIKETHROUGH_TAGS: [&str; 2] = [S_TAG_NAME, DEL_TAG_NAME];
// Tags for which `flatten_element_to_json` returns `None` (no output at all).
const IGNORE_TAGS: [&str; 7] = [
META_TAG_NAME,
HEAD_TAG_NAME,
LINK_TAG_NAME,
SCRIPT_TAG_NAME,
STYLE_TAG_NAME,
NOSCRIPT_TAG_NAME,
IFRAME_TAG_NAME,
];
// Tags that `flatten_element_to_json` hands to `process_heading_element`.
const HEADING_TAGS: [&str; 6] = [
H1_TAG_NAME,
H2_TAG_NAME,
H3_TAG_NAME,
H4_TAG_NAME,
H5_TAG_NAME,
H6_TAG_NAME,
];
// Container tags that `flatten_element_to_json` hands to
// `process_nested_element`, which returns their children as a block array
// instead of wrapping them in a block of their own.
const SHOULD_EXPAND_TAGS: [&str; 4] = [UL_TAG_NAME, OL_TAG_NAME, DL_TAG_NAME, MENU_TAG_NAME];
/// Outcome of converting one HTML node in `flatten_element_to_json`.
#[derive(Debug, Serialize, Deserialize)]
pub enum JSONResult {
/// A single block, e.g. a paragraph, image, code or list-item block.
Block(NestedBlock),
/// A single inline delta.
/// NOTE(review): presumably produced by `process_inline_element` — confirm.
Delta(InsertDelta),
/// Several sibling blocks; returned by `process_nested_element` for
/// containers whose children are emitted without a wrapping block.
BlockArray(Vec<NestedBlock>),
/// Several inline deltas.
/// NOTE(review): producer is not visible in this part of the file — confirm.
DeltaArray(Vec<InsertDelta>),
}
/// Flattens an HTML element into a single [`NestedBlock`].
///
/// Returns `None` when the conversion yields nothing or anything other than
/// exactly one block (inline deltas or a block array).
pub fn flatten_element_to_block(node: ElementRef) -> Option<NestedBlock> {
  match flatten_element_to_json(node, &None, &None) {
    Some(JSONResult::Block(block)) => Some(block),
    _ => None,
  }
}
/// Builds a page block from plain text, one paragraph per non-blank line.
///
/// Lines that are empty or whitespace-only are skipped. Each remaining line
/// becomes a paragraph block whose delta is a single attribute-less insert of
/// the line text. Returns `None` when no paragraph was produced.
pub fn parse_plaintext_to_nested_block(plaintext: &str) -> Option<NestedBlock> {
  let paragraphs: Vec<NestedBlock> = plaintext
    .lines()
    .filter(|line| !line.trim().is_empty())
    .map(|line| {
      let inserts = vec![InsertDelta {
        insert: line.to_string(),
        attributes: None,
      }];
      let mut data = HashMap::new();
      // A delta that fails to serialize is left out rather than aborting.
      if let Ok(delta) = serde_json::to_value(inserts) {
        data.insert(DELTA.to_string(), delta);
      }
      NestedBlock {
        ty: PARAGRAPH.to_string(),
        data,
        children: Default::default(),
      }
    })
    .collect();

  if paragraphs.is_empty() {
    return None;
  }
  Some(NestedBlock {
    ty: PAGE.to_string(),
    children: paragraphs,
    ..Default::default()
  })
}
/// Converts one HTML element into a [`JSONResult`], dispatching on its tag.
///
/// - tags in `IGNORE_TAGS` produce `None`;
/// - tags in `INLINE_TAGS` go to `process_inline_element` with `attributes`;
/// - headings, expandable containers, `li`, `blockquote`/`details`, `pre`,
///   `img` and `b` each have a dedicated processor;
/// - everything else goes to `process_default_element`.
///
/// `list_type` is only consulted for `li` elements.
fn flatten_element_to_json(
  node: ElementRef,
  list_type: &Option<String>,
  attributes: &Option<HashMap<String, Value>>,
) -> Option<JSONResult> {
  let tag_name = get_tag_name(node.to_owned());
  let tag = tag_name.as_str();

  if IGNORE_TAGS.contains(&tag) {
    return None;
  }
  if INLINE_TAGS.contains(&tag) {
    return process_inline_element(node, attributes.to_owned());
  }

  // Block-level data starts with the element's `dir` attribute, if any.
  // Whatever value the attribute holds is stored under `text_direction`,
  // for example: <p dir="rtl">Right to left</p> -> { "text_direction": "rtl" }
  let mut data = HashMap::new();
  if let Some(dir) = find_attribute_value(node.to_owned(), DIR_ATTR_NAME) {
    data.insert(TEXT_DIRECTION.to_string(), Value::String(dir));
  }

  if HEADING_TAGS.contains(&tag) {
    return process_heading_element(node, data);
  }
  if SHOULD_EXPAND_TAGS.contains(&tag) {
    return process_nested_element(node);
  }

  match tag {
    LI_TAG_NAME => process_li_element(node, list_type.to_owned(), data),
    BLOCKQUOTE_TAG_NAME | DETAILS_TAG_NAME => {
      process_node_summary_and_details(QUOTE.to_string(), node, data)
    },
    PRE_TAG_NAME => process_code_element(node),
    IMG_TAG_NAME => process_image_element(node),
    // Compatible with Google Docs: <b id=xxx> is the document's top-level tag,
    // so its children are processed instead of treating it as bold text.
    B_TAG_NAME if find_attribute_value(node.to_owned(), "id").is_some() => {
      process_nested_element(node)
    },
    B_TAG_NAME => process_inline_element(node, attributes.to_owned()),
    _ => process_default_element(node, data),
  }
}
/// Converts an element without a dedicated processor into a block.
///
/// Block type by tag: `html` -> page, `aside`/`article` -> callout,
/// `hr` -> divider, anything else (including `p`) -> paragraph.
/// Inline content collected from the children is stored under the delta key
/// of `data` when it is non-empty; block children become the block's children.
fn process_default_element(
  node: ElementRef,
  mut data: HashMap<String, Value>,
) -> Option<JSONResult> {
  let tag_name = get_tag_name(node.to_owned());
  let block_type = match tag_name.as_str() {
    HTML_TAG_NAME => PAGE,
    ASIDE_TAG_NAME | ARTICLE_TAG_NAME => CALLOUT,
    HR_TAG_NAME => DIVIDER,
    _ => PARAGRAPH,
  };

  let (delta, children) = process_node_children(node, &None, None);
  if !delta.is_empty() {
    data.insert(DELTA.to_string(), delta_to_json(&delta));
  }

  let block = NestedBlock {
    ty: block_type.to_string(),
    children,
    data,
  };
  Some(JSONResult::Block(block))
}
fn process_image_element(node: ElementRef) -> Option<JSONResult> {
let mut data = HashMap::new();
if let Some(src) = find_attribute_value(node, SRC) {
data.insert(URL.to_string(), Value::String(src));
}
Some(JSONResult::Block(NestedBlock {
ty: IMAGE.to_string(),
children: Default::default(),
data,
}))
}
/// Converts a `<pre>` element into a code block.
///
/// When a `<code>` child is found, its text becomes the block's delta and the
/// part of its `class` attribute after the last `-` becomes the language.
/// Without a `<code>` child the block is emitted with empty data.
fn process_code_element(node: ElementRef) -> Option<JSONResult> {
  let mut data = HashMap::new();

  if let Some(code) = find_child_node(node.to_owned(), CODE_TAG_NAME.to_string()) {
    // Language: last dash-separated segment of the class attribute.
    if let Some(class) = find_attribute_value(code.to_owned(), CLASS) {
      let language = class.rsplit('-').next().unwrap_or_default();
      data.insert(LANGUAGE.to_string(), Value::String(language.to_string()));
    }

    // Delta: the whole text content as one attribute-less insert.
    let inserts = vec![InsertDelta {
      insert: code.text().collect::<String>(),
      attributes: None,
    }];
    if let Ok(delta) = serde_json::to_value(inserts) {
      data.insert(DELTA.to_string(), delta);
    }
  }

  let block = NestedBlock {
    ty: CODE.to_string(),
    children: Default::default(),
    data,
  };
  Some(JSONResult::Block(block))
}
// Expands a container element ("ul" | "ol" | "dl" | "menu", and "b" elements
// carrying an id) into the blocks of its children.
// The list type handed to the children is bulleted for "ul", numbered for
// "ol" and paragraph for everything else; inline content directly inside the
// container is discarded.
fn process_nested_element(node: ElementRef) -> Option<JSONResult> {
  let tag_name = get_tag_name(node.to_owned());
  let child_list_type = match tag_name.as_str() {
    UL_TAG_NAME => BULLETED_LIST,
    OL_TAG_NAME => NUMBERED_LIST,
    _ => PARAGRAPH,
  }
  .to_string();

  let (_, blocks) = process_node_children(node, &Some(child_list_type), None);
  Some(JSONResult::BlockArray(blocks))
}
// process <li> element, if it's a checkbox, then return a todo list, otherwise return a normal list.
fn process_li_element(
node: ElementRef,
list_type: Option<String>,
mut data: HashMap<String, Value>,
) -> Option<JSONResult> {
let mut ty = list_type.unwrap_or(BULLETED_LIST.to_string());
if let Some(role) = find_attribute_value(node.to_owned(), ROLE) {
if role == CHECKBOX {
if let Some(checked_attr) = find_attribute_value(node.to_owned(), ARIA_CHECKED) {
let checked = match checked_attr.as_str() {
"true" => true,
"false" => false,
_ => false,
};
data.insert(
CHECKED.to_string(),
serde_json::to_value(checked).unwrap_or_default(),
);
}
data.insert(
CHECKED.to_string(),
serde_json::to_value(false).unwrap_or_default(),
);
ty = TODO_LIST.to_string();
}
}
process_node_summary_and_details(ty, node, data)
}
// Build a block of type `ty` for a container such as li | blockquote | details.
//
// Two layouts are handled:
//   1. consecutive arrangement, the title is inline text of the container itself:
//        <li>title<p>content</p></li>
//      the inline delta becomes the block's DELTA and every child block is kept.
//   2. nesting, the title lives in the first child block:
//        <li>
//          <p> title </p>
//          <p> content </p>
//        </li>
//      the first child's DELTA is promoted to the block and the remaining children are kept.
//
// When neither layout yields a delta, the block is returned with the incoming data and all
// children untouched.
fn process_node_summary_and_details(
  ty: String,
  node: ElementRef,
  mut data: HashMap<String, Value>,
) -> Option<JSONResult> {
  let (delta, children) = process_node_children(node, &Some(ty.to_string()), None);

  // Layout 1: the container has its own inline content.
  if !delta.is_empty() {
    data.insert(DELTA.to_string(), delta_to_json(&delta));
    return Some(JSONResult::Block(NestedBlock { ty, children, data }));
  }

  // Layout 2: try to borrow the title from the first child block.
  let promoted = children
    .first()
    .and_then(|first| first.data.get(DELTA))
    .map(|title| title.to_owned());
  let block = match promoted {
    Some(title) => {
      // NOTE(review): the incoming `data` is not carried over in this branch, only the
      // promoted delta ends up in the block. Confirm this is intended.
      let mut promoted_data = HashMap::new();
      promoted_data.insert(DELTA.to_string(), title);
      NestedBlock {
        ty,
        children: children.into_iter().skip(1).collect(),
        data: promoted_data,
      }
    },
    None => NestedBlock { ty, children, data },
  };
  Some(JSONResult::Block(block))
}
// Build a heading block from `node`.
// The level is read from the last character of the tag name: '1' -> 1, '2' -> 2 and everything
// else (h3, h4, h5, h6, ...) -> 3. The inline delta of the element, when not empty, is stored
// under DELTA; child blocks are attached as children.
fn process_heading_element(
  node: ElementRef,
  mut data: HashMap<String, Value>,
) -> Option<JSONResult> {
  let tag_name = get_tag_name(node.to_owned());
  let level = if tag_name.ends_with('1') {
    1
  } else if tag_name.ends_with('2') {
    2
  } else {
    // default to h3 even if it's h4, h5, h6
    3
  };
  let level_value = serde_json::to_value(level).unwrap_or_default();
  data.insert(LEVEL.to_string(), level_value);

  let (delta, children) = process_node_children(node, &None, None);
  if !delta.is_empty() {
    let delta_value = serde_json::to_value(delta).unwrap_or_default();
    data.insert(DELTA.to_string(), delta_value);
  }

  let block = NestedBlock {
    ty: HEADING.to_string(),
    children,
    data,
  };
  Some(JSONResult::Block(block))
}
// Process an inline element: <a> <em> <strong> <u> <s> <code> <span> <br>.
// The element's own formatting is merged with the attributes inherited from its ancestors and
// handed down to its children. The result is the collected delta when there is any, otherwise
// the child blocks that were found inside the element.
fn process_inline_element(
  node: ElementRef,
  attributes: Option<HashMap<String, Value>>,
) -> Option<JSONResult> {
  let merged = get_delta_attributes_for(
    &get_tag_name(node.to_owned()),
    &get_node_attrs(node),
    attributes,
  );
  let (delta, children) = process_node_children(node, &None, merged);
  if delta.is_empty() {
    Some(JSONResult::BlockArray(children))
  } else {
    Some(JSONResult::DeltaArray(delta))
  }
}
// Walk the direct children of `node` and split what they produce into two buckets:
//   - inline content (delta operations), in document order;
//   - block content (nested blocks), in document order.
// Element children are converted through `flatten_element_to_json`, receiving `list_type` and
// the inherited `attributes`. Every other child is treated as text and turned into a delta
// operation formatted like `node`; blank text produces nothing.
fn process_node_children(
  node: ElementRef,
  list_type: &Option<String>,
  attributes: Option<HashMap<String, Value>>,
) -> (Vec<InsertDelta>, Vec<NestedBlock>) {
  let tag_name = get_tag_name(node.to_owned());
  let mut ops = Vec::new();
  let mut blocks = Vec::new();

  for child in node.children() {
    match ElementRef::wrap(child) {
      Some(element) => match flatten_element_to_json(element, list_type, &attributes) {
        Some(JSONResult::Delta(op)) => ops.push(op),
        Some(JSONResult::DeltaArray(more_ops)) => ops.extend(more_ops),
        Some(JSONResult::Block(block)) => blocks.push(block),
        Some(JSONResult::BlockArray(more_blocks)) => blocks.extend(more_blocks),
        None => {},
      },
      None => {
        // put text into delta while child is a text node
        let text = match child.value().as_text() {
          Some(text) => text.text.to_string(),
          None => String::new(),
        };
        let mut node_attrs = get_node_attrs(node);
        if let Some(op) = node_to_delta(&tag_name, text, &mut node_attrs, &attributes) {
          ops.push(op);
        }
      },
    }
  }

  (ops, blocks)
}
// get attributes from style
// for example: style="font-weight: bold; font-style: italic; text-decoration: underline; text-decoration: line-through;"
//
// Each `key: value` declaration is inspected on its own; declarations that do not split into
// exactly one key and one value around ':' are ignored.
// A single text-decoration declaration may list several lines at once
// (e.g. "text-decoration: underline line-through"), so underline and line-through are checked
// independently instead of stopping at the first match.
fn get_attributes_with_style(style: &str) -> HashMap<String, Value> {
  let mut attributes = HashMap::new();
  for property in style.split(';') {
    let parts: Vec<&str> = property.split(':').map(str::trim).collect();
    let (key, value) = match parts.as_slice() {
      [key, value] => (*key, *value),
      _ => continue,
    };
    match key {
      FONT_WEIGHT if value.contains(BOLD) => {
        attributes.insert(BOLD.to_string(), Value::Bool(true));
      },
      FONT_STYLE if value.contains(ITALIC) => {
        attributes.insert(ITALIC.to_string(), Value::Bool(true));
      },
      TEXT_DECORATION => {
        if value.contains(UNDERLINE) {
          attributes.insert(UNDERLINE.to_string(), Value::Bool(true));
        }
        if value.contains(LINE_THROUGH) {
          attributes.insert(STRIKETHROUGH.to_string(), Value::Bool(true));
        }
      },
      BACKGROUND_COLOR => {
        attributes.insert(BG_COLOR.to_string(), Value::String(value.to_string()));
      },
      COLOR => {
        attributes.insert(FONT_COLOR.to_string(), Value::String(value.to_string()));
      },
      _ => {},
    }
  }
  attributes
}
// get attributes from tag name
// input <a href="https://www.google.com">Google</a>
// export attributes: { "href": "https://www.google.com" }
// input <em>Italic</em>
// export attributes: { "italic": true }
// input <strong>Bold</strong>
// export attributes: { "bold": true }
// input <u>Underline</u>
// export attributes: { "underline": true }
// input <s>Strikethrough</s>
// export attributes: { "strikethrough": true }
// input <code>Code</code>
// export attributes: { "code": true }
fn get_delta_attributes_for(
tag_name: &str,
attrs: &Attrs,
parent_attributes: Option<HashMap<String, Value>>,
) -> Option<HashMap<String, Value>> {
let href = find_attribute_value_from_attrs(attrs, HREF);
let style = find_attribute_value_from_attrs(attrs, STYLE);
let mut attributes = get_attributes_with_style(&style);
if let Some(parent_attributes) = parent_attributes {
parent_attributes.iter().for_each(|(k, v)| {
attributes.insert(k.to_string(), v.clone());
});
}
match tag_name {
CODE_TAG_NAME => {
attributes.insert(CODE.to_string(), Value::Bool(true));
},
MARK_TAG_NAME => {
attributes.insert(BG_COLOR.to_string(), Value::String("#FFFF00".to_string()));
},
_ => {
if LINK_TAGS.contains(&tag_name) {
attributes.insert(HREF.to_string(), Value::String(href));
}
if ITALIC_TAGS.contains(&tag_name) {
attributes.insert(ITALIC.to_string(), Value::Bool(true));
}
if BOLD_TAGS.contains(&tag_name) {
attributes.insert(BOLD.to_string(), Value::Bool(true));
}
if UNDERLINE_TAGS.contains(&tag_name) {
attributes.insert(UNDERLINE.to_string(), Value::Bool(true));
}
if STRIKETHROUGH_TAGS.contains(&tag_name) {
attributes.insert(STRIKETHROUGH.to_string(), Value::Bool(true));
}
},
}
if attributes.is_empty() {
None
} else {
Some(attributes)
}
}
// Turn the text of a text node into a delta operation, formatted according to the element that
// contains it. Text that is empty or whitespace-only produces no operation.
// input <a href="https://www.google.com">Google</a>
// export delta: [{ "insert": "Google", "attributes": { "href": "https://www.google.com" } }]
fn node_to_delta(
  tag_name: &str,
  text: String,
  attrs: &mut Attrs,
  parent_attributes: &Option<HashMap<String, Value>>,
) -> Option<InsertDelta> {
  if text.trim().is_empty() {
    return None;
  }
  let attributes = get_delta_attributes_for(tag_name, attrs, parent_attributes.clone());
  Some(InsertDelta {
    insert: text,
    attributes,
  })
}
// Return the tag name of `node` as an owned string.
fn get_tag_name(node: ElementRef) -> String {
  let element = node.value();
  String::from(element.name())
}
// Return an iterator over the (name, value) attribute pairs of `node`.
fn get_node_attrs(node: ElementRef) -> Attrs {
  let element = node.value();
  element.attrs()
}
// Look up the attribute called `attr_name` on `node`.
// Returns its value as an owned string, or None when the element has no such attribute.
fn find_attribute_value(node: ElementRef, attr_name: &str) -> Option<String> {
  node.value().attrs().find_map(|(name, value)| {
    if name == attr_name {
      Some(value.to_string())
    } else {
      None
    }
  })
}
// Look up the attribute called `attr_name` in an attribute iterator.
// Returns its value, or an empty string when the attribute is absent.
fn find_attribute_value_from_attrs(attrs: &Attrs, attr_name: &str) -> String {
  // Searching advances the iterator, so walk a clone and leave the caller's iterator untouched.
  for (name, value) in attrs.clone() {
    if name == attr_name {
      return value.to_string();
    }
  }
  String::new()
}
// Return the first direct child element of `node` whose tag name equals `child_tag_name`.
// Children that are not elements (text, comments, ...) are skipped.
fn find_child_node(node: ElementRef, child_tag_name: String) -> Option<ElementRef> {
  node
    .children()
    .filter_map(ElementRef::wrap)
    .find(|element| get_tag_name(*element) == child_tag_name)
}
// Serialize a list of delta operations to a JSON value.
// Falls back to the default JSON value (null) if serialization fails.
// Takes a slice so callers are not forced to own a Vec; `&Vec<InsertDelta>` arguments still
// coerce to it.
fn delta_to_json(delta: &[InsertDelta]) -> Value {
  serde_json::to_value(delta).unwrap_or_default()
}

View File

@ -1,5 +1,6 @@
pub mod constant;
pub mod document_data_parser;
pub mod external;
pub mod json;
pub mod parser_entities;
pub mod utils;

View File

@ -1,19 +1,16 @@
use crate::parse::NotEmptyStr;
use crate::parser::constant::{
BG_COLOR, BOLD, BULLETED_LIST, CALLOUT, CHECKED, CODE, DELTA, DIVIDER, FONT_COLOR, FORMULA,
HEADING, HREF, ICON, IMAGE, ITALIC, LANGUAGE, LEVEL, MATH_EQUATION, NUMBERED_LIST, PAGE,
PARAGRAPH, QUOTE, STRIKETHROUGH, TODO_LIST, TOGGLE_LIST, UNDERLINE, URL,
};
use crate::parser::constant::*;
use crate::parser::utils::{
convert_insert_delta_from_json, convert_nested_block_children_to_html, delta_to_html,
delta_to_text,
delta_to_text, required_not_empty_str, serialize_color_attribute,
};
use flowy_derive::ProtoBuf;
use flowy_derive::{ProtoBuf, ProtoBuf_Enum};
use flowy_error::ErrorCode;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use std::sync::Arc;
use validator::Validate;
#[derive(Default, ProtoBuf)]
pub struct SelectionPB {
@ -43,7 +40,7 @@ pub struct RangePB {
* @field text: bool // export text data
*/
#[derive(Default, ProtoBuf, Debug, Clone)]
pub struct ExportTypePB {
pub struct ParseTypePB {
#[pb(index = 1)]
pub json: bool,
@ -57,7 +54,7 @@ pub struct ExportTypePB {
* ConvertDocumentPayloadPB
* @field document_id: String
* @field range: Option<RangePB> - optional // if range is None, copy the whole document
* @field export_types: [ExportTypePB]
* @field parse_types: [ParseTypePB]
*/
#[derive(Default, ProtoBuf)]
pub struct ConvertDocumentPayloadPB {
@ -68,7 +65,7 @@ pub struct ConvertDocumentPayloadPB {
pub range: Option<RangePB>,
#[pb(index = 3)]
pub export_types: ExportTypePB,
pub parse_types: ParseTypePB,
}
#[derive(Default, ProtoBuf, Debug)]
@ -92,7 +89,7 @@ pub struct Range {
pub end: Selection,
}
pub struct ExportType {
pub struct ParseType {
pub json: bool,
pub html: bool,
pub text: bool,
@ -101,10 +98,10 @@ pub struct ExportType {
pub struct ConvertDocumentParams {
pub document_id: String,
pub range: Option<Range>,
pub export_types: ExportType,
pub parse_types: ParseType,
}
impl ExportType {
impl ParseType {
pub fn any_enabled(&self) -> bool {
self.json || self.html || self.text
}
@ -129,9 +126,9 @@ impl From<RangePB> for Range {
}
}
impl From<ExportTypePB> for ExportType {
fn from(data: ExportTypePB) -> Self {
ExportType {
impl From<ParseTypePB> for ParseType {
fn from(data: ParseTypePB) -> Self {
ParseType {
json: data.json,
html: data.html,
text: data.text,
@ -148,7 +145,7 @@ impl TryInto<ConvertDocumentParams> for ConvertDocumentPayloadPB {
Ok(ConvertDocumentParams {
document_id: document_id.0,
range,
export_types: self.export_types.into(),
parse_types: self.parse_types.into(),
})
}
}
@ -169,88 +166,88 @@ impl InsertDelta {
pub fn to_html(&self) -> String {
let mut html = String::new();
let mut style = String::new();
let mut html_attributes = String::new();
// If there are attributes, serialize them as a HashMap.
if let Some(attrs) = &self.attributes {
// Serialize the font color attributes.
if let Some(color) = attrs.get(FONT_COLOR) {
style.push_str(&format!(
"color: {};",
color.to_string().replace("0x", "#").trim_matches('\"')
));
}
// Serialize the color attributes.
style.push_str(&serialize_color_attribute(attrs, FONT_COLOR, COLOR));
// Serialize the background color attributes.
if let Some(color) = attrs.get(BG_COLOR) {
style.push_str(&format!(
"background-color: {};",
color.to_string().replace("0x", "#").trim_matches('\"')
));
}
style.push_str(&serialize_color_attribute(
attrs,
BG_COLOR,
BACKGROUND_COLOR,
));
// Serialize the href attributes.
if let Some(href) = attrs.get(HREF) {
html.push_str(&format!("<a href={}>", href));
html.push_str(&format!("<{} {}={}>", A_TAG_NAME, HREF, href));
}
// Serialize the code attributes.
if let Some(code) = attrs.get(CODE) {
if code.as_bool().unwrap_or(false) {
html.push_str("<code>");
html.push_str(&format!("<{}>", CODE_TAG_NAME));
}
}
// Serialize the italic, underline, strikethrough, bold, formula attributes.
if let Some(italic) = attrs.get(ITALIC) {
if italic.as_bool().unwrap_or(false) {
style.push_str("font-style: italic;");
style.push_str(FONT_STYLE_ITALIC);
}
}
if let Some(underline) = attrs.get(UNDERLINE) {
if underline.as_bool().unwrap_or(false) {
style.push_str("text-decoration: underline;");
style.push_str(TEXT_DECORATION_UNDERLINE);
}
}
if let Some(strikethrough) = attrs.get(STRIKETHROUGH) {
if strikethrough.as_bool().unwrap_or(false) {
style.push_str("text-decoration: line-through;");
style.push_str(TEXT_DECORATION_LINE_THROUGH);
}
}
if let Some(bold) = attrs.get(BOLD) {
if bold.as_bool().unwrap_or(false) {
style.push_str("font-weight: bold;");
style.push_str(FONT_WEIGHT_BOLD);
}
}
if let Some(formula) = attrs.get(FORMULA) {
if formula.as_bool().unwrap_or(false) {
style.push_str("font-family: fantasy;");
style.push_str(FONT_FAMILY_FANTASY);
}
}
if let Some(direction) = attrs.get(TEXT_DIRECTION) {
html_attributes.push_str(&format!(" {}=\"{}\"", DIR_ATTR_NAME, direction));
}
}
// Serialize the attributes to style.
if !style.is_empty() {
html.push_str(&format!("<span style=\"{}\">", style));
html_attributes.push_str(&format!(" {}=\"{}\"", STYLE, style));
}
if !html_attributes.is_empty() {
html.push_str(&format!("<{}{}>", SPAN_TAG_NAME, html_attributes));
}
// Serialize the insert field.
html.push_str(&self.insert);
// Close the style tag.
if !style.is_empty() {
html.push_str("</span>");
if !html_attributes.is_empty() {
html.push_str(&format!("</{}>", SPAN_TAG_NAME));
}
// Close the tags: <a>, <code>.
if let Some(attrs) = &self.attributes {
if attrs.contains_key(HREF) {
html.push_str("</a>");
}
if attrs.contains_key(CODE) {
html.push_str("</code>");
html.push_str(&format!("</{}>", CODE_TAG_NAME));
}
if attrs.contains_key(HREF) {
html.push_str(&format!("</{}>", A_TAG_NAME));
}
}
html
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct NestedBlock {
#[serde(default)]
pub id: String,
#[serde(rename = "type")]
pub ty: String,
#[serde(default)]
@ -262,7 +259,6 @@ pub struct NestedBlock {
impl Eq for NestedBlock {}
impl PartialEq for NestedBlock {
// ignore the id field
fn eq(&self, other: &Self) -> bool {
self.ty == other.ty
&& self.data.iter().all(|(k, v)| {
@ -278,24 +274,9 @@ impl PartialEq for NestedBlock {
}
}
pub struct ConvertBlockToHtmlParams {
pub prev_block_ty: Option<String>,
pub next_block_ty: Option<String>,
}
impl NestedBlock {
pub fn new(
id: String,
ty: String,
data: HashMap<String, Value>,
children: Vec<NestedBlock>,
) -> Self {
Self {
id,
ty,
data,
children,
}
pub fn new(ty: String, data: HashMap<String, Value>, children: Vec<NestedBlock>) -> Self {
Self { ty, data, children }
}
pub fn add_child(&mut self, child: NestedBlock) {
@ -316,115 +297,147 @@ impl NestedBlock {
let next_block_ty = params.next_block_ty.unwrap_or_default();
match self.ty.as_str() {
// <h1>Hello</h1>
HEADING => {
let level = self.data.get(LEVEL).unwrap_or(&Value::Null);
if level.as_u64().unwrap_or(0) > 6 {
html.push_str(&format!("<h6>{}</h6>", text_html));
html.push_str(&format!("<{}>{}</{}>", H6_TAG_NAME, text_html, H6_TAG_NAME));
} else {
html.push_str(&format!("<h{}>{}</h{}>", level, text_html, level));
}
},
// <p>Hello</p>
PARAGRAPH => {
html.push_str(&format!("<p>{}</p>", text_html));
html.push_str(&format!("<{}>{}</{}>", P_TAG_NAME, text_html, P_TAG_NAME));
html.push_str(&convert_nested_block_children_to_html(Arc::new(
self.to_owned(),
)));
},
// <aside>😁Hello</aside>
CALLOUT => {
html.push_str(&format!(
"<p>{}{}</p>",
"<{}>{}{}</{}>",
ASIDE_TAG_NAME,
self
.data
.get(ICON)
.unwrap_or(&Value::Null)
.to_string()
.trim_matches('\"'),
text_html
text_html,
ASIDE_TAG_NAME
));
},
// <img src="https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png" alt="Google Logo" />
IMAGE => {
html.push_str(&format!(
"<img src={} alt={} />",
"<{} src={} alt={} />",
IMG_TAG_NAME,
self.data.get(URL).unwrap(),
"AppFlowy-Image"
));
},
// <hr />
DIVIDER => {
html.push_str("<hr />");
html.push_str(&format!("<{} />", HR_TAG_NAME));
},
// <p>$$x = {-b \pm \sqrt{b^2-4ac} \over 2a}.$$</p>
MATH_EQUATION => {
let formula = self.data.get(FORMULA).unwrap_or(&Value::Null);
html.push_str(&format!(
"<p>{}</p>",
formula.to_string().trim_matches('\"')
"<{}>{}</{}>",
P_TAG_NAME,
formula.to_string().trim_matches('\"'),
P_TAG_NAME
));
},
// <pre><code class="language-js">console.log('Hello World!');</code></pre>
CODE => {
let language = self.data.get(LANGUAGE).unwrap_or(&Value::Null);
html.push_str(&format!(
"<pre><code class=\"language-{}\">{}</code></pre>",
"<{}><{} {}=\"{}-{}\">{}</{}></{}>",
PRE_TAG_NAME,
CODE_TAG_NAME,
CLASS,
LANGUAGE,
language.to_string().trim_matches('\"'),
text_html
text_html,
CODE_TAG_NAME,
PRE_TAG_NAME
));
},
BULLETED_LIST | NUMBERED_LIST | TODO_LIST | TOGGLE_LIST => {
let list_type = match self.ty.as_str() {
BULLETED_LIST => "ul",
NUMBERED_LIST => "ol",
TODO_LIST => "ul",
TOGGLE_LIST => "ul",
_ => "ul", // Default to "ul" for unknown types
// <details><summary>Hello</summary><p>World!</p></details>
TOGGLE_LIST => {
html.push_str(&format!("<{}>", DETAILS_TAG_NAME));
html.push_str(&format!(
"<{}>{}</{}>",
SUMMARY_TAG_NAME, text_html, SUMMARY_TAG_NAME
));
html.push_str(&convert_nested_block_children_to_html(Arc::new(
self.to_owned(),
)));
html.push_str(&format!("</{}>", DETAILS_TAG_NAME));
},
// <ul><li>Hello</li><li>World!</li></ul>
BULLETED_LIST | NUMBERED_LIST | TODO_LIST => {
let list_type = if self.ty == NUMBERED_LIST {
OL_TAG_NAME
} else {
UL_TAG_NAME
};
if prev_block_ty != self.ty {
html.push_str(&format!("<{}>", list_type));
}
if self.ty == TODO_LIST {
let checked_str = if self
let checked = self
.data
.get(CHECKED)
.and_then(|checked| checked.as_bool())
.unwrap_or(false)
{
"x"
} else {
" "
};
html.push_str(&format!("<li>[{}] {}</li>", checked_str, text_html));
.and_then(|v| v.as_bool())
.unwrap_or_default();
// <li role="checkbox" aria-checked="true">Hello</li>
html.push_str(&format!(
"<{} {}=\"{}\" {}=\"{}\">{}",
LI_TAG_NAME, ROLE, CHECKBOX, ARIA_CHECKED, checked, text_html
));
} else {
html.push_str(&format!("<li>{}</li>", text_html));
html.push_str(&format!("<{}>{}", LI_TAG_NAME, text_html));
}
html.push_str(&convert_nested_block_children_to_html(Arc::new(
self.to_owned(),
)));
html.push_str(&format!("</{}>", LI_TAG_NAME));
if next_block_ty != self.ty {
html.push_str(&format!("</{}>", list_type));
}
},
// <blockquote><p>Hello</p><p>World!</p></blockquote>
QUOTE => {
if prev_block_ty != self.ty {
html.push_str("<blockquote>");
html.push_str(&format!("<{}>", BLOCKQUOTE_TAG_NAME));
}
html.push_str(&format!("<p>{}</p>", text_html));
html.push_str(&format!("<{}>{}</{}>", P_TAG_NAME, text_html, P_TAG_NAME));
html.push_str(&convert_nested_block_children_to_html(Arc::new(
self.to_owned(),
)));
if next_block_ty != self.ty {
html.push_str("</blockquote>");
html.push_str(&format!("</{}>", BLOCKQUOTE_TAG_NAME));
}
},
// <p>Hello</p>
PAGE => {
if !text_html.is_empty() {
html.push_str(&format!("<p>{}</p>", text_html));
html.push_str(&format!("<{}>{}</{}>", P_TAG_NAME, text_html, P_TAG_NAME));
}
html.push_str(&convert_nested_block_children_to_html(Arc::new(
self.to_owned(),
)));
},
// <p>Hello</p>
_ => {
html.push_str(&format!("<p>{}</p>", text_html));
html.push_str(&format!("<{}>{}</{}>", P_TAG_NAME, text_html, P_TAG_NAME));
html.push_str(&convert_nested_block_children_to_html(Arc::new(
self.to_owned(),
)));
@ -439,7 +452,7 @@ impl NestedBlock {
let delta_text = self
.data
.get("delta")
.get(DELTA)
.and_then(convert_insert_delta_from_json)
.map(|delta| delta_to_text(&delta))
.unwrap_or_default();
@ -479,3 +492,46 @@ impl NestedBlock {
text
}
}
// Context about the neighbouring blocks, passed along when a block is converted to HTML.
// The converter compares these types with the block's own type to decide whether a wrapping
// tag (list container, blockquote) has to be opened before or closed after the block.
pub struct ConvertBlockToHtmlParams {
// Type of the block that comes right before this one; None is treated as an empty type.
pub prev_block_ty: Option<String>,
// Type of the block that comes right after this one; None is treated as an empty type.
pub next_block_ty: Option<String>,
}
// Format of the external data carried by ConvertDataToJsonPayloadPB.
// Html is the default variant.
#[derive(PartialEq, Eq, Debug, ProtoBuf_Enum, Clone, Default)]
pub enum InputType {
#[default]
Html = 0,
PlainText = 1,
}
// Request payload for converting external data to JSON.
#[derive(Default, ProtoBuf, Debug, Validate)]
pub struct ConvertDataToJsonPayloadPB {
// The raw external data; validation rejects an empty string.
#[pb(index = 1)]
#[validate(custom = "required_not_empty_str")]
pub data: String,
// Format of `data`.
#[pb(index = 2)]
pub input_type: InputType,
}
// Plain (non-protobuf) counterpart of ConvertDataToJsonPayloadPB, produced by its TryInto impl.
pub struct ConvertDataToJsonParams {
// The raw external data.
pub data: String,
// Format of `data`.
pub input_type: InputType,
}
// Response of the external-data-to-JSON conversion.
#[derive(Default, ProtoBuf, Debug)]
pub struct ConvertDataToJsonResponsePB {
// NOTE(review): presumably the converted nested block tree serialized as a JSON string - confirm against the handler.
#[pb(index = 1)]
pub json: String,
}
impl TryInto<ConvertDataToJsonParams> for ConvertDataToJsonPayloadPB {
type Error = ErrorCode;
fn try_into(self) -> Result<ConvertDataToJsonParams, Self::Error> {
Ok(ConvertDataToJsonParams {
data: self.data,
input_type: self.input_type,
})
}
}

View File

@ -1,74 +1,11 @@
use crate::parser::constant::DELTA;
use crate::parser::parser_entities::{
ConvertBlockToHtmlParams, InsertDelta, NestedBlock, Selection,
};
use collab_document::blocks::{Block, DocumentData};
use collab_document::blocks::DocumentData;
use serde_json::Value;
use std::collections::HashMap;
use std::sync::Arc;
pub struct ConvertBlockToJsonParams {
pub(crate) blocks: HashMap<String, Arc<Block>>,
pub(crate) relation_map: HashMap<String, Arc<Vec<String>>>,
pub(crate) delta_map: HashMap<String, Vec<InsertDelta>>,
}
pub fn block_to_nested_json(
block_id: &str,
convert_params: &ConvertBlockToJsonParams,
) -> Option<NestedBlock> {
let blocks = &convert_params.blocks;
let relation_map = &convert_params.relation_map;
let delta_map = &convert_params.delta_map;
// Attempt to retrieve the block using the block_id
let block = blocks.get(block_id)?;
// Retrieve the children for this block from the relation map
let children = relation_map.get(&block.id)?;
// Recursively convert children blocks to JSON
let children: Vec<_> = children
.iter()
.filter_map(|child_id| block_to_nested_json(child_id, convert_params))
.collect();
// Clone block data
let mut data = block.data.clone();
// Insert delta into data if available
if let Some(delta) = delta_map.get(&block.id) {
if let Ok(delta_value) = serde_json::to_value(delta) {
data.insert(DELTA.to_string(), delta_value);
}
}
// Create and return the NestedBlock
Some(NestedBlock {
id: block.id.to_string(),
ty: block.ty.to_string(),
children,
data,
})
}
pub fn get_flat_block_ids(block_id: &str, data: &DocumentData) -> Vec<String> {
let blocks = &data.blocks;
let children_map = &data.meta.children_map;
if let Some(block) = blocks.get(block_id) {
let mut result = vec![block.id.clone()];
if let Some(child_ids) = children_map.get(&block.children) {
for child_id in child_ids {
let child_blocks = get_flat_block_ids(child_id, data);
result.extend(child_blocks);
}
return result;
}
}
vec![]
}
use validator::ValidationError;
pub fn get_delta_for_block(block_id: &str, data: &DocumentData) -> Option<Vec<InsertDelta>> {
let text_map = data.meta.text_map.as_ref()?; // Retrieve the text_map reference
@ -165,3 +102,25 @@ pub fn convert_nested_block_children_to_html(block: Arc<NestedBlock>) -> String
pub fn convert_insert_delta_from_json(delta_value: &Value) -> Option<Vec<InsertDelta>> {
serde_json::from_value::<Vec<InsertDelta>>(delta_value.to_owned()).ok()
}
// Validator callback: accepts any non-empty string and rejects the empty string with the
// "should not be empty string" validation error.
pub fn required_not_empty_str(s: &str) -> Result<(), ValidationError> {
  match s {
    "" => Err(ValidationError::new("should not be empty string")),
    _ => Ok(()),
  }
}
// Render the color stored under `attr_name` as a CSS declaration `"<css_property>: <color>;"`.
// The value is taken in its JSON text form, every "0x" in it is replaced by "#" and the
// surrounding double quotes are stripped. Returns an empty string when the attribute is absent.
pub fn serialize_color_attribute(
  attrs: &HashMap<String, Value>,
  attr_name: &str,
  css_property: &str,
) -> String {
  match attrs.get(attr_name) {
    Some(color) => {
      let css_color = color.to_string().replace("0x", "#");
      format!("{}: {};", css_property, css_color.trim_matches('\"'))
    },
    None => String::new(),
  }
}

View File

@ -1 +1 @@
<meta charset="UTF-8"><ul><li>Highlight</li><p>You can also</p><ul><li>nest</li></ul></ul>
<meta charset="UTF-8"><ul><li>Highlight<p>You can also</p><ul><li>nest</li></ul></li></ul>

View File

@ -1,6 +1,6 @@
<meta charset="UTF-8"><p>🥰
<meta charset="UTF-8"><aside>🥰
Like AppFlowy? Follow us:
<a href="https://github.com/AppFlowy-IO/AppFlowy">GitHub</a>
<a href="https://twitter.com/appflowy">Twitter</a>: @appflowy
<a href="https://blog-appflowy.ghost.io/">Newsletter</a>
</p>
</aside>

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,34 @@
<meta charset='utf-8'><h1>The Notion Document</h1>
<h1>Heading-1</h1>
<h2>Heading - 2</h2>
<h3>Heading - 3</h3>
<p>This is a paragraph</p>
<p>paragraphs child</p>
<ul><li>This is a bulleted list - 1<ul><li>This is a bulleted list - 1 - 1</li></ul></li><li>This is a bulleted list - 2</li></ul>
<p>This is a paragraph</p>
<ul><li>[ ] This is a todo - 1<ul><li>[ ] This is a paragraph - 1-1</li></ul></li></ul>
<ol><li>This is a numbered list -1</li></ol>
<p>This is a paragraph</p>
<ul><li><p>This is a toggle list</p><p>This is a toggle child</p></li>
</ul>
<blockquote><p>This is a quote</p><p>This is a quote child</p></blockquote>
<p>This is a paragraph</p>
<hr>
<pre><code class="language-jsx">// This is the main function.
fn main() {
// Print text to the console.
**println**!(&quot;Hello World!&quot;);
}</code></pre>
<p>This is a paragraph</p>
<p>&lt;aside&gt;
💡 callout</p>
<p>&lt;/aside&gt;</p>
<p>This is a paragraph font-color bg-color <strong>bold</strong> <em>italic underline <s>strike-through</s> <code>inline-code</code> $inline-formula$ <a href="https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21">link</a></em></p>
<p>$$
|x| = \begin{cases}
x, &amp;\quad x \geq 0 \\
-x, &amp;\quad x &lt; 0
\end{cases}
$$</p>
<p>End</p>
<!-- notionvc: 0b0229d7-b98a-4e36-8a64-f944de21ef0e -->

View File

@ -1 +1 @@
<meta charset="UTF-8"><ol><li>Highlight</li><p>You can also</p><ol><li>nest</li></ol></ol>
<meta charset="UTF-8"><ol><li>Highlight<p>You can also</p><ol><li>nest</li></ol></li></ol>

View File

@ -1 +1 @@
<meta charset="UTF-8"><ul><li>[x] Highlight</li><p>You can also</p><ul><li>[ ] nest</li></ul></ul>
<meta charset="UTF-8"><ul><li role="checkbox" aria-checked="true">Highlight<p>You can also</p><ul><li role="checkbox" aria-checked="false">nest</li></ul></li></ul>

View File

@ -1 +1 @@
<meta charset="UTF-8"><ul><li>Click <code>?</code> at the bottom right for help and support.</li><p>This is a paragraph</p><ul><li>This is a toggle list</li></ul></ul>
<meta charset="UTF-8"><details><summary>Click <code>?</code> at the bottom right for help and support.</summary><p>This is a paragraph</p><details><summary>This is a toggle list</summary></details></details>

View File

@ -0,0 +1,351 @@
{
"children": [
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "The Notion Document"
}
],
"level": 1,
"text_direction": "ltr"
},
"type": "heading"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "Heading-1"
}
],
"level": 1,
"text_direction": "ltr"
},
"type": "heading"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "Heading - 2"
}
],
"level": 2,
"text_direction": "ltr"
},
"type": "heading"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "Heading - 3"
}
],
"level": 3,
"text_direction": "ltr"
},
"type": "heading"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "Heading - 4"
}
],
"level": 3,
"text_direction": "ltr"
},
"type": "heading"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a paragraph"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "paragraphs child"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a bulleted list - 1"
}
]
},
"type": "bulleted_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a bulleted list - 1 - 1"
}
]
},
"type": "bulleted_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a bulleted list - 2"
}
]
},
"type": "bulleted_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a paragraph"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
},
{
"children": [
{
"children": [],
"data": {
"url": ""
},
"type": "image"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a todo - 1"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
}
],
"data": {
"checked": false,
"text_direction": "ltr"
},
"type": "todo_list"
},
{
"children": [
{
"children": [],
"data": {
"url": ""
},
"type": "image"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a todo - 1-1"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
}
],
"data": {
"checked": false,
"text_direction": "ltr"
},
"type": "todo_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a paragraph"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a numbered list -1"
}
]
},
"type": "numbered_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a numbered list -2"
}
]
},
"type": "numbered_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a numbered list-1-1"
}
]
},
"type": "numbered_list"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a paragraph"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": {
"bg_color": "transparent",
"font_color": "#000000"
},
"insert": "This is a paragraph"
}
],
"text_direction": "ltr"
},
"type": "paragraph"
},
{
"children": [],
"data": {},
"type": "divider"
},
{
"children": [],
"data": {},
"type": "paragraph"
}
],
"data": {},
"type": "page"
}

View File

@ -0,0 +1,371 @@
{
"type": "page",
"data": {},
"children": [
{
"type": "heading",
"data": {
"delta": [
{
"attributes": null,
"insert": "The Notion Document"
}
],
"level": 1
},
"children": []
},
{
"type": "heading",
"data": {
"level": 1,
"delta": [
{
"attributes": null,
"insert": "Heading-1"
}
]
},
"children": []
},
{
"type": "heading",
"data": {
"level": 2,
"delta": [
{
"attributes": null,
"insert": "Heading - 2"
}
]
},
"children": []
},
{
"type": "heading",
"data": {
"level": 3,
"delta": [
{
"attributes": null,
"insert": "Heading - 3"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "paragraphs child"
}
]
},
"children": []
},
{
"type": "bulleted_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a bulleted list - 1"
}
]
},
"children": [
{
"type": "bulleted_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a bulleted list - 1 - 1"
}
]
},
"children": []
}
]
},
{
"type": "bulleted_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a bulleted list - 2"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"children": []
},
{
"type": "bulleted_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "[ ] This is a todo - 1"
}
]
},
"children": [
{
"type": "bulleted_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "[ ] This is a paragraph - 1-1"
}
]
},
"children": []
}
]
},
{
"type": "numbered_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a numbered list -1"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"children": []
},
{
"type": "bulleted_list",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a toggle list"
}
]
},
"children": [
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a toggle child"
}
]
},
"children": []
}
]
},
{
"type": "quote",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a quote"
}
]
},
"children": [
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a quote child"
}
]
},
"children": []
}
]
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"children": []
},
{
"type": "divider",
"data": {},
"children": []
},
{
"type": "code",
"data": {
"delta": [
{
"attributes": null,
"insert": "// This is the main function.\nfn main() {\n // Print text to the console.\n **println**!(\"Hello World!\");\n}"
}
],
"language": "jsx"
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "<aside>\n 💡 callout"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "</aside>"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph font-color bg-color "
},
{
"attributes": {
"bold": true
},
"insert": "bold"
},
{
"attributes": {
"italic": true
},
"insert": "italic underline "
},
{
"attributes": {
"italic": true,
"strikethrough": true
},
"insert": "strike-through"
},
{
"attributes": {
"code": true,
"italic": true
},
"insert": "inline-code"
},
{
"attributes": {
"italic": true
},
"insert": " $inline-formula$ "
},
{
"attributes": {
"href": "https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21",
"italic": true
},
"insert": "link"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "$$\n |x| = \\begin{cases}\n x, &\\quad x \\geq 0 \\\\\n -x, &\\quad x < 0\n \\end{cases}\n $$"
}
]
},
"children": []
},
{
"type": "paragraph",
"data": {
"delta": [
{
"attributes": null,
"insert": "End"
}
]
},
"children": []
}
]
}

View File

@ -0,0 +1,510 @@
{
"children": [
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "# The Notion Document"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "# Heading-1"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "## Heading - 2"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "### Heading - 3"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "paragraphs child"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "- This is a bulleted list - 1"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": " - This is a bulleted list - 1 - 1"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "- This is a bulleted list - 2"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "- [ ] This is a todo - 1"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": " - [ ] This is a paragraph - 1-1"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "1. This is a numbered list -1"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "- This is a toggle list"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": " This is a toggle child"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "> This is a quote"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": ">"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": ">"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "> This is a quote child"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": ">"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "---"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "```jsx"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "// This is the main function."
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "fn main() {"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": " // Print text to the console."
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": " **println**!(\"Hello World!\");"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "}"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "```"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "<aside>"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "💡 callout"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "</aside>"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "This is a paragraph font-color bg-color **bold** *italic underline ~~strike-through~~ `inline-code` $inline-formula$ [link](https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21)*"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "$$"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "|x| = \\begin{cases}             "
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "  x, &\\quad x \\geq 0 \\\\           "
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": " -x, &\\quad x < 0             "
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "\\end{cases}"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "$$"
}
]
},
"type": "paragraph"
},
{
"children": [],
"data": {
"delta": [
{
"attributes": null,
"insert": "End"
}
]
},
"type": "paragraph"
}
],
"data": {},
"type": "page"
}

View File

@ -0,0 +1,64 @@
# The Notion Document
# Heading-1
## Heading - 2
### Heading - 3
This is a paragraph
paragraphs child
- This is a bulleted list - 1
- This is a bulleted list - 1 - 1
- This is a bulleted list - 2
This is a paragraph
- [ ] This is a todo - 1
- [ ] This is a paragraph - 1-1
1. This is a numbered list -1
This is a paragraph
- This is a toggle list
This is a toggle child
> This is a quote
>
>
> This is a quote child
>
This is a paragraph
---
```jsx
// This is the main function.
fn main() {
// Print text to the console.
**println**!("Hello World!");
}
```
This is a paragraph
<aside>
💡 callout
</aside>
This is a paragraph font-color bg-color **bold** *italic underline ~~strike-through~~ `inline-code` $inline-formula$ [link](https://www.notion.so/The-Notion-Document-d4236da306b84f6199e4091705042d78?pvs=21)*
$$
|x| = \begin{cases}             
  x, &\quad x \geq 0 \\           
 -x, &\quad x < 0             
\end{cases}
$$
End

View File

@ -0,0 +1 @@
mod parser_test;

View File

@ -0,0 +1,45 @@
use flowy_document2::parser::external::parser::ExternalDataToNestedJSONParser;
use flowy_document2::parser::parser_entities::{InputType, NestedBlock};
// Builds an array of `(expected_json, input_html)` string pairs, one pair per
// fixture name passed to the macro.
//
// For every identifier, the files `../../assets/json/<name>.json` and
// `../../assets/html/<name>.html` are embedded at compile time through
// `include_str!`, so a missing fixture is a build error rather than a runtime one.
macro_rules! generate_test_cases {
  ($($fixture:ident),*) => {
    [
      $(
        (
          include_str!(concat!("../../assets/json/", stringify!($fixture), ".json")),
          include_str!(concat!("../../assets/html/", stringify!($fixture), ".html")),
        )
      ),*
    ]
  };
}
/// Converts every HTML fixture into a nested block and compares it with the
/// expected JSON fixture of the same name.
///
/// The cases come from `generate_test_cases!(notion, google_docs)`: for each name,
/// `assets/html/<name>.html` is the parser input and `assets/json/<name>.json` is
/// the expected result. Case indices in failure messages follow that order
/// (0 = notion, 1 = google_docs).
#[tokio::test]
async fn html_to_document_test() {
  let test_cases = generate_test_cases!(notion, google_docs);
  for (index, (json, html)) in test_cases.iter().enumerate() {
    let parser = ExternalDataToNestedJSONParser::new(html.to_string(), InputType::Html);
    // Fail with the case index instead of an anonymous `unwrap` panic.
    let block = parser
      .to_nested_block()
      .unwrap_or_else(|| panic!("html test case #{}: parser returned no block", index));
    let expect_block = serde_json::from_str::<NestedBlock>(json)
      .unwrap_or_else(|err| panic!("html test case #{}: invalid expected json: {}", index, err));
    assert_eq!(
      block, expect_block,
      "html test case #{}: parsed block differs from expected json",
      index
    );
  }
}
/// Converts the plain-text fixture into a nested block and compares it with the
/// expected JSON fixture.
///
/// Input is `assets/text/plain_text.txt`; the expected block is deserialized from
/// `assets/json/plain_text.json`.
#[tokio::test]
async fn plain_text_to_document_test() {
  let plain_text = include_str!("../../assets/text/plain_text.txt");
  let expect_json = include_str!("../../assets/json/plain_text.json");

  let parser = ExternalDataToNestedJSONParser::new(plain_text.to_string(), InputType::PlainText);
  // `expect` states the violated invariant instead of a bare `unwrap` panic.
  let block = parser
    .to_nested_block()
    .expect("plain text parser returned no block");
  let expect_block = serde_json::from_str::<NestedBlock>(expect_json)
    .expect("plain_text.json must deserialize into NestedBlock");

  assert_eq!(block, expect_block);
}

View File

@ -1,3 +1,4 @@
mod document_data_parser_test;
mod html_text;
mod html;
mod json;
mod parse_to_html_text;

View File

@ -1,4 +1,4 @@
use crate::parser::html_text::utils::{assert_document_html_eq, assert_document_text_eq};
use crate::parser::parse_to_html_text::utils::{assert_document_html_eq, assert_document_text_eq};
macro_rules! generate_test_cases {
($($block_ty:ident),*) => {