From 9ab4f45e72b972de18b44f532e5cea4ab0284872 Mon Sep 17 00:00:00 2001 From: Adam Obuchowicz Date: Thu, 25 Nov 2021 11:45:42 +0100 Subject: [PATCH] Refactored `enso-data` crate and text utilities. (#3166) --- Cargo.lock | 40 +- Cargo.toml | 3 +- app/gui/CHANGELOG.md | 4 + app/gui/Cargo.toml | 3 +- .../double-representation/Cargo.toml | 3 +- .../double-representation/src/module.rs | 5 +- .../double-representation/src/text.rs | 165 +++-- app/gui/controller/engine-protocol/Cargo.toml | 3 +- .../src/language_server/types.rs | 67 +- .../controller/engine-protocol/src/types.rs | 10 + app/gui/language/ast/impl/Cargo.toml | 7 +- app/gui/language/ast/impl/src/crumbs.rs | 20 +- app/gui/language/ast/impl/src/id_map.rs | 104 +++ app/gui/language/ast/impl/src/lib.rs | 305 +++++---- app/gui/language/ast/impl/src/test_utils.rs | 5 +- app/gui/language/ast/impl/src/traits.rs | 1 - app/gui/language/parser/Cargo.toml | 3 +- app/gui/language/parser/src/api.rs | 73 +- app/gui/language/parser/src/jsclient.rs | 7 +- app/gui/language/parser/src/test_utils.rs | 3 +- app/gui/language/parser/src/wsclient.rs | 13 +- app/gui/language/parser/tests/bugs.rs | 3 +- app/gui/language/span-tree/Cargo.toml | 5 +- app/gui/language/span-tree/example/Cargo.toml | 1 + app/gui/language/span-tree/example/src/lib.rs | 24 +- app/gui/language/span-tree/src/action.rs | 17 +- app/gui/language/span-tree/src/builder.rs | 10 +- app/gui/language/span-tree/src/generate.rs | 12 +- app/gui/language/span-tree/src/node.rs | 91 +-- app/gui/src/controller/graph.rs | 6 +- app/gui/src/controller/module.rs | 15 +- app/gui/src/controller/searcher.rs | 19 +- app/gui/src/controller/text.rs | 8 +- app/gui/src/ide/integration/project.rs | 8 +- app/gui/src/model/module.rs | 13 +- app/gui/src/model/module/plain.rs | 29 +- app/gui/src/model/module/synchronized.rs | 130 ++-- app/gui/src/model/suggestion_database.rs | 15 +- .../src/model/suggestion_database/entry.rs | 6 +- app/gui/view/graph-editor/Cargo.toml | 1 + 
.../src/component/node/input/area.rs | 88 +-- .../src/component/node/input/port.rs | 14 +- .../src/component/node/output/area.rs | 4 +- .../src/component/node/output/port.rs | 5 +- lib/rust/{data => data-structures}/Cargo.toml | 2 +- lib/rust/{data => data-structures}/README.md | 2 +- .../benches/bench_tree_query.rs | 2 +- .../src/dependency_graph.rs | 2 +- .../{data => data-structures}/src/diet.rs | 0 .../src/hash_map_tree.rs | 0 .../{data => data-structures}/src/index.rs | 24 +- lib/rust/{data => data-structures}/src/lib.rs | 1 - .../{data => data-structures}/src/opt_vec.rs | 0 lib/rust/data/src/text.rs | 634 ------------------ lib/rust/ensogl/component/text/Cargo.toml | 1 + lib/rust/ensogl/component/text/src/buffer.rs | 20 +- .../ensogl/component/text/src/buffer/data.rs | 17 - .../ensogl/component/text/src/buffer/style.rs | 8 +- .../ensogl/component/text/src/buffer/view.rs | 16 +- .../text/src/buffer/view/movement.rs | 5 +- .../text/src/buffer/view/selection.rs | 4 +- .../component/text/src/buffer/view/word.rs | 6 +- .../component/text/src/component/area.rs | 16 +- lib/rust/ensogl/component/text/src/lib.rs | 64 +- lib/rust/ensogl/core/Cargo.toml | 2 +- lib/rust/ensogl/core/src/data.rs | 10 +- .../ensogl/core/src/display/scene/layer.rs | 2 +- lib/rust/ensogl/core/src/lib.rs | 2 +- .../core/src/system/gpu/data/attribute.rs | 8 +- lib/rust/ensogl/example/Cargo.toml | 1 + lib/rust/ensogl/example/src/list_view.rs | 2 +- lib/rust/parser/Cargo.toml | 2 +- lib/rust/parser/src/macros/registry.rs | 2 +- lib/rust/text/Cargo.toml | 14 + lib/rust/text/src/lib.rs | 109 +++ .../src/buffer/data => text/src}/range.rs | 66 +- .../text/src/buffer/data => text/src}/rope.rs | 0 .../src/buffer/data => text/src}/spans.rs | 6 +- .../text/src/buffer/data => text/src}/text.rs | 121 +++- .../text/src/buffer/data => text/src}/unit.rs | 72 +- lib/rust/types/src/unit.rs | 39 ++ lib/rust/web/Cargo.toml | 2 +- 82 files changed, 1262 insertions(+), 1390 deletions(-) create mode 100644 
app/gui/language/ast/impl/src/id_map.rs rename lib/rust/{data => data-structures}/Cargo.toml (95%) rename lib/rust/{data => data-structures}/README.md (64%) rename lib/rust/{data => data-structures}/benches/bench_tree_query.rs (98%) rename lib/rust/{data => data-structures}/src/dependency_graph.rs (99%) rename lib/rust/{data => data-structures}/src/diet.rs (100%) rename lib/rust/{data => data-structures}/src/hash_map_tree.rs (100%) rename lib/rust/{data => data-structures}/src/index.rs (66%) rename lib/rust/{data => data-structures}/src/lib.rs (97%) rename lib/rust/{data => data-structures}/src/opt_vec.rs (100%) delete mode 100644 lib/rust/data/src/text.rs delete mode 100644 lib/rust/ensogl/component/text/src/buffer/data.rs create mode 100644 lib/rust/text/Cargo.toml create mode 100644 lib/rust/text/src/lib.rs rename lib/rust/{ensogl/component/text/src/buffer/data => text/src}/range.rs (67%) rename lib/rust/{ensogl/component/text/src/buffer/data => text/src}/rope.rs (100%) rename lib/rust/{ensogl/component/text/src/buffer/data => text/src}/spans.rs (96%) rename lib/rust/{ensogl/component/text/src/buffer/data => text/src}/text.rs (88%) rename lib/rust/{ensogl/component/text/src/buffer/data => text/src}/unit.rs (60%) diff --git a/Cargo.lock b/Cargo.lock index 5b22b5dc8b..edcf8e942b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,9 +89,10 @@ version = "0.1.0" dependencies = [ "ast-macros", "derive_more", - "enso-data", + "enso-data-structures", "enso-prelude", "enso-shapely", + "enso-text", "failure", "lazy_static", "regex", @@ -714,9 +715,10 @@ version = "0.1.0" dependencies = [ "ast", "engine-protocol", - "enso-data", + "enso-data-structures", "enso-logger", "enso-prelude", + "enso-text", "failure", "itertools 0.10.1", "parser", @@ -770,10 +772,11 @@ dependencies = [ "bytes 0.5.6", "chrono", "enso-build-utilities", - "enso-data", + "enso-data-structures", "enso-logger", "enso-prelude", "enso-shapely", + "enso-text", "enso-web", "failure", "flatbuffers", @@ -827,7 
+830,7 @@ dependencies = [ ] [[package]] -name = "enso-data" +name = "enso-data-structures" version = "0.2.0" dependencies = [ "criterion", @@ -895,11 +898,12 @@ dependencies = [ "engine-protocol", "enso-callback", "enso-config", - "enso-data", + "enso-data-structures", "enso-frp", "enso-logger", "enso-prelude", "enso-shapely", + "enso-text", "enso-web", "ensogl", "ensogl-drop-manager", @@ -1070,6 +1074,7 @@ dependencies = [ "ast", "enso-logger", "enso-prelude", + "enso-text", "enso-web", "span-tree", "uuid", @@ -1077,6 +1082,16 @@ dependencies = [ "web-sys", ] +[[package]] +name = "enso-text" +version = "0.1.0" +dependencies = [ + "enso-prelude", + "enso-types", + "serde", + "xi-rope", +] + [[package]] name = "enso-types" version = "0.1.0" @@ -1092,7 +1107,7 @@ version = "0.1.0" dependencies = [ "async-std", "console_error_panic_hook", - "enso-data", + "enso-data-structures", "enso-logger", "enso-prelude", "failure", @@ -1126,7 +1141,7 @@ dependencies = [ "code-builder", "console_error_panic_hook", "enso-callback", - "enso-data", + "enso-data-structures", "enso-frp", "enso-generics", "enso-logger", @@ -1177,6 +1192,7 @@ dependencies = [ "enso-logger", "enso-prelude", "enso-shapely", + "enso-text", "ensogl-core", "ensogl-drop-manager", "ensogl-gui-component", @@ -1222,6 +1238,7 @@ dependencies = [ "enso-frp", "enso-prelude", "enso-shapely", + "enso-text", "enso-types", "ensogl-core", "ensogl-hardcoded-theme", @@ -1791,6 +1808,7 @@ dependencies = [ "enso-logger", "enso-prelude", "enso-shapely", + "enso-text", "ensogl", "ensogl-drop-manager", "ensogl-gui-component", @@ -2591,8 +2609,9 @@ dependencies = [ "bytes 0.5.6", "console_error_panic_hook", "enso-build-utilities", - "enso-data", + "enso-data-structures", "enso-prelude", + "enso-text", "failure", "futures 0.3.17", "js-sys", @@ -2621,7 +2640,7 @@ name = "parser-new" version = "0.1.0" dependencies = [ "criterion", - "enso-data", + "enso-data-structures", "enso-logger", "enso-prelude", "itertools 0.10.1", @@ 
-3477,8 +3496,9 @@ name = "span-tree" version = "0.1.0" dependencies = [ "ast", - "enso-data", + "enso-data-structures", "enso-prelude", + "enso-text", "failure", "parser", "wasm-bindgen-test 0.3.8", diff --git a/Cargo.toml b/Cargo.toml index ad8c436985..3b507c883f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ members = [ "lib/rust/callback", "lib/rust/code-builder", "lib/rust/config-reader", - "lib/rust/data", + "lib/rust/data-structures", "lib/rust/ensogl", "lib/rust/ensogl/app/theme/hardcoded", "lib/rust/ensogl/core", @@ -54,6 +54,7 @@ members = [ "lib/rust/shapely/macros", "lib/rust/shortcuts", "lib/rust/shortcuts/example", + "lib/rust/text", "lib/rust/types", "lib/rust/web", "lib/rust/not-used/eval-tt", diff --git a/app/gui/CHANGELOG.md b/app/gui/CHANGELOG.md index 0c76717a85..1123f88cc6 100644 --- a/app/gui/CHANGELOG.md +++ b/app/gui/CHANGELOG.md @@ -3,6 +3,10 @@ #### Visual Environment - [Fixed histograms coloring and added a color legend.][3153] +- [Fixed broken node whose expression contains non-ASCII characters.][3166] + +[3153]: https://github.com/enso-org/enso/pull/3153 +[3166]: https://github.com/enso-org/enso/pull/3166 # Enso 2.0.0-alpha.18 (2021-10-12) diff --git a/app/gui/Cargo.toml b/app/gui/Cargo.toml index 6742d25b36..10170c6950 100644 --- a/app/gui/Cargo.toml +++ b/app/gui/Cargo.toml @@ -12,11 +12,12 @@ analytics = { version = "0.1.0", path = "analytics" } double-representation = { version = "0.1.0", path = "controller/double-representation" } enso-config = { path = "config" } enso-callback = { path = "../../lib/rust/callback" } -enso-data = { path = "../../lib/rust/data"} +enso-data-structures = { path = "../../lib/rust/data-structures" } enso-frp = { path = "../../lib/rust/frp" } enso-logger = { path = "../../lib/rust/logger"} enso-prelude = { path = "../../lib/rust/prelude"} enso-shapely = { path = "../../lib/rust/shapely/impl"} +enso-text = { path = "../../lib/rust/text" } enso-web = { path = "../../lib/rust/web" } ensogl = { 
path = "../../lib/rust/ensogl" } ensogl-examples = { path = "../../lib/rust/ensogl/example" } diff --git a/app/gui/controller/double-representation/Cargo.toml b/app/gui/controller/double-representation/Cargo.toml index 8b34018637..de8e988dba 100644 --- a/app/gui/controller/double-representation/Cargo.toml +++ b/app/gui/controller/double-representation/Cargo.toml @@ -11,9 +11,10 @@ crate-type = ["cdylib", "rlib"] ast = { version = "0.1.0", path = "../../language/ast/impl" } engine-protocol = { version = "0.1.0", path = "../engine-protocol" } parser = { version = "0.1.0", path = "../../language/parser" } -enso-data = { path = "../../../../lib/rust/data"} +enso-data-structures = { path = "../../../../lib/rust/data-structures" } enso-logger = { path = "../../../../lib/rust/logger"} enso-prelude = { path = "../../../../lib/rust/prelude"} +enso-text = { path = "../../../../lib/rust/text" } failure = { version = "0.1.6" } itertools = { version = "0.10.0" } serde = { version = "1.0", features = ["derive"] } diff --git a/app/gui/controller/double-representation/src/module.rs b/app/gui/controller/double-representation/src/module.rs index 370582d279..f49be85f64 100644 --- a/app/gui/controller/double-representation/src/module.rs +++ b/app/gui/controller/double-representation/src/module.rs @@ -19,6 +19,7 @@ use ast::crumbs::ModuleCrumb; use ast::known; use ast::BlockLine; use engine_protocol::language_server; +use enso_text::unit::*; use serde::Deserialize; use serde::Serialize; @@ -754,9 +755,9 @@ pub fn lookup_method( pub fn definition_span( ast: &known::Module, id: &definition::Id, -) -> FallibleResult { +) -> FallibleResult> { let location = locate(ast, id)?; - ast.span_of_descendent_at(&location.crumbs) + ast.range_of_descendant_at(&location.crumbs) } impl DefinitionProvider for known::Module { diff --git a/app/gui/controller/double-representation/src/text.rs b/app/gui/controller/double-representation/src/text.rs index 0d2e51ae04..38033fcb26 100644 --- 
a/app/gui/controller/double-representation/src/text.rs +++ b/app/gui/controller/double-representation/src/text.rs @@ -3,8 +3,7 @@ use crate::prelude::*; use ast::IdMap; -use enso_data::text::Size; -use enso_data::text::Span; +use enso_text::unit::*; @@ -15,7 +14,7 @@ use enso_data::text::Span; /// Update IdMap to reflect the recent code change. pub fn apply_code_change_to_id_map( id_map: &mut IdMap, - change: &enso_data::text::TextChange, + change: &enso_text::text::Change, code: &str, ) { // TODO [mwu] @@ -26,35 +25,33 @@ pub fn apply_code_change_to_id_map( // API. Because of such expected rewrite and deeper restructuring, we don't really want to // spend much time on refactoring this function right now, even if it could be made nicer. - - let removed = change.replaced_span(); - let inserted = change.inserted.as_str(); - let new_code = change.applied(code); + let removed = &change.range.clone(); + let inserted = change.text.as_str(); + let new_code = change.applied(code).unwrap_or_else(|_| code.to_owned()); let non_white = |c: char| !c.is_whitespace(); let logger = enso_logger::DefaultWarningLogger::new("apply_code_change_to_id_map"); let vector = &mut id_map.vec; - let inserted_size = Size::from_text(inserted); + let inserted_size: Bytes = inserted.len().into(); info!(logger, "Old code:\n```\n{code}\n```"); info!(logger, "New code:\n```\n{new_code}\n```"); info!(logger, "Updating the ID map with the following text edit: {change:?}."); // Remove all entries fully covered by the removed span. - vector.drain_filter(|(span, _)| removed.contains_span(span)); + vector.drain_filter(|(range, _)| removed.contains_range(range)); // If the edited section ends up being the trailing part of AST node, how many bytes should be // trimmed from the id. Precalculated, as is constant in the loop below. 
- let to_trim_back = { + let to_trim_back: Bytes = { let last_non_white = inserted.rfind(non_white); let inserted_len = || inserted.len(); let length_to_last_non_white = |index| inserted.len() - index - 1; - Size::new(last_non_white.map_or_else(inserted_len, length_to_last_non_white)) + last_non_white.map_or_else(inserted_len, length_to_last_non_white).into() }; // As above but for the front side. - let to_trim_front = { + let to_trim_front: Bytes = { let first_non_white = inserted.find(non_white); - let ret = first_non_white.unwrap_or_else(|| inserted.len()); - Size::new(ret) + first_non_white.unwrap_or_else(|| inserted.len()).into() }; let inserted_non_white = inserted.chars().any(non_white); @@ -65,95 +62,98 @@ pub fn apply_code_change_to_id_map( // This is needed for edits like: `foo f` => `foo` — the earlier `foo` in `foo f` also has a // id map entry, however we want it to be consistently shadowed by the id from the whole App // expression. - let mut preferred: HashMap = default(); + let mut preferred: HashMap, ast::Id> = default(); - for (span, id) in vector.iter_mut() { - // These + for (range, id) in vector.iter_mut() { let mut trim_front = false; let mut trim_back = false; - let initial_span = *span; - info!(logger, "Processing @{span}: `{&code[*span]}`."); - if span.index > removed.end() { + let initial_range = *range; + info!(logger, "Processing @{range}: `{&code[*range]}`."); + if range.start > removed.end { debug!(logger, "Node after the edited region."); // AST node starts after edited region — it will be simply shifted. - let code_between = &code[Span::from(removed.end()..span.index)]; - span.move_left(removed.size); - span.move_right(inserted_size); + let between_range: enso_text::Range<_> = (removed.end..range.start).into(); + let code_between = &code[between_range]; + *range = range.moved_left(removed.size()).moved_right(inserted_size); // If there are only spaces between current AST symbol and insertion, extend the symbol. 
// This is for cases like line with `foo ` being changed into `foo j`. debug!(logger, "Between: `{code_between}`."); if all_spaces(code_between) && inserted_non_white { debug!(logger, "Will extend the node leftwards."); - span.extend_left(inserted_size); - span.extend_left(Size::from_text(code_between)); + range.start -= inserted_size + between_range.size(); trim_front = true; } - } else if span.index >= removed.index { - // AST node starts inside the edited region. It doesn't end strictly inside it. + } else if range.start >= removed.start { + // AST node starts inside the edited region. It does not have to end inside it. debug!(logger, "Node overlapping with the end of the edited region."); - let removed_before = span.index - removed.index; - span.move_left(removed_before); - span.shrink_right(removed.size - removed_before); - span.extend_right(inserted_size); + let removed_before = range.start - removed.start; + *range = range.moved_left(removed_before); + range.end -= removed.size() - removed_before; + range.end += inserted_size; trim_front = true; - } else if span.end() >= removed.index { + } else if range.end >= removed.start { // AST node starts before the edited region and reaches (or possibly goes past) its end. debug!(logger, "Node overlapping with the beginning of the edited region."); - if span.end() <= removed.end() { + if range.end <= removed.end { trim_back = true; } - let removed_chars = (span.end() - removed.index).min(removed.size); - span.shrink_right(removed_chars); - span.extend_right(inserted_size); + let removed_chars = (range.end - removed.start).min(removed.size()); + range.end -= removed_chars; + range.end += inserted_size; } else { debug!(logger, "Node before the edited region."); // If there are only spaces between current AST symbol and insertion, extend the symbol. // This is for cases like line with `foo ` being changed into `foo j`. 
- let between = &code[Span::from(span.end()..removed.index)]; + let between_range: enso_text::Range<_> = (range.end..removed.start).into(); + let between = &code[between_range]; if all_spaces(between) && inserted_non_white { debug!(logger, "Will extend "); - span.size += Size::from_text(between) + inserted_size; + range.end += between_range.size() + inserted_size; trim_back = true; } } - if trim_front && to_trim_front.non_empty() { - span.shrink_left(to_trim_front); - debug!(logger, "Trimming front {to_trim_front} bytes."); + if trim_front && to_trim_front > 0.bytes() { + range.start += to_trim_front; + debug!(logger, "Trimming front {to_trim_front.as_usize()} chars."); } if trim_back { - if to_trim_back.non_empty() { - span.shrink_right(to_trim_back); - debug!(logger, "Trimming back {to_trim_back} bytes."); + if to_trim_back > 0.bytes() { + range.end += -to_trim_back; + debug!(logger, "Trimming back {to_trim_back.as_usize()} chars."); } - let new_repr = &new_code[*span]; + let new_repr = &new_code[*range]; // Trim trailing spaces - let spaces = spaces_size(new_repr.chars().rev()); - if spaces.non_empty() { - debug!(logger, "Additionally trimming {spaces} trailing spaces."); + let space_count = spaces_size(new_repr.chars().rev()); + let spaces_len: Bytes = (space_count.as_usize() * ' '.len_utf8()).into(); + if spaces_len > 0.bytes() { + debug!(logger, "Additionally trimming {space_count.as_usize()} trailing spaces."); debug!(logger, "The would-be code: `{new_repr}`."); - span.shrink_right(spaces); + range.end -= spaces_len; } } // If we edited front or end of an AST node, its extended (or shrunk) span will be // preferred. 
if trim_front || trim_back { - preferred.insert(*span, *id); + preferred.insert(*range, *id); } - info!( - logger, - "Processing for id {id}: {initial_span} ->\t{span}.\n\ - Code: `{&code[initial_span]}` => `{&new_code[*span]}`" - ); + info!(logger, || { + let old_fragment = &code[initial_range]; + let new_fragment = &new_code[*range]; + iformat!( + "Processing for id {id}: {initial_range} ->\t{range}.\n + Code: `{old_fragment}` => `{new_fragment}`" + ) + }); } // If non-preferred entry collides with the preferred one, remove the former. - vector.drain_filter(|(span, id)| { - preferred.get(span).map(|preferred_id| id != preferred_id).unwrap_or(false) + vector.drain_filter(|(range, id)| { + preferred.get(range).map(|preferred_id| id != preferred_id).unwrap_or(false) }); } @@ -163,9 +163,9 @@ pub fn apply_code_change_to_id_map( // === Helpers === // =============== -/// Returns the byte length of leading space characters sequence. -fn spaces_size(itr: impl Iterator) -> Size { - Size::new(itr.take_while(|c| *c == ' ').fold(0, |acc, _| acc + 1)) +/// Returns the chars count of leading space characters sequence. +fn spaces_size(itr: impl Iterator) -> Chars { + itr.take_while(|c| *c == ' ').fold(0, |acc, _| acc + 1).into() } /// Checks if the given string slice contains only space charactesr. @@ -186,8 +186,6 @@ mod test { use crate::module; use ast::HasIdMap; - use enso_data::text::Index; - use enso_data::text::TextChange; use enso_prelude::default; use parser::Parser; use uuid::Uuid; @@ -195,11 +193,12 @@ mod test { /// A sample text edit used to test "text api" properties. /// /// See `from_markdown` constructor function for helper markdown description. + #[derive(Debug)] struct Case { /// The initial enso program code. pub code: String, /// The edit made to the initial code. 
- pub change: TextChange, + pub change: enso_text::Change, } impl Case { @@ -227,11 +226,9 @@ mod test { let inserted_code = insertion.map_or("", |insertion| { &marked_code[insertion + INSERTION.len_utf8()..end] }); - let removed_span = Range { - start: Index::new(start), - end: Index::new(erased_finish - START.len_utf8()), - }; - let change = TextChange::replace(removed_span, inserted_code.to_string()); + let range_end = (erased_finish - START.len_utf8()).into(); + let range = enso_text::Range::new(start.into(), range_end); + let change = enso_text::Change { range, text: inserted_code.to_string() }; Case { code, change } } _ => panic!("Invalid markdown in the marked code: {}.", marked_code), @@ -240,7 +237,7 @@ mod test { /// Code after applying the change fn resulting_code(&self) -> String { - self.change.applied(&self.code) + self.change.applied(&self.code).expect("Change removed range out of bounds") } /// Checks if the text operation described by this case keeps the node IDs intact. @@ -254,7 +251,17 @@ mod test { let code2 = self.resulting_code(); let ast2 = parser.parse_module(&code2, id_map.clone()).unwrap(); - assert_same_node_ids(&ast1, &ast2); + self.assert_same_node_ids(&ast1, &ast2); + } + + /// Checks that both module AST contain `main` function that has the same sequence of node + /// IDs, as described by the `main_nodes` function. + fn assert_same_node_ids(&self, ast1: &ast::known::Module, ast2: &ast::known::Module) { + let ids1 = main_nodes(ast1); + let ids2 = main_nodes(ast2); + DEBUG!("IDs1: {ids1:?}"); + DEBUG!("IDs2: {ids2:?}"); + assert_eq!(ids1, ids2, "Node ids mismatch in {:?}", self); } } @@ -279,28 +286,18 @@ mod test { nodes.into_iter().map(|node| node.id()).collect() } - /// Checks that both module AST contain `main` function that has the same sequence of node IDs, - /// as described by the `main_nodes` function. 
- fn assert_same_node_ids(ast1: &ast::known::Module, ast2: &ast::known::Module) { - let ids1 = main_nodes(ast1); - let ids2 = main_nodes(ast2); - DEBUG!("IDs1: {ids1:?}"); - DEBUG!("IDs2: {ids2:?}"); - assert_eq!(ids1, ids2); - } - #[test] fn test_case_markdown() { let case = Case::from_markdown("foo«aa⎀bb»c"); assert_eq!(case.code, "fooaac"); - assert_eq!(case.change.inserted, "bb"); - assert_eq!(case.change.replaced, Index::new(3)..Index::new(5)); + assert_eq!(case.change.text, "bb"); + assert_eq!(case.change.range, 3.bytes()..5.bytes()); assert_eq!(case.resulting_code(), "foobbc"); let case = Case::from_markdown("foo«aa»c"); assert_eq!(case.code, "fooaac"); - assert_eq!(case.change.inserted, ""); - assert_eq!(case.change.replaced, Index::new(3)..Index::new(5)); + assert_eq!(case.change.text, ""); + assert_eq!(case.change.range, 3.bytes()..5.bytes()); assert_eq!(case.resulting_code(), "fooc"); } diff --git a/app/gui/controller/engine-protocol/Cargo.toml b/app/gui/controller/engine-protocol/Cargo.toml index fe501461c4..63417c4236 100644 --- a/app/gui/controller/engine-protocol/Cargo.toml +++ b/app/gui/controller/engine-protocol/Cargo.toml @@ -8,10 +8,11 @@ edition = "2018" crate-type = ["cdylib", "rlib"] [dependencies] -enso-data = { path = "../../../../lib/rust/data"} +enso-data-structures = { path = "../../../../lib/rust/data-structures" } enso-logger = { path = "../../../../lib/rust/logger"} enso-prelude = { path = "../../../../lib/rust/prelude"} enso-shapely = { path = "../../../../lib/rust/shapely/impl"} +enso-text = { path = "../../../../lib/rust/text"} json-rpc = { path = "../../../../lib/rust/json-rpc" } chrono = { version = "0.4", features = ["serde"] } failure = { version = "0.1.8" } diff --git a/app/gui/controller/engine-protocol/src/language_server/types.rs b/app/gui/controller/engine-protocol/src/language_server/types.rs index c4cba7e759..2536bca011 100644 --- a/app/gui/controller/engine-protocol/src/language_server/types.rs +++ 
b/app/gui/controller/engine-protocol/src/language_server/types.rs @@ -1,6 +1,6 @@ //! This module contains language server types. -use super::*; +use crate::language_server::*; @@ -456,17 +456,17 @@ pub struct Position { pub character: usize, } -impls! { From + &From for Position { |location| +impls! { From + &From for Position { |location| Position { - line : location.line, - character : location.column, + line: location.line.as_usize(), + character: location.column.as_usize(), } }} -impls! { From + &From for enso_data::text::TextLocation { |position| - enso_data::text::TextLocation { - line : position.line, - column : position.character, +impls! { From + &From for enso_text::Location { |position| + enso_text::Location { + line: position.line.into(), + column: position.character.into(), } }} @@ -484,15 +484,15 @@ pub struct TextRange { pub end: Position, } -impls! { From + &From > for TextRange { |range| +impls! { From + &From > for TextRange { |range| TextRange { start : range.start.into(), end : range.end.into(), } }} -impls! { From + &From for Range { |range| - range.start.into()..range.end.into() +impls! { From + &From for enso_text::Range { |range| + enso_text::Range::new(range.start.into(), range.end.into()) }} @@ -513,12 +513,11 @@ pub struct TextEdit { impl TextEdit { /// Compute an edit that represents the difference between the two given strings based on their /// common pre- and postfix. This is an approximation of the diff between the two strings that - /// assumes that anythign between the common prefix and the common post-fix has changed. + /// assumes that anything between the common prefix and the common post-fix has changed. 
/// /// Example: /// ``` /// # use engine_protocol::language_server::{TextEdit, Position, TextRange}; - /// # use enso_data::text::TextLocation; /// let source = "\n333<->🌊12345\n"; /// let target = "\n333x🔥12345\n"; /// let diff = TextEdit::from_prefix_postfix_differences(source, target); @@ -546,31 +545,29 @@ impl TextEdit { /// }; /// assert_eq!(diff, TextEdit { range: edit_range, text: "".to_string() }); /// ``` - pub fn from_prefix_postfix_differences(source: &str, target: &str) -> TextEdit { - use enso_data::text::Index; - use enso_data::text::TextLocation; + pub fn from_prefix_postfix_differences( + source: impl Into, + target: impl Into, + ) -> TextEdit { + use enso_text::unit::*; + use enso_text::Range; - let source_length = source.chars().count(); - let target_length = target.chars().count(); + let source = source.into(); + let target = target.into(); + let common_lengths = source.common_prefix_and_suffix(&target); - let common_prefix_len = common_prefix_length(source, target); - let common_postfix_len = common_postfix_length(source, target); - let common_parts_len = common_prefix_len + common_postfix_len; - let overlaping_chars = common_parts_len.saturating_sub(source_length.min(target_length)); - let prefix_length = common_prefix_len; - let postfix_length = common_postfix_len - overlaping_chars; + let source_start_byte = common_lengths.prefix; + let source_end_byte = Bytes::from(source.len()) - common_lengths.suffix; - let source_start_index = Index::new(prefix_length); - let source_end_index = Index::new(source_length - postfix_length); + let source_start_position = source.location_of_byte_offset_snapped(source_start_byte); + let source_end_position = source.location_of_byte_offset_snapped(source_end_byte); + let source_text_range = Range::new(source_start_position, source_end_position); - let source_start_position = TextLocation::from_index(source, source_start_index); - let source_end_position = TextLocation::from_index(source, source_end_index); - 
let source_text_range = source_start_position..source_end_position; + let target_len: Bytes = target.len().into(); + let target_range = common_lengths.prefix..(target_len - common_lengths.suffix); + let target_text = target.sub(target_range).to_string(); - let target_range = prefix_length..(target_length - postfix_length); - let target_text = target.chars().skip(target_range.start).take(target_range.len()); - - TextEdit { range: source_text_range.into(), text: target_text.collect() } + TextEdit { range: source_text_range.into(), text: target_text } } /// Return the edit moved by the given number of lines. @@ -798,14 +795,14 @@ pub struct SuggestionEntryScope { pub end: Position, } -impls! { From + &From > for SuggestionEntryScope { |range| +impls! { From + &From > for SuggestionEntryScope { |range| SuggestionEntryScope { start : range.start().into(), end : range.end().into(), } }} -impls! { From + &From for RangeInclusive { |this| +impls! { From + &From for RangeInclusive { |this| this.start.into()..=this.end.into() }} diff --git a/app/gui/controller/engine-protocol/src/types.rs b/app/gui/controller/engine-protocol/src/types.rs index 12c4d2170e..03a81ba01c 100644 --- a/app/gui/controller/engine-protocol/src/types.rs +++ b/app/gui/controller/engine-protocol/src/types.rs @@ -33,6 +33,16 @@ impl Sha3_224 { hasher.input(data); hasher.into() } + + /// Create new SHA3-224 digest of any arbitrary data split into chunks. 
+ pub fn from_parts<'a>(parts: impl IntoIterator) -> Self { + use sha3::Digest; + let mut hasher = sha3::Sha3_224::new(); + for part in parts { + hasher.input(part) + } + hasher.into() + } } impl From for Sha3_224 { diff --git a/app/gui/language/ast/impl/Cargo.toml b/app/gui/language/ast/impl/Cargo.toml index c339feb27d..5607825a97 100644 --- a/app/gui/language/ast/impl/Cargo.toml +++ b/app/gui/language/ast/impl/Cargo.toml @@ -17,6 +17,7 @@ serde_json = { version = "1.0" } shrinkwraprs = { version = "0.2.1" } uuid = { version = "0.8.1", features = ["serde", "v4", "wasm-bindgen"] } ast-macros = { path = "../macros" } -enso-data = { path = "../../../../../lib/rust/data"} -enso-prelude = { path = "../../../../../lib/rust/prelude"} -enso-shapely = { path = "../../../../../lib/rust/shapely/impl"} +enso-data-structures = { path = "../../../../../lib/rust/data-structures" } +enso-text = { path = "../../../../../lib/rust/text" } +enso-prelude = { path = "../../../../../lib/rust/prelude" } +enso-shapely = { path = "../../../../../lib/rust/shapely/impl" } diff --git a/app/gui/language/ast/impl/src/crumbs.rs b/app/gui/language/ast/impl/src/crumbs.rs index 3bf52ec017..a1a869ddcc 100644 --- a/app/gui/language/ast/impl/src/crumbs.rs +++ b/app/gui/language/ast/impl/src/crumbs.rs @@ -12,9 +12,9 @@ use crate::Shifted; use crate::ShiftedVec1; use crate::TokenConsumer; -use enso_data::text::Index; -use enso_data::text::Size; -use enso_data::text::Span; +use enso_text as text; +use enso_text::traits::*; +use enso_text::unit::*; @@ -1465,16 +1465,16 @@ pub trait TraversableAst: Sized { } /// Calculate the span of the descendent AST node described by given crumbs.. 
- fn span_of_descendent_at(&self, crumbs: &[Crumb]) -> FallibleResult { - let mut position = Index::new(0); + fn range_of_descendant_at(&self, crumbs: &[Crumb]) -> FallibleResult> { + let mut position = 0.bytes(); let mut ast = self.my_ast()?; for crumb in crumbs { let child = ast.get(crumb)?; - let child_ix = ast.child_offset(child)?; - position += Span::from_beginning_to(child_ix).size; + let child_offset = ast.child_offset(child)?; + position += child_offset; ast = child; } - Ok(Span::new(position, Size::new(ast.len()))) + Ok(text::Range::new(position, position + ast.len())) } } @@ -2243,8 +2243,8 @@ mod tests { let two = ast.get_traversing(&crumbs_to_two).unwrap(); assert_eq!(two.repr(), "2"); - let two_span = ast.span_of_descendent_at(&crumbs_to_two).unwrap(); - assert_eq!(two_span, Span::from(4..5)); + let two_span = ast.range_of_descendant_at(&crumbs_to_two).unwrap(); + assert_eq!(two_span, 4.bytes()..5.bytes()); assert_eq!(&expected_code[two_span], "2"); } } diff --git a/app/gui/language/ast/impl/src/id_map.rs b/app/gui/language/ast/impl/src/id_map.rs new file mode 100644 index 0000000000..3998ef16c4 --- /dev/null +++ b/app/gui/language/ast/impl/src/id_map.rs @@ -0,0 +1,104 @@ +//! A module containing structures describing id-map. +//! +//! The Id Map is a mapping between code spans and some particular id. It is a part of our language's +//! source file: the parser gives the id of a particular span to the AST node representing that span. + +use crate::prelude::*; + +use crate::Id; + +use enso_text::unit::*; +use serde::Deserialize; +use serde::Serialize; +use uuid::Uuid; + + + +// ============= +// === IdMap === +// ============= + +/// A mapping between text position and immutable ID. +#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct IdMap { + pub vec: Vec<(enso_text::Range, Id)>, +} + +impl IdMap { + /// Create a new instance. + pub fn new(vec: Vec<(enso_text::Range, Id)>) -> IdMap { + IdMap { vec } + } + /// Assigns Span to given ID. 
+ pub fn insert(&mut self, span: impl Into>, id: Id) { + self.vec.push((span.into(), id)); + } + /// Generate random Uuid for span. + pub fn generate(&mut self, span: impl Into>) { + self.vec.push((span.into(), Uuid::new_v4())); + } +} + + + +// ====================== +// === IdMapForParser === +// ====================== + +/// Strongly typed index of char. +/// +/// Part of json representation of id_map: see [`JsonIdMap`]. +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +struct Index { + value: usize, +} + +/// A size expressed in chars. +/// +/// Part of json representation of id_map: see [`JsonIdMap`]. +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +struct Size { + value: usize, +} + +/// The index and size of a span of some text. +/// +/// Part of json representation of id_map: see [`JsonIdMap`]. +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +struct Span { + index: Index, + size: Size, +} + +/// Another representation of the id map, which is the exact mirror of the id-map json stored in +/// a source file. +#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] +#[serde(transparent)] +pub struct JsonIdMap { + vec: Vec<(Span, Id)>, +} + +impl JsonIdMap { + /// Create from the [`IdMap`] structure. + /// + /// The code is needed for transforming byte offsets to codepoint offsets. 
+ pub fn from_id_map(id_map: &IdMap, code: &str) -> Self { + let char_offsets = code.char_indices().map(|(idx, _)| idx).collect_vec(); + let mapped_vec = id_map.vec.iter().map(|(range, id)| { + let byte_start = range.start.as_usize(); + let byte_end = range.end.as_usize(); + let start: Chars = char_offsets.binary_search(&byte_start).unwrap_both().into(); + let end: Chars = char_offsets.binary_search(&byte_end).unwrap_both().into(); + let size = end - start; + let span = Span { + index: Index { value: start.as_usize() }, + size: Size { value: size.as_usize() }, + }; + (span, *id) + }); + Self { vec: mapped_vec.collect() } + } +} diff --git a/app/gui/language/ast/impl/src/lib.rs b/app/gui/language/ast/impl/src/lib.rs index 31cb5eecdd..1b453c960c 100644 --- a/app/gui/language/ast/impl/src/lib.rs +++ b/app/gui/language/ast/impl/src/lib.rs @@ -8,6 +8,7 @@ pub mod assoc; #[warn(missing_docs)] pub mod crumbs; +pub mod id_map; #[warn(missing_docs)] pub mod identifier; #[warn(missing_docs)] @@ -66,13 +67,12 @@ use crate::prelude::*; pub use crumbs::Crumb; pub use crumbs::Crumbs; +pub use id_map::IdMap; use ast_macros::*; -use enso_data::text::Index; -use enso_data::text::Size; -use enso_data::text::Span; - use enso_shapely::*; +use enso_text::traits::*; +use enso_text::unit::*; use serde::de::Deserializer; use serde::de::Visitor; use serde::ser::SerializeStruct; @@ -86,34 +86,6 @@ pub type Stream = Vec; -// ============= -// === IdMap === -// ============= - -/// A mapping between text position and immutable ID. -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize)] -#[serde(transparent)] -pub struct IdMap { - pub vec: Vec<(Span, Id)>, -} - -impl IdMap { - /// Create a new instance. - pub fn new(vec: Vec<(Span, Id)>) -> IdMap { - IdMap { vec } - } - /// Assigns Span to given ID. - pub fn insert(&mut self, span: impl Into, id: Id) { - self.vec.push((span.into(), id)); - } - /// Generate random Uuid for span. 
- pub fn generate(&mut self, span: impl Into) { - self.vec.push((span.into(), Uuid::new_v4())); - } -} - - - // ============== // === Errors === // ============== @@ -287,7 +259,7 @@ impl Ast { pub fn new>>(shape: S, id: Option) -> Ast { let shape = shape.into(); let id = id.unwrap_or_else(Id::new_v4); - let length = shape.len(); + let length = shape.char_count(); Ast::from_ast_id_len(shape, Some(id), length) } @@ -298,13 +270,13 @@ impl Ast { /// Tracking issue: https://github.com/enso-org/ide/issues/434 pub fn new_no_id>>(shape: S) -> Ast { let shape = shape.into(); - let length = shape.len(); + let length = shape.char_count(); Ast::from_ast_id_len(shape, None, length) } /// Just wraps shape, id and len into Ast node. - fn from_ast_id_len(shape: Shape, id: Option, len: usize) -> Ast { - let with_length = WithLength { wrapped: shape, len }; + fn from_ast_id_len(shape: Shape, id: Option, char_count: Chars) -> Ast { + let with_length = WithLength { wrapped: shape, length: char_count }; let with_id = WithID { wrapped: with_length, id }; Ast { wrapped: Rc::new(with_id) } } @@ -348,10 +320,10 @@ impl Ast { /// /// Returned index is the position of the first character of child's text representation within /// the text representation of this AST node. - pub fn child_offset(&self, child: &Ast) -> FallibleResult { + pub fn child_offset(&self, child: &Ast) -> FallibleResult { let searched_token = Token::Ast(child); let mut found_child = false; - let mut position = 0; + let mut position = 0.bytes(); self.shape().feed_to(&mut |token: Token| { if searched_token == token { found_child = true @@ -360,17 +332,17 @@ impl Ast { } }); if found_child { - Ok(Index::new(position)) + Ok(position) } else { Err(NoSuchChild.into()) } } /// Get the span (relative to self) for a child node identified by given crumb. 
- pub fn span_of_child_at(&self, crumb: &Crumb) -> FallibleResult { + pub fn span_of_child_at(&self, crumb: &Crumb) -> FallibleResult> { let child = self.get(crumb)?; - let index = self.child_offset(child)?; - Ok(Span::new(index, Size::new(child.len()))) + let offset = self.child_offset(child)?; + Ok(enso_text::Range::new(offset, offset + child.len())) } } @@ -404,7 +376,7 @@ impl Serialize for Ast { if self.id.is_some() { state.serialize_field(ID, &self.id)?; } - state.serialize_field(LENGTH, &self.len)?; + state.serialize_field(LENGTH, &self.length.as_usize())?; state.end() } } @@ -440,7 +412,7 @@ impl<'de> Visitor<'de> for AstDeserializationVisitor { let shape = shape.ok_or_else(|| serde::de::Error::missing_field(SHAPE))?; let id = id.unwrap_or(None); // allow missing `id` field let len = len.ok_or_else(|| serde::de::Error::missing_field(LENGTH))?; - Ok(Ast::from_ast_id_len(shape, id, len)) + Ok(Ast::from_ast_id_len(shape, id, len.into())) } } @@ -994,6 +966,12 @@ impl TokenConsumer for F { } } +impl<'a> HasTokens for Token<'a> { + fn feed_to(&self, consumer: &mut impl TokenConsumer) { + consumer.feed(self.clone()) + } +} + impl HasTokens for &str { fn feed_to(&self, consumer: &mut impl TokenConsumer) { consumer.feed(Token::Str(self)); @@ -1078,21 +1056,21 @@ pub trait HasIdMap { #[derive(Debug, Clone, Default)] struct IdMapBuilder { id_map: IdMap, - offset: usize, + offset: Bytes, } impl TokenConsumer for IdMapBuilder { fn feed(&mut self, token: Token) { match token { - Token::Off(val) => self.offset += val, - Token::Chr(_) => self.offset += 1, - Token::Str(val) => self.offset += val.chars().count(), + Token::Off(val) => self.offset += Bytes::from(' '.len_utf8() * val), + Token::Chr(chr) => self.offset += Bytes::from(chr.len_utf8()), + Token::Str(val) => self.offset += Bytes::from(val.len()), Token::Ast(val) => { let begin = self.offset; val.shape().feed_to(self); + let end = self.offset; if let Some(id) = val.id { - let span = Span::from_indices(Index::new(begin), 
Index::new(self.offset)); - self.id_map.insert(span, id); + self.id_map.insert(begin..end, id); } } } @@ -1108,68 +1086,33 @@ impl HasIdMap for T { } -// === HasLength === - -/// Things that can be asked about their length. -pub trait HasLength { - /// Length of the textual representation of This type in Unicode codepoints. - /// - /// Usually implemented together with `HasRepr`.For any `T:HasLength+HasRepr` - /// for `t:T` the following must hold: `t.len() == t.repr().len()`. - fn len(&self) -> usize; - - /// More efficient implementation of `t.len() == 0` - fn is_empty(&self) -> bool { - self.len() == 0 - } -} - -#[derive(Debug, Clone, Copy, Default)] -struct LengthBuilder { - length: usize, -} - -impl TokenConsumer for LengthBuilder { - fn feed(&mut self, token: Token) { - match token { - Token::Off(val) => self.length += val, - Token::Chr(_) => self.length += 1, - Token::Str(val) => self.length += val.chars().count(), - Token::Ast(val) => val.shape().feed_to(self), - } - } -} - -impl HasLength for T { - fn len(&self) -> usize { - let mut consumer = LengthBuilder::default(); - self.feed_to(&mut consumer); - consumer.length - } -} - -impl HasLength for Token<'_> { - fn len(&self) -> usize { - match self { - Token::Off(val) => *val, - Token::Chr(_) => 1, - Token::Str(val) => val.chars().count(), - // The below is different and cannot be unified with `LengthBuilder` because below will - // use the cached length, while `LengthBuilder` will traverse subtree. - Token::Ast(val) => val.len(), - } - } -} - - // === HasRepr === /// Things that can be asked about their textual representation. -/// -/// See also `HasLength`. pub trait HasRepr { /// Obtain the text representation for the This type. fn repr(&self) -> String; + + /// Get the representation length in bytes. + /// + /// May be implemented in a quicker way than building string. Must meet the constraint + /// `x.len() == x.repr().len()` for any `x: impl HasRepr`. 
+ fn len(&self) -> Bytes { + self.repr().len().into() + } + + /// Check if the representation is empty. + fn is_empty(&self) -> bool { + self.len() == 0.bytes() + } + + /// Get the representation length in chars. + /// + /// May be implemented in a quicker way than building string. Must meet the constraint + /// `x.char_count() == x.repr().chars().count()` for any `x: impl HasRepr`. + fn char_count(&self) -> Chars { + self.repr().chars().count().into() + } } #[derive(Debug, Clone, Default)] @@ -1188,12 +1131,57 @@ impl TokenConsumer for ReprBuilder { } } +#[derive(Debug, Clone, Copy, Default)] +struct LengthBuilder { + length: Bytes, +} + +impl TokenConsumer for LengthBuilder { + fn feed(&mut self, token: Token) { + match token { + Token::Off(val) => self.length += Bytes::from(' '.len_utf8() * val), + Token::Chr(chr) => self.length += Bytes::from(chr.len_utf8()), + Token::Str(val) => self.length += Bytes::from(val.len()), + Token::Ast(val) => val.shape().feed_to(self), + } + } +} + +#[derive(Debug, Clone, Copy, Default)] +struct CharCountBuilder { + char_count: Chars, +} + + +impl TokenConsumer for CharCountBuilder { + fn feed(&mut self, token: Token) { + match token { + Token::Off(val) => self.char_count += Chars::from(val), + Token::Chr(_) => self.char_count += 1.chars(), + Token::Str(val) => self.char_count += Chars::from(val.chars().count()), + Token::Ast(val) => val.shape().feed_to(self), + } + } +} + impl HasRepr for T { fn repr(&self) -> String { let mut consumer = ReprBuilder::default(); self.feed_to(&mut consumer); consumer.repr } + + fn len(&self) -> Bytes { + let mut consumer = LengthBuilder::default(); + self.feed_to(&mut consumer); + consumer.length + } + + fn char_count(&self) -> Chars { + let mut consumer = CharCountBuilder::default(); + self.feed_to(&mut consumer); + consumer.char_count + } } @@ -1228,55 +1216,60 @@ impl> Layer for WithID { } } -impl HasLength for WithID -where T: HasLength +impl HasRepr for WithID +where T: HasRepr { - fn 
len(&self) -> usize { + fn repr(&self) -> String { + self.deref().repr() + } + + fn len(&self) -> Bytes { self.deref().len() } + + fn char_count(&self) -> Chars { + self.deref().char_count() + } } #[derive(Debug, Clone)] -struct TraverserWithIndex { - index: usize, +struct TraverserWithOffset { + offset: Chars, callback: F, } -impl TraverserWithIndex { - pub fn new(callback: F) -> TraverserWithIndex { - let offset = 0; - TraverserWithIndex { index: offset, callback } +impl TraverserWithOffset { + pub fn new(callback: F) -> TraverserWithOffset { + let offset = 0.chars(); + TraverserWithOffset { offset, callback } } } -impl TokenConsumer for TraverserWithIndex -where F: FnMut(Index, &Ast) +impl TokenConsumer for TraverserWithOffset +where F: FnMut(Chars, &Ast) { fn feed(&mut self, token: Token) { - match token { - Token::Off(val) => self.index += val, - Token::Chr(_) => self.index += 1, - Token::Str(val) => self.index += val.chars().count(), - Token::Ast(val) => { - (self.callback)(Index::new(self.index), val); - val.shape().feed_to(self); - } + if let Token::Ast(val) = token { + (self.callback)(self.offset, val); + val.shape().feed_to(self); + } else { + self.offset += token.char_count(); } } } /// Visits each Ast node, while keeping track of its index. -pub fn traverse_with_index(ast: &impl HasTokens, f: impl FnMut(Index, &Ast)) { - let mut traverser = TraverserWithIndex::new(f); +pub fn traverse_with_offset(ast: &impl HasTokens, f: impl FnMut(Chars, &Ast)) { + let mut traverser = TraverserWithOffset::new(f); ast.feed_to(&mut traverser); } /// Visits each Ast node, while keeping track of its span. 
-pub fn traverse_with_span(ast: &impl HasTokens, mut f: impl FnMut(Span, &Ast)) { - traverse_with_index(ast, move |index, ast| { - f(Span::new(index, enso_data::text::Size::new(ast.len())), ast) +pub fn traverse_with_span(ast: &impl HasTokens, mut f: impl FnMut(enso_text::Range, &Ast)) { + traverse_with_offset(ast, move |offset, ast| { + f(enso_text::Range::new(offset, offset + ast.char_count()), ast) }) } @@ -1292,21 +1285,31 @@ pub struct WithLength { #[shrinkwrap(main_field)] #[serde(flatten)] pub wrapped: T, - pub len: usize, + pub length: Chars, } -impl HasLength for WithLength { - fn len(&self) -> usize { - self.len +impl HasRepr for WithLength +where T: HasRepr +{ + fn repr(&self) -> String { + self.deref().repr() + } + + fn len(&self) -> Bytes { + self.deref().len() + } + + fn char_count(&self) -> Chars { + self.length } } impl Layer for WithLength -where T: HasLength + Into +where T: HasRepr + Into { fn layered(t: T) -> Self { - let length = t.len(); - WithLength { wrapped: t.into(), len: length } + let char_count = t.char_count(); + WithLength { wrapped: t.into(), length: char_count } } } @@ -1715,7 +1718,6 @@ impl From for SegmentFmt { mod tests { use super::*; - use enso_data::text::Size; use serde::de::DeserializeOwned; /// Assert that given value round trips JSON serialization. 
@@ -1749,19 +1751,22 @@ mod tests { #[test] fn ast_length() { - let ast = Ast::prefix(Ast::var("XX"), Ast::var("YY")); - assert_eq!(ast.len(), 5) + let ast = Ast::prefix(Ast::var("XĄ"), Ast::var("YY")); + assert_eq!(ast.len(), 6.bytes()); + assert_eq!(ast.char_count(), 5.chars()); } #[test] fn ast_repr() { - let ast = Ast::prefix(Ast::var("XX"), Ast::var("YY")); - assert_eq!(ast.repr().as_str(), "XX YY") + let ast = Ast::prefix(Ast::var("XĄ"), Ast::var("YY")); + assert_eq!(ast.repr().as_str(), "XĄ YY") } #[test] fn ast_id_map() { - let span = |ix, length| Span::new(Index::new(ix), Size::new(length)); + let span = |ix: usize, length: usize| { + enso_text::Range::::new(ix.into(), (ix + length).into()) + }; let uid = default(); let ids = vec![(span(0, 2), uid), (span(3, 2), uid), (span(0, 5), uid)]; let func = Ast::new(Var { name: "XX".into() }, Some(uid)); @@ -1777,7 +1782,7 @@ mod tests { let v = Var { name: ident.clone() }; let ast = Ast::from(v); assert!(ast.wrapped.id.is_some()); - assert_eq!(ast.wrapped.wrapped.len, ident.len()); + assert_eq!(ast.wrapped.wrapped.length, ident.chars().count().into()); } #[test] @@ -1809,8 +1814,8 @@ mod tests { let expected_uuid = Id::parse_str(uuid_str).ok(); assert_eq!(ast.id, expected_uuid); - let expected_length = 3; - assert_eq!(ast.len, expected_length); + let expected_length = 3.chars(); + assert_eq!(ast.length, expected_length); let expected_var = Var { name: var_name.into() }; let expected_shape = Shape::from(expected_var); @@ -1884,13 +1889,15 @@ mod tests { #[test] fn utf8_lengths() { let var = Ast::var("価"); - assert_eq!(var.len(), 1); + assert_eq!(var.char_count(), 1.chars()); + assert_eq!(var.len(), 3.bytes()); let idmap = var.id_map(); - assert_eq!(idmap.vec[0].0, Span::from(0..1)); + assert_eq!(idmap.vec[0].0, enso_text::Range::new(0.bytes(), 3.bytes())); assert_eq!(idmap.vec[0].1, var.id.unwrap()); let builder_with_char = Token::Chr('壱'); - assert_eq!(builder_with_char.len(), 1); + 
assert_eq!(builder_with_char.char_count(), 1.chars()); + assert_eq!(builder_with_char.len(), 3.bytes()); } } diff --git a/app/gui/language/ast/impl/src/test_utils.rs b/app/gui/language/ast/impl/src/test_utils.rs index 980d8b234f..077c5073ae 100644 --- a/app/gui/language/ast/impl/src/test_utils.rs +++ b/app/gui/language/ast/impl/src/test_utils.rs @@ -3,7 +3,6 @@ use crate::prelude::*; use crate::Ast; -use crate::HasLength; use crate::HasRepr; use crate::Module; use crate::Shape; @@ -34,8 +33,8 @@ pub fn expect_single_line(ast: &Ast) -> &Ast { /// spans we calculate. pub fn validate_spans(ast: &Ast) { for node in ast.iter_recursive() { - let calculated = node.shape().len(); - let declared = node.wrapped.wrapped.len; + let calculated = node.shape().char_count(); + let declared = node.wrapped.wrapped.length; assert_eq!(calculated, declared, "`{}` part of `{}`", node.repr(), ast.repr()); } } diff --git a/app/gui/language/ast/impl/src/traits.rs b/app/gui/language/ast/impl/src/traits.rs index b769889693..9bd11d47ac 100644 --- a/app/gui/language/ast/impl/src/traits.rs +++ b/app/gui/language/ast/impl/src/traits.rs @@ -3,5 +3,4 @@ pub use crate::crumbs::Crumbable; pub use crate::crumbs::TraversableAst; pub use crate::HasID; -pub use crate::HasLength; pub use crate::HasRepr; diff --git a/app/gui/language/parser/Cargo.toml b/app/gui/language/parser/Cargo.toml index 6103db97b1..1e21201143 100644 --- a/app/gui/language/parser/Cargo.toml +++ b/app/gui/language/parser/Cargo.toml @@ -10,8 +10,9 @@ crate-type = ["cdylib", "rlib"] [dependencies] ast = { path = "../ast/impl" } -enso-data = { path = "../../../../lib/rust/data"} +enso-data-structures = { path = "../../../../lib/rust/data-structures" } enso-prelude = { path = "../../../../lib/rust/prelude", features = ["serde", "serde_json"] } +enso-text = { path = "../../../../lib/rust/text" } console_error_panic_hook = { version = "0.1.6" } failure = { version = "0.1" } js-sys = { version = "0.3.28" } diff --git 
a/app/gui/language/parser/src/api.rs b/app/gui/language/parser/src/api.rs index 2e2212c441..b3aeab219a 100644 --- a/app/gui/language/parser/src/api.rs +++ b/app/gui/language/parser/src/api.rs @@ -4,16 +4,18 @@ use crate::prelude::*; use ast::HasIdMap; use ast::HasRepr; -use enso_data::text::ByteIndex; +use enso_text::traits::*; +use enso_text::unit::*; +use enso_text::Range; use serde::de::DeserializeOwned; use serde::Deserialize; use serde::Serialize; +use ast::id_map::JsonIdMap; pub use ast::Ast; - // ================ // == SourceFile == // ================ @@ -36,11 +38,11 @@ pub struct SourceFile { /// The whole content of file. pub content: String, /// The range in bytes of module's "Code" section. - pub code: Range, + pub code: Range, /// The range in bytes of module's "Id Map" section. - pub id_map: Range, + pub id_map: Range, /// The range in bytes of module's "Metadata" section. - pub metadata: Range, + pub metadata: Range, } impl Display for SourceFile { @@ -57,30 +59,32 @@ impl SourceFile { /// the whole contents is treated as the code. pub fn new(content: String) -> Self { pub const METADATA_LINES: usize = 3; - let newline_indices = enso_data::text::rev_newline_byte_indices(&content); - let newline_indices_from_end = newline_indices.take(METADATA_LINES).collect_vec(); - match newline_indices_from_end.as_slice() { + let nl_offsets = content.char_indices().filter_map(|(ix, c)| (c == '\n').as_some(ix)); + let nl_offsets_bytes = nl_offsets.map(Bytes::from); + let nl_offsets_from_end = nl_offsets_bytes.rev().take(METADATA_LINES).collect_vec(); + match nl_offsets_from_end.as_slice() { [last, before_last, two_before_last] => { // Last line should be metadata. Line before should be id map. Line before is the // metadata tag. // We check that tag matches and that trailing lines looks like JSON list/object // respectively. 
- let code_length = *two_before_last + 1 - NEWLINES_BEFORE_TAG; - let code_range = 0..code_length; - let tag_range = two_before_last + 1..*before_last; - let id_map_range = before_last + 1..*last; - let metadata_range = last + 1..content.len(); - let tag = &content[tag_range]; - let idmap = &content[id_map_range.clone()]; - let metadata = &content[metadata_range.clone()]; + let code_length = *two_before_last + 1.bytes() - Bytes::from(NEWLINES_BEFORE_TAG); + let code_range = 0.bytes()..code_length; + let tag_range = two_before_last + 1.bytes()..*before_last; + let id_map_range = before_last + 1.bytes()..*last; + let metadata_range = last + 1.bytes()..Bytes::from(content.len()); + let tag = &content[tag_range.start.as_usize()..tag_range.end.as_usize()]; + let idmap = &content[id_map_range.start.as_usize()..id_map_range.end.as_usize()]; + let metadata = + &content[metadata_range.start.as_usize()..metadata_range.end.as_usize()]; let tag_matching = tag == METADATA_TAG; let idmap_matching = Self::looks_like_idmap(idmap); let metadata_matching = Self::looks_like_metadata(metadata); if tag_matching && idmap_matching && metadata_matching { SourceFile { - code: ByteIndex::new_range(code_range), - id_map: ByteIndex::new_range(id_map_range), - metadata: ByteIndex::new_range(metadata_range), + code: code_range.into(), + id_map: id_map_range.into(), + metadata: metadata_range.into(), content, } } else { @@ -93,10 +97,11 @@ impl SourceFile { /// Create a description of source file consisting only of code, with no metadata. 
fn new_without_metadata(content: String) -> Self { + let length = Bytes::from(content.len()); Self { - code: ByteIndex::new_range(0..content.len()), - id_map: ByteIndex::new_range(content.len()..content.len()), - metadata: ByteIndex::new_range(content.len()..content.len()), + code: (0.bytes()..length).into(), + id_map: (length..length).into(), + metadata: (length..length).into(), content, } } @@ -126,8 +131,10 @@ impl SourceFile { self.slice(&self.metadata) } - fn slice(&self, range: &Range) -> &str { - &self.content[range.start.value..range.end.value] + fn slice(&self, range: &Range) -> &str { + let start = range.start.as_usize(); + let end = range.end.as_usize(); + &self.content[start..end] } } @@ -180,18 +187,22 @@ impl ParsedSourceFile { let before_idmap = "\n"; let before_metadata = "\n"; let code = self.ast.repr(); - let id_map = to_json_single_line(&self.ast.id_map())?; + let json_id_map = JsonIdMap::from_id_map(&self.ast.id_map(), &code); + let id_map = to_json_single_line(&json_id_map)?; let metadata = to_json_single_line(&self.metadata)?; let id_map_start = code.len() + before_tag.len() + METADATA_TAG.len() + before_idmap.len(); + let id_map_start_bytes = Bytes::from(id_map_start); let metadata_start = id_map_start + id_map.len() + before_metadata.len(); + let metadata_start_bytes = Bytes::from(metadata_start); Ok(SourceFile { content: iformat!( "{code}{before_tag}{METADATA_TAG}{before_idmap}{id_map}\ {before_metadata}{metadata}" ), - code: ByteIndex::new_range(0..code.len()), - id_map: ByteIndex::new_range(id_map_start..id_map_start + id_map.len()), - metadata: ByteIndex::new_range(metadata_start..metadata_start + metadata.len()), + code: (0.bytes()..Bytes::from(code.len())).into(), + id_map: (id_map_start_bytes..id_map_start_bytes + Bytes::from(id_map.len())).into(), + metadata: (metadata_start_bytes..metadata_start_bytes + Bytes::from(metadata.len())) + .into(), }) } } @@ -259,12 +270,14 @@ mod test { let main = ast::Ast::var("main"); let node = 
ast::Ast::infix_var("2", "+", "2"); let infix = ast::Ast::infix(main, "=", node); - let ast = ast::Ast::one_line_module(infix).try_into().unwrap(); + let ast: ast::known::Module = ast::Ast::one_line_module(infix).try_into().unwrap(); + let repr = ast.repr(); let metadata = Metadata { foo: 321 }; let source = ParsedSourceFile { ast, metadata }; let serialized = source.serialize().unwrap(); - let expected_id_map = to_json_single_line(&source.ast.id_map()).unwrap(); + let expected_json_id_map = JsonIdMap::from_id_map(&source.ast.id_map(), &repr); + let expected_id_map = to_json_single_line(&expected_json_id_map).unwrap(); let expected_metadata = to_json_single_line(&source.metadata).unwrap(); let expected_content = iformat!( r#"main = 2 + 2 diff --git a/app/gui/language/parser/src/jsclient.rs b/app/gui/language/parser/src/jsclient.rs index 1cc0fdbb96..ba6503f09a 100644 --- a/app/gui/language/parser/src/jsclient.rs +++ b/app/gui/language/parser/src/jsclient.rs @@ -3,13 +3,15 @@ use crate::prelude::*; use crate::api; +use crate::api::Ast; use crate::from_json_str_without_recursion_limit; -use api::Ast; +use ast::id_map::JsonIdMap; use ast::IdMap; - use wasm_bindgen::prelude::*; + + pub type Result = std::result::Result; #[derive(Debug, Fail)] @@ -66,6 +68,7 @@ impl Client { /// Parses Enso code with JS-based parser. 
pub fn parse(&self, program: String, ids: IdMap) -> api::Result { let ast = || { + let ids = JsonIdMap::from_id_map(&ids, &program); let json_ids = serde_json::to_string(&ids)?; let json_ast = parse(program, json_ids)?; let ast = from_json_str_without_recursion_limit(&json_ast)?; diff --git a/app/gui/language/parser/src/test_utils.rs b/app/gui/language/parser/src/test_utils.rs index dd280e1012..2a67b49a05 100644 --- a/app/gui/language/parser/src/test_utils.rs +++ b/app/gui/language/parser/src/test_utils.rs @@ -8,7 +8,6 @@ use ast::test_utils::expect_shape; use ast::test_utils::expect_single_line; use ast::test_utils::validate_spans; use ast::Ast; -use ast::HasLength; use ast::HasRepr; use ast::Shape; @@ -40,7 +39,7 @@ impl ParserTestExts for Parser { let program = program.into(); DEBUG!("parsing " program); let ast = self.parse(program.clone(), default()).unwrap(); - assert_eq!(ast.shape().len(), program.len()); + assert_eq!(ast.shape().len().as_usize(), program.len()); validate_spans(&ast); assert_eq!(ast.repr(), program, "{:?}", ast); ast diff --git a/app/gui/language/parser/src/wsclient.rs b/app/gui/language/parser/src/wsclient.rs index 7dcbedcf4d..2ec8f29ea9 100644 --- a/app/gui/language/parser/src/wsclient.rs +++ b/app/gui/language/parser/src/wsclient.rs @@ -1,18 +1,18 @@ #![cfg(not(target_arch = "wasm32"))] use crate::api; +use crate::api::Ast; +use crate::api::Error::*; +use crate::api::Metadata; +use crate::api::ParsedSourceFile; use crate::prelude::*; use websocket::stream::sync::TcpStream; use websocket::ClientBuilder; use websocket::Message; -use api::Ast; -use api::Error::*; -use api::Metadata; -use api::ParsedSourceFile; +use ast::id_map::JsonIdMap; use ast::IdMap; - use serde::de::DeserializeOwned; use std::fmt::Formatter; @@ -92,7 +92,7 @@ impl From for Error { #[allow(clippy::enum_variant_names)] #[derive(Debug, serde::Serialize, serde::Deserialize)] pub enum Request { - ParseRequest { program: String, ids: IdMap }, + ParseRequest { program: String, 
ids: JsonIdMap }, ParseRequestWithMetadata { content: String }, DocParserGenerateHtmlSource { program: String }, DocParserGenerateHtmlFromDoc { code: String }, @@ -238,6 +238,7 @@ impl Client { /// Sends a request to parser service to parse Enso code. pub fn parse(&mut self, program: String, ids: IdMap) -> api::Result { + let ids = JsonIdMap::from_id_map(&ids, &program); let request = Request::ParseRequest { program, ids }; let response = self.rpc_call::(request)?; match response { diff --git a/app/gui/language/parser/tests/bugs.rs b/app/gui/language/parser/tests/bugs.rs index eb944c1e07..fef917152b 100644 --- a/app/gui/language/parser/tests/bugs.rs +++ b/app/gui/language/parser/tests/bugs.rs @@ -12,7 +12,8 @@ fn no_doc_found() { let program = std::env::args().nth(1).unwrap_or(input); let parser = parser::DocParser::new_or_panic(); let gen_code = parser.generate_html_docs(program).unwrap(); - assert_eq!(gen_code.len(), 22); // should be 0 + // gen_code should be empty. + assert_eq!(gen_code.len(), 22, "Generated length differs from the expected\"{}\"", gen_code); } #[wasm_bindgen_test] diff --git a/app/gui/language/span-tree/Cargo.toml b/app/gui/language/span-tree/Cargo.toml index 8bc8652c8a..812134194e 100644 --- a/app/gui/language/span-tree/Cargo.toml +++ b/app/gui/language/span-tree/Cargo.toml @@ -6,8 +6,9 @@ edition = "2018" [dependencies] ast = { path = "../ast/impl" } -enso-data = { path = "../../../../lib/rust/data"} -enso-prelude = { path = "../../../../lib/rust/prelude"} +enso-data-structures = { path = "../../../../lib/rust/data-structures" } +enso-text = { path = "../../../../lib/rust/text" } +enso-prelude = { path = "../../../../lib/rust/prelude" } failure = { version = "0.1.6" } [dev-dependencies] diff --git a/app/gui/language/span-tree/example/Cargo.toml b/app/gui/language/span-tree/example/Cargo.toml index ca7f551dc7..e02108dd1c 100644 --- a/app/gui/language/span-tree/example/Cargo.toml +++ b/app/gui/language/span-tree/example/Cargo.toml @@ -9,6 +9,7 
@@ crate-type = ["cdylib", "rlib"] [dependencies] ast = { path = "../../ast/impl" } +enso-text = { path = "../../../../../lib/rust/text" } span-tree = { path = "../../span-tree" } enso-web = { path = "../../../../../lib/rust/web" } enso-prelude = { path = "../../../../../lib/rust/prelude"} diff --git a/app/gui/language/span-tree/example/src/lib.rs b/app/gui/language/span-tree/example/src/lib.rs index 211d2d46a9..756ea15890 100644 --- a/app/gui/language/span-tree/example/src/lib.rs +++ b/app/gui/language/span-tree/example/src/lib.rs @@ -5,6 +5,7 @@ use enso_web as web; use ast::crumbs::PatternMatchCrumb::*; use ast::crumbs::*; +use enso_text::traits::*; use span_tree::builder::Builder; use span_tree::node; use span_tree::node::InsertionPointType; @@ -60,16 +61,19 @@ pub fn main() { .kind(node::Kind::Chained) .crumbs(PrefixCrumb::Func) .new_child(|t| { - t.size(9).kind(node::Kind::Operation).crumbs(PrefixCrumb::Func).new_ast_id() + t.size(9.bytes()) + .kind(node::Kind::Operation) + .crumbs(PrefixCrumb::Func) + .new_ast_id() }) - .new_child(|t| t.size(1)) + .new_child(|t| t.size(1.bytes())) .new_child(|t| { - t.size(4) + t.size(4.bytes()) .kind(node::Kind::this().removable()) .crumbs(PrefixCrumb::Arg) .new_ast_id() }) - .new_child(|t| t.size(1)) + .new_child(|t| t.size(1.bytes())) }) .new_child(|t| { t.new_ast_id() @@ -77,26 +81,26 @@ pub fn main() { .crumbs(PrefixCrumb::Arg) .new_child(|t| { t.new_ast_id() - .offset(1) + .offset(1.bytes()) .kind(node::Kind::argument().removable()) .crumbs(parens_cr) .new_child(|t| { - t.size(12) + t.size(12.bytes()) .kind(node::Kind::Operation) .crumbs(PrefixCrumb::Func) .new_ast_id() }) - .new_child(|t| t.size(1)) + .new_child(|t| t.size(1.bytes())) .new_child(|t| { - t.size(6) + t.size(6.bytes()) .kind(node::Kind::this().removable()) .crumbs(PrefixCrumb::Arg) .new_ast_id() }) - .new_child(|t| t.size(1)) + .new_child(|t| t.size(1.bytes())) }) }) - .new_child(|t| t.size(1)); + .new_child(|t| t.size(1.bytes())); 
DEBUG!("{input_span_tree2:#?}"); } diff --git a/app/gui/language/span-tree/src/action.rs b/app/gui/language/span-tree/src/action.rs index 11b0e5e943..887d9d23fd 100644 --- a/app/gui/language/span-tree/src/action.rs +++ b/app/gui/language/span-tree/src/action.rs @@ -238,10 +238,7 @@ mod test { use crate::SpanTree; use ast::HasRepr; - use enso_data::text::Index; - use enso_data::text::Span; use parser::Parser; - use std::ops::Range; use wasm_bindgen_test::wasm_bindgen_test; #[wasm_bindgen_test] @@ -259,12 +256,9 @@ mod test { let ast = parser.parse_line_ast(self.expr).unwrap(); let ast_id = ast.id; let tree = ast.generate_tree(&context::Empty).unwrap(): SpanTree; - let span_begin = Index::new(self.span.start); - let span_end = Index::new(self.span.end); - let span = Span::from_indices(span_begin, span_end); - let node = tree.root_ref().find_by_span(&span); + let node = tree.root_ref().find_by_span(&self.span.clone().into()); let node = node.expect( - format!("Invalid case {:?}: no node with span {:?}", self, span).as_str(), + format!("Invalid case {:?}: no node with span {:?}", self, self.span).as_str(), ); let arg = Ast::new(ast::Var { name: "foo".to_string() }, None); let result = match &self.action { @@ -346,12 +340,9 @@ mod test { fn run(&self, parser: &Parser) { let ast = parser.parse_line_ast(self.expr).unwrap(); let tree: SpanTree = ast.generate_tree(&context::Empty).unwrap(); - let span_begin = Index::new(self.span.start); - let span_end = Index::new(self.span.end); - let span = Span::from_indices(span_begin, span_end); - let node = tree.root_ref().find_by_span(&span); + let node = tree.root_ref().find_by_span(&self.span.clone().into()); let node = node.expect( - format!("Invalid case {:?}: no node with span {:?}", self, span).as_str(), + format!("Invalid case {:?}: no node with span {:?}", self, self.span).as_str(), ); let expected: HashSet = self.expected.iter().cloned().collect(); diff --git a/app/gui/language/span-tree/src/builder.rs 
b/app/gui/language/span-tree/src/builder.rs index 231c8a8062..7b206c78f0 100644 --- a/app/gui/language/span-tree/src/builder.rs +++ b/app/gui/language/span-tree/src/builder.rs @@ -5,7 +5,6 @@ use crate::Node; use crate::SpanTree; use ast::crumbs::IntoCrumbs; -use enso_data::text::Size; @@ -29,9 +28,8 @@ pub trait Builder: Sized { crumbs: impl IntoCrumbs, ) -> ChildBuilder { let kind = kind.into(); - let node = Node::::new().with_kind(kind).with_size(Size::new(len)); - let child = - node::Child { node, offset: Size::new(offset), ast_crumbs: crumbs.into_crumbs() }; + let node = Node::::new().with_kind(kind).with_size(len.into()); + let child = node::Child { node, offset: offset.into(), ast_crumbs: crumbs.into_crumbs() }; ChildBuilder { built: child, parent: self } } @@ -50,7 +48,7 @@ pub trait Builder: Sized { fn add_empty_child(mut self, offset: usize, insert_type: node::InsertionPointType) -> Self { let child = node::Child { node: Node::::new().with_kind(insert_type), - offset: Size::new(offset), + offset: offset.into(), ast_crumbs: vec![], }; self.node_being_built().children.push(child); @@ -81,7 +79,7 @@ pub struct TreeBuilder { impl TreeBuilder { /// Create new builder for tree with root having length `len`. 
pub fn new(len: usize) -> Self { - let built = Node::::new().with_kind(node::Kind::Root).with_size(Size::new(len)); + let built = Node::::new().with_kind(node::Kind::Root).with_size(len.into()); TreeBuilder { built } } diff --git a/app/gui/language/span-tree/src/generate.rs b/app/gui/language/span-tree/src/generate.rs index d66bdf0acf..c08d525d98 100644 --- a/app/gui/language/span-tree/src/generate.rs +++ b/app/gui/language/span-tree/src/generate.rs @@ -16,10 +16,10 @@ use ast::assoc::Assoc; use ast::crumbs::Located; use ast::opr::GeneralizedInfix; use ast::Ast; -use ast::HasLength; +use ast::HasRepr; use ast::MacroAmbiguousSegment; use ast::MacroMatchSegment; -use enso_data::text::Size; +use enso_text::unit::*; pub use context::Context; @@ -58,7 +58,7 @@ impl SpanTreeGenerator for &str { kind: impl Into, _: &impl Context, ) -> FallibleResult> { - Ok(Node::::new().with_kind(kind).with_size(Size::new(self.len()))) + Ok(Node::::new().with_kind(kind).with_size(self.chars().count().into())) } } @@ -83,7 +83,7 @@ impl SpanTreeGenerator for String { /// An utility to generate children with increasing offsets. #[derive(Debug, Default)] struct ChildGenerator { - current_offset: Size, + current_offset: Bytes, children: Vec>, } @@ -91,7 +91,7 @@ impl ChildGenerator { /// Add spacing to current generator state. 
It will be taken into account for the next generated /// children's offsets fn spacing(&mut self, size: usize) { - self.current_offset += Size::new(size); + self.current_offset += Bytes::from(size); } fn generate_ast_node( @@ -220,7 +220,7 @@ fn generate_node_for_ast( .unwrap() .generate_node(kind, context), _ => { - let size = Size::new(ast.len()); + let size = ast.len(); let ast_id = ast.id; let children = default(); let name = ast::identifier::name(ast); diff --git a/app/gui/language/span-tree/src/node.rs b/app/gui/language/span-tree/src/node.rs index e63fa23f74..0015d1057e 100644 --- a/app/gui/language/span-tree/src/node.rs +++ b/app/gui/language/span-tree/src/node.rs @@ -4,11 +4,11 @@ use crate::prelude::*; use crate::iter::LeafIterator; use crate::iter::TreeFragment; - use crate::ArgumentInfo; + use ast::crumbs::IntoCrumbs; -use enso_data::text::Index; -use enso_data::text::Size; +use enso_text as text; +use enso_text::unit::*; pub mod kind; pub use kind::*; @@ -30,7 +30,7 @@ pub trait Payload = Default + Clone; #[allow(missing_docs)] pub struct Node { pub kind: Kind, - pub size: Size, + pub size: Bytes, pub children: Vec>, pub ast_id: Option, pub payload: T, @@ -126,7 +126,7 @@ impl Node { self.kind = k.into(); self } - pub fn with_size(mut self, size: Size) -> Self { + pub fn with_size(mut self, size: Bytes) -> Self { self.size = size; self } @@ -176,7 +176,7 @@ pub struct Child { /// A child node. pub node: Node, /// An offset counted from the parent node starting index to the start of this node's span. - pub offset: Size, + pub offset: Bytes, /// AST crumbs which lead from parent to child associated AST node. 
pub ast_crumbs: ast::Crumbs, } @@ -273,14 +273,14 @@ impl ChildBuilder { f: impl FnOnce(Self) -> Self, ) -> Self { let child: ChildBuilder = ChildBuilder::new(default()); - let child = f(child.offset(offset).size(size).kind(kind).crumbs(crumbs)); + let child = f(child.offset(offset.into()).size(size.into()).kind(kind).crumbs(crumbs)); self.node.children.push(child.child); self } /// Offset setter. - pub fn offset(mut self, offset: usize) -> Self { - self.offset = Size::new(offset); + pub fn offset(mut self, offset: Bytes) -> Self { + self.offset = offset; self } @@ -297,8 +297,8 @@ impl ChildBuilder { } /// Size setter. - pub fn size(mut self, size: usize) -> Self { - self.node.size = Size::new(size); + pub fn size(mut self, size: Bytes) -> Self { + self.node.size = size; self } @@ -421,13 +421,13 @@ impl InvalidCrumb { #[derive(Clone, Debug)] pub struct Ref<'a, T = ()> { /// The node's ref. - pub node: &'a Node, - /// Span begin being an index counted from the root expression. - pub span_begin: Index, + pub node: &'a Node, + /// Span begin's offset counted from the root expression. + pub span_offset: Bytes, /// Crumbs specifying this node position related to root. - pub crumbs: Crumbs, + pub crumbs: Crumbs, /// Ast crumbs locating associated AST node, related to the root's AST node. - pub ast_crumbs: ast::Crumbs, + pub ast_crumbs: ast::Crumbs, } /// A result of `get_subnode_by_ast_crumbs` @@ -442,22 +442,24 @@ pub struct NodeFoundByAstCrumbs<'a, 'b, T = ()> { impl<'a, T: Payload> Ref<'a, T> { /// Constructor. pub fn new(node: &'a Node) -> Self { - let span_begin = default(); + let span_offset = default(); let crumbs = default(); let ast_crumbs = default(); - Self { node, span_begin, crumbs, ast_crumbs } + Self { node, span_offset, crumbs, ast_crumbs } } /// Get span of current node. 
- pub fn span(&self) -> enso_data::text::Span { - enso_data::text::Span::new(self.span_begin, self.node.size) + pub fn span(&self) -> text::Range { + let start = self.span_offset; + let end = self.span_offset + self.node.size; + (start..end).into() } /// Get the reference to child with given index. Fails if index if out of bounds. pub fn child(self, index: usize) -> FallibleResult { let node = self.node; let crumbs = self.crumbs; - let mut span_begin = self.span_begin; + let mut span_offset = self.span_offset; let mut ast_crumbs = self.ast_crumbs; let count = node.children.len(); @@ -465,10 +467,10 @@ impl<'a, T: Payload> Ref<'a, T> { None => Err(InvalidCrumb::new(count, index, &crumbs).into()), Some(child) => { let node = &child.node; - span_begin += child.offset; + span_offset += child.offset; let crumbs = crumbs.into_sub(index); ast_crumbs.extend(child.ast_crumbs.iter().cloned()); - Ok(Self { node, span_begin, crumbs, ast_crumbs }) + Ok(Self { node, span_offset, crumbs, ast_crumbs }) } } } @@ -551,12 +553,12 @@ impl<'a, T: Payload> Ref<'a, T> { /// Get the node which exactly matches the given Span. If there many such node's, it pick first /// found by DFS. - pub fn find_by_span(self, span: &enso_data::text::Span) -> Option> { + pub fn find_by_span(self, span: &text::Range) -> Option> { if self.span() == *span { Some(self) } else { self.children_iter().find_map(|ch| { - ch.span().contains_span(span).and_option_from(|| ch.find_by_span(span)) + ch.span().contains_range(span).and_option_from(|| ch.find_by_span(span)) }) } } @@ -637,15 +639,15 @@ impl<'a, T: Payload> Ref<'a, T> { #[derive(Debug)] pub struct RefMut<'a, T = ()> { /// The node's ref. - node: &'a mut Node, - /// An offset counted from the parent node starting index to the start of this node's span. - pub offset: Size, - /// Span begin being an index counted from the root expression. 
- pub span_begin: Index, + node: &'a mut Node, + /// An offset counted from the parent node start to the start of this node's span. + pub offset: Bytes, + /// Span begin's offset counted from the root expression. + pub span_offset: Bytes, /// Crumbs specifying this node position related to root. - pub crumbs: Crumbs, + pub crumbs: Crumbs, /// Ast crumbs locating associated AST node, related to the root's AST node. - pub ast_crumbs: ast::Crumbs, + pub ast_crumbs: ast::Crumbs, } impl<'a, T: Payload> RefMut<'a, T> { @@ -655,7 +657,7 @@ impl<'a, T: Payload> RefMut<'a, T> { let span_begin = default(); let crumbs = default(); let ast_crumbs = default(); - Self { node, offset, span_begin, crumbs, ast_crumbs } + Self { node, offset, span_offset: span_begin, crumbs, ast_crumbs } } /// Payload accessor. @@ -669,15 +671,15 @@ impl<'a, T: Payload> RefMut<'a, T> { } /// Get span of current node. - pub fn span(&self) -> enso_data::text::Span { - enso_data::text::Span::new(self.span_begin, self.node.size) + pub fn span(&self) -> text::Range { + text::Range::new(self.span_offset, self.span_offset + self.size) } /// Helper function for building child references. fn child_from_ref( index: usize, child: &'a mut Child, - mut span_begin: Index, + mut span_begin: Bytes, crumbs: Crumbs, mut ast_crumbs: ast::Crumbs, ) -> RefMut<'a, T> { @@ -686,13 +688,13 @@ impl<'a, T: Payload> RefMut<'a, T> { span_begin += child.offset; let crumbs = crumbs.into_sub(index); ast_crumbs.extend(child.ast_crumbs.iter().cloned()); - Self { node, offset, span_begin, crumbs, ast_crumbs } + Self { node, offset, span_offset: span_begin, crumbs, ast_crumbs } } /// Get the reference to child with given index. Fails if index if out of bounds. 
pub fn child(self, index: usize) -> FallibleResult> { let node = self.node; - let span_begin = self.span_begin; + let span_begin = self.span_offset; let crumbs = self.crumbs; let ast_crumbs = self.ast_crumbs; let count = node.children.len(); @@ -704,7 +706,7 @@ impl<'a, T: Payload> RefMut<'a, T> { /// Iterator over all direct children producing `RefMut`s. pub fn children_iter(self) -> impl Iterator> { - let span_begin = self.span_begin; + let span_begin = self.span_offset; let crumbs = self.crumbs; let ast_crumbs = self.ast_crumbs; self.node.children.iter_mut().enumerate().map(move |(index, child)| { @@ -840,6 +842,7 @@ mod test { use crate::SpanTree; use ast::crumbs; + use enso_text::unit::*; #[test] fn node_lookup() { @@ -862,11 +865,11 @@ mod test { let grand_child2 = child2.clone().get_descendant(&vec![1]).unwrap(); // Span begin. - assert_eq!(root.span_begin.value, 0); - assert_eq!(child1.span_begin.value, 0); - assert_eq!(child2.span_begin.value, 2); - assert_eq!(grand_child1.span_begin.value, 2); - assert_eq!(grand_child2.span_begin.value, 5); + assert_eq!(root.span_offset, 0.bytes()); + assert_eq!(child1.span_offset, 0.bytes()); + assert_eq!(child2.span_offset, 2.bytes()); + assert_eq!(grand_child1.span_offset, 2.bytes()); + assert_eq!(grand_child2.span_offset, 5.bytes()); // Length assert_eq!(root.node.size.value, 7); diff --git a/app/gui/src/controller/graph.rs b/app/gui/src/controller/graph.rs index e2625419f8..bc66f11b0c 100644 --- a/app/gui/src/controller/graph.rs +++ b/app/gui/src/controller/graph.rs @@ -961,6 +961,7 @@ pub mod tests { use crate::executor::test_utils::TestWithLocalPoolExecutor; use crate::model::module::Position; + use crate::model::module::TextChange; use crate::model::suggestion_database; use crate::test::mock::data; @@ -969,8 +970,7 @@ pub mod tests { use double_representation::identifier::NormalizedName; use double_representation::project; use engine_protocol::language_server::MethodPointer; - use enso_data::text::Index; - use 
enso_data::text::TextChange; + use enso_text::traits::*; use parser::Parser; use wasm_bindgen_test::wasm_bindgen_test; @@ -1092,7 +1092,7 @@ pub mod tests { fn graph_controller_notification_relay() { Fixture::set_up().run(|graph| async move { let mut sub = graph.subscribe(); - let change = TextChange::insert(Index::new(12), "2".into()); + let change = TextChange { range: (12.bytes()..12.bytes()).into(), text: "2".into() }; graph.module.apply_code_change(change, &graph.parser, default()).unwrap(); assert_eq!(Some(Notification::Invalidate), sub.next().await); }); diff --git a/app/gui/src/controller/module.rs b/app/gui/src/controller/module.rs index 4bf0144783..9d900d8548 100644 --- a/app/gui/src/controller/module.rs +++ b/app/gui/src/controller/module.rs @@ -3,10 +3,10 @@ use crate::prelude::*; use crate::model::module::Path; +use crate::model::module::TextChange; use ast; use ast::HasIdMap; -use data::text::*; use double_representation::module; use double_representation::project; use double_representation::text::apply_code_change_to_id_map; @@ -211,8 +211,7 @@ mod test { use ast; use ast::Ast; use ast::BlockLine; - use enso_data::text::Index; - use enso_data::text::Span; + use enso_text::traits::*; use parser::Parser; use uuid::Uuid; use wasm_bindgen_test::wasm_bindgen_test; @@ -229,16 +228,16 @@ mod test { let uuid3 = Uuid::new_v4(); let uuid4 = Uuid::new_v4(); let id_map = ast::IdMap::new(vec![ - (Span::new(Index::new(0), Size::new(1)), uuid1), - (Span::new(Index::new(1), Size::new(1)), uuid2), - (Span::new(Index::new(2), Size::new(1)), uuid3), - (Span::new(Index::new(0), Size::new(3)), uuid4), + ((0.bytes()..1.bytes()).into(), uuid1), + ((1.bytes()..2.bytes()).into(), uuid2), + ((2.bytes()..3.bytes()).into(), uuid3), + ((0.bytes()..3.bytes()).into(), uuid4), ]); let controller = Handle::new_mock(location, code, id_map, ls, parser, default()).unwrap(); // Change code from "2+2" to "22+2" - let change = TextChange::insert(Index::new(0), "2".to_string()); + let 
change = enso_text::Change::inserted(0.bytes(), "2".to_string()); controller.apply_code_change(change).unwrap(); let expected_ast = Ast::new_no_id(ast::Module { lines: vec![BlockLine { diff --git a/app/gui/src/controller/searcher.rs b/app/gui/src/controller/searcher.rs index 76a3ad4c2a..e3170f2a8d 100644 --- a/app/gui/src/controller/searcher.rs +++ b/app/gui/src/controller/searcher.rs @@ -13,7 +13,6 @@ use crate::model::suggestion_database::entry::CodeToInsert; use crate::model::traits::*; use crate::notification; -use data::text::TextLocation; use double_representation::graph::GraphInfo; use double_representation::graph::LocationHint; use double_representation::module::QualifiedName; @@ -21,6 +20,7 @@ use double_representation::node::NodeInfo; use double_representation::project; use double_representation::tp; use engine_protocol::language_server; +use enso_text::Location; use flo_stream::Subscriber; use parser::Parser; @@ -476,7 +476,7 @@ pub struct Searcher { language_server: Rc, ide: controller::Ide, this_arg: Rc>, - position_in_code: Immutable, + position_in_code: Immutable, } impl Searcher { @@ -517,7 +517,8 @@ impl Searcher { let module_ast = graph.graph().module.ast(); let def_id = graph.graph().id; let def_span = double_representation::module::definition_span(&module_ast, &def_id)?; - let position = TextLocation::convert_span(module_ast.repr(), &def_span).end; + let module_repr: enso_text::Text = module_ast.repr().into(); + let position = module_repr.location_of_byte_offset_snapped(def_span.end); let this_arg = Rc::new( matches!(mode, Mode::NewNode { .. 
}) .and_option_from(|| ThisNode::new(selected_nodes, &graph.graph())), @@ -1203,11 +1204,11 @@ pub mod test { impl MockData { fn change_main_body(&mut self, line: &str) { - let code = dbg!(crate::test::mock::main_from_lines(&[line])); - let location = data::text::TextLocation::at_document_end(&code); + let code: enso_text::Text = dbg!(crate::test::mock::main_from_lines(&[line])).into(); + let location = code.location_of_text_end(); // TODO [mwu] Not nice that we ended up with duplicated mock data for code. - self.graph.module.code = code.clone(); - self.graph.graph.code = code; + self.graph.module.code = (&code).into(); + self.graph.graph.code = code.into(); self.code_location = location.into(); } @@ -1250,8 +1251,8 @@ pub mod test { let mut client = language_server::MockClient::default(); client.require_all_calls(); client_setup(&mut data, &mut client); - let end_of_code = TextLocation::at_document_end(&data.graph.module.code); - let code_range = TextLocation::at_document_begin()..=end_of_code; + let end_of_code = enso_text::Text::from(&data.graph.module.code).location_of_text_end(); + let code_range = enso_text::Location::default()..=end_of_code; let graph = data.graph.controller(); let node = &graph.graph().nodes().unwrap()[0]; let this = ThisNode::new(vec![node.info.id()], &graph.graph()); diff --git a/app/gui/src/controller/text.rs b/app/gui/src/controller/text.rs index 9177150457..5e526e7379 100644 --- a/app/gui/src/controller/text.rs +++ b/app/gui/src/controller/text.rs @@ -6,14 +6,13 @@ use crate::prelude::*; use crate::controller::FilePath; +use crate::model::module::TextChange; -use data::text::TextChange; use engine_protocol::language_server; use json_rpc::error::RpcError; use std::pin::Pin; - // ==================== // === Notification === // ==================== @@ -172,7 +171,7 @@ mod test { use crate::executor::test_utils::TestWithLocalPoolExecutor; - use data::text::Index; + use enso_text::traits::*; use parser::Parser; use 
wasm_bindgen_test::wasm_bindgen_test; @@ -202,7 +201,8 @@ mod test { }; let mut sub = controller.subscribe(); - module.apply_code_change(TextChange::insert(Index::new(8), "2".to_string())).unwrap(); + let change = enso_text::Change::inserted(8.bytes(), "2".to_string()); + module.apply_code_change(change).unwrap(); assert_eq!(Some(Notification::Invalidate), sub.next().await); }) } diff --git a/app/gui/src/ide/integration/project.rs b/app/gui/src/ide/integration/project.rs index 1a208735bd..38a8c41577 100644 --- a/app/gui/src/ide/integration/project.rs +++ b/app/gui/src/ide/integration/project.rs @@ -31,7 +31,6 @@ use crate::model::traits::*; use analytics; use bimap::BiMap; use engine_protocol::language_server::ExpressionUpdatePayload; -use enso_data::text::TextChange; use enso_frp as frp; use ensogl::display::traits::*; use ensogl_gui_component::file_browser::model::AnyFolderContent; @@ -1741,11 +1740,10 @@ impl Model { spawn(exit_node_action); } - fn code_changed_in_ui(&self, changes: &Vec) -> FallibleResult { + fn code_changed_in_ui(&self, changes: &Vec) -> FallibleResult { for change in changes { - let range_start = data::text::Index::new(change.range.start.value as usize); - let range_end = data::text::Index::new(change.range.end.value as usize); - let converted = TextChange::replace(range_start..range_end, change.text.to_string()); + let converted = + enso_text::text::Change { range: change.range, text: (&change.text).into() }; self.text.apply_text_change(converted)?; } Ok(()) diff --git a/app/gui/src/model/module.rs b/app/gui/src/model/module.rs index e82c63b106..f939736774 100644 --- a/app/gui/src/model/module.rs +++ b/app/gui/src/model/module.rs @@ -13,8 +13,6 @@ use crate::controller::FilePath; use ast::constants::LANGUAGE_FILE_EXTENSION; use ast::constants::SOURCE_DIRECTORY; -use data::text::TextChange; -use data::text::TextLocation; use double_representation::definition::DefinitionInfo; use double_representation::identifier::ReferentName; use 
double_representation::project; @@ -61,6 +59,15 @@ pub enum ModulePathViolation { +// =============== +// === Aliases === +// =============== + +/// A specialization of text change used in module's text changes across controllers. +pub type TextChange = enso_text::Change; + + + // ============ // === Path === // ============ @@ -271,7 +278,7 @@ pub enum NotificationKind { /// The code change description. change: TextChange, /// Information about line:col position of replaced fragment. - replaced_location: Range, + replaced_location: enso_text::Range, }, /// The metadata (e.g. some node's position) has been changed. MetadataChanged, diff --git a/app/gui/src/model/module/plain.rs b/app/gui/src/model/module/plain.rs index 5d3fb9bed2..0b5c5334d8 100644 --- a/app/gui/src/model/module/plain.rs +++ b/app/gui/src/model/module/plain.rs @@ -10,10 +10,9 @@ use crate::model::module::Notification; use crate::model::module::NotificationKind; use crate::model::module::Path; use crate::model::module::ProjectMetadata; +use crate::model::module::TextChange; use crate::notification; -use data::text::TextChange; -use data::text::TextLocation; use double_representation::definition::DefinitionInfo; use flo_stream::Subscriber; use parser::api::ParsedSourceFile; @@ -160,10 +159,12 @@ impl model::module::API for Module { parser: &Parser, new_id_map: ast::IdMap, ) -> FallibleResult { - let code = self.ast().repr(); - let replaced_location = TextLocation::convert_range(&code, &change.replaced); - let new_code = change.applied(&code); - let new_ast = parser.parse(new_code, new_id_map)?.try_into()?; + let mut code: enso_text::Text = self.ast().repr().into(); + let replaced_start = code.location_of_byte_offset_snapped(change.range.start); + let replaced_end = code.location_of_byte_offset_snapped(change.range.end); + let replaced_location = enso_text::Range::new(replaced_start, replaced_end); + code.apply_change(change.as_ref()); + let new_ast = parser.parse(code.into(), 
new_id_map)?.try_into()?; let notification = NotificationKind::CodeChanged { change, replaced_location }; self.update_content(notification, |content| content.ast = new_ast) } @@ -234,15 +235,15 @@ mod test { use crate::executor::test_utils::TestWithLocalPoolExecutor; use crate::model::module::Position; - use data::text; + use enso_text::traits::*; #[wasm_bindgen_test] fn applying_code_change() { let _test = TestWithLocalPoolExecutor::set_up(); let module = model::module::test::plain_from_code("2 + 2"); let change = TextChange { - replaced: text::Index::new(2)..text::Index::new(5), - inserted: "- abc".to_string(), + range: enso_text::Range::new(2.bytes(), 5.bytes()), + text: "- abc".to_string(), }; module.apply_code_change(change, &Parser::new_or_panic(), default()).unwrap(); assert_eq!("2 - abc", module.ast().repr()); @@ -272,12 +273,14 @@ mod test { // Code change let change = TextChange { - replaced: text::Index::new(0)..text::Index::new(1), - inserted: "foo".to_string(), + range: enso_text::Range::new(0.bytes(), 1.bytes()), + text: "foo".to_string(), }; module.apply_code_change(change.clone(), &Parser::new_or_panic(), default()).unwrap(); - let replaced_location = - TextLocation { line: 0, column: 0 }..TextLocation { line: 0, column: 1 }; + let replaced_location = enso_text::Range { + start: enso_text::Location { line: 0.line(), column: 0.column() }, + end: enso_text::Location { line: 0.line(), column: 1.column() }, + }; expect_notification(NotificationKind::CodeChanged { change, replaced_location }); // Metadata update diff --git a/app/gui/src/model/module/synchronized.rs b/app/gui/src/model/module/synchronized.rs index c83d7ddd70..2e09cbcf47 100644 --- a/app/gui/src/model/module/synchronized.rs +++ b/app/gui/src/model/module/synchronized.rs @@ -8,16 +8,19 @@ use crate::model::module::Notification; use crate::model::module::NotificationKind; use crate::model::module::Path; use crate::model::module::ProjectMetadata; +use crate::model::module::TextChange; use 
crate::model::module::API; use ast::IdMap; -use data::text::TextChange; -use data::text::TextLocation; use double_representation::definition::DefinitionInfo; use double_representation::graph::Id; use engine_protocol::language_server; use engine_protocol::language_server::TextEdit; use engine_protocol::types::Sha3_224; +use enso_text::unit::*; +use enso_text::Location; +use enso_text::Range; +use enso_text::Text; use flo_stream::Subscriber; use parser::api::SourceFile; use parser::Parser; @@ -33,62 +36,64 @@ use parser::Parser; #[derive(Clone, Debug, Eq, PartialEq)] struct ContentSummary { digest: Sha3_224, - end_of_file: TextLocation, + end_of_file: Location, } impl ContentSummary { - fn new(text: &str) -> Self { - Self { - digest: Sha3_224::new(text.as_bytes()), - end_of_file: TextLocation::at_document_end(text), - } + fn new(text: &Text) -> Self { + let parts = text.rope.iter_chunks(..).map(|s| s.as_bytes()); + Self { digest: Sha3_224::from_parts(parts), end_of_file: text.location_of_text_end() } } } /// The information about module's content. In addition to minimal summery defined in /// `ContentSummary` it adds information about sections, what enables efficient updates after code /// and metadata changes. -#[derive(Clone, Debug, Eq, PartialEq, Shrinkwrap)] +#[derive(Clone, Debug, Shrinkwrap)] struct ParsedContentSummary { #[shrinkwrap(main_field)] summary: ContentSummary, - source: String, - code: Range, - id_map: Range, - metadata: Range, + source: Text, + code: Range, + id_map: Range, + metadata: Range, } impl ParsedContentSummary { /// Get summary from `SourceFile`. 
fn from_source(source: &SourceFile) -> Self { + let content = Text::from(&source.content); + let code = source.code.map(|i| content.location_of_byte_offset_snapped(i)); + let id_map = source.id_map.map(|i| content.location_of_byte_offset_snapped(i)); + let metadata = source.metadata.map(|i| content.location_of_byte_offset_snapped(i)); ParsedContentSummary { - summary: ContentSummary::new(&source.content), - source: source.content.clone(), - code: TextLocation::convert_byte_range(&source.content, &source.code), - id_map: TextLocation::convert_byte_range(&source.content, &source.id_map), - metadata: TextLocation::convert_byte_range(&source.content, &source.metadata), + summary: ContentSummary::new(&content), + source: content, + code, + id_map, + metadata, } } // Get fragment of string with code. - pub fn code_slice(&self) -> &str { + pub fn code_slice(&self) -> Text { self.slice(&self.code) } /// Get fragment of string with id map. - pub fn id_map_slice(&self) -> &str { + pub fn id_map_slice(&self) -> Text { self.slice(&self.id_map) } /// Get fragment of string with metadata. 
- pub fn metadata_slice(&self) -> &str { + pub fn metadata_slice(&self) -> Text { self.slice(&self.metadata) } - fn slice(&self, range: &Range) -> &str { - let start_ix = range.start.to_index(&self.source); - let end_ix = range.end.to_index(&self.source); - &self.source[start_ix.value..end_ix.value] + fn slice(&self, range: &Range) -> Text { + let start_ix = self.source.byte_offset_of_location_snapped(range.start); + let end_ix = self.source.byte_offset_of_location_snapped(range.end); + self.source.sub(Range::new(start_ix, end_ix)) } } @@ -149,8 +154,9 @@ impl Module { let file_path = path.file_path().clone(); info!(logger, "Opening module {file_path}"); let opened = language_server.client.open_text_file(&file_path).await?; + let content: Text = (&opened.content).into(); info!(logger, "Read content of the module {path}, digest is {opened.current_version:?}"); - let end_of_file = TextLocation::at_document_end(&opened.content); + let end_of_file = content.location_of_text_end(); // TODO[ao] We should not fail here when metadata are malformed, but discard them and set // default instead. let source = parser.parse_with_metadata(opened.content)?; @@ -314,9 +320,9 @@ impl Module { NotificationKind::Invalidate => self.partial_invalidation(summary, new_file).await, NotificationKind::CodeChanged { change, replaced_location } => { let code_change = - TextEdit { range: replaced_location.into(), text: change.inserted }; + TextEdit { range: replaced_location.into(), text: change.text }; let id_map_change = TextEdit { - range: summary.id_map.clone().into(), + range: summary.id_map.into(), text: new_file.id_map_slice().to_string(), }; //id_map goes first, because code change may alter its position. 
@@ -325,7 +331,7 @@ impl Module { } NotificationKind::MetadataChanged => { let edits = vec![TextEdit { - range: summary.metadata.clone().into(), + range: summary.metadata.into(), text: new_file.metadata_slice().to_string(), }]; self.notify_language_server(&summary.summary, &new_file, edits).await @@ -342,27 +348,26 @@ impl Module { new_file: SourceFile, ) -> impl Future> + 'static { debug!(self.logger, "Handling full invalidation: {ls_content:?}."); - let range = TextLocation::at_document_begin()..ls_content.end_of_file; + let range = Range::new(Location::default(), ls_content.end_of_file); let edits = vec![TextEdit { range: range.into(), text: new_file.content.clone() }]; self.notify_language_server(ls_content, &new_file, edits) } - fn edit_for_snipped(start: &TextLocation, source: &str, target: &str) -> Option { + fn edit_for_snipped(start: &Location, source: Text, target: Text) -> Option { // This is an implicit assumption that always seems to be true. Otherwise finding the // correct location for the final edit would be more complex. 
- debug_assert_eq!(start.column, 0); + debug_assert_eq!(start.column, 0.column()); - (source != target).as_some_from(|| { - let edit = TextEdit::from_prefix_postfix_differences(source, target); - edit.move_by_lines(start.line) - }) + let edit = TextEdit::from_prefix_postfix_differences(&source, &target); + (edit.range.start != edit.range.end) + .as_some_from(|| edit.move_by_lines(start.line.as_usize())) } fn edit_for_code(ls_content: &ParsedContentSummary, new_file: &SourceFile) -> Option { Self::edit_for_snipped( &ls_content.code.start, ls_content.code_slice(), - new_file.code_slice(), + new_file.code_slice().into(), ) } @@ -373,7 +378,7 @@ impl Module { Self::edit_for_snipped( &ls_content.metadata.start, ls_content.metadata_slice(), - new_file.metadata_slice(), + new_file.metadata_slice().into(), ) } @@ -384,7 +389,7 @@ impl Module { Self::edit_for_snipped( &ls_content.id_map.start, ls_content.id_map_slice(), - new_file.id_map_slice(), + new_file.id_map_slice().into(), ) } @@ -475,12 +480,12 @@ pub mod test { use crate::test::Runner; - use data::text; - use data::text::TextChange; use engine_protocol::language_server::FileEdit; use engine_protocol::language_server::MockClient; use engine_protocol::language_server::Position; use engine_protocol::language_server::TextRange; + use enso_text::Change; + use enso_text::Text; use json_rpc::error::RpcError; use wasm_bindgen_test::wasm_bindgen_test; @@ -492,14 +497,15 @@ pub mod test { struct LsClientSetup { logger: Logger, path: Path, - current_ls_content: Rc>, + current_ls_content: Rc>, current_ls_version: Rc>, } impl LsClientSetup { - fn new(parent: impl AnyLogger, path: Path, initial_content: impl Into) -> Self { + fn new(parent: impl AnyLogger, path: Path, initial_content: impl Into) -> Self { let current_ls_content = initial_content.into(); - let current_ls_version = Sha3_224::new(current_ls_content.as_bytes()); + let current_ls_version = + Sha3_224::from_parts(current_ls_content.iter_chunks(..).map(|ch| 
ch.as_bytes())); let logger = Logger::new_sub(parent, "LsClientSetup"); debug!(logger, "Initial content:\n===\n{current_ls_content}\n==="); Self { @@ -531,7 +537,8 @@ pub mod test { let result = f(edits); let new_content = apply_edits(content_so_far, &edits); let actual_old = this.current_ls_version.get(); - let actual_new = Sha3_224::new(new_content.as_bytes()); + let actual_new = + Sha3_224::from_parts(new_content.iter_chunks(..).map(|s| s.as_bytes())); debug!(this.logger, "Actual digest: {actual_old} => {actual_new}"); debug!(this.logger, "Declared digest: {edits.old_version} => {edits.new_version}"); debug!(this.logger, "New content:\n===\n{new_content}\n==="); @@ -550,7 +557,7 @@ pub mod test { } /// The single text edit with accompanying metadata idmap changes. - fn expect_edit_w_metadata( + fn expect_edit_with_metadata( &self, client: &mut MockClient, f: impl FnOnce(&TextEdit) -> json_rpc::Result<()> + 'static, @@ -559,12 +566,12 @@ pub mod test { self.expect_some_edit(client, move |edit| { if let [edit_idmap, edit_code] = edit.edits.as_slice() { let code_so_far = this.current_ls_content.get(); - let file_so_far = SourceFile::new(code_so_far); + let file_so_far = SourceFile::new((&code_so_far).into()); // TODO [mwu] // Currently this assumes that the whole idmap is replaced at each edit. // This code should be adjusted, if partial metadata updates are implemented. 
let idmap_range = - TextLocation::convert_byte_range(&file_so_far.content, &file_so_far.id_map); + file_so_far.id_map.map(|x| code_so_far.location_of_byte_offset_snapped(x)); let idmap_range = TextRange::from(idmap_range); assert_eq!(edit_idmap.range, idmap_range); assert!(SourceFile::looks_like_idmap(&edit_idmap.text)); @@ -600,18 +607,21 @@ pub mod test { fn whole_document_range(&self) -> TextRange { let code_so_far = self.current_ls_content.get(); - let end_of_file = TextLocation::at_document_end(&code_so_far); + let end_of_file = code_so_far.location_of_text_end(); TextRange { start: Position { line: 0, character: 0 }, end: end_of_file.into() } } } - fn apply_edit(code: &str, edit: &TextEdit) -> String { - let start = TextLocation::from(edit.range.start).to_index(code); - let end = TextLocation::from(edit.range.end).to_index(code); - data::text::TextChange::replace(start..end, edit.text.clone()).applied(code) + fn apply_edit(code: impl Into, edit: &TextEdit) -> Text { + let mut code = code.into(); + let start_loc = code.byte_offset_of_location_snapped(edit.range.start.into()); + let end_loc = code.byte_offset_of_location_snapped(edit.range.end.into()); + let change = Change { range: Range::new(start_loc, end_loc), text: edit.text.clone() }; + code.apply_change(change); + code } - fn apply_edits(code: impl Into, file_edit: &FileEdit) -> String { + fn apply_edits(code: impl Into, file_edit: &FileEdit) -> Text { let initial = code.into(); file_edit.edits.iter().fold(initial, |content, edit| apply_edit(&content, edit)) } @@ -664,10 +674,7 @@ pub mod test { let new_ast = parser.parse_module(new_content, default()).unwrap(); module.update_ast(new_ast).unwrap(); runner.perhaps_run_until_stalled(&mut fixture); - let change = TextChange { - replaced: text::Index::new(20)..text::Index::new(24), - inserted: "Test 2".to_string(), - }; + let change = TextChange { range: (20..24).into(), text: "Test 2".to_string() }; module.apply_code_change(change, &Parser::new_or_panic(), 
default()).unwrap(); runner.perhaps_run_until_stalled(&mut fixture); }; @@ -689,7 +696,7 @@ pub mod test { // Opening module and metadata generation. edit_handler.expect_full_invalidation(client); // Applying code update. - edit_handler.expect_edit_w_metadata(client, |edit| { + edit_handler.expect_edit_with_metadata(client, |edit| { assert_eq!(edit.text, "Test 2"); assert_eq!(edit.range, TextRange { start: Position { line: 1, character: 13 }, @@ -703,10 +710,7 @@ pub mod test { let (_module, controller) = fixture.synchronized_module_w_controller(); runner.perhaps_run_until_stalled(&mut fixture); - let change = TextChange { - replaced: text::Index::new(20)..text::Index::new(24), - inserted: "Test 2".to_string(), - }; + let change = TextChange { range: (20..24).into(), text: "Test 2".to_string() }; controller.apply_code_change(change).unwrap(); runner.perhaps_run_until_stalled(&mut fixture); }; diff --git a/app/gui/src/model/suggestion_database.rs b/app/gui/src/model/suggestion_database.rs index 66f32ca434..7bf71e08c6 100644 --- a/app/gui/src/model/suggestion_database.rs +++ b/app/gui/src/model/suggestion_database.rs @@ -8,10 +8,10 @@ use crate::model::module::MethodId; use crate::model::suggestion_database::entry::Kind; use crate::notification; -use data::text::TextLocation; use double_representation::module::QualifiedName; use engine_protocol::language_server; use engine_protocol::language_server::SuggestionId; +use enso_text::Location; use flo_stream::Subscriber; use language_server::types::SuggestionDatabaseUpdatesEvent; use language_server::types::SuggestionsDatabaseVersion; @@ -191,7 +191,7 @@ impl SuggestionDatabase { &self, name: impl Str, module: &QualifiedName, - location: TextLocation, + location: Location, ) -> Vec> { self.entries .borrow() @@ -209,7 +209,7 @@ impl SuggestionDatabase { &self, name: impl Str, module: &QualifiedName, - location: TextLocation, + location: Location, ) -> Vec> { self.entries .borrow() @@ -280,11 +280,9 @@ mod test { use 
engine_protocol::language_server::SuggestionEntryScope; use engine_protocol::language_server::SuggestionsDatabaseEntry; use engine_protocol::language_server::SuggestionsDatabaseModification; - use enso_data::text::TextLocation; + use enso_text::traits::*; use wasm_bindgen_test::wasm_bindgen_test_configure; - - wasm_bindgen_test_configure!(run_in_browser); @@ -523,7 +521,10 @@ mod test { assert_eq!(db.lookup(3).unwrap().arguments[2].repr_type, "TestAtom"); assert!(db.lookup(3).unwrap().arguments[2].is_suspended); assert_eq!(db.lookup(3).unwrap().arguments[2].default_value, None); - let range = TextLocation { line: 1, column: 5 }..=TextLocation { line: 3, column: 0 }; + let range = Location { line: 1.line(), column: 5.column() }..=Location { + line: 3.line(), + column: 0.column(), + }; assert_eq!(db.lookup(3).unwrap().scope, Scope::InModule { range }); assert_eq!(db.version.get(), 6); diff --git a/app/gui/src/model/suggestion_database/entry.rs b/app/gui/src/model/suggestion_database/entry.rs index 0498952df0..c1c005d6fe 100644 --- a/app/gui/src/model/suggestion_database/entry.rs +++ b/app/gui/src/model/suggestion_database/entry.rs @@ -5,12 +5,12 @@ use crate::prelude::*; use crate::model::module::MethodId; use ast::constants::keywords; -use data::text::TextLocation; use double_representation::module; use double_representation::tp; use engine_protocol::language_server; use engine_protocol::language_server::FieldUpdate; use engine_protocol::language_server::SuggestionsDatabaseModification; +use enso_text::Location; use language_server::types::FieldAction; use std::collections::BTreeSet; @@ -77,7 +77,7 @@ pub enum Scope { /// Local symbol that is visible only in a particular section of the module where it has been /// defined. #[allow(missing_docs)] - InModule { range: RangeInclusive }, + InModule { range: RangeInclusive }, } /// Represents code snippet and the imports needed for it to work. 
@@ -202,7 +202,7 @@ impl Entry { } /// Checks if entry is visible at given location in a specific module. - pub fn is_visible_at(&self, module: &module::QualifiedName, location: TextLocation) -> bool { + pub fn is_visible_at(&self, module: &module::QualifiedName, location: Location) -> bool { match &self.scope { Scope::Everywhere => true, Scope::InModule { range } => self.module == *module && range.contains(&location), diff --git a/app/gui/view/graph-editor/Cargo.toml b/app/gui/view/graph-editor/Cargo.toml index 0934b353d0..e65a2a09ad 100644 --- a/app/gui/view/graph-editor/Cargo.toml +++ b/app/gui/view/graph-editor/Cargo.toml @@ -17,6 +17,7 @@ enso-logger = { path = "../../../../lib/rust/logger"} enso-prelude = { path = "../../../../lib/rust/prelude"} engine-protocol = { version = "0.1.0", path = "../../controller/engine-protocol" } enso-shapely = { path = "../../../../lib/rust/shapely/impl"} +enso-text = { version = "0.1.0", path = "../../../../lib/rust/text" } ensogl = { version = "0.1.0", path = "../../../../lib/rust/ensogl" } ensogl-gui-component = { version = "0.1.0", path = "../../../../lib/rust/ensogl/component/gui" } ensogl-text = { version = "0.1.0", path = "../../../../lib/rust/ensogl/component/text" } diff --git a/app/gui/view/graph-editor/src/component/node/input/area.rs b/app/gui/view/graph-editor/src/component/node/input/area.rs index bfb206ae43..a2db59b6b2 100644 --- a/app/gui/view/graph-editor/src/component/node/input/area.rs +++ b/app/gui/view/graph-editor/src/component/node/input/area.rs @@ -3,8 +3,18 @@ use crate::prelude::*; +use crate::component::type_coloring; +use crate::node; +use crate::node::input::port; +use crate::node::profiling; +use crate::view; +use crate::Type; + + use enso_frp as frp; use enso_frp; +use enso_text::traits::*; +use enso_text::unit::*; use ensogl::application::Application; use ensogl::data::color; use ensogl::display; @@ -15,15 +25,7 @@ use ensogl::gui::cursor; use ensogl::Animation; use ensogl_hardcoded_theme as 
theme; use ensogl_text as text; -use ensogl_text::buffer::data::unit::traits::*; -use text::Text; - -use crate::component::type_coloring; -use crate::node; -use crate::node::input::port; -use crate::node::profiling; -use crate::view; -use crate::Type; +use ensogl_text::Text; @@ -107,13 +109,13 @@ impl Debug for Expression { /// Helper struct used for `Expression` conversions. #[derive(Debug, Default)] struct ExprConversion { - prev_tok_local_index: usize, + prev_tok_local_index: Bytes, /// Index of the last traverse parent node in the `SpanTree`. - last_parent_tok_index: usize, + last_parent_tok_index: Bytes, } impl ExprConversion { - fn new(last_parent_tok_index: usize) -> Self { + fn new(last_parent_tok_index: Bytes) -> Self { let prev_tok_local_index = default(); Self { prev_tok_local_index, last_parent_tok_index } } @@ -124,27 +126,27 @@ impl From for Expression { /// structure. It also computes `port::Model` values in the `viz_code` representation. fn from(t: node::Expression) -> Self { // The length difference between `code` and `viz_code` so far. 
- let mut shift = 0; + let mut shift = 0.bytes(); let mut span_tree = t.input_span_tree.map(|_| port::Model::default()); let mut viz_code = String::new(); let code = t.code; span_tree.root_ref_mut().dfs_with_layer_data(ExprConversion::default(), |node, info| { let is_expected_arg = node.is_expected_argument(); let span = node.span(); - let mut size = span.size.value; - let mut index = span.index.value; - let offset_from_prev_tok = node.offset.value - info.prev_tok_local_index; - info.prev_tok_local_index = node.offset.value + size; - viz_code += &" ".repeat(offset_from_prev_tok); + let mut size = span.size(); + let mut index = span.start; + let offset_from_prev_tok = node.offset - info.prev_tok_local_index; + info.prev_tok_local_index = node.offset + size; + viz_code += &" ".repeat(offset_from_prev_tok.as_usize()); if node.children.is_empty() { - viz_code += &code[index..index + size]; + viz_code += &code.as_str()[enso_text::Range::new(index, index + size)]; } index += shift; if is_expected_arg { if let Some(name) = node.name() { - size = name.len(); - index += 1; - shift += 1 + size; + size = name.len().into(); + index += 1.bytes(); + shift += 1.bytes() + size; viz_code += " "; viz_code += name; } @@ -446,8 +448,11 @@ impl Area { let expr = self.model.expression.borrow(); expr.root_ref().get_descendant(crumbs).ok().map(|node| { let unit = GLYPH_WIDTH; - let width = unit * node.payload.length as f32; - let x = width / 2.0 + unit * node.payload.index as f32; + let range_before = ensogl_text::Range::new(0.bytes(), node.payload.index); + let char_offset: Chars = expr.viz_code[range_before].chars().count().into(); + let char_count: Chars = expr.viz_code[node.payload.range()].chars().count().into(); + let width = unit * (i32::from(char_count) as f32); + let x = width / 2.0 + unit * (i32::from(char_offset) as f32); Vector2::new(TEXT_OFFSET + x, 0.0) }) } @@ -480,10 +485,10 @@ struct PortLayerBuilder { parent: display::object::Instance, /// Information whether the parent 
port was a parensed expression. parent_parensed: bool, - /// The number of glyphs the expression should be shifted. For example, consider `(foo bar)`, - /// where expression `foo bar` does not get its own port, and thus a 1 glyph shift should be - /// applied when considering its children. - shift: usize, + /// The number of chars the expression should be shifted. For example, consider + /// `(foo bar)`, where expression `foo bar` does not get its own port, and thus a 1 char + /// shift should be applied when considering its children. + shift: Chars, /// The depth at which the current expression is, where root is at depth 0. depth: usize, } @@ -494,7 +499,7 @@ impl PortLayerBuilder { parent: impl display::Object, parent_frp: Option, parent_parensed: bool, - shift: usize, + shift: Chars, depth: usize, ) -> Self { let parent = parent.display_object().clone_ref(); @@ -511,7 +516,7 @@ impl PortLayerBuilder { parent: display::object::Instance, new_parent_frp: Option, parent_parensed: bool, - shift: usize, + shift: Chars, ) -> Self { let depth = self.depth + 1; let parent_frp = new_parent_frp.or_else(|| self.parent_frp.clone()); @@ -528,7 +533,8 @@ impl Area { let mut is_header = true; let mut id_crumbs_map = HashMap::new(); let builder = PortLayerBuilder::empty(&self.model.ports); - expression.root_ref_mut().dfs_with_layer_data(builder, |mut node, builder| { + let code = &expression.viz_code; + expression.span_tree.root_ref_mut().dfs_with_layer_data(builder, |mut node, builder| { let is_parensed = node.is_parensed(); let skip_opr = if SKIP_OPERATIONS { node.is_operation() && !is_header @@ -560,14 +566,20 @@ impl Area { ); } + let range_before_start = node.payload.index - node.payload.local_index; + let range_before_end = node.payload.index; + let range_before = ensogl_text::Range::new(range_before_start, range_before_end); + let local_char_offset: Chars = code[range_before].chars().count().into(); + let new_parent = if not_a_port { builder.parent.clone_ref() } else { 
let port = &mut node; - let index = port.payload.local_index + builder.shift; - let size = port.payload.length; + + let index = local_char_offset + builder.shift; + let size: Chars = code[port.payload.range()].chars().count().into(); let unit = GLYPH_WIDTH; - let width = unit * size as f32; + let width = unit * i32::from(size) as f32; let width_padded = width + 2.0 * PORT_PADDING_X; let height = 18.0; let padded_size = Vector2(width_padded, height); @@ -576,7 +588,7 @@ impl Area { let scene = self.model.scene(); let port_shape = port.payload_mut().init_shape(logger, scene, size, node::HEIGHT); - port_shape.mod_position(|t| t.x = unit * index as f32); + port_shape.mod_position(|t| t.x = unit * i32::from(index) as f32); if DEBUG { port_shape.mod_position(|t| t.y = DEBUG_PORT_OFFSET) } @@ -683,7 +695,7 @@ impl Area { } } let new_parent_frp = Some(node.frp.output.clone_ref()); - let new_shift = if !not_a_port { 0 } else { builder.shift + node.payload.local_index }; + let new_shift = if !not_a_port { 0.chars() } else { builder.shift + local_char_offset }; builder.nested(new_parent, new_parent_frp, is_parensed, new_shift) }); *self.model.id_crumbs_map.borrow_mut() = id_crumbs_map; @@ -782,9 +794,7 @@ impl Area { frp::extend! 
{ port_network set_color <- all_with(&label_color,&self.set_edit_mode,|&color, _| color); eval set_color ([label](color) { - let start_bytes = (index as i32).bytes(); - let end_bytes = ((index + length) as i32).bytes(); - let range = ensogl_text::buffer::Range::from(start_bytes..end_bytes); + let range = enso_text::Range::new(index, index + length); label.set_color_bytes(range,color::Rgba::from(color)); }); } diff --git a/app/gui/view/graph-editor/src/component/node/input/port.rs b/app/gui/view/graph-editor/src/component/node/input/port.rs index 46cc9c8f44..e790dd7a30 100644 --- a/app/gui/view/graph-editor/src/component/node/input/port.rs +++ b/app/gui/view/graph-editor/src/component/node/input/port.rs @@ -1,5 +1,6 @@ use crate::prelude::*; +use enso_text::unit::*; use ensogl::data::color; use ensogl::display; use ensogl::display::scene::Scene; @@ -151,9 +152,9 @@ pub struct Model { pub frp: Frp, pub shape: Option, pub name: Option, - pub index: usize, - pub local_index: usize, - pub length: usize, + pub index: Bytes, + pub local_index: Bytes, + pub length: Bytes, pub highlight_color: color::Lcha, // TODO needed? and other fields? } @@ -186,4 +187,11 @@ impl Model { self.shape = Some(shape); self.shape.as_ref().unwrap().clone_ref() } + + /// The range of this port. 
+ pub fn range(&self) -> enso_text::Range { + let start = self.index; + let end = self.index + self.length; + enso_text::Range::new(start, end) + } } diff --git a/app/gui/view/graph-editor/src/component/node/output/area.rs b/app/gui/view/graph-editor/src/component/node/output/area.rs index 04e1f6eb56..2bf8824955 100644 --- a/app/gui/view/graph-editor/src/component/node/output/area.rs +++ b/app/gui/view/graph-editor/src/component/node/output/area.rs @@ -105,8 +105,8 @@ impl From for Expression { span_tree.root_ref_mut().dfs_with_layer_data((), |node, ()| { let span = node.span(); let port = node.payload_mut(); - port.index = span.index.value; - port.length = span.size.value; + port.index = span.start; + port.length = span.size(); }); Expression { code, span_tree, whole_expr_type, whole_expr_id } } diff --git a/app/gui/view/graph-editor/src/component/node/output/port.rs b/app/gui/view/graph-editor/src/component/node/output/port.rs index 23f8d3ae23..bab6fc4c79 100644 --- a/app/gui/view/graph-editor/src/component/node/output/port.rs +++ b/app/gui/view/graph-editor/src/component/node/output/port.rs @@ -8,6 +8,7 @@ use crate::view; use crate::Type; use enso_frp as frp; +use enso_text::unit::*; use ensogl::data::color; use ensogl::display; use ensogl::display::shape::primitive::def::class::ShapeOps; @@ -447,8 +448,8 @@ pub struct Model { pub shape: Option, pub type_label: Option, pub display_object: Option, - pub index: usize, - pub length: usize, + pub index: Bytes, + pub length: Bytes, port_count: usize, port_index: usize, } diff --git a/lib/rust/data/Cargo.toml b/lib/rust/data-structures/Cargo.toml similarity index 95% rename from lib/rust/data/Cargo.toml rename to lib/rust/data-structures/Cargo.toml index a51e9d4049..be7237bec5 100644 --- a/lib/rust/data/Cargo.toml +++ b/lib/rust/data-structures/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "enso-data" +name = "enso-data-structures" version = "0.2.0" authors = ["Enso Team "] edition = "2018" diff --git 
a/lib/rust/data/README.md b/lib/rust/data-structures/README.md similarity index 64% rename from lib/rust/data/README.md rename to lib/rust/data-structures/README.md index eaa41cf063..58adf8ce5f 100644 --- a/lib/rust/data/README.md +++ b/lib/rust/data-structures/README.md @@ -1,3 +1,3 @@ -# Enso Data +# Enso Data Structures A collection of useful data structures. diff --git a/lib/rust/data/benches/bench_tree_query.rs b/lib/rust/data-structures/benches/bench_tree_query.rs similarity index 98% rename from lib/rust/data/benches/bench_tree_query.rs rename to lib/rust/data-structures/benches/bench_tree_query.rs index 445be9c8ed..6a4e4a5fff 100644 --- a/lib/rust/data/benches/bench_tree_query.rs +++ b/lib/rust/data-structures/benches/bench_tree_query.rs @@ -1,6 +1,6 @@ //! This file contains benchmarks of the query performance for the HashTree structure. -use enso_data::hash_map_tree::HashMapTree; +use enso_data_structures::hash_map_tree::HashMapTree; use itertools::*; use criterion::black_box; diff --git a/lib/rust/data/src/dependency_graph.rs b/lib/rust/data-structures/src/dependency_graph.rs similarity index 99% rename from lib/rust/data/src/dependency_graph.rs rename to lib/rust/data-structures/src/dependency_graph.rs index 66f8f37da8..c4d25cd530 100644 --- a/lib/rust/data/src/dependency_graph.rs +++ b/lib/rust/data-structures/src/dependency_graph.rs @@ -234,7 +234,7 @@ impl Extend<(T, Node)> for DependencyGraph { /// Utility macro allowing easy construction of the [`DependencyGraph`]. 
The following code: /// ``` -/// use crate::enso_data::dependency_graph; +/// use crate::enso_data_structures::dependency_graph; /// dependency_graph!(1->2, 2->3); /// ``` /// will produce: diff --git a/lib/rust/data/src/diet.rs b/lib/rust/data-structures/src/diet.rs similarity index 100% rename from lib/rust/data/src/diet.rs rename to lib/rust/data-structures/src/diet.rs diff --git a/lib/rust/data/src/hash_map_tree.rs b/lib/rust/data-structures/src/hash_map_tree.rs similarity index 100% rename from lib/rust/data/src/hash_map_tree.rs rename to lib/rust/data-structures/src/hash_map_tree.rs diff --git a/lib/rust/data/src/index.rs b/lib/rust/data-structures/src/index.rs similarity index 66% rename from lib/rust/data/src/index.rs rename to lib/rust/data-structures/src/index.rs index 9ef508244a..74a76d2206 100644 --- a/lib/rust/data/src/index.rs +++ b/lib/rust/data-structures/src/index.rs @@ -1,4 +1,4 @@ -//! This module defines a typed index struct. Useful to introduce type safety when using indexes +//! This module defines a typed index struct. Useful to introduce type safety when using indexes of //! several indexable containers. use crate::prelude::*; @@ -9,7 +9,27 @@ use crate::prelude::*; // === Index === // ============= -/// Typed newtype for `usize` meant to be used as a typed index. +/// Typed wrapper for `usize` meant to be used as a typed index. +/// +/// Useful to introduce type safety when using indexes of several indexable containers, for example: +/// +/// ```no_run +/// # use enso_data_structures::index::Index; +/// # struct Edge {} +/// # struct Vertex {} +/// # fn do_something(_e: &Edge, _v : &Vertex) {} +/// struct Graph { +/// edges: Vec, +/// vertices: Vec, +/// } +/// +/// impl Graph { +/// /// When calling this function, you won't mix the edge id with vertex id. 
+/// fn do_something_with_vertex_and_edge(&self, v: Index, e: Index) { +/// do_something(&self.edges[e.raw], &self.vertices[v.raw]); +/// } +/// } +/// ``` pub struct Index { /// Raw value. pub raw: usize, diff --git a/lib/rust/data/src/lib.rs b/lib/rust/data-structures/src/lib.rs similarity index 97% rename from lib/rust/data/src/lib.rs rename to lib/rust/data-structures/src/lib.rs index e3f5ae9854..6050b987c8 100644 --- a/lib/rust/data/src/lib.rs +++ b/lib/rust/data-structures/src/lib.rs @@ -17,6 +17,5 @@ pub mod diet; pub mod hash_map_tree; pub mod index; pub mod opt_vec; -pub mod text; pub use enso_prelude as prelude; diff --git a/lib/rust/data/src/opt_vec.rs b/lib/rust/data-structures/src/opt_vec.rs similarity index 100% rename from lib/rust/data/src/opt_vec.rs rename to lib/rust/data-structures/src/opt_vec.rs diff --git a/lib/rust/data/src/text.rs b/lib/rust/data/src/text.rs deleted file mode 100644 index 4a67b12a9b..0000000000 --- a/lib/rust/data/src/text.rs +++ /dev/null @@ -1,634 +0,0 @@ -//! The common structures for text location and manipulation. - -use enso_prelude::*; - -use serde::Deserialize; -use serde::Serialize; -use std::ops::Add; -use std::ops::AddAssign; -use std::ops::Range; -use std::ops::Sub; -use std::ops::SubAssign; - - - -/// ====================================== -/// === Text Coordinates And Distances === -/// ====================================== - -// === Index === - -/// Strongly typed index into container. -#[allow(missing_docs)] -#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] -pub struct Index { - pub value: usize, -} - -impl Index { - /// Initializes Index with given value. - pub fn new(value: usize) -> Self { - Index { value } - } - - /// Create char index from the byte index. It must traverse the content to count chars. 
- pub fn convert_byte_index(content: impl Str, index: ByteIndex) -> Self { - let slice = &content.as_ref()[..index.value]; - Self::new(slice.chars().count()) - } - - /// Checked subtraction. Computes `self - rhs`, returning `None` if overflow occurred. - pub fn checked_sub(self, rhs: Size) -> Option { - self.value.checked_sub(rhs.value).map(Self::new) - } -} - -impl Display for Index { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.value) - } -} - - -// === ByteIndex === - -/// Strongly typed index of byte in String (which may differ with analogous character index, -/// because some chars takes more than one byte). -//TODO[ao] We should use structures from ensogl::math::topology to represent different quantities -// and units. -#[allow(missing_docs)] -#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] -pub struct ByteIndex { - pub value: usize, -} - -impl ByteIndex { - /// Initializes Index with given value. - pub fn new(value: usize) -> Self { - ByteIndex { value } - } - - /// Map given Range into Range. - pub fn new_range(value: Range) -> Range { - ByteIndex::new(value.start)..ByteIndex::new(value.end) - } - - /// Index of the next byte. - pub fn next(self) -> Self { - ByteIndex { value: self.value + 1 } - } -} - - -// === Size === - -/// Strongly typed size of container. -#[allow(missing_docs)] -#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] -pub struct Size { - pub value: usize, -} - -impl Size { - /// Initializes Size with given value. - pub fn new(value: usize) -> Self { - Size { value } - } - - /// Obtain a size of given string value. - pub fn from_text(value: impl AsRef) -> Self { - Size::new(value.as_ref().chars().count()) - } - - /// Checks if this is a non-empty size (more than zero elements). - pub fn non_empty(self) -> bool { - self.value > 0 - } - - /// Checks if this is an empty size (zero elements). 
- pub fn is_empty(self) -> bool { - self.value == 0 - } - - /// Checked subtraction. Computes `self - rhs`, returning `None` if overflow occurred. - pub fn checked_sub(self, rhs: Size) -> Option { - self.value.checked_sub(rhs.value).map(Self::new) - } -} - -impl Add for Size { - type Output = Size; - fn add(self, rhs: Size) -> Size { - Size { value: self.value + rhs.value } - } -} - -impl AddAssign for Size { - fn add_assign(&mut self, rhs: Size) { - *self = *self + rhs; - } -} - -impl Sub for Size { - type Output = Size; - fn sub(self, rhs: Size) -> Size { - Size { value: self.value - rhs.value } - } -} - -impl SubAssign for Size { - fn sub_assign(&mut self, rhs: Size) { - *self = *self - rhs; - } -} - -impl Display for Size { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.value) - } -} - - -// === Span === - -/// Strongly typed span into container with index and size. -#[allow(missing_docs)] -#[derive(Clone, Copy, Debug, Default, Hash, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] -pub struct Span { - pub index: Index, - pub size: Size, -} - -impl Span { - /// Initializes Span with given values. - pub fn new(index: Index, size: Size) -> Self { - Span { index, size } - } - - /// Creates a span describing a range between two indices. - pub fn from_indices(begin: Index, end: Index) -> Self { - if end < begin { - Self::from_indices(end, begin) - } else { - let index = begin; - let size = end - begin; - Span { index, size } - } - } - - /// Creates a span from zero up to given index. - pub fn from_beginning_to(index: Index) -> Self { - Span::from_indices(Index::new(0), index) - } - - /// Creates a span from zero index with given length. - pub fn from_beginning(size: Size) -> Self { - Span { index: Index::new(0), size } - } - - /// Get the index of the last character in the span. - /// - /// If the span is empty returns `None`. 
- pub fn last(&self) -> Option { - if self.is_empty() { - None - } else { - self.end().checked_sub(Size::new(1)) - } - } - - /// Get the character after last character of this span. - /// - /// If span has size 0, it returns the `index` field. - pub fn end(&self) -> Index { - self.index + self.size - } - - /// Check if this span contains character under `index`. - pub fn contains(&self, index: Index) -> bool { - self.index <= index && self.end() > index - } - - /// Check if this span contains the whole another span. - pub fn contains_span(&self, span: &Span) -> bool { - self.index <= span.index && self.end() >= span.end() - } - - /// Converts span to `Range`. - pub fn range(self) -> Range { - let start = self.index.value; - let end = self.end().value; - start..end - } - - /// Expand the span by moving its left (start) index. - pub fn extend_left(&mut self, size: Size) { - self.index -= size; - self.size += size; - } - - /// Expand the span by moving its right (end) index. - pub fn extend_right(&mut self, size: Size) { - self.size += size; - } - - /// Shrink the span by moving its left (start) index. - pub fn shrink_left(&mut self, size: Size) { - self.index += size; - self.size -= size; - } - - /// Shrink the span by moving its right (end) index. - pub fn shrink_right(&mut self, size: Size) { - self.size -= size; - } - - /// Move the whole span left, maintaining its size. - pub fn move_left(&mut self, size: Size) { - self.index -= size; - } - - /// Move the whole span right, maintaining its size. - pub fn move_right(&mut self, size: Size) { - self.index += size; - } - - /// Move the start index of the span, adjusting the size. - pub fn set_left(&mut self, new_left: Index) { - let end = self.end(); - self.index = new_left; - self.size = end - new_left; - } - - /// Move the end index of the span, adjusting the size. - pub fn set_right(&mut self, new_right: Index) { - self.size = new_right - self.index; - } - - /// Check if this is an empty span (zero elements). 
- pub fn is_empty(self) -> bool { - self.size.is_empty() - } -} - -impls! { From + &From > for Span { |range| - Span::from_indices(Index::new(range.start), Index::new(range.end)) -}} - -impls! { From + &From for Range { |this| - this.range() -}} - -impl PartialEq> for Span { - fn eq(&self, other: &Range) -> bool { - &self.range() == other - } -} - -impl Display for Span { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}..{}", self.index.value, self.end().value) - } -} - -impl std::ops::Index for str { - type Output = str; - - fn index(&self, span: Span) -> &Self::Output { - // Note: Unwraps in this method are justified, as OOB access panic is expected behavior - // for []-style indexing operations. - let mut iter = self.char_indices(); - let first = iter.nth(span.index.value).unwrap(); - let to_last = span.last().map(|last| last - span.index); - let last_as_nth = to_last.and_then(|i| i.checked_sub(Size::new(1))); - let last = last_as_nth.map_or(first, |nth| iter.nth(nth.value).unwrap()); - if span.is_empty() { - &self[first.0..first.0] - } else { - &self[first.0..last.0 + last.1.len_utf8()] - } - } -} - -impl std::ops::Index for String { - type Output = str; - - fn index(&self, index: Span) -> &Self::Output { - &self.as_str()[index] - } -} - -impl From> for Span { - fn from(range: Range) -> Self { - Span::from_indices(range.start, range.end) - } -} - - -// === Operators for Index and Size === - -impl Add for Index { - type Output = Index; - fn add(self, rhs: Size) -> Index { - Index { value: self.value + rhs.value } - } -} - -impl AddAssign for Index { - fn add_assign(&mut self, rhs: Size) { - *self = *self + rhs; - } -} - -impl Sub for Index { - type Output = Index; - fn sub(self, rhs: Size) -> Index { - Index { value: self.value - rhs.value } - } -} - -impl SubAssign for Index { - fn sub_assign(&mut self, rhs: Size) { - *self = *self - rhs; - } -} - -impl Sub for Index { - type Output = Size; - fn sub(self, rhs: Index) -> Size { - Size 
{ value: self.value - rhs.value } - } -} - - -// === TextLocation === - -/// A position of character in a multiline text. -#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct TextLocation { - /// Line index. - pub line: usize, - /// Column is a index of char in given line. - pub column: usize, -} - -/// Short pretty print representation in the form of `line:column`. -impl Display for TextLocation { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{}:{}", self.line, self.column) - } -} - -impl TextLocation { - /// Create location at begin of given line. - pub fn at_line_begin(line_index: usize) -> Self { - TextLocation { line: line_index, column: 0 } - } - - /// Create location at begin of the whole document. - pub fn at_document_begin() -> Self { - TextLocation { line: 0, column: 0 } - } - - /// Create location at and of the whole document. It iterates over all the content. - pub fn at_document_end(content: impl Str) -> Self { - Self::after_chars(content.as_ref().chars()) - } - - /// Convert from index of document with `content`. It iterates over all characters before - /// `index`. - pub fn from_index(content: impl Str, index: Index) -> Self { - let before = content.as_ref().chars().take(index.value); - Self::after_chars(before) - } - - /// Convert self to the text index. - /// - /// This operation involves iterating over content characters and is O(n). - /// - /// Behavior for out-of-bounds index conversion is unspecified but will never panic. - pub fn to_index(self, content: impl AsRef) -> Index { - let line_index = match self.line { - 0 => 0, - _ => { - let content = content.as_ref(); - newline_indices(content).nth(self.line.saturating_sub(1)).map_or(0, |i| i + 1) - } - }; - Index::new(line_index + self.column) - } - - /// Converts a range of indices into a range of TextLocation. It iterates over all characters - /// before range's end. 
- pub fn convert_range(content: impl Str, range: &Range) -> Range { - let content = content.as_ref(); - Self::from_index(content, range.start)..Self::from_index(content, range.end) - } - - /// Converts a span into a range of TextLocation. It iterates over all characters before range's - /// end. - pub fn convert_span(content: impl Str, span: &Span) -> Range { - let range = span.index..span.end(); - Self::convert_range(content, &range) - } - - /// Converts a range in bytes into a range of TextLocation. It iterates over all characters - /// before range's end. - pub fn convert_byte_range(content: impl Str, range: &Range) -> Range { - let start = Index::convert_byte_index(content.as_ref(), range.start); - let end = Index::convert_byte_index(content.as_ref(), range.end); - Self::convert_range(content, &(start..end)) - } - - fn after_chars(chars: IntoCharsIter) -> Self - where IntoCharsIter: IntoIterator { - let iter = chars.into_iter(); - let len = iter.clone().count(); - let newlines = iter.enumerate().filter(|(_, c)| *c == '\n'); - let newlines_indices = newlines.map(|(i, _)| i); - TextLocation { - line: newlines_indices.clone().count(), - column: len - newlines_indices.last().map_or(0, |i| i + 1), - } - } -} - - - -// ============== -// === Change === -// ============== - -/// A template for structure describing a text operation in one place. -/// -/// This is a generalized template, because we use different representation for both index -/// (e.g. `Index` or `TextLocation`) and inserted content (it may be just String, but also e.g. -/// Vec, or Vec> split by newlines). -#[derive(Clone, Debug, Eq, Hash, PartialEq)] -pub struct TextChangeTemplate { - /// Text fragment to be replaced. If we don't mean to remove any text, this should be an empty - /// range with start set at position there `lines` will be inserted - /// (see `TextChangeTemplate::insert` definition). - pub replaced: Range, - /// Text which replaces fragment described in `replaced` field. 
- pub inserted: Content, -} - -/// The simplest change representation. -pub type TextChange = TextChangeTemplate; - - -// === Constructors === - -impl TextChangeTemplate { - /// Creates operation which inserts text at given position. - pub fn insert(at: Index, text: Content) -> Self { - TextChangeTemplate { replaced: at..at, inserted: text } - } -} - -impl TextChangeTemplate { - /// Creates operation which replaces text at given range with given string. - pub fn replace(replaced: Range, text: Content) -> Self { - let inserted = text; - TextChangeTemplate { replaced, inserted } - } -} - -impl TextChangeTemplate { - /// Calculate the size of the replaced text. - pub fn replaced_size(&self) -> Index::Output { - self.replaced.end.clone() - self.replaced.start.clone() - } -} - -impl TextChangeTemplate { - /// Calculate the size of the replaced text. - pub fn replaced_span(&self) -> Span { - let index = self.replaced.start; - let size = self.replaced_size(); - Span { index, size } - } - - /// Applies the text edit on given `String` value. - /// - /// # Panics - /// - /// Panics if the replaced span is out of the string value bounds. - pub fn apply(&self, target: &mut String) - where Content: AsRef { - //debug!(logger, "change: {change:?}, my code: \n```\n{code}\n```"); - let replaced_indices = self.replaced.start.value..self.replaced.end.value; - //debug!(logger, "replacing range {replaced_indices:?} with {change.inserted}"); - target.replace_range(replaced_indices, self.inserted.as_ref()); - } - - /// Applies the text edit on string and returns the result. - /// - /// # Panics - /// - /// Panics if the replaced span is out of the string value bounds. - pub fn applied(&self, target: &str) -> String - where Content: AsRef { - let mut target = target.to_string(); - self.apply(&mut target); - target - } -} - -impl TextChangeTemplate { - /// Creates operation which deletes text at given range. 
- pub fn delete(range: Range) -> Self { - TextChangeTemplate { replaced: range, inserted: default() } - } -} - - - -// ================= -// === Utilities === -// ================= - -/// Get indices (char-counting) of the new line characters. -pub fn newline_indices(text: &str) -> impl Iterator + '_ { - text.chars().enumerate().filter_map(|(ix, c)| (c == '\n').as_some(ix)) -} - -/// Get indices (byte-counting) of the new line characters. -pub fn newline_byte_indices(text: &str) -> impl Iterator + '_ { - text.as_bytes().iter().enumerate().filter_map(|(ix, c)| (*c == b'\n').as_some(ix)) -} - -/// Get indices (byte-counting) of the new line characters, beginning from the text end. -pub fn rev_newline_byte_indices(text: &str) -> impl Iterator + '_ { - text.as_bytes().iter().enumerate().rev().filter_map(|(ix, c)| (*c == b'\n').as_some(ix)) -} - -/// Split text to lines handling both CR and CRLF line endings. -pub fn split_to_lines(text: &str) -> impl Iterator + '_ { - text.split('\n').map(cut_cr_at_end_of_line).map(|s| s.to_string()) -} - -/// Returns slice without carriage return (also known as CR or `'\r'`) at line's end -#[rustversion::since(2020-02-01)] -fn cut_cr_at_end_of_line(from: &str) -> &str { - from.strip_suffix('\r').unwrap_or(from) -} - - - -// ============ -// === Text === -// ============ - -#[cfg(test)] -mod test { - use super::*; - - use super::Index; - - fn assert_round_trip(str: &str, index: Index, location: TextLocation) { - assert_eq!(TextLocation::from_index(str, index), location); - assert_eq!(location.to_index(str), index); - } - - #[test] - fn converting_index_to_location() { - let str = "first\nsecond\nthird"; - assert_round_trip(str, Index::new(0), TextLocation { line: 0, column: 0 }); - assert_round_trip(str, Index::new(5), TextLocation { line: 0, column: 5 }); - assert_round_trip(str, Index::new(6), TextLocation { line: 1, column: 0 }); - assert_round_trip(str, Index::new(9), TextLocation { line: 1, column: 3 }); - assert_round_trip(str, 
Index::new(12), TextLocation { line: 1, column: 6 }); - assert_round_trip(str, Index::new(13), TextLocation { line: 2, column: 0 }); - assert_round_trip(str, Index::new(18), TextLocation { line: 2, column: 5 }); - - let str = ""; - assert_round_trip(str, Index::new(0), TextLocation { line: 0, column: 0 }); - //assert_eq!(TextLocation {line:0, column:0}, TextLocation::from_index(str,Index::new(0))); - - let str = "\n"; - assert_round_trip(str, Index::new(0), TextLocation { line: 0, column: 0 }); - assert_round_trip(str, Index::new(1), TextLocation { line: 1, column: 0 }); - } - - #[test] - fn text_location_at_end() { - let str = "first\nsecond\nthird"; - assert_eq!(TextLocation::at_document_end(str), TextLocation { line: 2, column: 5 }); - assert_eq!(TextLocation::at_document_end(""), TextLocation { line: 0, column: 0 }); - assert_eq!(TextLocation::at_document_end("\n"), TextLocation { line: 1, column: 0 }); - } - - #[test] - fn indexing_utf8() { - let str = "zazó黄ć gęślą jaźń"; - assert_eq!(&str[Span::from(2..5)], "zó黄"); - assert_eq!(&str[Span::from(5..5)], ""); - assert_eq!(Size::from_text("日本語").value, 3); - assert_eq!(&"日本語"[Span::from(0..0)], ""); - assert_eq!(&"日本語"[Span::from(0..3)], "日本語"); - assert_eq!(&"日本語"[Span::from(0..1)], "日"); - assert_eq!(&"日本語"[Span::from(2..3)], "語"); - } -} diff --git a/lib/rust/ensogl/component/text/Cargo.toml b/lib/rust/ensogl/component/text/Cargo.toml index 91f47070eb..b2e5bcbc0d 100644 --- a/lib/rust/ensogl/component/text/Cargo.toml +++ b/lib/rust/ensogl/component/text/Cargo.toml @@ -11,6 +11,7 @@ crate-type = ["rlib", "cdylib"] enso-frp = { path = "../../../frp" } enso-prelude = { path = "../../../prelude"} enso-shapely = { path = "../../../shapely/impl"} +enso-text = { path = "../../../text" } enso-types = { path = "../../../types" } ensogl-core = { path = "../../core" } ensogl-text-embedded-fonts = { path = "embedded-fonts" } diff --git a/lib/rust/ensogl/component/text/src/buffer.rs 
b/lib/rust/ensogl/component/text/src/buffer.rs index 69185c5ee9..889121e2dc 100644 --- a/lib/rust/ensogl/component/text/src/buffer.rs +++ b/lib/rust/ensogl/component/text/src/buffer.rs @@ -9,22 +9,22 @@ use crate::prelude::*; // === Exports === // =============== -pub mod data; pub mod style; pub mod view; /// Common traits. pub mod traits { - pub use super::data::traits::*; + pub use enso_text::traits::*; } -pub use data::unit::*; -pub use data::Range; -pub use data::Text; -pub use data::TextCell; pub use style::*; pub use view::*; +pub use enso_text::unit::*; +pub use enso_text::Range; +pub use enso_text::Text; +pub use enso_text::TextCell; + // ============== @@ -103,7 +103,7 @@ impl BufferData { } /// Query style information for the provided range. - pub fn sub_style(&self, range: impl data::RangeBounds) -> Style { + pub fn sub_style(&self, range: impl enso_text::RangeBounds) -> Style { let range = self.crop_byte_range(range); self.style.sub(range) } @@ -119,7 +119,7 @@ impl BufferData { trait Setter { /// Replace the range with the provided value. The exact meaning of this function depends on the /// provided data type. See implementations provided in the `style` module. - fn replace(&self, range: impl data::RangeBounds, data: T); + fn replace(&self, range: impl enso_text::RangeBounds, data: T); } /// Generic setter for default value for metadata like colors, font weight, etc. 
@@ -130,7 +130,7 @@ trait DefaultSetter { } impl Setter for Buffer { - fn replace(&self, range: impl data::RangeBounds, text: Text) { + fn replace(&self, range: impl enso_text::RangeBounds, text: Text) { let range = self.crop_byte_range(range); let size = text.byte_size(); self.text.replace(range, text); @@ -139,7 +139,7 @@ impl Setter for Buffer { } impl Setter<&Text> for Buffer { - fn replace(&self, range: impl data::RangeBounds, text: &Text) { + fn replace(&self, range: impl enso_text::RangeBounds, text: &Text) { self.replace(range, text.clone()) } } diff --git a/lib/rust/ensogl/component/text/src/buffer/data.rs b/lib/rust/ensogl/component/text/src/buffer/data.rs deleted file mode 100644 index 66cb2cede1..0000000000 --- a/lib/rust/ensogl/component/text/src/buffer/data.rs +++ /dev/null @@ -1,17 +0,0 @@ -//! The data hold by the text buffer. Under the hood it is implemented as an efficient string rope. - -pub mod range; -pub mod rope; -pub mod spans; -pub mod text; -pub mod unit; - -pub use range::Range; -pub use range::RangeBounds; -pub use rope::metric; -pub use rope::Cursor; -pub use spans::Spans; -pub use text::Text; -pub use text::TextCell; -pub use unit::traits; -pub use unit::*; diff --git a/lib/rust/ensogl/component/text/src/buffer/style.rs b/lib/rust/ensogl/component/text/src/buffer/style.rs index 0cb6537f22..f823e1581a 100644 --- a/lib/rust/ensogl/component/text/src/buffer/style.rs +++ b/lib/rust/ensogl/component/text/src/buffer/style.rs @@ -117,14 +117,14 @@ macro_rules! 
define_styles { $( impl Setter> for Buffer { - fn replace(&self, range:impl data::RangeBounds, data:Option<$field_type>) { + fn replace(&self, range:impl enso_text::RangeBounds, data:Option<$field_type>) { let range = self.crop_byte_range(range); self.data.style.cell.borrow_mut().$field.replace_resize(range,range.size(),data) } } impl Setter<$field_type> for Buffer { - fn replace(&self, range:impl data::RangeBounds, data:$field_type) { + fn replace(&self, range:impl enso_text::RangeBounds, data:$field_type) { self.replace(range,Some(data)) } } @@ -174,7 +174,7 @@ impl StyleIterator { #[derive(Clone, Debug, Default)] #[allow(missing_docs)] pub struct Property { - pub spans: data::Spans>, + pub spans: enso_text::Spans>, default: T, } @@ -202,7 +202,7 @@ impl Property { // === Deref === impl Deref for Property { - type Target = data::Spans>; + type Target = enso_text::Spans>; fn deref(&self) -> &Self::Target { &self.spans } diff --git a/lib/rust/ensogl/component/text/src/buffer/view.rs b/lib/rust/ensogl/component/text/src/buffer/view.rs index 6eca67fe22..7c369c4c47 100644 --- a/lib/rust/ensogl/component/text/src/buffer/view.rs +++ b/lib/rust/ensogl/component/text/src/buffer/view.rs @@ -10,9 +10,6 @@ pub use movement::*; pub use selection::Selection; use crate::buffer; -use crate::buffer::data::text::BoundsError; -use crate::buffer::data::unit::*; -use crate::buffer::data::Text; use crate::buffer::style; use crate::buffer::style::Style; use crate::buffer::Buffer; @@ -20,6 +17,10 @@ use crate::buffer::DefaultSetter; use crate::buffer::Setter; use enso_frp as frp; +use enso_text::text::BoundsError; +use enso_text::text::Change; +use enso_text::unit::*; +use enso_text::Text; use ensogl_core::data::color; @@ -58,15 +59,6 @@ pub struct HistoryData { // === Changes === // =============== -/// A single change done to the text content. -#[derive(Clone, Debug, Default)] -pub struct Change { - /// Range of old text being replaced. 
- pub range: buffer::Range, - /// The text inserted in place of `range`. - pub text: Text, -} - /// The summary of single text modification, usually returned by `modify`-like functions in /// `ViewBuffer`. #[derive(Clone, Debug, Default)] diff --git a/lib/rust/ensogl/component/text/src/buffer/view/movement.rs b/lib/rust/ensogl/component/text/src/buffer/view/movement.rs index bf5ec9d904..b87276dbf8 100644 --- a/lib/rust/ensogl/component/text/src/buffer/view/movement.rs +++ b/lib/rust/ensogl/component/text/src/buffer/view/movement.rs @@ -1,9 +1,10 @@ //! Text cursor transform implementation. -use super::*; -use crate::buffer::data::unit::*; use crate::buffer::view::selection; use crate::buffer::view::word::WordCursor; +use crate::buffer::view::*; + +use enso_text::unit::*; diff --git a/lib/rust/ensogl/component/text/src/buffer/view/selection.rs b/lib/rust/ensogl/component/text/src/buffer/view/selection.rs index 2100a78a06..5c0fa8cbfd 100644 --- a/lib/rust/ensogl/component/text/src/buffer/view/selection.rs +++ b/lib/rust/ensogl/component/text/src/buffer/view/selection.rs @@ -2,8 +2,8 @@ use crate::prelude::*; -use crate::buffer::data::unit::*; -use crate::buffer::data::Range; +use enso_text::unit::*; +use enso_text::Range; diff --git a/lib/rust/ensogl/component/text/src/buffer/view/word.rs b/lib/rust/ensogl/component/text/src/buffer/view/word.rs index 59ecc1c93f..d1db0d8687 100644 --- a/lib/rust/ensogl/component/text/src/buffer/view/word.rs +++ b/lib/rust/ensogl/component/text/src/buffer/view/word.rs @@ -1,9 +1,9 @@ //! Implementation of a cursor allowing word-based traversal. 
- -use crate::buffer::data::rope; -use crate::buffer::data::unit::*; use crate::prelude::*; +use enso_text::rope; +use enso_text::unit::*; + // ================== diff --git a/lib/rust/ensogl/component/text/src/component/area.rs b/lib/rust/ensogl/component/text/src/component/area.rs index b0c2675d3d..e7a5bea7d1 100644 --- a/lib/rust/ensogl/component/text/src/component/area.rs +++ b/lib/rust/ensogl/component/text/src/component/area.rs @@ -4,7 +4,6 @@ use crate::prelude::*; use crate::buffer; -use crate::buffer::data::unit::*; use crate::buffer::style; use crate::buffer::Text; use crate::buffer::Transform; @@ -17,6 +16,7 @@ use crate::typeface::pen; use enso_frp as frp; use enso_frp::io::keyboard::Key; +use enso_text::unit::*; use ensogl_core::application; use ensogl_core::application::shortcut; use ensogl_core::application::Application; @@ -261,7 +261,7 @@ ensogl_core::define_endpoints! { pointer_style (cursor::Style), width (f32), height (f32), - changed (Vec), + changed (Vec), content (Text), hovered (bool), selection_color (color::Rgb), @@ -616,7 +616,7 @@ impl AreaModel { let mut selection_map = self.selection_map.borrow_mut(); let mut new_selection_map = SelectionMap::default(); for sel in selections { - let sel = self.buffer.snap_selection(*sel); + let sel = self.snap_selection(*sel); let id = sel.id; let start_line = sel.start.line.as_usize(); let end_line = sel.end.line.as_usize(); @@ -873,6 +873,16 @@ impl AreaModel { _ => None, } } + + /// Constrain the selection to values fitting inside of the current text buffer. 
+ fn snap_selection( + &self, + selection: buffer::selection::Selection, + ) -> buffer::selection::Selection { + let start = self.buffer.snap_location(selection.start); + let end = self.buffer.snap_location(selection.end); + selection.with_start(start).with_end(end) + } } impl display::Object for AreaModel { diff --git a/lib/rust/ensogl/component/text/src/lib.rs b/lib/rust/ensogl/component/text/src/lib.rs index fadb85e7cf..15e471e05e 100644 --- a/lib/rust/ensogl/component/text/src/lib.rs +++ b/lib/rust/ensogl/component/text/src/lib.rs @@ -1,67 +1,7 @@ //! Ensogl text rendering implementation. //! -//! To properly understand the implementation and its assumptions, you have to know a lot about -//! text encoding in different formats and text rendering. Especially, these links are very useful: -//! - https://gankra.github.io/blah/text-hates-you -//! - https://lord.io/blog/2019/text-editing-hates-you-too -//! - https://utf8everywhere.org -//! - https://docs.google.com/document/d/1wuzzMOvKOJw93SWZAqoim1VUl9mloUxE0W6Ki_G23tw/edit (copy) https://docs.google.com/document/d/1D7iWPWQHrWY276WPVFZTi8JJqUnTcIVJs4dlG0IdCp8 -//! -//! As a very short introduction, there are several common names used in this implementation: -//! -//! - **Code point** Any numerical value in the Unicode codespace. For instance: U+3243F. -//! -//! - **Code unit** The minimal bit combination that can represent a unit of encoded text. For -//! example, UTF-8, UTF-16 and UTF-32 use 8-bit, 16-bit and 32-bit code units respectively. The -//! above code point will be encoded as four code units ‘f0 b2 90 bf’ in UTF-8, two code units -//! ‘d889 dc3f’ in UTF-16 and as a single code unit ‘0003243f’ in UTF-32. Note that these are just -//! sequences of groups of bits; how they are stored on an octet-oriented media depends on the -//! endianness of the particular encoding. When storing the above UTF-16 code units, they will be -//! converted to ‘d8 89 dc 3f’ in UTF-16BE and to ‘89 d8 3f dc’ in UTF-16LE. -//! -//! 
- **Abstract character** A unit of information used for the organization, control, or -//! representation of textual data. The standard says: -//! -//! > For the Unicode Standard, [...] the repertoire is inherently open. Because Unicode is a -//! > universal encoding, any abstract character that could ever be encoded is a potential -//! > candidate to be encoded, regardless of whether the character is currently known. -//! -//! The definition is indeed abstract. Whatever one can think of as a character—is an abstract -//! character. For example, "tengwar letter ungwe" is an abstract character, although it is not -//! yet representable in Unicode. -//! -//! - **Encoded character, Coded character** A mapping between a code point and an abstract -//! character. For example, U+1F428 is a coded character which represents the abstract character -//! . -//! -//! This mapping is neither total, nor injective, nor surjective: -//! - Surragates, noncharacters and unassigned code points do not correspond to abstract -//! characters at all. -//! - Some abstract characters can be encoded by different code points; U+03A9 greek capital -//! letter omega and U+2126 ohm sign both correspond to the same abstract character ‘Ω’, and -//! must be treated identically. -//! - Some abstract characters cannot be encoded by a single code point. These are represented by -//! sequences of coded characters. For example, the only way to represent the abstract character -//! is by the sequence U+044E cyrillic small letter yu -//! followed by U+0301 combining acute accent. -//! -//! Moreover, for some abstract characters, there exist representations using multiple code -//! points, in addition to the single coded character form. The abstract character ǵ can be coded -//! by the single code point U+01F5 latin small letter g with acute, or by the sequence -//! . -//! -//! - **User-perceived character** Whatever the end user thinks of as a character. This notion is -//! language dependent. 
For instance, ‘ch’ is two letters in English and Latin, but considered to -//! be one letter in Czech and Slovak. -//! -//! - **Grapheme cluster** A sequence of coded characters that ‘should be kept together’. Grapheme -//! clusters approximate the notion of user-perceived characters in a language independent way. -//! They are used for, e.g., cursor movement and selection. -//! -//! - **Glyph** A particular shape within a font. Fonts are collections of glyphs designed by a type -//! designer. It’s the text shaping and rendering engine responsibility to convert a sequence of -//! code points into a sequence of glyphs within the specified font. The rules for this conversion -//! might be complicated, locale dependent, and are beyond the scope of the Unicode standard. +//! To properly understand the implementation and its assumptions, please read the documentation +//! of [`enso_text`] crate carefully. #![feature(trait_alias)] #![feature(type_ascription)] diff --git a/lib/rust/ensogl/core/Cargo.toml b/lib/rust/ensogl/core/Cargo.toml index d5405a4316..d4bd87e65e 100644 --- a/lib/rust/ensogl/core/Cargo.toml +++ b/lib/rust/ensogl/core/Cargo.toml @@ -15,7 +15,7 @@ no_unboxed_callbacks = [] [dependencies] code-builder = { path = "../../code-builder" } enso-callback = { path = "../../callback" } -enso-data = { path = "../../data"} +enso-data-structures = { path = "../../data-structures" } enso-frp = { path = "../../frp" } enso-generics = { path = "../../generics"} enso-logger = { path = "../../logger"} diff --git a/lib/rust/ensogl/core/src/data.rs b/lib/rust/ensogl/core/src/data.rs index 5c60251089..3ac454d240 100644 --- a/lib/rust/ensogl/core/src/data.rs +++ b/lib/rust/ensogl/core/src/data.rs @@ -7,8 +7,8 @@ pub mod function; pub mod mix; pub mod seq; -pub use enso_data::dependency_graph; -pub use enso_data::hash_map_tree; -pub use enso_data::hash_map_tree::HashMapTree; -pub use enso_data::index::Index; -pub use enso_data::opt_vec::OptVec; +pub use 
enso_data_structures::dependency_graph; +pub use enso_data_structures::hash_map_tree; +pub use enso_data_structures::hash_map_tree::HashMapTree; +pub use enso_data_structures::index::Index; +pub use enso_data_structures::opt_vec::OptVec; diff --git a/lib/rust/ensogl/core/src/display/scene/layer.rs b/lib/rust/ensogl/core/src/display/scene/layer.rs index 3e802ecec4..e582c19182 100644 --- a/lib/rust/ensogl/core/src/display/scene/layer.rs +++ b/lib/rust/ensogl/core/src/display/scene/layer.rs @@ -16,7 +16,7 @@ use crate::display::shape::ShapeSystemInstance; use crate::display::symbol::SymbolId; use crate::system::gpu::data::attribute; -use enso_data::dependency_graph::DependencyGraph; +use enso_data_structures::dependency_graph::DependencyGraph; use enso_shapely::shared; use smallvec::alloc::collections::BTreeSet; use std::any::TypeId; diff --git a/lib/rust/ensogl/core/src/lib.rs b/lib/rust/ensogl/core/src/lib.rs index a2883e1368..a72aeb7432 100644 --- a/lib/rust/ensogl/core/src/lib.rs +++ b/lib/rust/ensogl/core/src/lib.rs @@ -64,7 +64,7 @@ pub mod prelude { pub use super::types::*; pub use crate::data::container::AddMut; pub use crate::shapes_order_dependencies; - pub use enso_data as data; + pub use enso_data_structures as data; pub use enso_logger as logger; pub use enso_logger::AnyLogger; pub use enso_logger::DefaultWarningLogger as Logger; diff --git a/lib/rust/ensogl/core/src/system/gpu/data/attribute.rs b/lib/rust/ensogl/core/src/system/gpu/data/attribute.rs index fc0b5c0faa..2fd532940a 100644 --- a/lib/rust/ensogl/core/src/system/gpu/data/attribute.rs +++ b/lib/rust/ensogl/core/src/system/gpu/data/attribute.rs @@ -64,25 +64,25 @@ shared! { AttributeScope /// possible: /// /// 1. 
Keeping track of all free indexes in a sorted container (like [`BTreeSet`] or the specialized -/// [`enso_data::Diet`] and in case the biggest index is freed, iterating over the indexes and +/// [`enso_data_structures::Diet`] and in case the biggest index is freed, iterating over the indexes and /// freeing as much as possible. This solution has the downside that the indexes are stored in /// order, so insertion and deletion is much slower than when using unordered [`Vec`]. Also, this /// does not work well if a instance with a big ID is kept alive, as it will prevent memory of -/// all instances with smaller IDs from being cleaned. See benchmarks in the `enso_data::diet` +/// all instances with smaller IDs from being cleaned. See benchmarks in the `enso_data_structures::diet` /// module to learn more. /// /// 2. Keeping track of all free indexes in an unordered container and in case the biggest index is /// freed, sorting the container and freeing the memory. As an optimization, the sorting might /// be performed after the frame (or several frames) was drawn. It's not obvious when this /// solution will be slower / faster than the solution (1), but time differences may be big. -/// See benchmarks in the `enso_data::diet` module to learn more. +/// See benchmarks in the `enso_data_structures::diet` module to learn more. /// /// 3. Keeping track of all free indexes and in case a lot of them are free, re-ordering the /// instances and freeing the memory. This would require all instance-users (like [`Sprite`]s) to /// keep instance IDs in some kind of `Rc>`, which may slow attrib read/write down. /// However, this solution works well even if an instance with a big ID is kept alive. It's not /// obvious when this solution will be slower / faster than other ones, but time differences may -/// be big. See benchmarks in the `enso_data::diet` module to learn more. +/// be big. See benchmarks in the `enso_data_structures::diet` module to learn more. 
/// /// To learn more about these mechanisms and connected design decisions, read the docs of /// [`Symbol`], especially the "Changing attribute & GPU memory consumption" sections. diff --git a/lib/rust/ensogl/example/Cargo.toml b/lib/rust/ensogl/example/Cargo.toml index 12d7dc4c30..7e9eebc56b 100644 --- a/lib/rust/ensogl/example/Cargo.toml +++ b/lib/rust/ensogl/example/Cargo.toml @@ -12,6 +12,7 @@ enso-frp = { path = "../../frp" } enso-logger = { path = "../../logger"} enso-prelude = { path = "../../prelude"} enso-shapely = { path = "../../shapely/impl"} +enso-text = { path = "../../text" } ensogl-core = { path = "../core" } ensogl-gui-component = { path = "../component/gui" } ensogl-text = { path = "../component/text" } diff --git a/lib/rust/ensogl/example/src/list_view.rs b/lib/rust/ensogl/example/src/list_view.rs index 789193e571..29e043b3b4 100644 --- a/lib/rust/ensogl/example/src/list_view.rs +++ b/lib/rust/ensogl/example/src/list_view.rs @@ -2,12 +2,12 @@ use crate::prelude::*; +use enso_text::unit::Bytes; use ensogl_core::application::Application; use ensogl_core::display::object::ObjectOps; use ensogl_core::system::web; use ensogl_gui_component::list_view; use ensogl_hardcoded_theme as theme; -use ensogl_text::buffer::data::unit::Bytes; use ensogl_text_msdf_sys::run_once_initialized; use logger::TraceLogger as Logger; use wasm_bindgen::prelude::*; diff --git a/lib/rust/parser/Cargo.toml b/lib/rust/parser/Cargo.toml index 860a7ee819..ff4307583e 100644 --- a/lib/rust/parser/Cargo.toml +++ b/lib/rust/parser/Cargo.toml @@ -12,7 +12,7 @@ test = true bench = true [dependencies] -enso-data = { version = "0.2.0", path = "../data" } +enso-data-structures = { version = "0.2.0", path = "../data-structures" } enso-logger = { version = "0.3.0", path = "../logger" } enso-prelude = { version = "0.2.0", path = "../prelude" } lexer = { version = "0.1.0", path = "lexer/generation" } diff --git a/lib/rust/parser/src/macros/registry.rs b/lib/rust/parser/src/macros/registry.rs 
index 32bec988d4..6381a1a4b1 100644 --- a/lib/rust/parser/src/macros/registry.rs +++ b/lib/rust/parser/src/macros/registry.rs @@ -1,7 +1,7 @@ //! The macro registry that can be queried during the process of macro resolution. use crate::prelude::*; -use enso_data::hash_map_tree::*; +use enso_data_structures::hash_map_tree::*; use crate::macros::definition::Definition; use crate::macros::literal::Literal; diff --git a/lib/rust/text/Cargo.toml b/lib/rust/text/Cargo.toml new file mode 100644 index 0000000000..6f4d6936e6 --- /dev/null +++ b/lib/rust/text/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "enso-text" +version = "0.1.0" +authors = ["Enso Team "] +edition = "2018" + +[lib] +crate-type = ["rlib", "cdylib"] + +[dependencies] +enso-prelude = { path = "../prelude"} +enso-types = { path = "../types" } +xi-rope = { version = "0.3.0" } +serde = "1.0" diff --git a/lib/rust/text/src/lib.rs b/lib/rust/text/src/lib.rs new file mode 100644 index 0000000000..cbd013a600 --- /dev/null +++ b/lib/rust/text/src/lib.rs @@ -0,0 +1,109 @@ +//! The text operation utilities. +//! +//! This crate contains several utility structures for operations on text: +//! * The effective [`Text`] structure, optimized for middle-insertions, based on the rope +//! structure. +//! * A set of units, forcing the developers to think about how the text positions are expressed (in +//! chars, or in bytes? Or maybe in _grapheme clusters_)? +//! * An alternative [`Range`] with text-related trait implementations + copyable. +//! * Interval tree structure [`Spans`] useful for text rich decorations. +//! +//! To properly understand the implementation and its assumptions, you have to know a lot about +//! text encoding in different formats and text rendering. Especially, these links are very useful: +//! - https://gankra.github.io/blah/text-hates-you +//! - https://lord.io/blog/2019/text-editing-hates-you-too +//! - https://utf8everywhere.org +//! 
- https://docs.google.com/document/d/1wuzzMOvKOJw93SWZAqoim1VUl9mloUxE0W6Ki_G23tw/edit (copy) https://docs.google.com/document/d/1D7iWPWQHrWY276WPVFZTi8JJqUnTcIVJs4dlG0IdCp8 +//! +//! As a very short introduction, there are several common names used in this implementation: +//! +//! - **Code point** Any numerical value in the Unicode codespace. For instance: U+3243F. +//! +//! - **Code unit** The minimal bit combination that can represent a unit of encoded text. For +//! example, UTF-8, UTF-16 and UTF-32 use 8-bit, 16-bit and 32-bit code units respectively. The +//! above code point will be encoded as four code units ‘f0 b2 90 bf’ in UTF-8, two code units +//! ‘d889 dc3f’ in UTF-16 and as a single code unit ‘0003243f’ in UTF-32. Note that these are just +//! sequences of groups of bits; how they are stored on an octet-oriented media depends on the +//! endianness of the particular encoding. When storing the above UTF-16 code units, they will be +//! converted to ‘d8 89 dc 3f’ in UTF-16BE and to ‘89 d8 3f dc’ in UTF-16LE. +//! +//! - **Abstract character** A unit of information used for the organization, control, or +//! representation of textual data. The standard says: +//! +//! > For the Unicode Standard, [...] the repertoire is inherently open. Because Unicode is a +//! > universal encoding, any abstract character that could ever be encoded is a potential +//! > candidate to be encoded, regardless of whether the character is currently known. +//! +//! The definition is indeed abstract. Whatever one can think of as a character—is an abstract +//! character. For example, "tengwar letter ungwe" is an abstract character, although it is not +//! yet representable in Unicode. +//! +//! - **Encoded character, Coded character** A mapping between a code point and an abstract +//! character. For example, U+1F428 is a coded character which represents the abstract character +//! ‘🐨’ (koala). +//! +//! This mapping is neither total, nor injective, nor surjective: +//! 
- Surrogates, noncharacters and unassigned code points do not correspond to abstract +//! characters at all. +//! - Some abstract characters can be encoded by different code points; U+03A9 greek capital +//! letter omega and U+2126 ohm sign both correspond to the same abstract character ‘Ω’, and +//! must be treated identically. +//! - Some abstract characters cannot be encoded by a single code point. These are represented by +//! sequences of coded characters. For example, the only way to represent the abstract character +//! ‘ю́’ (cyrillic small letter yu with acute) is by the sequence U+044E cyrillic small letter yu +//! followed by U+0301 combining acute accent. +//! +//! Moreover, for some abstract characters, there exist representations using multiple code +//! points, in addition to the single coded character form. The abstract character ǵ can be coded +//! by the single code point U+01F5 latin small letter g with acute, or by the sequence +//! U+0067 latin small letter g followed by U+0301 combining acute accent. +//! +//! - **User-perceived character** Whatever the end user thinks of as a character. This notion is +//! language dependent. For instance, ‘ch’ is two letters in English and Latin, but considered to +//! be one letter in Czech and Slovak. +//! +//! - **Grapheme cluster** A sequence of coded characters that ‘should be kept together’. Grapheme +//! clusters approximate the notion of user-perceived characters in a language independent way. +//! They are used for, e.g., cursor movement and selection. +//! +//! - **Glyph** A particular shape within a font. Fonts are collections of glyphs designed by a type +//! designer. It’s the text shaping and rendering engine’s responsibility to convert a sequence of +//! code points into a sequence of glyphs within the specified font. The rules for this conversion +//! might be complicated, locale dependent, and are beyond the scope of the Unicode standard. +//! +//! Rust and our structures use the UTF-8 encoding. Rust's [`char`] primitive type corresponds +//! 
roughly with the *Code points* (to be precise, it corresponds with *scalar values* which are +//! "code points except high-surrogate and low-surrogate code points" - but the surrogate code +//! points are not used in UTF-8 anyway). + +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unsafe_code)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] + +pub mod range; +pub mod rope; +pub mod spans; +pub mod text; +pub mod unit; + +pub use range::Range; +pub use range::RangeBounds; +pub use rope::metric; +pub use rope::Cursor; +pub use spans::Spans; +pub use text::Change; +pub use text::Text; +pub use text::TextCell; +pub use unit::traits; +pub use unit::*; + +/// Commonly used utilities. +pub mod prelude { + pub use enso_prelude::*; + pub use enso_types::*; +} diff --git a/lib/rust/ensogl/component/text/src/buffer/data/range.rs b/lib/rust/text/src/range.rs similarity index 67% rename from lib/rust/ensogl/component/text/src/buffer/data/range.rs rename to lib/rust/text/src/range.rs index 1b62eb1a65..1d61c6eb04 100644 --- a/lib/rust/ensogl/component/text/src/buffer/data/range.rs +++ b/lib/rust/text/src/range.rs @@ -3,8 +3,8 @@ use crate::prelude::*; -use super::rope; -use super::unit::*; +use crate::rope; +use crate::unit::*; @@ -16,7 +16,7 @@ use super::unit::*; /// /// Unlike `std::ops::Range`, this type implements `Copy`, and contains text-related trait /// implementations. -#[derive(Clone, Copy, Default, PartialEq, Eq)] +#[derive(Clone, Copy, Default, PartialEq, Eq, Hash)] #[allow(missing_docs)] pub struct Range { pub start: T, @@ -49,10 +49,22 @@ impl Range { Self { start, end } } + /// Return new range with the `offset` subtracted from both ends. 
+ pub fn moved_left(&self, offset: T) -> Self + where T: Clone + Sub { + Self { start: self.start.clone() - offset.clone(), end: self.end.clone() - offset } + } + + /// Return new range with the `offset` added to both ends. + pub fn moved_right(&self, offset: T) -> Self + where T: Clone + Add { + Self { start: self.start.clone() + offset.clone(), end: self.end.clone() + offset } + } + /// Map both values with the provided function. - pub fn map(&self, f: impl Fn(T) -> T) -> Self + pub fn map(&self, f: impl Fn(T) -> U) -> Range where T: Clone { - self.with_start(f(self.start.clone())).with_end(f(self.end.clone())) + Range { start: f(self.start.clone()), end: f(self.end.clone()) } } /// Map the start value with the provided function. @@ -66,6 +78,20 @@ impl Range { where T: Clone { self.with_end(f(self.end.clone())) } + + /// Check if the range contains the given value. + pub fn contains(&self, value: &U) -> bool + where + T: PartialOrd, + U: PartialOrd, { + value >= &self.start && value < &self.end + } + + /// Check if the range contains all values from `other` range. 
+ pub fn contains_range(&self, other: &Range) -> bool + where T: PartialOrd { + self.start <= other.start && self.end >= other.end + } } @@ -93,10 +119,16 @@ impl Debug for Range { } } -impl From> for Range { - fn from(range: std::ops::Range) -> Range { +impl> From> for Range { + fn from(range: std::ops::Range) -> Range { let std::ops::Range { start, end } = range; - Range { start, end } + Range { start: start.into(), end: end.into() } + } +} + +impl> PartialEq> for Range { + fn eq(&self, other: &std::ops::Range) -> bool { + (&self.start, &self.end) == (&other.start, &other.end) } } @@ -121,6 +153,24 @@ impl From> for Range { } } +impl Index> for str { + type Output = str; + + fn index(&self, index: Range) -> &Self::Output { + let start = index.start.as_usize(); + let end = index.end.as_usize(); + &self[start..end] + } +} + +impl Index> for String { + type Output = str; + + fn index(&self, index: Range) -> &Self::Output { + &self.as_str()[index] + } +} + // === Conversions === diff --git a/lib/rust/ensogl/component/text/src/buffer/data/rope.rs b/lib/rust/text/src/rope.rs similarity index 100% rename from lib/rust/ensogl/component/text/src/buffer/data/rope.rs rename to lib/rust/text/src/rope.rs diff --git a/lib/rust/ensogl/component/text/src/buffer/data/spans.rs b/lib/rust/text/src/spans.rs similarity index 96% rename from lib/rust/ensogl/component/text/src/buffer/data/spans.rs rename to lib/rust/text/src/spans.rs index 1f5baf7c50..9be973603c 100644 --- a/lib/rust/ensogl/component/text/src/buffer/data/spans.rs +++ b/lib/rust/text/src/spans.rs @@ -2,9 +2,9 @@ use crate::prelude::*; -use super::range::Range; -use super::rope; -use super::unit::*; +use crate::range::Range; +use crate::rope; +use crate::unit::*; diff --git a/lib/rust/ensogl/component/text/src/buffer/data/text.rs b/lib/rust/text/src/text.rs similarity index 88% rename from lib/rust/ensogl/component/text/src/buffer/data/text.rs rename to lib/rust/text/src/text.rs index 013cc7c68b..e935f00b46 100644 --- 
a/lib/rust/ensogl/component/text/src/buffer/data/text.rs +++ b/lib/rust/text/src/text.rs @@ -2,13 +2,14 @@ use crate::prelude::*; -use super::range::Range; -use super::range::RangeBounds; -use super::rope; -use super::rope::Rope; -use super::unit::*; -use crate::selection::Selection; +use crate::range::Range; +use crate::range::RangeBounds; +use crate::rope; +use crate::rope::Rope; +use crate::unit::*; +use crate::prelude::fmt::Formatter; +use enso_types::min; // ============ @@ -109,11 +110,16 @@ impl Text { } } - /// Constrain the selection to values valid inside of the current text buffer. - pub fn snap_selection(&self, selection: Selection) -> Selection { - let start = self.snap_location(selection.start); - let end = self.snap_location(selection.end); - selection.with_start(start).with_end(end) + /// Return the offset to the next codepoint if any. See the [`crate`] documentation to learn + /// more about codepoints. + pub fn next_codepoint_offset(&self, offset: Bytes) -> Option { + self.rope.next_codepoint_offset(offset.as_usize()).map(|t| Bytes(t as i32)) + } + + /// Return the offset to the previous codepoint if any. See the [`crate`] documentation to learn + /// more about codepoints. + pub fn prev_codepoint_offset(&self, offset: Bytes) -> Option { + self.rope.prev_codepoint_offset(offset.as_usize()).map(|t| Bytes(t as i32)) } /// Return the offset to the next grapheme if any. See the documentation of the library to @@ -142,6 +148,13 @@ impl Text { let range = self.crop_byte_range(range); self.rope.edit(range.into_rope_interval(), text.rope); } + + /// Apply the given change on the current text. + /// + /// See also [`Self::replace`]. 
+ pub fn apply_change(&mut self, change: Change>) { + self.replace(change.range, change.text) + } } @@ -390,7 +403,7 @@ impl Text { let mut offset = self.byte_offset_of_line_index(line_index)?; let mut column = 0.column(); while offset < tgt_offset { - match self.next_grapheme_offset(offset) { + match self.next_codepoint_offset(offset) { None => return Err(BoundsError(TooBig)), Some(off) => { offset = off; @@ -567,6 +580,36 @@ impl Text { } +// === Common Prefix and Suffix === + +/// The return value of [`Text::common_prefix_and_suffix`] function. +#[allow(missing_docs)] +#[derive(Clone, Copy, Debug, Default)] +pub struct CommonPrefixAndSuffix { + pub prefix: Bytes, + pub suffix: Bytes, +} + +impl Text { + /// Returns the lengths in bytes of the common prefix and the common suffix. + /// + /// The prefix and the suffix do not overlap, so the sum of their lengths will not exceed + /// the length of either text. + pub fn common_prefix_and_suffix(&self, other: &Text) -> CommonPrefixAndSuffix { + let mut scanner = xi_rope::compare::RopeScanner::new(&self.rope, &other.rope); + let (prefix, suffix) = scanner.find_min_diff_range(); + CommonPrefixAndSuffix { prefix: prefix.into(), suffix: suffix.into() } + } +} + +// === Display === + +impl Display for Text { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(&self.rope, f) + } +} + // =================== // === Conversions === @@ -697,10 +740,6 @@ impl TextCell { self.cell.borrow().snap_location(location) } - pub fn snap_selection(&self, selection: Selection) -> Selection { - self.cell.borrow().snap_selection(selection) - } - pub fn next_grapheme_offset(&self, offset: Bytes) -> Option { self.cell.borrow().next_grapheme_offset(offset) } @@ -860,3 +899,53 @@ impl TextCell { self.cell.borrow().location_of_byte_offset_snapped(offset) } } + + + +// ============== +// === Change === +// ============== + +/// A single change done to the text content. 
+#[derive(Clone, Debug, Default, Eq, PartialEq)] +pub struct Change { + /// Range of old text being replaced. + pub range: Range, + /// The text inserted in place of `range`. + pub text: String, +} + + +impl Change { + /// Create a change being an insert of the `text` at given `offset` (no text will be removed). + pub fn inserted(offset: Metric, text: String) -> Self + where Metric: Copy { + Self { range: Range::new(offset, offset), text } + } + + /// Return new [`Change`] with copied range and a reference to self's string. + pub fn as_ref(&self) -> Change + where Metric: Copy { + Change { range: self.range, text: &self.text } + } +} + + +// === Applying Change === + +impl> Change { + /// Apply the change on the given string. + pub fn apply(&self, target: &mut String) -> Result<(), BoundsError> { + let start_byte = self.range.start.as_usize(); + let end_byte = self.range.end.as_usize(); + target.replace_range(start_byte..end_byte, self.text.as_ref()); + Ok(()) + } + + /// Return a new string being a `target` with this change applied. + pub fn applied(&self, target: &str) -> Result { + let mut string = target.to_owned(); + self.apply(&mut string)?; + Ok(string) + } +} diff --git a/lib/rust/ensogl/component/text/src/buffer/data/unit.rs b/lib/rust/text/src/unit.rs similarity index 60% rename from lib/rust/ensogl/component/text/src/buffer/data/unit.rs rename to lib/rust/text/src/unit.rs index 026767c091..d1557e7b10 100644 --- a/lib/rust/ensogl/component/text/src/buffer/data/unit.rs +++ b/lib/rust/text/src/unit.rs @@ -2,6 +2,7 @@ //! level dependencies in the whole library. use crate::prelude::*; + use enso_types::newtype; use enso_types::unit; @@ -14,6 +15,7 @@ use enso_types::unit; /// Common traits. 
pub mod traits { pub use super::bytes::Into as TRAIT_bytes_into; + pub use super::chars::Into as TRAIT_chars_into; pub use super::column::Into as TRAIT_column_into; pub use super::line::Into as TRAIT_line_into; } @@ -60,6 +62,60 @@ impl From<&usize> for Bytes { +// ============= +// === Chars === +// ============= + +unit! { +/// An offset in the buffer in Rust's chars (being roughly the Unicode code points). +/// +/// See [`crate`] documentation to know more about codepoints. +Chars::chars(i32) +} + +impl Chars { + /// Saturating conversion to `usize`. + pub fn as_usize(self) -> usize { + self.value.max(0) as usize + } +} + +impl> chars::Into for Range { + type Output = Range; + fn chars(self) -> Self::Output { + let start = self.start.into(); + let end = self.end.into(); + Range { start, end } + } +} + +impl From for Chars { + fn from(t: usize) -> Self { + (t as i32).into() + } +} + +impl From<&usize> for Chars { + fn from(t: &usize) -> Self { + (*t as i32).into() + } +} + +impl serde::Serialize for Chars { + fn serialize(&self, serializer: S) -> Result + where S: serde::Serializer { + i32::from(self).serialize(serializer) + } +} + +impl<'de> serde::Deserialize<'de> for Chars { + fn deserialize(deserializer: D) -> Result + where D: serde::Deserializer<'de> { + i32::deserialize(deserializer).map(|val| val.into()) + } +} + + // ============ // === Line === // ============ @@ -104,7 +160,17 @@ impl From<&usize> for Line { // TODO: Improvement idea. Create `i32Saturated` type which will have all operations saturated. // This will make this unit safer. unit! { -/// A type representing horizontal measurements expressed as number of grapheme clusters. +/// A type representing horizontal measurements expressed as number of Rust's chars (being roughly +/// the Unicode code points). +/// +/// See [`crate`] documentation to know more about codepoints. 
+/// +/// Note: The reason for representing Column as a code point is that our text rendering engine +/// displays each codepoint as a separate glyph (so it does not support the _grapheme clusters_). +/// This should be fixed when doing +/// https://www.pivotaltracker.com/n/projects/2539304/stories/180392693: after that, the column +/// should be measured in grapheme clusters, to have Text Area cursors behave correctly (and the +/// usages shall then be fixed, e.g. [`crate::text::Text::column_of_byte_offset`]). Column::column(i32) } @@ -141,8 +207,8 @@ impl From<&usize> for Column { newtype! { /// A type representing 2d measurements. Location { - line : Line, - column : Column, + line: Line, + column: Column, }} impl Location { diff --git a/lib/rust/types/src/unit.rs b/lib/rust/types/src/unit.rs index 8010aee73f..46f33c9684 100644 --- a/lib/rust/types/src/unit.rs +++ b/lib/rust/types/src/unit.rs @@ -62,6 +62,7 @@ macro_rules! unsigned_unit { pub mod $vname { use super::*; use std::ops::AddAssign; + use std::ops::SubAssign; $crate::newtype_struct! {$(#$meta)* $name {value : $field_type}} $crate::impl_UNIT_x_UNIT_to_UNIT! {Sub::sub for $name} @@ -72,6 +73,9 @@ macro_rules! unsigned_unit { $crate::impl_FIELD_x_UNIT_to_UNIT! {Mul::mul for $name :: $field_type} $crate::impl_UNIT_x_UNIT_to_FIELD! {Div::div for $name :: $field_type} $crate::impl_UNIT_x_UNIT! {AddAssign::add_assign for $name} + $crate::impl_UNIT_x_UNIT! {SubAssign::sub_assign for $name} + + $crate::impl_unit_display! {$name::value} pub trait Into { type Output; @@ -102,12 +106,16 @@ macro_rules! unsigned_unit_proxy { pub mod $vname { use super::*; use std::ops::AddAssign; + use std::ops::SubAssign; $crate::newtype_struct! {$(#$meta)* $name {value : $field_type}} $crate::impl_UNIT_x_UNIT_to_UNIT! {Sub::sub for $name} $crate::impl_UNIT_x_UNIT_to_UNIT! {Add::add for $name} $crate::impl_UNIT_x_UNIT_to_UNIT! {SaturatingAdd::saturating_add for $name} $crate::impl_UNIT_x_UNIT! 
{AddAssign::add_assign for $name} + $crate::impl_UNIT_x_UNIT! {SubAssign::sub_assign for $name} + + $crate::impl_unit_display! {$name::value} pub trait Into { type Output; @@ -138,6 +146,7 @@ macro_rules! unsigned_unit_float_like { pub mod $vname { use super::*; use std::ops::AddAssign; + use std::ops::SubAssign; $crate::newtype_struct_float_like! {$(#$meta)* $name {value : $field_type}} $crate::impl_UNIT_x_UNIT_to_UNIT! {Sub::sub for $name} @@ -148,6 +157,9 @@ macro_rules! unsigned_unit_float_like { $crate::impl_FIELD_x_UNIT_to_UNIT! {Mul::mul for $name :: $field_type} $crate::impl_UNIT_x_UNIT_to_FIELD! {Div::div for $name :: $field_type} $crate::impl_UNIT_x_UNIT! {AddAssign::add_assign for $name} + $crate::impl_UNIT_x_UNIT! {SubAssign::sub_assign for $name} + + $crate::impl_unit_display! {$name::value} pub trait Into { type Output; @@ -178,6 +190,7 @@ macro_rules! signed_unit { pub mod $vname { use super::*; use std::ops::AddAssign; + use std::ops::SubAssign; $crate::newtype_struct! {$(#$meta)* $name {value : $field_type}} $crate::impl_UNIT_x_UNIT_to_UNIT! {Sub::sub for $name} @@ -188,8 +201,11 @@ macro_rules! signed_unit { $crate::impl_FIELD_x_UNIT_to_UNIT! {Mul::mul for $name :: $field_type} $crate::impl_UNIT_x_UNIT_to_FIELD! {Div::div for $name :: $field_type} $crate::impl_UNIT_x_UNIT! {AddAssign::add_assign for $name} + $crate::impl_UNIT_x_UNIT! {SubAssign::sub_assign for $name} $crate::impl_UNIT_to_UNIT! {Neg::neg for $name} + $crate::impl_unit_display! {$name::value} + pub trait Into { type Output; fn $vname(self) -> Self::Output; @@ -226,6 +242,7 @@ macro_rules! signed_unit_float_like { pub mod $vname { use super::*; use std::ops::AddAssign; + use std::ops::SubAssign; $crate::newtype_struct_float_like! {$(#$meta)* $name {value : $field_type}} $crate::impl_UNIT_x_UNIT_to_UNIT! {Sub::sub for $name} @@ -235,8 +252,11 @@ macro_rules! signed_unit_float_like { $crate::impl_FIELD_x_UNIT_to_UNIT! 
{Mul::mul for $name :: $field_type} $crate::impl_UNIT_x_UNIT_to_FIELD! {Div::div for $name :: $field_type} $crate::impl_UNIT_x_UNIT! {AddAssign::add_assign for $name} + $crate::impl_UNIT_x_UNIT! {SubAssign::sub_assign for $name} $crate::impl_UNIT_to_UNIT! {Neg::neg for $name} + $crate::impl_unit_display! {$name::value} + /// Unit conversion and associated method. It has associated type in order to allow /// complex conversions, like `(10,10).px()` be converted the same way as /// `(10.px(),10.px())`. @@ -275,6 +295,7 @@ macro_rules! signed_unit_float_like { macro_rules! newtype { ($(#$meta:tt)* $name:ident { $($field:ident : $field_type:ty),* $(,)? }) => { use std::ops::AddAssign; + use std::ops::SubAssign; $crate::newtype_struct! {$(#$meta)* $name { $($field : $field_type),*}} @@ -291,6 +312,12 @@ macro_rules! newtype { *self = Self { $($field:self.$field.add(rhs.$field)),* } } } + + impl SubAssign<$name> for $name { + fn sub_assign(&mut self, rhs:Self) { + *self = Self { $($field:self.$field.sub(rhs.$field)),* } + } + } }; } @@ -759,3 +786,15 @@ macro_rules! impl_T_x_FIELD { } )*}; } + +/// Implements [`std::fmt::Display`] for a unit type: prints the wrapped field's value followed by the type name in square brackets (`<value> [<Name>]`). +#[macro_export] +macro_rules! impl_unit_display { + ($name:ident :: $field:ident) => { + impl std::fmt::Display for $name { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} [{}]", self.$field, stringify!($name)) + } + } + }; +} diff --git a/lib/rust/web/Cargo.toml b/lib/rust/web/Cargo.toml index 4739c9845b..61c93f7f5a 100644 --- a/lib/rust/web/Cargo.toml +++ b/lib/rust/web/Cargo.toml @@ -10,7 +10,7 @@ edition = "2018" default = ["console_error_panic_hook"] [dependencies] -enso-data = { path = "../data" } +enso-data-structures = { path = "../data-structures" } enso-logger = { path = "../logger" } enso-prelude = { path = "../prelude", features = ["wasm-bindgen"] } console_error_panic_hook = { version = "0.1.1", optional = true }