diff --git a/gui/src/rust/Cargo.lock b/gui/src/rust/Cargo.lock index ef15376e116..30bc26397c6 100644 --- a/gui/src/rust/Cargo.lock +++ b/gui/src/rust/Cargo.lock @@ -948,6 +948,13 @@ dependencies = [ "slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "fuzzly" +version = "0.1.0" +dependencies = [ + "enso-prelude 0.1.0", +] + [[package]] name = "generator" version = "0.6.21" @@ -1139,6 +1146,7 @@ dependencies = [ "failure 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "flo_stream 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "futures 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "fuzzly 0.1.0", "ide-view 0.1.0", "itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)", "js-sys 0.3.35 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/gui/src/rust/Cargo.toml b/gui/src/rust/Cargo.toml index 882b34b3cff..99e18c9c6ca 100644 --- a/gui/src/rust/Cargo.toml +++ b/gui/src/rust/Cargo.toml @@ -28,6 +28,7 @@ members = [ "lib/enso-shapely/macros", "lib/eval-tt", "lib/frp", + "lib/fuzzly", "lib/generics", "lib/logger", "lib/optics", diff --git a/gui/src/rust/ide/Cargo.toml b/gui/src/rust/ide/Cargo.toml index a14bddaf62f..c601bd8e9a0 100644 --- a/gui/src/rust/ide/Cargo.toml +++ b/gui/src/rust/ide/Cargo.toml @@ -12,6 +12,7 @@ enso-callback = { version = "0.1.0" , path = "../lib/callback" ensogl = { version = "0.1.0" , path = "../ensogl" } ensogl-text-msdf-sys = { version = "0.1.0" , path = "../ensogl/lib/text/msdf-sys" } ensogl-system-web = { version = "0.1.0" , path = "../lib/system/web" } +fuzzly = { version = "0.1.0" , path = "../lib/fuzzly" } data = { version = "0.1.0" , path = "../lib/data" } enso-frp = { version = "0.1.0" , path = "../lib/frp" } enso-prelude = { version = "0.1.0" , path = "../lib/enso-prelude" } diff --git a/gui/src/rust/ide/lib/json-rpc/src/macros.rs b/gui/src/rust/ide/lib/json-rpc/src/macros.rs index 0781a1eb095..416c1c142c1 
100644 --- a/gui/src/rust/ide/lib/json-rpc/src/macros.rs +++ b/gui/src/rust/ide/lib/json-rpc/src/macros.rs @@ -181,7 +181,7 @@ macro_rules! make_rpc_methods { fn drop(&mut self) { if self.require_all_calls.get() && !std::thread::panicking() { $(assert!(self.expect.$method.borrow().is_empty(), - "Didn't make expected call {}");)* + "Didn't make expected call {}");)* //TODO[ao] print method name. } } } diff --git a/gui/src/rust/ide/src/controller/searcher.rs b/gui/src/rust/ide/src/controller/searcher.rs index 6a6ae9503c9..7474fe815ef 100644 --- a/gui/src/rust/ide/src/controller/searcher.rs +++ b/gui/src/rust/ide/src/controller/searcher.rs @@ -1,4 +1,5 @@ //! This module contains all structures related to Searcher Controller. +pub mod suggestion; use crate::prelude::*; @@ -15,23 +16,27 @@ use enso_protocol::language_server; use flo_stream::Subscriber; use parser::Parser; +pub use suggestion::Suggestion; -// ======================= -// === Suggestion List === -// ======================= -/// Suggestion for input completion: possible functions, arguments, etc. -pub type CompletionSuggestion = Rc; +// ===================== +// === Notifications === +// ===================== -/// A single suggestion on the Searcher suggestion list. -#[derive(Clone,CloneRef,Debug,Eq,PartialEq)] -pub enum Suggestion { - /// Suggestion for input completion: possible functions, arguments, etc. - Completion(CompletionSuggestion) - // In future, other suggestion types will be added (like suggestions of actions, etc.). +/// The notification emitted by Searcher Controller +#[derive(Copy,Clone,Debug,Eq,PartialEq)] +pub enum Notification { + /// A new Suggestion list is available. + NewSuggestionList } + + +// =================== +// === Suggestions === +// =================== + /// List of suggestions available in Searcher. #[derive(Clone,CloneRef,Debug)] pub enum Suggestions { @@ -40,7 +45,7 @@ pub enum Suggestions { /// The suggestion list is loaded. 
#[allow(missing_docs)] Loaded { - list : Rc> + list : Rc }, /// Loading suggestion list resulted in error. Error(Rc) @@ -58,7 +63,7 @@ impl Suggestions { } /// Get the list of suggestions. Returns None if still loading or error was returned. - pub fn list(&self) -> Option<&Vec> { + pub fn list(&self) -> Option<&suggestion::List> { match self { Self::Loaded {list} => Some(list), _ => None, @@ -74,18 +79,6 @@ impl Default for Suggestions { -// ===================== -// === Notifications === -// ===================== - -/// The notification emitted by Searcher Controller -#[derive(Copy,Clone,Debug,Eq,PartialEq)] -pub enum Notification { - /// A new Suggestion list is available. - NewSuggestionList -} - - // =================== // === Input Parts === // =================== @@ -275,7 +268,7 @@ pub enum Mode { #[allow(missing_docs)] pub struct FragmentAddedByPickingSuggestion { pub id : CompletedFragmentId, - pub picked_suggestion : CompletionSuggestion, + pub picked_suggestion : suggestion::Completion, } impl FragmentAddedByPickingSuggestion { @@ -418,13 +411,16 @@ impl Searcher { pub fn set_input(&self, new_input:String) -> FallibleResult<()> { debug!(self.logger, "Manually setting input to {new_input}"); let parsed_input = ParsedInput::new(new_input,&self.parser)?; - let old_expr = self.data.borrow().input.expression.clone(); - let new_expr = parsed_input.expression.clone(); + let old_expr = self.data.borrow().input.expression.repr(); + let new_expr = parsed_input.expression.repr(); self.data.borrow_mut().input = parsed_input; self.invalidate_fragments_added_by_picking(); - if old_expr.repr() != new_expr.repr() { + if old_expr != new_expr { self.reload_list() + } else if let Suggestions::Loaded {list} = self.data.borrow().suggestions.clone_ref() { + list.update_filtering(&self.data.borrow().input.pattern); + executor::global::spawn(self.notifier.publish(Notification::NewSuggestionList)); } Ok(()) } @@ -441,7 +437,7 @@ impl Searcher { /// Code that will be inserted by 
expanding given suggestion at given location. /// /// Code depends on the location, as the first fragment can introduce `this` variable access. - fn code_to_insert(&self, suggestion:&CompletionSuggestion, id:CompletedFragmentId) -> String { + fn code_to_insert(&self, suggestion:&suggestion::Completion, id:CompletedFragmentId) -> String { let var = self.this_var_for(id); suggestion.code_to_insert(var) } @@ -452,7 +448,7 @@ impl Searcher { /// suggestion will be remembered, and the searcher's input will be updated and returned by this /// function. pub fn pick_completion - (&self, picked_suggestion:CompletionSuggestion) -> FallibleResult { + (&self, picked_suggestion:suggestion::Completion) -> FallibleResult { let id = self.data.borrow().input.next_completion_id(); let code_to_insert = self.code_to_insert(&picked_suggestion,id); let added_ast = self.parser.parse_line(&code_to_insert)?; @@ -641,8 +637,8 @@ impl Searcher { /// Process multiple completion responses from the engine into a single list of suggestion. fn suggestions_from_responses (&self, responses:Vec>) - -> FallibleResult> { - let mut suggestions = Vec::new(); + -> FallibleResult { + let suggestions = suggestion::List::new(); for response in responses { let response = response?; let entries = response.results.iter().filter_map(|id| { @@ -655,10 +651,11 @@ impl Searcher { }); suggestions.extend(entries); } + suggestions.update_filtering(&self.data.borrow().input.pattern); Ok(suggestions) } - fn possible_function_calls(&self) -> Vec { + fn possible_function_calls(&self) -> Vec { let opt_result = || { let call_ast = self.data.borrow().input.expression.as_ref()?.func.clone_ref(); let call = SimpleFunctionCall::try_new(&call_ast)?; @@ -699,7 +696,7 @@ impl Searcher { /// Get the suggestion that was selected by the user into the function. /// /// This suggestion shall be used to request better suggestions from the engine. 
- fn intended_function_suggestion(&self) -> Option { + fn intended_function_suggestion(&self) -> Option { let id = CompletedFragmentId::Function; let fragment = self.data.borrow().find_picked_fragment(id).cloned(); fragment.map(|f| f.picked_suggestion.clone_ref()) @@ -827,11 +824,11 @@ mod test { data : MockData, test : TestWithLocalPoolExecutor, searcher : Searcher, - entry1 : CompletionSuggestion, - entry2 : CompletionSuggestion, - entry3 : CompletionSuggestion, - entry4 : CompletionSuggestion, - entry9 : CompletionSuggestion, + entry1 : suggestion::Completion, + entry2 : suggestion::Completion, + entry3 : suggestion::Completion, + entry4 : suggestion::Completion, + entry9 : suggestion::Completion, } impl Fixture { @@ -989,7 +986,7 @@ mod test { data.selected_node = true; // We expect following calls: // 1) for the function - with the "this" filled (if the test case says so); - // 2) for subsequent completion - without "this" + // 2) for subsequent completions - without "this" data.expect_completion(client,case.sets_this.as_some(mock_type),None,&[1,5,9]); data.expect_completion(client,None,None,&[1,5,9]); data.expect_completion(client,None,None,&[1,5,9]); @@ -1124,7 +1121,7 @@ mod test { assert!(searcher.suggestions().is_loading()); test.run_until_stalled(); let expected_list = vec![Suggestion::Completion(entry1),Suggestion::Completion(entry9)]; - assert_eq!(searcher.suggestions().list(), Some(&expected_list)); + assert_eq!(searcher.suggestions().list().unwrap().to_suggestion_vec(), expected_list); let notification = subscriber.next().boxed_local().expect_ready(); assert_eq!(notification, Some(Notification::NewSuggestionList)); } diff --git a/gui/src/rust/ide/src/controller/searcher/suggestion.rs b/gui/src/rust/ide/src/controller/searcher/suggestion.rs new file mode 100644 index 00000000000..2a9dd476e9a --- /dev/null +++ b/gui/src/rust/ide/src/controller/searcher/suggestion.rs @@ -0,0 +1,161 @@ +//! 
All structures related to the suggestion list provided by SearcherController. + +use crate::prelude::*; + + +// =================== +// === Suggestion === +// =================== + +/// Suggestion for input completion: possible functions, arguments, etc. +pub type Completion = Rc; + +/// A single suggestion on the Searcher suggestion list. +#[derive(Clone,CloneRef,Debug,Eq,PartialEq)] +pub enum Suggestion { + /// Suggestion for input completion: possible functions, arguments, etc. + Completion(Completion) + // In the future, other suggestion types will be added (like suggestions of actions, etc.). +} + +impl Suggestion { + /// The suggestion caption (suggested function name, or action name, etc.). + pub fn caption(&self) -> &String { + match self { + Self::Completion(completion) => &completion.name + } + } +} + + + +// ================== +// === List Entry === +// ================== + +/// Information on how the suggestion list entry matches the filtering pattern. +#[allow(missing_docs)] +#[derive(Clone,Debug,PartialEq)] +pub enum MatchInfo { + DoesNotMatch, + Matches {subsequence:fuzzly::Subsequence} +} + +/// A single suggestion list entry. +#[allow(missing_docs)] +#[derive(Clone,Debug)] +pub struct ListEntry { + pub match_info : MatchInfo, + pub suggestion : Suggestion, +} + +impl ListEntry { + /// Update the current match info according to the new filtering pattern. + pub fn update_matching_info(&mut self, pattern:impl Str) { + let matches = fuzzly::matches(self.suggestion.caption(),pattern.as_ref()); + let subsequence = matches.and_option_from(|| { + let metric = fuzzly::metric::default(); + fuzzly::find_best_subsequence(self.suggestion.caption(),pattern,metric) + }); + self.match_info = match subsequence { + Some(subsequence) => MatchInfo::Matches {subsequence}, + None => MatchInfo::DoesNotMatch, + }; + } + + /// Compare how well two entries match the filtering pattern. + /// + /// The "greater" entry matches better.
+ pub fn compare_match_scores(&self, rhs:&ListEntry) -> std::cmp::Ordering { + use MatchInfo::*; + use std::cmp::Ordering::*; + match (&self.match_info,&rhs.match_info) { + (DoesNotMatch ,DoesNotMatch ) => Equal, + (DoesNotMatch ,Matches {..} ) => Less, + (Matches {..} ,DoesNotMatch ) => Greater, + (Matches {subsequence:lhs},Matches {subsequence:rhs}) => lhs.compare_scores(rhs), + } + } +} + +impl From for ListEntry { + fn from(suggestion:Suggestion) -> Self { + let subsequence = default(); + let match_info = MatchInfo::Matches {subsequence}; + ListEntry {match_info,suggestion} + } +} + + + +// ============ +// === List === +// ============ + +/// Suggestion list. +/// +/// This structure should be notified about filtering changes using the `update_filtering` function. +#[derive(Clone,Debug,Default)] +pub struct List { + entries : RefCell> +} + +impl List { + /// Create a new empty list. + pub fn new() -> Self { + default() + } + + /// Create a list from suggestions. + /// + /// The list will assume that the filtering pattern is an empty string. + pub fn from_suggestions(suggestions:impl IntoIterator) -> Self { + Self {entries:RefCell::new(suggestions.into_iter().map(ListEntry::from).collect())} + } + + /// Update the list filtering. + /// + /// The "matching score" of each entry is recalculated against the given pattern and the entries + /// are re-ordered, so the best matches will go first. + pub fn update_filtering(&self, pattern:impl Str) { + let mut entries_mut = self.entries.borrow_mut(); + for entry in entries_mut.iter_mut() { + entry.update_matching_info(pattern.as_ref()); + } + entries_mut.sort_by(|l,r| l.compare_match_scores(r).reverse()); + } + + /// Length of the suggestion list. + pub fn len(&self) -> usize { self.entries.borrow().len() } + + /// Check if the list is empty. + pub fn is_empty(&self) -> bool { self.entries.borrow().is_empty() } + + /// Iterate over suggestion entries.
+ pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + let existing_ids = (0..self.len()).take_while(move |id| *id < self.len()); + existing_ids.filter_map(move |id| self.entries.borrow().get(id).cloned()) + } + + /// Extend the list with new suggestions. + /// + /// The new suggestions will be put at the end, regardless of the current filtering. This function + /// is meant to be a part of the list's initialization. + pub fn extend>(&self, iter: T) { + self.entries.borrow_mut().extend(iter.into_iter().map(ListEntry::from)) + } + + /// Convert to the suggestion vector. + /// + /// Used for testing. + pub fn to_suggestion_vec(&self) -> Vec { + self.entries.borrow().iter().map(|entry| entry.suggestion.clone_ref()).collect() + } +} + +impl From for List +where IntoIter : IntoIterator { + fn from(suggestions:IntoIter) -> Self { + Self::from_suggestions(suggestions) + } +} diff --git a/gui/src/rust/lib/fuzzly/Cargo.toml b/gui/src/rust/lib/fuzzly/Cargo.toml new file mode 100644 index 00000000000..1265a98da5d --- /dev/null +++ b/gui/src/rust/lib/fuzzly/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "fuzzly" +version = "0.1.0" +authors = ["Enso Team "] +edition = "2018" + +[lib] +crate-type = ["rlib", "cdylib"] + +[dependencies] +enso-prelude = { version = "0.1.0" , path = "../enso-prelude" } diff --git a/gui/src/rust/lib/fuzzly/src/lib.rs b/gui/src/rust/lib/fuzzly/src/lib.rs new file mode 100644 index 00000000000..a0bc99c0f1f --- /dev/null +++ b/gui/src/rust/lib/fuzzly/src/lib.rs @@ -0,0 +1,30 @@ +//! Fuzzly Search Utilities. +//! +//! This crate is designed to be used in various search engines; when you get the list of names +//! matching the given pattern, the next step is to order the items, so the best matches +//! are listed first. In such a case the `find_best_subsequence` function may be used to compute +//! a score (ordering priority) for each element. +//! +//! The metrics used for scoring may be adjusted by implementing `Metric` trait, or by customizing +//!
parameters of metrics defined in `metric` module. +#![feature(option_result_contains)] + +#![warn(missing_docs)] +#![warn(trivial_casts)] +#![warn(trivial_numeric_casts)] +#![warn(unused_import_braces)] +#![warn(unused_qualifications)] +#![warn(unsafe_code)] +#![warn(missing_copy_implementations)] +#![warn(missing_debug_implementations)] + +pub mod subsequence_graph; +pub mod metric; +pub mod score; + +pub use enso_prelude as prelude; +pub use metric::Metric; +pub use subsequence_graph::Graph as SubsequenceGraph; +pub use score::Subsequence; +pub use score::matches; +pub use score::find_best_subsequence; diff --git a/gui/src/rust/lib/fuzzly/src/metric.rs b/gui/src/rust/lib/fuzzly/src/metric.rs new file mode 100644 index 00000000000..1c2a95100c1 --- /dev/null +++ b/gui/src/rust/lib/fuzzly/src/metric.rs @@ -0,0 +1,139 @@ +//! The Metric trait definition and standard implementations. +use crate::prelude::*; + +use crate::subsequence_graph; + + + +// ============= +// === Trait === +// ============= + +/// Provides functions measuring how well a pattern matches a specific text in various aspects. +/// +/// The pattern match is represented as a path in `SubsequenceGraph` (see its docs for details). +/// Its score is counted as a sum of measures "how good is the vertex/edge" for each vertex and +/// edge on the path. +pub trait Metric { + /// How good is the vertex on the path in the Subsequence Graph. + fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32; + + /// How good is the edge on the path in the Subsequence Graph. + fn measure_edge(&self, edge:subsequence_graph::Edge, text:&str, pattern:&str) -> f32; + + /// Return a new metric being a sum of this and `rhs`.
+ fn sum(self, rhs:Rhs) -> Sum where Self:Sized { Sum(self, rhs) } +} + + + +// ========================== +// === The Default Metric === +// ========================== + +/// The default metric, recommended by this library. +pub fn default() -> impl Metric { + SubsequentLettersBonus::default().sum(CaseMatchBonus::default()) +} + + + +// ======================= +// === Implementations === +// ======================= + +// === Sum === + +/// The structure representing the sum of two metrics. +#[derive(Copy,Clone,Debug,Default)] +pub struct Sum(Metrics1,Metrics2); + +impl Metric for Sum { + fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32 { + let Self(left,right) = self; + let left = left.measure_vertex(vertex,text,pattern); + let right = right.measure_vertex(vertex,text,pattern); + left + right + } + + fn measure_edge(&self, edge:subsequence_graph::Edge, text:&str, pattern:&str) -> f32 { + let Self(left,right) = self; + let left = left.measure_edge(edge,text,pattern); + let right = right.measure_edge(edge,text,pattern); + left + right + } +} + + +// === SubsequentLettersBonus === + +/// A metric which measures how far the matched letters are from each other, and how far the first +/// matched char is from the text beginning and the last matched char from the text ending. +#[derive(Copy,Clone,Debug)] +pub struct SubsequentLettersBonus { + /// The base weight of this metric. + pub base_weight:f32, + /// How important is the distance of the first matched char from the text beginning. + pub beginning_weight:f32, + /// How important is the distance of the last matched char from the text ending.
+ pub ending_weight:f32, +} + +impl Default for SubsequentLettersBonus { + fn default() -> Self { + SubsequentLettersBonus { + base_weight : 1.0, + beginning_weight : 0.5, + ending_weight : 0.01, + } + } +} + +impl Metric for SubsequentLettersBonus { + /// Bonus for matching the first pattern char close to the text beginning and the last pattern + /// char close to the text ending. + fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text: &str, pattern: &str) -> f32 { + // `vertex.layer` and `vertex.position_in_text` are char indices, so both lengths are counted + // in chars. The last layer is determined by the pattern length, not by the text length. + let pattern_len = pattern.chars().count(); + let text_len = text.chars().count(); + let is_first_pattern_char = vertex.layer == 0; + let is_last_pattern_char = pattern_len.checked_sub(1).contains(&vertex.layer); + let first_char_bonus = if is_first_pattern_char { + self.base_weight / (vertex.position_in_text as f32 + 1.0) * self.beginning_weight + } else {0.0}; + let last_char_bonus = if is_last_pattern_char { + self.base_weight / (text_len - vertex.position_in_text) as f32 * self.ending_weight + } else {0.0}; + first_char_bonus + last_char_bonus + } + + /// Bonus inversely proportional to the distance between two consecutively matched chars. + fn measure_edge(&self, edge:subsequence_graph::Edge, _text: &str, _pattern: &str) -> f32 { + self.base_weight / (edge.to.position_in_text - edge.from.position_in_text) as f32 + } +} + + +// === CaseMatchBonus === + +/// A metric which rewards the matches where the letter case matches. +#[derive(Copy,Clone,Debug)] +pub struct CaseMatchBonus { + /// A score added for each char matching.
+ pub bonus_per_char : f32, +} + +impl Default for CaseMatchBonus { + fn default() -> Self { + CaseMatchBonus { + bonus_per_char : 0.01, + } + } +} + +impl Metric for CaseMatchBonus { + fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32 { + let text_ch = text.chars().nth(vertex.position_in_text); + let pattern_ch = pattern.chars().nth(vertex.layer); + match (text_ch,pattern_ch) { + (Some(w),Some(q)) if w.is_uppercase() == q.is_uppercase() => self.bonus_per_char, + _ => 0.0, + } + } + + fn measure_edge(&self, _:subsequence_graph::Edge, _:&str, _:&str) -> f32 { 0.0 } +} diff --git a/gui/src/rust/lib/fuzzly/src/score.rs b/gui/src/rust/lib/fuzzly/src/score.rs new file mode 100644 index 00000000000..9c6ade812ed --- /dev/null +++ b/gui/src/rust/lib/fuzzly/src/score.rs @@ -0,0 +1,312 @@ +//! Scoring how well the given text matches the given pattern. + +use crate::prelude::*; + +use crate::metric::Metric; +use crate::subsequence_graph; +use crate::SubsequenceGraph; + +use std::collections::hash_map::Entry; + + + +// ===================== +// === VerticesScore === +// ===================== + +/// The description of a path which finishes at some specific vertex. +#[derive(Clone,Copy,Debug)] +struct InputPath { + value : f32, + from : subsequence_graph::Vertex, +} + +/// The score of a single vertex in the graph. +/// +/// The score is a sum of the measure of the vertex alone, and the best score of an input path. +/// The `best_input_path` is updated during the scoring algorithm run. See the +/// `find_best_subsequence` function.
+#[derive(Copy,Clone,Debug,Default)] +struct VertexScore { + my_measure : f32, + best_input_path : Option, +} + +impl VertexScore { + fn new(my_measure:f32) -> Self { + let best_input_path = default(); + VertexScore {my_measure,best_input_path} + } + + fn update_input_path(&mut self, candidate:InputPath) { + let new_score = match self.best_input_path.take() { + Some(score) if score.value < candidate.value => candidate, + Some(score) => score, + None => candidate, + }; + self.best_input_path = Some(new_score) + } + + fn score(&self) -> f32 { + self.my_measure + self.best_input_path.map_or(0.0, |s| s.value) + } +} + +/// All graph's vertices' scores. +/// +/// Used in the `find_best_subsequence` function. +#[derive(Debug,Default)] +struct VerticesScores(HashMap); + +impl VerticesScores { + fn init_vertex(&mut self, vertex:subsequence_graph::Vertex, measure:f32) { + let Self(scores) = self; + scores.insert(vertex,VertexScore::new(measure)); + } + + fn update_input_path(&mut self, edge:subsequence_graph::Edge, value:f32) { + let Self(scores) = self; + let subsequence_graph::Edge{from,to} = edge; + let candidate = InputPath{value,from}; + match scores.entry(to) { + Entry::Occupied(mut entry) => { entry.get_mut().update_input_path(candidate) } + Entry::Vacant(entry) => { + let mut vertex = VertexScore::default(); + vertex.update_input_path(candidate); + entry.insert(vertex); + } + } + } + + fn get_score(&self, vertex:subsequence_graph::Vertex) -> f32 { + let Self(scores) = self; + scores.get(&vertex).map(|v| v.score()).unwrap_or(0.0) + } + + fn best_vertex + (&self, vertices:impl Iterator) + -> Option { + let pairs = vertices.map(|v| (v,self.get_score(v))); + let best_pair = pairs.fold(None, |prev,(vertex,score)| { + match prev { + Some((_,prev_score)) if score > prev_score => Some((vertex,score)), + Some(prev) => Some(prev), + None => Some((vertex,score)), + } + }); + best_pair.map(|(vertex,_)| vertex) + } + + fn best_path_rev(&self, end:subsequence_graph::Vertex) ->
BestPathRevIter { + BestPathRevIter { + scores : self, + next_vertex : Some(end), + } + } +} + +struct BestPathRevIter<'a> { + scores : &'a VerticesScores, + next_vertex : Option +} + +impl<'a> Iterator for BestPathRevIter<'a> { + type Item = subsequence_graph::Vertex; + + fn next(&mut self) -> Option { + let next = std::mem::take(&mut self.next_vertex); + self.next_vertex = (|| { + let VerticesScores(scores) = self.scores; + Some(scores.get(&next?)?.best_input_path?.from) + })(); + next + } +} + + + +// =================== +// === Score Match === +// =================== + +/// Fast-check if the pattern matches the text. +/// +/// This is a faster way than calling `find_best_subsequence(text,pattern,metric).is_some()`, +/// therefore it's recommended to call this function before scoring when we are not sure if the +/// pattern actually matches the text. The chars are compared ignoring the ASCII letter case. +pub fn matches(text:impl Str, pattern:impl Str) -> bool { + let mut pattern_chars = pattern.as_ref().chars(); + let mut next_pattern_char = pattern_chars.next(); + for text_char in text.as_ref().chars() { + match next_pattern_char { + Some(ch) if ch.eq_ignore_ascii_case(&text_char) => { + next_pattern_char = pattern_chars.next() + }, + Some(_) => {}, + None => { break; } + } + } + next_pattern_char.is_none() +} + +/// The result of the `find_best_subsequence` function. +#[derive(Clone,Debug,Default,PartialEq)] +pub struct Subsequence { + /// The score of the found subsequence. + pub score:f32, + /// Indices of `text`'s chars which belong to the subsequence. + pub indices:Vec +} + +impl Subsequence { + /// Compare scores of subsequences. + /// + /// The `f32` does not implement total ordering, however that does not help when we want to + /// sort items by their matching score. Therefore this function assumes that all NaNs are the + /// lowest values.
+ pub fn compare_scores(&self, rhs:&Subsequence) -> std::cmp::Ordering { + if self.score.is_nan() && rhs.score.is_nan() { std::cmp::Ordering::Equal } + else if self.score.is_nan() { std::cmp::Ordering::Less } + else if rhs.score.is_nan() { std::cmp::Ordering::Greater } + else if self.score < rhs.score { std::cmp::Ordering::Less } + else if self.score > rhs.score { std::cmp::Ordering::Greater } + else { std::cmp::Ordering::Equal } + } +} + +/// Find the best subsequence in `text` which case-insensitively equals `pattern` in terms of the +/// given `metric`. +/// +/// Returns `None` if `text` does not match `pattern`. Empty `pattern` gives 0.0 score. +/// +/// ## Algorithm specification +/// +/// In essence, it looks through all possible subsequences of `text` being the `pattern` and picks +/// the one with the best score. Not directly (because there may be a lot of such subsequences), but +/// by building the `SubsequenceGraph` and computing the best score for each vertex. See +/// `SubsequenceGraph` docs for a detailed description of the graph.
+pub fn find_best_subsequence +(text:impl Str, pattern:impl Str, metric:impl Metric) -> Option { + let text = text.as_ref(); + let pattern = pattern.as_ref(); + if pattern.is_empty() { + Some(default()) + } else { + let last_layer = pattern.chars().count() - 1; + let mut scores = VerticesScores::default(); + let graph = SubsequenceGraph::new(text,pattern); + for vertex in &graph.vertices { + let measure = metric.measure_vertex(*vertex,text,pattern); + scores.init_vertex(*vertex,measure); + } + for edge in &graph.edges { + let from_score = scores.get_score(edge.from); + let input_score = from_score + metric.measure_edge(*edge,text,pattern); + scores.update_input_path(*edge,input_score); + } + let end_vertices = graph.vertices_in_layer(last_layer).cloned(); + let best_vertex = scores.best_vertex(end_vertices)?; + let score = scores.get_score(best_vertex); + let best_path_rev = scores.best_path_rev(best_vertex); + let mut indices = best_path_rev.map(|v| v.position_in_text).collect_vec(); + indices.reverse(); + Some(Subsequence {score,indices}) + } +} + + + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +mod test { + use super::*; + + mod mock_metric { + use super::*; + + use crate::metric; + + #[derive(Debug,Default)] + pub struct WordIndex; + + impl Metric for WordIndex { + fn measure_vertex + (&self, vertex:subsequence_graph::Vertex, _text:&str, _pattern:&str) -> f32 { + vertex.position_in_text as f32 + } + + fn measure_edge(&self, _:subsequence_graph::Edge, _:&str, _:&str) -> f32 { 0.0 } + } + + #[derive(Debug,Default)] + pub struct SquareEdgeLength; + + impl Metric for SquareEdgeLength { + fn measure_vertex(&self, _:subsequence_graph::Vertex, _:&str, _:&str) -> f32 { 0.0 } + + fn measure_edge(&self, edge:subsequence_graph::Edge, _text:&str, _pattern:&str) -> f32 { + (edge.to.position_in_text - edge.from.position_in_text).pow(2) as f32 + } + } + + pub type Sum = metric::Sum; + } + + #[test] + fn matches_test() { + assert!( matches("abba", 
"aba")); + assert!( matches("abba", "ba" )); + assert!( matches("abba", "" )); + assert!(!matches("abba", "abc")); + assert!(!matches("abba", "baa")); + assert!(!matches("" , "ba" )); + } + + #[test] + fn finding_best_subsequence() { + let pattern = "abc"; + let text = "aabxbacc"; + + let expected = Subsequence { + score : 12.0, + indices : vec![1,4,7] // Always pick the latest character possible + }; + assert_eq!(find_best_subsequence(text,pattern,mock_metric::WordIndex), Some(expected)); + + let expected = Subsequence { + score : 29.0, + indices : vec![0,2,7] // Prefer the long edges + }; + assert_eq!(find_best_subsequence(text,pattern,mock_metric::SquareEdgeLength), Some(expected)); + + let expected = Subsequence { + score : 38.0, + indices : vec![0,2,7] // The edges metric should have more impact + }; + assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), Some(expected)); + } + + #[test] + fn finding_best_subsequence_when_does_not_match() { + let pattern = "abc"; + let text = "aabxbyy"; + assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), None); + } + + #[test] + fn finding_best_subsequence_corner_cases() { + let pattern = ""; + let text = "any"; + let expected = Subsequence { + score : 0.0, + indices : vec![], + }; + assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), Some(expected)); + let pattern = "any"; + let text = ""; + assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), None); + } +} diff --git a/gui/src/rust/lib/fuzzly/src/subsequence_graph.rs b/gui/src/rust/lib/fuzzly/src/subsequence_graph.rs new file mode 100644 index 00000000000..76a143d6209 --- /dev/null +++ b/gui/src/rust/lib/fuzzly/src/subsequence_graph.rs @@ -0,0 +1,209 @@ +//! The Subsequence Graph. +use crate::prelude::*; + +use std::collections::BTreeSet; + + + +// ============= +// === Graph === +// ============= + +/// A graph vertex. 
+/// +/// The vertices are identified by two indexes: a layer index and text's char index. See +/// `Graph` docs for details. +/// +/// The field order is significant, because it affects how they are ordered in the `Graph`'s +/// `vertices`. +#[derive(Copy,Clone,Debug,Eq,Hash,Ord,PartialEq,PartialOrd)] +pub struct Vertex { + /// The layer this vertex belongs to. It is equal to the char position in `pattern`. + pub layer:usize, + /// The char position in `text` this vertex represents. + pub position_in_text:usize, +} + +/// A graph edge. +/// +/// The field order is significant, because it affects how they are ordered in the `Graph`'s +/// `edges`. +#[allow(missing_docs)] +#[derive(Copy,Clone,Debug,Eq,Hash,Ord,PartialEq,PartialOrd)] +pub struct Edge { + pub from : Vertex, + pub to : Vertex, +} + +/// The Subsequence Graph. +/// +/// This structure helps to analyze all subsequences in given `text` which are equal to given +/// `pattern` when the ASCII letter case is ignored. The graph is directed. +/// +/// The vertices are arranged in layers, one per each `pattern` char: each vertex in i-th layer +/// represents a possible position of the i-th subsequence element in `text`. +/// +/// Each edge _v → w_ is spanned between vertices from consecutive layers _i_ and _i_+1, and +/// indicates that having i-th subsequence element at position represented by _v_ we can pick +/// (i+1)-th subsequence element at position represented by _w_. +/// +/// In such a graph every path spanned between the first and the last layer represents a possible +/// subsequence of `text`. +/// +/// We keep vertices and edges ordered, because the scoring algorithm requires this ordering to be +/// effective. +#[allow(missing_docs)] +#[derive(Clone,Debug,Default,Eq,PartialEq)] +pub struct Graph { + pub vertices : BTreeSet, + pub edges : BTreeSet, +} + +impl Graph { + /// Generate graph based on `text` and `pattern`.
+ pub fn new(text:impl Str, pattern:impl Str) -> Self { + let vertices = Self::create_vertices(text.as_ref(),pattern.as_ref()); + let edges = Self::create_edges(&vertices); + Graph{vertices,edges} + } + + fn create_vertices(text:&str, pattern:&str) -> BTreeSet { + let mut result = BTreeSet::default(); + let mut first_reachable_text_char = 0; + for (layer,pattern_ch) in pattern.chars().enumerate() { + // For each layer we skip positions which won't be reachable, i.e. those not located after + // the earliest match found in the previous layer. + let to_skip = first_reachable_text_char; + // Reset to a value past every char index; it is lowered below to the earliest match + 1. + first_reachable_text_char = text.len(); + for (position_in_text,text_ch) in text.chars().enumerate().skip(to_skip) { + if pattern_ch.eq_ignore_ascii_case(&text_ch) { + result.insert(Vertex {layer,position_in_text}); + first_reachable_text_char = first_reachable_text_char.min(position_in_text+1); + } + } + } + result + } + + /// Create an edge from each vertex to every vertex of the next layer located at a later + /// position in the text. + fn create_edges(vertices:&BTreeSet) -> BTreeSet { + let mut result = BTreeSet::default(); + for from in vertices { + // `Vertex` is ordered by layer first, so the range below covers exactly the vertices of + // the next layer located after `from`. + let first_possible_to = Vertex{ + layer : from.layer + 1, + position_in_text: from.position_in_text + 1, + }; + let first_impossible_to = Vertex{ + layer : from.layer + 2, + position_in_text: 0, + }; + for to in vertices.range(first_possible_to..first_impossible_to) { + result.insert(Edge{from:*from, to:*to}); + } + } + result + } + + /// Returns an iterator over all vertices in the given layer.
+ pub fn vertices_in_layer(&self, index:usize) -> impl Iterator { + let start = Vertex{ layer:index , position_in_text:0}; + let end = Vertex{ layer:index + 1, position_in_text:0}; + self.vertices.range(start..end) + } +} + + + +// ============= +// === Tests === +// ============= + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn generating_graph() { + struct Case { + text : &'static str, + pattern : &'static str, + vertices : Vec<(usize,usize)>, + edges : Vec<((usize,usize),(usize,usize))>, + } + + impl Case { + fn run(self) { + let graph = Graph::new(self.text, self.pattern); + let expected_vertices = self.vertices.into_iter().map(Self::convert_vertex); + let expected_edges = self.edges.into_iter().map(|(from,to)| Edge { + from : Self::convert_vertex(from), + to : Self::convert_vertex(to), + }); + let expected_graph = Graph { + vertices : expected_vertices.collect(), + edges : expected_edges.collect() + }; + assert_eq!(graph, expected_graph); + } + + fn convert_vertex((layer,position_in_text):(usize, usize)) -> Vertex { + Vertex{layer,position_in_text} + } + } + + let classic = Case { + text: "lalala", + pattern: "alA", + vertices : vec![(0,1),(0,3),(0,5),(1,2),(1,4),(2,3),(2,5)], + edges : vec! + [ ((0,1),(1,2)) + , ((0,1),(1,4)) + , ((0,3),(1,4)) + , ((1,2),(2,3)) + , ((1,2),(2,5)) + , ((1,4),(2,5)) + ] + }; + let missing_layer = Case { + text: "laall", + pattern: "ala", + vertices : vec![(0,1),(0,2),(1,3),(1,4)], + edges : vec! 
+ [ ((0,1),(1,3)) + , ((0,1),(1,4)) + , ((0,2),(1,3)) + , ((0,2),(1,4)) + ] + }; + let empty_text = Case { + text: "", + pattern: "ala", + vertices : vec![], + edges : vec![], + }; + let empty_pattern = Case { + text: "lalala", + pattern: "", + vertices : vec![], + edges : vec![], + }; + let longer_pattern = Case { + text: "la", + pattern: "ala", + vertices : vec![(0,1)], + edges : vec![], + }; + let non_ascii = Case { + text: "test wiadomości push: ęśąćż", + pattern: "tęś", + vertices : vec![(0,0),(0,3),(1,22),(2,23)], + edges : vec! + [ ((0,0) ,(1,22)) + , ((0,3) ,(1,22)) + , ((1,22),(2,23)) + ] + }; + + for case in vec![classic,missing_layer,empty_pattern,empty_text,longer_pattern,non_ascii] { + case.run() + } + } +}