Searcher controller: Filtering suggestions (https://github.com/enso-org/ide/pull/717)

Original commit: e39878050d
This commit is contained in:
Adam Obuchowicz 2020-08-14 12:38:03 +02:00 committed by GitHub
parent 4417a9b1f7
commit 666906fa88
11 changed files with 912 additions and 43 deletions

View File

@ -948,6 +948,13 @@ dependencies = [
"slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fuzzly"
version = "0.1.0"
dependencies = [
"enso-prelude 0.1.0",
]
[[package]]
name = "generator"
version = "0.6.21"
@ -1139,6 +1146,7 @@ dependencies = [
"failure 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"flo_stream 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"futures 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fuzzly 0.1.0",
"ide-view 0.1.0",
"itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.35 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -28,6 +28,7 @@ members = [
"lib/enso-shapely/macros",
"lib/eval-tt",
"lib/frp",
"lib/fuzzly",
"lib/generics",
"lib/logger",
"lib/optics",

View File

@ -12,6 +12,7 @@ enso-callback = { version = "0.1.0" , path = "../lib/callback"
ensogl = { version = "0.1.0" , path = "../ensogl" }
ensogl-text-msdf-sys = { version = "0.1.0" , path = "../ensogl/lib/text/msdf-sys" }
ensogl-system-web = { version = "0.1.0" , path = "../lib/system/web" }
fuzzly = { version = "0.1.0" , path = "../lib/fuzzly" }
data = { version = "0.1.0" , path = "../lib/data" }
enso-frp = { version = "0.1.0" , path = "../lib/frp" }
enso-prelude = { version = "0.1.0" , path = "../lib/enso-prelude" }

View File

@ -181,7 +181,7 @@ macro_rules! make_rpc_methods {
fn drop(&mut self) {
if self.require_all_calls.get() && !std::thread::panicking() {
$(assert!(self.expect.$method.borrow().is_empty(),
"Didn't make expected call {}");)*
"Didn't make expected call {}");)* //TODO[ao] print method name.
}
}
}

View File

@ -1,4 +1,5 @@
//! This module contains all structures related to Searcher Controller.
pub mod suggestion;
use crate::prelude::*;
@ -15,23 +16,27 @@ use enso_protocol::language_server;
use flo_stream::Subscriber;
use parser::Parser;
pub use suggestion::Suggestion;
// =======================
// === Suggestion List ===
// =======================
/// Suggestion for input completion: possible functions, arguments, etc.
pub type CompletionSuggestion = Rc<model::suggestion_database::Entry>;
// =====================
// === Notifications ===
// =====================
/// A single suggestion on the Searcher suggestion list.
#[derive(Clone,CloneRef,Debug,Eq,PartialEq)]
pub enum Suggestion {
/// Suggestion for input completion: possible functions, arguments, etc.
Completion(CompletionSuggestion)
// In future, other suggestion types will be added (like suggestions of actions, etc.).
/// The notification emitted by Searcher Controller
#[derive(Copy,Clone,Debug,Eq,PartialEq)]
pub enum Notification {
/// A new Suggestion list is available.
NewSuggestionList
}
// ===================
// === Suggestions ===
// ===================
/// List of suggestions available in Searcher.
#[derive(Clone,CloneRef,Debug)]
pub enum Suggestions {
@ -40,7 +45,7 @@ pub enum Suggestions {
/// The suggestion list is loaded.
#[allow(missing_docs)]
Loaded {
list : Rc<Vec<Suggestion>>
list : Rc<suggestion::List>
},
/// Loading suggestion list resulted in error.
Error(Rc<failure::Error>)
@ -58,7 +63,7 @@ impl Suggestions {
}
/// Get the list of suggestions. Returns None if still loading or error was returned.
pub fn list(&self) -> Option<&Vec<Suggestion>> {
pub fn list(&self) -> Option<&suggestion::List> {
match self {
Self::Loaded {list} => Some(list),
_ => None,
@ -74,18 +79,6 @@ impl Default for Suggestions {
// =====================
// === Notifications ===
// =====================
/// The notification emitted by Searcher Controller
#[derive(Copy,Clone,Debug,Eq,PartialEq)]
pub enum Notification {
/// A new Suggestion list is available.
NewSuggestionList
}
// ===================
// === Input Parts ===
// ===================
@ -275,7 +268,7 @@ pub enum Mode {
#[allow(missing_docs)]
pub struct FragmentAddedByPickingSuggestion {
pub id : CompletedFragmentId,
pub picked_suggestion : CompletionSuggestion,
pub picked_suggestion : suggestion::Completion,
}
impl FragmentAddedByPickingSuggestion {
@ -418,13 +411,16 @@ impl Searcher {
pub fn set_input(&self, new_input:String) -> FallibleResult<()> {
debug!(self.logger, "Manually setting input to {new_input}");
let parsed_input = ParsedInput::new(new_input,&self.parser)?;
let old_expr = self.data.borrow().input.expression.clone();
let new_expr = parsed_input.expression.clone();
let old_expr = self.data.borrow().input.expression.repr();
let new_expr = parsed_input.expression.repr();
self.data.borrow_mut().input = parsed_input;
self.invalidate_fragments_added_by_picking();
if old_expr.repr() != new_expr.repr() {
if old_expr != new_expr {
self.reload_list()
} else if let Suggestions::Loaded {list} = self.data.borrow().suggestions.clone_ref() {
list.update_filtering(&self.data.borrow().input.pattern);
executor::global::spawn(self.notifier.publish(Notification::NewSuggestionList));
}
Ok(())
}
@ -441,7 +437,7 @@ impl Searcher {
/// Code that will be inserted by expanding given suggestion at given location.
///
/// Code depends on the location, as the first fragment can introduce `this` variable access.
fn code_to_insert(&self, suggestion:&CompletionSuggestion, id:CompletedFragmentId) -> String {
fn code_to_insert(&self, suggestion:&suggestion::Completion, id:CompletedFragmentId) -> String {
let var = self.this_var_for(id);
suggestion.code_to_insert(var)
}
@ -452,7 +448,7 @@ impl Searcher {
/// suggestion will be remembered, and the searcher's input will be updated and returned by this
/// function.
pub fn pick_completion
(&self, picked_suggestion:CompletionSuggestion) -> FallibleResult<String> {
(&self, picked_suggestion:suggestion::Completion) -> FallibleResult<String> {
let id = self.data.borrow().input.next_completion_id();
let code_to_insert = self.code_to_insert(&picked_suggestion,id);
let added_ast = self.parser.parse_line(&code_to_insert)?;
@ -641,8 +637,8 @@ impl Searcher {
/// Process multiple completion responses from the engine into a single list of suggestion.
fn suggestions_from_responses
(&self, responses:Vec<json_rpc::Result<language_server::response::Completion>>)
-> FallibleResult<Vec<Suggestion>> {
let mut suggestions = Vec::new();
-> FallibleResult<suggestion::List> {
let suggestions = suggestion::List::new();
for response in responses {
let response = response?;
let entries = response.results.iter().filter_map(|id| {
@ -655,10 +651,11 @@ impl Searcher {
});
suggestions.extend(entries);
}
suggestions.update_filtering(&self.data.borrow().input.pattern);
Ok(suggestions)
}
fn possible_function_calls(&self) -> Vec<CompletionSuggestion> {
fn possible_function_calls(&self) -> Vec<suggestion::Completion> {
let opt_result = || {
let call_ast = self.data.borrow().input.expression.as_ref()?.func.clone_ref();
let call = SimpleFunctionCall::try_new(&call_ast)?;
@ -699,7 +696,7 @@ impl Searcher {
/// Get the suggestion that was selected by the user into the function.
///
/// This suggestion shall be used to request better suggestions from the engine.
fn intended_function_suggestion(&self) -> Option<CompletionSuggestion> {
fn intended_function_suggestion(&self) -> Option<suggestion::Completion> {
let id = CompletedFragmentId::Function;
let fragment = self.data.borrow().find_picked_fragment(id).cloned();
fragment.map(|f| f.picked_suggestion.clone_ref())
@ -827,11 +824,11 @@ mod test {
data : MockData,
test : TestWithLocalPoolExecutor,
searcher : Searcher,
entry1 : CompletionSuggestion,
entry2 : CompletionSuggestion,
entry3 : CompletionSuggestion,
entry4 : CompletionSuggestion,
entry9 : CompletionSuggestion,
entry1 : suggestion::Completion,
entry2 : suggestion::Completion,
entry3 : suggestion::Completion,
entry4 : suggestion::Completion,
entry9 : suggestion::Completion,
}
impl Fixture {
@ -989,7 +986,7 @@ mod test {
data.selected_node = true;
// We expect following calls:
// 1) for the function - with the "this" filled (if the test case says so);
// 2) for subsequent completion - without "this"
// 2) for subsequent completions - without "this"
data.expect_completion(client,case.sets_this.as_some(mock_type),None,&[1,5,9]);
data.expect_completion(client,None,None,&[1,5,9]);
data.expect_completion(client,None,None,&[1,5,9]);
@ -1124,7 +1121,7 @@ mod test {
assert!(searcher.suggestions().is_loading());
test.run_until_stalled();
let expected_list = vec![Suggestion::Completion(entry1),Suggestion::Completion(entry9)];
assert_eq!(searcher.suggestions().list(), Some(&expected_list));
assert_eq!(searcher.suggestions().list().unwrap().to_suggestion_vec(), expected_list);
let notification = subscriber.next().boxed_local().expect_ready();
assert_eq!(notification, Some(Notification::NewSuggestionList));
}

View File

@ -0,0 +1,161 @@
//! All structures related to the suggestion list provided by SearcherController.
use crate::prelude::*;
// ===================
// === Suggestion ===
// ===================
/// Suggestion for input completion: possible functions, arguments, etc.
///
/// This is a reference-counted handle to a suggestion database entry, so cloning it does not copy
/// the entry itself.
pub type Completion = Rc<model::suggestion_database::Entry>;
/// A single suggestion on the Searcher suggestion list.
///
/// Currently the only kind of suggestion is an input completion.
#[derive(Clone,CloneRef,Debug,Eq,PartialEq)]
pub enum Suggestion {
/// Suggestion for input completion: possible functions, arguments, etc.
Completion(Completion)
// In future, other suggestion types will be added (like suggestions of actions, etc.).
}
impl Suggestion {
    /// The text under which the suggestion is listed (suggested function name, action name, etc.).
    ///
    /// For a completion this is the `name` of the underlying suggestion database entry. The list
    /// filtering matches the pattern against this caption.
    pub fn caption(&self) -> &String {
        match *self {
            Self::Completion(ref entry) => &entry.name,
        }
    }
}
// ==================
// === List Entry ===
// ==================
/// Information how the Suggestion list entry matches the filtering pattern.
///
/// It is recomputed by `ListEntry::update_matching_info` each time the filtering pattern changes.
#[allow(missing_docs)]
#[derive(Clone,Debug,PartialEq)]
pub enum MatchInfo {
// The entry's caption does not match the current pattern.
DoesNotMatch,
// The caption matches; `subsequence` is the best match found by `fuzzly`, with its score.
Matches {subsequence:fuzzly::Subsequence}
}
/// The single suggestion list entry.
///
/// Pairs a suggestion with the information how it matches the current filtering pattern.
#[allow(missing_docs)]
#[derive(Clone,Debug)]
pub struct ListEntry {
// How `suggestion` matches the pattern last passed to `update_matching_info`.
pub match_info : MatchInfo,
// The suggestion presented by this entry.
pub suggestion : Suggestion,
}
impl ListEntry {
/// Update the current match info according to the new filtering pattern.
pub fn update_matching_info(&mut self, pattern:impl Str) {
let matches = fuzzly::matches(self.suggestion.caption(),pattern.as_ref());
let subsequence = matches.and_option_from(|| {
let metric = fuzzly::metric::default();
fuzzly::find_best_subsequence(self.suggestion.caption(),pattern,metric)
});
self.match_info = match subsequence {
Some(subsequence) => MatchInfo::Matches {subsequence},
None => MatchInfo::DoesNotMatch,
};
}
/// Compare how two entries matches filtering pattern.
///
/// The "greater" entry matches better.
pub fn compare_match_scores(&self, rhs:&ListEntry) -> std::cmp::Ordering {
use MatchInfo::*;
use std::cmp::Ordering::*;
match (&self.match_info,&rhs.match_info) {
(DoesNotMatch ,DoesNotMatch ) => Equal,
(DoesNotMatch ,Matches {..} ) => Less,
(Matches {..} ,DoesNotMatch ) => Greater,
(Matches {subsequence:lhs},Matches {subsequence:rhs}) => lhs.compare_scores(rhs),
}
}
}
impl From<Suggestion> for ListEntry {
    /// Wrap the suggestion in an entry marked as matching, with a default (empty) subsequence.
    fn from(suggestion:Suggestion) -> Self {
        let match_info = MatchInfo::Matches {subsequence:Default::default()};
        Self {match_info,suggestion}
    }
}
// ============
// === List ===
// ============
/// Suggestion list.
///
/// This structure should be notified about filtering changes using the `update_filtering`
/// function.
///
/// The entries are kept in a `RefCell`, so the list can be re-filtered and extended through
/// a shared reference.
#[derive(Clone,Debug,Default)]
pub struct List {
entries : RefCell<Vec<ListEntry>>
}
impl List {
    /// Create new empty list.
    pub fn new() -> Self {
        Self::default()
    }

    /// Create list from suggestions.
    ///
    /// Every entry starts as "matching" with a default subsequence, i.e. the list behaves as if
    /// the filtering pattern was an empty string.
    pub fn from_suggestions(suggestions:impl IntoIterator<Item=Suggestion>) -> Self {
        let entries:Vec<ListEntry> = suggestions.into_iter().map(ListEntry::from).collect();
        Self {entries:RefCell::new(entries)}
    }

    /// Update the list filtering.
    ///
    /// The match info of each entry is recalculated against the given pattern, and then the
    /// entries are sorted so the best matches go first. The sort is stable, so entries with equal
    /// scores keep their relative order.
    pub fn update_filtering(&self, pattern:impl Str) {
        let pattern     = pattern.as_ref();
        let mut entries = self.entries.borrow_mut();
        entries.iter_mut().for_each(|entry| entry.update_matching_info(pattern));
        entries.sort_by(|lhs,rhs| lhs.compare_match_scores(rhs).reverse());
    }

    /// Length of the suggestion list.
    pub fn len(&self) -> usize {
        self.entries.borrow().len()
    }

    /// Check if list is empty.
    pub fn is_empty(&self) -> bool {
        self.entries.borrow().is_empty()
    }

    /// Iterate over suggestion entries.
    ///
    /// The iterator yields clones of the entries and borrows the list only for the duration of a
    /// single step. It visits at most as many entries as the list had when `iter` was called and
    /// stops early if the list gets shorter in the meantime.
    pub fn iter<'a>(&'a self) -> impl Iterator<Item=ListEntry> + 'a {
        let initial_len = self.len();
        (0..initial_len)
            .take_while(move |&index| index < self.len())
            .filter_map(move |index| self.entries.borrow().get(index).cloned())
    }

    /// Extend the list with new suggestions.
    ///
    /// The new suggestions will be put at end, regardless the current filtering. This function
    /// is meant to be a part of list's initialization.
    pub fn extend<T:IntoIterator<Item=Suggestion>>(&self, iter: T) {
        let mut entries = self.entries.borrow_mut();
        entries.extend(iter.into_iter().map(ListEntry::from));
    }

    /// Convert to the suggestion vector, dropping the match information.
    ///
    /// Used for testing.
    pub fn to_suggestion_vec(&self) -> Vec<Suggestion> {
        let entries = self.entries.borrow();
        entries.iter().map(|entry| entry.suggestion.clone_ref()).collect()
    }
}
impl<IntoIter> From<IntoIter> for List
where IntoIter : IntoIterator<Item=Suggestion> {
fn from(suggestions:IntoIter) -> Self {
Self::from_suggestions(suggestions)
}
}

View File

@ -0,0 +1,11 @@
[package]
name = "fuzzly"
version = "0.1.0"
authors = ["Enso Team <contact@luna-lang.org>"]
edition = "2018"
[lib]
crate-type = ["rlib", "cdylib"]
[dependencies]
enso-prelude = { version = "0.1.0" , path = "../enso-prelude" }

View File

@ -0,0 +1,30 @@
//! Fuzzly Search Utilities.
//!
//! This crate is designed to be used in various search engines: once you have the list of names
//! matching the given pattern, the next step is to order the items so that the best matches
//! are listed first. In such a case the `find_best_subsequence` function may be used to compute
//! a score (ordering priority) for each element.
//!
//! The metrics used for scoring may be adjusted by implementing `Metric` trait, or by customizing
//! parameters of metrics defined in `metric` module.
#![feature(option_result_contains)]
#![warn(missing_docs)]
#![warn(trivial_casts)]
#![warn(trivial_numeric_casts)]
#![warn(unused_import_braces)]
#![warn(unused_qualifications)]
#![warn(unsafe_code)]
#![warn(missing_copy_implementations)]
#![warn(missing_debug_implementations)]
pub mod subsequence_graph;
pub mod metric;
pub mod score;
pub use enso_prelude as prelude;
pub use metric::Metric;
pub use subsequence_graph::Graph as SubsequenceGraph;
pub use score::Subsequence;
pub use score::matches;
pub use score::find_best_subsequence;

View File

@ -0,0 +1,139 @@
//! The Metric trait definition and standard implementations.
use crate::prelude::*;
use crate::subsequence_graph;
// =============
// === Trait ===
// =============
/// Provides functions measuring, from various aspects, how well a pattern matches a specific text.
///
/// The pattern match is represented as a path in `SubsequenceGraph` (see its docs for details).
/// Its score is counted as a sum of measures "how good is the vertex/edge" for each vertex and
/// edge on the path.
pub trait Metric {
/// How good is vertex on the path on the Subsequence Graph.
///
/// A vertex identifies one pattern char (`vertex.layer`) placed at one position of `text`
/// (`vertex.position_in_text`).
fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32;
/// How good is the edge on the path on the Subsequence Graph.
///
/// An edge joins the positions chosen for two consecutive pattern chars.
fn measure_edge(&self, edge:subsequence_graph::Edge, text:&str, pattern:&str) -> f32;
/// Return a new metric being a sum of this and `rhs`.
fn sum<Rhs:Metric>(self, rhs:Rhs) -> Sum<Self,Rhs> where Self:Sized { Sum(self, rhs) }
}
// ==========================
// === The Default Metric ===
// ==========================
/// The default metric, recommended by this library
pub fn default() -> impl Metric {
SubsequentLettersBonus::default().sum(CaseMatchBonus::default())
}
// =======================
// === Implementations ===
// =======================
// === Sum ===
/// A metric being the sum of two other metrics.
///
/// Each measure is the measure of the first metric plus the measure of the second one.
#[derive(Copy,Clone,Debug,Default)]
pub struct Sum<Metrics1,Metrics2>(Metrics1,Metrics2);

impl<M1:Metric, M2:Metric> Metric for Sum<M1,M2> {
    fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32 {
        self.0.measure_vertex(vertex,text,pattern) + self.1.measure_vertex(vertex,text,pattern)
    }

    fn measure_edge(&self, edge:subsequence_graph::Edge, text:&str, pattern:&str) -> f32 {
        self.0.measure_edge(edge,text,pattern) + self.1.measure_edge(edge,text,pattern)
    }
}
// === SubsequentLettersBonus ===
/// A metric which measures how far the matched letters are from each other, how far the first
/// matched char is from the text beginning, and how far the last matched char is from the text
/// ending.
#[derive(Copy,Clone,Debug)]
pub struct SubsequentLettersBonus {
    /// The base weight of this metric.
    pub base_weight:f32,
    /// How important is the distance of first matched char from the text beginning.
    pub beginning_weight:f32,
    /// How important is the distance of last matched char from the text ending.
    pub ending_weight:f32,
}

impl Default for SubsequentLettersBonus {
    /// The parameters used by the library's default metric.
    fn default() -> Self {
        let base_weight      = 1.0;
        let beginning_weight = 0.5;
        let ending_weight    = 0.01;
        Self {base_weight,beginning_weight,ending_weight}
    }
}
impl Metric for SubsequentLettersBonus {
    /// Bonus for placing the match close to the edges of `text`.
    ///
    /// * The vertex of the first pattern char gets `base_weight * beginning_weight` divided by
    ///   its 1-based position in the text.
    /// * The vertex of the last pattern char gets `base_weight * ending_weight` divided by its
    ///   distance from the text end (1 for the very last char).
    ///
    /// For a single-char pattern both bonuses apply to the same vertex. Any other vertex
    /// scores 0.
    fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32 {
        // Both `layer` and `position_in_text` are char indices (the graph is built with
        // `chars().enumerate()`), so they must be compared with char counts, not byte lengths.
        let pattern_len = pattern.chars().count();
        let text_len    = text.chars().count();
        let is_first_pattern_char = vertex.layer == 0;
        // The layer is a position in the pattern, hence the last layer depends on the pattern
        // length, not on the text length.
        let is_last_pattern_char = pattern_len.checked_sub(1) == Some(vertex.layer);
        let first_char_bonus = if is_first_pattern_char {
            self.base_weight / (vertex.position_in_text as f32 + 1.0) * self.beginning_weight
        } else {0.0};
        let last_char_bonus = if is_last_pattern_char {
            // For a valid vertex the position is lower than the text length, so the distance is
            // at least 1; the clamping only protects against a vertex from another text.
            let distance_from_end = text_len.saturating_sub(vertex.position_in_text).max(1);
            self.base_weight / distance_from_end as f32 * self.ending_weight
        } else {0.0};
        first_char_bonus + last_char_bonus
    }

    /// Bonus for keeping consecutive pattern chars close to each other in the text.
    ///
    /// It is `base_weight` divided by the distance between the two positions, so directly
    /// adjacent letters get the full `base_weight`. The edge is expected to point forward in the
    /// text (`to` placed after `from`), as all edges of the Subsequence Graph do.
    fn measure_edge(&self, edge:subsequence_graph::Edge, _text:&str, _pattern:&str) -> f32 {
        self.base_weight / (edge.to.position_in_text - edge.from.position_in_text) as f32
    }
}
// === CaseMatchBonus ===
/// A metric which rewards the matches where the letter case agrees between text and pattern.
#[derive(Copy,Clone,Debug)]
pub struct CaseMatchBonus {
    /// A score added for each char matching.
    pub bonus_per_char : f32,
}

impl Default for CaseMatchBonus {
    /// The parameters used by the library's default metric.
    fn default() -> Self {
        Self {bonus_per_char:0.01}
    }
}
impl Metric for CaseMatchBonus {
    /// Gives `bonus_per_char` when the text char and the pattern char of the vertex are both
    /// uppercase or both not uppercase; 0 otherwise (also when any of the indices is out of
    /// range).
    fn measure_vertex(&self, vertex:subsequence_graph::Vertex, text:&str, pattern:&str) -> f32 {
        let text_is_upper    = text.chars().nth(vertex.position_in_text).map(char::is_uppercase);
        let pattern_is_upper = pattern.chars().nth(vertex.layer).map(char::is_uppercase);
        match (text_is_upper,pattern_is_upper) {
            (Some(in_text),Some(in_pattern)) if in_text == in_pattern => self.bonus_per_char,
            _                                                         => 0.0,
        }
    }

    /// Edges do not carry any case information, so they never score.
    fn measure_edge(&self, _:subsequence_graph::Edge, _:&str, _:&str) -> f32 {
        0.0
    }
}

View File

@ -0,0 +1,312 @@
//! Scoring how given text matches the given pattern.
use crate::prelude::*;
use crate::metric::Metric;
use crate::subsequence_graph;
use crate::SubsequenceGraph;
use std::collections::hash_map::Entry;
// =====================
// === VerticesScore ===
// =====================
/// The description of path which finishes at some specific vertex.
#[derive(Clone,Copy,Debug)]
struct InputPath {
// The score of the path: the score of `from` plus the measure of the edge leaving it (see how
// `find_best_subsequence` computes the value passed to `update_input_path`).
value : f32,
// The vertex directly preceding the path's end; used to walk the best path backwards.
from : subsequence_graph::Vertex,
}
/// The score of single vertex in graph.
///
/// The score is a sum of measure of the vertex alone, and the best score of input path.
/// The `best_input_path` is updated during the scoring algorithm run. See the `score_match`
/// function.
#[derive(Copy,Clone,Debug,Default)]
struct VertexScore {
my_measure : f32,
best_input_path : Option<InputPath>,
}
impl VertexScore {
fn new(my_measure:f32) -> Self {
let best_input_path = default();
VertexScore {my_measure,best_input_path}
}
fn update_input_path(&mut self, candidate:InputPath) {
let new_score = match self.best_input_path.take() {
Some(score) if score.value < candidate.value => candidate,
Some(score) => score,
None => candidate,
};
self.best_input_path = Some(new_score)
}
fn score(&self) -> f32 {
self.my_measure + self.best_input_path.map_or(0.0, |s| s.value)
}
}
/// The scores of all graph's vertices, keyed by vertex.
///
/// Used in the `find_best_subsequence` function.
#[derive(Debug,Default)]
struct VerticesScores(HashMap<subsequence_graph::Vertex,VertexScore>);

impl VerticesScores {
    /// Register the vertex with its own measure and no input path.
    fn init_vertex(&mut self, vertex:subsequence_graph::Vertex, measure:f32) {
        self.0.insert(vertex,VertexScore::new(measure));
    }

    /// Offer the path of the given `value`, arriving through `edge`, as the input path of the
    /// edge's target vertex. A target which was not initialized gets a default score first.
    fn update_input_path(&mut self, edge:subsequence_graph::Edge, value:f32) {
        let candidate = InputPath {value, from:edge.from};
        match self.0.entry(edge.to) {
            Entry::Occupied(mut slot) => slot.get_mut().update_input_path(candidate),
            Entry::Vacant(slot) => {
                let mut fresh = VertexScore::default();
                fresh.update_input_path(candidate);
                slot.insert(fresh);
            }
        }
    }

    /// The current score of the vertex, or 0 for an unknown vertex.
    fn get_score(&self, vertex:subsequence_graph::Vertex) -> f32 {
        self.0.get(&vertex).map_or(0.0, VertexScore::score)
    }

    /// Pick the vertex with the highest score. On ties the earliest vertex wins; returns `None`
    /// only for an empty iterator.
    fn best_vertex
    (&self, vertices:impl Iterator<Item=subsequence_graph::Vertex>)
    -> Option<subsequence_graph::Vertex> {
        let mut best:Option<(subsequence_graph::Vertex,f32)> = None;
        for vertex in vertices {
            let score     = self.get_score(vertex);
            let is_better = match best {
                Some((_,best_score)) => score > best_score,
                None                 => true,
            };
            if is_better {
                best = Some((vertex,score));
            }
        }
        best.map(|(vertex,_)| vertex)
    }

    /// Iterate over the best path finishing in `end`, starting from `end` and going backwards.
    fn best_path_rev(&self, end:subsequence_graph::Vertex) -> BestPathRevIter {
        BestPathRevIter {scores:self, next_vertex:Some(end)}
    }
}
/// Iterator walking the best path backwards: from its last vertex, through the `from` vertices of
/// the consecutive best input paths, until a vertex without an input path is yielded.
struct BestPathRevIter<'a> {
    scores      : &'a VerticesScores,
    next_vertex : Option<subsequence_graph::Vertex>
}

impl<'a> Iterator for BestPathRevIter<'a> {
    type Item = subsequence_graph::Vertex;

    fn next(&mut self) -> Option<Self::Item> {
        let current     = self.next_vertex.take();
        let VerticesScores(scores) = self.scores;
        // The predecessor exists only if the current vertex is known and has an input path.
        self.next_vertex = current
            .and_then(|vertex| scores.get(&vertex))
            .and_then(|score| score.best_input_path)
            .map(|path| path.from);
        current
    }
}
// ===================
// === Score Match ===
// ===================
/// Fast-check if the pattern matches text.
///
/// The pattern matches when its chars appear in the text in the same order (not necessarily next
/// to each other), comparing ASCII letters case-insensitively. An empty pattern matches any text.
///
/// This is faster than calling `find_best_subsequence(text,pattern,metric).is_some()`, therefore
/// it is recommended to call this function before scoring when we are not sure if the pattern
/// actually matches the text.
pub fn matches(text:impl Str, pattern:impl Str) -> bool {
    let mut text_chars = text.as_ref().chars();
    // For each pattern char consume the text up to (and including) its nearest occurrence; the
    // match fails as soon as the text runs out.
    pattern.as_ref().chars().all(|pattern_char| {
        text_chars.any(|text_char| pattern_char.eq_ignore_ascii_case(&text_char))
    })
}
/// The result of `find_best_subsequence` function.
#[derive(Clone,Debug,Default,PartialEq)]
pub struct Subsequence {
    /// The score of found subsequence.
    pub score:f32,
    /// Indices of `text`'s chars which belong to the subsequence.
    pub indices:Vec<usize>
}

impl Subsequence {
    /// Compare scores of subsequences.
    ///
    /// `f32` does not implement total ordering, which gets in the way when we want to sort items
    /// by their matching score. Therefore this function treats every NaN as the lowest value
    /// (and two NaNs as equal).
    pub fn compare_scores(&self, rhs:&Subsequence) -> std::cmp::Ordering {
        use std::cmp::Ordering::*;
        match (self.score.is_nan(),rhs.score.is_nan()) {
            (true ,true ) => Equal,
            (true ,false) => Less,
            (false,true ) => Greater,
            // Without NaNs the comparison is always defined.
            (false,false) => self.score.partial_cmp(&rhs.score).unwrap_or(Equal),
        }
    }
}
/// Find best subsequence in `text` which case-insensitively equals to `pattern` in terms of given
/// `metric`.
///
/// Returns `None` if `text` does not match `pattern`. Empty `pattern` gives 0.0 score.
///
/// ## Algorithm specification
///
/// In essence, it looks through all possible subsequences of `text` being the `pattern` and pick
/// the one with the best score. Not directly (because there may be a lot of such subsequences), but
/// by building the `SubsequenceGraph` and computing best score for each vertex. See
/// `SubsequenceGraph` docs for detailed description of the graph.
pub fn find_best_subsequence
(text:impl Str, pattern:impl Str, metric:impl Metric) -> Option<Subsequence> {
let text = text.as_ref();
let pattern = pattern.as_ref();
if pattern.is_empty() {
Some(default())
} else {
let last_layer = pattern.chars().count() - 1;
let mut scores = VerticesScores::default();
let graph = SubsequenceGraph::new(text,pattern);
for vertex in &graph.vertices {
let measure = metric.measure_vertex(*vertex,text,pattern);
scores.init_vertex(*vertex,measure);
}
for edge in &graph.edges {
let from_score = scores.get_score(edge.from);
let input_score = from_score + metric.measure_edge(*edge,text,pattern);
scores.update_input_path(*edge,input_score);
}
let end_vertices = graph.vertices_in_layer(last_layer).cloned();
let best_vertex = scores.best_vertex(end_vertices)?;
let score = scores.get_score(best_vertex);
let best_path_rev = scores.best_path_rev(best_vertex);
let mut indices = best_path_rev.map(|v| v.position_in_text).collect_vec();
indices.reverse();
Some(Subsequence {score,indices})
}
}
// =============
// === Tests ===
// =============
// Unit tests of `matches` and `find_best_subsequence`, run against simple mock metrics whose
// scores are easy to compute by hand.
#[cfg(test)]
mod test {
use super::*;
// Mock metrics used to steer which subsequence is considered the best one.
mod mock_metric {
use super::*;
use crate::metric;
// Scores each vertex with its position in the text; edges do not score.
#[derive(Debug,Default)]
pub struct WordIndex;
impl Metric for WordIndex {
fn measure_vertex
(&self, vertex:subsequence_graph::Vertex, _text:&str, _pattern:&str) -> f32 {
vertex.position_in_text as f32
}
fn measure_edge(&self, _:subsequence_graph::Edge, _:&str, _:&str) -> f32 { 0.0 }
}
// Scores each edge with the square of the distance it spans in the text; vertices do not score.
#[derive(Debug,Default)]
pub struct SquareEdgeLength;
impl Metric for SquareEdgeLength {
fn measure_vertex(&self, _:subsequence_graph::Vertex, _:&str, _:&str) -> f32 { 0.0 }
fn measure_edge(&self, edge:subsequence_graph::Edge, _text:&str, _pattern:&str) -> f32 {
(edge.to.position_in_text - edge.from.position_in_text).pow(2) as f32
}
}
// Both mock metrics added together.
pub type Sum = metric::Sum<WordIndex,SquareEdgeLength>;
}
#[test]
fn matches_test() {
assert!( matches("abba", "aba"));
assert!( matches("abba", "ba" ));
assert!( matches("abba", "" ));
assert!(!matches("abba", "abc"));
assert!(!matches("abba", "baa"));
assert!(!matches("" , "ba" ));
}
#[test]
fn finding_best_subsequence() {
let pattern = "abc";
let text = "aabxbacc";
// With `WordIndex` the score is the sum of the picked positions: 1 + 4 + 7.
let expected = Subsequence {
score : 12.0,
indices : vec![1,4,7] // Always pick the latest character possible
};
assert_eq!(find_best_subsequence(text,pattern,mock_metric::WordIndex), Some(expected));
// With `SquareEdgeLength` the score is the sum of squared gaps: 2^2 + 5^2.
let expected = Subsequence {
score : 29.0,
indices : vec![0,2,7] // Prefer the long edges
};
assert_eq!(find_best_subsequence(text,pattern,mock_metric::SquareEdgeLength), Some(expected));
// With the sum of both metrics: (0 + 2 + 7) + (2^2 + 5^2).
let expected = Subsequence {
score : 38.0,
indices : vec![0,2,7] // The edges metric should have more impact
};
assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), Some(expected));
}
#[test]
fn finding_best_subsequence_when_does_not_match() {
// The text contains no 'c', so the last pattern char cannot be matched.
let pattern = "abc";
let text = "aabxbyy";
assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), None);
}
#[test]
fn finding_best_subsequence_corner_cases() {
// An empty pattern matches everything with the default (zero) result.
let pattern = "";
let text = "any";
let expected = Subsequence {
score : 0.0,
indices : vec![],
};
assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), Some(expected));
// A non-empty pattern never matches an empty text.
let pattern = "any";
let text = "";
assert_eq!(find_best_subsequence(text,pattern,mock_metric::Sum::default()), None);
}
}

View File

@ -0,0 +1,209 @@
//! The Subsequence Graph.
use crate::prelude::*;
use std::collections::BTreeSet;
// =============
// === Graph ===
// =============
/// A graph vertex.
///
/// The vertices are identified by two indexes: a layer index and text's char index. See
/// `Graph` docs for details.
///
/// The field order is significant, because it affects how they are ordered in the `Graph`'s
/// `vertices`: the derived `Ord` compares `layer` first and `position_in_text` second, so all
/// vertices of one layer form a contiguous range of the ordered set.
#[derive(Copy,Clone,Debug,Eq,Hash,Ord,PartialEq,PartialOrd)]
pub struct Vertex {
/// The layer this vertex belongs to. It is equal to position in `pattern`.
pub layer:usize,
/// The position in `text` this vertex represents.
pub position_in_text:usize,
}
/// A graph edge.
///
/// The field order is significant, because it affects how they are ordered in the `Graph`'s
/// `edges`: the derived `Ord` compares `from` first and `to` second, so the edges are ordered
/// by their source vertex.
#[allow(missing_docs)]
#[derive(Copy,Clone,Debug,Eq,Hash,Ord,PartialEq,PartialOrd)]
pub struct Edge {
// The source vertex.
pub from : Vertex,
// The target vertex; `Graph` creates only edges whose target is in the layer following `from`
// and at a later text position.
pub to : Vertex,
}
/// The Subsequence Graph.
///
/// This structure helps analyzing all subsequences in given `text` which are case insensitively
/// equal to given `pattern`. The graph is directional.
///
/// The vertices are arranged in layers, one per `pattern` char: each vertex in i-th layer
/// represents a possible position of the i-th subsequence element in `text`.
///
/// Each edge _v → w_ is spanned between vertices from consecutive layers _i_ and _i_+1, and
/// indicates that having i-th subsequence element at position represented by _v_ we can pick
/// (i+1)-th subsequence element at position represented by _w_.
///
/// In such a graph every path spanned between the first and the last layer represents a possible
/// subsequence of `text`.
///
/// We keep vertices and edges ordered, because the scoring algorithm requires this ordering to be
/// effective.
#[allow(missing_docs)]
#[derive(Clone,Debug,Default,Eq,PartialEq)]
pub struct Graph {
// All vertices, ordered by layer and then by position in text.
pub vertices : BTreeSet<Vertex>,
// All edges, ordered by their source vertex and then by their target vertex.
pub edges : BTreeSet<Edge>,
}
impl Graph {
    /// Generate graph based on `text` and `pattern`.
    pub fn new(text:impl Str, pattern:impl Str) -> Self {
        let vertices = Self::create_vertices(text.as_ref(),pattern.as_ref());
        let edges    = Self::create_edges(&vertices);
        Self {vertices,edges}
    }

    /// Create a vertex for each position in `text` where a pattern char can be placed.
    ///
    /// Chars are compared ignoring ASCII case. Positions which cannot be reached from the
    /// previous layer (i.e. not placed after its first vertex) are skipped; once a layer turns
    /// out empty, all the following layers are empty too.
    fn create_vertices(text:&str, pattern:&str) -> BTreeSet<Vertex> {
        let mut vertices   = BTreeSet::new();
        let mut skip_count = 0;
        for (layer,pattern_char) in pattern.chars().enumerate() {
            let reachable       = text.chars().enumerate().skip(skip_count);
            let mut first_match = None;
            for (position_in_text,text_char) in reachable {
                if pattern_char.eq_ignore_ascii_case(&text_char) {
                    vertices.insert(Vertex {layer,position_in_text});
                    if first_match.is_none() {
                        first_match = Some(position_in_text);
                    }
                }
            }
            // The next layer may start right after the first vertex of this one. Without any
            // vertex nothing is reachable: the byte length is never lower than the number of
            // chars, so it skips the whole text.
            skip_count = first_match.map_or(text.len(), |position| position + 1);
        }
        vertices
    }

    /// Join each vertex with all vertices of the next layer placed further in the text.
    fn create_edges(vertices:&BTreeSet<Vertex>) -> BTreeSet<Edge> {
        vertices.iter().flat_map(|&from| {
            // Thanks to the vertices' ordering, all the targets form a single range of the set.
            let targets_begin = Vertex {
                layer            : from.layer + 1,
                position_in_text : from.position_in_text + 1,
            };
            let targets_end = Vertex {
                layer            : from.layer + 2,
                position_in_text : 0,
            };
            vertices.range(targets_begin..targets_end).map(move |&to| Edge {from,to})
        }).collect()
    }

    /// Returns an iterator over all vertices in given layer.
    pub fn vertices_in_layer(&self, index:usize) -> impl Iterator<Item=&Vertex> {
        let layer_begin      = Vertex {layer:index    , position_in_text:0};
        let next_layer_begin = Vertex {layer:index + 1, position_in_text:0};
        self.vertices.range(layer_begin..next_layer_begin)
    }
}
// =============
// === Tests ===
// =============
// Unit tests of the graph generation.
#[cfg(test)]
mod test {
use super::*;
#[test]
fn generating_graph() {
// A single test case: the input and the expected graph, with vertices written as
// `(layer,position_in_text)` pairs and edges as `(from,to)` pairs of such vertices.
struct Case {
text : &'static str,
pattern : &'static str,
vertices : Vec<(usize,usize)>,
edges : Vec<((usize,usize),(usize,usize))>,
}
impl Case {
// Build the graph for the case's input and compare it with the expected one.
fn run(self) {
let graph = Graph::new(self.text, self.pattern);
let expected_vertices = self.vertices.into_iter().map(Self::convert_vertex);
let expected_edges = self.edges.into_iter().map(|(from,to)| Edge {
from : Self::convert_vertex(from),
to : Self::convert_vertex(to),
});
let expected_graph = Graph {
vertices : expected_vertices.collect(),
edges : expected_edges.collect()
};
assert_eq!(graph, expected_graph);
}
fn convert_vertex((layer,position_in_text):(usize, usize)) -> Vertex {
Vertex{layer,position_in_text}
}
}
// The pattern can be found in the text in several ways; the case of letters is ignored.
let classic = Case {
text: "lalala",
pattern: "alA",
vertices : vec![(0,1),(0,3),(0,5),(1,2),(1,4),(2,3),(2,5)],
edges : vec!
[ ((0,1),(1,2))
, ((0,1),(1,4))
, ((0,3),(1,4))
, ((1,2),(2,3))
, ((1,2),(2,5))
, ((1,4),(2,5))
]
};
// The last pattern char cannot be matched, so the last layer has no vertices.
let missing_layer = Case {
text: "laall",
pattern: "ala",
vertices : vec![(0,1),(0,2),(1,3),(1,4)],
edges : vec!
[ ((0,1),(1,3))
, ((0,1),(1,4))
, ((0,2),(1,3))
, ((0,2),(1,4))
]
};
let empty_text = Case {
text: "",
pattern: "ala",
vertices : vec![],
edges : vec![],
};
let empty_pattern = Case {
text: "lalala",
pattern: "",
vertices : vec![],
edges : vec![],
};
// The text ends before the second pattern char could be matched.
let longer_pattern = Case {
text: "la",
pattern: "ala",
vertices : vec![(0,1)],
edges : vec![],
};
// The expected positions are char indices, not byte offsets.
let non_ascii = Case {
text: "test wiadomości push: ęśąćż",
pattern: "tęś",
vertices : vec![(0,0),(0,3),(1,22),(2,23)],
edges : vec!
[ ((0,0) ,(1,22))
, ((0,3) ,(1,22))
, ((1,22),(2,23))
]
};
for case in vec![classic,missing_layer,empty_pattern,empty_text,longer_pattern,non_ascii] {
case.run()
}
}
}