diff --git a/Cargo.lock b/Cargo.lock index e1c8cb0c5f..a3e577531b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2040,6 +2040,10 @@ dependencies = [ "slab", ] +[[package]] +name = "fuzzy" +version = "0.1.0" + [[package]] name = "generator" version = "0.6.23" @@ -5912,6 +5916,7 @@ dependencies = [ "env_logger", "fsevent", "futures", + "fuzzy", "gpui", "http-auth-basic", "ignore", diff --git a/Cargo.toml b/Cargo.toml index 96a40d043e..e26dc720f8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["fsevent", "gpui", "gpui_macros", "server", "sum_tree", "zed", "zrpc"] +members = ["fsevent", "fuzzy", "gpui", "gpui_macros", "server", "sum_tree", "zed", "zrpc"] default-members = ["zed"] [patch.crates-io] diff --git a/fuzzy/Cargo.toml b/fuzzy/Cargo.toml new file mode 100644 index 0000000000..cce17a95a5 --- /dev/null +++ b/fuzzy/Cargo.toml @@ -0,0 +1,6 @@ +[package] +name = "fuzzy" +version = "0.1.0" +edition = "2018" + +[dependencies] diff --git a/zed/src/fuzzy/char_bag.rs b/fuzzy/src/char_bag.rs similarity index 100% rename from zed/src/fuzzy/char_bag.rs rename to fuzzy/src/char_bag.rs diff --git a/fuzzy/src/lib.rs b/fuzzy/src/lib.rs new file mode 100644 index 0000000000..a6cc3f55c3 --- /dev/null +++ b/fuzzy/src/lib.rs @@ -0,0 +1,614 @@ +mod char_bag; + +use std::{ + borrow::Cow, + cmp::Ordering, + path::Path, + sync::atomic::{self, AtomicBool}, + sync::Arc, +}; + +pub use char_bag::CharBag; + +const BASE_DISTANCE_PENALTY: f64 = 0.6; +const ADDITIONAL_DISTANCE_PENALTY: f64 = 0.05; +const MIN_DISTANCE_PENALTY: f64 = 0.2; + +pub struct Matcher<'a> { + query: &'a [char], + lowercase_query: &'a [char], + query_char_bag: CharBag, + smart_case: bool, + max_results: usize, + min_score: f64, + match_positions: Vec, + last_positions: Vec, + score_matrix: Vec>, + best_position_matrix: Vec, +} + +trait Match: Ord { + fn score(&self) -> f64; + fn set_positions(&mut self, positions: Vec); +} + +trait MatchCandidate { + fn has_chars(&self, bag: CharBag) -> bool; + fn to_string<'a>(&'a self) -> Cow<'a, str>; +} + +#[derive(Clone, Debug)] +pub struct PathMatchCandidate<'a> { + pub path: &'a Arc, + pub char_bag: CharBag, +} + +#[derive(Clone, Debug)] +pub struct PathMatch { + pub score: f64, + pub positions: Vec, + pub tree_id: usize, + pub path: Arc, + pub path_prefix: Arc, +} + +#[derive(Clone, Debug)] +pub struct StringMatchCandidate { + pub string: String, + pub char_bag: CharBag, +} + +impl Match for PathMatch { + fn score(&self) -> f64 { + self.score + } + + fn set_positions(&mut self, positions: Vec) { + self.positions = positions; + } +} + +impl Match for StringMatch { + fn score(&self) -> f64 { + self.score + } + + fn set_positions(&mut self, positions: Vec) { + self.positions = positions; + } +} + +impl<'a> MatchCandidate for PathMatchCandidate<'a> { + fn has_chars(&self, bag: CharBag) -> bool { + self.char_bag.is_superset(bag) + } + + fn to_string(&self) -> Cow<'a, str> { + self.path.to_string_lossy() + } +} + +impl<'a> MatchCandidate for &'a StringMatchCandidate { + fn has_chars(&self, bag: CharBag) -> bool { + self.char_bag.is_superset(bag) + } + + fn to_string(&self) -> Cow<'a, str> { + self.string.as_str().into() + } +} + +#[derive(Clone, Debug)] +pub struct StringMatch { + pub score: f64, + pub positions: Vec, + pub string: String, +} + +impl PartialEq for StringMatch { + fn eq(&self, other: &Self) -> bool { + self.score.eq(&other.score) + } +} + +impl Eq for StringMatch {} + +impl PartialOrd for StringMatch { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for StringMatch { + fn cmp(&self, other: &Self) -> Ordering { + self.score + .partial_cmp(&other.score) + .unwrap_or(Ordering::Equal) + .then_with(|| self.string.cmp(&other.string)) + } +} + +impl PartialEq for PathMatch { + fn eq(&self, other: &Self) -> bool { + self.score.eq(&other.score) + } +} + +impl Eq for PathMatch {} + +impl PartialOrd for PathMatch { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for PathMatch { + fn cmp(&self, other: &Self) -> Ordering { + self.score + .partial_cmp(&other.score) + .unwrap_or(Ordering::Equal) + .then_with(|| self.tree_id.cmp(&other.tree_id)) + .then_with(|| Arc::as_ptr(&self.path).cmp(&Arc::as_ptr(&other.path))) + } +} + +impl<'a> Matcher<'a> { + pub fn new( + query: &'a [char], + lowercase_query: &'a [char], + query_char_bag: CharBag, + smart_case: bool, + max_results: usize, + ) -> Self { + Self { + query, + lowercase_query, + query_char_bag, + min_score: 0.0, + last_positions: vec![0; query.len()], + match_positions: vec![0; query.len()], + score_matrix: Vec::new(), + best_position_matrix: Vec::new(), + smart_case, + max_results, + } + } + + pub fn match_strings( + &mut self, + candidates: &[StringMatchCandidate], + results: &mut Vec, + cancel_flag: &AtomicBool, + ) { + self.match_internal( + &[], + &[], + candidates.iter(), + results, + cancel_flag, + |candidate, score| StringMatch { + score, + positions: Vec::new(), + string: candidate.string.to_string(), + }, + ) + } + + pub fn match_paths( + &mut self, + tree_id: usize, + path_prefix: Arc, + path_entries: impl Iterator>, + results: &mut Vec, + cancel_flag: &AtomicBool, + ) { + let prefix = path_prefix.chars().collect::>(); + let lowercase_prefix = prefix + .iter() + .map(|c| c.to_ascii_lowercase()) + .collect::>(); + self.match_internal( + &prefix, + &lowercase_prefix, + path_entries, + results, + cancel_flag, + |candidate, score| PathMatch { + score, + tree_id, + positions: Vec::new(), + path: candidate.path.clone(), + path_prefix: path_prefix.clone(), + }, + ) + } + + fn match_internal( + &mut self, + prefix: &[char], + lowercase_prefix: &[char], + candidates: impl Iterator, + results: &mut Vec, + cancel_flag: &AtomicBool, + build_match: F, + ) where + R: Match, + F: Fn(&C, f64) -> R, + { + let mut candidate_chars = Vec::new(); + let mut lowercase_candidate_chars = Vec::new(); + + for candidate in candidates { + if !candidate.has_chars(self.query_char_bag) { + continue; + } + + if cancel_flag.load(atomic::Ordering::Relaxed) { + break; + } + + candidate_chars.clear(); + lowercase_candidate_chars.clear(); + for c in candidate.to_string().chars() { + candidate_chars.push(c); + lowercase_candidate_chars.push(c.to_ascii_lowercase()); + } + + if !self.find_last_positions(&lowercase_prefix, &lowercase_candidate_chars) { + continue; + } + + let matrix_len = self.query.len() * (prefix.len() + candidate_chars.len()); + self.score_matrix.clear(); + self.score_matrix.resize(matrix_len, None); + self.best_position_matrix.clear(); + self.best_position_matrix.resize(matrix_len, 0); + + let score = self.score_match( + &candidate_chars, + &lowercase_candidate_chars, + &prefix, + &lowercase_prefix, + ); + + if score > 0.0 { + let mut mat = build_match(&candidate, score); + if let Err(i) = results.binary_search_by(|m| mat.cmp(&m)) { + if results.len() < self.max_results { + mat.set_positions(self.match_positions.clone()); + results.insert(i, mat); + } else if i < results.len() { + results.pop(); + mat.set_positions(self.match_positions.clone()); + results.insert(i, mat); + } + if results.len() == self.max_results { + self.min_score = results.last().unwrap().score(); + } + } + } + } + } + + fn find_last_positions(&mut self, prefix: &[char], path: &[char]) -> bool { + let mut path = path.iter(); + let mut prefix_iter = prefix.iter(); + for (i, char) in self.query.iter().enumerate().rev() { + if let Some(j) = path.rposition(|c| c == char) { + self.last_positions[i] = j + prefix.len(); + } else if let Some(j) = prefix_iter.rposition(|c| c == char) { + self.last_positions[i] = j; + } else { + return false; + } + } + true + } + + fn score_match( + &mut self, + path: &[char], + path_cased: &[char], + prefix: &[char], + lowercase_prefix: &[char], + ) -> f64 { + let score = self.recursive_score_match( + path, + path_cased, + prefix, + lowercase_prefix, + 0, + 0, + self.query.len() as f64, + ) * self.query.len() as f64; + + if score <= 0.0 { + return 0.0; + } + + let path_len = prefix.len() + path.len(); + let mut cur_start = 0; + let mut byte_ix = 0; + let mut char_ix = 0; + for i in 0..self.query.len() { + let match_char_ix = self.best_position_matrix[i * path_len + cur_start]; + while char_ix < match_char_ix { + let ch = prefix + .get(char_ix) + .or_else(|| path.get(char_ix - prefix.len())) + .unwrap(); + byte_ix += ch.len_utf8(); + char_ix += 1; + } + cur_start = match_char_ix + 1; + self.match_positions[i] = byte_ix; + } + + score + } + + fn recursive_score_match( + &mut self, + path: &[char], + path_cased: &[char], + prefix: &[char], + lowercase_prefix: &[char], + query_idx: usize, + path_idx: usize, + cur_score: f64, + ) -> f64 { + if query_idx == self.query.len() { + return 1.0; + } + + let path_len = prefix.len() + path.len(); + + if let Some(memoized) = self.score_matrix[query_idx * path_len + path_idx] { + return memoized; + } + + let mut score = 0.0; + let mut best_position = 0; + + let query_char = self.lowercase_query[query_idx]; + let limit = self.last_positions[query_idx]; + + let mut last_slash = 0; + for j in path_idx..=limit { + let path_char = if j < prefix.len() { + lowercase_prefix[j] + } else { + path_cased[j - prefix.len()] + }; + let is_path_sep = path_char == '/' || path_char == '\\'; + + if query_idx == 0 && is_path_sep { + last_slash = j; + } + + if query_char == path_char || (is_path_sep && query_char == '_' || query_char == '\\') { + let curr = if j < prefix.len() { + prefix[j] + } else { + path[j - prefix.len()] + }; + + let mut char_score = 1.0; + if j > path_idx { + let last = if j - 1 < prefix.len() { + prefix[j - 1] + } else { + path[j - 1 - prefix.len()] + }; + + if last == '/' { + char_score = 0.9; + } else if last == '-' || last == '_' || last == ' ' || last.is_numeric() { + char_score = 0.8; + } else if last.is_lowercase() && curr.is_uppercase() { + char_score = 0.8; + } else if last == '.' { + char_score = 0.7; + } else if query_idx == 0 { + char_score = BASE_DISTANCE_PENALTY; + } else { + char_score = MIN_DISTANCE_PENALTY.max( + BASE_DISTANCE_PENALTY + - (j - path_idx - 1) as f64 * ADDITIONAL_DISTANCE_PENALTY, + ); + } + } + + // Apply a severe penalty if the case doesn't match. + // This will make the exact matches have higher score than the case-insensitive and the + // path insensitive matches. + if (self.smart_case || curr == '/') && self.query[query_idx] != curr { + char_score *= 0.001; + } + + let mut multiplier = char_score; + + // Scale the score based on how deep within the path we found the match. + if query_idx == 0 { + multiplier /= ((prefix.len() + path.len()) - last_slash) as f64; + } + + let mut next_score = 1.0; + if self.min_score > 0.0 { + next_score = cur_score * multiplier; + // Scores only decrease. If we can't pass the previous best, bail + if next_score < self.min_score { + // Ensure that score is non-zero so we use it in the memo table. + if score == 0.0 { + score = 1e-18; + } + continue; + } + } + + let new_score = self.recursive_score_match( + path, + path_cased, + prefix, + lowercase_prefix, + query_idx + 1, + j + 1, + next_score, + ) * multiplier; + + if new_score > score { + score = new_score; + best_position = j; + // Optimization: can't score better than 1. + if new_score == 1.0 { + break; + } + } + } + } + + if best_position != 0 { + self.best_position_matrix[query_idx * path_len + path_idx] = best_position; + } + + self.score_matrix[query_idx * path_len + path_idx] = Some(score); + score + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn test_get_last_positions() { + let mut query: &[char] = &['d', 'c']; + let mut matcher = Matcher::new(query, query, query.into(), false, 10); + let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); + assert_eq!(result, false); + + query = &['c', 'd']; + let mut matcher = Matcher::new(query, query, query.into(), false, 10); + let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); + assert_eq!(result, true); + assert_eq!(matcher.last_positions, vec![2, 4]); + + query = &['z', '/', 'z', 'f']; + let mut matcher = Matcher::new(query, query, query.into(), false, 10); + let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']); + assert_eq!(result, true); + assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]); + } + + #[test] + fn test_match_path_entries() { + let paths = vec![ + "", + "a", + "ab", + "abC", + "abcd", + "alphabravocharlie", + "AlphaBravoCharlie", + "thisisatestdir", + "/////ThisIsATestDir", + "/this/is/a/test/dir", + "/test/tiatd", + ]; + + assert_eq!( + match_query("abc", false, &paths), + vec![ + ("abC", vec![0, 1, 2]), + ("abcd", vec![0, 1, 2]), + ("AlphaBravoCharlie", vec![0, 5, 10]), + ("alphabravocharlie", vec![4, 5, 10]), + ] + ); + assert_eq!( + match_query("t/i/a/t/d", false, &paths), + vec![("/this/is/a/test/dir", vec![1, 5, 6, 8, 9, 10, 11, 15, 16]),] + ); + + assert_eq!( + match_query("tiatd", false, &paths), + vec![ + ("/test/tiatd", vec![6, 7, 8, 9, 10]), + ("/this/is/a/test/dir", vec![1, 6, 9, 11, 16]), + ("/////ThisIsATestDir", vec![5, 9, 11, 12, 16]), + ("thisisatestdir", vec![0, 2, 6, 7, 11]), + ] + ); + } + + #[test] + fn test_match_multibyte_path_entries() { + let paths = vec!["aαbβ/cγdδ", "αβγδ/bcde", "c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", "/d/🆒/h"]; + assert_eq!("1️⃣".len(), 7); + assert_eq!( + match_query("bcd", false, &paths), + vec![ + ("αβγδ/bcde", vec![9, 10, 11]), + ("aαbβ/cγdδ", vec![3, 7, 10]), + ] + ); + assert_eq!( + match_query("cde", false, &paths), + vec![ + ("αβγδ/bcde", vec![10, 11, 12]), + ("c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", vec![0, 23, 46]), + ] + ); + } + + fn match_query<'a>( + query: &str, + smart_case: bool, + paths: &Vec<&'a str>, + ) -> Vec<(&'a str, Vec)> { + let lowercase_query = query.to_lowercase().chars().collect::>(); + let query = query.chars().collect::>(); + let query_chars = CharBag::from(&lowercase_query[..]); + + let path_arcs = paths + .iter() + .map(|path| Arc::from(PathBuf::from(path))) + .collect::>(); + let mut path_entries = Vec::new(); + for (i, path) in paths.iter().enumerate() { + let lowercase_path = path.to_lowercase().chars().collect::>(); + let char_bag = CharBag::from(lowercase_path.as_slice()); + path_entries.push(PathMatchCandidate { + char_bag, + path: path_arcs.get(i).unwrap(), + }); + } + + let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100); + + let cancel_flag = AtomicBool::new(false); + let mut results = Vec::new(); + matcher.match_paths( + 0, + "".into(), + path_entries.into_iter(), + &mut results, + &cancel_flag, + ); + + results + .into_iter() + .map(|result| { + ( + paths + .iter() + .copied() + .find(|p| result.path.as_ref() == Path::new(p)) + .unwrap(), + result.positions, + ) + }) + .collect() + } +} diff --git a/sum_tree/Cargo.toml b/sum_tree/Cargo.toml index 510044cfb8..6a9893502e 100644 --- a/sum_tree/Cargo.toml +++ b/sum_tree/Cargo.toml @@ -3,8 +3,6 @@ name = "sum_tree" version = "0.1.0" edition = "2018" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] arrayvec = "0.7.1" diff --git a/zed/Cargo.toml b/zed/Cargo.toml index 6f14398eb0..06ad32a498 100644 --- a/zed/Cargo.toml +++ b/zed/Cargo.toml @@ -28,6 +28,7 @@ dirs = "3.0" easy-parallel = "3.1.0" fsevent = { path = "../fsevent" } futures = "0.3" +fuzzy = { path = "../fuzzy" } gpui = { path = "../gpui" } http-auth-basic = "0.1.3" ignore = "0.4" diff --git a/zed/src/file_finder.rs b/zed/src/file_finder.rs index b6c6ad18b2..7d5cfe64de 100644 --- a/zed/src/file_finder.rs +++ b/zed/src/file_finder.rs @@ -1,10 +1,10 @@ use crate::{ editor::{self, Editor}, + fuzzy::PathMatch, project::Project, settings::Settings, util, workspace::Workspace, - worktree::PathMatch, }; use gpui::{ action, diff --git a/zed/src/fuzzy.rs b/zed/src/fuzzy.rs index 582af400b4..bbc519c7cc 100644 --- a/zed/src/fuzzy.rs +++ b/zed/src/fuzzy.rs @@ -1,158 +1,14 @@ -mod char_bag; - -use crate::util; +use crate::{ + util, + worktree::{EntryKind, Snapshot}, +}; use gpui::executor; use std::{ - borrow::Cow, - cmp::Ordering, - path::Path, - sync::atomic::{self, AtomicBool}, - sync::Arc, + cmp, + sync::{atomic::AtomicBool, Arc}, }; -pub use char_bag::CharBag; - -const BASE_DISTANCE_PENALTY: f64 = 0.6; -const ADDITIONAL_DISTANCE_PENALTY: f64 = 0.05; -const MIN_DISTANCE_PENALTY: f64 = 0.2; - -pub struct Matcher<'a> { - query: &'a [char], - lowercase_query: &'a [char], - query_char_bag: CharBag, - smart_case: bool, - max_results: usize, - min_score: f64, - match_positions: Vec, - last_positions: Vec, - score_matrix: Vec>, - best_position_matrix: Vec, -} - -trait Match: Ord { - fn score(&self) -> f64; - fn set_positions(&mut self, positions: Vec); -} - -trait MatchCandidate { - fn has_chars(&self, bag: CharBag) -> bool; - fn to_string<'a>(&'a self) -> Cow<'a, str>; -} - -#[derive(Clone, Debug)] -pub struct PathMatchCandidate<'a> { - pub path: &'a Arc, - pub char_bag: CharBag, -} - -#[derive(Clone, Debug)] -pub struct PathMatch { - pub score: f64, - pub positions: Vec, - pub tree_id: usize, - pub path: Arc, - pub path_prefix: Arc, -} - -#[derive(Clone, Debug)] -pub struct StringMatchCandidate { - pub string: String, - pub char_bag: CharBag, -} - -impl Match for PathMatch { - fn score(&self) -> f64 { - self.score - } - - fn set_positions(&mut self, positions: Vec) { - self.positions = positions; - } -} - -impl Match for StringMatch { - fn score(&self) -> f64 { - self.score - } - - fn set_positions(&mut self, positions: Vec) { - self.positions = positions; - } -} - -impl<'a> MatchCandidate for PathMatchCandidate<'a> { - fn has_chars(&self, bag: CharBag) -> bool { - self.char_bag.is_superset(bag) - } - - fn to_string(&self) -> Cow<'a, str> { - self.path.to_string_lossy() - } -} - -impl<'a> MatchCandidate for &'a StringMatchCandidate { - fn has_chars(&self, bag: CharBag) -> bool { - self.char_bag.is_superset(bag) - } - - fn to_string(&self) -> Cow<'a, str> { - self.string.as_str().into() - } -} - -#[derive(Clone, Debug)] -pub struct StringMatch { - pub score: f64, - pub positions: Vec, - pub string: String, -} - -impl PartialEq for StringMatch { - fn eq(&self, other: &Self) -> bool { - self.score.eq(&other.score) - } -} - -impl Eq for StringMatch {} - -impl PartialOrd for StringMatch { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for StringMatch { - fn cmp(&self, other: &Self) -> Ordering { - self.score - .partial_cmp(&other.score) - .unwrap_or(Ordering::Equal) - .then_with(|| self.string.cmp(&other.string)) - } -} - -impl PartialEq for PathMatch { - fn eq(&self, other: &Self) -> bool { - self.score.eq(&other.score) - } -} - -impl Eq for PathMatch {} - -impl PartialOrd for PathMatch { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for PathMatch { - fn cmp(&self, other: &Self) -> Ordering { - self.score - .partial_cmp(&other.score) - .unwrap_or(Ordering::Equal) - .then_with(|| self.tree_id.cmp(&other.tree_id)) - .then_with(|| Arc::as_ptr(&self.path).cmp(&Arc::as_ptr(&other.path))) - } -} +pub use fuzzy::*; pub async fn match_strings( candidates: &[StringMatchCandidate], @@ -210,463 +66,110 @@ pub async fn match_strings( results } -impl<'a> Matcher<'a> { - pub fn new( - query: &'a [char], - lowercase_query: &'a [char], - query_char_bag: CharBag, - smart_case: bool, - max_results: usize, - ) -> Self { - Self { - query, - lowercase_query, - query_char_bag, - min_score: 0.0, - last_positions: vec![0; query.len()], - match_positions: vec![0; query.len()], - score_matrix: Vec::new(), - best_position_matrix: Vec::new(), - smart_case, - max_results, - } +pub async fn match_paths( + snapshots: &[Snapshot], + query: &str, + include_ignored: bool, + smart_case: bool, + max_results: usize, + cancel_flag: &AtomicBool, + background: Arc, +) -> Vec { + let path_count: usize = if include_ignored { + snapshots.iter().map(Snapshot::file_count).sum() + } else { + snapshots.iter().map(Snapshot::visible_file_count).sum() + }; + if path_count == 0 { + return Vec::new(); } - pub fn match_strings( - &mut self, - candidates: &[StringMatchCandidate], - results: &mut Vec, - cancel_flag: &AtomicBool, - ) { - self.match_internal( - &[], - &[], - candidates.iter(), - results, - cancel_flag, - |candidate, score| StringMatch { - score, - positions: Vec::new(), - string: candidate.string.to_string(), - }, - ) - } + let lowercase_query = query.to_lowercase().chars().collect::>(); + let query = query.chars().collect::>(); - pub fn match_paths( - &mut self, - tree_id: usize, - path_prefix: Arc, - path_entries: impl Iterator>, - results: &mut Vec, - cancel_flag: &AtomicBool, - ) { - let prefix = path_prefix.chars().collect::>(); - let lowercase_prefix = prefix - .iter() - .map(|c| c.to_ascii_lowercase()) - .collect::>(); - self.match_internal( - &prefix, - &lowercase_prefix, - path_entries, - results, - cancel_flag, - |candidate, score| PathMatch { - score, - tree_id, - positions: Vec::new(), - path: candidate.path.clone(), - path_prefix: path_prefix.clone(), - }, - ) - } + let lowercase_query = &lowercase_query; + let query = &query; + let query_char_bag = CharBag::from(&lowercase_query[..]); - fn match_internal( - &mut self, - prefix: &[char], - lowercase_prefix: &[char], - candidates: impl Iterator, - results: &mut Vec, - cancel_flag: &AtomicBool, - build_match: F, - ) where - R: Match, - F: Fn(&C, f64) -> R, - { - let mut candidate_chars = Vec::new(); - let mut lowercase_candidate_chars = Vec::new(); + let num_cpus = background.num_cpus().min(path_count); + let segment_size = (path_count + num_cpus - 1) / num_cpus; + let mut segment_results = (0..num_cpus) + .map(|_| Vec::with_capacity(max_results)) + .collect::>(); - for candidate in candidates { - if !candidate.has_chars(self.query_char_bag) { - continue; - } + background + .scoped(|scope| { + for (segment_idx, results) in segment_results.iter_mut().enumerate() { + scope.spawn(async move { + let segment_start = segment_idx * segment_size; + let segment_end = segment_start + segment_size; + let mut matcher = Matcher::new( + query, + lowercase_query, + query_char_bag, + smart_case, + max_results, + ); - if cancel_flag.load(atomic::Ordering::Relaxed) { - break; - } + let mut tree_start = 0; + for snapshot in snapshots { + let tree_end = if include_ignored { + tree_start + snapshot.file_count() + } else { + tree_start + snapshot.visible_file_count() + }; - candidate_chars.clear(); - lowercase_candidate_chars.clear(); - for c in candidate.to_string().chars() { - candidate_chars.push(c); - lowercase_candidate_chars.push(c.to_ascii_lowercase()); - } + if tree_start < segment_end && segment_start < tree_end { + let path_prefix: Arc = + if snapshot.root_entry().map_or(false, |e| e.is_file()) { + snapshot.root_name().into() + } else if snapshots.len() > 1 { + format!("{}/", snapshot.root_name()).into() + } else { + "".into() + }; - if !self.find_last_positions(&lowercase_prefix, &lowercase_candidate_chars) { - continue; - } + let start = cmp::max(tree_start, segment_start) - tree_start; + let end = cmp::min(tree_end, segment_end) - tree_start; + let paths = snapshot + .files(include_ignored, start) + .take(end - start) + .map(|entry| { + if let EntryKind::File(char_bag) = entry.kind { + PathMatchCandidate { + path: &entry.path, + char_bag, + } + } else { + unreachable!() + } + }); - let matrix_len = self.query.len() * (prefix.len() + candidate_chars.len()); - self.score_matrix.clear(); - self.score_matrix.resize(matrix_len, None); - self.best_position_matrix.clear(); - self.best_position_matrix.resize(matrix_len, 0); - - let score = self.score_match( - &candidate_chars, - &lowercase_candidate_chars, - &prefix, - &lowercase_prefix, - ); - - if score > 0.0 { - let mut mat = build_match(&candidate, score); - if let Err(i) = results.binary_search_by(|m| mat.cmp(&m)) { - if results.len() < self.max_results { - mat.set_positions(self.match_positions.clone()); - results.insert(i, mat); - } else if i < results.len() { - results.pop(); - mat.set_positions(self.match_positions.clone()); - results.insert(i, mat); - } - if results.len() == self.max_results { - self.min_score = results.last().unwrap().score(); - } - } - } - } - } - - fn find_last_positions(&mut self, prefix: &[char], path: &[char]) -> bool { - let mut path = path.iter(); - let mut prefix_iter = prefix.iter(); - for (i, char) in self.query.iter().enumerate().rev() { - if let Some(j) = path.rposition(|c| c == char) { - self.last_positions[i] = j + prefix.len(); - } else if let Some(j) = prefix_iter.rposition(|c| c == char) { - self.last_positions[i] = j; - } else { - return false; - } - } - true - } - - fn score_match( - &mut self, - path: &[char], - path_cased: &[char], - prefix: &[char], - lowercase_prefix: &[char], - ) -> f64 { - let score = self.recursive_score_match( - path, - path_cased, - prefix, - lowercase_prefix, - 0, - 0, - self.query.len() as f64, - ) * self.query.len() as f64; - - if score <= 0.0 { - return 0.0; - } - - let path_len = prefix.len() + path.len(); - let mut cur_start = 0; - let mut byte_ix = 0; - let mut char_ix = 0; - for i in 0..self.query.len() { - let match_char_ix = self.best_position_matrix[i * path_len + cur_start]; - while char_ix < match_char_ix { - let ch = prefix - .get(char_ix) - .or_else(|| path.get(char_ix - prefix.len())) - .unwrap(); - byte_ix += ch.len_utf8(); - char_ix += 1; - } - cur_start = match_char_ix + 1; - self.match_positions[i] = byte_ix; - } - - score - } - - fn recursive_score_match( - &mut self, - path: &[char], - path_cased: &[char], - prefix: &[char], - lowercase_prefix: &[char], - query_idx: usize, - path_idx: usize, - cur_score: f64, - ) -> f64 { - if query_idx == self.query.len() { - return 1.0; - } - - let path_len = prefix.len() + path.len(); - - if let Some(memoized) = self.score_matrix[query_idx * path_len + path_idx] { - return memoized; - } - - let mut score = 0.0; - let mut best_position = 0; - - let query_char = self.lowercase_query[query_idx]; - let limit = self.last_positions[query_idx]; - - let mut last_slash = 0; - for j in path_idx..=limit { - let path_char = if j < prefix.len() { - lowercase_prefix[j] - } else { - path_cased[j - prefix.len()] - }; - let is_path_sep = path_char == '/' || path_char == '\\'; - - if query_idx == 0 && is_path_sep { - last_slash = j; - } - - if query_char == path_char || (is_path_sep && query_char == '_' || query_char == '\\') { - let curr = if j < prefix.len() { - prefix[j] - } else { - path[j - prefix.len()] - }; - - let mut char_score = 1.0; - if j > path_idx { - let last = if j - 1 < prefix.len() { - prefix[j - 1] - } else { - path[j - 1 - prefix.len()] - }; - - if last == '/' { - char_score = 0.9; - } else if last == '-' || last == '_' || last == ' ' || last.is_numeric() { - char_score = 0.8; - } else if last.is_lowercase() && curr.is_uppercase() { - char_score = 0.8; - } else if last == '.' { - char_score = 0.7; - } else if query_idx == 0 { - char_score = BASE_DISTANCE_PENALTY; - } else { - char_score = MIN_DISTANCE_PENALTY.max( - BASE_DISTANCE_PENALTY - - (j - path_idx - 1) as f64 * ADDITIONAL_DISTANCE_PENALTY, - ); - } - } - - // Apply a severe penalty if the case doesn't match. - // This will make the exact matches have higher score than the case-insensitive and the - // path insensitive matches. - if (self.smart_case || curr == '/') && self.query[query_idx] != curr { - char_score *= 0.001; - } - - let mut multiplier = char_score; - - // Scale the score based on how deep within the path we found the match. - if query_idx == 0 { - multiplier /= ((prefix.len() + path.len()) - last_slash) as f64; - } - - let mut next_score = 1.0; - if self.min_score > 0.0 { - next_score = cur_score * multiplier; - // Scores only decrease. If we can't pass the previous best, bail - if next_score < self.min_score { - // Ensure that score is non-zero so we use it in the memo table. - if score == 0.0 { - score = 1e-18; + matcher.match_paths( + snapshot.id(), + path_prefix, + paths, + results, + &cancel_flag, + ); } - continue; + if tree_end >= segment_end { + break; + } + tree_start = tree_end; } - } - - let new_score = self.recursive_score_match( - path, - path_cased, - prefix, - lowercase_prefix, - query_idx + 1, - j + 1, - next_score, - ) * multiplier; - - if new_score > score { - score = new_score; - best_position = j; - // Optimization: can't score better than 1. - if new_score == 1.0 { - break; - } - } + }) } - } + }) + .await; - if best_position != 0 { - self.best_position_matrix[query_idx * path_len + path_idx] = best_position; + let mut results = Vec::new(); + for segment_result in segment_results { + if results.is_empty() { + results = segment_result; + } else { + util::extend_sorted(&mut results, segment_result, max_results, |a, b| b.cmp(&a)); } - - self.score_matrix[query_idx * path_len + path_idx] = Some(score); - score - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::path::PathBuf; - - #[test] - fn test_get_last_positions() { - let mut query: &[char] = &['d', 'c']; - let mut matcher = Matcher::new(query, query, query.into(), false, 10); - let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); - assert_eq!(result, false); - - query = &['c', 'd']; - let mut matcher = Matcher::new(query, query, query.into(), false, 10); - let result = matcher.find_last_positions(&['a', 'b', 'c'], &['b', 'd', 'e', 'f']); - assert_eq!(result, true); - assert_eq!(matcher.last_positions, vec![2, 4]); - - query = &['z', '/', 'z', 'f']; - let mut matcher = Matcher::new(query, query, query.into(), false, 10); - let result = matcher.find_last_positions(&['z', 'e', 'd', '/'], &['z', 'e', 'd', '/', 'f']); - assert_eq!(result, true); - assert_eq!(matcher.last_positions, vec![0, 3, 4, 8]); - } - - #[test] - fn test_match_path_entries() { - let paths = vec![ - "", - "a", - "ab", - "abC", - "abcd", - "alphabravocharlie", - "AlphaBravoCharlie", - "thisisatestdir", - "/////ThisIsATestDir", - "/this/is/a/test/dir", - "/test/tiatd", - ]; - - assert_eq!( - match_query("abc", false, &paths), - vec![ - ("abC", vec![0, 1, 2]), - ("abcd", vec![0, 1, 2]), - ("AlphaBravoCharlie", vec![0, 5, 10]), - ("alphabravocharlie", vec![4, 5, 10]), - ] - ); - assert_eq!( - match_query("t/i/a/t/d", false, &paths), - vec![("/this/is/a/test/dir", vec![1, 5, 6, 8, 9, 10, 11, 15, 16]),] - ); - - assert_eq!( - match_query("tiatd", false, &paths), - vec![ - ("/test/tiatd", vec![6, 7, 8, 9, 10]), - ("/this/is/a/test/dir", vec![1, 6, 9, 11, 16]), - ("/////ThisIsATestDir", vec![5, 9, 11, 12, 16]), - ("thisisatestdir", vec![0, 2, 6, 7, 11]), - ] - ); - } - - #[test] - fn test_match_multibyte_path_entries() { - let paths = vec!["aαbβ/cγdδ", "αβγδ/bcde", "c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", "/d/🆒/h"]; - assert_eq!("1️⃣".len(), 7); - assert_eq!( - match_query("bcd", false, &paths), - vec![ - ("αβγδ/bcde", vec![9, 10, 11]), - ("aαbβ/cγdδ", vec![3, 7, 10]), - ] - ); - assert_eq!( - match_query("cde", false, &paths), - vec![ - ("αβγδ/bcde", vec![10, 11, 12]), - ("c1️⃣2️⃣3️⃣/d4️⃣5️⃣6️⃣/e7️⃣8️⃣9️⃣/f", vec![0, 23, 46]), - ] - ); - } - - fn match_query<'a>( - query: &str, - smart_case: bool, - paths: &Vec<&'a str>, - ) -> Vec<(&'a str, Vec)> { - let lowercase_query = query.to_lowercase().chars().collect::>(); - let query = query.chars().collect::>(); - let query_chars = CharBag::from(&lowercase_query[..]); - - let path_arcs = paths - .iter() - .map(|path| Arc::from(PathBuf::from(path))) - .collect::>(); - let mut path_entries = Vec::new(); - for (i, path) in paths.iter().enumerate() { - let lowercase_path = path.to_lowercase().chars().collect::>(); - let char_bag = CharBag::from(lowercase_path.as_slice()); - path_entries.push(PathMatchCandidate { - char_bag, - path: path_arcs.get(i).unwrap(), - }); - } - - let mut matcher = Matcher::new(&query, &lowercase_query, query_chars, smart_case, 100); - - let cancel_flag = AtomicBool::new(false); - let mut results = Vec::new(); - matcher.match_paths( - 0, - "".into(), - path_entries.into_iter(), - &mut results, - &cancel_flag, - ); - - results - .into_iter() - .map(|result| { - ( - paths - .iter() - .copied() - .find(|p| result.path.as_ref() == Path::new(p)) - .unwrap(), - result.positions, - ) - }) - .collect() } + results } diff --git a/zed/src/project.rs b/zed/src/project.rs index 7b5480209e..e7cc636ad1 100644 --- a/zed/src/project.rs +++ b/zed/src/project.rs @@ -1,17 +1,16 @@ use crate::{ fs::Fs, - fuzzy::{CharBag, Matcher, PathMatchCandidate}, + fuzzy::{self, PathMatch}, language::LanguageRegistry, rpc::Client, - util::{self, TryFutureExt as _}, - worktree::{self, EntryKind, PathMatch, Snapshot, Worktree}, + util::TryFutureExt as _, + worktree::{self, Worktree}, AppState, }; use anyhow::Result; use futures::Future; use gpui::{AppContext, Entity, ModelContext, ModelHandle, Task}; use std::{ - cmp, path::Path, sync::{atomic::AtomicBool, Arc}, }; @@ -191,7 +190,7 @@ impl Project { pub fn match_paths<'a>( &self, - query: &str, + query: &'a str, include_ignored: bool, smart_case: bool, max_results: usize, @@ -203,111 +202,19 @@ impl Project { .iter() .map(|worktree| worktree.read(cx).snapshot()) .collect::>(); - - let path_count: usize = if include_ignored { - snapshots.iter().map(Snapshot::file_count).sum() - } else { - snapshots.iter().map(Snapshot::visible_file_count).sum() - }; - - let lowercase_query = query.to_lowercase().chars().collect::>(); - let query = query.chars().collect::>(); - let query_char_bag = CharBag::from(&lowercase_query[..]); - let background = cx.background().clone(); async move { - if path_count == 0 { - return Vec::new(); - } - - let num_cpus = background.num_cpus().min(path_count); - let segment_size = (path_count + num_cpus - 1) / num_cpus; - let mut segment_results = (0..num_cpus) - .map(|_| Vec::with_capacity(max_results)) - .collect::>(); - - let lowercase_query = &lowercase_query; - let query = &query; - let snapshots = snapshots.as_slice(); - - background - .scoped(|scope| { - for (segment_idx, results) in segment_results.iter_mut().enumerate() { - scope.spawn(async move { - let segment_start = segment_idx * segment_size; - let segment_end = segment_start + segment_size; - let mut matcher = Matcher::new( - query, - lowercase_query, - query_char_bag, - smart_case, - max_results, - ); - - let mut tree_start = 0; - for snapshot in snapshots { - let tree_end = if include_ignored { - tree_start + snapshot.file_count() - } else { - tree_start + snapshot.visible_file_count() - }; - - if tree_start < segment_end && segment_start < tree_end { - let path_prefix: Arc = - if snapshot.root_entry().map_or(false, |e| e.is_file()) { - snapshot.root_name().into() - } else if snapshots.len() > 1 { - format!("{}/", snapshot.root_name()).into() - } else { - "".into() - }; - - let start = cmp::max(tree_start, segment_start) - tree_start; - let end = cmp::min(tree_end, segment_end) - tree_start; - let paths = snapshot - .files(include_ignored, start) - .take(end - start) - .map(|entry| { - if let EntryKind::File(char_bag) = entry.kind { - PathMatchCandidate { - path: &entry.path, - char_bag, - } - } else { - unreachable!() - } - }); - - matcher.match_paths( - snapshot.id(), - path_prefix, - paths, - results, - &cancel_flag, - ); - } - if tree_end >= segment_end { - break; - } - tree_start = tree_end; - } - }) - } - }) - .await; - - let mut results = Vec::new(); - for segment_result in segment_results { - if results.is_empty() { - results = segment_result; - } else { - util::extend_sorted(&mut results, segment_result, max_results, |a, b| { - b.cmp(&a) - }); - } - } - results + fuzzy::match_paths( + snapshots.as_slice(), + query, + include_ignored, + smart_case, + max_results, + cancel_flag, + background, + ) + .await } } } diff --git a/zed/src/worktree.rs b/zed/src/worktree.rs index 959b3a1c24..b20b7530fd 100644 --- a/zed/src/worktree.rs +++ b/zed/src/worktree.rs @@ -4,7 +4,6 @@ use self::ignore::IgnoreStack; use crate::{ editor::{self, Buffer, History, Operation, Rope}, fs::{self, Fs}, - fuzzy, fuzzy::CharBag, language::{Language, LanguageRegistry}, rpc::{self, proto, Status}, @@ -14,7 +13,6 @@ use crate::{ use ::ignore::gitignore::{Gitignore, GitignoreBuilder}; use anyhow::{anyhow, Result}; use futures::{Stream, StreamExt}; -pub use fuzzy::PathMatch; use gpui::{ executor, AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, MutableAppContext, Task, UpgradeModelHandle, WeakModelHandle,