Optimize matching of multiple file-watch globs using the globset crate

This commit is contained in:
Max Brunsfeld 2023-05-19 09:13:31 -07:00
parent 4bda5c4d69
commit 459cc9c959
9 changed files with 119 additions and 252 deletions

4
Cargo.lock generated
View File

@ -4851,7 +4851,7 @@ dependencies = [
"fuzzy",
"git",
"git2",
"glob",
"globset",
"gpui",
"ignore",
"itertools",
@ -5949,7 +5949,7 @@ dependencies = [
"collections",
"editor",
"futures 0.3.28",
"glob",
"globset",
"gpui",
"language",
"log",

View File

@ -77,6 +77,7 @@ async-trait = { version = "0.1" }
ctor = { version = "0.1" }
env_logger = { version = "0.9" }
futures = { version = "0.3" }
globset = { version = "0.4" }
glob = { version = "0.3.1" }
lazy_static = { version = "1.4.0" }
log = { version = "0.4.16", features = ["kv_unstable_serde"] }

View File

@ -42,7 +42,7 @@ anyhow.workspace = true
async-trait.workspace = true
backtrace = "0.3"
futures.workspace = true
glob.workspace = true
globset.workspace = true
ignore = "0.4"
lazy_static.workspace = true
log.workspace = true

View File

@ -1,129 +0,0 @@
use anyhow::{anyhow, Result};
use std::path::Path;
/// A collection of glob patterns that is matched against file paths.
///
/// A path matches the set when at least one of the stored patterns matches it.
#[derive(Default)]
pub struct LspGlobSet {
    patterns: Vec<glob::Pattern>,
}

impl LspGlobSet {
    /// Remove every pattern from the set.
    pub fn clear(&mut self) {
        self.patterns.clear();
    }

    /// Add a pattern to the glob set.
    ///
    /// LSP's glob syntax supports bash-style brace expansion. For example,
    /// the pattern '*.{js,ts}' would match all JavaScript or TypeScript files.
    /// This is not a part of the standard libc glob syntax, and isn't supported
    /// by the `glob` crate. So we pre-process the glob patterns, producing a
    /// separate glob `Pattern` object for each part of a brace expansion.
    ///
    /// A `}` with no preceding `{`, or a `{` that is never closed, is not
    /// expanded and is passed through to the `glob` crate unchanged.
    ///
    /// # Errors
    ///
    /// Returns an error if the pattern contains nested braces, or if any of
    /// the expanded patterns is rejected by `glob::Pattern::new`. When an
    /// error is returned, the set is left exactly as it was before the call.
    pub fn add_pattern(&mut self, pattern: &str) -> Result<()> {
        // Find all of the ranges of `pattern` that contain matched curly braces.
        let mut expansion_ranges = Vec::new();
        let mut expansion_start_ix = None;
        for (ix, c) in pattern.match_indices(|c| ['{', '}'].contains(&c)) {
            match c {
                "{" => {
                    if expansion_start_ix.is_some() {
                        return Err(anyhow!("nested braces in glob patterns aren't supported"));
                    }
                    expansion_start_ix = Some(ix);
                }
                "}" => {
                    if let Some(start_ix) = expansion_start_ix.take() {
                        expansion_ranges.push(start_ix..ix + 1);
                    }
                }
                _ => {}
            }
        }

        // Starting with a single pattern, process each brace expansion by cloning
        // the pattern once per element of the expansion. The ranges are handled
        // right-to-left so that replacing one range never shifts the byte offsets
        // of the ranges that are still waiting to be processed.
        let mut unexpanded_patterns = vec![];
        let mut expanded_patterns = vec![pattern.to_string()];
        for outer_range in expansion_ranges.into_iter().rev() {
            let inner_range = (outer_range.start + 1)..(outer_range.end - 1);
            std::mem::swap(&mut unexpanded_patterns, &mut expanded_patterns);
            for unexpanded_pattern in unexpanded_patterns.drain(..) {
                for part in unexpanded_pattern[inner_range.clone()].split(',') {
                    let mut expanded_pattern = unexpanded_pattern.clone();
                    expanded_pattern.replace_range(outer_range.clone(), part);
                    expanded_patterns.push(expanded_pattern);
                }
            }
        }

        // Parse every expanded pattern before touching `self.patterns`, so that a
        // failure part-way through does not leave a partially-added pattern behind.
        let parsed_patterns = expanded_patterns
            .iter()
            .map(|expanded| glob::Pattern::new(expanded))
            .collect::<Result<Vec<_>, _>>()?;
        self.patterns.extend(parsed_patterns);
        Ok(())
    }

    /// Returns `true` if any pattern in the set matches `path`.
    pub fn matches(&self, path: &Path) -> bool {
        self.patterns
            .iter()
            .any(|pattern| pattern.matches_path(path))
    }
}
/// Formats the set as a `{...}` list of the original pattern strings.
impl std::fmt::Debug for LspGlobSet {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut set = f.debug_set();
        for pattern in &self.patterns {
            // Show each pattern by its source text rather than its parsed form.
            set.entry(&pattern.as_str());
        }
        set.finish()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `set` matches every path in `hits` and none of the paths in `misses`.
    fn assert_matches(set: &LspGlobSet, hits: &[&str], misses: &[&str]) {
        for hit in hits {
            assert!(set.matches(hit.as_ref()));
        }
        for miss in misses {
            assert!(!set.matches(miss.as_ref()));
        }
    }

    #[test]
    fn test_glob_set() {
        let mut watch = LspGlobSet::default();
        for pattern in ["/a/**/*.rs", "/a/**/Cargo.toml"] {
            watch.add_pattern(pattern).unwrap();
        }

        assert_matches(
            &watch,
            &["/a/b.rs", "/a/b/c.rs"],
            &["/b/c.rs", "/a/b.ts"],
        );
    }

    #[test]
    fn test_brace_expansion() {
        let mut watch = LspGlobSet::default();
        watch.add_pattern("/a/*.{ts,js,tsx}").unwrap();

        assert_matches(
            &watch,
            &["/a/one.js", "/a/two.ts", "/a/three.tsx"],
            &[
                "/a/one.j",
                "/a/two.s",
                "/a/three.t",
                "/a/four.t",
                "/a/five.xt",
            ],
        );
    }

    #[test]
    fn test_multiple_brace_expansion() {
        let mut watch = LspGlobSet::default();
        watch.add_pattern("/a/{one,two,three}.{b*c,d*e}").unwrap();

        assert_matches(
            &watch,
            &["/a/one.bic", "/a/two.dole", "/a/three.deeee"],
            &["/a/four.bic", "/a/one.be"],
        );
    }
}

View File

@ -1,6 +1,5 @@
mod ignore;
mod lsp_command;
mod lsp_glob_set;
mod project_settings;
pub mod search;
pub mod terminals;
@ -19,6 +18,7 @@ use futures::{
future::{try_join_all, Shared},
AsyncWriteExt, Future, FutureExt, StreamExt, TryFutureExt,
};
use globset::{Glob, GlobSet, GlobSetBuilder};
use gpui::{
AnyModelHandle, AppContext, AsyncAppContext, BorrowAppContext, Entity, ModelContext,
ModelHandle, Task, WeakModelHandle,
@ -41,7 +41,6 @@ use lsp::{
DocumentHighlightKind, LanguageServer, LanguageServerId,
};
use lsp_command::*;
use lsp_glob_set::LspGlobSet;
use postage::watch;
use project_settings::ProjectSettings;
use rand::prelude::*;
@ -226,7 +225,7 @@ pub enum LanguageServerState {
language: Arc<Language>,
adapter: Arc<CachedLspAdapter>,
server: Arc<LanguageServer>,
watched_paths: HashMap<WorktreeId, LspGlobSet>,
watched_paths: HashMap<WorktreeId, GlobSet>,
simulate_disk_based_diagnostics_completion: Option<Task<()>>,
},
}
@ -2867,8 +2866,10 @@ impl Project {
if let Some(LanguageServerState::Running { watched_paths, .. }) =
self.language_servers.get_mut(&language_server_id)
{
watched_paths.clear();
eprintln!("change watch");
let mut builders = HashMap::default();
for watcher in params.watchers {
eprintln!(" {}", watcher.glob_pattern);
for worktree in &self.worktrees {
if let Some(worktree) = worktree.upgrade(cx) {
let worktree = worktree.read(cx);
@ -2878,17 +2879,26 @@ impl Project {
.strip_prefix(abs_path)
.and_then(|s| s.strip_prefix(std::path::MAIN_SEPARATOR))
{
watched_paths
.entry(worktree.id())
.or_default()
.add_pattern(suffix)
.log_err();
if let Some(glob) = Glob::new(suffix).log_err() {
builders
.entry(worktree.id())
.or_insert_with(|| GlobSetBuilder::new())
.add(glob);
}
break;
}
}
}
}
}
watched_paths.clear();
for (worktree_id, builder) in builders {
if let Ok(globset) = builder.build() {
watched_paths.insert(worktree_id, globset);
}
}
cx.notify();
}
}
@ -4725,6 +4735,10 @@ impl Project {
changes: &HashMap<(Arc<Path>, ProjectEntryId), PathChange>,
cx: &mut ModelContext<Self>,
) {
if changes.is_empty() {
return;
}
let worktree_id = worktree_handle.read(cx).id();
let mut language_server_ids = self
.language_server_ids
@ -4750,7 +4764,7 @@ impl Project {
changes: changes
.iter()
.filter_map(|((path, _), change)| {
if watched_paths.matches(&path) {
if watched_paths.is_match(&path) {
Some(lsp::FileEvent {
uri: lsp::Url::from_file_path(abs_path.join(path))
.unwrap(),

View File

@ -1,6 +1,7 @@
use crate::{worktree::WorktreeHandle, Event, *};
use fs::{FakeFs, LineEnding, RealFs};
use futures::{future, StreamExt};
use globset::Glob;
use gpui::{executor::Deterministic, test::subscribe, AppContext};
use language::{
language_settings::{AllLanguageSettings, LanguageSettingsContent},
@ -505,7 +506,7 @@ async fn test_reporting_fs_changes_to_language_servers(cx: &mut gpui::TestAppCon
register_options: serde_json::to_value(
lsp::DidChangeWatchedFilesRegistrationOptions {
watchers: vec![lsp::FileSystemWatcher {
glob_pattern: "*.{rs,c}".to_string(),
glob_pattern: "/the-root/*.{rs,c}".to_string(),
kind: None,
}],
},
@ -3393,7 +3394,7 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
search_query,
false,
true,
vec![glob::Pattern::new("*.odd").unwrap()],
vec![Glob::new("*.odd").unwrap().compile_matcher()],
Vec::new()
),
cx
@ -3411,7 +3412,7 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
search_query,
false,
true,
vec![glob::Pattern::new("*.rs").unwrap()],
vec![Glob::new("*.rs").unwrap().compile_matcher()],
Vec::new()
),
cx
@ -3433,8 +3434,8 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
false,
true,
vec![
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap(),
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher(),
],
Vec::new()
),
@ -3457,9 +3458,9 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) {
false,
true,
vec![
glob::Pattern::new("*.rs").unwrap(),
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap(),
Glob::new("*.rs").unwrap().compile_matcher(),
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher(),
],
Vec::new()
),
@ -3504,7 +3505,7 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
false,
true,
Vec::new(),
vec![glob::Pattern::new("*.odd").unwrap()],
vec![Glob::new("*.odd").unwrap().compile_matcher()],
),
cx
)
@ -3527,7 +3528,7 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
false,
true,
Vec::new(),
vec![glob::Pattern::new("*.rs").unwrap()],
vec![Glob::new("*.rs").unwrap().compile_matcher()],
),
cx
)
@ -3549,8 +3550,8 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
true,
Vec::new(),
vec![
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap(),
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher(),
],
),
cx
@ -3573,9 +3574,9 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) {
true,
Vec::new(),
vec![
glob::Pattern::new("*.rs").unwrap(),
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap(),
Glob::new("*.rs").unwrap().compile_matcher(),
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher(),
],
),
cx
@ -3612,8 +3613,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
search_query,
false,
true,
vec![glob::Pattern::new("*.odd").unwrap()],
vec![glob::Pattern::new("*.odd").unwrap()],
vec![Glob::new("*.odd").unwrap().compile_matcher()],
vec![Glob::new("*.odd").unwrap().compile_matcher()],
),
cx
)
@ -3630,8 +3631,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
search_query,
false,
true,
vec![glob::Pattern::new("*.ts").unwrap()],
vec![glob::Pattern::new("*.ts").unwrap()],
vec![Glob::new("*.ts").unwrap().compile_matcher()],
vec![Glob::new("*.ts").unwrap().compile_matcher()],
),
cx
)
@ -3649,12 +3650,12 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
false,
true,
vec![
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap()
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher()
],
vec![
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap()
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher()
],
),
cx
@ -3673,12 +3674,12 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex
false,
true,
vec![
glob::Pattern::new("*.ts").unwrap(),
glob::Pattern::new("*.odd").unwrap()
Glob::new("*.ts").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher()
],
vec![
glob::Pattern::new("*.rs").unwrap(),
glob::Pattern::new("*.odd").unwrap()
Glob::new("*.rs").unwrap().compile_matcher(),
Glob::new("*.odd").unwrap().compile_matcher()
],
),
cx

View File

@ -1,6 +1,7 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::Result;
use client::proto;
use globset::{Glob, GlobMatcher};
use itertools::Itertools;
use language::{char_kind, Rope};
use regex::{Regex, RegexBuilder};
@ -19,8 +20,8 @@ pub enum SearchQuery {
query: Arc<str>,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
},
Regex {
regex: Regex,
@ -28,8 +29,8 @@ pub enum SearchQuery {
multiline: bool,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
},
}
@ -38,8 +39,8 @@ impl SearchQuery {
query: impl ToString,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
) -> Self {
let query = query.to_string();
let search = AhoCorasickBuilder::new()
@ -60,8 +61,8 @@ impl SearchQuery {
query: impl ToString,
whole_word: bool,
case_sensitive: bool,
files_to_include: Vec<glob::Pattern>,
files_to_exclude: Vec<glob::Pattern>,
files_to_include: Vec<GlobMatcher>,
files_to_exclude: Vec<GlobMatcher>,
) -> Result<Self> {
let mut query = query.to_string();
let initial_query = Arc::from(query.as_str());
@ -95,40 +96,16 @@ impl SearchQuery {
message.query,
message.whole_word,
message.case_sensitive,
message
.files_to_include
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
message
.files_to_exclude
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
deserialize_globs(&message.files_to_include)?,
deserialize_globs(&message.files_to_exclude)?,
)
} else {
Ok(Self::text(
message.query,
message.whole_word,
message.case_sensitive,
message
.files_to_include
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
message
.files_to_exclude
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()?,
deserialize_globs(&message.files_to_include)?,
deserialize_globs(&message.files_to_exclude)?,
))
}
}
@ -143,12 +120,12 @@ impl SearchQuery {
files_to_include: self
.files_to_include()
.iter()
.map(ToString::to_string)
.map(|g| g.glob().to_string())
.join(","),
files_to_exclude: self
.files_to_exclude()
.iter()
.map(ToString::to_string)
.map(|g| g.glob().to_string())
.join(","),
}
}
@ -289,7 +266,7 @@ impl SearchQuery {
matches!(self, Self::Regex { .. })
}
pub fn files_to_include(&self) -> &[glob::Pattern] {
pub fn files_to_include(&self) -> &[GlobMatcher] {
match self {
Self::Text {
files_to_include, ..
@ -300,7 +277,7 @@ impl SearchQuery {
}
}
pub fn files_to_exclude(&self) -> &[glob::Pattern] {
pub fn files_to_exclude(&self) -> &[GlobMatcher] {
match self {
Self::Text {
files_to_exclude, ..
@ -317,14 +294,23 @@ impl SearchQuery {
!self
.files_to_exclude()
.iter()
.any(|exclude_glob| exclude_glob.matches_path(file_path))
.any(|exclude_glob| exclude_glob.is_match(file_path))
&& (self.files_to_include().is_empty()
|| self
.files_to_include()
.iter()
.any(|include_glob| include_glob.matches_path(file_path)))
.any(|include_glob| include_glob.is_match(file_path)))
}
None => self.files_to_include().is_empty(),
}
}
}
/// Parses a comma-separated list of glob patterns into compiled matchers.
///
/// Each entry is trimmed of surrounding whitespace and empty entries are
/// skipped. Parsing stops at the first entry that is not a valid glob, and
/// that error is returned.
fn deserialize_globs(glob_set: &str) -> Result<Vec<GlobMatcher>> {
    let mut matchers = Vec::new();
    for entry in glob_set.split(',') {
        let entry = entry.trim();
        if entry.is_empty() {
            continue;
        }
        matchers.push(Glob::new(entry)?.compile_matcher());
    }
    Ok(matchers)
}

View File

@ -27,7 +27,7 @@ serde.workspace = true
serde_derive.workspace = true
smallvec.workspace = true
smol.workspace = true
glob.workspace = true
globset.workspace = true
[dev-dependencies]
client = { path = "../client", features = ["test-support"] }

View File

@ -2,12 +2,14 @@ use crate::{
SearchOption, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, ToggleRegex,
ToggleWholeWord,
};
use anyhow::Result;
use collections::HashMap;
use editor::{
items::active_match_index, scroll::autoscroll::Autoscroll, Anchor, Editor, MultiBuffer,
SelectAll, MAX_TAB_TITLE_LEN,
};
use futures::StreamExt;
use globset::{Glob, GlobMatcher};
use gpui::{
actions,
elements::*,
@ -571,46 +573,30 @@ impl ProjectSearchView {
fn build_search_query(&mut self, cx: &mut ViewContext<Self>) -> Option<SearchQuery> {
let text = self.query_editor.read(cx).text(cx);
let included_files = match self
.included_files_editor
.read(cx)
.text(cx)
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()
{
Ok(included_files) => {
self.panels_with_errors.remove(&InputPanel::Include);
included_files
}
Err(_e) => {
self.panels_with_errors.insert(InputPanel::Include);
cx.notify();
return None;
}
};
let excluded_files = match self
.excluded_files_editor
.read(cx)
.text(cx)
.split(',')
.map(str::trim)
.filter(|glob_str| !glob_str.is_empty())
.map(|glob_str| glob::Pattern::new(glob_str))
.collect::<Result<_, _>>()
{
Ok(excluded_files) => {
self.panels_with_errors.remove(&InputPanel::Exclude);
excluded_files
}
Err(_e) => {
self.panels_with_errors.insert(InputPanel::Exclude);
cx.notify();
return None;
}
};
let included_files =
match Self::load_glob_set(&self.included_files_editor.read(cx).text(cx)) {
Ok(included_files) => {
self.panels_with_errors.remove(&InputPanel::Include);
included_files
}
Err(_e) => {
self.panels_with_errors.insert(InputPanel::Include);
cx.notify();
return None;
}
};
let excluded_files =
match Self::load_glob_set(&self.excluded_files_editor.read(cx).text(cx)) {
Ok(excluded_files) => {
self.panels_with_errors.remove(&InputPanel::Exclude);
excluded_files
}
Err(_e) => {
self.panels_with_errors.insert(InputPanel::Exclude);
cx.notify();
return None;
}
};
if self.regex {
match SearchQuery::regex(
text,
@ -640,6 +626,14 @@ impl ProjectSearchView {
}
}
/// Turns comma-separated glob text into a list of compiled matchers.
///
/// Entries are whitespace-trimmed and blank entries are ignored. The first
/// entry that fails to parse as a glob aborts the whole conversion with
/// that error.
fn load_glob_set(text: &str) -> Result<Vec<GlobMatcher>> {
    let mut matchers = Vec::new();
    for glob_str in text.split(',').map(str::trim) {
        if !glob_str.is_empty() {
            let glob = Glob::new(glob_str)?;
            matchers.push(glob.compile_matcher());
        }
    }
    Ok(matchers)
}
fn select_match(&mut self, direction: Direction, cx: &mut ViewContext<Self>) {
if let Some(index) = self.active_match_index {
let match_ranges = self.model.read(cx).match_ranges.clone();