mirror of
https://github.com/zed-industries/zed.git
synced 2024-11-08 07:35:01 +03:00
project search: Increase perf up to 10x by batching git status
calls (#16936)
***Update**: after rebasing on top of https://github.com/zed-industries/zed/pull/16915/ (that also changed how search worked), the results are still good, but not 10x. Instead of going from 10s to 500ms, it goes from 10s to 3s.* This improves the performance of project-wide search by an order of magnitude. After digging in, @as-cii and I found that opening buffers was the bottleneck for project-wide search (since Zed opens, parses, ... buffers when finding them, which is something VS Code doesn't do, for example). So this PR improves the performance of opening multiple buffers at once. It does this by doing two things: - It batches scan-requests in the worktree. When we search, we search files in chunks of 64. Previously we'd handle all 64 scan requests separately. The new code checks if the scan requests can be batched and if so it, it does that. - It batches `git status` calls when reloading the project entries for the opened buffers. Instead of calling `git status` for each file, it calls `git status` for a batch of files, and then extracts the status for each file. (It has to be said that I think the slow performance on `main` has been a regression introduced over the last few months with the changes made to project/worktree/git. I don't think it was this slow ~5 months ago. But I also don't think it was this fast ~5 months ago.) ## Benchmarks | Search | Before | After (without https://github.com/zed-industries/zed/pull/16915) | After (with https://github.com/zed-industries/zed/pull/16915) |--------|--------|-------|------| | `zed.dev` at `2b2a501192e78e`, searching for `<` (`4484` results) | 3.0s<br>2.9s<br>2.89s | 489ms<br>517ms<br>476ms | n/a | | `zed.dev` at `2b2a501192e78e`, searching for `:` (`25886+` results) | 3.9s<br>3.9s<br>3.8s | 70ms<br>66ms<br>72ms | n/a | | `zed` at `55dda0e6af`, searching for `<` (`10937+` results) | 10s<br>11s<br>12s | 500m<br>499ms<br>543ms | 3.4s<br>3.1s<br> | (All results recorded after doing a warm-up run that would start language servers etc.) Release Notes: - Performance of project-wide search has been improved by up to 10x. --------- Co-authored-by: Antonio <antonio@zed.dev>
This commit is contained in:
parent
d1dceef945
commit
8ec680cecb
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -13856,6 +13856,7 @@ dependencies = [
|
||||
"serde",
|
||||
"serde_json",
|
||||
"settings",
|
||||
"smallvec",
|
||||
"smol",
|
||||
"sum_tree",
|
||||
"text",
|
||||
|
@ -36,11 +36,7 @@ pub trait GitRepository: Send + Sync {
|
||||
/// Returns the SHA of the current HEAD.
|
||||
fn head_sha(&self) -> Option<String>;
|
||||
|
||||
fn statuses(&self, path_prefix: &Path) -> Result<GitStatus>;
|
||||
|
||||
fn status(&self, path: &Path) -> Option<GitFileStatus> {
|
||||
Some(self.statuses(path).ok()?.entries.first()?.1)
|
||||
}
|
||||
fn status(&self, path_prefixes: &[PathBuf]) -> Result<GitStatus>;
|
||||
|
||||
fn branches(&self) -> Result<Vec<Branch>>;
|
||||
fn change_branch(&self, _: &str) -> Result<()>;
|
||||
@ -126,14 +122,14 @@ impl GitRepository for RealGitRepository {
|
||||
Some(self.repository.lock().head().ok()?.target()?.to_string())
|
||||
}
|
||||
|
||||
fn statuses(&self, path_prefix: &Path) -> Result<GitStatus> {
|
||||
fn status(&self, path_prefixes: &[PathBuf]) -> Result<GitStatus> {
|
||||
let working_directory = self
|
||||
.repository
|
||||
.lock()
|
||||
.workdir()
|
||||
.context("failed to read git work directory")?
|
||||
.to_path_buf();
|
||||
GitStatus::new(&self.git_binary_path, &working_directory, path_prefix)
|
||||
GitStatus::new(&self.git_binary_path, &working_directory, path_prefixes)
|
||||
}
|
||||
|
||||
fn branches(&self) -> Result<Vec<Branch>> {
|
||||
@ -245,13 +241,16 @@ impl GitRepository for FakeGitRepository {
|
||||
None
|
||||
}
|
||||
|
||||
fn statuses(&self, path_prefix: &Path) -> Result<GitStatus> {
|
||||
fn status(&self, path_prefixes: &[PathBuf]) -> Result<GitStatus> {
|
||||
let state = self.state.lock();
|
||||
let mut entries = state
|
||||
.worktree_statuses
|
||||
.iter()
|
||||
.filter_map(|(repo_path, status)| {
|
||||
if repo_path.0.starts_with(path_prefix) {
|
||||
if path_prefixes
|
||||
.iter()
|
||||
.any(|path_prefix| repo_path.0.starts_with(path_prefix))
|
||||
{
|
||||
Some((repo_path.to_owned(), *status))
|
||||
} else {
|
||||
None
|
||||
|
@ -15,14 +15,10 @@ impl GitStatus {
|
||||
pub(crate) fn new(
|
||||
git_binary: &Path,
|
||||
working_directory: &Path,
|
||||
mut path_prefix: &Path,
|
||||
path_prefixes: &[PathBuf],
|
||||
) -> Result<Self> {
|
||||
let mut child = Command::new(git_binary);
|
||||
|
||||
if path_prefix == Path::new("") {
|
||||
path_prefix = Path::new(".");
|
||||
}
|
||||
|
||||
child
|
||||
.current_dir(working_directory)
|
||||
.args([
|
||||
@ -32,7 +28,13 @@ impl GitStatus {
|
||||
"--untracked-files=all",
|
||||
"-z",
|
||||
])
|
||||
.arg(path_prefix)
|
||||
.args(path_prefixes.iter().map(|path_prefix| {
|
||||
if *path_prefix == Path::new("") {
|
||||
Path::new(".")
|
||||
} else {
|
||||
path_prefix
|
||||
}
|
||||
}))
|
||||
.stdin(Stdio::null())
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped());
|
||||
|
@ -41,6 +41,7 @@ schemars.workspace = true
|
||||
serde.workspace = true
|
||||
serde_json.workspace = true
|
||||
settings.workspace = true
|
||||
smallvec.workspace = true
|
||||
smol.workspace = true
|
||||
sum_tree.workspace = true
|
||||
text.workspace = true
|
||||
|
@ -38,6 +38,7 @@ use postage::{
|
||||
};
|
||||
use rpc::proto::{self, AnyProtoClient};
|
||||
use settings::{Settings, SettingsLocation, SettingsStore};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use smol::channel::{self, Sender};
|
||||
use std::{
|
||||
any::Any,
|
||||
@ -124,7 +125,7 @@ pub struct LocalWorktree {
|
||||
|
||||
struct ScanRequest {
|
||||
relative_paths: Vec<Arc<Path>>,
|
||||
done: barrier::Sender,
|
||||
done: SmallVec<[barrier::Sender; 1]>,
|
||||
}
|
||||
|
||||
pub struct RemoteWorktree {
|
||||
@ -339,7 +340,7 @@ enum ScanState {
|
||||
Updated {
|
||||
snapshot: LocalSnapshot,
|
||||
changes: UpdatedEntriesSet,
|
||||
barrier: Option<barrier::Sender>,
|
||||
barrier: SmallVec<[barrier::Sender; 1]>,
|
||||
scanning: bool,
|
||||
},
|
||||
}
|
||||
@ -1659,7 +1660,7 @@ impl LocalWorktree {
|
||||
self.scan_requests_tx
|
||||
.try_send(ScanRequest {
|
||||
relative_paths: paths,
|
||||
done: tx,
|
||||
done: smallvec![tx],
|
||||
})
|
||||
.ok();
|
||||
rx
|
||||
@ -2699,7 +2700,7 @@ impl BackgroundScannerState {
|
||||
work_directory: workdir_path,
|
||||
statuses: repo
|
||||
.repo_ptr
|
||||
.statuses(&repo_path)
|
||||
.status(&[repo_path.0])
|
||||
.log_err()
|
||||
.unwrap_or_default(),
|
||||
});
|
||||
@ -3568,7 +3569,7 @@ impl BackgroundScanner {
|
||||
state.snapshot.completed_scan_id = state.snapshot.scan_id;
|
||||
}
|
||||
|
||||
self.send_status_update(false, None);
|
||||
self.send_status_update(false, SmallVec::new());
|
||||
|
||||
// Process any any FS events that occurred while performing the initial scan.
|
||||
// For these events, update events cannot be as precise, because we didn't
|
||||
@ -3588,7 +3589,7 @@ impl BackgroundScanner {
|
||||
select_biased! {
|
||||
// Process any path refresh requests from the worktree. Prioritize
|
||||
// these before handling changes reported by the filesystem.
|
||||
request = self.scan_requests_rx.recv().fuse() => {
|
||||
request = self.next_scan_request().fuse() => {
|
||||
let Ok(request) = request else { break };
|
||||
if !self.process_scan_request(request, false).await {
|
||||
return;
|
||||
@ -3669,7 +3670,7 @@ impl BackgroundScanner {
|
||||
)
|
||||
.await;
|
||||
|
||||
self.send_status_update(scanning, Some(request.done))
|
||||
self.send_status_update(scanning, request.done)
|
||||
}
|
||||
|
||||
async fn process_events(&mut self, mut abs_paths: Vec<PathBuf>) {
|
||||
@ -3777,7 +3778,7 @@ impl BackgroundScanner {
|
||||
#[cfg(test)]
|
||||
self.state.lock().snapshot.check_git_invariants();
|
||||
|
||||
self.send_status_update(false, None);
|
||||
self.send_status_update(false, SmallVec::new());
|
||||
}
|
||||
|
||||
async fn forcibly_load_paths(&self, paths: &[Arc<Path>]) -> bool {
|
||||
@ -3834,7 +3835,7 @@ impl BackgroundScanner {
|
||||
select_biased! {
|
||||
// Process any path refresh requests before moving on to process
|
||||
// the scan queue, so that user operations are prioritized.
|
||||
request = self.scan_requests_rx.recv().fuse() => {
|
||||
request = self.next_scan_request().fuse() => {
|
||||
let Ok(request) = request else { break };
|
||||
if !self.process_scan_request(request, true).await {
|
||||
return;
|
||||
@ -3853,7 +3854,7 @@ impl BackgroundScanner {
|
||||
) {
|
||||
Ok(_) => {
|
||||
last_progress_update_count += 1;
|
||||
self.send_status_update(true, None);
|
||||
self.send_status_update(true, SmallVec::new());
|
||||
}
|
||||
Err(count) => {
|
||||
last_progress_update_count = count;
|
||||
@ -3879,7 +3880,7 @@ impl BackgroundScanner {
|
||||
.await;
|
||||
}
|
||||
|
||||
fn send_status_update(&self, scanning: bool, barrier: Option<barrier::Sender>) -> bool {
|
||||
fn send_status_update(&self, scanning: bool, barrier: SmallVec<[barrier::Sender; 1]>) -> bool {
|
||||
let mut state = self.state.lock();
|
||||
if state.changed_paths.is_empty() && scanning {
|
||||
return true;
|
||||
@ -3954,7 +3955,7 @@ impl BackgroundScanner {
|
||||
if let Some((work_directory, repository)) = repo {
|
||||
let t0 = Instant::now();
|
||||
let statuses = repository
|
||||
.statuses(Path::new(""))
|
||||
.status(&[PathBuf::from("")])
|
||||
.log_err()
|
||||
.unwrap_or_default();
|
||||
log::trace!("computed git status in {:?}", t0.elapsed());
|
||||
@ -4167,6 +4168,32 @@ impl BackgroundScanner {
|
||||
}
|
||||
}
|
||||
|
||||
// Group all relative paths by their git repository.
|
||||
let mut paths_by_git_repo = HashMap::default();
|
||||
for relative_path in relative_paths.iter() {
|
||||
if let Some((repo_entry, repo)) = state.snapshot.repo_for_path(relative_path) {
|
||||
if let Ok(repo_path) = repo_entry.relativize(&state.snapshot, relative_path) {
|
||||
paths_by_git_repo
|
||||
.entry(repo.git_dir_path.clone())
|
||||
.or_insert_with(|| RepoPaths {
|
||||
repo: repo.repo_ptr.clone(),
|
||||
repo_paths: Vec::new(),
|
||||
relative_paths: Vec::new(),
|
||||
})
|
||||
.add_paths(&relative_path, repo_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now call `git status` once per repository and collect each file's git status.
|
||||
let mut git_statuses_by_relative_path =
|
||||
paths_by_git_repo
|
||||
.into_values()
|
||||
.fold(HashMap::default(), |mut map, repo_paths| {
|
||||
map.extend(repo_paths.into_git_file_statuses());
|
||||
map
|
||||
});
|
||||
|
||||
for (path, metadata) in relative_paths.iter().zip(metadata.into_iter()) {
|
||||
let abs_path: Arc<Path> = root_abs_path.join(&path).into();
|
||||
match metadata {
|
||||
@ -4192,15 +4219,7 @@ impl BackgroundScanner {
|
||||
fs_entry.is_external = is_external;
|
||||
fs_entry.is_private = self.is_path_private(path);
|
||||
|
||||
if !is_dir && !fs_entry.is_ignored && !fs_entry.is_external {
|
||||
if let Some((repo_entry, repo)) = state.snapshot.repo_for_path(path) {
|
||||
if let Ok(repo_path) = repo_entry.relativize(&state.snapshot, path) {
|
||||
fs_entry.git_status = repo.repo_ptr.status(&repo_path);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let (Some(scan_queue_tx), true) = (&scan_queue_tx, fs_entry.is_dir()) {
|
||||
if let (Some(scan_queue_tx), true) = (&scan_queue_tx, is_dir) {
|
||||
if state.should_scan_directory(&fs_entry)
|
||||
|| (fs_entry.path.as_os_str().is_empty()
|
||||
&& abs_path.file_name() == Some(*DOT_GIT))
|
||||
@ -4211,7 +4230,11 @@ impl BackgroundScanner {
|
||||
}
|
||||
}
|
||||
|
||||
state.insert_entry(fs_entry, self.fs.as_ref());
|
||||
if !is_dir && !fs_entry.is_ignored && !fs_entry.is_external {
|
||||
fs_entry.git_status = git_statuses_by_relative_path.remove(path);
|
||||
}
|
||||
|
||||
state.insert_entry(fs_entry.clone(), self.fs.as_ref());
|
||||
}
|
||||
Ok(None) => {
|
||||
self.remove_repo_path(path, &mut state.snapshot);
|
||||
@ -4310,7 +4333,7 @@ impl BackgroundScanner {
|
||||
select_biased! {
|
||||
// Process any path refresh requests before moving on to process
|
||||
// the queue of ignore statuses.
|
||||
request = self.scan_requests_rx.recv().fuse() => {
|
||||
request = self.next_scan_request().fuse() => {
|
||||
let Ok(request) = request else { break };
|
||||
if !self.process_scan_request(request, true).await {
|
||||
return;
|
||||
@ -4380,7 +4403,12 @@ impl BackgroundScanner {
|
||||
if !entry.is_dir() && !entry.is_ignored && !entry.is_external {
|
||||
if let Some((ref repo_entry, local_repo)) = repo {
|
||||
if let Ok(repo_path) = repo_entry.relativize(&snapshot, &entry.path) {
|
||||
entry.git_status = local_repo.repo_ptr.status(&repo_path);
|
||||
let status = local_repo
|
||||
.repo_ptr
|
||||
.status(&[repo_path.0.clone()])
|
||||
.ok()
|
||||
.and_then(|status| status.get(&repo_path));
|
||||
entry.git_status = status;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -4519,7 +4547,7 @@ impl BackgroundScanner {
|
||||
select_biased! {
|
||||
// Process any path refresh requests before moving on to process
|
||||
// the queue of git statuses.
|
||||
request = self.scan_requests_rx.recv().fuse() => {
|
||||
request = self.next_scan_request().fuse() => {
|
||||
let Ok(request) = request else { break };
|
||||
if !self.process_scan_request(request, true).await {
|
||||
return;
|
||||
@ -4537,7 +4565,7 @@ impl BackgroundScanner {
|
||||
fn update_git_statuses(&self, job: UpdateGitStatusesJob) {
|
||||
log::trace!("updating git statuses for repo {:?}", job.work_directory.0);
|
||||
let t0 = Instant::now();
|
||||
let Some(statuses) = job.repository.statuses(Path::new("")).log_err() else {
|
||||
let Some(statuses) = job.repository.status(&[PathBuf::from("")]).log_err() else {
|
||||
return;
|
||||
};
|
||||
log::trace!(
|
||||
@ -4726,6 +4754,15 @@ impl BackgroundScanner {
|
||||
fn is_path_private(&self, path: &Path) -> bool {
|
||||
!self.share_private_files && self.settings.is_path_private(path)
|
||||
}
|
||||
|
||||
async fn next_scan_request(&self) -> Result<ScanRequest> {
|
||||
let mut request = self.scan_requests_rx.recv().await?;
|
||||
while let Ok(next_request) = self.scan_requests_rx.try_recv() {
|
||||
request.relative_paths.extend(next_request.relative_paths);
|
||||
request.done.extend(next_request.done);
|
||||
}
|
||||
Ok(request)
|
||||
}
|
||||
}
|
||||
|
||||
fn swap_to_front(child_paths: &mut Vec<PathBuf>, file: &OsStr) {
|
||||
@ -4748,6 +4785,31 @@ fn char_bag_for_path(root_char_bag: CharBag, path: &Path) -> CharBag {
|
||||
result
|
||||
}
|
||||
|
||||
struct RepoPaths {
|
||||
repo: Arc<dyn GitRepository>,
|
||||
relative_paths: Vec<Arc<Path>>,
|
||||
repo_paths: Vec<PathBuf>,
|
||||
}
|
||||
|
||||
impl RepoPaths {
|
||||
fn add_paths(&mut self, relative_path: &Arc<Path>, repo_path: RepoPath) {
|
||||
self.relative_paths.push(relative_path.clone());
|
||||
self.repo_paths.push(repo_path.0);
|
||||
}
|
||||
|
||||
fn into_git_file_statuses(self) -> HashMap<Arc<Path>, GitFileStatus> {
|
||||
let mut statuses = HashMap::default();
|
||||
if let Ok(status) = self.repo.status(&self.repo_paths) {
|
||||
for (repo_path, relative_path) in self.repo_paths.into_iter().zip(self.relative_paths) {
|
||||
if let Some(path_status) = status.get(&repo_path) {
|
||||
statuses.insert(relative_path, path_status);
|
||||
}
|
||||
}
|
||||
}
|
||||
statuses
|
||||
}
|
||||
}
|
||||
|
||||
struct ScanJob {
|
||||
abs_path: Arc<Path>,
|
||||
path: Arc<Path>,
|
||||
|
Loading…
Reference in New Issue
Block a user