Simplify and improve concurrency of git status updates (#12513)

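The quest for responsiveness in large Git repositories continues. This is a
follow-up to https://github.com/zed-industries/zed/pull/12444. Instead of
splitting each repository's status refresh into fixed-size batches of entries,
git statuses are now computed once per repository in a spawned task, while
path refresh requests continue to be processed concurrently.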

Release Notes:

- N/A
This commit is contained in:
Max Brunsfeld 2024-05-31 09:10:09 -07:00 committed by GitHub
parent 356fcec337
commit d12b8c3945
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -78,8 +78,6 @@ pub const FS_WATCH_LATENCY: Duration = Duration::from_millis(100);
#[cfg(not(feature = "test-support"))] #[cfg(not(feature = "test-support"))]
pub const FS_WATCH_LATENCY: Duration = Duration::from_millis(100); pub const FS_WATCH_LATENCY: Duration = Duration::from_millis(100);
const GIT_STATUS_UPDATE_BATCH_SIZE: usize = 1024;
#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)] #[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)]
pub struct WorktreeId(usize); pub struct WorktreeId(usize);
@ -4293,7 +4291,7 @@ impl BackgroundScanner {
async fn update_git_repositories(&self, dot_git_paths: Vec<PathBuf>) { async fn update_git_repositories(&self, dot_git_paths: Vec<PathBuf>) {
log::debug!("reloading repositories: {dot_git_paths:?}"); log::debug!("reloading repositories: {dot_git_paths:?}");
let (update_job_tx, update_job_rx) = channel::unbounded(); let mut repo_updates = Vec::new();
{ {
let mut state = self.state.lock(); let mut state = self.state.lock();
let scan_id = state.snapshot.scan_id; let scan_id = state.snapshot.scan_id;
@ -4308,7 +4306,7 @@ impl BackgroundScanner {
.then(|| (*entry_id, repo.clone())) .then(|| (*entry_id, repo.clone()))
}); });
let (work_dir, repository) = match existing_repository_entry { let (work_directory, repository) = match existing_repository_entry {
None => { None => {
match state.build_git_repository(dot_git_dir.into(), self.fs.as_ref()) { match state.build_git_repository(dot_git_dir.into(), self.fs.as_ref()) {
Some(output) => output, Some(output) => output,
@ -4327,7 +4325,6 @@ impl BackgroundScanner {
continue; continue;
}; };
log::info!("reload git repository {dot_git_dir:?}");
let repo = &repository.repo_ptr; let repo = &repository.repo_ptr;
let branch = repo.branch_name(); let branch = repo.branch_name();
repo.reload_index(); repo.reload_index();
@ -4345,41 +4342,16 @@ impl BackgroundScanner {
} }
}; };
let statuses = repository repo_updates.push(UpdateGitStatusesJob {
.statuses(Path::new("")) location_in_repo: state
.log_err()
.unwrap_or_default();
let entries = state.snapshot.entries_by_path.clone();
let location_in_repo = state
.snapshot
.repository_entries
.get(&work_dir)
.and_then(|repo| repo.location_in_repo.clone());
let mut files =
state
.snapshot .snapshot
.traverse_from_path(true, false, false, work_dir.0.as_ref()); .repository_entries
let mut start_path = work_dir.0.clone(); .get(&work_directory)
while start_path.starts_with(&work_dir.0) { .and_then(|repo| repo.location_in_repo.clone())
files.advance_by(GIT_STATUS_UPDATE_BATCH_SIZE); .clone(),
let end_path = files.entry().map(|e| e.path.clone()); work_directory,
smol::block_on(update_job_tx.send(UpdateGitStatusesJob { repository,
start_path: start_path.clone(), });
end_path: end_path.clone(),
entries: entries.clone(),
location_in_repo: location_in_repo.clone(),
containing_repository: ScanJobContainingRepository {
work_directory: work_dir.clone(),
statuses: statuses.clone(),
},
}))
.unwrap();
if let Some(end_path) = end_path {
start_path = end_path;
} else {
break;
}
}
} }
// Remove any git repositories whose .git entry no longer exists. // Remove any git repositories whose .git entry no longer exists.
@ -4414,87 +4386,92 @@ impl BackgroundScanner {
.repository_entries .repository_entries
.retain(|_, entry| ids_to_preserve.contains(&entry.work_directory.0)); .retain(|_, entry| ids_to_preserve.contains(&entry.work_directory.0));
} }
drop(update_job_tx);
let (mut updates_done_tx, mut updates_done_rx) = barrier::channel();
self.executor self.executor
.scoped(|scope| { .scoped(|scope| {
for _ in 0..self.executor.num_cpus() { scope.spawn(async {
scope.spawn(async { for repo_update in repo_updates {
loop { self.update_git_statuses(repo_update);
select_biased! { }
// Process any path refresh requests before moving on to process updates_done_tx.blocking_send(()).ok();
// the queue of git statuses. });
request = self.scan_requests_rx.recv().fuse() => {
let Ok(request) = request else { break };
if !self.process_scan_request(request, true).await {
return;
}
}
// Process git status updates in batches. scope.spawn(async {
job = update_job_rx.recv().fuse() => { loop {
let Ok(job) = job else { break }; select_biased! {
self.update_git_statuses(job); // Process any path refresh requests before moving on to process
// the queue of git statuses.
request = self.scan_requests_rx.recv().fuse() => {
let Ok(request) = request else { break };
if !self.process_scan_request(request, true).await {
return;
} }
} }
_ = updates_done_rx.recv().fuse() => break,
} }
}); }
} });
}) })
.await; .await;
} }
/// Update the git statuses for a given batch of entries. /// Update the git statuses for a given batch of entries.
fn update_git_statuses(&self, job: UpdateGitStatusesJob) { fn update_git_statuses(&self, job: UpdateGitStatusesJob) {
// Determine which entries in this batch have changed their git status. log::trace!("updating git statuses for repo {:?}", job.work_directory.0);
let t0 = Instant::now(); let t0 = Instant::now();
let mut edits = Vec::new(); let Some(statuses) = job.repository.statuses(Path::new("")).log_err() else {
for entry in Traversal::new(&job.entries, true, false, false, &job.start_path) { return;
if job };
.end_path log::trace!(
.as_ref() "computed git statuses for repo {:?} in {:?}",
.map_or(false, |end| &entry.path >= end) job.work_directory.0,
{ t0.elapsed()
);
let t0 = Instant::now();
let mut changes = Vec::new();
let snapshot = self.state.lock().snapshot.snapshot.clone();
for file in snapshot.traverse_from_path(true, false, false, job.work_directory.0.as_ref()) {
let Ok(repo_path) = file.path.strip_prefix(&job.work_directory.0) else {
break; break;
}
let Ok(repo_path) = entry
.path
.strip_prefix(&job.containing_repository.work_directory)
else {
continue;
}; };
let repo_path = RepoPath(if let Some(location) = &job.location_in_repo { let git_status = if let Some(location) = &job.location_in_repo {
location.join(repo_path) statuses.get(&location.join(repo_path))
} else { } else {
repo_path.to_path_buf() statuses.get(&repo_path)
}); };
let git_status = job.containing_repository.statuses.get(&repo_path); if file.git_status != git_status {
if entry.git_status != git_status { let mut entry = file.clone();
let mut entry = entry.clone();
entry.git_status = git_status; entry.git_status = git_status;
edits.push(Edit::Insert(entry)); changes.push((entry.path, git_status));
} }
} }
let mut state = self.state.lock();
let edits = changes
.iter()
.filter_map(|(path, git_status)| {
let entry = state.snapshot.entry_for_path(path)?.clone();
Some(Edit::Insert(Entry {
git_status: *git_status,
..entry.clone()
}))
})
.collect();
// Apply the git status changes. // Apply the git status changes.
if edits.len() > 0 { util::extend_sorted(
let mut state = self.state.lock(); &mut state.changed_paths,
let path_changes = edits.iter().map(|edit| { changes.iter().map(|p| p.0.clone()),
if let Edit::Insert(entry) = edit { usize::MAX,
entry.path.clone() Ord::cmp,
} else { );
unreachable!() state.snapshot.entries_by_path.edit(edits, &());
}
});
util::extend_sorted(&mut state.changed_paths, path_changes, usize::MAX, Ord::cmp);
state.snapshot.entries_by_path.edit(edits, &());
}
log::trace!( log::trace!(
"refreshed git status of entries starting with {} in {:?}", "applied git status updates for repo {:?} in {:?}",
// entries.len(), job.work_directory.0,
job.start_path.display(), t0.elapsed(),
t0.elapsed()
); );
} }
@ -4664,11 +4641,9 @@ struct UpdateIgnoreStatusJob {
} }
struct UpdateGitStatusesJob { struct UpdateGitStatusesJob {
entries: SumTree<Entry>, work_directory: RepositoryWorkDirectory,
start_path: Arc<Path>,
end_path: Option<Arc<Path>>,
containing_repository: ScanJobContainingRepository,
location_in_repo: Option<Arc<Path>>, location_in_repo: Option<Arc<Path>>,
repository: Arc<dyn GitRepository>,
} }
pub trait WorktreeModelHandle { pub trait WorktreeModelHandle {