Fix git repository state corruption when work dir's metadata is updated (#16926)

Fixes https://github.com/zed-industries/zed/issues/13176

Release Notes:

- Fixed an issue where git state would stop updating if the root
directory of a git repository was updated in certain ways
This commit is contained in:
Max Brunsfeld 2024-08-26 17:46:50 -07:00 committed by GitHub
parent 2de420a67b
commit bea6786f14
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 191 additions and 47 deletions

View File

@ -846,6 +846,35 @@ impl FakeFs {
state.next_mtime = next_mtime;
}
/// Simulate `touch`ing a path on the fake filesystem: bump the mtime of an
/// existing file or directory, or create an empty file if nothing exists at
/// `path`, then emit a change event for that path.
pub async fn touch_path(&self, path: impl AsRef<Path>) {
    let path = path.as_ref();
    let mut state = self.state.lock();

    // Reserve a fresh mtime and inode before handing `state` to the
    // closure below, which needs a mutable borrow of its entries.
    let touched_mtime = state.next_mtime;
    let touched_inode = state.next_inode;
    state.next_mtime += Duration::from_nanos(1);
    state.next_inode += 1;

    state
        .write_path(path, move |entry| {
            match entry {
                // Touching a missing path creates an empty file, like `touch(1)`.
                btree_map::Entry::Vacant(e) => {
                    e.insert(Arc::new(Mutex::new(FakeFsEntry::File {
                        inode: touched_inode,
                        mtime: touched_mtime,
                        content: Vec::new(),
                    })));
                }
                // Files and directories get their mtime bumped; symlinks are
                // deliberately left untouched.
                btree_map::Entry::Occupied(mut e) => match &mut *e.get_mut().lock() {
                    FakeFsEntry::File { mtime, .. } | FakeFsEntry::Dir { mtime, .. } => {
                        *mtime = touched_mtime;
                    }
                    FakeFsEntry::Symlink { .. } => {}
                },
            }
            Ok(())
        })
        .unwrap();
    state.emit_event([path.to_path_buf()]);
}
pub async fn insert_file(&self, path: impl AsRef<Path>, content: Vec<u8>) {
self.write_file_internal(path, content).unwrap()
}

View File

@ -41,7 +41,8 @@ use settings::{Settings, SettingsLocation, SettingsStore};
use smol::channel::{self, Sender};
use std::{
any::Any,
cmp::{self, Ordering},
cmp::Ordering,
collections::hash_map,
convert::TryFrom,
ffi::OsStr,
fmt,
@ -299,7 +300,7 @@ struct BackgroundScannerState {
/// as part of the current update. These entry ids may be re-used
/// if the same inode is discovered at a new path, or if the given
/// path is re-created after being deleted.
removed_entry_ids: HashMap<(u64, SystemTime), ProjectEntryId>,
removed_entries: HashMap<u64, Entry>,
changed_paths: Vec<Arc<Path>>,
prev_snapshot: Snapshot,
}
@ -1022,7 +1023,7 @@ impl LocalWorktree {
scanned_dirs: Default::default(),
path_prefixes_to_scan: Default::default(),
paths_to_scan: Default::default(),
removed_entry_ids: Default::default(),
removed_entries: Default::default(),
changed_paths: Default::default(),
}),
phase: BackgroundScannerPhase::InitialScan,
@ -2636,6 +2637,26 @@ impl LocalSnapshot {
}
}
#[cfg(test)]
fn check_git_invariants(&self) {
    // Gather the `.git` directory paths known to the repository map, and the
    // work-directory paths known to the repository-entry map.
    let dotgit_paths: HashSet<_> = self
        .git_repositories
        .iter()
        .map(|(_, repo)| repo.git_dir_path.clone())
        .collect();
    let work_dir_paths: HashSet<_> = self
        .repository_entries
        .iter()
        .map(|(work_dir, _)| work_dir.clone().0)
        .collect();

    // The two maps must track exactly the same set of repositories, with no
    // duplicate paths on either side.
    assert_eq!(dotgit_paths.len(), work_dir_paths.len());
    assert_eq!(self.repository_entries.iter().count(), work_dir_paths.len());
    assert_eq!(self.git_repositories.iter().count(), work_dir_paths.len());

    // Every repository entry's work directory must resolve to a tracked
    // git repository.
    for (_, entry) in self.repository_entries.iter() {
        self.git_repositories.get(&entry.work_directory).unwrap();
    }
}
#[cfg(test)]
pub fn entries_without_ids(&self, include_ignored: bool) -> Vec<(&Path, u64, bool)> {
let mut paths = Vec::new();
@ -2704,8 +2725,17 @@ impl BackgroundScannerState {
fn reuse_entry_id(&mut self, entry: &mut Entry) {
if let Some(mtime) = entry.mtime {
if let Some(removed_entry_id) = self.removed_entry_ids.remove(&(entry.inode, mtime)) {
entry.id = removed_entry_id;
// If an entry with the same inode was removed from the worktree during this scan,
// then it *might* represent the same file or directory. But the OS might also have
// re-used the inode for a completely different file or directory.
//
// Conditionally reuse the old entry's id:
// * if the mtime is the same, the file was probably renamed.
// * if the path is the same, the file may just have been updated
if let Some(removed_entry) = self.removed_entries.remove(&entry.inode) {
if removed_entry.mtime == Some(mtime) || removed_entry.path == entry.path {
entry.id = removed_entry.id;
}
} else if let Some(existing_entry) = self.snapshot.entry_for_path(&entry.path) {
entry.id = existing_entry.id;
}
@ -2797,30 +2827,47 @@ impl BackgroundScannerState {
}
self.snapshot.entries_by_path = new_entries;
let mut entries_by_id_edits = Vec::new();
let mut removed_ids = Vec::with_capacity(removed_entries.summary().count);
for entry in removed_entries.cursor::<()>() {
if let Some(mtime) = entry.mtime {
let removed_entry_id = self
.removed_entry_ids
.entry((entry.inode, mtime))
.or_insert(entry.id);
*removed_entry_id = cmp::max(*removed_entry_id, entry.id);
match self.removed_entries.entry(entry.inode) {
hash_map::Entry::Occupied(mut e) => {
let prev_removed_entry = e.get_mut();
if entry.id > prev_removed_entry.id {
*prev_removed_entry = entry.clone();
}
}
hash_map::Entry::Vacant(e) => {
e.insert(entry.clone());
}
}
entries_by_id_edits.push(Edit::Remove(entry.id));
}
self.snapshot.entries_by_id.edit(entries_by_id_edits, &());
if path.file_name() == Some(&GITIGNORE) {
let abs_parent_path = self.snapshot.abs_path.join(path.parent().unwrap());
if let Some((_, needs_update)) = self
.snapshot
.ignores_by_parent_abs_path
.get_mut(abs_parent_path.as_path())
{
*needs_update = true;
if entry.path.file_name() == Some(&GITIGNORE) {
let abs_parent_path = self.snapshot.abs_path.join(entry.path.parent().unwrap());
if let Some((_, needs_update)) = self
.snapshot
.ignores_by_parent_abs_path
.get_mut(abs_parent_path.as_path())
{
*needs_update = true;
}
}
if let Err(ix) = removed_ids.binary_search(&entry.id) {
removed_ids.insert(ix, entry.id);
}
}
self.snapshot.entries_by_id.edit(
removed_ids.iter().map(|&id| Edit::Remove(id)).collect(),
&(),
);
self.snapshot
.git_repositories
.retain(|id, _| removed_ids.binary_search(&id).is_err());
self.snapshot
.repository_entries
.retain(|repo_path, _| !repo_path.0.starts_with(path));
#[cfg(test)]
self.snapshot.check_invariants(false);
}
@ -3722,11 +3769,14 @@ impl BackgroundScanner {
{
let mut state = self.state.lock();
state.snapshot.completed_scan_id = state.snapshot.scan_id;
for (_, entry_id) in mem::take(&mut state.removed_entry_ids) {
state.scanned_dirs.remove(&entry_id);
for (_, entry) in mem::take(&mut state.removed_entries) {
state.scanned_dirs.remove(&entry.id);
}
}
#[cfg(test)]
self.state.lock().snapshot.check_git_invariants();
self.send_status_update(false, None);
}
@ -4139,7 +4189,6 @@ impl BackgroundScanner {
let is_dir = fs_entry.is_dir();
fs_entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, is_dir);
fs_entry.is_external = is_external;
fs_entry.is_private = self.is_path_private(path);
@ -4168,7 +4217,6 @@ impl BackgroundScanner {
self.remove_repo_path(path, &mut state.snapshot);
}
Err(err) => {
// TODO - create a special 'error' entry in the entries tree to mark this
log::error!("error reading file {abs_path:?} on event: {err:#}");
}
}
@ -4198,9 +4246,6 @@ impl BackgroundScanner {
}
}
// TODO statuses
// Track when a .git is removed and iterate over the file system there
Some(())
}

View File

@ -1212,6 +1212,76 @@ async fn test_create_directory_during_initial_scan(cx: &mut TestAppContext) {
);
}
// Regression test for git state corruption when the metadata of a git
// repository's work directory is updated (zed-industries/zed#13176): the
// directory must keep its project entry id across the re-scan, and git
// status changes must still be detected afterwards.
#[gpui::test]
async fn test_bump_mtime_of_git_repo_workdir(cx: &mut TestAppContext) {
init_test(cx);
// Create a worktree with a git directory.
let fs = FakeFs::new(cx.background_executor.clone());
fs.insert_tree(
"/root",
json!({
".git": {},
"a.txt": "",
"b": {
"c.txt": "",
},
}),
)
.await;
let tree = Worktree::local(
"/root".as_ref(),
true,
fs.clone(),
Default::default(),
&mut cx.to_async(),
)
.await
.unwrap();
cx.executor().run_until_parked();
// Snapshot every entry's project id and mtime after the initial scan, so
// we can compare them against the state after the re-scan below.
let (old_entry_ids, old_mtimes) = tree.read_with(cx, |tree, _| {
(
tree.entries(true, 0).map(|e| e.id).collect::<Vec<_>>(),
tree.entries(true, 0).map(|e| e.mtime).collect::<Vec<_>>(),
)
});
// Regression test: after the directory is scanned, touch the git repo's
// working directory, bumping its mtime. That directory keeps its project
// entry id after the directories are re-scanned.
fs.touch_path("/root").await;
cx.executor().run_until_parked();
let (new_entry_ids, new_mtimes) = tree.read_with(cx, |tree, _| {
(
tree.entries(true, 0).map(|e| e.id).collect::<Vec<_>>(),
tree.entries(true, 0).map(|e| e.mtime).collect::<Vec<_>>(),
)
});
// Entry ids must survive the re-scan even though the mtimes changed.
assert_eq!(new_entry_ids, old_entry_ids);
assert_ne!(new_mtimes, old_mtimes);
// Regression test: changes to the git repository should still be
// detected.
fs.set_status_for_repo_via_git_operation(
&Path::new("/root/.git"),
&[(Path::new("b/c.txt"), GitFileStatus::Modified)],
);
cx.executor().run_until_parked();
let snapshot = tree.read_with(cx, |tree, _| tree.snapshot());
// The modified status on b/c.txt should propagate up to the repo root.
check_propagated_statuses(
&snapshot,
&[
(Path::new(""), Some(GitFileStatus::Modified)),
(Path::new("a.txt"), None),
(Path::new("b/c.txt"), Some(GitFileStatus::Modified)),
],
);
}
#[gpui::test]
async fn test_create_dir_all_on_create_entry(cx: &mut TestAppContext) {
init_test(cx);
@ -2409,25 +2479,25 @@ async fn test_propagate_git_statuses(cx: &mut TestAppContext) {
(Path::new("f/no-status.txt"), None),
],
);
}
#[track_caller]
fn check_propagated_statuses(
snapshot: &Snapshot,
expected_statuses: &[(&Path, Option<GitFileStatus>)],
) {
let mut entries = expected_statuses
#[track_caller]
fn check_propagated_statuses(
snapshot: &Snapshot,
expected_statuses: &[(&Path, Option<GitFileStatus>)],
) {
let mut entries = expected_statuses
.iter()
.map(|(path, _)| snapshot.entry_for_path(path).unwrap().clone())
.collect::<Vec<_>>();
snapshot.propagate_git_statuses(&mut entries);
assert_eq!(
entries
.iter()
.map(|(path, _)| snapshot.entry_for_path(path).unwrap().clone())
.collect::<Vec<_>>();
snapshot.propagate_git_statuses(&mut entries);
assert_eq!(
entries
.iter()
.map(|e| (e.path.as_ref(), e.git_status))
.collect::<Vec<_>>(),
expected_statuses
);
}
.map(|e| (e.path.as_ref(), e.git_status))
.collect::<Vec<_>>(),
expected_statuses
);
}
#[track_caller]