From bea6786f14050755810449143b9cfc0e35d9bb0d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 26 Aug 2024 17:46:50 -0700 Subject: [PATCH] Fix git repository state corruption when work dir's metadata is updated (#16926) Fixes https://github.com/zed-industries/zed/issues/13176 Release Notes: - Fixed an issue where git state would stop updating if the root directory of a git repository was updated in certain ways --- crates/fs/src/fs.rs | 29 +++++++ crates/worktree/src/worktree.rs | 105 ++++++++++++++++++-------- crates/worktree/src/worktree_tests.rs | 104 ++++++++++++++++++++----- 3 files changed, 191 insertions(+), 47 deletions(-) diff --git a/crates/fs/src/fs.rs b/crates/fs/src/fs.rs index 804d029c98..4dab145128 100644 --- a/crates/fs/src/fs.rs +++ b/crates/fs/src/fs.rs @@ -846,6 +846,35 @@ impl FakeFs { state.next_mtime = next_mtime; } + pub async fn touch_path(&self, path: impl AsRef) { + let mut state = self.state.lock(); + let path = path.as_ref(); + let new_mtime = state.next_mtime; + let new_inode = state.next_inode; + state.next_inode += 1; + state.next_mtime += Duration::from_nanos(1); + state + .write_path(path, move |entry| { + match entry { + btree_map::Entry::Vacant(e) => { + e.insert(Arc::new(Mutex::new(FakeFsEntry::File { + inode: new_inode, + mtime: new_mtime, + content: Vec::new(), + }))); + } + btree_map::Entry::Occupied(mut e) => match &mut *e.get_mut().lock() { + FakeFsEntry::File { mtime, .. } => *mtime = new_mtime, + FakeFsEntry::Dir { mtime, .. } => *mtime = new_mtime, + FakeFsEntry::Symlink { .. } => {} + }, + } + Ok(()) + }) + .unwrap(); + state.emit_event([path.to_path_buf()]); + } + pub async fn insert_file(&self, path: impl AsRef, content: Vec) { self.write_file_internal(path, content).unwrap() } diff --git a/crates/worktree/src/worktree.rs b/crates/worktree/src/worktree.rs index fecfdb11fe..1a11f9b27d 100644 --- a/crates/worktree/src/worktree.rs +++ b/crates/worktree/src/worktree.rs @@ -41,7 +41,8 @@ use settings::{Settings, SettingsLocation, SettingsStore}; use smol::channel::{self, Sender}; use std::{ any::Any, - cmp::{self, Ordering}, + cmp::Ordering, + collections::hash_map, convert::TryFrom, ffi::OsStr, fmt, @@ -299,7 +300,7 @@ struct BackgroundScannerState { /// as part of the current update. These entry ids may be re-used /// if the same inode is discovered at a new path, or if the given /// path is re-created after being deleted. - removed_entry_ids: HashMap<(u64, SystemTime), ProjectEntryId>, + removed_entries: HashMap, changed_paths: Vec>, prev_snapshot: Snapshot, } @@ -1022,7 +1023,7 @@ impl LocalWorktree { scanned_dirs: Default::default(), path_prefixes_to_scan: Default::default(), paths_to_scan: Default::default(), - removed_entry_ids: Default::default(), + removed_entries: Default::default(), changed_paths: Default::default(), }), phase: BackgroundScannerPhase::InitialScan, @@ -2636,6 +2637,26 @@ impl LocalSnapshot { } } + #[cfg(test)] + fn check_git_invariants(&self) { + let dotgit_paths = self + .git_repositories + .iter() + .map(|repo| repo.1.git_dir_path.clone()) + .collect::>(); + let work_dir_paths = self + .repository_entries + .iter() + .map(|repo| repo.0.clone().0) + .collect::>(); + assert_eq!(dotgit_paths.len(), work_dir_paths.len()); + assert_eq!(self.repository_entries.iter().count(), work_dir_paths.len()); + assert_eq!(self.git_repositories.iter().count(), work_dir_paths.len()); + for (_, entry) in self.repository_entries.iter() { + self.git_repositories.get(&entry.work_directory).unwrap(); + } + } + #[cfg(test)] pub fn entries_without_ids(&self, include_ignored: bool) -> Vec<(&Path, u64, bool)> { let mut paths = Vec::new(); @@ -2704,8 +2725,17 @@ impl BackgroundScannerState { fn reuse_entry_id(&mut self, entry: &mut Entry) { if let Some(mtime) = entry.mtime { - if let Some(removed_entry_id) = self.removed_entry_ids.remove(&(entry.inode, mtime)) { - entry.id = removed_entry_id; + // If an entry with the same inode was removed from the worktree during this scan, + // then it *might* represent the same file or directory. But the OS might also have + // re-used the inode for a completely different file or directory. + // + // Conditionally reuse the old entry's id: + // * if the mtime is the same, the file was probably been renamed. + // * if the path is the same, the file may just have been updated + if let Some(removed_entry) = self.removed_entries.remove(&entry.inode) { + if removed_entry.mtime == Some(mtime) || removed_entry.path == entry.path { + entry.id = removed_entry.id; + } } else if let Some(existing_entry) = self.snapshot.entry_for_path(&entry.path) { entry.id = existing_entry.id; } @@ -2797,30 +2827,47 @@ impl BackgroundScannerState { } self.snapshot.entries_by_path = new_entries; - let mut entries_by_id_edits = Vec::new(); + let mut removed_ids = Vec::with_capacity(removed_entries.summary().count); for entry in removed_entries.cursor::<()>() { - if let Some(mtime) = entry.mtime { - let removed_entry_id = self - .removed_entry_ids - .entry((entry.inode, mtime)) - .or_insert(entry.id); - *removed_entry_id = cmp::max(*removed_entry_id, entry.id); + match self.removed_entries.entry(entry.inode) { + hash_map::Entry::Occupied(mut e) => { + let prev_removed_entry = e.get_mut(); + if entry.id > prev_removed_entry.id { + *prev_removed_entry = entry.clone(); + } + } + hash_map::Entry::Vacant(e) => { + e.insert(entry.clone()); + } } - entries_by_id_edits.push(Edit::Remove(entry.id)); - } - self.snapshot.entries_by_id.edit(entries_by_id_edits, &()); - if path.file_name() == Some(&GITIGNORE) { - let abs_parent_path = self.snapshot.abs_path.join(path.parent().unwrap()); - if let Some((_, needs_update)) = self - .snapshot - .ignores_by_parent_abs_path - .get_mut(abs_parent_path.as_path()) - { - *needs_update = true; + if entry.path.file_name() == Some(&GITIGNORE) { + let abs_parent_path = self.snapshot.abs_path.join(entry.path.parent().unwrap()); + if let Some((_, needs_update)) = self + .snapshot + .ignores_by_parent_abs_path + .get_mut(abs_parent_path.as_path()) + { + *needs_update = true; + } + } + + if let Err(ix) = removed_ids.binary_search(&entry.id) { + removed_ids.insert(ix, entry.id); } } + self.snapshot.entries_by_id.edit( + removed_ids.iter().map(|&id| Edit::Remove(id)).collect(), + &(), + ); + self.snapshot + .git_repositories + .retain(|id, _| removed_ids.binary_search(&id).is_err()); + self.snapshot + .repository_entries + .retain(|repo_path, _| !repo_path.0.starts_with(path)); + #[cfg(test)] self.snapshot.check_invariants(false); } @@ -3722,11 +3769,14 @@ impl BackgroundScanner { { let mut state = self.state.lock(); state.snapshot.completed_scan_id = state.snapshot.scan_id; - for (_, entry_id) in mem::take(&mut state.removed_entry_ids) { - state.scanned_dirs.remove(&entry_id); + for (_, entry) in mem::take(&mut state.removed_entries) { + state.scanned_dirs.remove(&entry.id); } } + #[cfg(test)] + self.state.lock().snapshot.check_git_invariants(); + self.send_status_update(false, None); } @@ -4139,7 +4189,6 @@ impl BackgroundScanner { let is_dir = fs_entry.is_dir(); fs_entry.is_ignored = ignore_stack.is_abs_path_ignored(&abs_path, is_dir); - fs_entry.is_external = is_external; fs_entry.is_private = self.is_path_private(path); @@ -4168,7 +4217,6 @@ impl BackgroundScanner { self.remove_repo_path(path, &mut state.snapshot); } Err(err) => { - // TODO - create a special 'error' entry in the entries tree to mark this log::error!("error reading file {abs_path:?} on event: {err:#}"); } } @@ -4198,9 +4246,6 @@ impl BackgroundScanner { } } - // TODO statuses - // Track when a .git is removed and iterate over the file system there - Some(()) } diff --git a/crates/worktree/src/worktree_tests.rs b/crates/worktree/src/worktree_tests.rs index 297b1b4a5c..c21fea1401 100644 --- a/crates/worktree/src/worktree_tests.rs +++ b/crates/worktree/src/worktree_tests.rs @@ -1212,6 +1212,76 @@ async fn test_create_directory_during_initial_scan(cx: &mut TestAppContext) { ); } +#[gpui::test] +async fn test_bump_mtime_of_git_repo_workdir(cx: &mut TestAppContext) { + init_test(cx); + + // Create a worktree with a git directory. + let fs = FakeFs::new(cx.background_executor.clone()); + fs.insert_tree( + "/root", + json!({ + ".git": {}, + "a.txt": "", + "b": { + "c.txt": "", + }, + }), + ) + .await; + + let tree = Worktree::local( + "/root".as_ref(), + true, + fs.clone(), + Default::default(), + &mut cx.to_async(), + ) + .await + .unwrap(); + cx.executor().run_until_parked(); + + let (old_entry_ids, old_mtimes) = tree.read_with(cx, |tree, _| { + ( + tree.entries(true, 0).map(|e| e.id).collect::>(), + tree.entries(true, 0).map(|e| e.mtime).collect::>(), + ) + }); + + // Regression test: after the directory is scanned, touch the git repo's + // working directory, bumping its mtime. That directory keeps its project + // entry id after the directories are re-scanned. + fs.touch_path("/root").await; + cx.executor().run_until_parked(); + + let (new_entry_ids, new_mtimes) = tree.read_with(cx, |tree, _| { + ( + tree.entries(true, 0).map(|e| e.id).collect::>(), + tree.entries(true, 0).map(|e| e.mtime).collect::>(), + ) + }); + assert_eq!(new_entry_ids, old_entry_ids); + assert_ne!(new_mtimes, old_mtimes); + + // Regression test: changes to the git repository should still be + // detected. + fs.set_status_for_repo_via_git_operation( + &Path::new("/root/.git"), + &[(Path::new("b/c.txt"), GitFileStatus::Modified)], + ); + cx.executor().run_until_parked(); + + let snapshot = tree.read_with(cx, |tree, _| tree.snapshot()); + check_propagated_statuses( + &snapshot, + &[ + (Path::new(""), Some(GitFileStatus::Modified)), + (Path::new("a.txt"), None), + (Path::new("b/c.txt"), Some(GitFileStatus::Modified)), + ], + ); +} + #[gpui::test] async fn test_create_dir_all_on_create_entry(cx: &mut TestAppContext) { init_test(cx); @@ -2409,25 +2479,25 @@ async fn test_propagate_git_statuses(cx: &mut TestAppContext) { (Path::new("f/no-status.txt"), None), ], ); +} - #[track_caller] - fn check_propagated_statuses( - snapshot: &Snapshot, - expected_statuses: &[(&Path, Option)], - ) { - let mut entries = expected_statuses +#[track_caller] +fn check_propagated_statuses( + snapshot: &Snapshot, + expected_statuses: &[(&Path, Option)], +) { + let mut entries = expected_statuses + .iter() + .map(|(path, _)| snapshot.entry_for_path(path).unwrap().clone()) + .collect::>(); + snapshot.propagate_git_statuses(&mut entries); + assert_eq!( + entries .iter() - .map(|(path, _)| snapshot.entry_for_path(path).unwrap().clone()) - .collect::>(); - snapshot.propagate_git_statuses(&mut entries); - assert_eq!( - entries - .iter() - .map(|e| (e.path.as_ref(), e.git_status)) - .collect::>(), - expected_statuses - ); - } + .map(|e| (e.path.as_ref(), e.git_status)) + .collect::>(), + expected_statuses + ); } #[track_caller]