workingcopy: ignore Git submodules in "status"

Summary: To avoid showing a bunch of "?" files, we need to ignore paths inside Git submodules. For watchman, we follow the Python implementation and add the submodule paths to the ignored-dirs list. For the manual walker, we similarly skip any directory in the ignore list. Note that the walker already skipped the submodules because they contain a "dot dir" (e.g. ".sl"), but I piped the ignored-dirs skipping through anyway to be consistent.

Reviewed By: quark-zju

Differential Revision: D46547238

fbshipit-source-id: b001dc02a3c73f69d74fe36615a812f9ac983944
Muir Manders 2023-06-15 19:48:34 -07:00 committed by Facebook GitHub Bot
parent ae180addae
commit 2b6c27c219
9 changed files with 106 additions and 26 deletions
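
As a rough sketch of the approach (the submodule paths here are hypothetical, not taken from this commit): for a git-format working copy, status now assembles an ignored-dirs list from the dot dir plus the submodule paths parsed out of .gitmodules, and hands it to whichever pending-changes backend is in use (watchman or the manual walker).

    use std::path::PathBuf;

    fn main() {
        // Hypothetical example: the repo's dot dir plus two submodule paths that
        // .gitmodules would declare (e.g. [submodule "third_party/foo"] with
        // path = third_party/foo).
        let ignore_dirs: Vec<PathBuf> = vec![
            PathBuf::from(".sl"),             // dot dir, always ignored
            PathBuf::from("third_party/foo"), // submodule path
            PathBuf::from("libs/bar"),        // submodule path
        ];
        // Both backends receive this list via the new `ignore_dirs` parameter on
        // PendingChanges::pending_changes(), so files under these directories are
        // never reported as "?".
        assert_eq!(ignore_dirs.len(), 3);
    }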

View File

@@ -59,6 +59,7 @@ py_class!(class walker |py| {
let walker = Walker::new(
root.to_path_buf(),
dot_dir,
+Vec::new(),
matcher,
include_directories,
).map_pyerr(py)?;

View File

@@ -615,6 +615,7 @@ impl Repo {
Ok(WorkingCopy::new(
vfs,
+self.storage_format(),
filesystem,
treestate,
tree_resolver,

View File

@@ -39,6 +39,7 @@ impl PendingChanges for EdenFileSystem {
&self,
_matcher: Arc<dyn Matcher + Send + Sync + 'static>,
_ignore_matcher: Arc<dyn Matcher + Send + Sync + 'static>,
+_ignore_dirs: Vec<PathBuf>,
_last_write: SystemTime,
_config: &dyn Config,
_io: &IO,

View File

@@ -5,6 +5,7 @@
* GNU General Public License version 2.
*/
+use std::path::PathBuf;
use std::sync::Arc;
use std::time::SystemTime;
@@ -43,6 +44,8 @@ pub trait PendingChanges {
matcher: Arc<dyn Matcher + Send + Sync + 'static>,
// Git ignore matcher, except won't match committed files.
ignore_matcher: Arc<dyn Matcher + Send + Sync + 'static>,
+// Directories to always ignore such as ".sl".
+ignore_dirs: Vec<PathBuf>,
last_write: SystemTime,
config: &dyn Config,
io: &IO,

View File

@@ -6,6 +6,7 @@
*/
use std::collections::HashSet;
+use std::path::PathBuf;
use std::sync::Arc;
use std::time::SystemTime;
@@ -69,13 +70,20 @@ impl PendingChangesTrait for PhysicalFileSystem {
&self,
matcher: Arc<dyn Matcher + Send + Sync + 'static>,
_ignore_matcher: Arc<dyn Matcher + Send + Sync + 'static>,
+ignore_dirs: Vec<PathBuf>,
last_write: SystemTime,
config: &dyn Config,
_io: &IO,
) -> Result<Box<dyn Iterator<Item = Result<PendingChangeResult>>>> {
let root = self.vfs.root().to_path_buf();
let ident = identity::must_sniff_dir(&root)?;
-let walker = Walker::new(root, ident.dot_dir().to_string(), matcher.clone(), false)?;
+let walker = Walker::new(
+root,
+ident.dot_dir().to_string(),
+ignore_dirs,
+matcher.clone(),
+false,
+)?;
let manifests =
WorkingCopy::current_manifests(&self.treestate.lock(), &self.tree_resolver)?;
let file_change_detector = FileChangeDetector::new(

View File

@@ -5,6 +5,7 @@
* GNU General Public License version 2.
*/
+use std::collections::HashSet;
use std::fs;
use std::fs::DirEntry;
use std::fs::Metadata;
@@ -98,6 +99,8 @@ pub struct WalkerData<M> {
result_cnt: AtomicU64,
root: PathBuf,
include_directories: bool,
+dot_dir: String,
+skip_dirs: HashSet<RepoPathBuf>,
}
impl<M> WalkerData<M> {
@@ -118,7 +121,6 @@ pub struct Walker<M> {
result_receiver: Receiver<Result<WalkEntry>>,
has_walked: bool,
payload: Arc<WalkerData<M>>,
-dot_dir: String,
}
impl<M> Walker<M>
@@ -134,6 +136,7 @@ where
pub fn new(
root: PathBuf,
dot_dir: String,
+skip_dirs: Vec<PathBuf>,
matcher: M,
include_directories: bool,
) -> Result<Self> {
@@ -154,8 +157,12 @@ where
root,
matcher,
include_directories,
+dot_dir,
+skip_dirs: skip_dirs
+.into_iter()
+.map(|p| Ok(p.try_into()?))
+.collect::<Result<_>>()?,
}),
-dot_dir,
})
}
@@ -163,7 +170,6 @@ where
// child and increment busy_nodes atomic.
fn match_entry_and_enqueue(
dir: &RepoPathBuf,
-dot_dir: &str,
entry: DirEntry,
shared_data: Arc<WalkerData<M>>,
) -> Result<()> {
@@ -188,7 +194,7 @@ where
.enqueue_result(Ok(WalkEntry::File(candidate_path, entry.metadata()?)))?;
}
} else if filetype.is_dir() {
-if filename.as_str() != dot_dir
+if !shared_data.skip_dirs.contains(filename)
&& shared_data
.matcher
.matches_directory(candidate_path.as_repo_path())?
@@ -217,7 +223,6 @@ where
for _t in 0..self.threads.capacity() {
let shared_data = self.payload.clone();
-let dot_dir = self.dot_dir.clone();
// TODO make sure that _t is different for each thread
self.threads.push(thread::spawn(move || {
@@ -237,7 +242,9 @@ where
let abs_dir_path = shared_data.root.join(dir.as_str());
// Skip nested repos.
-if !dir.is_empty() && abs_dir_path.join(&dot_dir).exists() {
+if !dir.is_empty()
+&& abs_dir_path.join(&shared_data.dot_dir).exists()
+{
return Ok(());
}
@@ -248,7 +255,6 @@ where
entry.map_err(|e| WalkError::IOError(dir.clone(), e))?;
if let Err(e) = Walker::match_entry_and_enqueue(
&dir,
-&dot_dir,
entry,
shared_data.clone(),
) {
@@ -353,7 +359,13 @@ mod tests {
let files = vec!["dirA/a.txt", "b.txt"];
let root_dir = create_directory(&directories, &files)?;
let root_path = PathBuf::from(root_dir.path());
-let walker = Walker::new(root_path, ".hg".to_string(), NeverMatcher::new(), false)?;
+let walker = Walker::new(
+root_path,
+".hg".to_string(),
+Vec::new(),
+NeverMatcher::new(),
+false,
+)?;
let walked_files: Result<Vec<_>> = walker.collect();
let walked_files = walked_files?;
assert!(walked_files.is_empty());
@@ -369,6 +381,7 @@ mod tests {
let walker = Walker::new(
root_path,
".hg".to_string(),
+Vec::new(),
TreeMatcher::from_rules(["foo/bar/**"].iter(), true).unwrap(),
false,
)?;
@@ -388,7 +401,13 @@ mod tests {
let files = vec!["dirA/a.txt", "dirA/b.txt", "dirB/dirC/dirD/c.txt"];
let root_dir = create_directory(&directories, &files)?;
let root_path = PathBuf::from(root_dir.path());
-let walker = Walker::new(root_path, ".hg".to_string(), AlwaysMatcher::new(), true)?;
+let walker = Walker::new(
+root_path,
+".hg".to_string(),
+Vec::new(),
+AlwaysMatcher::new(),
+true,
+)?;
let walked_files: Result<Vec<_>> = walker.collect();
let walked_files = walked_files?;
// Includes root dir ""
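
A caller-side sketch of the updated Walker::new signature (crate and module paths are approximate; the submodule path is hypothetical). Directories listed in the new skip_dirs argument, including the dot dir, which callers now pass explicitly, are pruned before the walk descends into them.

    use std::path::PathBuf;

    use pathmatcher::AlwaysMatcher; // same matcher the tests above use
    use workingcopy::walker::Walker; // import path approximate

    fn walk_without_submodules(root: PathBuf) -> anyhow::Result<()> {
        let walker = Walker::new(
            root,
            ".sl".to_string(), // dot dir, used to detect and skip nested repos
            vec![
                PathBuf::from(".sl"),             // skip the dot dir itself
                PathBuf::from("third_party/foo"), // hypothetical submodule path
            ],
            AlwaysMatcher::new(),
            false, // don't emit directory entries, only files
        )?;
        for entry in walker {
            let _file = entry?; // a WalkEntry for each file outside the skipped dirs
        }
        Ok(())
    }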

View File

@@ -111,7 +111,11 @@ impl WatchmanFileSystem {
})
}
-async fn query_files(&self, config: WatchmanConfig) -> Result<QueryResult<StatusQuery>> {
+async fn query_files(
+&self,
+config: WatchmanConfig,
+ignore_dirs: Vec<PathBuf>,
+) -> Result<QueryResult<StatusQuery>> {
let start = std::time::Instant::now();
// This starts watchman if it isn't already started.
@@ -123,11 +127,20 @@ impl WatchmanFileSystem {
.resolve_root(CanonicalPath::canonicalize(self.vfs.root())?)
.await?;
-let ident = identity::must_sniff_dir(self.vfs.root())?;
-let excludes = Expr::Any(vec![Expr::DirName(DirNameTerm {
-path: PathBuf::from(ident.dot_dir()),
-depth: None,
-})]);
+let mut expr = None;
+if !ignore_dirs.is_empty() {
+expr = Some(Expr::Not(Box::new(Expr::Any(
+ignore_dirs
+.into_iter()
+.map(|p| {
+Expr::DirName(DirNameTerm {
+path: p,
+depth: None,
+})
+})
+.collect(),
+))));
+}
// The crawl is done - display a generic "we're querying" spinner.
let _bar = ProgressBar::register_new("querying watchman", 0, "");
@@ -137,7 +150,7 @@ impl WatchmanFileSystem {
&resolved,
QueryRequestCommon {
since: config.clock,
-expression: Some(Expr::Not(Box::new(excludes))),
+expression: expr,
sync_timeout: config.sync_timeout.into(),
..Default::default()
},
@@ -199,6 +212,7 @@ impl PendingChanges for WatchmanFileSystem {
&self,
matcher: Arc<dyn Matcher + Send + Sync + 'static>,
mut ignore_matcher: Arc<dyn Matcher + Send + Sync + 'static>,
+ignore_dirs: Vec<PathBuf>,
last_write: SystemTime,
config: &dyn Config,
io: &IO,
@@ -239,11 +253,16 @@ impl PendingChanges for WatchmanFileSystem {
// Instrument query_files() from outside to avoid async weirdness.
let _span = tracing::info_span!("query_files").entered();
-async_runtime::block_on(self.query_files(WatchmanConfig {
-clock: prev_clock.clone(),
-sync_timeout:
-config.get_or::<Duration>("fsmonitor", "timeout", || Duration::from_secs(10))?,
-}))?
+async_runtime::block_on(self.query_files(
+WatchmanConfig {
+clock: prev_clock.clone(),
+sync_timeout:
+config.get_or::<Duration>("fsmonitor", "timeout", || {
+Duration::from_secs(10)
+})?,
+},
+ignore_dirs,
+))?
};
progress_handle.abort();
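
For reference, a minimal standalone sketch of the exclusion expression built above (the watchman_client import path is approximate; the ignore paths are hypothetical), along with the watchman query it corresponds to:

    use std::path::PathBuf;

    use watchman_client::expr::{DirNameTerm, Expr}; // same types the hunk above uses

    /// Returns None when there is nothing to exclude; otherwise, for
    /// ignore_dirs = [".sl", "third_party/foo"], this is roughly the watchman query
    ///   ["not", ["anyof", ["dirname", ".sl"], ["dirname", "third_party/foo"]]]
    fn exclude_expr(ignore_dirs: Vec<PathBuf>) -> Option<Expr> {
        if ignore_dirs.is_empty() {
            return None;
        }
        Some(Expr::Not(Box::new(Expr::Any(
            ignore_dirs
                .into_iter()
                .map(|path| Expr::DirName(DirNameTerm { path, depth: None }))
                .collect(),
        ))))
    }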

View File

@@ -15,6 +15,7 @@ use anyhow::Context;
use anyhow::Result;
use configmodel::Config;
use configmodel::ConfigExt;
+use identity::Identity;
use io::IO;
use manifest::FileType;
use manifest::Manifest;
@@ -36,6 +37,7 @@ use storemodel::ReadFileContents;
use treestate::filestate::StateFlags;
use treestate::tree::VisitorResult;
use treestate::treestate::TreeState;
+use types::repo::StorageFormat;
use types::HgId;
use types::RepoPath;
use types::RepoPathBuf;
@@ -48,6 +50,7 @@ use crate::filesystem::ChangeType;
use crate::filesystem::FileSystemType;
use crate::filesystem::PendingChangeResult;
use crate::filesystem::PendingChanges;
+use crate::git::parse_submodules;
use crate::physicalfs::PhysicalFileSystem;
use crate::status::compute_status;
use crate::util::walk_treestate;
@@ -71,6 +74,8 @@ impl AsRef<Box<dyn PendingChanges + Send>> for FileSystem {
pub struct WorkingCopy {
vfs: VFS,
+ident: Identity,
+format: StorageFormat,
treestate: Arc<Mutex<TreeState>>,
tree_resolver: ArcReadTreeManifest,
filesystem: Mutex<FileSystem>,
@@ -82,6 +87,7 @@ pub struct WorkingCopy {
impl WorkingCopy {
pub fn new(
vfs: VFS,
+format: StorageFormat,
// TODO: Have constructor figure out FileSystemType
file_system_type: FileSystemType,
treestate: Arc<Mutex<TreeState>>,
@@ -121,6 +127,8 @@ impl WorkingCopy {
Ok(WorkingCopy {
vfs,
+format,
+ident,
treestate,
tree_resolver,
filesystem,
@@ -270,11 +278,10 @@ impl WorkingCopy {
if fs.file_system_type == FileSystemType::Eden {
sparse_matchers.push(Arc::new(AlwaysMatcher::new()));
} else {
-let ident = identity::must_sniff_dir(&fs.vfs.root())?;
for manifest in manifests.iter() {
match crate::sparse::repo_matcher(
&self.vfs,
-&fs.vfs.root().join(ident.dot_dir()),
+&fs.vfs.root().join(self.ident.dot_dir()),
manifest.read().clone(),
fs.file_store.clone(),
)? {
@@ -329,11 +336,32 @@ impl WorkingCopy {
let matcher = Arc::new(DifferenceMatcher::new(matcher, ignore_matcher.clone()));
+let mut ignore_dirs = vec![PathBuf::from(self.ident.dot_dir())];
+if self.format.is_git() {
+// Ignore files within submodules. Python has some additional
+// logic layered on top to add submodule info into status results.
+let git_modules_path = self.vfs.join(".gitmodules".try_into()?);
+if git_modules_path.exists() {
+ignore_dirs.extend(
+parse_submodules(&util::file::read(&git_modules_path)?)?
+.into_iter()
+.map(|s| PathBuf::from(s.path)),
+);
+}
+}
let pending_changes = self
.filesystem
.lock()
.inner
-.pending_changes(matcher.clone(), ignore_matcher, last_write, config, io)?
+.pending_changes(
+matcher.clone(),
+ignore_matcher,
+ignore_dirs,
+last_write,
+config,
+io,
+)?
.filter_map(|result| match result {
Ok(PendingChangeResult::File(change_type)) => {
match matcher.matches_file(change_type.get_path()) {

View File

@@ -2,7 +2,7 @@
#require git no-windows
#debugruntest-compatible
-$ setconfig workingcopy.ruststatus=False
+$ setconfig workingcopy.ruststatus=true
$ . $TESTDIR/git.sh
$ setconfig diff.git=true ui.allowemptycommit=true
$ enable rebase