nativecheckout: only check for actually unknown files

Summary:
The old native checkout check_unknown logic would iterate over every
new file action and check if the file exists. This can be expensive because 1)
it stats every path, even though most will not exist, and 2) the stat goes
through the path auditor, which stats each parent directory as well.

For initial clones where there are no files on disk already, this isn't too bad,
since the top level directories aren't present so the path auditor aborts early.
When resuming a partially completed checkout, however, this can be quite slow
(multiple minutes): some directories already exist, so the path auditor can no
longer abort early and the path auditing becomes much more expensive.

We have to run status anyway for the checkout, so let's go ahead and run it
earlier and use its results to only check the actual unknown files. This makes
the check O(unknown files) instead of O(total files to be written).
Additionally, the set of unknown files that must be checked shrinks further on
a resumed checkout, since already-written files are removed from the list of
files to be checked.
So really this is O(unknown files that differ from what's expected).

Differential Revision: D34278256

fbshipit-source-id: 39ad20387ad2622695864608c7033aefa1eb0df1
This commit is contained in:
Durham Goode 2022-02-25 14:09:21 -08:00 committed by Facebook GitHub Bot
parent bab3e49911
commit 7b97187f3e
3 changed files with 18 additions and 3 deletions

View File

@ -2719,10 +2719,13 @@ def donativecheckout(repo, p1, p2, xp1, xp2, matcher, force, partial, wc, prerec
store = repo.fileslog.contentstore
else:
store = repo.fileslog.filescmstore
status = nativestatus.status(repo.status(unknown=True))
unknown = plan.check_unknown_files(
p2.manifest(),
store,
repo.dirstate._map._tree,
status,
)
if unknown:
for f in unknown:
@ -2735,7 +2738,6 @@ def donativecheckout(repo, p1, p2, xp1, xp2, matcher, force, partial, wc, prerec
)
)
status = nativestatus.status(repo.status())
conflicts = plan.check_conflicts(status)
if conflicts:
msg = _("%d conflicting file changes:\n") % len(conflicts)

View File

@ -100,16 +100,23 @@ py_class!(class checkoutplan |py| {
checkoutplan::create_instance(py, plan)
}
def check_unknown_files(&self, manifest: &treemanifest, store: ImplInto<ArcReadFileContents>, state: &PyTreeState) -> PyResult<Vec<String>> {
def check_unknown_files(
&self,
manifest: &treemanifest,
store: ImplInto<ArcReadFileContents>,
state: &PyTreeState,
status: &PyStatus,
) -> PyResult<Vec<String>> {
let plan = self.plan(py);
let state = state.get_state(py);
let manifest = manifest.get_underlying(py);
let store = store.into();
let status = status.extract_inner_ref(py);
let unknown = py.allow_threads(move || -> Result<_> {
let mut state = state.lock();
let manifest = manifest.read();
try_block_unless_interrupted(
plan.check_unknown_files(&*manifest, store.as_ref(), &mut state))
plan.check_unknown_files(&*manifest, store.as_ref(), &mut state, status))
}).map_pyerr(py)?;
Ok(unknown.into_iter().map(|p|p.to_string()).collect())
}

View File

@ -306,6 +306,7 @@ impl CheckoutPlan {
manifest: &impl Manifest,
store: &dyn ReadFileContents<Error = anyhow::Error>,
tree_state: &mut TreeState,
status: &Status,
) -> Result<Vec<RepoPathBuf>> {
let vfs = &self.checkout.vfs;
let mut check_content = vec![];
@ -319,6 +320,11 @@ impl CheckoutPlan {
for file_action in new_files {
let file = &file_action.path;
if !matches!(status.status(file), Some(FileStatus::Unknown)) {
continue;
}
let state = if vfs.case_sensitive() {
tree_state.get(file)?
} else {