cmdutil: add a fast path to _makefollowlogfilematcher

Summary:
Aaron recently noticed that `hg log -p -l 1 fbcode/assistant` ran for a very
long time even though it only produces one diff. The profile shows that most of
the time is spent collecting the ancestors of files, which involves lots of
downloads. Even when everything is cached locally, the run time is terrible, at
over 5 minutes.

According to the profile, the code spends most of its time in the `populate`
function, which tries to build the set of files present in a specific revision.
This operation has O(N*M) complexity, with N the number of files and M the
number of revisions per file.

To speed this up, we split the problem: 1) for individual files and 2) for
directories.

For 1), the user's intention is to get the correct history, so let's keep the
code as it is. For 2), let's simply match files in the given directory, with no
consideration for the revision or the ancestors.

Reviewed By: quark-zju

Differential Revision: D15542664

fbshipit-source-id: 63028894babbb3efd41d09454d60d5241d7c3787
This commit is contained in:
Xavier Deguillard 2019-05-30 18:40:10 -07:00 committed by Facebook Github Bot
parent 628c8f95f1
commit a805a7acac
2 changed files with 32 additions and 23 deletions

View File

@ -2598,7 +2598,7 @@ def walkchangerevs(repo, match, opts, prepare):
return iterate()
def _makefollowlogfilematcher(repo, files, followfirst):
def _makefollowlogfilematcher(repo, paths, followfirst, pctx):
# When displaying a revision with --patch --follow FILE, we have
# to know which file of the revision must be diffed. With
# --follow, we want the names of the ancestors of FILE in the
@ -2608,17 +2608,28 @@ def _makefollowlogfilematcher(repo, files, followfirst):
# good enough).
fcache = {}
fcacheready = [False]
pctx = repo["."]
# "files" might be directories. Normalize them to actual files.
for path in files:
dirs = []
files = []
for path in paths:
if path not in pctx:
# "path" could be a prefix matching a directory, use the
# real matcher interface to handle it.
files = list(pctx.walk(scmutil.match(pctx, files, default="path")))
break
dirs.append(path)
else:
files.append(path)
def populate():
# When directories are passed in, walking the ancestors graph can be
# extremely expensive, let's not attempt to do it and instead just match
# all the files under the given directories.
if not dirs:
dirmatcher = None
else:
dirmatcher = matchmod.match(
repo.root, repo.getcwd(), patterns=["path:%s" % path for path in dirs]
)
def populatefiles():
# Walk the ancestors graph for all the files passed in.
for fn in files:
fctx = pctx[fn]
fcache.setdefault(fctx.introrev(), set()).add(fctx.path())
@ -2627,10 +2638,16 @@ def _makefollowlogfilematcher(repo, files, followfirst):
def filematcher(rev):
if not fcacheready[0]:
# Lazy initialization
populatefiles()
fcacheready[0] = True
populate()
return scmutil.matchfiles(repo, fcache.get(rev, []))
fileset = fcache.get(rev, [])
if not fileset:
filematcher = None
else:
filematcher = scmutil.matchfiles(repo, fcache.get(rev, []))
return matchmod.union([dirmatcher, filematcher], repo.root, repo.getcwd())
return filematcher
@ -2763,7 +2780,9 @@ def _makelogrevset(repo, pats, opts, revs):
if follow and not match.always() and not slowpath:
# _makefollowlogfilematcher expects its files argument to be
# relative to the repo root, so use match.files(), not pats.
filematcher = _makefollowlogfilematcher(repo, match.files(), followfirst)
filematcher = _makefollowlogfilematcher(
repo, match.files(), followfirst, repo[startrev]
)
else:
filematcher = _makenofollowlogfilematcher(repo, pats, opts)
if filematcher is None:

View File

@ -142,22 +142,12 @@ log on directory
date: Thu Jan 01 00:00:02 1970 +0000
summary: b
diff -r 9161b9aeaf16 -r d89b0a12d229 b
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/b Thu Jan 01 00:00:02 1970 +0000
@@ -0,0 +1,1 @@
+a
changeset: 0:9161b9aeaf16
user: test
date: Thu Jan 01 00:00:01 1970 +0000
summary: a
diff -r 000000000000 -r 9161b9aeaf16 a
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/a Thu Jan 01 00:00:01 1970 +0000
@@ -0,0 +1,1 @@
+a
-f, pattern