revset: prefetch text for ancestorsaged()

Summary:
This is a bit tricky: look-ahead does not work because the content of each
commit's text decides whether the commit is in the resulting set or not.

Add a special case for `ancestorsaged` so it does not end up fetching commit
texts one by one.

Reviewed By: sfilipco

Differential Revision: D24324797

fbshipit-source-id: 844ac6a6637e25900eea00d99704a59dfc7a0345
This commit is contained in:
Jun Wu 2020-12-14 13:10:41 -08:00 committed by Facebook GitHub Bot
parent af45384c96
commit fa8817c9b4
2 changed files with 63 additions and 7 deletions

View File

@ -25,7 +25,16 @@ generatorset = smartset.generatorset
_maxlogdepth = 0x80000000 _maxlogdepth = 0x80000000
def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse, stopfunc=None): def _walkrevtree(
pfunc,
revs,
startdepth,
stopdepth,
reverse,
stopfunc=None,
prefetchtext=False,
repo=None,
):
"""Walk DAG using 'pfunc' from the given 'revs' nodes """Walk DAG using 'pfunc' from the given 'revs' nodes
'pfunc(rev)' should return the parent/child revisions of the given 'rev' 'pfunc(rev)' should return the parent/child revisions of the given 'rev'
@ -52,6 +61,11 @@ def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse, stopfunc=None):
revs.sort(reverse) revs.sort(reverse)
irevs = iter(revs) irevs = iter(revs)
pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first) pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first)
prefetched = set()
if repo is None:
# prefetch requires repo
prefetchtext = False
inputrev = next(irevs, None) inputrev = next(irevs, None)
if inputrev is not None: if inputrev is not None:
@ -62,6 +76,24 @@ def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse, stopfunc=None):
currev, curdepth = heapq.heappop(pendingheap) currev, curdepth = heapq.heappop(pendingheap)
currev = heapsign * currev currev = heapsign * currev
if prefetchtext and currev not in prefetched:
# prefetch text for 'n' ancestors of currev
if curdepth < 1000:
n = 100
elif curdepth < 10000:
n = 1000
else:
n = 10000
for ctx in (
repo.revs("sort(ancestors(%d), -rev)", currev)
.prefetch("text")
.iterctx(repo)
):
prefetched.add(ctx.rev())
n -= 1
if n <= 0:
break
# Process the stopfunc after the rev has been added to the heap, instead # Process the stopfunc after the rev has been added to the heap, instead
# of before, so we don't pay the stopfunc cost for revs that might not # of before, so we don't pay the stopfunc cost for revs that might not
# even be reached. This saves us from having to traverse to ancient # even be reached. This saves us from having to traverse to ancient
@ -129,7 +161,9 @@ def filerevancestors(fctxs, followfirst=False):
return generatorset(gen, iterasc=False) return generatorset(gen, iterasc=False)
def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc): def _genrevancestors(
repo, revs, followfirst, startdepth, stopdepth, cutfunc, prefetchtext=False
):
if followfirst: if followfirst:
cut = 1 cut = 1
else: else:
@ -145,12 +179,25 @@ def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
if cutfunc is not None: if cutfunc is not None:
revs = revs.filter(lambda rev: not cutfunc(rev)) revs = revs.filter(lambda rev: not cutfunc(rev))
return _walkrevtree( return _walkrevtree(
plainpfunc, revs, startdepth, stopdepth, reverse=True, stopfunc=cutfunc plainpfunc,
revs,
startdepth,
stopdepth,
reverse=True,
stopfunc=cutfunc,
prefetchtext=prefetchtext,
repo=repo,
) )
def revancestors( def revancestors(
repo, revs, followfirst=False, startdepth=None, stopdepth=None, cutfunc=None repo,
revs,
followfirst=False,
startdepth=None,
stopdepth=None,
cutfunc=None,
prefetchtext=False,
): ):
"""Like revlog.ancestors(), but supports additional options, includes """Like revlog.ancestors(), but supports additional options, includes
the given revs themselves, and returns a smartset the given revs themselves, and returns a smartset
@ -173,7 +220,15 @@ def revancestors(
|/ |/
A A
""" """
gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc) gen = _genrevancestors(
repo,
revs,
followfirst,
startdepth,
stopdepth,
cutfunc,
prefetchtext=prefetchtext,
)
return generatorset(gen, iterasc=False) return generatorset(gen, iterasc=False)

View File

@ -652,8 +652,9 @@ def ancestorsaged(repo, subset, x):
def notyounger(x): def notyounger(x):
return repo[x].date()[0] < end return repo[x].date()[0] < end
# PERF: cutfunc is not prefetch-friendly. s = dagop.revancestors(
s = dagop.revancestors(repo, heads, cutfunc=older if start is not None else None) repo, heads, cutfunc=older if start is not None else None, prefetchtext=True
)
if end is not None: if end is not None:
s = s.filter(notyounger) s = s.filter(notyounger)