mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 07:17:55 +03:00
ancestor: faster algorithm for difference of ancestor sets
One of the major reasons rebase is slow in large repositories is the computation of the detach set: the set of ancestors of the changesets to rebase not in the destination parent. This is currently done via a revset that does two walks all the way to the root of the DAG. Instead of doing that, to find ancestors of a set <revs> not in another set <common> we walk up the tree in reverse revision number order, maintaining sets of nodes visited from <revs>, <common> or both. For the common case where the sets are close both topologically and in revision number (relative to repository size), this has been found to speed up rebase by around 15-20%. When the nodes are farther apart and the DAG is highly branching, it is harder to say which would win. Here's how long computing the detach set takes in a linear repository with over 400000 changesets, rebasing near tip: Rebasing across 4 changesets Revset method: 2.2s New algorithm: 0.00015s Rebasing across 250 changesets Revset method: 2.2s New algorithm: 0.00069s Rebasing across 10000 changesets Revset method: 2.4s New algorithm: 0.019s
This commit is contained in:
parent
022cc130e9
commit
a0694d4a53
@ -6,6 +6,7 @@
|
||||
# GNU General Public License version 2 or any later version.
|
||||
|
||||
import heapq
|
||||
from node import nullrev
|
||||
|
||||
def ancestor(a, b, pfunc):
|
||||
"""
|
||||
@ -89,3 +90,166 @@ def ancestor(a, b, pfunc):
|
||||
gx = x.next()
|
||||
except StopIteration:
|
||||
return None
|
||||
|
||||
def missingancestors(revs, bases, pfunc):
|
||||
"""Return all the ancestors of revs that are not ancestors of bases.
|
||||
|
||||
This may include elements from revs.
|
||||
|
||||
Equivalent to the revset (::revs - ::bases). Revs are returned in
|
||||
revision number order, which is a topological order.
|
||||
|
||||
revs and bases should both be iterables. pfunc must return a list of
|
||||
parent revs for a given revs.
|
||||
|
||||
graph is a dict of child->parent adjacency lists for this graph:
|
||||
o 13
|
||||
|
|
||||
| o 12
|
||||
| |
|
||||
| | o 11
|
||||
| | |\
|
||||
| | | | o 10
|
||||
| | | | |
|
||||
| o---+ | 9
|
||||
| | | | |
|
||||
o | | | | 8
|
||||
/ / / /
|
||||
| | o | 7
|
||||
| | | |
|
||||
o---+ | 6
|
||||
/ / /
|
||||
| | o 5
|
||||
| |/
|
||||
| o 4
|
||||
| |
|
||||
o | 3
|
||||
| |
|
||||
| o 2
|
||||
|/
|
||||
o 1
|
||||
|
|
||||
o 0
|
||||
>>> graph = {0: [-1], 1: [0], 2: [1], 3: [1], 4: [2], 5: [4], 6: [4],
|
||||
... 7: [4], 8: [-1], 9: [6, 7], 10: [5], 11: [3, 7], 12: [9],
|
||||
... 13: [8]}
|
||||
>>> pfunc = graph.get
|
||||
|
||||
Empty revs
|
||||
>>> missingancestors([], [1], pfunc)
|
||||
[]
|
||||
>>> missingancestors([], [], pfunc)
|
||||
[]
|
||||
|
||||
If bases is empty, it's the same as if it were [nullrev]
|
||||
>>> missingancestors([12], [], pfunc)
|
||||
[0, 1, 2, 4, 6, 7, 9, 12]
|
||||
|
||||
Trivial case: revs == bases
|
||||
>>> missingancestors([0], [0], pfunc)
|
||||
[]
|
||||
>>> missingancestors([4, 5, 6], [6, 5, 4], pfunc)
|
||||
[]
|
||||
|
||||
With nullrev
|
||||
>>> missingancestors([-1], [12], pfunc)
|
||||
[]
|
||||
>>> missingancestors([12], [-1], pfunc)
|
||||
[0, 1, 2, 4, 6, 7, 9, 12]
|
||||
|
||||
9 is a parent of 12. 7 is a parent of 9, so an ancestor of 12. 6 is an
|
||||
ancestor of 12 but not of 7.
|
||||
>>> missingancestors([12], [9], pfunc)
|
||||
[12]
|
||||
>>> missingancestors([9], [12], pfunc)
|
||||
[]
|
||||
>>> missingancestors([12, 9], [7], pfunc)
|
||||
[6, 9, 12]
|
||||
>>> missingancestors([7, 6], [12], pfunc)
|
||||
[]
|
||||
|
||||
More complex cases
|
||||
>>> missingancestors([10], [11, 12], pfunc)
|
||||
[5, 10]
|
||||
>>> missingancestors([11], [10], pfunc)
|
||||
[3, 7, 11]
|
||||
>>> missingancestors([11], [10, 12], pfunc)
|
||||
[3, 11]
|
||||
>>> missingancestors([12], [10], pfunc)
|
||||
[6, 7, 9, 12]
|
||||
>>> missingancestors([12], [11], pfunc)
|
||||
[6, 9, 12]
|
||||
>>> missingancestors([10, 11, 12], [13], pfunc)
|
||||
[0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12]
|
||||
>>> missingancestors([13], [10, 11, 12], pfunc)
|
||||
[8, 13]
|
||||
"""
|
||||
|
||||
revsvisit = set(revs)
|
||||
basesvisit = set(bases)
|
||||
if not revsvisit:
|
||||
return []
|
||||
if not basesvisit:
|
||||
basesvisit.add(nullrev)
|
||||
start = max(max(revsvisit), max(basesvisit))
|
||||
bothvisit = revsvisit.intersection(basesvisit)
|
||||
revsvisit.difference_update(bothvisit)
|
||||
basesvisit.difference_update(bothvisit)
|
||||
# At this point, we hold the invariants that:
|
||||
# - revsvisit is the set of nodes we know are an ancestor of at least one
|
||||
# of the nodes in revs
|
||||
# - basesvisit is the same for bases
|
||||
# - bothvisit is the set of nodes we know are ancestors of at least one of
|
||||
# the nodes in revs and one of the nodes in bases
|
||||
# - a node may be in none or one, but not more, of revsvisit, basesvisit
|
||||
# and bothvisit at any given time
|
||||
# Now we walk down in reverse topo order, adding parents of nodes already
|
||||
# visited to the sets while maintaining the invariants. When a node is
|
||||
# found in both revsvisit and basesvisit, it is removed from them and
|
||||
# added to bothvisit instead. When revsvisit becomes empty, there are no
|
||||
# more ancestors of revs that aren't also ancestors of bases, so exit.
|
||||
|
||||
missing = []
|
||||
for curr in xrange(start, nullrev, -1):
|
||||
if not revsvisit:
|
||||
break
|
||||
|
||||
if curr in bothvisit:
|
||||
bothvisit.remove(curr)
|
||||
# curr's parents might have made it into revsvisit or basesvisit
|
||||
# through another path
|
||||
for p in pfunc(curr):
|
||||
revsvisit.discard(p)
|
||||
basesvisit.discard(p)
|
||||
bothvisit.add(p)
|
||||
continue
|
||||
|
||||
# curr will never be in both revsvisit and basesvisit, since if it
|
||||
# were it'd have been pushed to bothvisit
|
||||
if curr in revsvisit:
|
||||
missing.append(curr)
|
||||
thisvisit = revsvisit
|
||||
othervisit = basesvisit
|
||||
elif curr in basesvisit:
|
||||
thisvisit = basesvisit
|
||||
othervisit = revsvisit
|
||||
else:
|
||||
# not an ancestor of a or b: ignore
|
||||
continue
|
||||
|
||||
thisvisit.remove(curr)
|
||||
for p in pfunc(curr):
|
||||
if p == nullrev:
|
||||
pass
|
||||
elif p in othervisit or p in bothvisit:
|
||||
# p is implicitly in thisvisit. This means p is or should be
|
||||
# in bothvisit
|
||||
revsvisit.discard(p)
|
||||
basesvisit.discard(p)
|
||||
bothvisit.add(p)
|
||||
else:
|
||||
# visit later
|
||||
thisvisit.add(p)
|
||||
|
||||
missing.reverse()
|
||||
return missing
|
||||
|
Loading…
Reference in New Issue
Block a user