treemanifest: add known argument to getancestor api

Summary:
During a repack we often want to access the ancestry of a bunch of nodes that
might be ancestors of each other. Using getancestors for that results in a lot
of duplicated work. For instance, getancestors(0) returns [0], getancestors(1)
returns [0, 1], getancestors(2) returns [0, 1, 2], etc., which is O(n^2).

This patch adds an optional `known` argument for getancestors that lets the
caller tell getancestors what ancestors it's already aware of. Then getancestors
can short circuit when it reaches that level. This avoids duplicate work during
repack.

Test Plan:
Ran treemanifest repack in our large repo and verified it made
progress over the nodes much faster than before

Reviewers: #mercurial, quark

Reviewed By: quark

Subscribers: mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D4901308

Signature: t1:4901308:1492640896:27d4a90c2993cd1fefbd8dbc211f2ec181178bce
This commit is contained in:
Durham Goode 2017-04-19 21:14:04 -07:00
parent dae50fc99e
commit 68030f1dd7
4 changed files with 60 additions and 23 deletions

View File

@ -185,14 +185,29 @@ class manifestrevlogstore(object):
revision = self.get(name, node)
return [(name, node, None, nullid, revision)]
def getancestors(self, name, node):
def getancestors(self, name, node, known=None):
if known is None:
known = set()
if node in known:
return []
rl = self._revlog(name)
ancestors = {}
missing = set((node,))
for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
ancnode = rl.node(ancrev)
missing.discard(ancnode)
p1, p2 = rl.parents(ancnode)
if p1 != nullid and p1 not in known:
missing.add(p1)
if p2 != nullid and p2 not in known:
missing.add(p2)
linknode = self._cl.node(rl.linkrev(ancrev))
ancestors[rl.node(ancrev)] = (p1, p2, linknode, '')
if not missing:
break
return ancestors
def add(self, *args):

View File

@ -1,6 +1,6 @@
import hashlib, struct
from mercurial import util
from mercurial.node import hex
from mercurial.node import hex, nullid
import basepack, constants
# (filename hash, offset, size)
@ -30,16 +30,16 @@ class historypackstore(basepack.basepackstore):
def getpack(self, path):
return historypack(path)
def getancestors(self, name, node):
def getancestors(self, name, node, known=None):
for pack in self.packs:
try:
return pack.getancestors(name, node)
return pack.getancestors(name, node, known=known)
except KeyError:
pass
for pack in self.refresh():
try:
return pack.getancestors(name, node)
return pack.getancestors(name, node, known=known)
except KeyError:
pass
@ -64,7 +64,7 @@ class historypack(basepack.basepack):
return missing
def getancestors(self, name, node):
def getancestors(self, name, node, known=None):
"""Returns as many ancestors as we're aware of.
return value: {
@ -72,12 +72,19 @@ class historypack(basepack.basepack):
...
}
"""
if known is None:
known = set()
if node in known:
return []
filename, offset, size = self._findsection(name)
ancestors = set((node,))
pending = set((node,))
data = self._data[offset:offset + size]
results = {}
o = 0
while o < len(data):
if not pending:
break
entry = struct.unpack(PACKFORMAT, data[o:o + PACKENTRYLENGTH])
o += PACKENTRYLENGTH
copyfrom = None
@ -86,11 +93,18 @@ class historypack(basepack.basepack):
copyfrom = data[o:o + copyfromlen]
o += copyfromlen
if entry[ANC_NODE] in ancestors:
ancestors.add(entry[ANC_P1NODE])
ancestors.add(entry[ANC_P2NODE])
result = (entry[ANC_P1NODE],
entry[ANC_P2NODE],
ancnode = entry[ANC_NODE]
if ancnode in pending:
pending.remove(ancnode)
p1node = entry[ANC_P1NODE]
p2node = entry[ANC_P2NODE]
if p1node != nullid and p1node not in known:
pending.add(p1node)
if p2node != nullid and p2node not in known:
pending.add(p2node)
result = (p1node,
p2node,
entry[ANC_LINKNODE],
copyfrom)
results[entry[ANC_NODE]] = result

View File

@ -11,7 +11,7 @@ class unionmetadatastore(object):
# history can't be found.
self.allowincomplete = kwargs.get('allowincomplete', False)
def getancestors(self, name, node):
def getancestors(self, name, node, known=None):
"""Returns as many ancestors as we're aware of.
return value: {
@ -19,6 +19,11 @@ class unionmetadatastore(object):
...
}
"""
if known is None:
known = set()
if node in known:
return []
ancestors = {}
def traverse(curname, curnode):
# TODO: this algorithm has the potential to traverse parts of
@ -37,9 +42,9 @@ class unionmetadatastore(object):
missing.append((name, node))
continue
p1, p2, linknode, copyfrom = value
if p1 != nullid:
if p1 != nullid and p1 not in known:
queue.append((copyfrom or curname, p1))
if p2 != nullid:
if p2 != nullid and p2 not in known:
queue.append((curname, p2))
return missing
@ -47,7 +52,8 @@ class unionmetadatastore(object):
while missing:
curname, curnode = missing.pop()
try:
ancestors.update(self._getpartialancestors(curname, curnode))
ancestors.update(self._getpartialancestors(curname, curnode,
known=known))
newmissing = traverse(curname, curnode)
missing.extend(newmissing)
except KeyError:
@ -61,10 +67,10 @@ class unionmetadatastore(object):
# TODO: ancestors should probably be (name, node) -> (value)
return ancestors
def _getpartialancestors(self, name, node):
def _getpartialancestors(self, name, node, known=None):
for store in self.stores:
try:
return store.getancestors(name, node)
return store.getancestors(name, node, known=known)
except KeyError:
pass
@ -86,7 +92,7 @@ class unionmetadatastore(object):
store.markledger(ledger)
class remotefilelogmetadatastore(basestore.basestore):
def getancestors(self, name, node):
def getancestors(self, name, node, known=None):
"""Returns as many ancestors as we're aware of.
return value: {
@ -107,10 +113,10 @@ class remotemetadatastore(object):
self._fileservice = fileservice
self._shared = shared
def getancestors(self, name, node):
def getancestors(self, name, node, known=None):
self._fileservice.prefetch([(name, hex(node))], force=True,
fetchdata=False, fetchhistory=True)
return self._shared.getancestors(name, node)
return self._shared.getancestors(name, node, known=known)
def add(self, name, node, data):
raise RuntimeError("cannot add to a remote store")

View File

@ -290,7 +290,8 @@ class repacker(object):
if node in ancestors:
continue
try:
ancestors.update(self.history.getancestors(filename, node))
ancestors.update(self.history.getancestors(filename, node,
known=ancestors))
except KeyError:
# Since we're packing data entries, we may not have the
# corresponding history entries for them. It's not a big
@ -376,7 +377,8 @@ class repacker(object):
for node in nodes:
if node in ancestors:
continue
ancestors.update(self.history.getancestors(filename, node))
ancestors.update(self.history.getancestors(filename, node,
known=ancestors))
# Order the nodes children first
orderednodes = reversed(self._toposort(ancestors))