mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 17:27:53 +03:00
68030f1dd7
Summary: During a repack we often want to access the ancestry for a bunch of nodes that might be ancestors of each other. Using getancestors for that results in a lot of duplicated work. For instance, getancestors(0) returns [0], getancestors(1) returns [0, 1], getancestors(2) returns [0, 1, 2], etc., which is O(n^2). This patch adds an optional `known` argument for getancestors that lets the caller tell getancestors what ancestors it's already aware of. Then getancestors can short circuit when it reaches that level. This avoids duplicate work during repack. Test Plan: Ran treemanifest repack in our large repo and verified it made progress over the nodes much faster than before Reviewers: #mercurial, quark Reviewed By: quark Subscribers: mjpieters Differential Revision: https://phabricator.intern.facebook.com/D4901308 Signature: t1:4901308:1492640896:27d4a90c2993cd1fefbd8dbc211f2ec181178bce
129 lines
4.4 KiB
Python
129 lines
4.4 KiB
Python
import basestore, shallowutil
|
|
from mercurial.node import hex, nullid
|
|
|
|
class unionmetadatastore(object):
    """A read-only view over several metadata stores, queried in order.

    Each store in ``self.stores`` must expose ``getancestors``,
    ``getmissing`` and ``markledger``; the first store that can answer a
    query wins.  The union itself is not writable (see ``add``).
    """

    def __init__(self, *args, **kwargs):
        # Positional args are the underlying stores, in lookup-priority order.
        self.stores = args
        # Optional store that new data would be written to (kept for callers
        # that inspect it; this class itself never writes).
        self.writestore = kwargs.get('writestore')

        # If allowincomplete==True then the union store can return partial
        # ancestor lists, otherwise it will throw a KeyError if a full
        # history can't be found.
        self.allowincomplete = kwargs.get('allowincomplete', False)

    def getancestors(self, name, node, known=None):
        """Returns as many ancestors as we're aware of.

        ``known`` is an optional set of nodes the caller already has
        ancestry for; traversal short-circuits when it reaches them,
        which avoids O(n^2) work during repack.

        return value: {
            node: (p1, p2, linknode, copyfrom),
            ...
        }

        Raises KeyError if a full history cannot be assembled and
        ``allowincomplete`` is False, or if the requested (name, node)
        itself is unknown to every store.
        """
        if known is None:
            known = set()
        if node in known:
            # Caller already has this node's ancestry; nothing to add.
            return []

        ancestors = {}
        def traverse(curname, curnode):
            # TODO: this algorithm has the potential to traverse parts of
            # history twice. Ex: with A->B->C->F and A->B->D->F, both D and C
            # may be queued as missing, then B and A are traversed for both.
            queue = [(curname, curnode)]
            missing = []
            seen = set()
            while queue:
                name, node = queue.pop()
                if (name, node) in seen:
                    continue
                seen.add((name, node))
                value = ancestors.get(node)
                if not value:
                    # Not fetched yet; report it so the outer loop can ask
                    # the stores for this node's partial ancestry.
                    missing.append((name, node))
                    continue
                p1, p2, linknode, copyfrom = value
                # Stop descending at null parents and at nodes the caller
                # already knows about.
                if p1 != nullid and p1 not in known:
                    queue.append((copyfrom or curname, p1))
                if p2 != nullid and p2 not in known:
                    queue.append((curname, p2))
            return missing

        missing = [(name, node)]
        while missing:
            curname, curnode = missing.pop()
            try:
                ancestors.update(self._getpartialancestors(curname, curnode,
                                                           known=known))
                newmissing = traverse(curname, curnode)
                missing.extend(newmissing)
            except KeyError:
                # If we allow incomplete histories, don't throw.
                if not self.allowincomplete:
                    raise
                # If the requested name+node doesn't exist, always throw.
                if (curname, curnode) == (name, node):
                    raise

        # TODO: ancestors should probably be (name, node) -> (value)
        return ancestors

    def _getpartialancestors(self, name, node, known=None):
        """Return the first underlying store's ancestor map for (name, node).

        Raises KeyError((name, node)) if no store knows the key.
        """
        for store in self.stores:
            try:
                return store.getancestors(name, node, known=known)
            except KeyError:
                pass

        raise KeyError((name, node))

    def add(self, name, node, data):
        """Unsupported: the union view is read-only."""
        # NOTE: message previously said "content ... contentstore" — a
        # copy-paste from the content store; this is the metadata store.
        raise RuntimeError("cannot add metadata only to remotefilelog "
                           "metadatastore")

    def getmissing(self, keys):
        """Return the subset of ``keys`` that no underlying store has."""
        missing = keys
        for store in self.stores:
            if missing:
                missing = store.getmissing(missing)
        return missing

    def markledger(self, ledger):
        """Forward ledger marking to every underlying store."""
        for store in self.stores:
            store.markledger(ledger)
class remotefilelogmetadatastore(basestore.basestore):
    """Metadata store backed by on-disk remotefilelog data files."""

    def getancestors(self, name, node, known=None):
        """Returns as many ancestors as we're aware of.

        ``known`` is accepted for interface compatibility with the other
        metadata stores; the on-disk pack already holds the full map.

        return value: {
            node: (p1, p2, linknode, copyfrom),
            ...
        }
        """
        raw = self._getdata(name, node)
        return shallowutil.ancestormap(raw)

    def add(self, name, node, parents, linknode):
        """Unsupported: this store is populated via data files, not add()."""
        raise RuntimeError("cannot add metadata only to remotefilelog "
                           "metadatastore")
class remotemetadatastore(object):
    """Metadata store that fetches history from a remote file service.

    Lookups trigger a history prefetch into ``shared`` (the local shared
    cache store), then answer from there.
    """

    def __init__(self, ui, fileservice, shared):
        # ``ui`` is accepted for interface symmetry but not retained.
        self._shared = shared
        self._fileservice = fileservice

    def getancestors(self, name, node, known=None):
        """Prefetch history for (name, node) remotely, then read it from
        the shared cache store."""
        self._fileservice.prefetch([(name, hex(node))], force=True,
                                   fetchdata=False, fetchhistory=True)
        return self._shared.getancestors(name, node, known=known)

    def add(self, name, node, data):
        """Unsupported: remote stores cannot be written to."""
        raise RuntimeError("cannot add to a remote store")

    def getmissing(self, keys):
        # We never claim to have anything locally; every key is "missing"
        # until fetched, so report them all back unchanged.
        return keys

    def markledger(self, ledger):
        """No-op: remote data does not participate in repack ledgers."""
        pass