remotefilelog: move getancestors into remotefilelog

Summary:
The getancestors call is _very_ expensive, as it requires walking all the
ancestors of the given node, which potentially requires O(N) network round
trips. Since we want to discourage/remove such behavior, let's move it out of
the store layer, and move it one layer up. The handful of places that call
into ancestormap will need to be optimized later to avoid calling it.

Reviewed By: DurhamG

Differential Revision: D17946852

fbshipit-source-id: 93eb7873b685ee88f8af5d4ceca500738d779396
This commit is contained in:
Xavier Deguillard 2019-11-07 08:49:58 -08:00 committed by Facebook Github Bot
parent d47be1f2fa
commit 230af20935
3 changed files with 65 additions and 100 deletions

View File

@ -12,70 +12,9 @@ from . import shallowutil
class unionmetadatastore(object):
def __init__(self, *args, **kwargs):
def __init__(self, *args):
self.stores = list(args)
# If allowincomplete==True then the union store can return partial
# ancestor lists, otherwise it will throw a KeyError if a full
# history can't be found.
self.allowincomplete = kwargs.get("allowincomplete", False)
# Removed by this commit: the full-history walk moves out of the union
# store and into remotefilelog._getancestors (see the second hunk).
def getancestors(self, name, node, known=None):
"""Returns as many ancestors as we're aware of.
return value: {
node: (p1, p2, linknode, copyfrom),
...
}
"""
# "known" nodes act as traversal boundaries: history beyond them is
# neither fetched nor returned.
if known is None:
known = set()
if node in known:
return []
ancestors = {}
# Returns the (name, node) pairs reachable from curnode whose info is
# not yet present in "ancestors" and therefore still needs fetching.
def traverse(curname, curnode):
# TODO: this algorithm has the potential to traverse parts of
# history twice. Ex: with A->B->C->F and A->B->D->F, both D and C
# may be queued as missing, then B and A are traversed for both.
queue = [(curname, curnode)]
missing = []
seen = set()
while queue:
name, node = queue.pop()
if (name, node) in seen:
continue
seen.add((name, node))
value = ancestors.get(node)
if not value:
missing.append((name, node))
continue
p1, p2, linknode, copyfrom = value
# p1 history continues under the pre-copy name, if any.
if p1 != nullid and p1 not in known:
queue.append((copyfrom or name, p1))
if p2 != nullid and p2 not in known:
queue.append((name, p2))
return missing
# Alternate between fetching node info and discovering newly-missing
# ancestors until the reachable history is fully resolved.
missing = [(name, node)]
while missing:
curname, curnode = missing.pop()
try:
ancestors.update({curnode: self.getnodeinfo(curname, curnode)})
newmissing = traverse(curname, curnode)
missing.extend(newmissing)
except KeyError:
# If we allow incomplete histories, don't throw.
if not self.allowincomplete:
raise
# If the requested name+node doesn't exist, always throw.
if (curname, curnode) == (name, node):
raise
# TODO: ancestors should probably be (name, node) -> (value)
return ancestors
def getnodeinfo(self, name, node):
for store in self.stores:
try:

View File

@ -312,8 +312,64 @@ class remotefilelog(object):
validatehash = validatehash and vhash
return text, validatehash
# Added by this commit: the history walk formerly living in
# unionmetadatastore.getancestors, now private to remotefilelog.
def _getancestors(self, node):
"""Returns as many ancestors as we're aware of.
return value: {
node: (p1, p2, linknode, copyfrom),
...
}
This is a very expensive operation as it requires the entire history
for the node, potentially requiring O(N) server roundtrips.
"""
# Unlike the old store-level version there is no "known" boundary set
# here; it stays empty, so the whole history is always walked.
known = set()
ancestors = {}
# Returns the (name, node) pairs reachable from curnode whose info is
# not yet present in "ancestors" and therefore still needs fetching.
def traverse(curname, curnode):
# TODO: this algorithm has the potential to traverse parts of
# history twice. Ex: with A->B->C->F and A->B->D->F, both D and C
# may be queued as missing, then B and A are traversed for both.
queue = [(curname, curnode)]
missing = []
seen = set()
while queue:
name, node = queue.pop()
if (name, node) in seen:
continue
seen.add((name, node))
value = ancestors.get(node)
if not value:
missing.append((name, node))
continue
p1, p2, linknode, copyfrom = value
# p1 history continues under the pre-copy name, if any.
if p1 != nullid and p1 not in known:
queue.append((copyfrom or name, p1))
if p2 != nullid and p2 not in known:
queue.append((name, p2))
return missing
# Alternate between fetching node info and discovering newly-missing
# ancestors until the reachable history is fully resolved.
missing = [(self.filename, node)]
while missing:
curname, curnode = missing.pop()
try:
ancestors.update(
{
curnode: self.repo.fileslog.metadatastore.getnodeinfo(
curname, curnode
)
}
)
newmissing = traverse(curname, curnode)
missing.extend(newmissing)
except KeyError:
# No allowincomplete escape hatch any more: a missing entry
# always propagates to the caller.
raise
# TODO: ancestors should probably be (name, node) -> (value)
return ancestors
# NOTE(review): this diff view shows both bodies of ancestormap on
# adjacent lines — the first return is the old (removed) delegation to
# the metadata store, the second is its replacement calling the new
# private _getancestors helper.
def ancestormap(self, node):
return self.repo.fileslog.metadatastore.getancestors(self.filename, node)
return self._getancestors(node)
# Single-node lookup in the shared metadata store: returns the
# (p1, p2, linknode, copyfrom) tuple for this file's node, without
# walking any history.
def getnodeinfo(self, node):
return self.repo.fileslog.metadatastore.getnodeinfo(self.filename, node)
@ -503,9 +559,7 @@ class remotefileslog(filelog.fileslog):
os.umask(mask)
sunioncontentstore = unioncontentstore(*sharedcontentstores)
sunionmetadatastore = unionmetadatastore(
*sharedmetadatastores, allowincomplete=True
)
sunionmetadatastore = unionmetadatastore(*sharedmetadatastores)
remotecontent, remotemetadata = self.makeremotestores(
sunioncontentstore, sunionmetadatastore
)

View File

@ -115,32 +115,6 @@ class histpacktestsbase(object):
self.assertEquals(linknode, actual[2])
self.assertEquals(copyfrom, actual[3])
# Removed by this commit: this test exercised the store-level
# getancestors API, which no longer exists on unionmetadatastore.
def testAddAncestorChain(self):
"""Test putting multiple revisions in into a pack and read the ancestor
chain.
"""
revisions = []
filename = "foo"
lastnode = nullid
# Build a linear 10-revision chain: each node's p1 is the previous
# node, p2 stays null, no copy info.
for i in range(10):
node = self.getFakeHash()
revisions.append((filename, node, lastnode, nullid, nullid, None))
lastnode = node
# revisions must be added in topological order, newest first
revisions = list(reversed(revisions))
pack = self.createPack(revisions)
store = unionmetadatastore(pack)
# Test that the chain has all the entries
# Walk from the newest revision; the result must contain every node
# in the chain with the exact parents/linknode/copyfrom stored above.
ancestors = store.getancestors(revisions[0][0], revisions[0][1], known=None)
for filename, node, p1, p2, linknode, copyfrom in revisions:
ap1, ap2, alinknode, acopyfrom = ancestors[node]
self.assertEquals(ap1, p1)
self.assertEquals(ap2, p2)
self.assertEquals(alinknode, linknode)
self.assertEquals(acopyfrom, copyfrom)
def testPackMany(self):
"""Pack many related and unrelated ancestors.
"""
@ -175,14 +149,12 @@ class histpacktestsbase(object):
# Verify the pack contents
for (filename, node), (p1, p2, lastnode) in allentries.iteritems():
ancestors = store.getancestors(filename, node, known=None)
self.assertEquals(ancestorcounts[(filename, node)], len(ancestors))
for anode, (ap1, ap2, alinknode, copyfrom) in ancestors.iteritems():
ep1, ep2, elinknode = allentries[(filename, anode)]
self.assertEquals(ap1, ep1)
self.assertEquals(ap2, ep2)
self.assertEquals(alinknode, elinknode)
self.assertEquals(copyfrom, None)
ap1, ap2, alinknode, acopyfrom = store.getnodeinfo(filename, node)
ep1, ep2, elinknode = allentries[(filename, node)]
self.assertEquals(ap1, ep1)
self.assertEquals(ap2, ep2)
self.assertEquals(alinknode, elinknode)
self.assertEquals(acopyfrom, None)
def testGetNodeInfo(self):
revisions = []