git_handler: terminate new commit DAG traversal at known commits

Any commit in _map_git is already known, so there's no point walking further
down the DAG.

For a repo with over 50,000 commits, this cuts the run time of a no-op hg pull
from 38 seconds to 2.5 seconds.
This commit is contained in:
Siddharth Agarwal 2014-02-18 20:30:27 -08:00
parent 6f79df86d2
commit 7d37b2a516

View File

@@ -616,7 +616,7 @@ class GitHandler(object):
seen = set(todo)
while todo:
sha = todo[-1]
if sha in done:
if sha in done or sha in self._map_git:
todo.pop()
continue
assert isinstance(sha, str)
@@ -627,7 +627,7 @@ class GitHandler(object):
convert_list[sha] = obj
assert isinstance(obj, Commit)
for p in obj.parents:
if p not in done:
if p not in done and p not in self._map_git:
todo.append(p)
# process parents of a commit before processing the
# commit itself, and come back to this commit later
@@ -637,7 +637,7 @@ class GitHandler(object):
done.add(sha)
todo.pop()
return convert_list, [commit for commit in commits if not commit in self._map_git]
return convert_list, commits
def import_git_objects(self, remote_name=None, refs=None):
convert_list, commits = self.getnewgitcommits(refs)