def find_incoming(git_object_store, git_map, refs):
    '''find what commits need to be imported

    git_object_store is a dulwich object store.
    git_map is a map with keys being Git commits that have already been
    imported.
    refs is a map of refs to SHAs that we're interested in. It may be empty
    or None, in which case nothing is found.

    Returns a (commit_cache, commits) tuple. commit_cache maps every SHA
    visited by the traversal to the object loaded for it. commits is the
    list of SHAs that still need importing, ordered so that a commit only
    appears after all of its parents that are not already in git_map.'''

    todo = []
    done = set()
    commit_cache = {}

    # get a list of all the head shas
    seenheads = set()
    if refs:
        # values() instead of itervalues(): identical result, but also
        # available on Python 3
        for sha in refs.values():
            # refs contains all the refs in the server, not just the ones
            # we are pulling
            if sha not in git_object_store:
                continue
            obj = git_object_store[sha]
            # follow (possibly nested) tags down to the object they name
            while isinstance(obj, Tag):
                _, sha = obj.object
                obj = git_object_store[sha]
            if isinstance(obj, Commit) and sha not in seenheads:
                seenheads.add(sha)
                todo.append(sha)

    # sort by commit date
    def commitdate(sha):
        obj = git_object_store[sha]
        return obj.commit_time - obj.commit_timezone

    # descending order: the loop below works from the end of the list, so
    # the head with the smallest key is traversed first
    todo.sort(key=commitdate, reverse=True)

    # traverse the heads getting a list of all the unique commits in
    # topological order
    commits = []
    while todo:
        sha = todo[-1]
        if sha in done or sha in git_map:
            todo.pop()
            continue
        assert isinstance(sha, str)
        if sha in commit_cache:
            obj = commit_cache[sha]
        else:
            obj = git_object_store[sha]
            commit_cache[sha] = obj
        assert isinstance(obj, Commit)
        for p in obj.parents:
            if p not in done and p not in git_map:
                todo.append(p)
                # process parents of a commit before processing the
                # commit itself, and come back to this commit later
                break
        else:
            # every parent is either emitted or already imported
            commits.append(sha)
            done.add(sha)
            todo.pop()

    return commit_cache, commits
else: - obj = git_object_store[sha] - commit_cache[sha] = obj - assert isinstance(obj, Commit) - for p in obj.parents: - if p not in done and p not in self._map_git: - todo.append(p) - # process parents of a commit before processing the - # commit itself, and come back to this commit later - break - else: - commits.append(sha) - done.add(sha) - todo.pop() - - return commit_cache, commits + return git2hg.find_incoming(self.git.object_store, self._map_git, refs) def import_git_objects(self, remote_name=None, refs=None): commit_cache, commits = self.get_git_incoming(refs)