2014-10-15 02:54:58 +04:00
|
|
|
# git2hg.py - convert Git repositories and commits to Mercurial ones
|
|
|
|
|
|
|
|
from dulwich.objects import Commit, Tag
|
2019-01-30 03:25:33 +03:00
|
|
|
from edenscm.mercurial import util
|
|
|
|
from edenscm.mercurial.node import bin
|
2018-05-30 12:16:33 +03:00
|
|
|
|
2014-10-15 02:54:58 +04:00
|
|
|
|
|
|
|
def find_incoming(git_object_store, git_map, refs):
    """Work out which Git commits still have to be imported.

    git_object_store -- dulwich object store, indexed by SHA.
    git_map -- map of Git commits that were already imported; it is queried
        through lookupbyfirst() with the binary form of each SHA.
    refs -- mapping of ref name to SHA for the refs we care about.

    Returns a GitIncomingResult holding the SHAs to import (every commit
    listed after its not-yet-imported parents) and the cache of commit
    objects that were loaded while walking the history.
    """
    done = set()
    commit_cache = {}

    def is_unseen(sha):
        # a commit is unseen when this walk has not emitted it yet and the
        # git map has no entry for it
        return sha not in done and git_map.lookupbyfirst(bin(sha)) is None

    def commitdate(sha):
        # sort key for heads: commit time minus the commit's timezone offset
        commit = git_object_store[sha]
        return commit.commit_time - commit.commit_timezone

    def get_heads(refs):
        """Return the distinct head commit SHAs named by refs, newest first."""
        heads = []
        seenheads = set()
        for ref, sha in refs.iteritems():
            # refs may mention objects on the server that were never pulled
            # down; also make sure we hold a sha and not a symref
            if ref == "HEAD" or sha not in git_object_store:
                continue
            target = git_object_store[sha]
            # follow tag objects (possibly nested) to what they point at
            while isinstance(target, Tag):
                _, sha = target.object
                target = git_object_store[sha]
            if not isinstance(target, Commit) or sha in seenheads:
                continue
            seenheads.add(sha)
            heads.append(sha)
        return sorted(heads, key=commitdate, reverse=True)

    def get_unseen_commits(todo):
        """Return every unseen commit reachable from todo, parents first.

        'unseen' means not yet in the done set and not in the git map.
        todo is used as a stack and is consumed; the done set and the
        commit cache are filled in along the way.
        """
        ordered = []
        while todo:
            sha = todo[-1]
            if not is_unseen(sha):
                todo.pop()
                continue
            assert isinstance(sha, str)
            if sha not in commit_cache:
                commit_cache[sha] = git_object_store[sha]
            commit = commit_cache[sha]
            assert isinstance(commit, Commit)
            # first parent that still needs processing, if there is one
            pending = next((p for p in commit.parents if is_unseen(p)), None)
            if pending is not None:
                # handle that parent first; this commit stays on the stack
                # and is revisited once the parent is done
                todo.append(pending)
                continue
            ordered.append(sha)
            done.add(sha)
            todo.pop()
        return ordered

    heads = get_heads(refs)
    incoming = get_unseen_commits(heads)
    return GitIncomingResult(incoming, commit_cache)
|
|
|
|
|
2018-05-30 12:16:33 +03:00
|
|
|
|
2014-10-15 03:35:37 +04:00
|
|
|
class GitIncomingResult(object):
    """Plain container for what find_incoming hands back to its caller."""

    def __init__(self, commits, commit_cache):
        # commits: the commits selected for import, as passed in
        # commit_cache: the sha -> commit object cache, as passed in
        self.commits, self.commit_cache = commits, commit_cache
|
2014-10-16 03:54:50 +04:00
|
|
|
|
2018-05-30 12:16:33 +03:00
|
|
|
|
2014-10-16 03:54:50 +04:00
|
|
|
def extract_hg_metadata(message, git_extra):
    """Pull hg-git metadata out of a Git commit message and its extra fields.

    message -- the Git commit message. If it contains a line consisting of
        --HG--, everything after that line is read as "command : data"
        entries and removed from the returned message.
    git_extra -- iterable of (field, data) pairs taken from the Git commit.

    Returns a (message, renames, branch, extra) tuple. renames maps the new
    path to the old one, or is None when nothing marks the commit as coming
    from Mercurial. branch is the recorded branch name or None. extra is a
    dict of extra fields.
    """
    # Mercurial stores renames explicitly while Git infers them. For commits
    # born in Git we may want to infer renames and record them in Mercurial;
    # for commits born in Mercurial we must not. The only hint about the
    # origin is whether the Git commit carries hg-git fields:
    # - hg-git versions past 0.7.0 always export at least one hg-git field.
    # - before 0.7.0 this is only a heuristic: a commit with extra hg fields
    #   definitely came from Mercurial, one without them might have as well.
    # So renames becomes a dict when the commit looks like it came from
    # Mercurial and stays None otherwise; callers use that to decide whether
    # to infer rename information.
    renames = None
    extra = {}
    branch = None

    body, marker, meta = message.partition("\n--HG--\n")
    if marker:
        message = body
        renames = {}
        for entry in meta.split("\n"):
            if not entry:
                continue
            if " : " not in entry:
                # not a "command : data" entry: stop reading the metadata
                break
            command, _, data = entry.partition(" : ")
            if command == "rename":
                source, dest = data.split(" => ", 1)
                renames[dest] = source
            elif command == "branch":
                branch = data
            elif command == "extra":
                key, value = data.split(" : ", 1)
                extra[key] = util.urlreq.unquote(value)

    git_fn = 0
    for field, data in git_extra:
        if not field.startswith("HG:"):
            # Git keeps extra metadata as an ordered list of pairs; number
            # each foreign field so that ordering is preserved.
            hg_field = "GIT%d-%s" % (git_fn, field)
            git_fn += 1
            quoted_data = util.urlreq.quote(data)
            extra[util.urlreq.quote(hg_field)] = quoted_data
            continue

        # any HG: field marks the commit as one that came from Mercurial
        if renames is None:
            renames = {}
        command = field[3:]
        if command == "rename":
            source, dest = data.split(":", 1)
            old_path = util.urlreq.unquote(source)
            renames[util.urlreq.unquote(dest)] = old_path
        elif command == "extra":
            key, value = data.split(":", 1)
            decoded = util.urlreq.unquote(value)
            extra[util.urlreq.unquote(key)] = decoded

    return (message, renames, branch, extra)
|