sapling/git_handler.py

import os, errno, sys, time, datetime, pickle, copy
import dulwich
from dulwich.repo import Repo
from dulwich.client import SimpleFetchGraphWalker
from hgext import bookmarks
from mercurial.i18n import _
from mercurial.node import bin, hex, nullid
from mercurial import hg, util, context, error
from dulwich.objects import (
    Blob,
    Commit,
    ShaFile,
    Tag,
    Tree,
    hex_to_sha
)
import math


def seconds_to_offset(time):
    # Mercurial stores the timezone as seconds west of UTC; git wants a
    # '[+-]HHMM' string.
    hours = (float(time) / 60 / 60)
    minutes = int(math.fmod(time, 60 * 60) / 60)
    hours = int(math.floor(hours))
    if hours > 12:
        sign = '+'
        hours = 12 - (hours - 12)
    else:
        sign = '-'
    return sign + str(hours).rjust(2, '0') + str(minutes).rjust(2, '0')


def offset_to_seconds(offset):
    if len(offset) == 5:
        sign = offset[0:1]
        hours = int(offset[1:3])
        minutes = int(offset[3:5])
        if sign == '+':
            hours = 12 + (12 - hours)
        return (hours * 60 * 60) + (minutes * 60)
    else:
        return 0
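
# Illustrative round-trip (not part of the original module), using the
# Mercurial convention of seconds west of UTC, e.g. US Eastern (UTC-5):
#
#   >>> seconds_to_offset(18000)
#   '-0500'
#   >>> offset_to_seconds('-0500')
#   18000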


class GitHandler(object):

    def __init__(self, dest_repo, ui):
        self.repo = dest_repo
        self.ui = ui

        self.init_if_missing()
        self.load_git()
        self.load_map()
        self.load_config()

    # make the git data directory
    def init_if_missing(self):
        git_hg_path = os.path.join(self.repo.path, 'git')
        if not os.path.exists(git_hg_path):
            os.mkdir(git_hg_path)
            dulwich.repo.Repo.init_bare(git_hg_path)

    def load_git(self):
        git_dir = os.path.join(self.repo.path, 'git')
        self.git = Repo(git_dir)

    ## FILE LOAD AND SAVE METHODS

    def map_set(self, gitsha, hgsha):
        self._map_git[gitsha] = hgsha
        self._map_hg[hgsha] = gitsha

    def map_hg_get(self, gitsha):
        if gitsha in self._map_git:
            return self._map_git[gitsha]
        else:
            return None

    def map_git_get(self, hgsha):
        if hgsha in self._map_hg:
            return self._map_hg[hgsha]
        else:
            return None

    def load_map(self):
        self._map_git = {}
        self._map_hg = {}
        if os.path.exists(self.repo.join('git-mapfile')):
            for line in self.repo.opener('git-mapfile'):
                gitsha, hgsha = line.strip().split(' ', 1)
                self._map_git[gitsha] = hgsha
                self._map_hg[hgsha] = gitsha

    def save_map(self):
        file = self.repo.opener('git-mapfile', 'w+')
        for gitsha, hgsha in self._map_git.iteritems():
            file.write("%s %s\n" % (gitsha, hgsha))
        file.close()

    def load_config(self):
        self._config = {}
        if os.path.exists(self.repo.join('git-config')):
            for line in self.repo.opener('git-config'):
                key, value = line.strip().split(' ', 1)
                self._config[key] = value

    def save_config(self):
        file = self.repo.opener('git-config', 'w+')
        for key, value in self._config.iteritems():
            file.write("%s %s\n" % (key, value))
        file.close()
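
    # On-disk formats used above (illustrative, not from the original file):
    # both files are kept next to the Mercurial metadata and hold one
    # space-separated pair per line, e.g.
    #
    #   git-mapfile:  <40-char git sha> <40-char hg sha>
    #   git-config:   remote.origin.url git://example.com/project.git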
    ## END FILE LOAD AND SAVE METHODS

    def fetch(self, remote_name):
        self.ui.status(_("fetching from : " + remote_name + "\n"))
        self.export_git_objects()
        self.fetch_pack(remote_name)
        self.import_git_objects(remote_name)
        self.save_map()

    def push(self, remote_name):
        self.ui.status(_("pushing to : " + remote_name + "\n"))
        self.export_git_objects()
        self.update_references()
        self.upload_pack(remote_name)
        self.save_map()

    def remote_add(self, remote_name, git_url):
        self._config['remote.' + remote_name + '.url'] = git_url
        self.save_config()

    def remote_remove(self, remote_name):
        key = 'remote.' + remote_name + '.url'
        if key in self._config:
            del self._config[key]
        self.save_config()

    def remote_show(self, remote_name):
        key = 'remote.' + remote_name + '.url'
        if key in self._config:
            name = self._config[key]
            print "URL for " + remote_name + " : " + name
        else:
            print "No remote named : " + remote_name
        return

    def remote_list(self):
        for key, value in self._config.iteritems():
            if key[0:6] == 'remote':
                print key + "\t" + value

    def remote_name_to_url(self, remote_name):
        return self._config['remote.' + remote_name + '.url']

    def update_references(self):
        # TODO : if bookmarks exist, add them as git branches
        c = self.map_git_get(hex(self.repo.changelog.tip()))
        self.git.set_ref('refs/heads/master', c)

    def export_git_objects(self):
        print "exporting git objects"
        for rev in self.repo.changelog:
            self.export_hg_commit(rev)

    # convert this commit into git objects
    # go through the manifest, convert all blobs/trees we don't have
    # write the commit object (with metadata info)
    def export_hg_commit(self, rev):
        # return if we've already processed this
        node = self.repo.changelog.lookup(rev)
        phgsha = hex(node)
        pgit_sha = self.map_git_get(phgsha)
        if pgit_sha:
            return pgit_sha

        print "converting revision " + str(rev)

        # make sure parents are converted first
        parents = self.repo.parents(rev)
        for parent in parents:
            p_rev = parent.rev()
            hgsha = hex(parent.node())
            git_sha = self.map_git_get(hgsha)
            if not p_rev == -1:
                if not git_sha:
                    self.export_hg_commit(p_rev)

        ctx = self.repo.changectx(rev)
        tree_sha = self.write_git_tree(ctx)

        # TODO : something with tags?
        # TODO : explicit file renaming, copying?

        commit = {}
        commit['tree'] = tree_sha
        (time, timezone) = ctx.date()
        commit['author'] = ctx.user() + ' ' + str(int(time)) + ' ' + seconds_to_offset(timezone)
        commit['message'] = ctx.description()
        commit['message'] += "\n\n--HG--\n"
        commit['message'] += "branch : " + ctx.branch() + "\n"

        commit['parents'] = []
        for parent in parents:
            hgsha = hex(parent.node())
            git_sha = self.map_git_get(hgsha)
            if git_sha:
                commit['parents'].append(git_sha)

        commit_sha = self.git.write_commit_hash(commit)  # writing new blobs to git
        self.map_set(commit_sha, phgsha)
        return commit_sha

    def write_git_tree(self, ctx):
        trees = {}
        man = ctx.manifest()

        for filenm in man.keys():
            # write blob if not in our git database
            fctx = ctx.filectx(filenm)
            is_exec = 'x' in fctx.flags()
            is_link = 'l' in fctx.flags()
            file_id = hex(fctx.filenode())
            blob_sha = self.map_git_get(file_id)
            if not blob_sha:
                blob_sha = self.git.write_blob(fctx.data())  # writing new blobs to git
                self.map_set(blob_sha, file_id)

            parts = filenm.split('/')
            if len(parts) > 1:
                # get filename and path for leading subdir
                filepath = parts[-1:][0]
                dirpath = "/".join([v for v in parts[0:-1]]) + '/'

                # get subdir name and path for parent dir
                parentsub = parts[-2:][0]
                parentpath = "/".join([v for v in parts[0:-2]]) + '/'

                # set file entry
                fileentry = ['blob', filepath, blob_sha, is_exec, is_link]
                if dirpath not in trees:
                    trees[dirpath] = []
                trees[dirpath].append(fileentry)

                # set directory entry
                treeentry = ['tree', parentsub + '/', dirpath]
                if parentpath not in trees:
                    trees[parentpath] = []
                if treeentry not in trees[parentpath]:
                    trees[parentpath].append(treeentry)
            else:
                fileentry = ['blob', parts[0], blob_sha, is_exec, is_link]
                if '/' not in trees:
                    trees['/'] = []
                trees['/'].append(fileentry)
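
        # Illustrative shape of the 'trees' mapping built above (not from the
        # original code): for a manifest containing 'README' and
        # 'docs/index.txt' it would look roughly like
        #
        #   {'/':     [['blob', 'README', <sha>, False, False],
        #              ['tree', 'docs/', 'docs/']],
        #    'docs/': [['blob', 'index.txt', <sha>, False, False]]}
        #
        # Each 'tree' entry still names a directory path here; it is swapped
        # for the written tree's SHA below.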
        # sort by tree depth, so we write the deepest trees first
        dirs = trees.keys()
        dirs.sort(lambda a, b: len(b.split('/')) - len(a.split('/')))
        dirs.remove('/')
        dirs.append('/')

        # write all the trees
        tree_sha = None
        tree_shas = {}
        for dirnm in dirs:
            tree_data = []
            for entry in trees[dirnm]:
                # replace tree path with tree SHA
                if entry[0] == 'tree':
                    sha = tree_shas[entry[2]]
                    entry[2] = sha
                tree_data.append(entry)
            tree_sha = self.git.write_tree_array(tree_data)  # writing new trees to git
            tree_shas[dirnm] = tree_sha

        return tree_sha  # should be the last root tree sha

    def remote_head(self, remote_name):
        for head, sha in self.git.remote_refs(remote_name).iteritems():
            if head == 'HEAD':
                return self.map_hg_get(sha)
        return None

    def upload_pack(self, remote_name):
        git_url = self.remote_name_to_url(remote_name)
        client, path = self.get_transport_and_path(git_url)
        changed = self.get_changed_refs
        genpack = self.generate_pack_contents
        try:
            changed_refs = client.send_pack(path, changed, genpack)
            new_refs = {}
            for old, new, ref in changed_refs:
                self.ui.status(" " + remote_name + "::" + ref + " : GIT:" + old[0:8] + " => GIT:" + new[0:8] + "\n")
                new_refs[ref] = new
            self.git.set_remote_refs(new_refs, remote_name)
            self.update_hg_bookmarks(remote_name)
        except:
            raise

    # TODO : for now, we'll just push all heads that match remote heads
    # * we should have specified push, tracking branches and --all

    # takes a dict of refs:shas from the server and returns what should be
    # pushed up
    def get_changed_refs(self, refs):
        keys = refs.keys()

        changed = []
        if not keys:
            return None

        # TODO : this is a huge hack
        if keys[0] == 'capabilities^{}':  # nothing on the server yet - first push
            changed.append(("0" * 40, self.git.ref('master'), 'refs/heads/master'))

        for ref_name in keys:
            parts = ref_name.split('/')
            if parts[0] == 'refs':  # strip off 'refs/heads'
                if parts[1] == 'heads':
                    head = "/".join([v for v in parts[2:]])
                    local_ref = self.git.ref(ref_name)
                    if local_ref:
                        if not local_ref == refs[ref_name]:
                            changed.append((refs[ref_name], local_ref, ref_name))

        return changed

    # takes a list of shas the server wants and shas the server has
    # and generates a list of commit shas we need to push up
    def generate_pack_contents(self, want, have):
        graph_walker = SimpleFetchGraphWalker(want, self.git.get_parents)
        next = graph_walker.next()
        shas = []
        while next:
            if next in have:
                graph_walker.ack(next)
            else:
                shas.append(next)
            next = graph_walker.next()

        # so now i have the shas, need to turn them into a list of
        # tuples (sha, path) for ALL the objects i'm sending
        # TODO : don't send blobs or trees they already have
        def get_objects(tree, path):
            changes = list()
            changes.append((tree, path))
            for (mode, name, sha) in tree.entries():
                if mode == 57344:  # TODO : properly handle submodules
                    continue
                obj = self.git.get_object(sha)
                if isinstance(obj, Blob):
                    changes.append((obj, path + name))
                elif isinstance(obj, Tree):
                    changes.extend(get_objects(obj, path + name + '/'))
            return changes

        objects = []
        for commit_sha in shas:
            commit = self.git.commit(commit_sha)
            objects.append((commit, 'commit'))
            tree = self.git.get_object(commit.tree)
            objects.extend(get_objects(tree, '/'))

        return objects

    def fetch_pack(self, remote_name):
        git_url = self.remote_name_to_url(remote_name)
        client, path = self.get_transport_and_path(git_url)
        graphwalker = SimpleFetchGraphWalker(self.git.heads().values(), self.git.get_parents)
        f, commit = self.git.object_store.add_pack()
        try:
            determine_wants = self.git.object_store.determine_wants_all
            refs = client.fetch_pack(path, determine_wants, graphwalker, f.write, sys.stdout.write)
            f.close()
            commit()
            self.git.set_remote_refs(refs, remote_name)
        except:
            f.close()
            raise

    def import_git_objects(self, remote_name):
        self.ui.status(_("importing Git objects into Hg\n"))
        # import heads as remote references
        todo = []
        done = set()
        convert_list = {}

        # get a list of all the head shas
        for head, sha in self.git.remote_refs(remote_name).iteritems():
            todo.append(sha)

        # traverse the heads getting a list of all the unique commits
        # TODO : stop when we hit a SHA we've already imported
        while todo:
            sha = todo.pop()
            assert isinstance(sha, str)
            if sha in done:
                continue
            done.add(sha)
            try:
                commit = self.git.commit(sha)
                convert_list[sha] = commit
                todo.extend([p for p in commit.parents if p not in done])
            except:
                print "Cannot import tags yet"  # TODO

        # sort the commits
        commits = TopoSort(convert_list).items()

        # import each of the commits, oldest first
        for csha in commits:
            commit = convert_list[csha]
            self.import_git_commit(commit)

        self.update_hg_bookmarks(remote_name)

    def update_hg_bookmarks(self, remote_name):
        try:
            bms = bookmarks.parse(self.repo)
            for head, sha in self.git.remote_refs(remote_name).iteritems():
                hgsha = hex_to_sha(self.map_hg_get(sha))
                if not head == 'HEAD':
                    bms[remote_name + '/' + head] = hgsha
            bookmarks.write(self.repo, bms)
        except AttributeError:
            self.repo.ui.warn('creating bookmarks failed, do you have'
                              ' bookmarks enabled?\n')

    def import_git_commit(self, commit):
        print "importing: " + commit.id
        # TODO : look for HG metadata in the message and use it
        # TODO : add extra Git data (committer info) as extras to changeset
        # TODO : (?) have to handle merge contexts at some point (two parent files, etc)

        # TODO : Do something less coarse-grained than try/except on the
        #        get_file call for removed files
        def getfilectx(repo, memctx, f):
            try:
                (e, sha, data) = self.git.get_file(commit, f)
            except TypeError:
                raise IOError()
            e = ''  # TODO : make this a real mode
            return context.memfilectx(f, data, 'l' in e, 'x' in e, None)

        p1 = "0" * 40
        p2 = "0" * 40
        if len(commit.parents) > 0:
            sha = commit.parents[0]
            p1 = self.map_hg_get(sha)
        if len(commit.parents) > 1:
            sha = commit.parents[1]
            p2 = self.map_hg_get(sha)
        if len(commit.parents) > 2:
            # TODO : map extra parents to the extras file
            pass

        # get a list of the changed, added, removed files
        files = self.git.get_files_changed(commit)

        extra = {}
        text = commit.message
        date = datetime.datetime.fromtimestamp(commit.author_time).strftime("%Y-%m-%d %H:%M:%S")
        ctx = context.memctx(self.repo, (p1, p2), text, files, getfilectx,
                             commit.author, date, extra)
        a = self.repo.commitctx(ctx)

        # get changeset id
        p2 = hex(self.repo.changelog.tip())
        # save changeset to mapping file
        gitsha = commit.id
        self.map_set(gitsha, p2)

    def check_bookmarks(self):
        if self.ui.config('extensions', 'hgext.bookmarks') is None:
            print "YOU NEED TO SETUP BOOKMARKS"

    def get_transport_and_path(self, uri):
        from dulwich.client import TCPGitClient, SSHGitClient, SubprocessGitClient
        for handler, transport in (("git://", TCPGitClient), ("git@", SSHGitClient), ("git+ssh://", SSHGitClient)):
            if uri.startswith(handler):
                if handler == 'git@':
                    host, path = uri[len(handler):].split(":", 1)
                    host = 'git@' + host
                else:
                    host, path = uri[len(handler):].split("/", 1)
                return transport(host), '/' + path
        # if it's not git or git+ssh, try a local url..
        return SubprocessGitClient(), uri
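
    # Illustrative inputs and results for get_transport_and_path above (not
    # part of the original file):
    #
    #   'git://example.com/project.git'    -> TCPGitClient('example.com'), '/project.git'
    #   'git@example.com:user/project.git' -> SSHGitClient('git@example.com'), '/user/project.git'
    #   '/tmp/local-repo'                  -> SubprocessGitClient(), '/tmp/local-repo'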

    def clear(self):
        git_dir = self.repo.join('git')
        mapfile = self.repo.join('git-mapfile')
        if os.path.exists(git_dir):
            for root, dirs, files in os.walk(git_dir, topdown=False):
                for name in files:
                    os.remove(os.path.join(root, name))
                for name in dirs:
                    os.rmdir(os.path.join(root, name))
            os.rmdir(git_dir)
        if os.path.exists(mapfile):
            os.remove(mapfile)


"""
Tarjan's algorithm and topological sorting implementation in Python
by Paul Harrison
Public domain, do with it as you will
"""


class TopoSort(object):

    def __init__(self, commitdict):
        self._sorted = self.robust_topological_sort(commitdict)
        self._shas = []
        for level in self._sorted:
            for sha in level:
                self._shas.append(sha)

    def items(self):
        self._shas.reverse()
        return self._shas

    def strongly_connected_components(self, graph):
        """ Find the strongly connected components in a graph using
            Tarjan's algorithm.

            graph should be a dictionary mapping node names to
            lists of successor nodes.
        """
        result = []
        stack = []
        low = {}

        def visit(node):
            if node in low:
                return
            num = len(low)
            low[node] = num
            stack_pos = len(stack)
            stack.append(node)
            for successor in graph[node].parents:
                visit(successor)
                low[node] = min(low[node], low[successor])
            if num == low[node]:
                component = tuple(stack[stack_pos:])
                del stack[stack_pos:]
                result.append(component)
                for item in component:
                    low[item] = len(graph)

        for node in graph:
            visit(node)

        return result

    def topological_sort(self, graph):
        count = {}
        for node in graph:
            count[node] = 0
        for node in graph:
            for successor in graph[node]:
                count[successor] += 1

        ready = [node for node in graph if count[node] == 0]

        result = []
        while ready:
            node = ready.pop(-1)
            result.append(node)
            for successor in graph[node]:
                count[successor] -= 1
                if count[successor] == 0:
                    ready.append(successor)

        return result

    def robust_topological_sort(self, graph):
        """ First identify strongly connected components,
            then perform a topological sort on these components. """
        components = self.strongly_connected_components(graph)

        node_component = {}
        for component in components:
            for node in component:
                node_component[node] = component

        component_graph = {}
        for component in components:
            component_graph[component] = []

        for node in graph:
            node_c = node_component[node]
            for successor in graph[node].parents:
                successor_c = node_component[successor]
                if node_c != successor_c:
                    component_graph[node_c].append(successor_c)

        return self.topological_sort(component_graph)