# gitlookup.py - server-side support for hg->git and git->hg lookups # # Copyright 2014 Facebook, Inc. # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. ''' extension that will look up hashes from an hg-git map file over the wire. This also provides client and server commands to download all the Git metadata via bundle2. Example usage: - get the git equivalent of hg 47d743e068523a9346a5ea4e429eeab185c886c6 hg identify --id -r\\ _gitlookup_hg_47d743e068523a9346a5ea4e429eeab185c886c6\\ ssh://server/repo - get the hg equivalent of git 6916a3c30f53878032dea8d01074d8c2a03927bd hg identify --id -r\\ _gitlookup_git_6916a3c30f53878032dea8d01074d8c2a03927bd\\ ssh://server/repo :: [gitlookup] # Define the location of the map file with the mapfile config option. mapfile = # The config option onlymapdelta controls how the server handles the hg-git # map. A True value corresponds to serving only missing map data while False # corresponds to serving the complete map. onlymapdelta = False ''' import errno import json from mercurial import ( bundle2, encoding, error, exchange, extensions, hg, localrepo, registrar, util, wireproto, ) from mercurial.node import ( bin, hex, nullid, ) from mercurial.i18n import _ cmdtable = {} command = registrar.command(cmdtable) def wrapwireprotocommand(command, wrapper): '''Wrap the wire proto command named `command' in table Just like extensions.wrapcommand, except for wire protocol commands. ''' assert util.safehasattr(wrapper, '__call__') origfn, args = wireproto.commands[command] def wrap(*args, **kwargs): return util.checksignature(wrapper)( util.checksignature(origfn), *args, **kwargs) wireproto.commands[command] = wrap, args return wrapper def remotelookup(orig, repo, proto, key): k = encoding.tolocal(key) if k.startswith('_gitlookup_'): ret = _dolookup(repo, k) if ret is not None: success = 1 return '%s %s\n' % (success, ret) return orig(repo, proto, key) def locallookup(orig, repo, key): gitlookup = _dolookup(repo, key) if gitlookup: return bin(gitlookup) else: return orig(repo, key) def _dolookup(repo, key): mapfile = repo.ui.configpath('gitlookup', 'mapfile') if mapfile is None: return None if not isinstance(key, str): return None # direction: git to hg = g, hg to git = h if key.startswith('_gitlookup_git_'): direction = 'tohg' sha = key[15:] elif key.startswith('_gitlookup_hg_'): direction = 'togit' sha = key[14:] else: return None hggitmap = open(mapfile, 'rb') for line in hggitmap: gitsha, hgsha = line.strip().split(' ', 1) if direction == 'tohg' and sha == gitsha: return hgsha if direction == 'togit' and sha == hgsha: return gitsha return None @command('gitgetmeta', [], '[SOURCE]') def gitgetmeta(ui, repo, source='default'): '''get git metadata from a server that supports fb_gitmeta''' source, branch = hg.parseurl(ui.expandpath(source)) other = hg.peer(repo, {}, source) ui.status(_('getting git metadata from %s\n') % util.hidepassword(source)) kwargs = {'bundlecaps': exchange.caps20to10(repo)} capsblob = bundle2.encodecaps(bundle2.getrepocaps(repo)) kwargs['bundlecaps'].add('bundle2=' + util.urlreq.quote(capsblob)) # this would ideally not be in the bundlecaps at all, but adding new kwargs # for wire transmissions is not possible as of Mercurial d19164a018a1 kwargs['bundlecaps'].add('fb_gitmeta') kwargs['heads'] = [nullid] kwargs['cg'] = False kwargs['common'] = _getcommonheads(repo) bundle = other.getbundle('pull', **kwargs) try: op = bundle2.processbundle(repo, bundle) except error.BundleValueError as exc: raise error.Abort('missing support for %s' % exc) writebytes = op.records['fb:gitmeta:writebytes'] ui.status(_('wrote %d files (%d bytes)\n') % (len(writebytes), sum(writebytes))) hgheadsfile = 'git-synced-hgheads' gitmapfile = 'git-mapfile' gitmetafiles = set( [gitmapfile, 'git-named-branches', 'git-tags', 'git-remote-refs']) def _getfile(repo, filename): try: return repo.vfs(filename) except (IOError, OSError) as e: if e.errno != errno.ENOENT: repo.ui.warn(_("warning: unable to read %s: %s\n") % (filename, e)) return None def _getcommonheads(repo): commonheads = [] f = _getfile(repo, hgheadsfile) if f: commonheads = f.readlines() commonheads = [bin(x.strip()) for x in commonheads] return commonheads def _isheadmissing(repo, heads): return not all(repo.known(heads)) def _getmissinglines(mapfile, missinghashes): missinglines = set() # Avoid expensive lookup through the map file if there is no missing hash. if not missinghashes: return missinglines hashestofind = missinghashes.copy() for line in mapfile: gitsha, hgsha = line.strip().split(' ', 1) if hgsha in hashestofind: missinglines.add(line) # Return the missing lines if we found all of them. hashestofind.remove(hgsha) if not hashestofind: return missinglines raise error.Abort(_('gitmeta: missing hashes in file %s') % mapfile.name) class _githgmappayload(object): def __init__(self, needfullsync, newheads, missinglines): self.needfullsync = needfullsync self.newheads = newheads self.missinglines = missinglines def _todict(self): d = {} d['needfullsync'] = self.needfullsync d['newheads'] = list(self.newheads) d['missinglines'] = list(self.missinglines) return d def tojson(self): return json.dumps(self._todict()) @classmethod def _fromdict(cls, d): needfullsync = d['needfullsync'] newheads = set(d['newheads']) missinglines = set(d['missinglines']) return cls(needfullsync, newheads, missinglines) @classmethod def fromjson(cls, jsonstr): d = json.loads(jsonstr) return cls._fromdict(d) @exchange.getbundle2partsgenerator('b2x:fb:gitmeta:githgmap') def _getbundlegithgmappart(bundler, repo, source, bundlecaps=None, **kwargs): '''send missing git to hg map data via bundle2''' if 'fb_gitmeta' in bundlecaps: # Do nothing if the config indicates serving the complete git-hg map # file. _getbundlegitmetapart will handle serving the complete file in # this case. if not repo.ui.configbool('gitlookup', 'onlymapdelta', False): return mapfile = _getfile(repo, gitmapfile) if not mapfile: return commonheads = kwargs['common'] # If there are missing heads, we will sync everything. if _isheadmissing(repo, commonheads): commonheads = [] needfullsync = (len(commonheads) == 0) heads = repo.heads() newheads = set(hex(head) for head in heads) missingcommits = repo.changelog.findmissing(commonheads, heads) missinghashes = set(hex(commit) for commit in missingcommits) missinglines = _getmissinglines(mapfile, missinghashes) payload = _githgmappayload(needfullsync, newheads, missinglines) serializedpayload = payload.tojson() part = bundle2.bundlepart( 'b2x:fb:gitmeta:githgmap', [('filename', gitmapfile)], data = serializedpayload ) bundler.addpart(part) @exchange.getbundle2partsgenerator('b2x:fb:gitmeta') def _getbundlegitmetapart(bundler, repo, source, bundlecaps=None, **kwargs): '''send git metadata via bundle2''' if 'fb_gitmeta' in bundlecaps: filestooverwrite = gitmetafiles # Exclude the git-hg map file if the config indicates that the server # should only be serving the missing map data. _getbundle2partsgenerator # will serve the missing map data in this case. if repo.ui.configbool('gitlookup', 'onlymapdelta', False): filestooverwrite = filestooverwrite - set([gitmapfile]) for fname in sorted(filestooverwrite): f = _getfile(repo, fname) if not f: continue part = bundle2.bundlepart('b2x:fb:gitmeta', [('filename', fname)], data=f.read()) bundler.addpart(part) def _writefile(op, filename, data): with op.repo.vfs(filename, 'w+', atomictemp=True) as f: op.repo.ui.note(_('writing .hg/%s\n') % filename) f.write(data) op.records.add('fb:gitmeta:writebytes', len(data)) def _validatepartparams(op, params): if 'filename' not in params: raise error.Abort(_("gitmeta: 'filename' missing")) fname = params['filename'] if fname not in gitmetafiles: op.repo.ui.warn( _("warning: gitmeta: unknown file '%s' skipped\n") % fname) return False return True @bundle2.parthandler('b2x:fb:gitmeta:githgmap', ('filename',)) @bundle2.parthandler('fb:gitmeta:githgmap', ('filename',)) def bundle2getgithgmap(op, part): params = dict(part.mandatoryparams) if _validatepartparams(op, params): filename = params['filename'] with op.repo.wlock(): data = _githgmappayload.fromjson(part.read()) missinglines = data.missinglines # No need to update anything if already in sync. if not missinglines: return if data.needfullsync: newlines = missinglines else: mapfile = _getfile(op.repo, filename) if mapfile: currentlines = set(mapfile.readlines()) if currentlines & missinglines: msg = 'warning: gitmeta: unexpected lines in .hg/%s\n' op.repo.ui.warn(_(msg) % filename) currentlines.update(missinglines) newlines = currentlines else: raise error.Abort( _('gitmeta: could not read from .hg/%s') % filename) _writefile(op, filename, ''.join(newlines)) _writefile(op, hgheadsfile, '\n'.join(data.newheads)) @bundle2.parthandler('b2x:fb:gitmeta', ('filename',)) @bundle2.parthandler('fb:gitmeta', ('filename',)) def bundle2getgitmeta(op, part): '''unbundle a bundle2 containing git metadata on the client''' params = dict(part.mandatoryparams) if _validatepartparams(op, params): filename = params['filename'] with op.repo.wlock(): data = part.read() _writefile(op, filename, data) def extsetup(ui): wrapwireprotocommand('lookup', remotelookup) extensions.wrapfunction(localrepo.localrepository, 'lookup', locallookup)