fastannotate: implement a simple wireprotocol transferring the annotate cache

Summary:
This diff adds a simple wireprotocol command that lets the client download
fastannotate cache files from the server.

Design decisions:
- Transfer full files. The linelog is not append-only and rsync-like
  incremental updates are not trivial. Since the files look small (usually <
  1MB), send full copies for now. If file size becomes an issue, we can
  improve it later.
- The server can send an arbitrary number of files with paths defined. This
  provides the flexibility to send extra files without changing the protocol.
  For example, if we want to speed up `--deleted`, we may want to download
  `linemap` files.
- No compression. The revmap is basically not compressible because it
  contains the hashes. The linelog could be compressed, but linelogs are
  small enough, and we may rely on compression on other layers, like SSH.
- `lastnode` check - if the last node of revmap the client reports matches
  what the server has, the server will assume it's up-to-date and do nothing.

Note that this diff is only about the protocol and methods on peer, localrepo
objects. The client-side command is not aware of the feature yet.


Test Plan:
- Clone from a local repo A to B, via the ssh protocol.
- Config both repos so B is the client, A is the server.
- Run `hg debugshell --debug` from B, make sure the client can get contents
  correctly (check the file contents from both the client and the server),
  also make sure that nonexistent files are ignored:

  In [1]: repo.prefetchfastannotate(['README', 'Makefile', 'NOT-EXIStED'])
  running ssh localhost ...
  sending hello command
  sending between command
  remote: 383
  remote: capabilities: .... getannotate
  remote: 1
  fastannotate: requesting 2 files
  sending batch command
  fastannotate: server returned
  fastannotate: writing 4976 bytes to fastannotate/default/README.l
  fastannotate: writing 813 bytes to fastannotate/default/README.m
  fastannotate: writing 9024 bytes to fastannotate/default/Makefile.l
  fastannotate: writing 2453 bytes to fastannotate/default/Makefile.m

- Make sure that when these files are fetched a second time, they are skipped
  because of the uptodate check:

  In [2]: repo.prefetchfastannotate(['README', 'Makefile', 'NOT-EXIStED'])

- Manually create a divergent case where the client and the server disagree
  about what "master" (or "mainbranch") is. Make sure that although the
  request will be sent by the client, the server can return nothing because of
  the lastnode check.


Reviewers: #mercurial, durham, stash

Reviewed By: stash

Subscribers: mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D4063249

Signature: t1:4063249:1477920081:9bfab3031aa3118060c433f6e27c5b32aa25040c
This commit is contained in:
Jun Wu 2016-10-29 22:45:20 +01:00
parent d8dd5ef6e8
commit ca37364c8d
4 changed files with 207 additions and 3 deletions

View File

@ -38,6 +38,17 @@ be faster than the vanilla 'annotate' if the cache is present.
# sacrifice correctness in some cases for performance (default: False)
perfhack = True
# serve the annotate cache via wire protocol (default: False)
# tip: the .hg/fastannotate directory is portable - can be rsynced
server = True
# update local annotate cache from remote on demand
# (default: True for remotefilelog repo, False otherwise)
client = True
# path to use when connecting to the remote server (default: default)
remotepath = default
"""
from __future__ import absolute_import
@ -46,9 +57,13 @@ from mercurial.i18n import _
from mercurial import (
cmdutil,
error as hgerror,
util,
)
from . import commands
from . import (
commands,
protocol,
)
testedwith = 'internal'
@ -71,3 +86,14 @@ def uisetup(ui):
# local import to avoid overhead of loading hgweb for non-hgweb usages
from . import hgwebsupport
hgwebsupport.replacehgwebannotate()
if ui.configbool('fastannotate', 'server'):
protocol.serveruisetup(ui)
def reposetup(ui, repo):
    """enable the fastannotate client for repo when configured to do so

    The 'fastannotate.client' boolean config decides. When it is unset (None)
    and the repo object has a 'requirements' attribute, the client is enabled
    only if 'remotefilelog' is one of those requirements.
    """
    enabled = ui.configbool('fastannotate', 'client', default=None)
    # no explicit setting: fall back to detecting a remotefilelog repo
    if enabled is None and util.safehasattr(repo, 'requirements'):
        enabled = 'remotefilelog' in repo.requirements
    if not enabled:
        return
    protocol.clientreposetup(ui, repo)

View File

@ -152,7 +152,7 @@ def fastannotate(ui, repo, *pats, **opts):
rev, showpath=showpath, showlines=showlines)
break
except faerror.CannotReuseError: # happens if master moves backwards
if rebuild: # give up since we have tried rebuild alreadyraise
if rebuild: # give up since we have tried rebuild already
raise
else: # try a second time rebuilding the cache (slow)
rebuild = True

View File

@ -701,7 +701,7 @@ def _unlinkpaths(paths):
class pathhelper(object):
"""helper for getting paths for lockfile, linelog and revmap"""
def __init__(self, repo, path, opts):
def __init__(self, repo, path, opts=defaultopts):
# different options use different directories
self._vfspath = os.path.join('fastannotate',
opts.shortstr, encodedir(path))

178
fastannotate/protocol.py Normal file
View File

@ -0,0 +1,178 @@
# Copyright 2016-present Facebook. All Rights Reserved.
#
# protocol: logic for a server providing fastannotate support
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from mercurial import (
error,
extensions,
hg,
localrepo,
wireproto,
)
from mercurial.i18n import _
import contextlib
import os
from . import context
# common
def _getmaster(ui):
"""get the mainbranch, and enforce it is set"""
master = ui.config('fastannotate', 'mainbranch')
if not master:
raise error.Abort(_('fastannotate.mainbranch is required '
'for both the client and the server'))
return master
# server-side
def _capabilities(orig, repo, proto):
result = orig(repo, proto)
result.append('getannotate')
return result
# Server-side handler for the 'getannotate' wire protocol command.
#
# path     - the file whose annotate cache is requested
# lastnode - the last revmap node reported by the client (the client-side
#            peer method sends '' when it has none)
#
# Returns one string in the OUTPUT format described below. It is '' when
# actx.lastnode equals the client's lastnode, or when neither cache file
# exists on disk.
#
# NOTE(review): indentation is lost in this rendering, so the exact nesting of
# actx.close() and of the send-back section is not visible here -- confirm
# against the real file before restructuring.
# NOTE(review): `path` comes from the remote client; confirm that
# context.annotatecontext confines it to the fastannotate cache directory.
def _getannotate(repo, proto, path, lastnode):
# output:
# FILE := vfspath + '\0' + str(size) + '\0' + content
# OUTPUT := '' | FILE + OUTPUT
result = ''
with context.annotatecontext(repo, path) as actx:
# update before responding to the client
master = _getmaster(repo.ui)
if not actx.isuptodate(master):
actx.annotate(master, master)
actx.close() # flush (presumably so the files read below are current)
# send back the full content of revmap and linelog, in the future we
# may want to do some rsync-like fancy updating.
# the lastnode check is not necessary if the client and the server
# agree where the main branch is.
if actx.lastnode != lastnode:
for p in [actx.revmappath, actx.linelogpath]:
# a cache file that does not exist is simply left out of the response
if not os.path.exists(p):
continue
content = ''
with open(p, 'rb') as f:
content = f.read()
# strip the "<vfs base>/" prefix so the client receives a vfs-relative path
# (assumes p starts with repo.vfs.base + '/' -- TODO confirm)
vfsbaselen = len(repo.vfs.base + '/')
relpath = p[vfsbaselen:]
result += '%s\0%s\0%s' % (relpath, len(content), content)
return result
def _registerwireprotocommand():
    """register the 'getannotate' wire command unless it already exists

    The command takes the arguments 'path' and 'lastnode' and is served by
    _getannotate.
    """
    if 'getannotate' not in wireproto.commands:
        register = wireproto.wireprotocommand('getannotate', 'path lastnode')
        register(_getannotate)
def serveruisetup(ui):
    """set up the server side: wire command plus capability advertisement"""
    _registerwireprotocommand()
    # advertise 'getannotate' by wrapping wireproto._capabilities
    wrap = extensions.wrapfunction
    wrap(wireproto, '_capabilities', _capabilities)
# client-side
def _parseresponse(payload):
result = {}
i = 0
l = len(payload) - 1
state = 0 # 0: vfspath, 1: size
vfspath = size = ''
while i < l:
ch = payload[i]
if ch == '\0':
if state == 1:
result[vfspath] = buffer(payload, i + 1, int(size))
i += int(size)
state = 0
vfspath = size = ''
elif state == 0:
state = 1
else:
if state == 1:
size += ch
elif state == 0:
vfspath += ch
i += 1
return result
def peersetup(ui, peer):
    """add a batchable getannotate() method to peer

    The peer's class is replaced by a subclass of its current class, so only
    this peer object is affected.
    """
    class fastannotatepeer(peer.__class__):
        @wireproto.batchable
        def getannotate(self, path, lastnode=None):
            if self.capable('getannotate'):
                future = wireproto.future()
                # first yield: the encoded arguments plus the future that
                # will receive the raw response
                yield {'path': path, 'lastnode': lastnode or ''}, future
                # second yield: the decoded result
                yield _parseresponse(future.value)
            else:
                ui.warn(_('remote peer cannot provide annotate cache\n'))
                yield None, None

    peer.__class__ = fastannotatepeer
@contextlib.contextmanager
def annotatepeer(repo):
    """context manager yielding a peer for the fastannotate remote

    The remote is taken from 'fastannotate.remotepath' (falling back to
    'default') and expanded with ui.expandpath. On exit, the peer's 'close'
    and 'cleanup' methods are called, in that order, when they exist.
    """
    configured = repo.ui.config('fastannotate', 'remotepath', 'default')
    remotepath = repo.ui.expandpath(configured)
    peer = hg.peer(repo.ui, {}, remotepath)

    def _noop():
        return None

    try:
        yield peer
    finally:
        for name in ('close', 'cleanup'):
            teardown = getattr(peer, name, _noop)
            teardown()
def clientfetch(repo, paths, lastnodemap=None, peer=None):
    """download annotate cache from the server for paths

    paths is the list of file paths to request. lastnodemap optionally maps a
    path to the last revmap node known locally, which is sent along with the
    request. peer is the peer to talk to; when None, one is opened via
    annotatepeer() for the duration of the call.

    Every file returned by the server is written into repo.vfs, except those
    whose path does not start with 'fastannotate/' or contains a '..'
    component. Results without a value (None) are skipped. Returns None.
    """
    if not paths:
        return

    if peer is None:
        with annotatepeer(repo) as peer:
            return clientfetch(repo, paths, lastnodemap, peer)

    if lastnodemap is None:
        lastnodemap = {}

    ui = repo.ui
    batcher = peer.batch()
    ui.debug('fastannotate: requesting %d files\n' % len(paths))
    results = [batcher.getannotate(p, lastnodemap.get(p)) for p in paths]
    batcher.submit()

    ui.debug('fastannotate: server returned\n')
    for result in results:
        # getannotate yields None as its result when the peer lacks the
        # capability; treat that as "no files" instead of crashing
        files = result.value or {}
        for path, content in files.items():
            # ignore malicious paths
            if not path.startswith('fastannotate/') or '/../' in (path + '/'):
                ui.debug('fastannotate: ignored malicious path %s\n' % path)
                continue
            if ui.debugflag:
                ui.debug('fastannotate: writing %d bytes to %s\n'
                         % (len(content), path))
            repo.vfs.makedirs(os.path.dirname(path))
            with repo.vfs(path, 'wb') as f:
                f.write(content)
def localreposetup(ui, repo):
    """add a prefetchfastannotate() method to repo

    The repo's class is replaced by a subclass of its current class.
    """
    class fastannotaterepo(repo.__class__):
        def prefetchfastannotate(self, paths, peer=None):
            """fetch the annotate cache for paths that are not up to date"""
            # NOTE(review): the annotate contexts use the closed-over repo,
            # not self; the two are the same object as long as this class is
            # only assigned to that repo (see the assignment below).
            master = _getmaster(repo.ui)
            stale = []
            lastnodes = {}
            for path in paths:
                with context.annotatecontext(repo, path) as actx:
                    if actx.isuptodate(master):
                        continue
                    stale.append(path)
                    lastnodes[path] = actx.lastnode
            if not stale:
                return
            clientfetch(self, stale, lastnodes, peer)

    repo.__class__ = fastannotaterepo
# Enable the client side of the fastannotate protocol for `repo`.
#
# Registers the 'getannotate' wire command, calls localreposetup() for
# localrepo.localrepository instances, and appends peersetup to
# hg.wirepeersetupfuncs unless it is already in that list.
# NOTE(review): indentation is lost in this rendering, so it is unclear whether
# the peersetup registration is nested under the isinstance() check -- confirm
# against the real file.
def clientreposetup(ui, repo):
_registerwireprotocommand()
# only local repositories get the extra repo method
if isinstance(repo, localrepo.localrepository):
localreposetup(ui, repo)
# the membership test keeps peersetup from being appended twice
if peersetup not in hg.wirepeersetupfuncs:
hg.wirepeersetupfuncs.append(peersetup)