sapling/remotefilelog/shallowrepo.py
Durham Goode faccfe65d4 Add prefetching to checklookup
Summary:
During hg status, Mercurial sometimes needs to look at the size or contents of
a file and compare them to what's in history, which requires the file blob.

This patch causes those files to be batch downloaded before they are compared.

There was a previous attempt at this (see the deleted code), but it only wrapped
the dirstate once at the beginning, so it was lost if the dirstate object was
replaced at any point.

Test Plan: Added a test to verify unknown files require only one fetch.

Reviewers: #sourcecontrol, ttung

Reviewed By: ttung

Subscribers: dcapra

Differential Revision: https://phabricator.fb.com/D2756768

Signature: t1:2756768:1450130997:7c7101efe66c998e3182dfbd848aa6b1a57d509f
2015-12-14 14:44:08 -08:00

153 lines
5.9 KiB
Python

# shallowrepo.py - shallow repository that uses remote filelogs
#
# Copyright 2013 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from mercurial.node import hex, nullid, nullrev, bin
from mercurial.i18n import _
from mercurial import localrepo, context, util, match, scmutil
from mercurial.extensions import wrapfunction
import remotefilelog, remotefilectx, fileserverclient, shallowbundle, os
# Requirement name associated with this extension (presumably the entry
# recorded in a repo's requirements to mark it as shallow -- confirm against
# the callers that import this constant).
requirement = "remotefilelog"
def wraprepo(repo):
    """Convert ``repo`` into a shallow repository, in place.

    A subclass of the repo's current class is defined and assigned to
    ``repo.__class__``.  The subclass routes ``file()``/``filectx()`` lookups
    for paths accepted by ``repo.shallowmatch`` through remotefilelog, and
    adds prefetching (``prefetch()`` and a prefetching ``commitctx()``).

    After swapping the class, the following attributes are set on ``repo``:
    ``shallowmatch``, ``fileservice``, ``includepattern`` and
    ``excludepattern``.  Finally the ``data`` directory below the store vfs
    base is created if it does not exist yet.
    """
    # Local import: only needed for the directory-creation race handling at
    # the bottom of this function.
    import errno

    class shallowrepository(repo.__class__):
        @util.propertycache
        def name(self):
            """Value of the ``remotefilelog.reponame`` config ('' if unset)."""
            return self.ui.config('remotefilelog', 'reponame', '')

        @util.propertycache
        def fallbackpath(self):
            """Configured fallback path.

            Looked up in order: ``remotefilelog.fallbackpath``,
            ``remotefilelog.fallbackrepo``, then ``paths.default``.
            """
            return repo.ui.config("remotefilelog", "fallbackpath",
                # fallbackrepo is the old, deprecated name
                repo.ui.config("remotefilelog", "fallbackrepo",
                    repo.ui.config("paths", "default")))

        def sparsematch(self, *revs, **kwargs):
            """Delegate to the base class's ``sparsematch`` when it has one.

            Returns None if the base class does not define ``sparsematch``.
            """
            baseinstance = super(shallowrepository, self)
            if util.safehasattr(baseinstance, 'sparsematch'):
                return baseinstance.sparsematch(*revs, **kwargs)
            return None

        def file(self, f):
            """Return the filelog for path ``f``.

            A single leading '/' is stripped first.  Paths accepted by
            ``self.shallowmatch`` get a remotefilelog; every other path is
            handled by the base class.
            """
            if f[0] == '/':
                f = f[1:]

            if self.shallowmatch(f):
                return remotefilelog.remotefilelog(self.svfs, f, self)
            else:
                return super(shallowrepository, self).file(f)

        def filectx(self, path, changeid=None, fileid=None):
            """Return a file context for ``path``.

            Paths accepted by ``self.shallowmatch`` get a remotefilectx;
            every other path is handled by the base class.
            """
            if self.shallowmatch(path):
                return remotefilectx.remotefilectx(self, path, changeid,
                                                   fileid)
            else:
                return super(shallowrepository, self).filectx(path, changeid,
                                                              fileid)

        @localrepo.unfilteredmethod
        def commitctx(self, ctx, error=False):
            """Add a new revision to current repository.

            Revision information is passed via the context argument.

            Before delegating to the base class, the first-parent versions of
            all modified and added files that exist in the first parent's
            manifest are prefetched in a single batch.
            """

            # prefetch files that will likely be compared
            m1 = ctx.p1().manifest()
            files = []
            for f in ctx.modified() + ctx.added():
                fparent1 = m1.get(f, nullid)
                # nullid means the file is not in the first parent, so there
                # is nothing to fetch for it.
                if fparent1 != nullid:
                    files.append((f, hex(fparent1)))
            self.fileservice.prefetch(files)
            return super(shallowrepository, self).commitctx(ctx, error=error)

        def prefetch(self, revs, base=None, pats=None, opts=None):
            """Prefetches all the necessary file revisions for the given revs

            revs: revisions whose manifests are scanned; they are processed
                  in sorted order.
            base: optional revision.  (path, filenode) pairs present in its
                  manifest are not fetched.  When a fallback path is
                  configured, base is first replaced by its closest ancestor
                  (itself included) that is not in the outgoing set, if such
                  a revision exists.
            pats: optional file patterns; when given, only paths matching
                  them are fetched.
            opts: optional options dict passed to scmutil.match() together
                  with pats.  None is treated as an empty dict.
            """
            # The signature allows opts=None, but scmutil.match() may call
            # opts.get(...); normalize so a pats-only call does not fail.
            if opts is None:
                opts = {}

            fallbackpath = self.fallbackpath
            if fallbackpath:
                # If we know a rev is on the server, we should fetch the server
                # version of those files, since our local file versions might
                # become obsolete if the local commits are stripped.
                localrevs = repo.revs('outgoing(%s)', fallbackpath)
                if base is not None and base != nullrev:
                    serverbase = list(repo.revs('first(reverse(::%s) - %ld)',
                                                base, localrevs))
                    if serverbase:
                        base = serverbase[0]
            else:
                # Without a fallback path every rev in the repo counts as
                # local ("rev in repo" is used as the membership test below).
                localrevs = repo

            mf = repo.manifest
            if base is not None:
                mfdict = mf.read(repo[base].manifestnode())
                skip = set(mfdict.iteritems())
            else:
                skip = set()

            # Copy the skip set to start large and avoid constant resizing,
            # and since it's likely to be very similar to the prefetch set.
            files = skip.copy()
            serverfiles = skip.copy()
            # Manifest revs already processed; nullrev is pre-seeded so that
            # a missing parent counts as visited.
            visited = set()
            visited.add(nullrev)
            for rev in sorted(revs):
                ctx = repo[rev]
                if pats:
                    m = scmutil.match(ctx, pats, opts)
                sparsematch = repo.sparsematch(rev)

                mfnode = ctx.manifestnode()
                mfrev = mf.rev(mfnode)

                # Decompressing manifests is expensive.
                # When possible, only read the deltas.
                p1, p2 = mf.parentrevs(mfrev)
                if p1 in visited and p2 in visited:
                    mfdict = mf.readfast(mfnode)
                else:
                    mfdict = mf.read(mfnode)

                # Lazily filtered stream of (path, filenode) pairs; it is
                # consumed by the update() call below within this iteration.
                diff = mfdict.iteritems()
                if pats:
                    diff = (pf for pf in diff if m(pf[0]))
                if sparsematch:
                    diff = (pf for pf in diff if sparsematch(pf[0]))
                if rev not in localrevs:
                    serverfiles.update(diff)
                else:
                    files.update(diff)

                visited.add(mfrev)

            # Drop everything that was already present in the base manifest.
            files.difference_update(skip)
            serverfiles.difference_update(skip)

            # Fetch files known to be on the server
            if serverfiles:
                results = [(path, hex(fnode)) for (path, fnode) in serverfiles]
                repo.fileservice.prefetch(results, force=True)

            # Fetch files that may or may not be on the server
            if files:
                results = [(path, hex(fnode)) for (path, fnode) in files]
                repo.fileservice.prefetch(results)

    repo.__class__ = shallowrepository

    # Match every path by default; narrowed below when include/exclude
    # patterns are configured.
    repo.shallowmatch = match.always(repo.root, '')
    repo.fileservice = fileserverclient.fileserverclient(repo)

    repo.includepattern = repo.ui.configlist("remotefilelog", "includepattern",
                                             None)
    repo.excludepattern = repo.ui.configlist("remotefilelog", "excludepattern",
                                             None)
    if repo.includepattern or repo.excludepattern:
        repo.shallowmatch = match.match(repo.root, '', None,
            repo.includepattern, repo.excludepattern)

    localpath = os.path.join(repo.svfs.vfs.base, 'data')
    if not os.path.exists(localpath):
        try:
            os.makedirs(localpath)
        except OSError as err:
            # Another process may have created the directory between the
            # exists() check and makedirs(); that is not an error.
            if err.errno != errno.EEXIST:
                raise