mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 01:07:15 +03:00
112b1abe18
This fixes a performance issue with 'hg status' when files are specified on the command-line. Previously, a large amount of largefiles code was executed, even if files were specified on the command-line and those files were not largefiles. This patch fixes the problem by first checking if non-largefiles were specified on the command-line and, just letting the normal status function handle the case if they were. On a brand new machine, the execution time for 'hg status filename' on a repository with largefiles was: real 0m0.636s user 0m0.512s sys 0m0.120s versus the following (the same repository, with largefiles disabled): real 0m0.215s user 0m0.180s sys 0m0.032s After this patch, the performance of 'hg status filename' on the same repository, with largefiles enabled is: real 0m0.228s user 0m0.189s sys 0m0.036s This performance boost is also true when patterns (rather than specific files) are specified on the command-line. In the case where patterns are specified in addition to a file list, we just defer to the normal codepath in order to not spend extra time expanding the patterns to just risk having to expand them again later.
438 lines
21 KiB
Python
438 lines
21 KiB
Python
# Copyright 2009-2010 Gregory P. Ward
|
|
# Copyright 2009-2010 Intelerad Medical Systems Incorporated
|
|
# Copyright 2010-2011 Fog Creek Software
|
|
# Copyright 2010-2011 Unity Technologies
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
'''setup for largefiles repositories: reposetup'''
|
|
import copy
|
|
import types
|
|
import os
|
|
|
|
from mercurial import context, error, manifest, match as match_, node, util
|
|
from mercurial.i18n import _
|
|
|
|
import lfcommands
|
|
import proto
|
|
import lfutil
|
|
|
|
def reposetup(ui, repo):
|
|
# wire repositories should be given new wireproto functions but not the
|
|
# other largefiles modifications
|
|
if not repo.local():
|
|
return proto.wirereposetup(ui, repo)
|
|
|
|
for name in ('status', 'commitctx', 'commit', 'push'):
|
|
method = getattr(repo, name)
|
|
#if not (isinstance(method, types.MethodType) and
|
|
# method.im_func is repo.__class__.commitctx.im_func):
|
|
if (isinstance(method, types.FunctionType) and
|
|
method.func_name == 'wrap'):
|
|
ui.warn(_('largefiles: repo method %r appears to have already been'
|
|
' wrapped by another extension: '
|
|
'largefiles may behave incorrectly\n')
|
|
% name)
|
|
|
|
class lfiles_repo(repo.__class__):
|
|
lfstatus = False
|
|
def status_nolfiles(self, *args, **kwargs):
|
|
return super(lfiles_repo, self).status(*args, **kwargs)
|
|
|
|
# When lfstatus is set, return a context that gives the names
|
|
# of largefiles instead of their corresponding standins and
|
|
# identifies the largefiles as always binary, regardless of
|
|
# their actual contents.
|
|
def __getitem__(self, changeid):
|
|
ctx = super(lfiles_repo, self).__getitem__(changeid)
|
|
if self.lfstatus:
|
|
class lfiles_manifestdict(manifest.manifestdict):
|
|
def __contains__(self, filename):
|
|
if super(lfiles_manifestdict,
|
|
self).__contains__(filename):
|
|
return True
|
|
return super(lfiles_manifestdict,
|
|
self).__contains__(lfutil.standin(filename))
|
|
class lfiles_ctx(ctx.__class__):
|
|
def files(self):
|
|
filenames = super(lfiles_ctx, self).files()
|
|
return [lfutil.splitstandin(f) or f for f in filenames]
|
|
def manifest(self):
|
|
man1 = super(lfiles_ctx, self).manifest()
|
|
man1.__class__ = lfiles_manifestdict
|
|
return man1
|
|
def filectx(self, path, fileid=None, filelog=None):
|
|
try:
|
|
result = super(lfiles_ctx, self).filectx(path,
|
|
fileid, filelog)
|
|
except error.LookupError:
|
|
# Adding a null character will cause Mercurial to
|
|
# identify this as a binary file.
|
|
result = super(lfiles_ctx, self).filectx(
|
|
lfutil.standin(path), fileid, filelog)
|
|
olddata = result.data
|
|
result.data = lambda: olddata() + '\0'
|
|
return result
|
|
ctx.__class__ = lfiles_ctx
|
|
return ctx
|
|
|
|
# Figure out the status of big files and insert them into the
|
|
# appropriate list in the result. Also removes standin files
|
|
# from the listing. Revert to the original status if
|
|
# self.lfstatus is False.
|
|
def status(self, node1='.', node2=None, match=None, ignored=False,
|
|
clean=False, unknown=False, listsubrepos=False):
|
|
listignored, listclean, listunknown = ignored, clean, unknown
|
|
if not self.lfstatus:
|
|
return super(lfiles_repo, self).status(node1, node2, match,
|
|
listignored, listclean, listunknown, listsubrepos)
|
|
else:
|
|
# some calls in this function rely on the old version of status
|
|
self.lfstatus = False
|
|
if isinstance(node1, context.changectx):
|
|
ctx1 = node1
|
|
else:
|
|
ctx1 = repo[node1]
|
|
if isinstance(node2, context.changectx):
|
|
ctx2 = node2
|
|
else:
|
|
ctx2 = repo[node2]
|
|
working = ctx2.rev() is None
|
|
parentworking = working and ctx1 == self['.']
|
|
|
|
def inctx(file, ctx):
|
|
try:
|
|
if ctx.rev() is None:
|
|
return file in ctx.manifest()
|
|
ctx[file]
|
|
return True
|
|
except KeyError:
|
|
return False
|
|
|
|
if match is None:
|
|
match = match_.always(self.root, self.getcwd())
|
|
|
|
# First check if there were files specified on the
|
|
# command line. If there were, and none of them were
|
|
# largefiles, we should just bail here and let super
|
|
# handle it -- thus gaining a big performance boost.
|
|
lfdirstate = lfutil.openlfdirstate(ui, self)
|
|
if match.files() and not match.anypats():
|
|
matchedfiles = [f for f in match.files() if f in lfdirstate]
|
|
if not matchedfiles:
|
|
return super(lfiles_repo, self).status(node1, node2,
|
|
match, listignored, listclean,
|
|
listunknown, listsubrepos)
|
|
|
|
# Create a copy of match that matches standins instead
|
|
# of largefiles.
|
|
def tostandin(file):
|
|
if inctx(lfutil.standin(file), ctx2):
|
|
return lfutil.standin(file)
|
|
return file
|
|
|
|
# Create a function that we can use to override what is
|
|
# normally the ignore matcher. We've already checked
|
|
# for ignored files on the first dirstate walk, and
|
|
# unecessarily re-checking here causes a huge performance
|
|
# hit because lfdirstate only knows about largefiles
|
|
def _ignoreoverride(self):
|
|
return False
|
|
|
|
m = copy.copy(match)
|
|
m._files = [tostandin(f) for f in m._files]
|
|
|
|
# Get ignored files here even if we weren't asked for them; we
|
|
# must use the result here for filtering later
|
|
result = super(lfiles_repo, self).status(node1, node2, m,
|
|
True, clean, unknown, listsubrepos)
|
|
if working:
|
|
# hold the wlock while we read largefiles and
|
|
# update the lfdirstate
|
|
wlock = repo.wlock()
|
|
try:
|
|
# Any non-largefiles that were explicitly listed must be
|
|
# taken out or lfdirstate.status will report an error.
|
|
# The status of these files was already computed using
|
|
# super's status.
|
|
# Override lfdirstate's ignore matcher to not do
|
|
# anything
|
|
orig_ignore = lfdirstate._ignore
|
|
lfdirstate._ignore = _ignoreoverride
|
|
|
|
match._files = [f for f in match._files if f in
|
|
lfdirstate]
|
|
# Don't waste time getting the ignored and unknown
|
|
# files again; we already have them
|
|
s = lfdirstate.status(match, [], False,
|
|
listclean, False)
|
|
(unsure, modified, added, removed, missing, unknown,
|
|
ignored, clean) = s
|
|
# Replace the list of ignored and unknown files with
|
|
# the previously caclulated lists, and strip out the
|
|
# largefiles
|
|
lfiles = set(lfdirstate._map)
|
|
ignored = set(result[5]).difference(lfiles)
|
|
unknown = set(result[4]).difference(lfiles)
|
|
if parentworking:
|
|
for lfile in unsure:
|
|
standin = lfutil.standin(lfile)
|
|
if standin not in ctx1:
|
|
# from second parent
|
|
modified.append(lfile)
|
|
elif ctx1[standin].data().strip() \
|
|
!= lfutil.hashfile(self.wjoin(lfile)):
|
|
modified.append(lfile)
|
|
else:
|
|
clean.append(lfile)
|
|
lfdirstate.normal(lfile)
|
|
lfdirstate.write()
|
|
else:
|
|
tocheck = unsure + modified + added + clean
|
|
modified, added, clean = [], [], []
|
|
|
|
for lfile in tocheck:
|
|
standin = lfutil.standin(lfile)
|
|
if inctx(standin, ctx1):
|
|
if ctx1[standin].data().strip() != \
|
|
lfutil.hashfile(self.wjoin(lfile)):
|
|
modified.append(lfile)
|
|
else:
|
|
clean.append(lfile)
|
|
else:
|
|
added.append(lfile)
|
|
# Replace the original ignore function
|
|
lfdirstate._ignore = orig_ignore
|
|
finally:
|
|
wlock.release()
|
|
|
|
for standin in ctx1.manifest():
|
|
if not lfutil.isstandin(standin):
|
|
continue
|
|
lfile = lfutil.splitstandin(standin)
|
|
if not match(lfile):
|
|
continue
|
|
if lfile not in lfdirstate:
|
|
removed.append(lfile)
|
|
# Handle unknown and ignored differently
|
|
lfiles = (modified, added, removed, missing, [], [], clean)
|
|
result = list(result)
|
|
# Unknown files
|
|
unknown = set(unknown).difference(ignored)
|
|
result[4] = [f for f in unknown
|
|
if (repo.dirstate[f] == '?' and
|
|
not lfutil.isstandin(f))]
|
|
# Ignored files were calculated earlier by the dirstate,
|
|
# and we already stripped out the largefiles from the list
|
|
result[5] = ignored
|
|
# combine normal files and largefiles
|
|
normals = [[fn for fn in filelist
|
|
if not lfutil.isstandin(fn)]
|
|
for filelist in result]
|
|
result = [sorted(list1 + list2)
|
|
for (list1, list2) in zip(normals, lfiles)]
|
|
else:
|
|
def toname(f):
|
|
if lfutil.isstandin(f):
|
|
return lfutil.splitstandin(f)
|
|
return f
|
|
result = [[toname(f) for f in items] for items in result]
|
|
|
|
if not listunknown:
|
|
result[4] = []
|
|
if not listignored:
|
|
result[5] = []
|
|
if not listclean:
|
|
result[6] = []
|
|
self.lfstatus = True
|
|
return result
|
|
|
|
# As part of committing, copy all of the largefiles into the
|
|
# cache.
|
|
def commitctx(self, *args, **kwargs):
|
|
node = super(lfiles_repo, self).commitctx(*args, **kwargs)
|
|
ctx = self[node]
|
|
for filename in ctx.files():
|
|
if lfutil.isstandin(filename) and filename in ctx.manifest():
|
|
realfile = lfutil.splitstandin(filename)
|
|
lfutil.copytostore(self, ctx.node(), realfile)
|
|
|
|
return node
|
|
|
|
# Before commit, largefile standins have not had their
|
|
# contents updated to reflect the hash of their largefile.
|
|
# Do that here.
|
|
def commit(self, text="", user=None, date=None, match=None,
|
|
force=False, editor=False, extra={}):
|
|
orig = super(lfiles_repo, self).commit
|
|
|
|
wlock = repo.wlock()
|
|
try:
|
|
if getattr(repo, "_isrebasing", False):
|
|
# We have to take the time to pull down the new
|
|
# largefiles now. Otherwise if we are rebasing,
|
|
# any largefiles that were modified in the
|
|
# destination changesets get overwritten, either
|
|
# by the rebase or in the first commit after the
|
|
# rebase.
|
|
lfcommands.updatelfiles(repo.ui, repo)
|
|
# Case 1: user calls commit with no specific files or
|
|
# include/exclude patterns: refresh and commit all files that
|
|
# are "dirty".
|
|
if ((match is None) or
|
|
(not match.anypats() and not match.files())):
|
|
# Spend a bit of time here to get a list of files we know
|
|
# are modified so we can compare only against those.
|
|
# It can cost a lot of time (several seconds)
|
|
# otherwise to update all standins if the largefiles are
|
|
# large.
|
|
lfdirstate = lfutil.openlfdirstate(ui, self)
|
|
dirtymatch = match_.always(repo.root, repo.getcwd())
|
|
s = lfdirstate.status(dirtymatch, [], False, False, False)
|
|
modifiedfiles = []
|
|
for i in s:
|
|
modifiedfiles.extend(i)
|
|
lfiles = lfutil.listlfiles(self)
|
|
# this only loops through largefiles that exist (not
|
|
# removed/renamed)
|
|
for lfile in lfiles:
|
|
if lfile in modifiedfiles:
|
|
if os.path.exists(self.wjoin(lfutil.standin(lfile))):
|
|
# this handles the case where a rebase is being
|
|
# performed and the working copy is not updated
|
|
# yet.
|
|
if os.path.exists(self.wjoin(lfile)):
|
|
lfutil.updatestandin(self,
|
|
lfutil.standin(lfile))
|
|
lfdirstate.normal(lfile)
|
|
for lfile in lfdirstate:
|
|
if lfile in modifiedfiles:
|
|
if not os.path.exists(
|
|
repo.wjoin(lfutil.standin(lfile))):
|
|
lfdirstate.drop(lfile)
|
|
lfdirstate.write()
|
|
|
|
return orig(text=text, user=user, date=date, match=match,
|
|
force=force, editor=editor, extra=extra)
|
|
|
|
for f in match.files():
|
|
if lfutil.isstandin(f):
|
|
raise util.Abort(
|
|
_('file "%s" is a largefile standin') % f,
|
|
hint=('commit the largefile itself instead'))
|
|
|
|
# Case 2: user calls commit with specified patterns: refresh
|
|
# any matching big files.
|
|
smatcher = lfutil.composestandinmatcher(self, match)
|
|
standins = lfutil.dirstate_walk(self.dirstate, smatcher)
|
|
|
|
# No matching big files: get out of the way and pass control to
|
|
# the usual commit() method.
|
|
if not standins:
|
|
return orig(text=text, user=user, date=date, match=match,
|
|
force=force, editor=editor, extra=extra)
|
|
|
|
# Refresh all matching big files. It's possible that the
|
|
# commit will end up failing, in which case the big files will
|
|
# stay refreshed. No harm done: the user modified them and
|
|
# asked to commit them, so sooner or later we're going to
|
|
# refresh the standins. Might as well leave them refreshed.
|
|
lfdirstate = lfutil.openlfdirstate(ui, self)
|
|
for standin in standins:
|
|
lfile = lfutil.splitstandin(standin)
|
|
if lfdirstate[lfile] <> 'r':
|
|
lfutil.updatestandin(self, standin)
|
|
lfdirstate.normal(lfile)
|
|
else:
|
|
lfdirstate.drop(lfile)
|
|
lfdirstate.write()
|
|
|
|
# Cook up a new matcher that only matches regular files or
|
|
# standins corresponding to the big files requested by the
|
|
# user. Have to modify _files to prevent commit() from
|
|
# complaining "not tracked" for big files.
|
|
lfiles = lfutil.listlfiles(repo)
|
|
match = copy.copy(match)
|
|
orig_matchfn = match.matchfn
|
|
|
|
# Check both the list of largefiles and the list of
|
|
# standins because if a largefile was removed, it
|
|
# won't be in the list of largefiles at this point
|
|
match._files += sorted(standins)
|
|
|
|
actualfiles = []
|
|
for f in match._files:
|
|
fstandin = lfutil.standin(f)
|
|
|
|
# ignore known largefiles and standins
|
|
if f in lfiles or fstandin in standins:
|
|
continue
|
|
|
|
# append directory separator to avoid collisions
|
|
if not fstandin.endswith(os.sep):
|
|
fstandin += os.sep
|
|
|
|
# prevalidate matching standin directories
|
|
if util.any(st for st in match._files
|
|
if st.startswith(fstandin)):
|
|
continue
|
|
actualfiles.append(f)
|
|
match._files = actualfiles
|
|
|
|
def matchfn(f):
|
|
if orig_matchfn(f):
|
|
return f not in lfiles
|
|
else:
|
|
return f in standins
|
|
|
|
match.matchfn = matchfn
|
|
return orig(text=text, user=user, date=date, match=match,
|
|
force=force, editor=editor, extra=extra)
|
|
finally:
|
|
wlock.release()
|
|
|
|
def push(self, remote, force=False, revs=None, newbranch=False):
|
|
o = lfutil.findoutgoing(repo, remote, force)
|
|
if o:
|
|
toupload = set()
|
|
o = repo.changelog.nodesbetween(o, revs)[0]
|
|
for n in o:
|
|
parents = [p for p in repo.changelog.parents(n)
|
|
if p != node.nullid]
|
|
ctx = repo[n]
|
|
files = set(ctx.files())
|
|
if len(parents) == 2:
|
|
mc = ctx.manifest()
|
|
mp1 = ctx.parents()[0].manifest()
|
|
mp2 = ctx.parents()[1].manifest()
|
|
for f in mp1:
|
|
if f not in mc:
|
|
files.add(f)
|
|
for f in mp2:
|
|
if f not in mc:
|
|
files.add(f)
|
|
for f in mc:
|
|
if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f,
|
|
None):
|
|
files.add(f)
|
|
|
|
toupload = toupload.union(
|
|
set([ctx[f].data().strip()
|
|
for f in files
|
|
if lfutil.isstandin(f) and f in ctx]))
|
|
lfcommands.uploadlfiles(ui, self, remote, toupload)
|
|
return super(lfiles_repo, self).push(remote, force, revs,
|
|
newbranch)
|
|
|
|
repo.__class__ = lfiles_repo
|
|
|
|
def checkrequireslfiles(ui, repo, **kwargs):
|
|
if 'largefiles' not in repo.requirements and util.any(
|
|
lfutil.shortname+'/' in f[0] for f in repo.store.datafiles()):
|
|
repo.requirements.add('largefiles')
|
|
repo._writerequirements()
|
|
|
|
ui.setconfig('hooks', 'changegroup.lfiles', checkrequireslfiles)
|
|
ui.setconfig('hooks', 'commit.lfiles', checkrequireslfiles)
|