mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 16:57:49 +03:00
97dfe79221
Summary: The isgooddelta tweak was introduced in D2693043 (perftweaks: change revlog delta heuristic, 2015-11-24). Comparing with the existing version, the only change is that we removed `dist > maxdist` check. Note that the upstream commit 895ecec31 (revlog: add an experimental option to mitigated delta issues (issue5480), 2017-06-23) also introduces a config option to override `maxdist` to make the condition fail, which basically does the same thing. Instead of introducing new config options or adding more "if"s to the codebase to make it more obscure, let's just simplify it by disabling the check entirely, and removing the `dist` concept, removing two config options: `experimental.maxdeltachainspan` and `perftweaks.preferdeltas`. The `chainlen > self._maxchainlen` check should be enough for keeping delta chain length bounded. Reviewed By: DurhamG Differential Revision: D6752529 fbshipit-source-id: e8fd8ec39240191db5fb274190fc661e97087a78
360 lines
13 KiB
Python
360 lines
13 KiB
Python
# perftweaks.py
|
|
#
|
|
# Copyright 2015 Facebook, Inc.
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
"""extension for tweaking Mercurial features to improve performance.
|
|
|
|
::
|
|
|
|
[perftweaks]
|
|
# Whether to use faster hidden cache. It has faster cache hash calculation
|
|
# which only check stat of a few files inside store/ directory.
|
|
fasthiddencache = False
|
|
"""
|
|
|
|
from mercurial import (
|
|
branchmap,
|
|
dispatch,
|
|
extensions,
|
|
localrepo,
|
|
merge,
|
|
namespaces,
|
|
phases,
|
|
scmutil,
|
|
tags,
|
|
util,
|
|
)
|
|
from mercurial.extensions import wrapfunction
|
|
from mercurial.node import bin
|
|
import errno
|
|
import os
|
|
|
|
testedwith = 'ships-with-fb-hgext'
|
|
|
|
def extsetup(ui):
    """Install the perftweaks wrappers at extension load time.

    Each wrapfunction below either short-circuits a known-slow code path
    behind a perftweaks config knob, or adds logging/caching side effects
    around an existing Mercurial operation.
    """
    # Config-gated short-circuits for slow operations.
    wrapfunction(tags, '_readtagcache', _readtagcache)
    wrapfunction(merge, '_checkcollision', _checkcollision)
    wrapfunction(branchmap.branchcache, 'update', _branchmapupdate)
    wrapfunction(branchmap, 'read', _branchmapread)
    wrapfunction(branchmap, 'replacecache', _branchmapreplacecache)
    wrapfunction(branchmap, 'updatecache', _branchmapupdatecache)

    # Metrics logging around command dispatch and working-copy updates.
    # Note: runcommand is wrapped twice; both wrappers run on each command.
    wrapfunction(dispatch, 'runcommand', _trackdirstatesizes)
    wrapfunction(dispatch, 'runcommand', _tracksparseprofiles)
    wrapfunction(merge, 'update', _trackupdatesize)

    try:
        rebase = extensions.find('rebase')
        wrapfunction(rebase.rebaseruntime, '_preparenewrebase',
                     _trackrebasesize)
    except KeyError:
        # rebase extension not enabled; skip its metrics hook.
        pass

    # noderev cache creation
    # The node rev cache is a cache of rev numbers that we are likely to do a
    # node->rev lookup for. Since looking up rev->node is cheaper than
    # node->rev, we use this cache to prefill the changelog radix tree with
    # mappings.
    wrapfunction(branchmap.branchcache, 'write', _branchmapwrite)
    wrapfunction(phases.phasecache, 'advanceboundary', _editphases)
    wrapfunction(phases.phasecache, 'retractboundary', _editphases)
    try:
        remotenames = extensions.find('remotenames')
        wrapfunction(remotenames, 'saveremotenames', _saveremotenames)
    except KeyError:
        # remotenames extension not enabled; skip its cache hook.
        pass

    wrapfunction(namespaces.namespaces, 'singlenode', _singlenode)
|
|
|
|
def reposetup(ui, repo):
    """Per-repo setup: prefill the node->rev cache and install tweaks.

    - Preloads cached revs into the changelog nodemap (local repos only).
    - Optionally disables the branchmap update during commit.
    - Logs how far the nodemap lags behind the changelog, if the
      changelog implementation exposes a ``lag`` attribute.
    """
    if repo.local() is not None:
        _preloadrevs(repo)

    # developer config: perftweaks.disableupdatebranchcacheoncommit
    if repo.ui.configbool('perftweaks', 'disableupdatebranchcacheoncommit'):
        class perftweaksrepo(repo.__class__):
            @localrepo.unfilteredmethod
            def updatecaches(self, tr=None):
                # Disable "branchmap.updatecache(self.filtered('served'))"
                # code path guarded by "if tr.changes['revs']". First, we
                # don't have on-disk branchmap. Second, accessing
                # "repo.filtered('served')" alone is not very cheap.
                bakrevs = None
                if tr and 'revs' in tr.changes:
                    bakrevs = tr.changes['revs']
                    tr.changes['revs'] = frozenset()
                try:
                    super(perftweaksrepo, self).updatecaches(tr)
                finally:
                    # Restore whenever we stashed a value, even an empty
                    # collection. A truthiness test ("if bakrevs:") would
                    # skip restoring an empty set and leave our frozenset
                    # in tr.changes['revs'].
                    if bakrevs is not None:
                        tr.changes['revs'] = bakrevs

        repo.__class__ = perftweaksrepo

    # record nodemap lag
    try:
        lag = repo.changelog.nodemap.lag
        ui.log('nodemap_lag', '', nodemap_lag=lag)
    except AttributeError:
        # nodemap has no lag attribute on this changelog type; nothing to log.
        pass
|
|
|
|
def _singlenode(orig, self, repo, name):
|
|
"""Skips reading branches namespace if unnecessary"""
|
|
# developer config: perftweaks.disableresolvingbranches
|
|
if not repo.ui.configbool('perftweaks', 'disableresolvingbranches'):
|
|
return orig(self, repo, name)
|
|
|
|
# If branches are disabled, only resolve the 'default' branch. Loading
|
|
# 'branches' is O(len(changelog)) time complexity because it calls
|
|
# headrevs() which scans the entire changelog.
|
|
names = self._names
|
|
namesbak = names.copy()
|
|
if name != 'default' and 'branches' in names:
|
|
del names['branches']
|
|
try:
|
|
return orig(self, repo, name)
|
|
finally:
|
|
self.names = namesbak
|
|
|
|
def _readtagcache(orig, ui, repo):
|
|
"""Disables reading tags if the repo is known to not contain any."""
|
|
if ui.configbool('perftweaks', 'disabletags'):
|
|
return (None, None, None, {}, False)
|
|
|
|
return orig(ui, repo)
|
|
|
|
def _checkcollision(orig, repo, wmf, actions):
|
|
"""Disables case collision checking since it is known to be very slow."""
|
|
if repo.ui.configbool('perftweaks', 'disablecasecheck'):
|
|
return
|
|
orig(repo, wmf, actions)
|
|
|
|
def _branchmapupdate(orig, self, repo, revgen):
    """Fast branchcache update for repos that use no named branches.

    With perftweaks.disablebranchcache set, every head is assumed to be
    on the 'default' branch, so the cache can be rebuilt straight from
    cl.headrevs() without consuming revgen. Falls back to the original
    update otherwise.
    """
    if not repo.ui.configbool('perftweaks', 'disablebranchcache'):
        return orig(self, repo, revgen)

    cl = repo.changelog
    tonode = cl.node

    # Cache already covers the changelog tip and is still valid: no work.
    if self.tiprev == len(cl) - 1 and self.validfor(repo):
        return

    # Since we have no branches, the default branch heads are equal to
    # cl.headrevs(). Note: cl.headrevs() is already sorted and it may return
    # -1.
    branchheads = [i for i in cl.headrevs() if i >= 0]

    if not branchheads:
        # Empty repo: drop any stale 'default' entry.
        if 'default' in self:
            del self['default']
        tiprev = -1
    else:
        self['default'] = [tonode(rev) for rev in branchheads]
        tiprev = branchheads[-1]
    self.tipnode = cl.node(tiprev)
    self.tiprev = tiprev
    self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
    repo.ui.log('branchcache', 'perftweaks updated %s branch cache\n',
                repo.filtername)
|
|
|
|
def _branchmapread(orig, repo):
|
|
# developer config: perftweaks.disablebranchcache2
|
|
if not repo.ui.configbool('perftweaks', 'disablebranchcache2'):
|
|
return orig(repo)
|
|
# Don't bother reading branchmap since branchcache.update() will be called
|
|
# anyway and that is O(changelog)
|
|
|
|
def _branchmapreplacecache(orig, repo, bm):
|
|
if not repo.ui.configbool('perftweaks', 'disablebranchcache2'):
|
|
return orig(repo, bm)
|
|
# Don't bother writing branchmap since we don't read it
|
|
|
|
def _branchmapupdatecache(orig, repo):
|
|
if not repo.ui.configbool('perftweaks', 'disablebranchcache2'):
|
|
return orig(repo)
|
|
|
|
# The original logic has unnecessary steps, ex. it calculates the "served"
|
|
# repoview as an attempt to build branchcache for "visible". And then
|
|
# calculates "immutable" for calculating "served", recursively.
|
|
#
|
|
# Just use a shortcut path that construct the branchcache directly.
|
|
partial = repo._branchcaches.get(repo.filtername)
|
|
if partial is None:
|
|
partial = branchmap.branchcache()
|
|
partial.update(repo, None)
|
|
repo._branchcaches[repo.filtername] = partial
|
|
|
|
def _branchmapwrite(orig, self, repo):
|
|
if repo.ui.configbool('perftweaks', 'disablebranchcache2'):
|
|
# Since we don't read the branchcache, don't bother writing it.
|
|
result = None
|
|
else:
|
|
result = orig(self, repo)
|
|
if repo.ui.configbool('perftweaks', 'cachenoderevs', True):
|
|
revs = set()
|
|
nodemap = repo.changelog.nodemap
|
|
for branch, heads in self.iteritems():
|
|
revs.update(nodemap[n] for n in heads)
|
|
name = 'branchheads-%s' % repo.filtername
|
|
_savepreloadrevs(repo, name, revs)
|
|
return result
|
|
|
|
def _saveremotenames(orig, repo, remotepath, branches=None, bookmarks=None):
    """Wrap remotenames.saveremotenames to cache remote-name revs.

    After the original save, records the rev numbers of the remote
    bookmarks and branch heads (when perftweaks.cachenoderevs is on) so
    future commands can prefill the node->rev map cheaply.
    """
    result = orig(repo, remotepath, branches=branches, bookmarks=bookmarks)
    if repo.ui.configbool('perftweaks', 'cachenoderevs', True):
        revs = set()
        nodemap = repo.changelog.nodemap
        if bookmarks:
            # bookmarks maps name -> hex node (bin() converts hex to binary).
            for b, n in bookmarks.iteritems():
                n = bin(n)
                # remotenames can pass bookmarks that don't exist in the
                # changelog yet. It filters them internally, but we need to as
                # well.
                if n in nodemap:
                    revs.add(nodemap[n])
        if branches:
            # branches maps name -> iterable of binary nodes; same filtering
            # applies.
            for branch, nodes in branches.iteritems():
                for n in nodes:
                    if n in nodemap:
                        revs.add(nodemap[n])

        name = 'remotenames-%s' % remotepath
        _savepreloadrevs(repo, name, revs)

    return result
|
|
|
|
def _editphases(orig, self, repo, tr, *args):
    """Wrap phasecache boundary moves to cache phase-root revs.

    After the boundary edit, saves the rev numbers of all phase roots so
    later commands can prefill the node->rev map. When a transaction is
    active, the save is deferred via a file generator so it happens at
    the same time phases are written.
    """
    result = orig(self, repo, tr, *args)

    def _write(fp):
        # Collect the revs of every phase root present in the changelog.
        revs = set()
        nodemap = repo.changelog.nodemap
        for phase, roots in enumerate(self.phaseroots):
            for n in roots:
                if n in nodemap:
                    revs.add(nodemap[n])
        _savepreloadrevs(repo, 'phaseroots', revs)

    # We don't actually use the transaction file generator. It's just a hook so
    # we can write out at the same time as phases.
    if tr:
        tr.addfilegenerator('noderev-phaseroot', ('phaseroots-fake',), _write)
    else:
        # fp is not used anyway
        _write(fp=None)

    return result
|
|
|
|
def _cachefilename(name):
|
|
return 'noderevs/%s' % name
|
|
|
|
def _preloadrevs(repo):
    # Preloading the node-rev map for likely to be used revs saves 100ms on
    # every command. This is because normally to look up a node, hg has to scan
    # the changelog.i file backwards, potentially reading through hundreds of
    # thousands of entries and building a cache of them. Looking up a rev
    # however is fast, because we know exactly what offset in the file to read.
    # Reading old commits is common, since the branchmap needs to convert old
    # branch heads from node to rev.

    if repo.ui.configbool('perftweaks', 'cachenoderevs', True):
        repo = repo.unfiltered()
        revs = set()
        cachedir = repo.vfs.join('cache', 'noderevs')
        try:
            # Each file under cache/noderevs holds one rev number per line.
            for cachefile in os.listdir(cachedir):
                filename = _cachefilename(cachefile)
                revs.update(int(r) for r in repo.cachevfs(filename))

            # Prefill node->rev using cheap rev->node lookups.
            getnode = repo.changelog.node
            nodemap = repo.changelog.nodemap
            for r in revs:
                try:
                    node = getnode(r)
                    nodemap[node] = r
                except (IndexError, ValueError):
                    # Rev no longer exists or rev is out of range
                    pass
        except EnvironmentError:
            # No permission to read? No big deal
            pass
|
|
|
|
def _savepreloadrevs(repo, name, revs):
    """Persist a set of rev numbers under cache/noderevs/<name>.

    Best effort: failures to create the directory or write the file are
    silently ignored, since this is purely a performance cache.
    """
    if repo.ui.configbool('perftweaks', 'cachenoderevs', True):
        cachedir = repo.vfs.join('cache', 'noderevs')
        try:
            repo.vfs.mkdir(cachedir)
        except OSError as ex:
            # If we failed because the directory already exists,
            # continue. In all other cases (e.g., no permission to create the
            # directory), just silently return without doing anything.
            if ex.errno != errno.EEXIST:
                return

        try:
            filename = _cachefilename(name)
            # atomictemp prevents readers from seeing a half-written file.
            f = repo.cachevfs.open(filename, mode='w+', atomictemp=True)
            f.write('\n'.join(str(r) for r in revs))
            f.close()
        except EnvironmentError:
            # No permission to write? No big deal
            pass
|
|
|
|
def _trackdirstatesizes(runcommand, lui, repo, *args):
|
|
res = runcommand(lui, repo, *args)
|
|
if repo is not None and repo.local():
|
|
dirstate = repo.dirstate
|
|
if 'treedirstate' in getattr(repo, 'requirements', set()):
|
|
# Treedirstate always has the dirstate size available.
|
|
lui.log('dirstate_size', '', dirstate_size=len(dirstate._map))
|
|
elif '_map' in vars(dirstate) and '_map' in vars(dirstate._map):
|
|
# For other dirstate types, access the inner map directly. If the
|
|
# _map attribute is missing on the map, the dirstate was not loaded.
|
|
lui.log('dirstate_size', '', dirstate_size=len(dirstate._map._map))
|
|
return res
|
|
|
|
def _tracksparseprofiles(runcommand, lui, repo, *args):
|
|
res = runcommand(lui, repo, *args)
|
|
if repo is not None and repo.local():
|
|
if util.safehasattr(repo, 'getactiveprofiles'):
|
|
profiles = repo.getactiveprofiles()
|
|
lui.log('sparse_profiles', '',
|
|
active_profiles=','.join(sorted(profiles)))
|
|
return res
|
|
|
|
def _trackupdatesize(orig, repo, node, branchmerge, *args, **kwargs):
|
|
if not branchmerge:
|
|
try:
|
|
distance = len(repo.revs('(%s %% .) + (. %% %s)', node, node))
|
|
repo.ui.log('update_size', '', update_distance=distance)
|
|
except Exception:
|
|
# error may happen like: RepoLookupError: unknown revision '-1'
|
|
pass
|
|
|
|
stats = orig(repo, node, branchmerge, *args, **kwargs)
|
|
repo.ui.log('update_size', '', update_filecount=sum(stats))
|
|
return stats
|
|
|
|
def _trackrebasesize(orig, self, destmap):
|
|
result = orig(self, destmap)
|
|
if not destmap:
|
|
return result
|
|
|
|
# The code assumes the rebase source is roughly a linear stack within a
|
|
# single feature branch, and there is only one destination. If that is not
|
|
# the case, the distance might be not accurate.
|
|
repo = self.repo
|
|
destrev = max(destmap.values())
|
|
rebaseset = destmap.keys()
|
|
commitcount = len(rebaseset)
|
|
distance = len(repo.revs('(%ld %% %d) + (%d %% %ld)',
|
|
rebaseset, destrev, destrev, rebaseset))
|
|
# 'distance' includes the commits being rebased, so subtract them to get the
|
|
# actual distance being traveled. Even though we log update_distance above,
|
|
# a rebase may run multiple updates, so that value might be not be accurate.
|
|
repo.ui.log('rebase_size', '', rebase_commitcount=commitcount,
|
|
rebase_distance=distance - commitcount)
|
|
|
|
return result
|