sapling/fastmanifest/cachemanager.py
Durham Goode 4ddfb704d3 fastmanifest: update to work with manifestlog
Summary:
Upstream Mercurial has refactored the manifest to get rid of the manifest class.
This patch updates fastmanifest to work with the new class structure. In
particular, it removes the hacky wrapping of 3 different manifest construction
functions, with a single wrapping of manifestlog.get(), which makes the code
simpler and more robust.

Test Plan: Ran the tests

Reviewers: #mercurial, rmcelroy

Reviewed By: rmcelroy

Subscribers: rmcelroy, stash, mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D4180874

Signature: t1:4180874:1479286125:bbd2a36aa86237d68036b9d7b0a580829219d869
2016-11-16 12:11:06 -08:00

384 lines
15 KiB
Python

# cachemanager.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import os
import errno
from mercurial import extensions, revlog, scmutil, util, error
import cfastmanifest
import concurrency
import constants
from metrics import metricscollector
from implementation import fastmanifestcache, CacheFullException
def _relevantremonamesrevs(repo):
    """Return the changelog revs targeted by the relevant remote bookmarks.

    The relevant names are read from the ``fastmanifest.relevantremotenames``
    config list (``["master"]`` when not configured). When the remotenames
    extension cannot be found, an empty set is returned.
    """
    try:
        remotenames = extensions.find('remotenames')
    except KeyError:
        # remotenames not loaded: there is nothing to look up
        return set()
    if remotenames is None:
        return set()

    # interesting remotenames to fetch
    wanted = set(repo.ui.configlist("fastmanifest",
                                    "relevantremotenames",
                                    ["master"]))
    found = set()
    for rev, kind, prefix, name in remotenames.readremotenames(repo):
        # only remote *bookmarks* carrying one of the wanted names count
        if name in wanted and kind == "bookmarks":
            found.add(repo[rev].rev())
    return found
def fastmanifestcached(repo, subset, x):
    """Revset encompassing all revisions whose manifests are cached"""
    # Overview: go from the cached manifest nodes back to changelog revs.
    #
    # 1) For every cached manifest, use its linkrev to find the changelog rev
    #    that first introduced it.
    # 2) Take the minimum of those revs: every changelog rev whose manifest is
    #    cached is at or above that minimum.
    # 3) Walk the changelog from that minimum upwards and keep the revs whose
    #    manifest node is one of the cached ones.
    cache = fastmanifestcache.getinstance(repo.store.opener, repo.ui)
    # on-disk entry names are the hex manifest node with "fast" added
    cachednodes = set(revlog.bin(entry.replace("fast",""))
                      for entry in cache.ondiskcache)
    mfrevlog = repo.manifestlog._revlog
    linkrevs = [mfrevlog.linkrev(mfrevlog.rev(node)) for node in cachednodes]
    if not linkrevs:
        # nothing is cached, so no revision can match
        return subset & set()

    changelog = repo.changelog
    matching = set(
        rev for rev in changelog.revs(min(linkrevs))
        if changelog.changelogrevision(rev).manifest in cachednodes)
    return subset & matching
def fastmanifesttocache(repo, subset, x):
    """Revset of the interesting revisions to cache. This returns:
    - Drafts
    - Revisions with a bookmarks
    - Revisions with some selected remote bookmarks (master, stable ...)
    - Their parents (to make diff -c faster)
    - TODO The base of potential rebase operations
    - Filtering all of the above to only include recent changes
    """
    # Start from the revs pointed to by the relevant remotenames
    interesting = _relevantremonamesrevs(repo)

    # Then add drafts, bookmarked revs and their parents, optionally limited
    # to the last `cachecutoffdays` days (-1 disables the date limit).
    query = "(not public() & not hidden()) + bookmark()"
    cutoff = repo.ui.configint("fastmanifest", "cachecutoffdays", 60)
    datelimit = "" if cutoff == -1 else "and date(-%d)" % cutoff
    spec = "(%s + parents(%s)) %s" % (query, query, datelimit)
    interesting.update(scmutil.revrange(repo, [spec]))

    metricscollector.get().recordsample("revsetsize", size=len(interesting))
    return subset & interesting
# Byte-size units used when converting the *gb config values to bytes.
MB = 1024 * 1024
GB = 1024 * MB
class _systemawarecachelimit(object):
    """A limit that will be tighter as the free disk space reduces"""

    def parseconfig(self, ui):
        """Read the numeric tuning knobs from the ``[fastmanifest]`` section.

        Returns a dict mapping each config key that is set and parses as a
        float to its value. Keys that are unset are omitted; keys whose value
        is not a number are omitted too, after printing a warning, so that
        the defaults of `cacheallocation` apply for them.
        """
        configkeys = (
            'lowgrowthslope',
            'lowgrowththresholdgb',
            'maxcachesizegb',
            'highgrowthslope',
        )
        configs = {}
        for configkey in configkeys:
            strconfig = ui.config("fastmanifest", configkey)
            if strconfig is None:
                continue
            try:
                configs[configkey] = float(strconfig)
            except ValueError:
                # Keep default value and print a warning when config is
                # invalid. The message names the offending *key*.
                msg = ("Invalid config for fastmanifest.%s, "
                       "expected a number\n")
                ui.warn(msg % configkey)
        return configs

    def __init__(self, repo=None, opener=None, ui=None):
        """Probe the partition holding the repo and read the limit config.

        Either `repo`, or both `opener` and `ui`, must be given; otherwise
        error.Abort is raised. If the filesystem probe fails with EACCES the
        limit is initialised with no free space; any other OSError/IOError
        is propagated.
        """
        if repo is None and (opener is None or ui is None):
            raise error.Abort("Need to specify repo or (opener and ui)")

        # Always define config so that bytes() works even when the probe
        # below bails out early.
        self.config = {}

        # Probe the system root partition to know what is available
        try:
            if repo is not None:
                st = os.statvfs(repo.root)
            else:
                st = os.statvfs(opener.join(None))
        except (OSError, IOError) as ex:
            if ex.errno == errno.EACCES:
                self.free = 0
                self.total = 0
                return
            raise
        self.free = st.f_bavail * st.f_frsize
        self.total = st.f_blocks * st.f_frsize

        # Read parameters from config
        if repo is not None:
            self.config = self.parseconfig(repo.ui)
        else:
            self.config = self.parseconfig(ui)

    def bytes(self):
        """Return the cache size allowed for the probed free space."""
        return _systemawarecachelimit.cacheallocation(self.free, **self.config)

    @staticmethod
    def cacheallocation(
            freespace,
            lowgrowththresholdgb=constants.DEFAULT_LOWGROWTH_TRESHOLDGB,
            lowgrowthslope=constants.DEFAULT_LOWGROWTH_SLOPE,
            maxcachesizegb=constants.DEFAULT_MAXCACHESIZEGB,
            highgrowthslope=constants.DEFAULT_HIGHGROWTHSLOPE):
        """Given the free space available in bytes, return the size of the cache

        When disk space is limited (less than lowgrowththreshold), we increase
        the cache size linearly: lowgrowthslope * freespace. Over
        lowgrowththreshold, we increase the cache size linearly but faster:
        highgrowthslope * freespace until we hit maxcachesize.

        These values are configurable, default values are:

        [fastmanifest]
        lowgrowththresholdgb = 20
        lowgrowthslope = 0.1
        highgrowthslope = 0.2
        maxcachesizegb = 6

        ^ Cache Size
        |
        |      /-------------------  <- maxcachesize
        |     |
        |    /  <- slope is highgrowthslope
        |   | <- lowgrowththreshold
        |  /
        | /   <- slope is lowgrowthslope
        |/
        -------------------------> Free Space
        """
        if freespace < lowgrowththresholdgb * GB:
            return min(maxcachesizegb * GB, lowgrowthslope * freespace)
        else:
            return min(maxcachesizegb * GB, highgrowthslope * freespace)
def cachemanifestpruneall(ui, repo):
    """Ask the fastmanifest cache of `repo` to prune all of its entries."""
    fastmanifestcache.getinstance(repo.store.opener, ui).pruneall()
def cachemanifestlist(ui, repo):
    """Print the size and entry count of the on-disk cache.

    When debug output is enabled, additionally print one line per cached
    entry with the manifest node and the changelog revs (from the
    ``fastmanifestcached()`` revset) that use that manifest.
    """
    import collections

    cache = fastmanifestcache.getinstance(repo.store.opener, ui)
    total, numentries = cache.ondiskcache.totalsize(silent=False)
    ui.status(("cache size is: %s\n" % util.bytecount(total)))
    ui.status(("number of entries is: %s\n" % numentries))

    # `ui.debug` is a method and therefore always truthy; the flag carrying
    # the debug setting is `ui.debugflag`.
    if ui.debugflag:
        revs = set(repo.revs("fastmanifestcached()"))
        # hex manifest node -> list of revs (as strings) using that manifest
        revstoman = collections.defaultdict(list)
        for r in revs:
            mannode = revlog.hex(repo.changelog.changelogrevision(r).manifest)
            revstoman[mannode].append(str(r))
        if revs:
            ui.status(("Most relevant cache entries appear first\n"))
            ui.status(("="*80))
            ui.status(("\nmanifest node                           |revs\n"))
            for h in cache.ondiskcache:
                # strip the "fast" marker to recover the hex manifest node
                l = h.replace("fast","")
                ui.status("%s|%s\n" % (l, ",".join(revstoman.get(l,[]))))
def cachemanifestfillandtrim(ui, repo, revset):
    """Cache the manifests described by `revset`. This priming is subject to
    limits imposed by the cache, and thus not all the entries may be written.

    Returns None. The function returns early, without recording the final
    cache statistics, when the "fastmanifest" lock is already held or when a
    permission error (EACCES) occurs; other OSError/IOError are re-raised.
    """
    try:
        with concurrency.looselock(repo.vfs,
                                   "fastmanifest",
                                   constants.WORKER_SPAWN_LOCK_STEAL_TIMEOUT):
            cache = fastmanifestcache.getinstance(repo.store.opener, ui)
            computedrevs = scmutil.revrange(repo, revset)
            # highest revision numbers first
            sortedrevs = sorted(computedrevs, reverse=True)
            repo.ui.log("fastmanifest", "FM: trying to cache %s\n"
                        % str(sortedrevs))

            if not sortedrevs:
                # normally, we prune as we make space for new revisions to add
                # to the cache.  however, if we're not adding any new elements,
                # we'll never check the disk cache size.  this is an explicit
                # check for that particular scenario.
                #
                # Snapshot the entries before pruning so the comparison below
                # does not depend on items() returning an independent copy.
                before = set(cache.ondiskcache.items())
                cache.prune()
                after = set(cache.ondiskcache.items())
                # removed entries are those present before but not after
                removed = before - after
                if removed:
                    ui.log("fastmanifest", "FM: removed entries %s\n" %
                           str(removed))
                else:
                    ui.log("fastmanifest", "FM: no entries removed\n")
            else:
                revstomannodes = {}
                mannodesprocessed = set()
                for rev in sortedrevs:
                    mannode = revlog.hex(
                        repo.changelog.changelogrevision(rev).manifest)
                    revstomannodes[rev] = mannode
                    mannodesprocessed.add(mannode)

                    if mannode in cache.ondiskcache:
                        ui.debug("[FM] skipped %s, already cached "
                                 "(fast path)\n" % (mannode,))
                        repo.ui.log("fastmanifest",
                                    "FM: skip(rev, man) %s->%s\n" %
                                    (rev, mannode))
                        # Account for the fact that we access this manifest
                        cache.ondiskcache.touch(mannode)
                        continue
                    manifest = repo[rev].manifest()
                    fastmanifest = cfastmanifest.fastmanifest(manifest.text())
                    cache.makeroomfor(fastmanifest.bytes(), mannodesprocessed)

                    try:
                        cache[mannode] = fastmanifest
                        repo.ui.log("fastmanifest", "FM: cached(rev,man) "
                                    "%s->%s\n" %
                                    (rev, mannode))
                    except CacheFullException:
                        repo.ui.log("fastmanifest", "FM: overflow\n")
                        break

                # Make the least relevant entries have an artificially older
                # mtime than the more relevant ones. We use a resolution of 2
                # for time to work across all platforms and ensure that the
                # order is marked.
                #
                # Note that we use sortedrevs and not revs because here we
                # don't care about the shuffling, we just want the most relevant
                # revisions to have more recent mtime.
                mtimemultiplier = 2
                for offset, rev in enumerate(sortedrevs):
                    if rev in revstomannodes:
                        hexnode = revstomannodes[rev]
                        cache.ondiskcache.touch(hexnode,
                                                delay=offset * mtimemultiplier)
                    else:
                        # We didn't have enough space for that rev
                        metricscollector.get().recordsample("cacheoverflow",
                                                            hit=True)
    except error.LockHeld:
        return
    except (OSError, IOError) as ex:
        if ex.errno == errno.EACCES:
            # permission issue
            ui.warn(("warning: not using fastmanifest\n"))
            ui.warn(("(make sure that .hg/store is writeable)\n"))
            return
        raise

    # Report the resulting cache size; sizes other than `bytes` are in MiB.
    total, numentries = cache.ondiskcache.totalsize()
    if isinstance(cache.limit, _systemawarecachelimit):
        free = cache.limit.free / 1024**2
    else:
        free = -1
    metricscollector.get().recordsample("ondiskcachestats",
                                        bytes=total,
                                        numentries=numentries,
                                        limit=(cache.limit.bytes() / 1024**2),
                                        freespace=free)
class cacher(object):
    """Namespace for the cache-priming entry point."""

    @staticmethod
    def cachemanifest(repo):
        """Cache the manifests selected by the fastmanifesttocache() revset."""
        cachemanifestfillandtrim(repo.ui, repo, ["fastmanifesttocache()"])
class triggers(object):
    """Wrappers that record which repos changed and refresh their cache.

    The on* wrappers add the repo to `repos_to_update` when
    ``fastmanifest.cacheonchange`` is enabled; `runcommandtrigger` then
    caches manifests for every recorded repo after the wrapped call returns.
    """
    # repos for which a caching run has been requested
    repos_to_update = set()

    @staticmethod
    def runcommandtrigger(orig, *args, **kwargs):
        """Call `orig`, then trigger caching for every recorded repo."""
        result = orig(*args, **kwargs)

        for repo in triggers.repos_to_update:
            ui = repo.ui
            ui.log("fastmanifest", "FM: triggering caching for %s\n"
                   % repo.root)
            inbackground = ui.configbool("fastmanifest",
                                         "cacheonchangebackground",
                                         True)
            if not inbackground:
                # cache synchronously
                cacher.cachemanifest(repo)
                continue

            silent_worker = ui.configbool(
                "fastmanifest", "silentworker", True)

            # see if the user wants us to invoke a specific instance of
            # mercurial.
            workerexe = os.environ.get("SCM_WORKER_EXE")

            cmd = util.hgcmd()[:]
            if workerexe is not None:
                cmd[0] = workerexe
            cmd.extend(["--repository",
                        repo.root,
                        "cachemanifest"])
            ui.log("fastmanifest", "FM: running command %s\n" % cmd)
            concurrency.runshellcommand(cmd, silent_worker=silent_worker)

        return result

    @staticmethod
    def onbookmarkchange(orig, self, *args, **kwargs):
        """Record the repo of a bookmark store, then call `orig`."""
        repo = self._repo
        ui = repo.ui
        enabled = ui.configbool("fastmanifest", "cacheonchange", False)
        if enabled:
            triggers.repos_to_update.add(repo)
            metricscollector.get().recordsample("trigger", source="bookmark")
            ui.log("fastmanifest", "FM: caching trigger: bookmark\n")
        return orig(self, *args, **kwargs)

    @staticmethod
    def oncommit(orig, self, *args, **kwargs):
        """Record the committing repo (`self`), then call `orig`."""
        ui = self.ui
        enabled = ui.configbool("fastmanifest", "cacheonchange", False)
        if enabled:
            triggers.repos_to_update.add(self)
            metricscollector.get().recordsample("trigger", source="commit")
            ui.log("fastmanifest", "FM: caching trigger: commit\n")
        return orig(self, *args, **kwargs)

    @staticmethod
    def onremotenameschange(orig, repo, *args, **kwargs):
        """Record `repo` after a remotenames change, then call `orig`."""
        ui = repo.ui
        enabled = ui.configbool("fastmanifest", "cacheonchange", False)
        if enabled:
            triggers.repos_to_update.add(repo)
            metricscollector.get().recordsample("trigger", source="remotenames")
            ui.log("fastmanifest", "FM: caching trigger: remotenames\n")
        return orig(repo, *args, **kwargs)