mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 16:31:02 +03:00
4ddfb704d3
Summary: Upstream Mercurial has refactored the manifest to get rid of the manifest class. This patch updates fastmanifest to work with the new class structure. In particular, it replaces the hacky wrapping of 3 different manifest construction functions with a single wrapping of manifestlog.get(), which makes the code simpler and more robust. Test Plan: Ran the tests Reviewers: #mercurial, rmcelroy Reviewed By: rmcelroy Subscribers: rmcelroy, stash, mjpieters Differential Revision: https://phabricator.intern.facebook.com/D4180874 Signature: t1:4180874:1479286125:bbd2a36aa86237d68036b9d7b0a580829219d869
384 lines
15 KiB
Python
384 lines
15 KiB
Python
# cachemanager.py
|
|
#
|
|
# Copyright 2016 Facebook, Inc.
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
import os
|
|
import errno
|
|
|
|
from mercurial import extensions, revlog, scmutil, util, error
|
|
|
|
import cfastmanifest
|
|
import concurrency
|
|
import constants
|
|
from metrics import metricscollector
|
|
from implementation import fastmanifestcache, CacheFullException
|
|
|
|
def _relevantremonamesrevs(repo):
    """Return the revision numbers of the interesting remote bookmarks.

    The names considered interesting come from the
    ``fastmanifest.relevantremotenames`` config list (``master`` when unset).
    An empty set is returned when the remotenames extension is not loaded.
    """
    try:
        remotenames = extensions.find('remotenames')
    except KeyError:  # remotenames not loaded
        return set()
    if remotenames is None:
        return set()

    # interesting remotenames to fetch
    wanted = set(repo.ui.configlist("fastmanifest",
                                    "relevantremotenames",
                                    ["master"]))
    return set(repo[rev].rev()
               for rev, kind, prefix, name
               in remotenames.readremotenames(repo)
               if name in wanted and kind == "bookmarks")
|
|
|
|
def fastmanifestcached(repo, subset, x):
    """Revset encompassing all revisions whose manifests are cached

    The on-disk cache is keyed by manifest node, so we have to map manifest
    nodes back to changelog revisions:

    1) For every cached manifest, use its linkrev to find the changelog rev
       that introduced it.
    2) Take the minimum of those changelog revs; every changelog rev whose
       manifest is cached is at or above that minimum.
    3) Walk the changelog from that minimum and keep the revisions whose
       manifest node is one of the cached ones.
    """
    cache = fastmanifestcache.getinstance(repo.store.opener, repo.ui)
    # Cache entry names carry a "fast" marker around the hex manifest node.
    cachednodes = set(revlog.bin(entry.replace("fast",""))
                      for entry in cache.ondiskcache)
    mfrevlog = repo.manifestlog._revlog
    linkrevs = [mfrevlog.linkrev(mfrevlog.rev(node)) for node in cachednodes]

    found = set()
    if linkrevs:
        found.update(
            rev for rev in repo.changelog.revs(min(linkrevs))
            if repo.changelog.changelogrevision(rev).manifest in cachednodes)
    return subset & found
|
|
|
|
def fastmanifesttocache(repo, subset, x):
    """Revset of the interesting revisions to cache. This returns:
    - Drafts
    - Revisions with a bookmark
    - Revisions with some selected remote bookmarks (master, stable ...)
    - Their parents (to make diff -c faster)
    - TODO The base of potential rebase operations
    - Filtering all of the above to only include recent changes

    The recency filter is ``fastmanifest.cachecutoffdays`` (60 by default);
    a value of -1 disables it. The remote-bookmark revisions are added
    without going through the date filter.
    """
    # Start from the relevant remotenames
    interesting = _relevantremonamesrevs(repo)

    # Then add all the other relevant revs
    base = "(not public() & not hidden()) + bookmark()"
    days = repo.ui.configint("fastmanifest", "cachecutoffdays", 60)
    # -1 means "no cutoff"
    recent = "" if days == -1 else "and date(-%d)" % days

    spec = "(%s + parents(%s)) %s" % (base, base, recent)
    interesting.update(scmutil.revrange(repo, [spec]))

    metricscollector.get().recordsample("revsetsize", size=len(interesting))
    return subset & interesting
|
|
|
|
# Binary size units, in bytes.
MB = 1024 * 1024
GB = 1024 * MB
|
|
|
|
class _systemawarecachelimit(object):
    """A limit that will be tighter as the free disk space reduces

    Instances expose:
    - ``free``: bytes available on the probed filesystem
      (``f_bavail * f_frsize``), or 0 when the probe was denied
    - ``total``: size of the probed filesystem in bytes
      (``f_blocks * f_frsize``), or 0 when the probe was denied
    - ``config``: dict of keyword overrides for ``cacheallocation`` read
      from the ``[fastmanifest]`` config section
    """

    def parseconfig(self, ui):
        """Read the numeric tuning options from the [fastmanifest] section.

        Returns a dict mapping every option that is set and parses as a
        float to its value. Options that are unset are omitted; options that
        cannot be parsed are omitted too (so the default applies) and a
        warning naming the offending option is printed.
        """
        configkeys = (
            'lowgrowthslope',
            'lowgrowththresholdgb',
            'maxcachesizegb',
            'highgrowthslope',
        )
        configs = {}
        for configkey in configkeys:
            strconfig = ui.config("fastmanifest", configkey)
            if strconfig is None:
                continue
            try:
                configs[configkey] = float(strconfig)
            except ValueError:
                # Keep default value and print a warning when config is
                # invalid. The placeholder is the option name, not its value.
                msg = ("Invalid config for fastmanifest.%s, "
                       "expected a number\n")
                ui.warn(msg % configkey)
        return configs

    def __init__(self, repo=None, opener=None, ui=None):
        """Probe the filesystem and load the configuration.

        Either ``repo`` or both ``opener`` and ``ui`` must be given. When
        ``repo`` is given, its root is probed and its ui is used for the
        configuration; otherwise the opener's base path and ``ui`` are used.

        Raises error.Abort when neither combination is provided, and
        re-raises any OSError/IOError from the probe other than EACCES.
        """
        if repo is None and (opener is None or ui is None):
            raise error.Abort("Need to specify repo or (opener and ui)")

        # Probe the system root partition to know what is available
        try:
            if repo is not None:
                st = os.statvfs(repo.root)
            else:
                st = os.statvfs(opener.join(None))
        except (OSError, IOError) as ex:
            if ex.errno != errno.EACCES:
                raise
            # Not allowed to look at the filesystem: behave as if there is
            # no space. We still fall through to load the config so that
            # bytes() keeps working on this instance.
            self.free = 0
            self.total = 0
        else:
            self.free = st.f_bavail * st.f_frsize
            self.total = st.f_blocks * st.f_frsize

        # Read parameters from config
        if repo is not None:
            self.config = self.parseconfig(repo.ui)
        else:
            self.config = self.parseconfig(ui)

    def bytes(self):
        """Return the cache size limit, in bytes, for the probed free space."""
        return _systemawarecachelimit.cacheallocation(self.free,
                                                      **self.config)

    @staticmethod
    def cacheallocation(
            freespace,
            lowgrowththresholdgb=constants.DEFAULT_LOWGROWTH_TRESHOLDGB,
            lowgrowthslope=constants.DEFAULT_LOWGROWTH_SLOPE,
            maxcachesizegb=constants.DEFAULT_MAXCACHESIZEGB,
            highgrowthslope=constants.DEFAULT_HIGHGROWTHSLOPE):
        """Given the free space available in bytes, return the size of the cache

        When disk space is limited (less than lowgrowththreshold), we increase
        the cache size linearly: lowgrowthslope * freespace. Over
        lowgrowththreshold, we increase the cache size linearly but faster:
        highgrowthslope * freespace until we hit maxcachesize.

        These values are configurable, default values are:

        [fastmanifest]
        lowgrowththresholdgb = 20
        lowgrowthslope = 0.1
        highgrowthslope = 0.2
        maxcachesizegb = 6

        ^ Cache Size
        |
        |      /-------------------  <- maxcachesize
        |     |
        |    /   <- slope is highgrowthslope
        |   |    <- lowgrowththreshold
        |  /
        | /      <- slope is lowgrowthslope
        |/
        -------------------------> Free Space
        """
        if freespace < lowgrowththresholdgb * GB:
            slope = lowgrowthslope
        else:
            slope = highgrowthslope
        # The cap applies in both regimes.
        return min(maxcachesizegb * GB, slope * freespace)
|
|
|
|
def cachemanifestpruneall(ui, repo):
    """Call ``pruneall()`` on the fastmanifest cache of ``repo``."""
    fastmanifestcache.getinstance(repo.store.opener, ui).pruneall()
|
|
|
|
def cachemanifestlist(ui, repo):
    """Print a summary of the on-disk fastmanifest cache.

    Always prints the total size and number of entries. In debug mode it
    additionally prints one line per cache entry, in the iteration order of
    the on-disk cache, together with the changelog revisions that use that
    manifest.
    """
    cache = fastmanifestcache.getinstance(repo.store.opener, ui)
    total, numentries = cache.ondiskcache.totalsize(silent=False)
    ui.status(("cache size is: %s\n" % util.bytecount(total)))
    ui.status(("number of entries is: %s\n" % numentries))

    # `ui.debug` is a bound method and therefore always truthy; the
    # attribute that reflects --debug is `ui.debugflag`.
    if not ui.debugflag:
        return

    import collections

    revs = set(repo.revs("fastmanifestcached()"))
    # hex manifest node -> list of changelog revs (as strings) using it
    revstoman = collections.defaultdict(list)
    for r in revs:
        mannode = revlog.hex(repo.changelog.changelogrevision(r).manifest)
        revstoman[mannode].append(str(r))

    if revs:
        ui.status(("Most relevant cache entries appear first\n"))
        ui.status(("="*80))
        ui.status(("\nmanifest node                           |revs\n"))

    for entry in cache.ondiskcache:
        # Entry names carry a "fast" marker around the hex manifest node.
        hexnode = entry.replace("fast", "")
        ui.status("%s|%s\n" % (hexnode, ",".join(revstoman.get(hexnode, []))))
|
|
|
|
def cachemanifestfillandtrim(ui, repo, revset):
    """Cache the manifests described by `revset`.  This priming is subject to
    limits imposed by the cache, and thus not all the entries may be written.

    The work is done under the "fastmanifest" loose lock; if the lock is
    already held the function returns without doing anything. A permission
    error (EACCES) prints a warning and returns; any other OSError/IOError
    is re-raised. On success, on-disk cache statistics are recorded with the
    metrics collector.
    """
    try:
        with concurrency.looselock(repo.vfs,
                                   "fastmanifest",
                                   constants.WORKER_SPAWN_LOCK_STEAL_TIMEOUT):
            cache = fastmanifestcache.getinstance(repo.store.opener, ui)

            computedrevs = scmutil.revrange(repo, revset)
            # Most recent (highest) revisions first.
            sortedrevs = sorted(computedrevs, reverse=True)
            repo.ui.log("fastmanifest", "FM: trying to cache %s\n"
                        % str(sortedrevs))

            if not sortedrevs:
                # normally, we prune as we make space for new revisions to add
                # to the cache.  however, if we're not adding any new elements,
                # we'll never check the disk cache size.  this is an explicit
                # check for that particular scenario.
                before = set(cache.ondiskcache.items())
                cache.prune()
                after = set(cache.ondiskcache.items())
                # Entries that were present before pruning and are gone now.
                removed = before - after
                if removed:
                    ui.log("fastmanifest", "FM: removed entries %s\n" %
                           str(removed))
                else:
                    ui.log("fastmanifest", "FM: no entries removed\n")
            else:
                revstomannodes = {}
                mannodesprocessed = set()
                for rev in sortedrevs:
                    mannode = revlog.hex(
                        repo.changelog.changelogrevision(rev).manifest)
                    # NOTE(review): the rev is recorded before we know the
                    # write succeeded, so the rev that triggers
                    # CacheFullException below is still touched afterwards
                    # rather than counted as overflow -- confirm intended.
                    revstomannodes[rev] = mannode
                    mannodesprocessed.add(mannode)

                    if mannode in cache.ondiskcache:
                        ui.debug("[FM] skipped %s, already cached "
                                 "(fast path)\n" % (mannode,))
                        repo.ui.log("fastmanifest",
                                    "FM: skip(rev, man) %s->%s\n" %
                                    (rev, mannode))

                        # Account for the fact that we access this manifest
                        cache.ondiskcache.touch(mannode)
                        continue

                    manifest = repo[rev].manifest()
                    fastmanifest = cfastmanifest.fastmanifest(manifest.text())

                    cache.makeroomfor(fastmanifest.bytes(), mannodesprocessed)

                    try:
                        cache[mannode] = fastmanifest
                        repo.ui.log("fastmanifest", "FM: cached(rev,man) "
                                    "%s->%s\n" %
                                    (rev, mannode))
                    except CacheFullException:
                        repo.ui.log("fastmanifest", "FM: overflow\n")
                        break

                # Make the least relevant entries have an artificially older
                # mtime than the more relevant ones. We use a resolution of 2
                # for time to work across all platforms and ensure that the
                # order is marked.
                #
                # Note that we use sortedrevs and not revs because here we
                # don't care about the shuffling, we just want the most
                # relevant revisions to have more recent mtime.
                mtimemultiplier = 2
                for offset, rev in enumerate(sortedrevs):
                    if rev in revstomannodes:
                        hexnode = revstomannodes[rev]
                        cache.ondiskcache.touch(hexnode,
                                                delay=offset * mtimemultiplier)
                    else:
                        # We didn't have enough space for that rev
                        metricscollector.get().recordsample("cacheoverflow",
                                                            hit=True)
    except error.LockHeld:
        return
    except (OSError, IOError) as ex:
        if ex.errno == errno.EACCES:
            # permission issue
            ui.warn(("warning: not using fastmanifest\n"))
            ui.warn(("(make sure that .hg/store is writeable)\n"))
            return
        raise

    total, numentries = cache.ondiskcache.totalsize()
    if isinstance(cache.limit, _systemawarecachelimit):
        free = cache.limit.free / MB
    else:
        free = -1
    metricscollector.get().recordsample("ondiskcachestats",
                                        bytes=total,
                                        numentries=numentries,
                                        limit=(cache.limit.bytes() / MB),
                                        freespace=free)
|
|
|
|
class cacher(object):
    """Namespace for the cache-priming entry point."""

    @staticmethod
    def cachemanifest(repo):
        """Fill and trim the cache of ``repo`` using the
        ``fastmanifesttocache()`` revset.
        """
        cachemanifestfillandtrim(repo.ui, repo, ["fastmanifesttocache()"])
|
|
|
|
class triggers(object):
    """Hooks that schedule and run cache updates.

    The ``on*`` wrappers record which repositories changed; the
    ``runcommandtrigger`` wrapper then refreshes the cache of each recorded
    repository once the wrapped call has returned.
    """

    # Repositories scheduled for a cache refresh by the on* wrappers.
    repos_to_update = set()

    @staticmethod
    def _schedulecaching(repo, source):
        """Schedule ``repo`` for caching if fastmanifest.cacheonchange is set.

        ``source`` names what caused the trigger; it is used for the metrics
        sample and the log line.
        """
        ui = repo.ui
        if ui.configbool("fastmanifest", "cacheonchange", False):
            triggers.repos_to_update.add(repo)
            metricscollector.get().recordsample("trigger", source=source)
            ui.log("fastmanifest", "FM: caching trigger: %s\n" % source)

    @staticmethod
    def runcommandtrigger(orig, *args, **kwargs):
        """Call ``orig`` and then refresh the cache of every scheduled repo.

        Depending on fastmanifest.cacheonchangebackground (default True) the
        refresh is done by spawning ``hg --repository <root> cachemanifest``
        (the executable can be overridden with the SCM_WORKER_EXE environment
        variable) or inline in this process. Returns what ``orig`` returned.
        """
        result = orig(*args, **kwargs)

        # Drain the pending set so that a later call in the same process does
        # not re-trigger caching for repositories that were already handled,
        # and so that we do not keep them alive forever.
        pending = list(triggers.repos_to_update)
        triggers.repos_to_update.clear()

        for repo in pending:
            repo.ui.log("fastmanifest", "FM: triggering caching for %s\n"
                        % repo.root)
            bg = repo.ui.configbool("fastmanifest",
                                    "cacheonchangebackground",
                                    True)

            if bg:
                silent_worker = repo.ui.configbool(
                    "fastmanifest", "silentworker", True)

                # see if the user wants us to invoke a specific instance of
                # mercurial.
                workerexe = os.environ.get("SCM_WORKER_EXE")

                cmd = util.hgcmd()[:]
                if workerexe is not None:
                    cmd[0] = workerexe

                cmd.extend(["--repository",
                            repo.root,
                            "cachemanifest"])

                repo.ui.log("fastmanifest", "FM: running command %s\n" % cmd)
                concurrency.runshellcommand(cmd, silent_worker=silent_worker)
            else:
                cacher.cachemanifest(repo)

        return result

    @staticmethod
    def onbookmarkchange(orig, self, *args, **kwargs):
        """Wrapper scheduling a cache refresh for ``self._repo`` (source
        "bookmark") before delegating to ``orig``.
        """
        triggers._schedulecaching(self._repo, "bookmark")
        return orig(self, *args, **kwargs)

    @staticmethod
    def oncommit(orig, self, *args, **kwargs):
        """Wrapper scheduling a cache refresh for the repo ``self`` (source
        "commit") before delegating to ``orig``.
        """
        triggers._schedulecaching(self, "commit")
        return orig(self, *args, **kwargs)

    @staticmethod
    def onremotenameschange(orig, repo, *args, **kwargs):
        """Wrapper scheduling a cache refresh for ``repo`` (source
        "remotenames") before delegating to ``orig``.
        """
        triggers._schedulecaching(repo, "remotenames")
        return orig(repo, *args, **kwargs)
|