sapling/manifestdiskcache.py
Tony Tung c3ea6021e1 [manifestdiskcache] set up manifestdiskcache without subclassing repo
Summary: It seems subclassing repo can cause problems when a repo is created before the extension is loaded.  This circumvents the issue by directly modifying the data structures in the repo object as needed.

Test Plan: passed test-manifestdiskcache.t

Reviewers: #sourcecontrol, mjpieters

Reviewed By: mjpieters

Subscribers: mitrandir, mjpieters

Differential Revision: https://phabricator.fb.com/D3208899

Tasks: 10836392

Signature: t1:3208899:1461336232:617f8b9258f23ba0b34627785dc2cb4bddcee313
2016-04-22 09:51:07 -07:00

349 lines
12 KiB
Python

# manifestdiskcache.py - manifest disk cache for mercurial
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
'''Cache manifests on disk to speed up access.
This extension intercepts reads and writes of manifests to cache them on disk.
Enable by setting the config variable manifestdiskcache.enabled to True.
On writes, we spawn a second process (to avoid penalizing interactive use) to
check if we should prune the cache. The pruning is guided by several
configuration variables:
manifestdiskcache.pinned-revsets: revsets to pin in the cache.
manifestdiskcache.cache-size: the upper limit for the size of the cache.
manifestdiskcache.runs-between-prunes: the approximate number of writes that
will elapse before we prune.
manifestdiskcache.seconds-between-prunes: the number of seconds since the last
prune that can elapse before we prune.
Because this is a cache, exceptions are generally suppressed. If the
configuration variable manifestdiskcache.logging is set to True, exceptions will
be written to standard error, but will still be suppressed.
err
'''
from mercurial import bookmarks, changegroup, cmdutil, error, extensions
from mercurial import localrepo, manifest, revlog, util
from mercurial.node import bin, hex
from mercurial.i18n import _
import collections
import os
import random
import subprocess
import sys
import time
import traceback
from extutil import replaceclass
# Name of the directory (relative to the store, or to a "meta/<dir>" prefix
# for directory manifests) that holds the cached manifest texts.
CACHE_SUBDIR = 'manifestdiskcache'
# Config section read by this extension. reposetup() also reuses it as the
# opener-options key that carries the "enabled" flag to the manifest class.
CONFIG_KEY = 'manifestdiskcache'
# Opener-options key under which reposetup() publishes the repository root.
# NOTE(review): the doubled 'e' looks like a typo; it is harmless as long as
# the key is only ever referenced through this constant.
REPO_ROOT_KEY = 'manifestdiskcachee.repo_root'
# Length of a hex-encoded node. The prune command treats longer file names in
# the cache directory as leftover temp files.
HEX_SHA_SIZE_BYTES = 40
# presumably Mercurial's extension compatibility marker -- TODO confirm
testedwith = 'internal'
def extsetup(ui):
    """Record whether exception logging was requested for this extension.

    Reads the boolean ``logging`` setting from the extension's config
    section (default False) and stores it in the module-level ``logging``
    flag that the manifest cache code consults before reporting suppressed
    exceptions.
    """
    global logging
    verbose = ui.configbool(CONFIG_KEY, 'logging', False)
    logging = verbose
# Table of commands provided by this extension; it is filled in by the
# @command decorator created on the next line.
cmdtable = {}
command = cmdutil.command(cmdtable)
@command(
    'prunemanifestdiskcache', [],
    _('hg prunemanifestdiskcache'))
def prunemanifestdiskcache(ui, repo):
    """prune the manifest disk cache

    Decides probabilistically whether to prune on this invocation (guided by
    ``runs-between-prunes`` and ``seconds-between-prunes`` and the mtime of a
    ``.marker`` file in the cache directory).  When pruning, cache entries
    for manifests of the revisions matched by ``pinned-revsets`` are always
    kept; the remaining entries are kept from most to least recently
    accessed until ``cache-size`` bytes have been accumulated, and every
    entry past that point is deleted.  Files whose names are longer than a
    hex node are treated as leftover temp files and deleted.

    Raises error.Abort if ``runs-between-prunes`` is < 1, if
    ``seconds-between-prunes`` is < 0, or if ``pinned-revsets`` cannot be
    parsed.
    """
    masterrevset = _masterrevset(ui, repo)

    # retrieve the options.
    pinnedrevsets = ui.config(CONFIG_KEY,
                              'pinned-revsets',
                              "{0} or (draft() and date(-3))".format(
                                  masterrevset))
    cachesizelimit = ui.configbytes(CONFIG_KEY, 'cache-size', '5g')
    runsbetween = ui.configint(CONFIG_KEY, 'runs-between-prunes', 100)
    secondsbetween = ui.configint(CONFIG_KEY, 'seconds-between-prunes', 86400)

    # validate the arguments
    if runsbetween < 1:
        raise error.Abort("runs-between-prunes should be >= 1")
    if secondsbetween < 0:
        raise error.Abort("seconds-between-prunes should be >= 0")

    store = repo.store
    opener = store.opener
    base = store.opener.join(None)
    cachedir = os.path.join(base, CACHE_SUBDIR)

    # decide whether we run.
    markerpath = os.path.join(cachedir, '.marker')
    try:
        markerstat = os.stat(markerpath)
    except OSError:
        # no marker yet, so this is the first prune: create it and carry on.
        # the cache directory may not exist either (e.g. the command was run
        # by hand before anything was cached), so make sure it is there.
        try:
            os.makedirs(cachedir)
        except OSError:
            # most likely it already exists; any real problem will resurface
            # when the marker is opened below.
            pass
        with open(markerpath, 'w'):
            pass
    else:
        now = time.time()
        delta = now - markerstat.st_mtime

        # the odds start at 1/runsbetween right after a prune and grow
        # linearly with the time elapsed, reaching 1 after secondsbetween.
        intercept = (1.0 / runsbetween)
        if secondsbetween == 0:
            # a zero interval means "always prune"; the general formula
            # would divide by zero.
            odds = 1.0
        else:
            odds = intercept + (((1 - intercept) * delta) / secondsbetween)

        if odds < random.random():
            # no pruning.
            ui.note(_("no pruning needed at this time."))
            return

        # update the file timestamp.
        os.utime(markerpath, None)

    # find all the pinned revs.
    changelog = repo.changelog
    revs = set()
    if pinnedrevsets:
        try:
            revs = repo.revs(pinnedrevsets)
        except error.ParseError:
            # without the raise, a bad revset would silently pin nothing and
            # the entries the user wanted to keep could be deleted.
            raise error.Abort(
                "Cannot parse {0}.pinned-revsets.".format(CONFIG_KEY))

    # cache entries are named after the hex manifest node, which is the
    # first field of the changelog entry.
    pinnednodes = set(hex(changelog.read(changelog.node(rev))[0])
                      for rev in revs)

    # enumerate all the existing, unpinned cache entries.
    entries = []
    for dirpath, _dirs, files in opener.walk(CACHE_SUBDIR):
        for fname in files:
            # don't remove the marker.
            if fname == '.marker':
                continue

            path = os.path.join(base, dirpath, fname)
            if len(fname) > HEX_SHA_SIZE_BYTES:
                # this is probably a temp file.  trash it, but do it directly,
                # because opener.unlink will try to case-escape.
                try:
                    os.unlink(path)
                except OSError:
                    # best effort; someone else may have removed it already.
                    pass
                continue

            if fname in pinnednodes:
                # pinned rev, move on.
                continue

            try:
                entrystat = os.stat(path)
            except OSError:
                # file presumably does not exist.
                continue

            entries.append(
                (entrystat.st_atime, entrystat.st_size, path))

    # most recently accessed first, so that the oldest entries are the ones
    # that fall past the size limit below.
    entries.sort(reverse=True)

    ui.debug("pid: {0}\ncache entries: {1}\n".format(
        os.getpid(),
        "\n".join(["{0}".format(entry)
                   for entry in entries])))

    # accumulate up to cachesize, then remove the remainder.
    accumsize = 0
    for _atime, size, path in entries:
        accumsize += size

        if accumsize > cachesizelimit:
            # remove the file, but once again, do it directly because
            # opener.unlink will try to case-escape.
            try:
                os.unlink(path)
            except OSError:
                # best effort; a concurrent prune may have removed it.
                pass
@replaceclass(changegroup, 'cg1unpacker')
class cg1unpackerwithdc(changegroup.cg1unpacker):
    """cg1unpacker that marks the manifest as being in a batch operation
    for the duration of apply().
    """

    def apply(self, repo, *posargs, **kwopts):
        """Apply the changegroup with the manifest's batch flag raised.

        The flag is raised on ``repo.manifest`` before delegating to the
        parent implementation and lowered again afterwards, whether or not
        the parent raised.  All arguments and the return value are passed
        through unchanged.
        """
        # raise the batch flag (disables manifest caching on writes).
        repo.manifest.markbatchoperationstart()
        try:
            # delegate to the original implementation.
            outcome = super(cg1unpackerwithdc, self).apply(
                repo, *posargs, **kwopts)
        finally:
            # lower the batch flag (re-enables manifest caching).
            repo.manifest.markbatchoperationend()
        return outcome
@replaceclass(manifest, 'manifest')
class manifestwithdc(manifest.manifest):
    """Manifest class that mirrors full manifest texts into a disk cache.

    Caching is active only when the opener's ``options`` carry a true value
    under CONFIG_KEY.  Reads are served from
    ``<diskcachedir>/<xx>/<yy>/<hexnode>`` when such a file exists and passes
    the hash check; otherwise the parent class is consulted and the result
    is written to the cache.  All cache failures are suppressed (and
    optionally reported on stderr) so that the cache can never break normal
    operation.
    """

    def __init__(self, opener, dir='', dirlogcache=None):
        """Initialise the manifest and read the cache settings from the
        opener's options, if it has any."""
        super(manifestwithdc, self).__init__(opener, dir, dirlogcache)

        self.manifestdiskcacheenabled = False
        # always defined, so the batch markers and _addrevision can rely on
        # it even when caching is disabled.
        self.inbatchoperation = False

        opts = getattr(opener, 'options', None)
        if opts is not None:
            self.manifestdiskcacheenabled = opts.get(
                CONFIG_KEY, False)

        if self.manifestdiskcacheenabled:
            # this logic is copied from the constructor of manifest.__init__
            if self._dir:
                self.diskcachedir = "meta/" + self._dir + CACHE_SUBDIR
            else:
                self.diskcachedir = CACHE_SUBDIR

            # needed to point the prune subprocess at this repository.
            self.repo_root = opts[REPO_ROOT_KEY]

    def markbatchoperationstart(self):
        """Suspend cache writes from _addrevision until the matching
        markbatchoperationend() call."""
        self.inbatchoperation = True

    def markbatchoperationend(self):
        """Resume cache writes from _addrevision."""
        self.inbatchoperation = False

    def revision(self, nodeorrev, *args, **kwargs):
        """Return the text of a manifest revision, using the disk cache.

        ``nodeorrev`` is either a revision number (int) or a node.  On a
        cache hit that passes ``_checkhash`` the cached text is returned.
        On a miss, or on any cache error, the parent implementation is used
        and its result is written to the cache, followed by a background
        prune check.
        """
        global logging

        # stays None if we fail before the node could be resolved, in which
        # case there is no cache entry name to write to afterwards.
        hexnode = None

        if self.manifestdiskcacheenabled:
            expectedexception = False
            try:
                if isinstance(nodeorrev, int):
                    rev = nodeorrev
                    node = self.node(nodeorrev)
                else:
                    rev = self.rev(nodeorrev)
                    node = nodeorrev

                hexnode = hex(node)
                subpath = os.path.join(self.diskcachedir,
                                       hexnode[0:2], hexnode[2:4], hexnode)

                result = None
                try:
                    with self.opener(subpath, "r") as fh:
                        result = fh.read()
                except IOError:
                    # this is an expected exception (a cache miss), so no
                    # need to sound the alarms.
                    expectedexception = True
                    raise

                if result:
                    # verify that the output passes _checkhash(..)
                    result = self._checkhash(result, node, rev)

                    return result
            except Exception:
                # it's a cache.  suppress the exception, fall back to the
                # regular code path, and report if logging is enabled.
                if logging and not expectedexception:
                    sys.stderr.write("Encountered exception in extension "
                                     "manifestdiskcache: {0}\n".format(
                                         traceback.format_exc()))

        result = super(manifestwithdc, self).revision(nodeorrev,
                                                      *args, **kwargs)

        if self.manifestdiskcacheenabled and hexnode is not None:
            self._writetomanifestcache(hexnode, result, logging)
            self._prune_cache()

        return result

    def _addrevision(self, node, text, *args, **kwargs):
        """Add a revision through the parent class and, unless caching is
        disabled or a batch operation is in progress, cache its text."""
        global logging

        node = super(manifestwithdc, self)._addrevision(
            node, text, *args, **kwargs)

        if self.manifestdiskcacheenabled and not self.inbatchoperation:
            hexnode = hex(node)
            self._writetomanifestcache(hexnode, str(text), logging)
            self._prune_cache()

        return node

    def _writetomanifestcache(self, hexnode, text, loggingenabled):
        """Atomically write ``text`` as the cache entry for ``hexnode``.

        Never raises: any failure is suppressed, and written to stderr when
        ``loggingenabled`` is true.
        """
        try:
            base = self.opener.join(None)
            dirsubpath = os.path.join(self.diskcachedir,
                                      hexnode[0:2],
                                      hexnode[2:4])
            entrysubpath = os.path.join(dirsubpath, hexnode)

            try:
                os.makedirs(os.path.join(base, dirsubpath))
            except OSError:
                # usually the directory already exists; anything else will
                # show up when the temp file is created below.
                pass

            fh = util.atomictempfile(
                os.path.join(base, entrysubpath),
                mode="w+")
            try:
                fh.write(text)
            except Exception:
                # close() would rename the partially written temp file into
                # place; throw it away instead.
                fh.discard()
                raise
            else:
                fh.close()
        except Exception:
            # it's a cache.  suppress the exception and report if logging
            # is enabled.
            if loggingenabled:
                sys.stderr.write("Encountered exception in extension "
                                 "manifestdiskcache: {0}\n".format(
                                     traceback.format_exc()))

    def _prune_cache(self):
        """Launch ``hg prunemanifestdiskcache`` for this repository in the
        background."""
        # spawn a subprocess (but don't wait for it) to prune the cache.  we
        # never reap the child, and we may finish execution before it does.
        # if this becomes an issue, we can have the spawned subprocess
        # execute the double-fork daemonization.
        cmd = util.hgcmd()[:]
        cmd.extend(["--repository",
                    self.repo_root,
                    "prunemanifestdiskcache"])
        subprocess.Popen(cmd, close_fds=True)
def reposetup(ui, repo):
    """Publish the cache settings on the store opener of local repos.

    For local repositories, stores the ``enabled`` config flag and the
    repository root in ``repo.svfs.options`` so that the manifest class can
    pick them up from its opener.  Other repository types are left alone.
    """
    if not isinstance(repo, localrepo.localrepository):
        return

    cacheenabled = repo.ui.configbool(CONFIG_KEY, 'enabled', False)
    repo.svfs.options[CONFIG_KEY] = cacheenabled
    repo.svfs.options[REPO_ROOT_KEY] = repo.root
def _reposnames(ui):
    """Yield candidate names for ``master``, in order of precedence.

    Every configured repo prefix is combined with every configured name;
    all names for the first prefix come before any for the second, etc.
    """
    # '' is local repo. The order of the prefixes (and of the names within
    # each prefix) is the precedence used when looking for master.
    prefixes = ui.configlist(CONFIG_KEY, 'repos', ['', 'remote/', 'default/'])
    basenames = ui.configlist(CONFIG_KEY, 'names', ['@', 'master', 'stable'])

    candidates = [prefix + basename
                  for prefix in prefixes
                  for basename in basenames]
    for candidate in candidates:
        yield candidate
def _masterrevset(ui, repo):
    """
    Work out which revset names ``master`` -- usually a bookmark.

    An explicit ``master`` setting in the extension's config section wins.
    Otherwise the first candidate from _reposnames() that matches a local
    bookmark or (when available) a remote bookmark is returned.
    Falls back to 'tip' when nothing matches.
    """
    configured = ui.config(CONFIG_KEY, 'master')
    if configured:
        return configured

    # collect local bookmarks, plus remote bookmarks if the repo exposes
    # that namespace.
    known = set(bookmarks.bmstore(repo).keys())
    hasremotes = (util.safehasattr(repo, 'names')
                  and 'remotebookmarks' in repo.names)
    if hasremotes:
        known.update(set(repo.names['remotebookmarks'].listnames(repo)))

    return next(
        (candidate for candidate in _reposnames(ui) if candidate in known),
        'tip')