sapling/manifestdiskcache.py
Tony Tung 560631448d extract replaceclass out of manifestdiskcache
Summary: It's a useful function.  Plan to use it in another extension.

Test Plan: python ../../hg-crew/tests/run-tests.py --with-hg ../../hg-crew/hg test-manifestdiskcache.t

Reviewers: #sourcecontrol, lcharignon

Reviewed By: lcharignon

Subscribers: lcharignon, mitrandir

Differential Revision: https://phabricator.fb.com/D2896753

Signature: t1:2896753:1454542605:c660ee3e108b497c4a823bd732f98ebf5e147e02
2016-02-03 16:22:34 -08:00

346 lines
12 KiB
Python

# manifestdiskcache.py - manifest disk cache for mercurial
#
# Copyright 2012 Facebook
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
'''Cache manifests on disk to speed up access.
This extension intercepts reads and writes of manifests to cache them on disk.
Enable by setting the config variable manifestdiskcache.enabled to True.
On writes, we spawn a second process (to avoid penalizing interactive use) to
check if we should prune the cache. The pruning is guided by several
configuration variables:
manifestdiskcache.pinned-revsets: revsets to pin in the cache.
manifestdiskcache.cache-size: the upper limit for the size of the cache.
manifestdiskcache.runs-between-prunes: the approximate number of writes that
will elapse before we prune.
manifestdiskcache.seconds-between-prunes: the number of seconds since the last
prune that can elapse before we prune.
Because this is a cache, exceptions are generally suppressed. If the
configuration variable manifestdiskcache.logging is set to True, exceptions will
be written to standard error, but will still be suppressed.
err
'''
from mercurial import bookmarks, changegroup, cmdutil, error, extensions
from mercurial import localrepo, manifest, revlog, util
from mercurial.node import bin, hex
from mercurial.i18n import _
import collections
import os
import random
import subprocess
import sys
import time
import traceback
from extutil import replaceclass
# name of the directory (relative to the store opener) that holds the cache
# entries and the '.marker' file used to schedule pruning.
CACHE_SUBDIR = 'manifestdiskcache'
# config section read by every ui.config*() call in this file; also the key
# under which the "enabled" flag is stored in the store opener's options.
CONFIG_KEY = 'manifestdiskcache'
# length of a hex-encoded node.  cache entries are named after the hex node,
# so the pruner treats any longer file name as a leftover temp file.
HEX_SHA_SIZE_BYTES = 40
# presumably Mercurial's extension-compatibility marker -- TODO confirm.
testedwith = 'internal'
def extsetup(ui):
    """Read manifestdiskcache.logging once and publish it module-wide.

    The resulting module-level ``logging`` flag is consulted by the manifest
    cache code to decide whether suppressed exceptions get reported on
    standard error.
    """
    global logging
    reportexceptions = ui.configbool(CONFIG_KEY, 'logging', False)
    logging = reportexceptions
# command table for this extension; it is handed to cmdutil.command() to
# build the @command decorator used to declare prunemanifestdiskcache below.
cmdtable = {}
command = cmdutil.command(cmdtable)
@command(
    'prunemanifestdiskcache', [],
    _('hg prunemanifestdiskcache'))
def prunemanifestdiskcache(ui, repo):
    """Probabilistically prune the on-disk manifest cache.

    Whether a given invocation actually prunes is decided from the mtime of
    the ``.marker`` file in the cache directory: the odds start at
    1 / runs-between-prunes and grow linearly with the time elapsed since the
    marker was last touched, scaled by seconds-between-prunes.  When there is
    no marker yet, it is created and the prune always runs.

    When pruning, entries whose file name is the hex manifest node of a
    changeset matched by pinned-revsets are always kept.  The remaining
    entries are kept from most recently accessed to least recently accessed
    until their cumulative size exceeds cache-size; every entry past that
    point is deleted.  Files whose name is longer than a hex node are treated
    as leftover temp files and deleted unconditionally.

    Raises error.Abort if runs-between-prunes < 1, if
    seconds-between-prunes < 0, or if pinned-revsets cannot be parsed.
    """
    masterrevset = _masterrevset(ui, repo)

    # retrieve the options.
    pinnedrevsets = ui.config(CONFIG_KEY,
                              'pinned-revsets',
                              "{0} or (draft() and date(-3))".format(
                                  masterrevset))
    cachesizelimit = ui.configbytes(CONFIG_KEY, 'cache-size', '5g')
    runsbetween = ui.configint(CONFIG_KEY, 'runs-between-prunes', 100)
    secondsbetween = ui.configint(CONFIG_KEY, 'seconds-between-prunes', 86400)

    # validate the arguments
    if runsbetween < 1:
        raise error.Abort("runs-between-prunes should be >= 1")
    if secondsbetween < 0:
        raise error.Abort("seconds-between-prunes should be >= 0")

    store = repo.store
    opener = store.opener
    base = store.opener.join(None)

    # decide whether we run.
    markerpath = os.path.join(base, CACHE_SUBDIR, '.marker')
    try:
        stat = os.stat(markerpath)
    except OSError:
        # no marker yet.  make sure the cache directory exists (it will not
        # if nothing has been cached so far), then create the marker and
        # fall through to an unconditional prune.
        try:
            os.makedirs(os.path.dirname(markerpath))
        except OSError:
            # most likely the directory is already there; if it is some
            # other problem, creating the marker below will report it.
            pass
        with open(markerpath, 'w'):
            pass
    else:
        now = time.time()
        delta = now - stat.st_mtime
        intercept = (1.0 / runsbetween)
        if secondsbetween > 0:
            odds = intercept + (((1 - intercept) * delta) / secondsbetween)
        else:
            # seconds-between-prunes == 0 is accepted by the validation
            # above; it means "no waiting period", so always prune rather
            # than dividing by zero.
            odds = 1.0
        if odds < random.random():
            # no pruning.
            ui.note(_("no pruning needed at this time."))
            return

        # update the file timestamp so the elapsed-time clock restarts.
        os.utime(markerpath, None)

    # find all the pinned revs.
    changelog = repo.changelog
    revs = set()
    if pinnedrevsets:
        try:
            revs = repo.revs(pinnedrevsets)
        except error.ParseError:
            raise error.Abort(
                "Cannot parse {0}.pinned-revsets.".format(CONFIG_KEY))

    # cache entries are named after the hex manifest node, which is the
    # first field of the changelog entry.
    pinnednodes = set(hex(changelog.read(changelog.node(rev))[0])
                      for rev in revs)

    # enumerate all the existing cache entries that are candidates for
    # removal.
    entries = []
    for dirpath, dirs, files in opener.walk(CACHE_SUBDIR):
        for fname in files:
            # don't remove the marker.
            if fname == '.marker':
                continue

            path = os.path.join(base, dirpath, fname)
            if len(fname) > HEX_SHA_SIZE_BYTES:
                # this is probably a temp file.  trash it, but do it directly,
                # because opener.unlink will try to case-escape.
                try:
                    os.unlink(path)
                except OSError:
                    # best effort: somebody else may have removed it.
                    pass
                continue

            if fname in pinnednodes:
                # pinned rev, move on.
                continue

            try:
                stat = os.stat(path)
            except OSError:
                # file presumably does not exist.
                continue

            entries.append(
                (stat.st_atime, stat.st_size, path))

    # most recently accessed entries first, so that they are the ones that
    # get to use up the size budget below.
    entries.sort(reverse=True)

    ui.debug("pid: {0}\ncache entries: {1}\n".format(
        os.getpid(),
        "\n".join(["{0}".format(entry)
                   for entry in entries])))

    # accumulate up to cachesize, then remove the remainder.
    accumsize = 0
    for atime, size, path in entries:
        accumsize += size
        if accumsize > cachesizelimit:
            # remove the file, but once again, do it directly because
            # opener.unlink will try to case-escape.
            try:
                os.unlink(path)
            except OSError:
                # best effort: a concurrent prune may have removed it.
                pass
@replaceclass(changegroup, 'cg1unpacker')
class cg1unpackerwithdc(changegroup.cg1unpacker):
    """cg1unpacker that flags the manifest as being in a batch operation
    for the duration of apply()."""

    def apply(self, repo, *args, **kwargs):
        """Run the stock apply() between the manifest's batch markers.

        The end marker is issued even when the wrapped apply() raises.
        Returns whatever the wrapped apply() returns.
        """
        original = super(cg1unpackerwithdc, self)

        # tell the manifest a batch operation is starting.
        repo.manifest.markbatchoperationstart()
        try:
            # delegate to the implementation we replaced.
            return original.apply(repo, *args, **kwargs)
        finally:
            # batch operation is over, whether it succeeded or not.
            repo.manifest.markbatchoperationend()
@replaceclass(manifest, 'manifest')
class manifestwithdc(manifest.manifest):
    """Manifest revlog that mirrors manifest texts into a disk cache.

    Caching is active only when the opener's options carry a true value under
    CONFIG_KEY.  Entries are stored under
    <diskcachedir>/<hex[0:2]>/<hex[2:4]>/<hex>, where <hex> is the hex node.
    Because this is only a cache, failures in the cache code are suppressed
    (and reported on stderr when the module-level ``logging`` flag is set);
    the regular revlog code remains the source of truth.
    """

    def __init__(self, opener, dir='', dirlogcache=None):
        super(manifestwithdc, self).__init__(opener, dir, dirlogcache)
        self.manifestdiskcacheenabled = False
        # always defined, so the batch flag exists whether or not caching is
        # enabled for this instance.
        self.inbatchoperation = False

        opts = getattr(opener, 'options', None)
        if opts is not None:
            self.manifestdiskcacheenabled = opts.get(
                CONFIG_KEY, False)

        if self.manifestdiskcacheenabled:
            # this logic is copied from the constructor of manifest.__init__
            if self._dir:
                self.diskcachedir = "meta/" + self._dir + CACHE_SUBDIR
            else:
                self.diskcachedir = CACHE_SUBDIR

    def markbatchoperationstart(self):
        """Stop _addrevision from writing cache entries until the matching
        markbatchoperationend() call."""
        self.inbatchoperation = True

    def markbatchoperationend(self):
        """Let _addrevision write cache entries again."""
        self.inbatchoperation = False

    def revision(self, nodeorrev, *args, **kwargs):
        """Return the text of a manifest revision, using the disk cache.

        On a cache hit the cached text is passed through _checkhash() and
        returned.  On a miss, or on any failure in the cache code, the text
        is read through the parent class, written to the cache, and a prune
        is scheduled.
        """
        # stays None if we fail before working out the cache key; in that
        # case there is nothing we can write back to the cache.
        hexnode = None

        if self.manifestdiskcacheenabled:
            try:
                if isinstance(nodeorrev, int):
                    rev = nodeorrev
                    node = self.node(nodeorrev)
                else:
                    rev = self.rev(nodeorrev)
                    node = nodeorrev

                hexnode = hex(node)
                subpath = os.path.join(self.diskcachedir,
                                       hexnode[0:2], hexnode[2:4], hexnode)

                try:
                    with self.opener(subpath, "rb") as fh:
                        result = fh.read()
                except IOError:
                    # an unreadable/missing entry is an ordinary cache miss,
                    # so no need to sound the alarms.
                    result = None

                if result:
                    # verify that the output passes _checkhash(..)
                    result = self._checkhash(result, node, rev)
                    return result
            except Exception:
                # it's a cache.  suppress the exception, fall back to the
                # regular read below, and report if logging is enabled.
                if logging:
                    sys.stderr.write("Encountered exception in extension "
                                     "manifestdiskcache: {0}\n".format(
                                         traceback.format_exc()))

        result = super(manifestwithdc, self).revision(nodeorrev,
                                                      *args, **kwargs)

        if self.manifestdiskcacheenabled and hexnode is not None:
            self._writetomanifestcache(hexnode, result, logging)
            self._prune_cache()

        return result

    def _addrevision(self, node, text, *args, **kwargs):
        """Add a revision through the parent class, then cache its text.

        Nothing is cached while a batch operation is in progress (see
        markbatchoperationstart).  Returns the node returned by the parent
        class.
        """
        node = super(manifestwithdc, self)._addrevision(
            node, text, *args, **kwargs)

        if self.manifestdiskcacheenabled and not self.inbatchoperation:
            hexnode = hex(node)
            self._writetomanifestcache(hexnode, str(text), logging)
            self._prune_cache()

        return node

    def _writetomanifestcache(self, hexnode, text, loggingenabled):
        """Atomically write ``text`` as the cache entry for ``hexnode``.

        Never raises: any failure is suppressed, and reported on stderr when
        ``loggingenabled`` is true.
        """
        try:
            base = self.opener.join(None)
            dirsubpath = os.path.join(self.diskcachedir,
                                      hexnode[0:2],
                                      hexnode[2:4])
            entrysubpath = os.path.join(dirsubpath, hexnode)
            try:
                os.makedirs(os.path.join(base, dirsubpath))
            except OSError:
                # usually the directory already exists; anything else will
                # surface when we try to create the file.
                pass

            # binary mode: the entry must contain exactly the manifest bytes,
            # with no newline translation, or it will fail _checkhash() when
            # read back.
            fh = util.atomictempfile(
                os.path.join(base, entrysubpath),
                mode="w+b")
            try:
                fh.write(text)
            except Exception:
                # close() would rename the partially written temp file into
                # place; throw it away instead.
                fh.discard()
                raise
            else:
                fh.close()
        except Exception:
            # it's a cache.  suppress the exception, and report if logging
            # is enabled.
            if loggingenabled:
                sys.stderr.write("Encountered exception in extension "
                                 "manifestdiskcache: {0}\n".format(
                                     traceback.format_exc()))

    def _prune_cache(self):
        """Spawn ``hg prunemanifestdiskcache`` in the background.

        Never raises: failing to start the pruner must not break the
        manifest read or write that triggered it.
        """
        # spawn a subprocess (but don't wait for it) to prune the cache.
        # since nobody waits for the child, the processes are not cleaned up
        # in an orderly fashion.  if this becomes an issue, we can have the
        # spawned subprocess execute the double-fork daemonization.
        # NOTE(review): the command carries no explicit repository argument,
        # so the child presumably locates the repo from the inherited
        # working directory -- confirm.
        cmd = util.hgcmd()[:]
        cmd.append("prunemanifestdiskcache")
        try:
            subprocess.Popen(cmd, close_fds=True)
        except Exception:
            # it's a cache.  suppress the exception, and report if logging
            # is enabled.
            if logging:
                sys.stderr.write("Encountered exception in extension "
                                 "manifestdiskcache: {0}\n".format(
                                     traceback.format_exc()))
@replaceclass(localrepo, 'localrepository')
class repowithmdc(localrepo.localrepository):
    """localrepository that publishes the manifestdiskcache.enabled setting
    through the store vfs options."""

    def _applyopenerreqs(self):
        """Apply the stock opener requirements, then record under CONFIG_KEY
        in ``self.svfs.options`` whether the manifest disk cache is enabled.
        """
        super(repowithmdc, self)._applyopenerreqs()
        cacheenabled = self.ui.configbool(CONFIG_KEY, 'enabled', False)
        self.svfs.options[CONFIG_KEY] = cacheenabled
def _reposnames(ui):
    """Yield candidate names for master: every configured repo prefix
    combined with every configured name, prefix-major.

    The prefixes come from the 'repos' setting and the names from the
    'names' setting of this extension's config section.
    """
    # the '' prefix stands for the local repo.  the order of both lists is
    # also the order of precedence when looking for master.
    prefixes = ui.configlist(CONFIG_KEY, 'repos', ['', 'remote/', 'default/'])
    basenames = ui.configlist(CONFIG_KEY, 'names', ['@', 'master', 'stable'])
    combined = (prefix + basename
                for prefix in prefixes
                for basename in basenames)
    for candidate in combined:
        yield candidate
def _masterrevset(ui, repo):
    """
    Work out which revset designates ``master`` -- usually a bookmark.

    An explicit 'master' setting in this extension's config section wins.
    Otherwise the first candidate from _reposnames() that is a known local
    or remote bookmark name is returned, and 'tip' when none matches.
    """
    configured = ui.config(CONFIG_KEY, 'master')
    if configured:
        return configured

    # gather local bookmark names, plus remote ones when the repo exposes
    # a 'remotebookmarks' namespace.
    known = set(bookmarks.bmstore(repo).keys())
    if util.safehasattr(repo, 'names') and 'remotebookmarks' in repo.names:
        remote = repo.names['remotebookmarks']
        known.update(set(remote.listnames(repo)))

    matches = (candidate for candidate in _reposnames(ui)
               if candidate in known)
    return next(matches, 'tip')