mirror of
https://github.com/facebook/sapling.git
synced 2024-10-11 01:07:15 +03:00
2e7cda45b5
Summary: When someone specifies a repository with --repository/-R, the working directory could be anything. Therefore, the worker process to prune the cache needs to know which directory to switch to. Test Plan: wrote a test case with this scenario. fails without patch, succeeds with it. Reviewers: #sourcecontrol, durham Reviewed By: durham Subscribers: mitrandir, mjpieters Differential Revision: https://phabricator.fb.com/D2991853 Tasks: 10230869 Signature: t1:2991853:1456799093:dd03ac1d003f2efd0de0bad740d4792ae81b172e
351 lines
12 KiB
Python
351 lines
12 KiB
Python
# manifestdiskcache.py - manifest disk cache for mercurial
|
|
#
|
|
# Copyright 2012 Facebook
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
'''Cache manifests on disk to speed up access.
|
|
|
|
This extension intercepts reads and writes of manifests to cache them on disk.
|
|
Enable by setting the config variable manifestdiskcache.enabled to True.
|
|
|
|
On writes, we spawn a second process (to avoid penalizing interactive use) to
|
|
check if we should prune the cache. The pruning is guided by several
|
|
configuration variables:
|
|
|
|
manifestdiskcache.pinned-revsets: revsets to pin in the cache.
|
|
|
|
manifestdiskcache.cache-size: the upper limit for the size of the cache.
|
|
|
|
manifestdiskcache.runs-between-prunes: the approximate number of writes that
|
|
will elapse before we prune.
|
|
|
|
manifestdiskcache.seconds-between-prunes: the number of seconds since the last
|
|
prune that can elapse before we prune.
|
|
|
|
Because this is a cache, exceptions are generally suppresed. If the
|
|
configuration variable manifestdiskcache.logging is set to True, exceptions will
|
|
be written to standard error, but will still be suppressed.
|
|
err
|
|
|
|
'''
|
|
|
|
from mercurial import bookmarks, changegroup, cmdutil, error, extensions
|
|
from mercurial import localrepo, manifest, revlog, util
|
|
from mercurial.node import bin, hex
|
|
from mercurial.i18n import _
|
|
|
|
import collections
|
|
import os
|
|
import random
|
|
import subprocess
|
|
import sys
|
|
import time
|
|
import traceback
|
|
|
|
from extutil import replaceclass
|
|
|
|
CACHE_SUBDIR = 'manifestdiskcache'
|
|
CONFIG_KEY = 'manifestdiskcache'
|
|
REPO_ROOT_KEY = 'manifestdiskcachee.repo_root'
|
|
HEX_SHA_SIZE_BYTES = 40
|
|
|
|
testedwith = 'internal'
|
|
|
|
def extsetup(ui):
|
|
global logging
|
|
logging = ui.configbool(CONFIG_KEY, 'logging', False)
|
|
|
|
cmdtable = {}
|
|
command = cmdutil.command(cmdtable)
|
|
@command(
|
|
'prunemanifestdiskcache', [],
|
|
_('hg prunemanifestdiskcache'))
|
|
def prunemanifestdiskcache(ui, repo):
|
|
masterrevset = _masterrevset(ui, repo)
|
|
|
|
# retrieve the options.
|
|
pinnedrevsets = ui.config(CONFIG_KEY,
|
|
'pinned-revsets',
|
|
"{0} or (draft() and date(-3))".format(
|
|
masterrevset))
|
|
cachesizelimit = ui.configbytes(CONFIG_KEY, 'cache-size', '5g')
|
|
runsbetween = ui.configint(CONFIG_KEY, 'runs-between-prunes', 100)
|
|
secondsbetween = ui.configint(CONFIG_KEY, 'seconds-between-prunes', 86400)
|
|
|
|
# validate the arguments
|
|
if runsbetween < 1:
|
|
raise error.Abort("runs-between-prunes should be >= 1")
|
|
if secondsbetween < 0:
|
|
raise error.Abort("seconds-between-prunes should be >= 0")
|
|
|
|
store = repo.store
|
|
opener = store.opener
|
|
base = store.opener.join(None)
|
|
|
|
# decide whether we run.
|
|
markerpath = os.path.join(base, CACHE_SUBDIR, '.marker')
|
|
try:
|
|
stat = os.stat(markerpath)
|
|
except OSError:
|
|
# create the file.
|
|
with open(markerpath, 'w'):
|
|
pass
|
|
else:
|
|
now = time.time()
|
|
delta = now - stat.st_mtime
|
|
|
|
intercept = (1.0 / runsbetween)
|
|
odds = intercept + (((1 - intercept) * delta) / secondsbetween)
|
|
|
|
if odds < random.random():
|
|
# no pruning.
|
|
ui.note(_("no pruning needed at this time."))
|
|
return
|
|
|
|
# update the file timestamp.
|
|
os.utime(markerpath, None)
|
|
|
|
# fined all the pinned revs.
|
|
changelog = repo.changelog
|
|
revs = set()
|
|
if pinnedrevsets:
|
|
try:
|
|
revs = repo.revs(pinnedrevsets)
|
|
except error.ParseError:
|
|
error.Abort("Cannot parse {0}.pinned-revsets.".format(CONFIG_KEY))
|
|
|
|
pinnednodes = set(hex(changelog.read(changelog.node(rev))[0])
|
|
for rev in revs)
|
|
|
|
# enumerate all the existing cache entries, ordered by time ascending.
|
|
entries = []
|
|
for dirpath, dirs, files in opener.walk(CACHE_SUBDIR):
|
|
for fname in files:
|
|
# don't remove the marker.
|
|
if fname == '.marker':
|
|
continue
|
|
|
|
path = os.path.join(base, dirpath, fname)
|
|
if len(fname) > HEX_SHA_SIZE_BYTES:
|
|
# this is probably a temp file. trash it, but do it directly,
|
|
# because opener.unlink will try to case-escape.
|
|
try:
|
|
os.unlink(path)
|
|
except Exception:
|
|
pass
|
|
continue
|
|
|
|
if fname in pinnednodes:
|
|
# pinned rev, move on.
|
|
continue
|
|
|
|
try:
|
|
stat = os.stat(path)
|
|
except OSError:
|
|
# file presumably does not exist.
|
|
continue
|
|
|
|
entries.append(
|
|
(stat.st_atime, stat.st_size, path))
|
|
entries.sort(reverse=True)
|
|
ui.debug("pid: {0}\ncache entries: {1}\n".format(
|
|
os.getpid(),
|
|
"\n".join(["{0}".format(entry)
|
|
for entry in entries])))
|
|
|
|
# accumulate up to cachesize, then remove the remainder.
|
|
accumsize = 0
|
|
for atime, size, path in entries:
|
|
accumsize += size
|
|
|
|
if accumsize > cachesizelimit:
|
|
# remove the file, but once again, do it directly because
|
|
# opener.unlink will try to case-escape.
|
|
try:
|
|
os.unlink(path)
|
|
except Exception:
|
|
pass
|
|
|
|
@replaceclass(changegroup, 'cg1unpacker')
|
|
class cg1unpackerwithdc(changegroup.cg1unpacker):
|
|
def apply(self, repo, *args, **kwargs):
|
|
# disable manifest caching.
|
|
repo.manifest.markbatchoperationstart()
|
|
try:
|
|
# call the original function
|
|
return super(cg1unpackerwithdc, self).apply(repo, *args, **kwargs)
|
|
finally:
|
|
# re-enable manifest caching.
|
|
repo.manifest.markbatchoperationend()
|
|
|
|
@replaceclass(manifest, 'manifest')
|
|
class manifestwithdc(manifest.manifest):
|
|
def __init__(self, opener, dir='', dirlogcache=None):
|
|
super(manifestwithdc, self).__init__(opener, dir, dirlogcache)
|
|
|
|
self.manifestdiskcacheenabled = False
|
|
opts = getattr(opener, 'options', None)
|
|
if opts is not None:
|
|
self.manifestdiskcacheenabled = opts.get(
|
|
CONFIG_KEY, False)
|
|
|
|
if self.manifestdiskcacheenabled:
|
|
# this logic is copied from the constructor of manifest.__init__
|
|
if self._dir:
|
|
self.diskcachedir = "meta/" + self._dir + CACHE_SUBDIR
|
|
else:
|
|
self.diskcachedir = CACHE_SUBDIR
|
|
|
|
self.inbatchoperation = False
|
|
self.repo_root = opts[REPO_ROOT_KEY]
|
|
|
|
def markbatchoperationstart(self):
|
|
self.inbatchoperation = True
|
|
|
|
def markbatchoperationend(self):
|
|
self.inbatchoperation = False
|
|
|
|
def revision(self, nodeorrev, *args, **kwargs):
|
|
global logging
|
|
|
|
if self.manifestdiskcacheenabled:
|
|
expectedexception = False
|
|
|
|
try:
|
|
if isinstance(nodeorrev, int):
|
|
rev = nodeorrev
|
|
node = self.node(nodeorrev)
|
|
else:
|
|
rev = self.rev(nodeorrev)
|
|
node = nodeorrev
|
|
|
|
hexnode = hex(node)
|
|
|
|
subpath = os.path.join(self.diskcachedir,
|
|
hexnode[0:2], hexnode[2:4], hexnode)
|
|
|
|
result = None
|
|
try:
|
|
with self.opener(subpath, "r") as fh:
|
|
result = fh.read()
|
|
except IOError:
|
|
# this is an expected exception, so no need to sound the
|
|
# alarms.
|
|
expectedexception = True
|
|
raise
|
|
|
|
if result:
|
|
# verify that the output passes _checkhash(..)
|
|
result = self._checkhash(result, node, rev)
|
|
|
|
return result
|
|
except Exception:
|
|
# it's a cache. suppress the exception, disable caching
|
|
# going forward, and then report if logging is enabled.
|
|
if logging and not expectedexception:
|
|
sys.stderr.write("Encountered exception in extension "
|
|
"manifestdiskcache: {0}\n".format(
|
|
traceback.format_exc()))
|
|
|
|
result = super(manifestwithdc, self).revision(nodeorrev,
|
|
*args, **kwargs)
|
|
|
|
if self.manifestdiskcacheenabled:
|
|
self._writetomanifestcache(hexnode, result, logging)
|
|
self._prune_cache()
|
|
|
|
return result
|
|
|
|
def _addrevision(self, node, text, *args, **kwargs):
|
|
global logging
|
|
|
|
node = super(manifestwithdc, self)._addrevision(
|
|
node, text, *args, **kwargs)
|
|
|
|
if self.manifestdiskcacheenabled and not self.inbatchoperation:
|
|
hexnode = hex(node)
|
|
self._writetomanifestcache(hexnode, str(text), logging)
|
|
|
|
self._prune_cache()
|
|
|
|
return node
|
|
|
|
def _writetomanifestcache(self, hexnode, text, loggingenabled):
|
|
try:
|
|
base = self.opener.join(None)
|
|
dirsubpath = os.path.join(self.diskcachedir,
|
|
hexnode[0:2],
|
|
hexnode[2:4])
|
|
entrysubpath = os.path.join(dirsubpath, hexnode)
|
|
|
|
try:
|
|
os.makedirs(os.path.join(base, dirsubpath))
|
|
except OSError:
|
|
pass
|
|
fh = util.atomictempfile(
|
|
os.path.join(base, entrysubpath),
|
|
mode="w+")
|
|
try:
|
|
fh.write(text)
|
|
finally:
|
|
fh.close()
|
|
except Exception:
|
|
# it's a cache. suppress the exception, disable caching
|
|
# going forward, and then report if logging is enabled.
|
|
if loggingenabled:
|
|
sys.stderr.write("Encountered exception in extension "
|
|
"manifestdiskcache: {0}\n".format(
|
|
traceback.format_exc()))
|
|
|
|
def _prune_cache(self):
|
|
# spawn a subprocess (but don't wait for it) to prune the cache. this
|
|
# may result in us (the main process) becoming a zombie, because we
|
|
# could finish execution before the subprocess finishes. if this
|
|
# becomes an issue, we can have the spawned subprocess execute the
|
|
# double-fork daemonization.
|
|
cmd = util.hgcmd()[:]
|
|
cmd.extend(["--repository",
|
|
self.repo_root,
|
|
"prunemanifestdiskcache"])
|
|
subprocess.Popen(cmd, close_fds=True)
|
|
|
|
@replaceclass(localrepo, 'localrepository')
|
|
class repowithmdc(localrepo.localrepository):
|
|
def _applyopenerreqs(self):
|
|
super(repowithmdc, self)._applyopenerreqs()
|
|
self.svfs.options[CONFIG_KEY] = self.ui.configbool(
|
|
CONFIG_KEY, 'enabled', False)
|
|
self.svfs.options[REPO_ROOT_KEY] = self.root
|
|
|
|
def _reposnames(ui):
|
|
# '' is local repo. This also defines an order precedence for master.
|
|
repos = ui.configlist(CONFIG_KEY, 'repos', ['', 'remote/', 'default/'])
|
|
names = ui.configlist(CONFIG_KEY, 'names', ['@', 'master', 'stable'])
|
|
|
|
for repo in repos:
|
|
for name in names:
|
|
yield repo + name
|
|
|
|
def _masterrevset(ui, repo):
|
|
"""
|
|
Try to find the name of ``master`` -- usually a bookmark.
|
|
|
|
Defaults to 'tip' if no suitable local or remote bookmark is found.
|
|
"""
|
|
|
|
masterstring = ui.config(CONFIG_KEY, 'master')
|
|
if masterstring:
|
|
return masterstring
|
|
|
|
names = set(bookmarks.bmstore(repo).keys())
|
|
if util.safehasattr(repo, 'names') and 'remotebookmarks' in repo.names:
|
|
names.update(set(repo.names['remotebookmarks'].listnames(repo)))
|
|
|
|
for name in _reposnames(ui):
|
|
if name in names:
|
|
return name
|
|
|
|
return 'tip'
|