mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 15:27:13 +03:00
0aa4b1c9a0
Summary: This adds a new strategy for the cache limit that depends on the free space available in the system. For systems with more than 100GB of free space we allocate 5GB for the cache. With more than 20 GB of free space we allocate 2GB of free space and otherwise 10% of the free space at most. Test Plan: Add a new test Reviewers: ttung, durham, rmcelroy Differential Revision: https://phabricator.intern.facebook.com/D3330894
907 lines
32 KiB
Python
907 lines
32 KiB
Python
# fastmanifest.py
|
|
#
|
|
# Copyright 2016 Facebook, Inc.
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
"""
|
|
This extension adds fastmanifest, a treemanifest disk cache for speeding up
|
|
manifest comparison. It also contains utilities to investigate manifest access
|
|
patterns.
|
|
|
|
|
|
Configuration options:
|
|
|
|
[fastmanifest]
|
|
logfile = "" # Filename; if not empty, every manifest access is logged to it
|
|
|
|
|
|
Description:
|
|
|
|
`manifestaccesslogger` logs manifest accessed to a logfile specified with
|
|
the option fastmanifest.logfile
|
|
|
|
`fastmanifesttocache` is a revset of relevant manifests to cache
|
|
|
|
`hybridmanifest` is a proxy class for flat and cached manifest that loads
|
|
manifest from cache or from disk.
|
|
It chooses what kind of manifest is relevant to create based on the operation,
|
|
ideally the fastest.
|
|
TODO instantiate fastmanifest when they are more suitable
|
|
|
|
`manifestcache` is the class handling the interface with the cache, it supports
|
|
caching flat and fast manifest and retrieving them.
|
|
TODO logic for loading fastmanifest
|
|
TODO logic for saving fastmanifest
|
|
TODO garbage collection
|
|
|
|
`manifestfactory` is a class whose method wraps manifest creating method of
|
|
manifest.manifest. It intercepts the calls to build hybridmanifest instead of
|
|
regularmanifests. We use a class for that to allow sharing the ui object that
|
|
is not normally accessible to manifests.
|
|
|
|
`debugcachemanifest` is a command calling `_cachemanifest`, a function to add
|
|
manifests to the cache and manipulate what is cached. It allows caching fast
|
|
and flat manifest, asynchronously and synchronously.
|
|
"""
|
|
import array
|
|
import os
|
|
import sys
|
|
|
|
from mercurial import cmdutil
|
|
from mercurial import extensions
|
|
from mercurial import manifest
|
|
from mercurial import mdiff
|
|
from mercurial import revset
|
|
from mercurial import revlog
|
|
from mercurial import scmutil
|
|
from mercurial import util
|
|
from mercurial import bookmarks
|
|
from mercurial import dirstate
|
|
from mercurial import localrepo
|
|
|
|
from extutil import wrapfilecache
|
|
|
|
import fastmanifest_wrapper
|
|
|
|
# Directory name joined onto the opener's base path to locate the on-disk cache
CACHE_SUBDIR = "manifestcache"
# Command table handed to cmdutil.command; the resulting `command` decorator
# is what declares debugcachemanifest further down.
cmdtable = {}
command = cmdutil.command(cmdtable)
|
|
|
|
def silent_debug(*args, **kwargs):
    """No-op stand-in for ui.debug: accepts any arguments and discards them.

    Typically enabled when running the mercurial test suite by setting:
    --extra-config-opt=fastmanifest.silent=True
    """
    return None
|
|
|
|
class manifestaccesslogger(object):
    """Appends every manifest access to a log file to check our assumptions"""

    def __init__(self, logfile):
        # Path of the file the results of the wrapped calls are appended to
        self._logfile = logfile

    def revwrap(self, orig, *args, **kwargs):
        """Wrapper for manifest.rev: call through, then log the result

        The value returned by ``orig`` is written on its own line and handed
        back to the caller unchanged.
        """
        result = orig(*args, **kwargs)
        try:
            logfh = open(self._logfile, "a")
            try:
                logfh.write("%s\n" % result)
            finally:
                logfh.close()
        except EnvironmentError:
            # Logging is best effort: an unwritable log file must not make
            # the wrapped call fail.
            pass
        return result
|
|
|
|
|
|
def fastmanifesttocache(repo, subset, x):
    """Revset of the interesting revisions to cache

    Evaluates the revset expression "not public() + bookmark()" against
    ``repo``. ``subset`` and ``x`` are part of the signature but are not used.
    """
    specs = ["not public() + bookmark()"]
    return scmutil.revrange(repo, specs)
|
|
|
|
|
|
class hybridmanifest(object):
    """
    Hybrid manifest that behaves like a lazy manifest.

    Must be initialized with at least one of the three:
    - flat      an existing flat manifest
    - fast      an existing fast manifest
    - loadflat  a function to load a flat manifest from disk

    Accesses are proxied to the cached (fast) manifest when one can be
    obtained and to the flat manifest otherwise.
    """
    def __init__(self, ui, opener,
                 flat=None, fast=None, loadflat=None, node=None):
        self.__flatmanifest = flat
        self.__cachedmanifest = fast
        self.loadflat = loadflat

        # At least one source has to be provided, otherwise there is nothing
        # the manifest could ever be resolved from.
        assert (self.__flatmanifest is not None or
                self.__cachedmanifest is not None or
                self.loadflat is not None)

        self.ui = ui
        self.opener = opener
        self.node = node

        # Key of this manifest in the cache: hex of the node, when known
        self.cachekey = revlog.hex(self.node) if self.node is not None else None

        self.fastcache = fastmanifestcache.getinstance(opener, self.ui)
        self.debugfastmanifest = (self.ui.configbool("fastmanifest",
                                                     "debugfastmanifest")
                                  if self.ui is not None
                                  else False)

        # Tri-state: True when a fast manifest is already at hand, None while
        # the cache has not been looked up yet (see _cachedmanifest).
        self.incache = True if self.__cachedmanifest is not None else None

        if self.ui is None or self.ui.configbool("fastmanifest", "silent"):
            self.debug = silent_debug
        else:
            self.debug = self.ui.debug

    def _flatmanifest(self):
        """Return the flat manifest, loading or building it on first use"""
        if self.__flatmanifest is None:
            if self.loadflat is not None:
                # Load the manifest and cache it.
                self.__flatmanifest = self.loadflat()

                if isinstance(self.__flatmanifest, hybridmanifest):
                    # See comment in extsetup to see why we have to do that
                    self.__flatmanifest = self.__flatmanifest._flatmanifest()
            elif self.__cachedmanifest is not None:
                # build a flat manifest from the text of the fastmanifest.
                self.__flatmanifest = manifest.manifestdict(
                    self.__cachedmanifest.text())

            assert isinstance(self.__flatmanifest, manifest.manifestdict)
        return self.__flatmanifest

    def _cachedmanifest(self):
        """Return the fast manifest, or None when it cannot be obtained

        The cache is only looked up the first time; the outcome is remembered
        in self.incache.
        """
        if self.incache is None:
            # Cache lookup
            if (self.cachekey is not None and
                self.fastcache.containsnode(self.cachekey)):
                self.__cachedmanifest = self.fastcache.get(self.cachekey)
            elif self.node == revlog.nullid:
                fm = fastmanifest_wrapper.fastManifest()
                self.__cachedmanifest = fastmanifestdict(fm)
            elif self.debugfastmanifest:
                # in debug mode, we always convert into a fastmanifest.
                r = self._flatmanifest()
                fm = fastmanifest_wrapper.fastManifest(r.text())
                self.__cachedmanifest = fastmanifestdict(fm)

            self.incache = self.__cachedmanifest is not None

            self.debug("cache %s for fastmanifest %s\n"
                       % ("hit" if self.incache else "miss", self.cachekey))

        return self.__cachedmanifest

    def _incache(self):
        """Tell whether a fast manifest is expected to be available"""
        if self.incache or self.debugfastmanifest:
            return True
        elif self.cachekey:
            return self.fastcache.containsnode(self.cachekey)
        return False

    def _manifest(self, operation):
        """Return the manifest to run ``operation`` on

        ``operation`` is currently unused: the cached manifest is preferred
        whenever there is one, the flat manifest is the fallback.
        """
        # Get the manifest most suited for the operations (flat or cached)
        # TODO: return fastmanifest when suitable
        c = self._cachedmanifest()
        if c is not None:
            return c

        r = self._flatmanifest()

        return r

    # Proxy all the manifest methods to the underlying manifest except magic
    # methods
    def __getattr__(self, name):
        return getattr(self._manifest(name), name)

    # Magic methods should be proxied differently than __getattr__
    # For the moment they all go through _manifest
    def __iter__(self):
        return self._manifest('__iter__').__iter__()

    def __contains__(self, key):
        return self._manifest('__contains__').__contains__(key)

    def __getitem__(self, key):
        return self._manifest('__getitem__').__getitem__(key)

    def __setitem__(self, key, val):
        return self._manifest('__setitem__').__setitem__(key, val)

    def __delitem__(self, key):
        return self._manifest('__delitem__').__delitem__(key)

    def __len__(self):
        return self._manifest('__len__').__len__()

    def copy(self):
        """Copy the underlying manifest and wrap it in a hybridmanifest

        The node is carried over to the copy. Raises ValueError if the
        underlying copy is of an unknown type.
        """
        copy = self._manifest('copy').copy()
        if isinstance(copy, hybridmanifest):
            return copy
        elif isinstance(copy, fastmanifestdict):
            return hybridmanifest(self.ui, self.opener, fast=copy,
                                  node=self.node)
        elif isinstance(copy, manifest.manifestdict):
            return hybridmanifest(self.ui, self.opener, flat=copy,
                                  node=self.node)
        else:
            raise ValueError("unknown manifest type {0}".format(type(copy)))

    def matches(self, *args, **kwargs):
        """Filter the underlying manifest and wrap the result

        Unlike copy(), the result carries no node. Raises ValueError if the
        filtered manifest is of an unknown type.
        """
        matches = self._manifest('matches').matches(*args, **kwargs)
        if isinstance(matches, hybridmanifest):
            return matches
        elif isinstance(matches, fastmanifestdict):
            return hybridmanifest(self.ui, self.opener, fast=matches)
        elif isinstance(matches, manifest.manifestdict):
            return hybridmanifest(self.ui, self.opener, flat=matches)
        else:
            raise ValueError("unknown manifest type {0}".format(type(matches)))

    def diff(self, m2, *args, **kwargs):
        """Diff against ``m2`` using two manifests of the same type

        Fast manifests are used when both sides are hybrid and expected to be
        cached; flat manifests are used in every other case.
        """
        self.debug("performing diff\n")
        # Find _m1 and _m2 of the same type, to provide the fastest computation
        _m1, _m2 = None, None

        if isinstance(m2, hybridmanifest):
            self.debug("diff: other side is hybrid manifest\n")
            # CACHE HIT
            if self._incache() and m2._incache():
                _m1, _m2 = self._cachedmanifest(), m2._cachedmanifest()
                # _m1 or _m2 can be None if _incache was True if the cache
                # got garbage collected in the meantime or entry is corrupted
                if _m1 is None or _m2 is None:
                    self.debug("diff: unable to load one or "
                               "more manifests\n")
                    _m1, _m2 = self._flatmanifest(), m2._flatmanifest()
            # CACHE MISS
            else:
                self.debug("diff: cache miss\n")
                _m1, _m2 = self._flatmanifest(), m2._flatmanifest()
        else:
            # This happens when diffing against a new manifest (like rev -1)
            self.debug("diff: other side not hybrid manifest\n")
            _m1, _m2 = self._flatmanifest(), m2

        assert type(_m1) == type(_m2)
        return _m1.diff(_m2, *args, **kwargs)
|
|
|
|
|
|
class fastmanifestcache(object):
    """Cache of fast manifests, kept in memory and in a directory on disk

    On-disk entries are files named keyprefix() + hexnode inside
    self.cachepath. Use getinstance() to share a single cache object.
    """
    _instance = None

    @classmethod
    def getinstance(cls, opener, ui):
        """Return the shared cache, creating it on the first call

        Once the instance exists, ``opener`` and ``ui`` are ignored.
        """
        if not cls._instance:
            cls._instance = fastmanifestcache(opener, ui)
        return cls._instance

    def __init__(self, opener, ui):
        self.opener = opener
        self.ui = ui
        self.inmemorycache = {}
        base = opener.join(None)
        self.cachepath = os.path.join(base, CACHE_SUBDIR)
        if not os.path.exists(self.cachepath):
            os.makedirs(self.cachepath)
        if self.ui is None or self.ui.configbool("fastmanifest", "silent"):
            self.debug = silent_debug
        else:
            self.debug = self.ui.debug

    def keyprefix(self):
        """Prefix shared by the names of all the cache entries"""
        return "fast"

    def load(self, fpath):
        """Load the fast manifest stored at ``fpath``, None if it cannot be"""
        try:
            fm = fastmanifest_wrapper.fastManifest.load(fpath)
            # touch on access to make this cache a LRU cache
            os.utime(fpath, None)
        except EnvironmentError:
            return None
        else:
            return fastmanifestdict(fm)

    def dump(self, fpath, manifest):
        """Convert ``manifest`` to a fast manifest and save it at ``fpath``"""
        # TODO: is this already a hybridmanifest/fastmanifest? if so, we may be
        # able to skip a frivolous conversion step.
        fm = fastmanifest_wrapper.fastManifest(manifest.text())
        fm.save(fpath)

    def inmemorycachekey(self, hexnode):
        """Key of ``hexnode`` in the in-memory cache"""
        return (self.keyprefix(), hexnode)

    def filecachepath(self, hexnode):
        """Path of the on-disk entry for ``hexnode``"""
        return os.path.join(self.cachepath, self.keyprefix() + hexnode)

    def get(self, hexnode):
        """Return the cached manifest for ``hexnode``, None on a miss

        The in-memory cache is consulted first, then the disk; a manifest
        loaded from disk is remembered in memory.
        """
        # In memory cache lookup
        ident = self.inmemorycachekey(hexnode)
        r = self.inmemorycache.get(ident, None)
        # Compare against None explicitly: an empty manifest has a length of
        # zero and would otherwise be mistaken for a miss.
        if r is not None:
            return r

        # On disk cache lookup
        realfpath = self.filecachepath(hexnode)
        r = self.load(realfpath)

        # In memory cache update
        if r is not None:
            self.inmemorycache[ident] = r
        return r

    def containsnode(self, hexnode):
        """Tell whether ``hexnode`` is cached, in memory or on disk"""
        if self.inmemorycachekey(hexnode) in self.inmemorycache:
            return True
        return os.path.exists(self.filecachepath(hexnode))

    def put(self, hexnode, manifest):
        """Store ``manifest`` on disk under ``hexnode`` unless already cached

        The entry is written to a temporary file that is then renamed to its
        final location.
        """
        if self.containsnode(hexnode):
            self.debug("skipped %s, already cached\n" % hexnode)
        else:
            self.debug("caching revision %s\n" % hexnode)

            realfpath = self.filecachepath(hexnode)
            tmpfpath = util.mktempcopy(realfpath, True)
            try:
                self.dump(tmpfpath, manifest)
                util.rename(tmpfpath, realfpath)
            finally:
                # Only does something if the rename did not happen
                try:
                    os.unlink(tmpfpath)
                except OSError:
                    pass

    def __iter__(self):
        """Iterate over the file names of the on-disk entries"""
        for f in os.listdir(self.cachepath):
            if f.startswith(self.keyprefix()):
                yield f

    def entrysize(self, f):
        """Size in bytes of the entry named ``f``, None if it cannot be read"""
        try:
            return os.path.getsize(os.path.join(self.cachepath, f))
        except EnvironmentError:
            return None

    def prune(self, limit):
        """Remove the least recently used entries until ``limit`` is respected

        ``limit`` is an object whose bytes() method gives the maximum total
        size of the on-disk cache. Entries are kept from the most recently
        modified one onwards for as long as they fit.
        """
        # Get the list of entries and mtime first to avoid race condition
        entries = []
        for entry in self:
            try:
                path = os.path.join(self.cachepath, entry)
                entries.append((entry, os.path.getmtime(path),
                                os.path.getsize(path)))
            except EnvironmentError:
                pass

        maxbytes = limit.bytes()
        # Do nothing, we don't exceed the limit
        if maxbytes > sum([e[2] for e in entries]):
            self.debug("nothing to do, cache size < limit\n")
            return
        # [most recently accessed, second most recently accessed ...]
        entriesbyage = sorted(entries, key=lambda x:-x[1])

        # We traverse the list of entries from the newest to the oldest
        # and once we hit the limit of what we can keep, we stop and
        # trim what is above the limit
        sizetokeep = 0
        # Defaults to "discard nothing": when everything fits (total size
        # equal to the limit) the loop below never breaks.
        startindextodiscard = len(entriesbyage)
        for i, entry in enumerate(entriesbyage):
            if sizetokeep + entry[2] > maxbytes:
                startindextodiscard = i
                break
            sizetokeep += entry[2]

        for entry in entriesbyage[startindextodiscard:]:
            try:
                self.debug("removing cached manifest %s\n" % entry[0])
                os.unlink(os.path.join(self.cachepath, entry[0]))
            except EnvironmentError:
                pass

    def pruneall(self):
        """Remove every on-disk entry of the cache"""
        for f in self:
            self.debug("removing cached manifest %s\n" % f)
            try:
                os.unlink(os.path.join(self.cachepath, f))
            except EnvironmentError:
                pass
|
|
|
|
class manifestfactory(object):
    """Builds hybridmanifest objects in place of the wrapped manifest methods

    Holding the ui here lets every manifest that gets created share it.
    """
    def __init__(self, ui):
        self.ui = ui

    def newmanifest(self, orig, *args, **kwargs):
        """Wrapper returning a hybridmanifest that lazily calls ``orig``"""
        def loadflat():
            return orig(*args, **kwargs)

        opener = args[0].opener
        return hybridmanifest(self.ui, opener, loadflat=loadflat)

    def read(self, orig, *args, **kwargs):
        """Same as newmanifest, but also records the node (args[1])"""
        def loadflat():
            return orig(*args, **kwargs)

        opener = args[0].opener
        node = args[1]
        return hybridmanifest(self.ui, opener, loadflat=loadflat, node=node)
|
|
|
|
def daemonize(ui, repo):
    """Carry on in a detached child process with all output discarded

    Flushes the pending output, points the standard streams of both ``sys``
    and ``ui`` at os.devnull, then forks twice with an os.setsid() in
    between. Each parent exits with status 0, so only the last child returns
    from this function.
    """
    for stream in (sys.stdout, sys.stderr, ui.fout, ui.ferr):
        stream.flush()

    devnullin = open(os.devnull, "r")
    ui.fin = devnullin
    sys.stdin = devnullin

    devnullout = open(os.devnull, "w")
    ui.fout = devnullout
    ui.ferr = devnullout
    sys.stdout = devnullout
    sys.stderr = devnullout
    repo.ui = ui

    # First fork: the original process exits
    if os.fork() > 0:
        sys.exit(0)
    os.setsid()
    # Second fork: the intermediate process exits as well
    if os.fork() > 0:
        sys.exit(0)
|
|
|
|
class fixedcachelimit(object):
    """Cache limit that is a constant number of bytes"""

    def __init__(self, bytes):
        # Stored as given and handed back untouched by bytes()
        self._bytes = bytes

    def bytes(self):
        """Return the limit this object was created with"""
        return self._bytes
|
|
|
|
|
|
GB = 1024**3
MB = 1024**2
# Defaults of the [fastmanifest] options read by systemawarecachelimit
DEFAULT_LOWGROWTH_TRESHOLDGB = 20
DEFAULT_MAXCACHESIZEGB = 6
DEFAULT_LOWGROWTH_SLOPE = 0.1
DEFAULT_HIGHGROWTHSLOPE = 0.2

class systemawarecachelimit(object):
    """A limit that will be tighter as the free disk space reduces"""
    def __init__(self, repo):
        """Probe the free space and read the tuning options

        The free and total sizes are those of the filesystem holding
        ``repo.root``. If any of the four options cannot be converted to a
        float, all four fall back to their default values.
        """
        # Probe the filesystem holding the repository to know what is
        # available
        st = os.statvfs(repo.root)
        self.free = st.f_bavail * st.f_frsize
        self.total = st.f_blocks * st.f_frsize
        # Read parameters from config
        self.lowgrowththresholdgb = repo.ui.config("fastmanifest",
                                                   "lowgrowththresholdgb",
                                                   DEFAULT_LOWGROWTH_TRESHOLDGB)
        self.lowgrowslope = repo.ui.config("fastmanifest",
                                           "lowgrowthslope",
                                           DEFAULT_LOWGROWTH_SLOPE)
        self.maxcachesizegb = repo.ui.config("fastmanifest",
                                             "maxcachesizegb",
                                             DEFAULT_MAXCACHESIZEGB)
        self.highgrowthslope = repo.ui.config("fastmanifest",
                                              "highgrowthslope",
                                              DEFAULT_HIGHGROWTHSLOPE)
        try:
            self.lowgrowththresholdgb = float(self.lowgrowththresholdgb)
            self.lowgrowslope = float(self.lowgrowslope)
            self.maxcachesizegb = float(self.maxcachesizegb)
            self.highgrowthslope = float(self.highgrowthslope)
        except (TypeError, ValueError):
            # One bad value invalidates the whole set: go back to defaults
            self.lowgrowththresholdgb = DEFAULT_LOWGROWTH_TRESHOLDGB
            self.lowgrowslope = DEFAULT_LOWGROWTH_SLOPE
            self.maxcachesizegb = DEFAULT_MAXCACHESIZEGB
            self.highgrowthslope = DEFAULT_HIGHGROWTHSLOPE

    def bytes(self):
        """Return the cache size allowed for the free space probed at init"""
        return systemawarecachelimit.cacheallocation(
            self.free,
            lowgrowththresholdgb=self.lowgrowththresholdgb,
            lowgrowslope=self.lowgrowslope,
            maxcachesizegb=self.maxcachesizegb,
            highgrowthslope=self.highgrowthslope)

    @staticmethod
    def cacheallocation(freespace,
                        lowgrowththresholdgb=DEFAULT_LOWGROWTH_TRESHOLDGB,
                        lowgrowslope=DEFAULT_LOWGROWTH_SLOPE,
                        maxcachesizegb=DEFAULT_MAXCACHESIZEGB,
                        highgrowthslope=DEFAULT_HIGHGROWTHSLOPE):
        """Given the free space available in bytes, return the size of the cache

        When disk space is limited (less than lowgrowththreshold), we increase
        the cache size linearly: lowgrowslope * freespace. Over
        lowgrowththreshold, we increase the cache size linearly but faster:
        highgrowthslope * freespace until we hit maxcachesize.

        These values are configurable, default values are:

        [fastmanifest]
        lowgrowththresholdgb = 20
        lowgrowthslope = 0.1
        highgrowthslope = 0.2
        maxcachesizegb = 6

        ^ Cache Size
        |
        |      /-------------------  <- maxcachesize
        |     |
        |    /    <- slope is highgrowthslope
        |   |     <- lowgrowththreshold
        |  /
        | /       <- slope is lowgrowslope
        |/
        -------------------------> Free Space
        """

        if freespace < lowgrowththresholdgb * GB:
            return min(maxcachesizegb * GB, lowgrowslope * freespace)
        else:
            return min(maxcachesizegb * GB, highgrowthslope * freespace)
|
|
|
|
|
|
def _cachemanifestpruneall(ui, repo, background):
    """Empty the manifest cache, after daemonizing if ``background`` is set"""
    if background:
        daemonize(ui, repo)
    fastmanifestcache.getinstance(repo.store.opener, ui).pruneall()
|
|
|
|
def _cachemanifestlist(ui, repo):
    """Print every cache entry with its size, then the total cache size"""
    cache = fastmanifestcache.getinstance(repo.store.opener, ui)
    totalsize = 0
    for entry in cache:
        entrysize = cache.entrysize(entry)
        # entrysize() gives None when the file cannot be read; -1 is what
        # this check used to compare against and is still skipped.
        if entrysize is None or entrysize == -1:
            # Entry was deleted by another process
            continue
        totalsize += entrysize
        ui.status(("%s (size %s)\n" % (entry, util.bytecount(entrysize))))
    ui.status(("cache size is: %s\n" % util.bytecount(totalsize)))
|
|
|
|
def _cachemanifestfillandtrim(ui, repo, revs, limit, background):
    """Cache the manifests of ``revs``, then prune the cache down to ``limit``

    Runs after daemonizing when ``background`` is set. No pruning happens
    when ``limit`` is None.
    """
    if background:
        daemonize(ui, repo)
    cache = fastmanifestcache.getinstance(repo.store.opener, ui)

    for rev in revs:
        mannode = revlog.hex(repo.changelog.changelogrevision(rev).manifest)
        if not cache.containsnode(mannode):
            revmanifest = repo[rev].manifest()
            cache.put(mannode, revmanifest)
        else:
            ui.debug("skipped %s, already cached (fast path)\n" % mannode)

    if limit is None:
        return
    cache.prune(limit)
|
|
|
|
class fastmanifestdict(object):
    """Manifest interface on top of a fastmanifest_wrapper.fastManifest

    Entries of the wrapped object are read and written as (node, flags)
    pairs indexed by file name.
    """
    def __init__(self, fm):
        self._fm = fm

    def __getitem__(self, key):
        return self._fm[key][0]

    def find(self, key):
        """Return the (node, flags) pair stored for ``key``"""
        return self._fm[key]

    def __len__(self):
        return len(self._fm)

    def __setitem__(self, key, node):
        if len(node) == 22:
            # sometimes we set the 22nd byte.  this is not preserved by
            # lazymanifest or manifest::_lazymanifest.
            node = node[:21]
        # keep whatever flags the entry already has
        self._fm[key] = node, self.flags(key, '')

    def __contains__(self, key):
        return key in self._fm

    def __delitem__(self, key):
        del self._fm[key]

    def __iter__(self):
        return self._fm.__iter__()

    def iterkeys(self):
        return self._fm.iterkeys()

    def iterentries(self):
        return self._fm.iterentries()

    def iteritems(self):
        # only the first two fields of every entry
        return (x[:2] for x in self._fm.iterentries())

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        diff = self.diff(m2)
        files = set(filepath
                    for filepath, hashflags in diff.iteritems()
                    if hashflags[1][0] is None)
        return files

    @util.propertycache
    def _dirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        return (len(files) < 100 and (match.isexact() or
            (match.prefix() and all(fn in self for fn in files))))

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        fset.discard('.')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            nfm = fastmanifest_wrapper.fastManifest()
            for fn in match.files():
                if fn in self._fm:
                    nfm[fn] = self._fm[fn]
            m = fastmanifestdict(nfm)
            return m

        nfm = self._fm.filtercopy(match)
        m = fastmanifestdict(nfm)
        return m

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._fm.diff(m2._fm, clean)

    def setflag(self, key, flag):
        self._fm[key] = self[key], flag

    def get(self, key, default=None):
        try:
            return self._fm[key][0]
        except KeyError:
            return default

    def flags(self, key, default=''):
        try:
            return self._fm[key][1]
        except KeyError:
            return default

    def copy(self):
        c = fastmanifestdict(self._fm.copy())
        return c

    def text(self, usemanifestv2=False):
        """Return the text of the manifest, in v2 format if requested"""
        if usemanifestv2:
            # _textv2 lives in mercurial's manifest module, it is not
            # defined in this file
            return manifest._textv2(self._fm.iterentries())
        else:
            # use (probably) native version for v1
            return self._fm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        ``changes`` yields (filename, todelete) pairs. Returns the tuple
        (arraytext, deltatext). Raises AssertionError if a file to delete is
        not found in ``base``.
        """
        delta = []
        dstart = None
        dend = None
        dline = [""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < 1000:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = manifest._msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._fm[f]
                    l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out
                        # (the translation helper is not imported at the
                        # top of this file)
                        from mercurial.i18n import _
                        raise AssertionError(
                                _("failed to remove %s from manifest") % f)
                    l = ""
                if dstart is not None and dstart <= start and dend >= start:
                    # overlaps or touches the pending delta: extend it
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    if dstart is not None:
                        delta.append([dstart, dend, "".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, "".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = manifest._addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = array.array('c', self.text())
            deltatext = mdiff.textdiff(base, arraytext)

        return arraytext, deltatext
|
|
|
|
@command('^debugcachemanifest', [
    ('r', 'rev', [], 'cache the manifest for revs', 'REV'),
    ('a', 'all', False, 'cache all relevant revisions', ''),
    ('l', 'limit', -1, 'limit size of total rev in bytes', 'BYTES'),
    ('p', 'pruneall', False, 'prune all the entries'),
    ('b', 'background', False,
     'return imediately and process in the background', ''),
    ('e', 'list', False, 'list the content of the cache and its size','')],
    'hg debugcachemanifest')
def debugcachemanifest(ui, repo, *pats, **opts):
    """Fill, trim, list or empty the manifest cache

    --pruneall and --list are mutually exclusive and take precedence over
    caching revisions. A --limit of -1 (the default) means no limit.
    """
    background = opts["background"]
    if opts["limit"] == -1 :
        limit = None
    else:
        limit = fixedcachelimit(opts["limit"])

    pruneall = opts["pruneall"]
    displaylist = opts['list']
    if opts["all"]:
        revs = scmutil.revrange(repo, ["fastmanifesttocache()"])
    elif opts["rev"]:
        revs = scmutil.revrange(repo, opts["rev"])
    else:
        revs = []

    ui.debug(("caching rev: %s, background(%s), pruneall(%s), list(%s)\n")
             % (revs, background, pruneall, displaylist))

    if displaylist and pruneall:
        # mercurial.error is not imported at the top of this file
        from mercurial import error
        raise error.Abort("can only use --pruneall or --list not both")

    if pruneall:
        _cachemanifestpruneall(ui, repo, background)
        return

    if displaylist:
        _cachemanifestlist(ui, repo)
        return

    if revs or limit:
        _cachemanifestfillandtrim(ui, repo, revs, limit, background)
|
|
|
|
def triggercacheonbookmarkchange(orig, self, *args, **kwargs):
    """Wrapper: cache the relevant manifests, then call the wrapped function

    The repository is taken from ``self._repo``. The caching runs
    synchronously and without any size limit.
    """
    repository = self._repo
    tocache = scmutil.revrange(repository, ["fastmanifesttocache()"])
    _cachemanifestfillandtrim(repository.ui, repository, tocache, None, False)
    return orig(self, *args, **kwargs)
|
|
|
|
def triggercacheondirstatechange(orig, self, *args, **kwargs):
    """Wrapper: cache the relevant manifests, then call the wrapped function

    Nothing gets cached when ``self`` has no ``_fastmanifestrepo``
    attribute; the wrapped function is called in both cases.
    """
    hasrepo = util.safehasattr(self, "_fastmanifestrepo")
    if hasrepo:
        repository = self._fastmanifestrepo
        tocache = scmutil.revrange(repository, ["fastmanifesttocache()"])
        _cachemanifestfillandtrim(repository.ui, repository, tocache,
                                  None, False)
    return orig(self, *args, **kwargs)
|
|
|
|
def triggercacheonremotenameschange(orig, repo, *args, **kwargs):
    """Wrapper: cache the relevant manifests, then call the wrapped function

    Unlike the other triggers, the wrapped function takes the repository
    itself as first argument, which is forwarded to ``orig`` as is.
    """
    revs = scmutil.revrange(repo, ["fastmanifesttocache()"])
    _cachemanifestfillandtrim(repo.ui, repo, revs, None, False)
    return orig(repo, *args, **kwargs)
|
|
|
|
def extsetup(ui):
    """Install the wrappers and the revset provided by this extension

    Optional pieces are driven by the [fastmanifest] section: ``logfile``
    enables the manifest access log, ``cacheonchange`` (a boolean) enables
    the triggers caching manifests when bookmarks, the dirstate or the
    remotenames get written.
    """
    logfile = ui.config("fastmanifest", "logfile", "")
    factory = manifestfactory(ui)
    if logfile:
        logger = manifestaccesslogger(logfile)
        extensions.wrapfunction(manifest.manifest, 'rev', logger.revwrap)
    # Wraps all the function creating a manifestdict
    # We have to do that because the logic to create manifest can take
    # 7 different codepaths and we want to retain the node information
    # that comes at the top level:
    #
    # read -> _newmanifest ---------------------------> manifestdict
    #
    # readshallowfast -> readshallow -----------------> manifestdict
    #    \                    \------> _newmanifest --> manifestdict
    #    --> readshallowdelta ------------------------> manifestdict
    #         \->readdelta    -------> _newmanifest --> manifestdict
    #             \->slowreaddelta --> _newmanifest --> manifestdict
    #
    # othermethods -----------------------------------> manifestdict
    #
    # We can have hybridmanifest that wraps one hybridmanifest in some
    # codepath. We resolve to the correct flatmanifest when asked in the
    # _flatmanifest method
    #
    # The recursion level is at most 2 because we wrap the two top level
    # functions and _newmanifest (wrapped only for the case of -1)

    extensions.wrapfunction(manifest.manifest, '_newmanifest',
                            factory.newmanifest)
    extensions.wrapfunction(manifest.manifest, 'read', factory.read)
    try:
        extensions.wrapfunction(manifest.manifest, 'readshallowfast',
                                factory.read)
    except AttributeError:
        # The function didn't use to be defined in previous versions of hg
        pass

    revset.symbols['fastmanifesttocache'] = fastmanifesttocache
    revset.safesymbols.add('fastmanifesttocache')

    # configbool, not config: config would hand back the raw string and
    # "cacheonchange = False" would then count as enabled.
    if ui.configbool("fastmanifest", "cacheonchange", False):
        # Trigger to enable caching of relevant manifests
        extensions.wrapfunction(bookmarks.bmstore, '_write',
                                triggercacheonbookmarkchange)
        extensions.wrapfunction(dirstate.dirstate, 'write',
                                triggercacheondirstatechange)
        try:
            remotenames = extensions.find('remotenames')
        except KeyError:
            pass
        else:
            if remotenames:
                extensions.wrapfunction(remotenames, 'saveremotenames',
                                        triggercacheonremotenameschange)

        # Give the dirstate a reference to its repository: this is the
        # attribute triggercacheondirstatechange looks for.
        def wrapdirstate(orig, self):
            dirstate = orig(self)
            dirstate._fastmanifestrepo = self
            return dirstate
        wrapfilecache(localrepo.localrepository, 'dirstate',
                      wrapdirstate)
|