mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 15:27:13 +03:00
703e9954c1
Summary: Also moved `iterentries` alongside other `iter...` methods. Test Plan: pass unit tests Reviewers: #mercurial, lcharignon Reviewed By: lcharignon Subscribers: mitrandir, mjpieters Differential Revision: https://phabricator.intern.facebook.com/D3294761 Signature: t1:3294761:1463438929:822453d8f99e85858cc8bdbac1188e7614d9abb3
638 lines
21 KiB
Python
638 lines
21 KiB
Python
# fastmanifest.py
|
|
#
|
|
# Copyright 2016 Facebook, Inc.
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
"""
|
|
This extension adds fastmanifest, a treemanifest disk cache for speeding up
|
|
manifest comparison. It also contains utilities to investigate manifest access
|
|
patterns.
|
|
|
|
|
|
Configuration options:
|
|
|
|
[fastmanifest]
|
|
logfile = "" # Filename; if not empty, every manifest access is logged to it
|
|
|
|
|
|
Description:
|
|
|
|
`manifestaccesslogger` logs manifest accesses to the logfile specified with
|
|
the option fastmanifest.logfile
|
|
|
|
`fastmanifesttocache` is a revset of relevant manifests to cache
|
|
|
|
`hybridmanifest` is a proxy class for flat and cached manifest that loads
|
|
manifest from cache or from disk.
|
|
It chooses what kind of manifest is relevant to create based on the operation,
|
|
ideally the fastest.
|
|
TODO instantiate fastmanifest when they are more suitable
|
|
|
|
`manifestcache` is the class handling the interface with the cache, it supports
|
|
caching flat and fast manifest and retrieving them.
|
|
TODO logic for loading fastmanifest
|
|
TODO logic for saving fastmanifest
|
|
TODO garbage collection
|
|
|
|
`manifestfactory` is a class whose methods wrap the manifest-creating methods of
|
|
manifest.manifest. It intercepts the calls to build hybridmanifest instead of
|
|
regular manifests. We use a class for that to allow sharing the ui object that
|
|
is not normally accessible to manifests.
|
|
|
|
`debugcachemanifest` is a command calling `_cachemanifest`, a function to add
|
|
manifests to the cache and manipulate what is cached. It allows caching fast
|
|
and flat manifest, asynchronously and synchronously.
|
|
TODO handle asynchronous save
|
|
TODO size limit handling
|
|
"""
|
|
import array
|
|
import os
|
|
|
|
from mercurial import cmdutil
|
|
from mercurial import extensions
|
|
from mercurial import manifest
|
|
from mercurial import mdiff
|
|
from mercurial import revset
|
|
from mercurial import revlog
|
|
from mercurial import scmutil
|
|
from mercurial import util
|
|
|
|
import fastmanifest_wrapper
|
|
|
|
# Name of the directory holding the on-disk manifest cache; fastmanifestcache
# joins it onto the opener's base path.
CACHE_SUBDIR = "manifestcache"
# Command table populated through the `command` decorator created below.
cmdtable = {}
command = cmdutil.command(cmdtable)
|
|
|
|
|
|
class manifestaccesslogger(object):
    """Record the result of every wrapped call in a log file.

    Used to log manifest accesses so that assumptions about access patterns
    can be checked against real data.
    """

    def __init__(self, logfile):
        # Path of the file that receives one line per wrapped call.
        self._logfile = logfile

    def _record(self, value):
        """Append `value` on its own line to the log file, best effort."""
        try:
            with open(self._logfile, "a") as logfp:
                logfp.write("%s\n" % value)
        except EnvironmentError:
            # Logging is purely diagnostic: an unwritable log file must not
            # make the wrapped call fail.
            pass

    def revwrap(self, orig, *args, **kwargs):
        """Wrap manifest.rev: call `orig`, log its result and return it."""
        result = orig(*args, **kwargs)
        self._record(result)
        return result
|
|
|
|
|
|
def fastmanifesttocache(repo, subset, x):
    """Revset of the interesting revisions to cache

    Selects the revisions matching ``not public() + bookmark()``.

    The result is intersected with ``subset`` so that the predicate composes
    correctly with other revset expressions (for example
    ``foo and fastmanifesttocache()``). ``x``, the parsed argument tree, is
    not used: the predicate takes no arguments.
    """
    revs = scmutil.revrange(repo, ["not public() + bookmark()"])
    # A revset predicate must only return members of the subset it is given.
    return subset & revs
|
|
|
|
|
|
class hybridmanifest(object):
    """
    Hybrid manifest that behaves like a lazy manifest.

    Initialized with one of the three:
    - flat      an existing flat manifest
    - fast      an existing fast manifest
    - loadflat  a function to load a flat manifest from disk

    Attribute access and the container protocol are forwarded to the manifest
    selected by `_manifest`: the cached (fast) manifest when one is available,
    the flat manifest otherwise.
    """
    def __init__(self, ui, opener,
                 flat=None, fast=None, loadflat=None, node=None):
        self.__flatmanifest = flat
        self.__cachedmanifest = fast
        self.loadflat = loadflat

        # At least one source is required, otherwise there is nothing to
        # proxy to.
        assert (self.__flatmanifest is not None or
                self.__cachedmanifest is not None or
                self.loadflat is not None)

        self.ui = ui
        self.opener = opener
        self.node = node

        # The hex node is the cache key; without a node the cache is never
        # consulted.
        self.cachekey = revlog.hex(self.node) if self.node is not None else None

        self.fastcache = fastmanifestcache.getinstance(opener, self.ui)
        self.debugfastmanifest = (self.ui.configbool("fastmanifest",
                                                     "debugfastmanifest")
                                  if self.ui is not None
                                  else False)

        # Tri-state: True/False once known, None while the cache has not been
        # looked up yet.
        self.incache = True if self.__cachedmanifest is not None else None

    def _flatmanifest(self):
        """Return the flat manifest, building and memoizing it if needed."""
        if self.__flatmanifest is None:
            if self.loadflat is not None:
                # Load the manifest and cache it.
                self.__flatmanifest = self.loadflat()

                if isinstance(self.__flatmanifest, hybridmanifest):
                    # See comment in extsetup to see why we have to do that
                    self.__flatmanifest = self.__flatmanifest._flatmanifest()
            elif self.__cachedmanifest is not None:
                # build a flat manifest from the text of the fastmanifest.
                self.__flatmanifest = manifest.manifestdict(
                    self.__cachedmanifest.text())

            assert isinstance(self.__flatmanifest, manifest.manifestdict)
        return self.__flatmanifest

    def _cachedmanifest(self):
        """Return the cached manifest, or None when it is not available.

        The cache is looked up at most once; the outcome is remembered in
        `self.incache`.
        """
        if self.incache is None:
            # Cache lookup
            if (self.cachekey is not None and
                self.fastcache.contains(self.cachekey)):
                self.__cachedmanifest = self.fastcache.get(self.cachekey)

            self.incache = self.__cachedmanifest is not None

            self.ui.debug("cache %s for fastmanifest %s\n"
                          % ("hit" if self.incache else "miss", self.cachekey))

        return self.__cachedmanifest

    def _incache(self):
        """Tell whether a cached manifest is expected to be available.

        This does not load anything: the entry may still fail to load later.
        """
        if self.incache:
            return True
        elif self.cachekey:
            return self.fastcache.contains(self.cachekey)
        return False

    def _manifest(self, operation):
        """Return the underlying manifest to use for `operation`."""
        # Get the manifest most suited for the operations (flat or cached)
        # TODO return fastmanifest when suitable
        if self.debugfastmanifest:
            # Debug mode: always go through a fastmanifest, converting the
            # flat manifest when none is at hand. Compare against None: an
            # empty manifest has a length of 0 and is therefore falsy.
            if self.__cachedmanifest is not None:
                return self.__cachedmanifest

            flatmanifest = self._flatmanifest().text()
            fm = fastmanifest_wrapper.fastManifest(flatmanifest)
            self.__cachedmanifest = fastmanifestdict(fm)
            return self.__cachedmanifest

        c = self._cachedmanifest()
        if c is not None:
            return c

        r = self._flatmanifest()
        return r

    # Proxy all the manifest methods to the underlying manifest except magic
    # methods
    def __getattr__(self, name):
        return getattr(self._manifest(name), name)

    # Magic methods should be proxied differently than __getattr__
    # For the moment they all go through _manifest as well
    def __iter__(self):
        return self._manifest('__iter__').__iter__()

    def __contains__(self, key):
        return self._manifest('__contains__').__contains__(key)

    def __getitem__(self, key):
        return self._manifest('__getitem__').__getitem__(key)

    def __setitem__(self, key, val):
        return self._manifest('__setitem__').__setitem__(key, val)

    def __delitem__(self, key):
        return self._manifest('__delitem__').__delitem__(key)

    def __len__(self):
        return self._manifest('__len__').__len__()

    def copy(self):
        """Return a hybridmanifest wrapping a copy of the underlying manifest.

        The node is carried over to the copy.
        """
        copy = self._manifest('copy').copy()
        if isinstance(copy, hybridmanifest):
            return copy
        elif isinstance(copy, fastmanifestdict):
            return hybridmanifest(self.ui, self.opener, fast=copy,
                                  node=self.node)
        elif isinstance(copy, manifest.manifestdict):
            return hybridmanifest(self.ui, self.opener, flat=copy,
                                  node=self.node)
        else:
            raise ValueError("unknown manifest type {0}".format(type(copy)))

    def matches(self, *args, **kwargs):
        """Return a hybridmanifest wrapping the filtered underlying manifest.

        No node is attached to the result.
        """
        matches = self._manifest('matches').matches(*args, **kwargs)
        if isinstance(matches, hybridmanifest):
            return matches
        elif isinstance(matches, fastmanifestdict):
            return hybridmanifest(self.ui, self.opener, fast=matches)
        elif isinstance(matches, manifest.manifestdict):
            return hybridmanifest(self.ui, self.opener, flat=matches)
        else:
            raise ValueError("unknown manifest type {0}".format(type(matches)))

    def diff(self, m2, *args, **kwargs):
        """Diff this manifest against `m2` using same-typed manifests.

        Cached manifests are used when both sides have one, flat manifests
        otherwise. Extra arguments are passed through to the underlying
        `diff`.
        """
        self.ui.debug("performing diff\n")
        # Find _m1 and _m2 of the same type, to provide the fastest computation
        _m1, _m2 = None, None

        if isinstance(m2, hybridmanifest):
            self.ui.debug("diff: other side is hybrid manifest\n")
            # CACHE HIT
            if self._incache() and m2._incache():
                _m1, _m2 = self._cachedmanifest(), m2._cachedmanifest()
                # _m1 or _m2 can be None if _incache was True if the cache
                # got garbage collected in the meantime or entry is corrupted.
                # Test against None explicitly: an empty manifest is falsy
                # but perfectly valid.
                if _m1 is None or _m2 is None:
                    self.ui.debug("diff: unable to load one or "
                                  "more manifests\n")
                    _m1, _m2 = self._flatmanifest(), m2._flatmanifest()
            # CACHE MISS
            else:
                self.ui.debug("diff: cache miss\n")
                _m1, _m2 = self._flatmanifest(), m2._flatmanifest()
        else:
            # This happens when diffing against a new manifest (like rev -1)
            self.ui.debug("diff: other side not hybrid manifest\n")
            _m1, _m2 = self._flatmanifest(), m2

        assert type(_m1) == type(_m2)
        return _m1.diff(_m2, *args, **kwargs)
|
|
|
|
|
|
class fastmanifestcache(object):
    """Two-level cache of fast manifests: in memory, backed by files on disk.

    Entries are keyed by a string (callers in this file use the hex manifest
    node) and stored on disk under CACHE_SUBDIR in the opener's base
    directory.
    """
    _instance = None

    @classmethod
    def getinstance(cls, opener, ui):
        """Return the shared cache instance, creating it on first use.

        Note that `opener` and `ui` are only used by the first call; later
        calls return the existing instance regardless of their arguments.
        """
        if cls._instance is None:
            cls._instance = cls(opener, ui)
        return cls._instance

    def __init__(self, opener, ui):
        # Local import: only needed to recognise "already exists" below.
        import errno

        self.opener = opener
        self.ui = ui
        self.inmemorycache = {}
        base = opener.join(None)
        self.cachepath = os.path.join(base, CACHE_SUBDIR)
        try:
            os.makedirs(self.cachepath)
        except OSError as ex:
            # The directory may already exist, possibly created concurrently
            # by another process; checking before creating would be racy.
            if ex.errno != errno.EEXIST:
                raise

    def keyprefix(self):
        """Prefix identifying the kind of entries stored by this cache."""
        return "fast"

    def load(self, fpath):
        """Load a fast manifest from `fpath`; None if it cannot be read."""
        try:
            fm = fastmanifest_wrapper.fastManifest.load(fpath)
        except EnvironmentError:
            return None
        else:
            return fastmanifestdict(fm)

    def dump(self, fpath, manifest):
        """Convert `manifest` to a fast manifest and save it to `fpath`."""
        # TODO: is this already a hybridmanifest/fastmanifest? if so, we may be
        # able to skip a frivolous conversion step.
        fm = fastmanifest_wrapper.fastManifest(manifest.text())
        fm.save(fpath)

    def inmemorycachekey(self, key):
        """Key under which `key` is stored in the in-memory cache."""
        return (self.keyprefix(), key)

    def filecachepath(self, key):
        """Path of the on-disk cache file for `key`."""
        return os.path.join(self.cachepath, self.keyprefix() + key)

    def get(self, key):
        """Return the cached manifest for `key`, or None on a miss.

        A manifest loaded from disk is remembered in memory.
        """
        # In memory cache lookup. Compare against None: an empty manifest has
        # a length of 0 and would be mistaken for a miss by a truth test.
        ident = self.inmemorycachekey(key)
        r = self.inmemorycache.get(ident, None)
        if r is not None:
            return r

        # On disk cache lookup
        realfpath = self.filecachepath(key)
        r = self.load(realfpath)

        # In memory cache update
        if r is not None:
            self.inmemorycache[ident] = r
        return r

    def contains(self, key):
        """Tell whether `key` is present in memory or on disk."""
        if self.inmemorycachekey(key) in self.inmemorycache:
            return True
        return os.path.exists(self.filecachepath(key))

    def put(self, key, manifest):
        """Store `manifest` on disk under `key` unless it is already cached.

        The entry is written to a temporary file which is then renamed into
        place.
        """
        if self.contains(key):
            self.ui.debug("skipped %s, already cached\n" % key)
        else:
            self.ui.debug("caching revision %s\n" % key)

            realfpath = self.filecachepath(key)
            tmpfpath = util.mktempcopy(realfpath, True)
            try:
                self.dump(tmpfpath, manifest)
                util.rename(tmpfpath, realfpath)
            finally:
                # Remove the temporary file if it is still around (dump or
                # rename failed); after a successful rename it is gone and
                # the resulting error is ignored.
                try:
                    os.unlink(tmpfpath)
                except OSError:
                    pass

    def prune(self, limit):
        """Placeholder for cache eviction; currently does nothing."""
        # TODO logic to prune old entries
        pass
|
|
|
|
class manifestfactory(object):
    """Wrappers building hybridmanifest objects instead of plain manifests.

    The methods are meant to be installed with extensions.wrapfunction, so
    `orig` is the wrapped function and `args[0]` the object it is bound to.
    The class exists so that the wrappers can hand the ui object over to the
    manifests they create.
    """
    def __init__(self, ui):
        self.ui = ui

    def newmanifest(self, orig, *args, **kwargs):
        """Return a hybridmanifest that lazily calls `orig`, without a node."""
        def loadflat():
            # Deferred call to the wrapped function
            return orig(*args, **kwargs)

        owner = args[0]
        return hybridmanifest(self.ui, owner.opener, loadflat=loadflat)

    def read(self, orig, *args, **kwargs):
        """Return a hybridmanifest that lazily calls `orig`.

        `args[1]` is recorded as the node of the manifest.
        """
        def loadflat():
            # Deferred call to the wrapped function
            return orig(*args, **kwargs)

        owner = args[0]
        return hybridmanifest(self.ui, owner.opener, loadflat=loadflat,
                              node=args[1])
|
|
|
|
|
|
def _cachemanifest(ui, repo, revs, sync, limit):
    """Add the manifests of `revs` to the cache, then prune if `limit` is set.

    `sync` is only reported in the debug output for now.
    """
    ui.debug("caching rev: %s, synchronous(%s)\n" % (revs, sync))
    cache = fastmanifestcache.getinstance(repo.store.opener, ui)

    for rev in revs:
        # Entries are keyed by the hex node of the manifest
        mf = repo[rev].manifest()
        cache.put(revlog.hex(mf.node), mf)

    if limit:
        cache.prune(limit)
|
|
|
|
|
|
# NOTE(review): `limit` is documented as a BYTES value but its default is
# False - confirm whether it is meant to take a value rather than be a flag.
@command('^debugcachemanifest', [
    ('r', 'rev', [], 'cache the manifest for revs', 'REV'),
    ('a', 'all', False, 'cache all relevant revisions', ''),
    ('l', 'limit', False, 'limit size of total rev in bytes', 'BYTES'),
    ('s', 'synchronous', False, 'wait for completion to return', '')],
    'hg debugcachemanifest')
def debugcachemanifest(ui, repo, *pats, **opts):
    """Cache the manifests selected by --all or --rev (none by default)."""
    sync = opts["synchronous"]
    limit = opts["limit"]

    # --all takes precedence over --rev
    if opts["all"]:
        specs = ["fastmanifesttocache()"]
    else:
        specs = opts["rev"]

    revs = scmutil.revrange(repo, specs) if specs else []
    _cachemanifest(ui, repo, revs, sync, limit)
|
|
|
|
|
|
def extsetup(ui):
    """Install the manifest wrappers and register the revset predicate."""
    wrap = extensions.wrapfunction
    target = manifest.manifest

    logfile = ui.config("fastmanifest", "logfile", "")
    factory = manifestfactory(ui)
    if logfile:
        # Access logging is only enabled when a log file is configured
        logger = manifestaccesslogger(logfile)
        wrap(target, 'rev', logger.revwrap)

    # Wrap all the functions creating a manifestdict.
    # We have to do that because the logic to create a manifest can take
    # 7 different codepaths and we want to retain the node information
    # that comes at the top level:
    #
    # read -> _newmanifest ---------------------------> manifestdict
    #
    # readshallowfast -> readshallow -----------------> manifestdict
    #    \                    \------> _newmanifest --> manifestdict
    #    --> readshallowdelta ------------------------> manifestdict
    #         \->readdelta    -------> _newmanifest --> manifestdict
    #         \->slowreaddelta --> _newmanifest --> manifestdict
    #
    # othermethods -----------------------------------> manifestdict
    #
    # A hybridmanifest can end up wrapping another hybridmanifest in some
    # codepaths. The _flatmanifest method resolves to the correct flat
    # manifest when asked.
    #
    # The recursion level is at most 2 because we wrap the two top level
    # functions and _newmanifest (wrapped only for the case of -1)

    wrap(target, '_newmanifest', factory.newmanifest)
    wrap(target, 'read', factory.read)
    try:
        wrap(target, 'readshallowfast', factory.read)
    except AttributeError:
        # The function didn't use to be defined in previous versions of hg
        pass

    revset.symbols['fastmanifesttocache'] = fastmanifesttocache
    revset.safesymbols.add('fastmanifesttocache')
|
|
|
|
class fastmanifestdict(object):
    """Manifest interface on top of a fastmanifest_wrapper.fastManifest.

    `fm` maps a file name to a (node, flags) pair; this class exposes it with
    the methods used elsewhere in this file on flat manifests.
    """
    def __init__(self, fm):
        self._fm = fm

    def __getitem__(self, key):
        """Return the node of `key`."""
        return self._fm[key][0]

    def find(self, key):
        """Return the (node, flags) pair of `key`."""
        return self._fm[key]

    def __len__(self):
        return len(self._fm)

    def __setitem__(self, key, node):
        # Keep the flags already recorded for the file, if any
        self._fm[key] = node, self.flags(key, '')

    def __contains__(self, key):
        return key in self._fm

    def __delitem__(self, key):
        del self._fm[key]

    def __iter__(self):
        return self._fm.__iter__()

    def iterkeys(self):
        return self._fm.iterkeys()

    def iterentries(self):
        return self._fm.iterentries()

    def iteritems(self):
        """Iterate over the first two fields of every entry."""
        return (x[:2] for x in self._fm.iterentries())

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        diff = self.diff(m2)
        files = set(filepath
                    for filepath, hashflags in diff.iteritems()
                    if hashflags[1][0] is None)
        return files

    @util.propertycache
    def _dirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        return (len(files) < 100 and (match.isexact() or
            (match.prefix() and all(fn in self for fn in files))))

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for f in iter(self):
                yield f
            return

        fset = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for fn in sorted(fset):
                yield fn
            return

        for fn in self:
            if fn in fset:
                # specified pattern is the exact name
                fset.remove(fn)
            if match(fn):
                yield fn

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        fset.discard('.')

        for fn in sorted(fset):
            if not self.hasdir(fn):
                match.bad(fn, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if self._filesfastpath(match):
            # Start from an empty fast manifest and copy the few requested
            # entries over. The constructor requires the wrapped object, so
            # it has to be created first.
            # NOTE(review): assumes fastManifest accepts an empty manifest
            # text, like it accepts a flat manifest text elsewhere in this
            # file - confirm.
            nfm = fastmanifest_wrapper.fastManifest("")
            lm = self._fm
            for fn in match.files():
                if fn in lm:
                    nfm[fn] = lm[fn]
            return fastmanifestdict(nfm)

        return fastmanifestdict(self._fm.filtercopy(match))

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        return self._fm.diff(m2._fm, clean)

    def setflag(self, key, flag):
        """Set the flags of `key`, keeping its node."""
        self._fm[key] = self[key], flag

    def get(self, key, default=None):
        """Return the node of `key`, or `default` when it is missing."""
        try:
            return self._fm[key][0]
        except KeyError:
            return default

    def flags(self, key, default=''):
        """Return the flags of `key`, or `default` when it is missing."""
        try:
            return self._fm[key][1]
        except KeyError:
            return default

    def copy(self):
        c = fastmanifestdict(self._fm.copy())
        return c

    def text(self, usemanifestv2=False):
        """Return the manifest text, in v2 format if `usemanifestv2`."""
        if usemanifestv2:
            # The helper lives in the manifest module, like the _msearch and
            # _addlistdelta helpers used by fastdelta.
            return manifest._textv2(self._fm.iterentries())
        else:
            # use (probably) native version for v1
            return self._fm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        `changes` is an iterable of (filename, todelete) pairs. Returns an
        (arraytext, deltatext) pair.

        Raises AssertionError when a file to delete is not found in `base`.
        """
        delta = []
        dstart = None
        dend = None
        dline = [""]
        start = 0
        # zero copy representation of base as a buffer
        addbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) < 1000:
            # start with a readonly loop that finds the offset of
            # each line and creates the deltas
            for f, todelete in changes:
                # bs will either be the index of the item or the insert point
                start, end = manifest._msearch(addbuf, f, start)
                if not todelete:
                    h, fl = self._fm[f]
                    l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
                else:
                    if start == end:
                        # item we want to delete was not found, error out.
                        # The message is left untranslated: no gettext
                        # function is imported by this module.
                        raise AssertionError(
                            "failed to remove %s from manifest" % f)
                    l = ""
                if dstart is not None and dstart <= start and dend >= start:
                    # The change touches the pending delta chunk: extend it
                    if dend < end:
                        dend = end
                    if l:
                        dline.append(l)
                else:
                    # Flush the pending chunk and start a new one
                    if dstart is not None:
                        delta.append([dstart, dend, "".join(dline)])
                    dstart = start
                    dend = end
                    dline = [l]

            if dstart is not None:
                delta.append([dstart, dend, "".join(dline)])
            # apply the delta to the base, and get a delta for addrevision
            deltatext, arraytext = manifest._addlistdelta(base, delta)
        else:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            arraytext = array.array('c', self.text())
            deltatext = mdiff.textdiff(base, arraytext)

        return arraytext, deltatext
|