2016-05-26 22:34:38 +03:00
|
|
|
# implementation.py
|
|
|
|
#
|
|
|
|
# Copyright 2016 Facebook, Inc.
|
|
|
|
#
|
|
|
|
# This software may be used and distributed according to the terms of the
|
|
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
|
|
|
|
import array
|
2016-07-12 03:34:01 +03:00
|
|
|
import collections
|
2016-05-26 22:34:38 +03:00
|
|
|
import os
|
|
|
|
import time
|
2016-06-09 00:33:02 +03:00
|
|
|
import heapq
|
2016-05-26 22:34:38 +03:00
|
|
|
|
|
|
|
from mercurial import manifest, mdiff, revlog, util
|
2016-06-17 19:35:39 +03:00
|
|
|
import cachemanager
|
2016-05-26 22:34:38 +03:00
|
|
|
import cfastmanifest
|
2016-06-09 00:34:26 +03:00
|
|
|
from metrics import metricscollector
|
2016-05-26 22:34:38 +03:00
|
|
|
from constants import *
|
|
|
|
|
|
|
|
class hybridmanifest(object):
    """Lazy manifest backed by a flat manifest, a cached fastmanifest, or both.

    At least one of the following must be given to the constructor:
    - flat      an existing flat manifest
    - fast      an existing fast manifest
    - loadflat  a function to load a flat manifest from disk

    Operations are served by the cached fastmanifest whenever one can be
    obtained, and by the flat manifest otherwise (see ``_manifest``).
    """

    def __init__(self, ui, opener,
                 flat=None, fast=None, loadflat=None, node=None):
        self.__flatmanifest = flat
        self.__cachedmanifest = fast
        self.loadflat = loadflat

        # Without any of the three sources there is nothing to proxy to.
        assert not (self.__flatmanifest is None and
                    self.__cachedmanifest is None and
                    self.loadflat is None)

        self.ui = ui
        self.opener = opener
        self.node = node

        # The cache is keyed by the hex form of the node; no node, no key.
        if self.node is None:
            self.cachekey = None
        else:
            self.cachekey = revlog.hex(self.node)

        self.fastcache = fastmanifestcache.getinstance(opener, self.ui)
        self.debugfastmanifest = self.ui.configbool("fastmanifest",
                                                    "debugfastmanifest", False)

        # Tri-state: True/False once known, None while the cache has not been
        # consulted yet.
        self.incache = None if self.__cachedmanifest is None else True

        silent = self.ui.configbool("fastmanifest", "silent")
        self.debug = _silent_debug if silent else self.ui.debug

    def _flatmanifest(self):
        """Return the flat manifest, materializing and memoizing it if needed.

        It is loaded through ``loadflat`` when available, otherwise rebuilt
        from the text of the cached fastmanifest.
        """
        if self.__flatmanifest is not None:
            return self.__flatmanifest

        if self.loadflat is not None:
            # Load the manifest and cache it.
            self.__flatmanifest = self.loadflat()

            if isinstance(self.__flatmanifest, hybridmanifest):
                # See comment in extsetup to see why we have to do that
                self.__flatmanifest = self.__flatmanifest._flatmanifest()
        elif self.__cachedmanifest is not None:
            # build a flat manifest from the text of the fastmanifest.
            self.__flatmanifest = manifest.manifestdict(
                self.__cachedmanifest.text())

        assert isinstance(self.__flatmanifest, manifest.manifestdict)
        return self.__flatmanifest

    def _cachedmanifest(self):
        """Return the fastmanifest for this node, or None if unavailable.

        The cache is consulted only the first time (while ``incache`` is
        still None); that lookup also records a "cachehit" metric sample and
        emits a debug line.
        """
        if self.incache is not None:
            return self.__cachedmanifest

        # Cache lookup
        if self.cachekey is not None and self.cachekey in self.fastcache:
            self.__cachedmanifest = self.fastcache[self.cachekey]
        elif self.node == revlog.nullid:
            self.__cachedmanifest = fastmanifestdict(
                cfastmanifest.fastmanifest())
        elif self.debugfastmanifest:
            # in debug mode, we always convert into a fastmanifest.
            flattext = self._flatmanifest().text()
            self.__cachedmanifest = fastmanifestdict(
                cfastmanifest.fastmanifest(flattext))

        self.incache = self.__cachedmanifest is not None
        metricscollector.get().recordsample("cachehit", hit=self.incache,
                                            node=self.cachekey)
        self.debug("[FM] cache %s for fastmanifest %s\n"
                   % ("hit" if self.incache else "miss", self.cachekey))

        return self.__cachedmanifest

    def _incache(self):
        """Tell whether a fastmanifest is expected to be available.

        This may consult the cache for ``cachekey`` but does not load the
        entry, so a later ``_cachedmanifest()`` can still return None.
        """
        if self.incache or self.debugfastmanifest:
            return True
        if self.cachekey:
            return self.cachekey in self.fastcache
        return False

    def _manifest(self, operation):
        """Pick the manifest best suited for ``operation``.

        ``operation`` is currently unused: the cached manifest wins whenever
        it exists, the flat manifest is the fallback.
        """
        # TODO: return fastmanifest when suitable
        cached = self._cachedmanifest()
        if cached is None:
            return self._flatmanifest()
        return cached

    # Every attribute that is not defined on this class is looked up on the
    # manifest selected by _manifest().
    def __getattr__(self, name):
        return getattr(self._manifest(name), name)

    # Special methods are not routed through __getattr__, so each one is
    # forwarded explicitly to the manifest selected by _manifest().
    def __iter__(self):
        target = self._manifest('__iter__')
        return target.__iter__()

    def __contains__(self, key):
        target = self._manifest('__contains__')
        return target.__contains__(key)

    def __getitem__(self, key):
        target = self._manifest('__getitem__')
        return target.__getitem__(key)

    def __setitem__(self, key, val):
        target = self._manifest('__setitem__')
        return target.__setitem__(key, val)

    def __delitem__(self, key):
        target = self._manifest('__delitem__')
        return target.__delitem__(key)

    def __len__(self):
        target = self._manifest('__len__')
        return target.__len__()

    def _converttohybridmanifest(self, m):
        """Wrap ``m`` in a hybridmanifest unless it already is one.

        Raises ValueError for any type other than hybridmanifest,
        fastmanifestdict or manifest.manifestdict.
        """
        # NOTE(review): the wrapper inherits self.node even when ``m`` is the
        # filtered result of matches(); its cachekey may then name a cache
        # entry holding the unfiltered manifest -- confirm this is intended.
        if isinstance(m, hybridmanifest):
            return m
        if isinstance(m, fastmanifestdict):
            return hybridmanifest(self.ui, self.opener, fast=m,
                                  node=self.node)
        if isinstance(m, manifest.manifestdict):
            return hybridmanifest(self.ui, self.opener, flat=m,
                                  node=self.node)
        raise ValueError("unknown manifest type {0}".format(type(m)))

    def copy(self):
        """Copy the underlying manifest and return it as a hybridmanifest."""
        duplicate = self._manifest('copy').copy()
        return self._converttohybridmanifest(duplicate)

    def matches(self, *args, **kwargs):
        """Filter the underlying manifest and return a hybridmanifest."""
        filtered = self._manifest('matches').matches(*args, **kwargs)
        return self._converttohybridmanifest(filtered)

    def _getmatchingtypemanifest(self, m2, operation):
        """Return ``(_m1, _m2, hit)`` with both manifests of the same type.

        When both sides are hybrid and both fastmanifests can be loaded, the
        fast pair is returned and ``hit`` is True; in every other case the
        flat manifests are used and ``hit`` is False.  ``operation`` is only
        used to label debug output.
        """
        hit = False
        if not isinstance(m2, hybridmanifest):
            # This happens when diffing against a new manifest (like rev -1)
            self.debug("[FM] %s: other side not hybrid manifest\n" % operation)
            _m1, _m2 = self._flatmanifest(), m2
        else:
            self.debug("[FM] %s: other side is hybrid manifest\n" % operation)
            if self._incache() and m2._incache():
                # CACHE HIT (tentatively)
                _m1, _m2 = self._cachedmanifest(), m2._cachedmanifest()
                # Either side can still be None even though _incache() was
                # True: the cache may have been garbage collected in the
                # meantime, or the entry may be corrupted.
                hit = _m1 is not None and _m2 is not None
                if not hit:
                    self.debug("[FM] %s: unable to load one or "
                               "more manifests\n" % operation)
                    _m1, _m2 = self._flatmanifest(), m2._flatmanifest()
            else:
                # CACHE MISS
                self.debug("[FM] %s: cache miss\n" % operation)
                _m1, _m2 = self._flatmanifest(), m2._flatmanifest()

        assert type(_m1) == type(_m2)
        return _m1, _m2, hit

    def diff(self, m2, *args, **kwargs):
        """Diff against ``m2`` and record a "diffcachehit" metric sample."""
        self.debug("[FM] performing diff\n")
        _m1, _m2, hit = self._getmatchingtypemanifest(m2, "diff")
        metricscollector.get().recordsample("diffcachehit", hit=hit)
        return _m1.diff(_m2, *args, **kwargs)

    def filesnotin(self, m2, *args, **kwargs):
        """Files missing from ``m2``; records a "filesnotincachehit" sample."""
        self.debug("[FM] performing filesnotin\n")
        _m1, _m2, hit = self._getmatchingtypemanifest(m2, "filesnotin")
        metricscollector.get().recordsample("filesnotincachehit", hit=hit)
        return _m1.filesnotin(_m2, *args, **kwargs)
|
|
|
|
|
2016-05-26 22:34:38 +03:00
|
|
|
class fastmanifestdict(object):
    """Adapter giving a native fastmanifest a manifestdict-like interface.

    ``fm`` is stored as ``self._fm``; its entries are indexed as
    ``(node, flags)`` pairs by the methods below.
    """

    def __init__(self, fm):
        self._fm = fm

    def __getitem__(self, key):
        """Return the node stored for ``key``."""
        entry = self._fm[key]
        return entry[0]

    def find(self, key):
        """Return the raw entry stored for ``key``."""
        return self._fm[key]

    def __len__(self):
        return len(self._fm)

    def __setitem__(self, key, node):
        """Store ``node`` for ``key``, keeping the flags already recorded."""
        if len(node) == 22:
            # sometimes we set the 22nd byte.  this is not preserved by
            # lazymanifest or manifest::_lazymanifest.
            node = node[:21]
        currentflags = self.flags(key, '')
        self._fm[key] = node, currentflags

    def __contains__(self, key):
        return key in self._fm

    def __delitem__(self, key):
        del self._fm[key]

    def __iter__(self):
        return self._fm.__iter__()

    def iterkeys(self):
        return self._fm.iterkeys()

    def iterentries(self):
        return self._fm.iterentries()

    def iteritems(self):
        """Iterate over the first two fields of every native entry."""
        # TODO: we can improve the speed of this by making it return the
        # right thing from the native code
        return (entry[:2] for entry in self._fm.iterentries())

    def keys(self):
        return list(self.iterkeys())

    def filesnotin(self, m2):
        '''Set of files in this manifest that are not in the other'''
        changes = self.diff(m2)
        # A None node on the other side of the diff means the file is absent
        # from m2.
        return {path
                for path, sides in changes.iteritems()
                if sides[1][0] is None}

    @util.propertycache
    def _dirs(self):
        return util.dirs(self)

    def dirs(self):
        return self._dirs

    def hasdir(self, dir):
        return dir in self._dirs

    def _filesfastpath(self, match):
        '''Checks whether we can correctly and quickly iterate over matcher
        files instead of over manifest files.'''
        files = match.files()
        if not len(files) < 100:
            return False
        return (match.isexact() or
                (match.prefix() and all(fn in self for fn in files)))

    def walk(self, match):
        '''Generates matching file names.

        Equivalent to manifest.matches(match).iterkeys(), but without creating
        an entirely new manifest.

        It also reports nonexistent files by marking them bad with match.bad().
        '''
        if match.always():
            for name in self:
                yield name
            return

        pending = set(match.files())

        # avoid the entire walk if we're only looking for specific files
        if self._filesfastpath(match):
            for name in sorted(pending):
                yield name
            return

        for name in self:
            # a specified pattern that is an exact file name has been found
            pending.discard(name)
            if match(name):
                yield name

        # for dirstate.walk, files=['.'] means "walk the whole tree".
        # follow that here, too
        pending.discard('.')

        # Whatever is left was asked for explicitly but matched no file;
        # report it unless it names a directory.
        for name in sorted(pending):
            if not self.hasdir(name):
                match.bad(name, None)

    def matches(self, match):
        '''generate a new manifest filtered by the match argument'''
        if match.always():
            return self.copy()

        if not self._filesfastpath(match):
            return fastmanifestdict(self._fm.filtercopy(match))

        # Few explicit files: copy only those entries into a new fastmanifest.
        subset = cfastmanifest.fastmanifest()
        for name in match.files():
            if name in self._fm:
                subset[name] = self._fm[name]
        return fastmanifestdict(subset)

    def diff(self, m2, clean=False):
        '''Finds changes between the current manifest and m2.

        Args:
          m2: the manifest to which this manifest should be compared.
          clean: if true, include files unchanged between these manifests
                 with a None value in the returned dictionary.

        The result is returned as a dict with filename as key and
        values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
        nodeid in the current/other manifest and fl1/fl2 is the flag
        in the current/other manifest. Where the file does not exist,
        the nodeid will be None and the flags will be the empty
        string.
        '''
        # m2 must expose a native fastmanifest as ``_fm``.
        return self._fm.diff(m2._fm, clean)

    def setflag(self, key, flag):
        """Replace the flag of ``key`` while keeping its node."""
        node = self[key]
        self._fm[key] = node, flag

    def get(self, key, default=None):
        """Return the node for ``key``, or ``default`` when it is missing."""
        try:
            return self._fm[key][0]
        except KeyError:
            return default

    def flags(self, key, default=''):
        """Return the flags for ``key``, or ``default`` when it is missing."""
        try:
            return self._fm[key][1]
        except KeyError:
            return default

    def copy(self):
        return fastmanifestdict(self._fm.copy())

    def text(self, usemanifestv2=False):
        """Return the manifest text; only the v1 format is supported."""
        if usemanifestv2:
            raise NotImplementedError("v2 not supported")
        # use (probably) native version for v1
        return self._fm.text()

    def fastdelta(self, base, changes):
        """Given a base manifest text as an array.array and a list of changes
        relative to that text, compute a delta that can be used by revlog.

        ``changes`` yields ``(filename, todelete)`` pairs.  Returns the tuple
        ``(arraytext, deltatext)``.
        """
        hunks = []
        hunkstart = None
        hunkend = None
        hunklines = [""]
        pos = 0
        # zero copy representation of base as a buffer
        searchbuf = util.buffer(base)

        changes = list(changes)
        if len(changes) >= 1000:
            # For large changes, it's much cheaper to just build the text and
            # diff it.
            newtext = array.array('c', self.text())
            return newtext, mdiff.textdiff(base, newtext)

        # start with a readonly loop that finds the offset of
        # each line and creates the deltas
        for fname, todelete in changes:
            # pos will either be the index of the item or the insert point
            pos, end = manifest._msearch(searchbuf, fname, pos)
            if todelete:
                if pos == end:
                    # item we want to delete was not found, error out
                    raise AssertionError(
                        (("failed to remove %s from manifest") % fname))
                line = ""
            else:
                node, flag = self._fm[fname]
                line = "%s\0%s%s\n" % (fname, revlog.hex(node), flag)

            touchesopenhunk = (hunkstart is not None and
                               hunkstart <= pos and hunkend >= pos)
            if touchesopenhunk:
                # grow the hunk being built instead of starting a new one
                if hunkend < end:
                    hunkend = end
                if line:
                    hunklines.append(line)
            else:
                if hunkstart is not None:
                    hunks.append([hunkstart, hunkend, "".join(hunklines)])
                hunkstart = pos
                hunkend = end
                hunklines = [line]

        if hunkstart is not None:
            hunks.append([hunkstart, hunkend, "".join(hunklines)])
        # apply the delta to the base, and get a delta for addrevision
        deltatext, newtext = manifest._addlistdelta(base, hunks)

        return newtext, deltatext
|
|
|
|
|
|
|
|
|
2016-06-17 19:35:39 +03:00
|
|
|
class ondiskcache(object):
    """Directory of serialized fastmanifests, one file per manifest node.

    Entries are stored as ``<cachepath>/<pathprefix><hexnode>``.  File
    modification times are used as the recency signal: reads refresh them and
    ``items()`` lists the most recently touched entries first.
    """

    def __init__(self, debugf, opener, ui):
        """
        debugf: callable taking a message, used for debug output.
        opener: object whose ``join(None)`` gives the base directory.
        ui:     ui object, used by ``totalsize(silent=False)``.
        """
        self.debugf = debugf
        self.opener = opener
        self.ui = ui
        self.pathprefix = "fast"
        base = opener.join(None)
        self.cachepath = os.path.join(base, CACHE_SUBDIR)
        if not os.path.exists(self.cachepath):
            try:
                os.makedirs(self.cachepath)
            except EnvironmentError:
                # Likely permission issues, in that case, we won't be able to
                # access the cache afterwards
                pass

    def _pathfromnode(self, hexnode):
        """Return the path of the cache file for ``hexnode``."""
        return os.path.join(self.cachepath, self.pathprefix + hexnode)

    def touch(self, hexnode, delay=0):
        """Set the entry's timestamps to ``delay`` seconds before now.

        Failures (e.g. the entry does not exist) are ignored.
        """
        filetime = time.time() - delay
        path = self._pathfromnode(hexnode)
        try:
            self.debugf("[FM] refreshing %s with delay %d\n" %(hexnode, delay))
            os.utime(path, (filetime, filetime))
        except EnvironmentError:
            pass

    def __contains__(self, hexnode):
        path = self._pathfromnode(hexnode)
        return os.path.exists(path)

    def items(self):
        """Return the entries in the cache, sorted from most relevant to least
        relevant"""
        try:
            names = os.listdir(self.cachepath)
        except EnvironmentError:
            # The cache directory is missing or unreadable (__init__ tolerates
            # failing to create it): behave like an empty cache.
            return []

        prefix = self.pathprefix
        entries = []
        for entry in names:
            if not entry.startswith(prefix):
                continue
            path = os.path.join(self.cachepath, entry)
            try:
                entries.append((entry, os.path.getmtime(path)))
            except EnvironmentError:
                # The entry disappeared between listdir and stat: skip it.
                pass
        # Newest first; ties are broken by name to keep the order stable.
        entries.sort(key=lambda x: (-x[1], x[0]))
        # Strip only the leading prefix (str.replace would also remove any
        # later occurrence of it in the file name).
        return [x[0][len(prefix):] for x in entries]

    def __iter__(self):
        return iter(self.items())

    def setwithlimit(self, hexnode, manifest, limit=-1):
        """Writes a manifest to the cache.  Returns True if the cache already
        contains the item or if the write is successful.  Returns False if the
        write fails.  Raises CacheFullException if writing the cache entry would
        cause us to pass the limit.

        ``limit`` is a size in bytes; -1 disables the check.
        """
        if hexnode in self:
            return True
        path = self._pathfromnode(hexnode)

        if isinstance(manifest, fastmanifestdict):
            # bytes() and _save() live on the wrapped native manifest, not on
            # the fastmanifestdict adapter.
            fm = manifest._fm
        elif isinstance(manifest, cfastmanifest.fastmanifest):
            fm = manifest
        else:
            fm = cfastmanifest.fastmanifest(manifest.text())

        # Check the limit before creating the temporary file so that nothing
        # is left behind when the cache is full.
        entrysize = fm.bytes()
        if limit != -1 and self.totalsize()[0] + entrysize > limit:
            raise CacheFullException()

        try:
            tmpfpath = util.mktempcopy(path, True)
        except EnvironmentError:
            # Cannot even create the temporary file: the write failed.
            return False

        try:
            fm._save(tmpfpath)
            util.rename(tmpfpath, path)
            return True
        except EnvironmentError:
            return False
        finally:
            # After a successful rename the temporary file is already gone,
            # hence the tolerated OSError.
            try:
                os.unlink(tmpfpath)
            except OSError:
                pass

    def __setitem__(self, hexnode, manifest):
        self.setwithlimit(hexnode, manifest)

    def __delitem__(self, hexnode):
        """Remove the entry; removing a missing entry is not an error."""
        path = self._pathfromnode(hexnode)
        try:
            os.unlink(path)
        except EnvironmentError:
            pass

    def __getitem__(self, hexnode):
        """Load the entry and return it as a fastmanifestdict, or None if it
        cannot be read."""
        path = self._pathfromnode(hexnode)
        try:
            fm = cfastmanifest.fastmanifest.load(path)
        except EnvironmentError:
            return None

        try:
            # touch on access to make this cache a LRU cache
            os.utime(path, None)
        except EnvironmentError:
            # Failing to refresh the timestamp must not discard a manifest
            # that was loaded successfully.
            pass
        return fastmanifestdict(fm)

    def entrysize(self, hexnode):
        """Return the size in bytes of the entry, or None if it cannot be
        determined."""
        try:
            return os.path.getsize(self._pathfromnode(hexnode))
        except EnvironmentError:
            return None

    def totalsize(self, silent=True):
        """Return ``(total bytes, number of entries)`` for the whole cache.

        When ``silent`` is false, every entry is also reported through
        ``ui.status``.
        """
        totalsize = 0
        numentries = 0
        for entry in self:
            entrysize = self.entrysize(entry)
            if entrysize is None:
                # Entry was deleted by another process
                continue
            totalsize += entrysize
            numentries += 1
            if not silent:
                msg = "%s (size %s)\n" % (self.pathprefix + entry,
                                          util.bytecount(entrysize))
                self.ui.status(msg)
        return totalsize, numentries
|
|
|
|
|
2016-06-17 19:35:39 +03:00
|
|
|
class CacheFullException(Exception):
    """Raised when writing a cache entry would exceed the cache size limit."""
|
|
|
|
|
|
|
|
class fastmanifestcache(object):
    """Two-level manifest cache: an in-memory LRU in front of ondiskcache.

    One instance is shared per opener (see ``getinstance``).  ``limit`` is
    the object deciding how large the on-disk cache may grow; the code here
    only relies on its ``bytes()`` method, and treats a missing limit as
    "unbounded".
    """

    @staticmethod
    def getinstance(opener, ui):
        """Return the cache attached to ``opener``, creating it on first use."""
        if not util.safehasattr(opener, 'fastmanifestcache'):
            limit = cachemanager._systemawarecachelimit(opener=opener, ui=ui)
            opener.fastmanifestcache = fastmanifestcache(opener, ui, limit)
        return opener.fastmanifestcache

    def __init__(self, opener, ui, limit):
        self.ui = ui
        if self.ui.configbool("fastmanifest", "silent"):
            self.debug = _silent_debug
        else:
            self.debug = self.ui.debug
        self.ondiskcache = ondiskcache(self.debug, opener, ui)
        # configint rather than config: a value set in the configuration is
        # read as a string, and the LRU needs an integer capacity.
        maxinmemoryentries = self.ui.configint("fastmanifest",
                                               "maxinmemoryentries",
                                               DEFAULT_MAX_MEMORY_ENTRIES)
        self.inmemorycache = util.lrucachedict(maxinmemoryentries)
        self.limit = limit

    def overridelimit(self, limiter):
        """Replace the limit used for all later caching decisions."""
        self.limit = limiter

    def touch(self, hexnode, delay=0):
        """Refresh the on-disk timestamp of ``hexnode``."""
        self.ondiskcache.touch(hexnode, delay)

    def __getitem__(self, hexnode):
        """Return the cached manifest, or None if it cannot be loaded.

        A manifest found on disk is promoted to the in-memory cache.
        """
        if hexnode in self.inmemorycache:
            return self.inmemorycache[hexnode]

        r = self.ondiskcache[hexnode]
        # Compare against None explicitly: an empty manifest has length 0 and
        # is therefore falsy, but is still worth keeping in memory.
        if r is not None:
            self.inmemorycache[hexnode] = r
        return r

    def __contains__(self, hexnode):
        if not self.ui.configbool("fastmanifest", "usecache", True):
            return False
        return hexnode in self.inmemorycache or hexnode in self.ondiskcache

    def __setitem__(self, hexnode, manifest):
        """Cache ``manifest`` on disk (subject to the limit) and in memory.

        CacheFullException raised by the on-disk cache is propagated.
        """
        if hexnode in self.ondiskcache and hexnode in self.inmemorycache:
            self.debug("[FM] skipped %s, already cached\n" % hexnode)
            return

        if self.limit:
            if self.ondiskcache.totalsize()[0] > self.limit.bytes():
                self.debug("[FM] skipped %s, cache full\n" % hexnode)
            else:
                self.debug("[FM] caching revision %s\n" % hexnode)
                self.ondiskcache.setwithlimit(hexnode, manifest,
                                              self.limit.bytes())
        else:
            self.debug("[FM] caching revision %s\n" % hexnode)
            self.ondiskcache[hexnode] = manifest
        # The in-memory copy is stored even when the disk write was skipped.
        self.put_inmemory(hexnode, manifest)

    def put_inmemory(self, hexnode, fmdict):
        """Store a copy of ``fmdict`` in memory unless one is already there."""
        if hexnode not in self.inmemorycache:
            self.inmemorycache[hexnode] = fmdict.copy()

    def __iter__(self):
        return self.ondiskcache.__iter__()

    def prune(self):
        """Evict on-disk entries until the cache fits within the limit."""
        return self.makeroomfor(0, set())

    def pruneall(self):
        """Remove every on-disk entry, least relevant first."""
        for entry in reversed(list(self.ondiskcache)):
            self.debug("[FM] removing cached manifest fast%s\n" % entry)
            del self.ondiskcache[entry]

    def makeroomfor(self, needed, excluded):
        """Make room on disk for a cache entry of size `needed`.  Cache entries
        in `excluded` are not subjected to removal.
        """
        if self.limit is None:
            # No limit configured: the cache is unbounded, nothing to evict.
            return

        cacheentries = collections.deque(self.ondiskcache.items())
        maxtotal = self.limit.bytes() - needed

        # items() lists the most relevant entries first, so popping from the
        # right evicts the least relevant entry each time.
        while (len(cacheentries) > 0 and
               self.ondiskcache.totalsize()[0] > maxtotal):
            candidate = cacheentries.pop()

            if candidate in excluded:
                # it's immune, so skip it.
                continue

            self.debug("[FM] removing cached manifest fast%s\n" % (candidate,))
            del self.ondiskcache[candidate]
|
|
|
|
|
|
|
|
|
2016-05-26 22:34:38 +03:00
|
|
|
class manifestfactory(object):
    """Wrappers around manifest operations that produce hybridmanifests.

    Every method receives the wrapped original callable as ``orig`` followed
    by the arguments of the original call.
    """

    def __init__(self, ui):
        self.ui = ui

    def newmanifest(self, orig, *args, **kwargs):
        """Return a hybridmanifest that calls ``orig`` lazily when needed.

        ``args[0]`` must expose the ``opener`` used to locate the cache.
        """
        return hybridmanifest(self.ui,
                              args[0].opener,
                              loadflat=lambda: orig(*args, **kwargs))

    def read(self, orig, *args, **kwargs):
        """Like ``newmanifest``, but bound to the node given as ``args[1]``."""
        return hybridmanifest(self.ui,
                              args[0].opener,
                              loadflat=lambda: orig(*args, **kwargs),
                              node=args[1])

    def add(self, orig, *args, **kwargs):
        """Add a manifest revision, computing the delta from the cached
        fastmanifest of ``p1`` when possible.

        Falls back to ``orig`` when the cache is disabled, ``p1`` is not
        cached (or can no longer be loaded), or ``m`` is not a hybridmanifest
        with a fastmanifest available.  Returns the new node.
        """
        origself, m, transaction, link, p1, p2, added, removed = args[:8]
        fastcache = fastmanifestcache.getinstance(origself.opener, self.ui)

        p1hexnode = revlog.hex(p1)
        cacheenabled = self.ui.configbool("fastmanifest", "usecache", True)

        p1manifest = None
        if (cacheenabled and
            p1hexnode in fastcache and
            isinstance(m, hybridmanifest) and
            m._incache()):
            # yay, we can satisfy this from the fastmanifest.
            p1manifest = fastcache[p1hexnode]

        if p1manifest is None:
            # Either the fast path does not apply, or the cache entry vanished
            # or could not be read between the membership test and the load.
            return orig(*args, **kwargs)

        manifest._checkforbidden(added)
        # combine the changed lists into one sorted iterator
        work = heapq.merge([(x, False) for x in added],
                           [(x, True) for x in removed])

        # TODO: potential for optimization: avoid this silly conversion to a
        # python array.
        manifestarray = array.array('c', p1manifest.text())

        arraytext, deltatext = m.fastdelta(manifestarray, work)
        cachedelta = origself.rev(p1), deltatext
        text = util.buffer(arraytext)
        node = origself.addrevision(
            text, transaction, link, p1, p2, cachedelta)
        hexnode = revlog.hex(node)

        # _incache() being true does not guarantee the fastmanifest can still
        # be loaded; only remember it in memory when it is actually there.
        cached = m._cachedmanifest()
        if cached is not None:
            fastcache.put_inmemory(hexnode, cached)

        self.ui.debug("[FM] wrote manifest %s\n" % (hexnode,))

        return node
|
|
|
|
|
2016-05-26 22:34:38 +03:00
|
|
|
def _silent_debug(*args, **kwargs):
|
|
|
|
"""Replacement for ui.debug that silently swallows the arguments.
|
|
|
|
Typically enabled when running the mercurial test suite by setting:
|
|
|
|
--extra-config-opt=fastmanifest.silent=True"""
|
|
|
|
pass
|