mirror of
https://github.com/facebook/sapling.git
synced 2024-10-07 07:17:55 +03:00
treemanifest: lazily load manifests
Most operations on treemanifests already visit only relevant submanifests. Notable examples include __getitem__, __contains__, walk/matches with matcher, diff. By making submanifests lazily loaded, we speed up all these operations. The lazy loading is achieved by adding a _load() method that gets defined where we currently eagerly parse the manifest. We make sure to call it before any access to _dirs, _files or _flags. Some timings on the Mozilla repo (with flat manifest timings for reference): hg cat -r . README.txt: 1.644s -> 0.096s (0.255s) hg diff -r .^ -r . : 1.746s -> 0.137s (0.431s) hg files -r . python : 1.508s -> 0.146s (0.335s) hg files -r . : 2.125s -> 2.203s (0.712s)
This commit is contained in:
parent
61642a4536
commit
e9f7136157
@ -441,10 +441,13 @@ def _splittopdir(f):
|
||||
else:
|
||||
return '', f
|
||||
|
||||
_noop = lambda: None
|
||||
|
||||
class treemanifest(object):
|
||||
def __init__(self, dir='', text=''):
|
||||
self._dir = dir
|
||||
self._node = revlog.nullid
|
||||
self._load = _noop
|
||||
self._dirty = False
|
||||
self._dirs = {}
|
||||
# Using _lazymanifest here is a little slower than plain old dicts
|
||||
@ -461,18 +464,22 @@ class treemanifest(object):
|
||||
return self._dir + path
|
||||
|
||||
def __len__(self):
|
||||
self._load()
|
||||
size = len(self._files)
|
||||
for m in self._dirs.values():
|
||||
size += m.__len__()
|
||||
return size
|
||||
|
||||
def _isempty(self):
|
||||
self._load() # for consistency; already loaded by all callers
|
||||
return (not self._files and (not self._dirs or
|
||||
all(m._isempty() for m in self._dirs.values())))
|
||||
|
||||
def __str__(self):
|
||||
return ('<treemanifest dir=%s, node=%s, dirty=%s>' %
|
||||
(self._dir, revlog.hex(self._node), self._dirty))
|
||||
return ('<treemanifest dir=%s, node=%s, loaded=%s, dirty=%s>' %
|
||||
(self._dir, revlog.hex(self._node),
|
||||
bool(self._load is _noop),
|
||||
self._dirty))
|
||||
|
||||
def dir(self):
|
||||
'''The directory that this tree manifest represents, including a
|
||||
@ -491,6 +498,7 @@ class treemanifest(object):
|
||||
self._dirty = False
|
||||
|
||||
def iteritems(self):
|
||||
self._load()
|
||||
for p, n in sorted(self._dirs.items() + self._files.items()):
|
||||
if p in self._files:
|
||||
yield self._subpath(p), n
|
||||
@ -499,6 +507,7 @@ class treemanifest(object):
|
||||
yield f, sn
|
||||
|
||||
def iterkeys(self):
|
||||
self._load()
|
||||
for p in sorted(self._dirs.keys() + self._files.keys()):
|
||||
if p in self._files:
|
||||
yield self._subpath(p)
|
||||
@ -515,6 +524,7 @@ class treemanifest(object):
|
||||
def __contains__(self, f):
|
||||
if f is None:
|
||||
return False
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
if dir not in self._dirs:
|
||||
@ -524,6 +534,7 @@ class treemanifest(object):
|
||||
return f in self._files
|
||||
|
||||
def get(self, f, default=None):
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
if dir not in self._dirs:
|
||||
@ -533,6 +544,7 @@ class treemanifest(object):
|
||||
return self._files.get(f, default)
|
||||
|
||||
def __getitem__(self, f):
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
return self._dirs[dir].__getitem__(subpath)
|
||||
@ -540,6 +552,7 @@ class treemanifest(object):
|
||||
return self._files[f]
|
||||
|
||||
def flags(self, f):
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
if dir not in self._dirs:
|
||||
@ -551,6 +564,7 @@ class treemanifest(object):
|
||||
return self._flags.get(f, '')
|
||||
|
||||
def find(self, f):
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
return self._dirs[dir].find(subpath)
|
||||
@ -558,6 +572,7 @@ class treemanifest(object):
|
||||
return self._files[f], self._flags.get(f, '')
|
||||
|
||||
def __delitem__(self, f):
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
self._dirs[dir].__delitem__(subpath)
|
||||
@ -572,6 +587,7 @@ class treemanifest(object):
|
||||
|
||||
def __setitem__(self, f, n):
|
||||
assert n is not None
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
if dir not in self._dirs:
|
||||
@ -584,6 +600,7 @@ class treemanifest(object):
|
||||
def setflag(self, f, flags):
|
||||
"""Set the flags (symlink, executable) for path f."""
|
||||
assert 'd' not in flags
|
||||
self._load()
|
||||
dir, subpath = _splittopdir(f)
|
||||
if dir:
|
||||
if dir not in self._dirs:
|
||||
@ -597,10 +614,19 @@ class treemanifest(object):
|
||||
copy = treemanifest(self._dir)
|
||||
copy._node = self._node
|
||||
copy._dirty = self._dirty
|
||||
for d in self._dirs:
|
||||
copy._dirs[d] = self._dirs[d].copy()
|
||||
copy._files = dict.copy(self._files)
|
||||
copy._flags = dict.copy(self._flags)
|
||||
def _load():
|
||||
self._load()
|
||||
for d in self._dirs:
|
||||
copy._dirs[d] = self._dirs[d].copy()
|
||||
copy._files = dict.copy(self._files)
|
||||
copy._flags = dict.copy(self._flags)
|
||||
copy._load = _noop
|
||||
copy._load = _load
|
||||
if self._load == _noop:
|
||||
# Chaining _load if it's _noop is functionally correct, but the
|
||||
# chain may end up excessively long (stack overflow), and
|
||||
# will prevent garbage collection of 'self'.
|
||||
copy._load()
|
||||
return copy
|
||||
|
||||
def filesnotin(self, m2):
|
||||
@ -609,6 +635,8 @@ class treemanifest(object):
|
||||
def _filesnotin(t1, t2):
|
||||
if t1._node == t2._node and not t1._dirty and not t2._dirty:
|
||||
return
|
||||
t1._load()
|
||||
t2._load()
|
||||
for d, m1 in t1._dirs.iteritems():
|
||||
if d in t2._dirs:
|
||||
m2 = t2._dirs[d]
|
||||
@ -631,6 +659,7 @@ class treemanifest(object):
|
||||
return self._alldirs
|
||||
|
||||
def hasdir(self, dir):
|
||||
self._load()
|
||||
topdir, subdir = _splittopdir(dir)
|
||||
if topdir:
|
||||
if topdir in self._dirs:
|
||||
@ -673,6 +702,7 @@ class treemanifest(object):
|
||||
return
|
||||
|
||||
# yield this dir's files and walk its submanifests
|
||||
self._load()
|
||||
for p in sorted(self._dirs.keys() + self._files.keys()):
|
||||
if p in self._files:
|
||||
fullp = self._subpath(p)
|
||||
@ -697,6 +727,7 @@ class treemanifest(object):
|
||||
if not match.visitdir(self._dir[:-1] or '.'):
|
||||
return ret
|
||||
|
||||
self._load()
|
||||
for fn in self._files:
|
||||
fullp = self._subpath(fn)
|
||||
if not match(fullp):
|
||||
@ -734,6 +765,8 @@ class treemanifest(object):
|
||||
def _diff(t1, t2):
|
||||
if t1._node == t2._node and not t1._dirty and not t2._dirty:
|
||||
return
|
||||
t1._load()
|
||||
t2._load()
|
||||
for d, m1 in t1._dirs.iteritems():
|
||||
m2 = t2._dirs.get(d, emptytree)
|
||||
_diff(m1, m2)
|
||||
@ -784,6 +817,7 @@ class treemanifest(object):
|
||||
|
||||
def text(self, usemanifestv2=False):
|
||||
"""Get the full data of this manifest as a bytestring."""
|
||||
self._load()
|
||||
flags = self.flags
|
||||
return _text(((f, self[f], flags(f)) for f in self.keys()),
|
||||
usemanifestv2)
|
||||
@ -792,12 +826,23 @@ class treemanifest(object):
|
||||
"""Get the full data of this directory as a bytestring. Make sure that
|
||||
any submanifests have been written first, so their nodeids are correct.
|
||||
"""
|
||||
self._load()
|
||||
flags = self.flags
|
||||
dirs = [(d[:-1], self._dirs[d]._node, 'd') for d in self._dirs]
|
||||
files = [(f, self._files[f], flags(f)) for f in self._files]
|
||||
return _text(sorted(dirs + files), usemanifestv2)
|
||||
|
||||
def read(self, gettext, readsubtree):
|
||||
def _load():
|
||||
# Mark as loaded already here, so __setitem__ and setflag() don't
|
||||
# cause infinite loops when they try to load.
|
||||
self._load = _noop
|
||||
self.parse(gettext(), readsubtree)
|
||||
self._dirty = False
|
||||
self._load = _load
|
||||
|
||||
def writesubtrees(self, m1, m2, writesubtree):
|
||||
self._load() # for consistency; should never have any effect here
|
||||
emptytree = treemanifest()
|
||||
for d, subm in self._dirs.iteritems():
|
||||
subp1 = m1._dirs.get(d, emptytree)._node
|
||||
@ -891,15 +936,17 @@ class manifest(revlog.revlog):
|
||||
return self._newmanifest() # don't upset local cache
|
||||
if node in self._mancache:
|
||||
return self._mancache[node][0]
|
||||
text = self.revision(node)
|
||||
if self._treeondisk:
|
||||
def gettext():
|
||||
return self.revision(node)
|
||||
def readsubtree(dir, subm):
|
||||
return self.dirlog(dir).read(subm)
|
||||
m = self._newmanifest()
|
||||
m.parse(text, readsubtree)
|
||||
m.read(gettext, readsubtree)
|
||||
m.setnode(node)
|
||||
arraytext = None
|
||||
else:
|
||||
text = self.revision(node)
|
||||
m = self._newmanifest(text)
|
||||
arraytext = array.array('c', text)
|
||||
self._mancache[node] = (m, arraytext)
|
||||
|
@ -78,9 +78,11 @@ Removing directory does not create an revlog entry
|
||||
$ rm before after
|
||||
|
||||
Check that hg files (calls treemanifest.walk()) works
|
||||
without loading all directory revlogs
|
||||
|
||||
$ hg co 'desc("add dir2")'
|
||||
2 files updated, 0 files merged, 0 files removed, 0 files unresolved
|
||||
$ mv .hg/store/meta/dir2 .hg/store/meta/dir2-backup
|
||||
$ hg files -r . dir1
|
||||
dir1/a (glob)
|
||||
dir1/b (glob)
|
||||
@ -90,12 +92,14 @@ Check that hg files (calls treemanifest.walk()) works
|
||||
dir1/dir2/b (glob)
|
||||
|
||||
Check that status between revisions works (calls treemanifest.matches())
|
||||
without loading all directory revlogs
|
||||
|
||||
$ hg status --rev 'desc("add dir1")' --rev . dir1
|
||||
A dir1/dir1/a
|
||||
A dir1/dir1/b
|
||||
A dir1/dir2/a
|
||||
A dir1/dir2/b
|
||||
$ mv .hg/store/meta/dir2-backup .hg/store/meta/dir2
|
||||
|
||||
Merge creates 2-parent revision of directory revlog
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user