sapling/treemanifest/__init__.py

346 lines
12 KiB
Python
Raw Normal View History

# __init__.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""allows using and migrating to tree manifests

When autocreatetrees is enabled, you can limit which bookmarks are initially
converted to trees during pull by specifying `treemanifest.allowedtreeroots`.

    [treemanifest]
    allowedtreeroots = master,stable

Enabling `treemanifest.usecunionstore` will cause the extension to use the
native implementation of the datapack stores.

    [treemanifest]
    usecunionstore = True
"""
from mercurial import (
changegroup,
cmdutil,
error,
extensions,
localrepo,
mdiff,
util,
)
from mercurial.i18n import _
from mercurial.node import bin, nullid
from remotefilelog.contentstore import unioncontentstore
from remotefilelog.datapack import datapackstore, mutabledatapack
from remotefilelog.historypack import historypackstore, mutablehistorypack
from remotefilelog import shallowutil
import cstore
import struct
# Command table for this extension; `command` is whatever
# cmdutil.command() returns when bound to that table.
cmdtable = {}
command = cmdutil.command(cmdtable)

# Category name handed to shallowutil when resolving the shared-cache and
# local pack paths used for manifests.
PACK_CATEGORY = 'manifests'
def extsetup(ui):
    """Wrap changegroup manifest unpacking with this extension's hook.

    ``changegroup.cg1unpacker._unpackmanifests`` is wrapped by the
    module-level ``_unpackmanifests`` function. ``ui`` is not used.
    """
    unpacker = changegroup.cg1unpacker
    extensions.wrapfunction(unpacker, '_unpackmanifests', _unpackmanifests)
def reposetup(ui, repo):
    """Per-repository setup: hand ``repo`` to ``wraprepo``.

    ``ui`` is accepted but not used here.
    """
    wraprepo(repo)
def wraprepo(repo):
    """Attach the manifest pack stores to ``repo.svfs``.

    Objects that are not ``localrepo.localrepository`` instances are left
    untouched. Otherwise ``repo.name`` is set from ``remotefilelog.reponame``
    and the following attributes are set on ``repo.svfs``:

    - ``manifestdatastore``: union of the local and shared data pack stores
    - ``sharedmanifestdatastores`` / ``localmanifestdatastores``
    - ``sharedmanifesthistorystores`` / ``localmanifesthistorystores``

    Raises ``error.Abort`` when ``remotefilelog.reponame`` is not configured
    or when the ``fastmanifest`` extension cannot be found.
    """
    if not isinstance(repo, localrepo.localrepository):
        return

    ui = repo.ui

    # repo.name is assigned before validation, so it is set even when we
    # abort below.
    repo.name = ui.config('remotefilelog', 'reponame')
    if not repo.name:
        raise error.Abort(_("remotefilelog.reponame must be configured"))

    try:
        extensions.find('fastmanifest')
    except KeyError:
        raise error.Abort(_("cannot use treemanifest without fastmanifest"))

    usecdatapack = ui.configbool('remotefilelog', 'fastdatapack')

    sharedpath = shallowutil.getcachepackpath(repo, PACK_CATEGORY)
    localpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
                                             PACK_CATEGORY)

    if ui.configbool("treemanifest", "usecunionstore"):
        # Native (cstore) implementation of the data pack stores.
        shared = cstore.datapackstore(sharedpath)
        local = cstore.datapackstore(localpath)
        union = cstore.uniondatapackstore([local, shared])
    else:
        shared = datapackstore(ui, sharedpath, usecdatapack=usecdatapack)
        local = datapackstore(ui, localpath, usecdatapack=usecdatapack)
        union = unioncontentstore(local, shared, writestore=local)

    svfs = repo.svfs
    svfs.manifestdatastore = union
    svfs.sharedmanifestdatastores = [shared]
    svfs.localmanifestdatastores = [local]
    svfs.sharedmanifesthistorystores = [historypackstore(ui, sharedpath)]
    svfs.localmanifesthistorystores = [historypackstore(ui, localpath)]
def _unpackmanifests(orig, self, repo, *args, **kwargs):
    """Wrapper around manifest unpacking that also builds tree manifests.

    The wrapped ``orig`` is always called first. Afterwards, if ``repo.svfs``
    has a ``manifestdatastore`` and ``treemanifest.autocreatetrees`` is
    enabled, every manifest revision added by ``orig`` is passed to
    ``recordmanifest`` and written into new packs under the cache pack path.
    """
    revlog = repo.manifestlog._revlog
    tipbefore = len(revlog)

    orig(self, repo, *args, **kwargs)

    if not util.safehasattr(repo.svfs, "manifestdatastore"):
        return
    if not repo.ui.configbool('treemanifest', 'autocreatetrees'):
        return

    # TODO: only put in cache if pulling from main server
    cachepath = shallowutil.getcachepackpath(repo, PACK_CATEGORY)
    with mutabledatapack(repo.ui, cachepath) as dpack:
        with mutablehistorypack(repo.ui, cachepath) as hpack:
            recordmanifest(dpack, hpack, repo, tipbefore, len(revlog))

    # Alert the store that there may be new packs
    repo.svfs.manifestdatastore.markforrefresh()
class InterceptedMutableDataPack(object):
    """Data pack proxy that rewrites the key of the root tree entry.

    Writes are forwarded to the wrapped pack. For the root entry (empty
    name) the node is replaced with the node given at construction time, so
    that a tree manifest can be referenced via its flat manifest hash.
    """

    def __init__(self, pack, node, p1node):
        # pack:   the underlying pack that receives every add() call
        # node:   node to store the root entry under
        # p1node: delta base to use for the root entry when it has one
        self._pack = pack
        self._node = node
        self._p1node = p1node

    def add(self, name, node, deltabasenode, delta):
        """Forward an entry to the wrapped pack and return its result."""
        if name != "":
            # Non-root entries pass through unchanged.
            return self._pack.add(name, node, deltabasenode, delta)

        # Root entry: key it by the provided node, and only substitute the
        # delta base when the entry actually has one.
        if deltabasenode != nullid:
            rootbase = self._p1node
        else:
            rootbase = deltabasenode
        return self._pack.add(name, self._node, rootbase, delta)
class InterceptedMutableHistoryPack(object):
    """History pack stand-in that rewrites the root key and buffers writes.

    It does two things:

    1. Replaces the node of the root entry (empty filename) with the node
       given at construction time, so that a tree manifest can be referenced
       via its flat manifest hash.
    2. Records the adds in ``entries`` instead of sending them on. Since
       mutablehistorypack requires all entries for a file to be written
       contiguously, the writes of a whole manifest import have to be
       collected before they are sent to the actual mutablehistorypack.
    """

    def __init__(self, node, p1node):
        # node:   node to record the root entry under
        # p1node: first parent to record for the root entry when it has one
        self._node = node
        self._p1node = p1node
        # Recorded (filename, node, p1, p2, linknode, copyfrom) tuples, in
        # the order add() was called.
        self.entries = []

    def add(self, filename, node, p1, p2, linknode, copyfrom):
        """Record one history entry; nothing is written or returned."""
        isroot = filename == ""
        if isroot:
            # For the root node, provide the flat manifest as the key
            node = self._node
            if p1 != nullid:
                p1 = self._p1node
        record = (filename, node, p1, p2, linknode, copyfrom)
        self.entries.append(record)
def _parsedeltaadds(delta):
    """Return the manifest entries inserted by a revlog delta.

    Deltas are a sequence of blocks of the form::

        <start><end><datalen><data>

    where start and end say what bytes to delete, and data says what bytes to
    insert in their place. The three header fields are unpacked as big-endian
    4-byte integers. Only <data> is inspected: every non-empty,
    newline-delimited line of it is split at the NUL byte into a filename
    and a remainder made of 40 hex digits followed by the flags.

    Returns a list of ``(filename, binary node, flags)`` tuples in delta
    order. Raises RuntimeError when a block header cannot be unpacked.
    """
    adds = []
    current = 0
    end = len(delta)
    while current < end:
        try:
            _start, _end, blocklen = struct.unpack(
                ">lll", delta[current:current + 12])
        except struct.error:
            raise RuntimeError("patch cannot be decoded")
        current += 12

        block = ''
        if blocklen:
            block = delta[current:current + blocklen]
            current += blocklen

        # An individual delta block may contain multiple newline
        # delimited entries.
        for line in block.split('\n'):
            if not line:
                continue
            fname, rest = line.split('\0')
            adds.append((fname, bin(rest[:40]), rest[40:]))
    return adds


def recordmanifest(datapack, historypack, repo, oldtip, newtip):
    """Build tree manifests for a range of flat manifest revisions.

    For every manifest revlog revision ``rev`` with ``oldtip <= rev <
    newtip`` the tree of its first parent is looked up (or, when no tree
    exists yet, built from the flat parent manifest), the revision's changes
    are applied on top of it, and the resulting tree nodes are written out.
    Root tree entries are stored under the flat manifest node by routing the
    writes through InterceptedMutableDataPack / InterceptedMutableHistoryPack.

    datapack    -- receives ``add(name, node, deltabasenode, delta)`` calls
                   as trees are produced.
    historypack -- receives ``add(filename, node, p1, p2, linknode,
                   copyfrom)`` calls once at the very end, grouped by
                   filename in sorted order.
    repo        -- repository providing ``changelog``, ``manifestlog``,
                   ``ui`` and ``svfs.manifestdatastore``.
    oldtip, newtip -- half-open range of manifest revlog revision numbers.

    When ``treemanifest.allowedtreeroots`` resolves to at least one manifest
    node, a revision whose first parent has no tree and is not one of those
    nodes is skipped.

    Raises RuntimeError if a revlog delta cannot be decoded.
    """
    cl = repo.changelog
    mfl = repo.manifestlog
    mfrevlog = mfl._revlog
    total = newtip - oldtip
    ui = repo.ui
    builttrees = {}
    message = _('priming tree cache')
    ui.progress(message, 0, total=total)

    # Count how often each manifest is the first parent of a revision in the
    # range, so a cached tree can be dropped right after its final use.
    refcount = {}
    for rev in xrange(oldtip, newtip):
        p1 = mfrevlog.parentrevs(rev)[0]
        p1node = mfrevlog.node(p1)
        refcount[p1node] = refcount.get(p1node, 0) + 1

    allowedtreeroots = set()
    for name in repo.ui.configlist('treemanifest', 'allowedtreeroots'):
        if name in repo:
            allowedtreeroots.add(repo[name].manifestnode())

    # (name, node) pairs already written to datapack during this call.
    includedentries = set()
    # filename -> list of (node, p1, p2, linknode, copyfrom), in write order.
    historyentries = {}
    for rev in xrange(oldtip, newtip):
        ui.progress(message, rev - oldtip, total=total)
        mfnode = mfrevlog.node(rev)
        p1, p2 = mfrevlog.parentrevs(rev)
        p1node = mfrevlog.node(p1)
        p2node = mfrevlog.node(p2)
        linkrev = mfrevlog.linkrev(rev)
        linknode = cl.node(linkrev)

        if p1node == nullid:
            origtree = cstore.treemanifest(repo.svfs.manifestdatastore)
        elif p1node in builttrees:
            origtree = builttrees[p1node]
        else:
            origtree = mfl[p1node].read()._treemanifest()

        if origtree is None:
            if allowedtreeroots and p1node not in allowedtreeroots:
                continue

            # The parent has no tree yet: build one from its flat manifest.
            p1mf = mfl[p1node].read()
            p1linknode = cl.node(mfrevlog.linkrev(p1))
            origtree = cstore.treemanifest(repo.svfs.manifestdatastore)
            for filename, node, flag in p1mf.iterentries():
                origtree.set(filename, node, flag)

            tempdatapack = InterceptedMutableDataPack(datapack, p1node, nullid)
            temphistorypack = InterceptedMutableHistoryPack(p1node, nullid)
            for nname, nnode, ntext, np1text, np1, np2 in origtree.finalize():
                # No need to compute a delta, since we know the parent isn't
                # already a tree.
                tempdatapack.add(nname, nnode, nullid, ntext)
                temphistorypack.add(nname, nnode, np1, np2, p1linknode, '')
                includedentries.add((nname, nnode))

            # Keep the history recorded for the converted parent. Previously
            # these entries were lost when temphistorypack was rebound below,
            # leaving data entries without matching history.
            for entry in temphistorypack.entries:
                filename, values = entry[0], entry[1:]
                historyentries.setdefault(filename, []).append(values)

            builttrees[p1node] = origtree

        # Remove the tree from the cache once we've processed its final use.
        # Otherwise memory explodes
        p1refcount = refcount[p1node] - 1
        if p1refcount == 0:
            builttrees.pop(p1node, None)
        refcount[p1node] = p1refcount

        if p2node != nullid:
            # Merge: compare the full flat manifests of p1 and this revision.
            diff = mfl[p1node].read().diff(mfl[mfnode].read())
            deletes = []
            adds = []
            for filename, ((_anode, _aflag), (bnode, bflag)) in \
                    diff.iteritems():
                if bnode is None:
                    deletes.append(filename)
                else:
                    adds.append((filename, bnode, bflag))
        else:
            # This will generally be very quick, since p1 == deltabase
            delta = mfrevlog.revdiff(p1, rev)

            # Inspect the delta and read the added files from it
            adds = _parsedeltaadds(delta)

            # Files touched by the changeset that the delta did not add are
            # treated as deletions.
            allfiles = set(repo.changelog.readfiles(linkrev))
            deletes = allfiles.difference(fname for fname, fnode, fflag in adds)

        # Apply the changes on top of the parent tree
        newtree = origtree.copy()
        for fname in deletes:
            newtree.set(fname, None, None)

        for fname, fnode, fflags in adds:
            newtree.set(fname, fnode, fflags)

        tempdatapack = InterceptedMutableDataPack(datapack, mfnode, p1node)
        temphistorypack = InterceptedMutableHistoryPack(mfnode, p1node)
        newtreeiter = newtree.finalize(origtree if p1node != nullid else None)
        for nname, nnode, ntext, np1text, np1, np2 in newtreeiter:
            # Only use deltas if the delta base is in this same pack file
            if np1 != nullid and (nname, np1) in includedentries:
                delta = mdiff.textdiff(np1text, ntext)
                deltabase = np1
            else:
                delta = ntext
                deltabase = nullid
            tempdatapack.add(nname, nnode, deltabase, delta)
            temphistorypack.add(nname, nnode, np1, np2, linknode, '')
            includedentries.add((nname, nnode))

        for entry in temphistorypack.entries:
            filename, values = entry[0], entry[1:]
            historyentries.setdefault(filename, []).append(values)

        if ui.configbool('treemanifest', 'verifyautocreate', False):
            # Developer-only consistency check: drops into pdb whenever the
            # tree diff disagrees with the adds/deletes applied above.
            # NOTE(review): the `pass` after each set_trace() presumably
            # gives the debugger a line to stop on inside the failing
            # branch -- keep them.
            import pdb
            diff = newtree.diff(origtree)
            if len(diff) != len(adds) + len(deletes):
                pdb.set_trace()

            for fname in deletes:
                fdiff = diff.get(fname)
                if fdiff is None:
                    pdb.set_trace()
                    pass
                else:
                    l, r = fdiff
                    if l != (None, ''):
                        pdb.set_trace()
                        pass

            for fname, fnode, fflags in adds:
                fdiff = diff.get(fname)
                if fdiff is None:
                    # Sometimes adds are no-ops, so they don't show up in the
                    # diff.
                    if origtree.get(fname) != newtree.get(fname):
                        pdb.set_trace()
                        pass
                else:
                    l, r = fdiff
                    if l != (fnode, fflags):
                        pdb.set_trace()
                        pass

        # Only cache the new tree when a later revision in the range uses it
        # as its first parent. Caching it unconditionally would keep every
        # tree alive and defeat the refcount-based eviction above.
        if refcount.get(mfnode, 0) > 0:
            builttrees[mfnode] = newtree

    ui.progress(message, None)

    # mutablehistorypack needs all entries of a file written contiguously, so
    # the buffered history is flushed grouped by filename.
    for filename, entries in sorted(historyentries.iteritems()):
        for entry in entries:
            historypack.add(filename, *entry)