treemanifest: move tree delta logic up to python layer

Summary:
Previously the treemanifest code itself would create the text deltas when
writing a tree out. This meant we couldn't make the delta decision based on
other data, like if the p1 commit was in the same pack file.

This patch removes treemanifest.write() and moves all calls over to
treemanifest.finalize() which gives the python/pack layer control over delta
choices. A future patch will use this to ensure tree packs always contain
complete delta chains.

Test Plan: All tests pass

Reviewers: #mercurial, simonfar

Reviewed By: simonfar

Subscribers: mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D4645942

Signature: t1:4645942:1488880851:d0c8c902e7e849072a53344630a9184b6d8e1e7f
This commit is contained in:
Durham Goode 2017-03-07 11:15:25 -08:00
parent cc11141ce5
commit 5bc368c71d
4 changed files with 58 additions and 132 deletions

View File

@ -1183,122 +1183,6 @@ static PyObject *treemanifest_walk(py_treemanifest *self, PyObject *args) {
matcher);
}
void writestore(Manifest *mainManifest, const std::vector<char*> &cmpNodes,
const std::vector<Manifest*> &cmpManifests,
PythonObj &datapack, PythonObj &historypack,
char *linknode, const ManifestFetcher &fetcher,
bool useDeltas) {
NewTreeIterator iterator(mainManifest, cmpNodes, cmpManifests, fetcher);
PythonObj mdiff = PyImport_ImportModule("mercurial.mdiff");
std::string *path = NULL;
ManifestNode *result = NULL;
ManifestNode *p1 = NULL;
ManifestNode *p2 = NULL;
std::string raw;
std::string p1raw;
while (iterator.next(&path, &result, &p1, &p2)) {
result->manifest->serialize(raw);
char *deltanode = (char*)NULLID;
if (!useDeltas || memcmp(p1->node, NULLID, BIN_NODE_SIZE) == 0) {
p1raw.erase();
} else {
p1->manifest->serialize(p1raw);
deltanode = p1->node;
PythonObj deltaArgs = Py_BuildValue("(s#s#)",
p1raw.c_str(), (Py_ssize_t)p1raw.size(),
raw.c_str(), (Py_ssize_t)raw.size());
PythonObj delta = mdiff.callmethod("textdiff", deltaArgs);
char *deltabytes;
Py_ssize_t deltabytelen;
if (PyString_AsStringAndSize(delta, &deltabytes, &deltabytelen)) {
throw pyexception();
}
raw.assign(deltabytes, deltabytelen);
}
PythonObj dataargs = Py_BuildValue("(s#s#s#s#)",
path->c_str(), (Py_ssize_t)path->size(),
result->node, (Py_ssize_t)BIN_NODE_SIZE,
deltanode, (Py_ssize_t)BIN_NODE_SIZE,
raw.c_str(), (Py_ssize_t)raw.size());
datapack.callmethod("add", dataargs);
PythonObj historyargs = Py_BuildValue("(s#s#s#s#s#s#)",
path->c_str(), (Py_ssize_t)path->size(), // filename
result->node, (Py_ssize_t)BIN_NODE_SIZE, // node
p1->node, (Py_ssize_t)BIN_NODE_SIZE, // p1
p2->node, (Py_ssize_t)BIN_NODE_SIZE, // p2
linknode, (Py_ssize_t)BIN_NODE_SIZE, // linknode
Py_None, (Py_ssize_t)0 // copyfrom
);
historypack.callmethod("add", historyargs);
}
}
static PyObject *treemanifest_write(py_treemanifest *self, PyObject *args,
PyObject *kwargs) {
PyObject *datapackObj;
PyObject *historypackObj;
PyObject *p1treeObj = NULL;
PyObject *useDeltaObj = NULL;
char *linknode = NULL;
Py_ssize_t linknodesize = 0;
static char const *kwlist[] = {"datapack", "historypack", "linknode", "p1tree", "useDeltas", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OOs#|OO", (char**)kwlist,
&datapackObj, &historypackObj, &linknode,
&linknodesize, &p1treeObj, &useDeltaObj)) {
return NULL;
}
if (linknodesize != BIN_NODE_SIZE) {
PyErr_SetString(PyExc_ValueError, "linknode must be 20 bytes");
return NULL;
}
py_treemanifest *p1tree = NULL;
if (p1treeObj && p1treeObj != Py_None) {
p1tree = (py_treemanifest*)p1treeObj;
}
// ParseTuple doesn't increment the ref, but the PythonObj will decrement on
// destruct, so let's increment now.
Py_INCREF(datapackObj);
PythonObj datapack = datapackObj;
Py_INCREF(historypackObj);
PythonObj historypack = historypackObj;
bool useDeltas = useDeltaObj ? PyObject_IsTrue(useDeltaObj) : true;
try {
std::vector<char*> cmpNodes;
std::vector<Manifest*> cmpManifests;
if (p1tree) {
assert(p1tree->tm.root.node);
cmpNodes.push_back(p1tree->tm.root.node);
cmpManifests.push_back(p1tree->tm.getRootManifest());
}
writestore(self->tm.getRootManifest(), cmpNodes, cmpManifests, datapack, historypack,
linknode, self->tm.fetcher, useDeltas);
char tempnode[20];
self->tm.getRootManifest()->computeNode(p1tree ? binfromhex(p1tree->tm.root.node).c_str() : NULLID, NULLID, tempnode);
std::string hexnode;
hexfrombin(tempnode, hexnode);
self->tm.root.update(hexnode.c_str(), MANIFEST_DIRECTORY_FLAGPTR);
return PyString_FromStringAndSize(tempnode, BIN_NODE_SIZE);
} catch (const pyexception &ex) {
return NULL;
}
}
static PyObject *treemanifest_finalize(py_treemanifest *self, PyObject *args,
PyObject *kwargs) {
PyObject *p1treeObj = NULL;
@ -1376,8 +1260,6 @@ static PyMethodDef treemanifest_methods[] = {
"returns the text form of the manifest"},
{"walk", (PyCFunction)treemanifest_walk, METH_VARARGS,
"returns a iterator for walking the manifest"},
{"write", (PyCFunction)treemanifest_write, METH_VARARGS|METH_KEYWORDS,
"writes any pending tree changes to the given store"},
{"finalize", (PyCFunction)treemanifest_finalize, METH_VARARGS|METH_KEYWORDS,
"Returns an iterator that outputs each piece of the tree that is new."
"When the iterator completes, the tree is marked as immutable."},

View File

@ -1002,7 +1002,14 @@ class manifestfactory(object):
node, p1)
hpack = treemanifest.InterceptedMutableHistoryPack(
node, p1)
newtree.write(dpack, hpack, revlog.nullid, p1tree=tree)
newtreeiter = newtree.finalize(tree)
for nname, nnode, ntext, np1text, np1, np2 in newtreeiter:
if np1 != revlog.nullid:
delta = mdiff.textdiff(np1text, ntext)
else:
delta = ntext
dpack.add(nname, nnode, np1, delta)
hpack.add(nname, nnode, np1, np2, revlog.nullid, '')
treemanifestcache.getinstance(origself.opener,
self.ui)[node] = newtree

View File

@ -252,7 +252,11 @@ class ctreemanifesttests(unittest.TestCase):
dstore = FakeDataStore()
hstore = FakeHistoryStore()
anode = a.write(dstore, hstore, alinknode)
for name, node, text, p1text, p1, p2 in a.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, alinknode, '')
if not name:
anode = node
a2 = cstore.treemanifest(dstore, anode)
self.assertEquals(list(a.iterentries()), list(a2.iterentries()))
@ -265,7 +269,11 @@ class ctreemanifesttests(unittest.TestCase):
b.set("abc/z", *hashflags())
blinknode = hashflags()[0]
bnode = b.write(dstore, hstore, blinknode)
for name, node, text, p1text, p1, p2 in b.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, blinknode, '')
if not name:
bnode = node
b2 = cstore.treemanifest(dstore, bnode)
self.assertEquals(list(b.iterentries()), list(b2.iterentries()))
@ -282,14 +290,22 @@ class ctreemanifesttests(unittest.TestCase):
dstore = FakeDataStore()
hstore = FakeHistoryStore()
anode = a.write(dstore, hstore, alinknode)
for name, node, text, p1text, p1, p2 in a.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, alinknode, '')
if not name:
anode = node
b = a.copy()
b.set("abc/a", None, None)
b.set("abc/a/foo", *hashflags())
blinknode = hashflags()[0]
bnode = b.write(dstore, hstore, blinknode, p1tree=a, useDeltas=False)
for name, node, text, p1text, p1, p2 in b.finalize(a):
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, blinknode, '')
if not name:
bnode = node
b2 = cstore.treemanifest(dstore, bnode)
self.assertEquals(list(b.iterentries()), list(b2.iterentries()))
@ -386,8 +402,12 @@ class ctreemanifesttests(unittest.TestCase):
# - committed trees
dstore = FakeDataStore()
hstore = FakeHistoryStore()
a.write(dstore, hstore, alinknode)
b.write(dstore, hstore, blinknode)
for name, node, text, p1text, p1, p2 in a.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, alinknode, '')
for name, node, text, p1text, p1, p2 in b.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, blinknode, '')
diff = a.diff(b)
self.assertEquals(diff, {})
@ -405,8 +425,12 @@ class ctreemanifesttests(unittest.TestCase):
})
# - committed trees
a.write(dstore, hstore, alinknode)
b.write(dstore, hstore, blinknode)
for name, node, text, p1text, p1, p2 in a.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, alinknode, '')
for name, node, text, p1text, p1, p2 in b.finalize():
dstore.add(name, node, nullid, text)
hstore.add(name, node, p1, p2, blinknode, '')
diff = a.diff(b)
self.assertEquals(diff, {

View File

@ -27,6 +27,7 @@ from mercurial import (
error,
extensions,
localrepo,
mdiff,
util,
)
from mercurial.i18n import _
@ -191,9 +192,15 @@ def recordmanifest(datapack, historypack, repo, oldtip, newtip):
origtree = cstore.treemanifest(repo.svfs.manifestdatastore)
for filename, node, flag in p1mf.iterentries():
origtree.set(filename, node, flag)
origtree.write(InterceptedMutableDataPack(datapack, p1node, nullid),
InterceptedMutableHistoryPack(p1node, nullid),
p1linknode)
tempdatapack = InterceptedMutableDataPack(datapack, p1node, nullid)
temphistorypack = InterceptedMutableHistoryPack(p1node, nullid)
for nname, nnode, ntext, np1text, np1, np2 in origtree.finalize():
# No need to compute a delta, since we know the parent isn't
# already a tree.
tempdatapack.add(nname, nnode, nullid, ntext)
temphistorypack.add(nname, nnode, np1, np2, p1linknode, '')
builttrees[p1node] = origtree
# Remove the tree from the cache once we've processed its final use.
@ -265,8 +272,14 @@ def recordmanifest(datapack, historypack, repo, oldtip, newtip):
p1node)
temphistorypack = InterceptedMutableHistoryPack(mfrevlog.node(rev),
p1node)
newtree.write(tempdatapack, temphistorypack, linknode,
p1tree=origtree if p1node != nullid else None)
newtreeiter = newtree.finalize(origtree if p1node != nullid else None)
for nname, nnode, ntext, np1text, np1, np2 in newtreeiter:
if np1 != nullid:
delta = mdiff.textdiff(np1text, ntext)
else:
delta = ntext
tempdatapack.add(nname, nnode, np1, delta)
temphistorypack.add(nname, nnode, np1, np2, linknode, '')
for entry in temphistorypack.entries:
filename, values = entry[0], entry[1:]