mirror of
https://github.com/facebook/sapling.git
synced 2024-10-09 16:31:02 +03:00
ctree: implement treemanifest.write
Summary: This adds a new write() function to treemanifest that allows us to serialize a treemanifest instance into a provided data store. Test Plan: A future diff uses this to serialize trees into a pack file. Reviewers: #fastmanifest Differential Revision: https://phabricator.intern.facebook.com/D3838787
This commit is contained in:
parent
3dd98f3934
commit
29486895ce
@ -17,6 +17,8 @@
|
||||
#define HEX_NODE_SIZE 40
|
||||
#define BIN_NODE_SIZE 20
|
||||
|
||||
const char *const NULLID = "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0";
|
||||
|
||||
static int8_t hextable[256] = {
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
|
||||
|
@ -688,6 +688,64 @@ static PyObject *treemanifest_walk(py_treemanifest *self, PyObject *args) {
|
||||
matcher);
|
||||
}
|
||||
|
||||
void writestore(Manifest *mainManifest, const std::vector<char*> &cmpNodes,
|
||||
const std::vector<Manifest*> &cmpManifests,
|
||||
PythonObj &pack, const ManifestFetcher &fetcher) {
|
||||
NewTreeIterator iterator(mainManifest, cmpNodes, cmpManifests, fetcher);
|
||||
|
||||
std::string *path = NULL;
|
||||
Manifest *result = NULL;
|
||||
std::string *node = NULL;
|
||||
std::string raw;
|
||||
while (iterator.next(&path, &result, &node)) {
|
||||
// TODO: find an appropriate delta base and compute the delta
|
||||
result->serialize(raw);
|
||||
PythonObj args = Py_BuildValue("(s#s#s#s#)",
|
||||
path->c_str(), (Py_ssize_t)path->size(),
|
||||
node->c_str(), (Py_ssize_t)BIN_NODE_SIZE,
|
||||
NULLID, (Py_ssize_t)BIN_NODE_SIZE,
|
||||
raw.c_str(), (Py_ssize_t)raw.size());
|
||||
|
||||
pack.callmethod("add", args);
|
||||
}
|
||||
}
|
||||
|
||||
static PyObject *treemanifest_write(py_treemanifest *self, PyObject *args) {
|
||||
PyObject* packObj;
|
||||
py_treemanifest* p1tree = NULL;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "O|O", &packObj, &p1tree)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// ParseTuple doesn't increment the ref, but the PythonObj will decrement on
|
||||
// destruct, so let's increment now.
|
||||
Py_INCREF(packObj);
|
||||
PythonObj pack = packObj;
|
||||
|
||||
try {
|
||||
std::vector<char*> cmpNodes;
|
||||
std::vector<Manifest*> cmpManifests;
|
||||
if (p1tree) {
|
||||
assert(p1tree->tm.root.node);
|
||||
cmpNodes.push_back(p1tree->tm.root.node);
|
||||
cmpManifests.push_back(p1tree->tm.getRootManifest());
|
||||
}
|
||||
|
||||
writestore(self->tm.getRootManifest(), cmpNodes, cmpManifests, pack, self->tm.fetcher);
|
||||
|
||||
char tempnode[20];
|
||||
self->tm.getRootManifest()->computeNode(p1tree ? binfromhex(p1tree->tm.root.node).c_str() : NULLID, NULLID, tempnode);
|
||||
std::string hexnode;
|
||||
hexfrombin(tempnode, hexnode);
|
||||
self->tm.root.update(hexnode.c_str(), MANIFEST_DIRECTORY_FLAGPTR);
|
||||
|
||||
return PyString_FromStringAndSize(tempnode, BIN_NODE_SIZE);
|
||||
} catch (const pyexception &ex) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// ==== treemanifest ctype declaration ====
|
||||
|
||||
static PyMethodDef treemanifest_methods[] = {
|
||||
@ -707,6 +765,8 @@ static PyMethodDef treemanifest_methods[] = {
|
||||
"sets the node and flag for the given filepath\n"},
|
||||
{"walk", (PyCFunction)treemanifest_walk, METH_VARARGS,
|
||||
"returns a iterator for walking the manifest"},
|
||||
{"write", (PyCFunction)treemanifest_write, METH_VARARGS,
|
||||
"writes any pending tree changes to the given store"},
|
||||
{NULL, NULL}
|
||||
};
|
||||
|
||||
|
@ -433,3 +433,217 @@ bool treemanifest::remove(
|
||||
treemanifest *treemanifest::copy() {
|
||||
return new treemanifest(this->fetcher, &this->root);
|
||||
}
|
||||
NewTreeIterator::NewTreeIterator(Manifest *mainRoot,
|
||||
const std::vector<char*> &cmpNodes,
|
||||
const std::vector<Manifest*> &cmpRoots,
|
||||
const ManifestFetcher &fetcher) :
|
||||
mainRoot(mainRoot),
|
||||
cmpNodes(cmpNodes),
|
||||
fetcher(fetcher) {
|
||||
this->mainStack.push_back(stackframe(mainRoot, false));
|
||||
|
||||
for (size_t i = 0; i < cmpRoots.size(); i++) {
|
||||
Manifest *cmpRoot = cmpRoots[i];
|
||||
|
||||
std::vector<stackframe> stack;
|
||||
stack.push_back(stackframe(cmpRoot, false));
|
||||
this->cmpStacks.push_back(stack);
|
||||
}
|
||||
}
|
||||
|
||||
bool NewTreeIterator::popResult(std::string **path, Manifest **result, std::string **node) {
|
||||
stackframe &mainFrame = this->mainStack.back();
|
||||
Manifest *mainManifest = mainFrame.manifest;
|
||||
std::string mainSerialized;
|
||||
|
||||
// When we loop over the cmpStacks, record the cmp nodes that are parents
|
||||
// of the level we're about to return.
|
||||
char parentNodes[2][BIN_NODE_SIZE];
|
||||
memcpy(parentNodes[0], NULLID, BIN_NODE_SIZE);
|
||||
memcpy(parentNodes[1], NULLID, BIN_NODE_SIZE);
|
||||
|
||||
bool alreadyExists = false;
|
||||
|
||||
// Record the nodes of all cmp manifest equivalents
|
||||
for (size_t i = 0; i < cmpStacks.size(); i++) {
|
||||
// If a cmpstack is at the same level as the main stack, it represents
|
||||
// the same diretory and should be inspected.
|
||||
if (this->mainStack.size() == cmpStacks[i].size()) {
|
||||
std::vector<stackframe> &cmpStack = cmpStacks[i];
|
||||
Manifest *cmpManifest = cmpStack.back().manifest;
|
||||
|
||||
if (!alreadyExists) {
|
||||
std::string cmpSerialized;
|
||||
cmpManifest->serialize(cmpSerialized);
|
||||
mainManifest->serialize(mainSerialized);
|
||||
|
||||
// If the main manifest content is identical to a cmp content, we
|
||||
// shouldn't return it. Note: We already do this check when pushing
|
||||
// directories onto the stack, but for in-memory manifests we don't
|
||||
// know the node until after we've traversed the children, so we can't
|
||||
// verify their content until now.
|
||||
if (cmpSerialized.compare(mainSerialized) == 0) {
|
||||
alreadyExists = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Record the cmp parent nodes so later we can compute the main node
|
||||
if (cmpStack.size() > 1) {
|
||||
stackframe &priorCmpFrame = cmpStack[cmpStack.size() - 2];
|
||||
ManifestEntry *priorCmpEntry = priorCmpFrame.currentvalue();
|
||||
memcpy(parentNodes[i], binfromhex(priorCmpEntry->node).c_str(), BIN_NODE_SIZE);
|
||||
} else {
|
||||
// Use the original passed in parent nodes
|
||||
memcpy(parentNodes[i], binfromhex(this->cmpNodes[i]).c_str(), BIN_NODE_SIZE);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// We've finished processing this frame, so pop all the stacks
|
||||
this->mainStack.pop_back();
|
||||
for (size_t i = 0; i < cmpStacks.size(); i++) {
|
||||
if (this->mainStack.size() < cmpStacks[i].size()) {
|
||||
cmpStacks[i].pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
// If the current manifest has the same contents as a cmp manifest,
|
||||
// just give up now. Unless we're the root node (because the root node
|
||||
// will always change based on the parent nodes).
|
||||
if (alreadyExists && this->mainStack.size() > 1) {
|
||||
assert(this->node != NULL);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the node on the manifest entry
|
||||
char tempnode[BIN_NODE_SIZE];
|
||||
mainManifest->computeNode(parentNodes[0], parentNodes[1], tempnode);
|
||||
this->node.assign(tempnode, 20);
|
||||
if (mainStack.size() > 0) {
|
||||
// Peek back up the stack so we can put the right node on the
|
||||
// ManifestEntry.
|
||||
stackframe &priorFrame = mainStack[mainStack.size() - 1];
|
||||
ManifestEntry *priorEntry = priorFrame.currentvalue();
|
||||
|
||||
std::string hexnode;
|
||||
hexfrombin(tempnode, hexnode);
|
||||
priorEntry->update(hexnode.c_str(), MANIFEST_DIRECTORY_FLAGPTR);
|
||||
}
|
||||
|
||||
*path = &this->path;
|
||||
*result = mainManifest;
|
||||
*node = &this->node;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NewTreeIterator::processDirectory(ManifestEntry *mainEntry) {
|
||||
// mainEntry is a new entry we need to compare against each cmpEntry, and
|
||||
// then push if it is different from all of them.
|
||||
|
||||
// First move all the cmp iterators forward to the same name as mainEntry.
|
||||
bool alreadyExists = false;
|
||||
std::vector<std::vector<stackframe>*> requirePush;
|
||||
for (size_t i = 0; i < cmpStacks.size(); i++) {
|
||||
std::vector<stackframe> &cmpStack = cmpStacks[i];
|
||||
|
||||
// If the cmpStack is at a different level, it is not at the same
|
||||
// location as main, so don't bother searching it.
|
||||
if (cmpStack.size() < mainStack.size()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
stackframe &cmpFrame = cmpStack.back();
|
||||
|
||||
// Move cmp iterator forward until we match or pass the current
|
||||
// mainEntry filename.
|
||||
while (!cmpFrame.isfinished()) {
|
||||
ManifestEntry *cmpEntry = cmpFrame.currentvalue();
|
||||
int cmp = ManifestEntry::compareName(cmpEntry, mainEntry);
|
||||
if (cmp >= 0) {
|
||||
// If the directory names match...
|
||||
if (cmp == 0) {
|
||||
// And the nodes match...
|
||||
if (!alreadyExists &&
|
||||
(mainEntry->node && strncmp(mainEntry->node, cmpEntry->node, 40) == 0)) {
|
||||
// Skip this entry
|
||||
alreadyExists = true;
|
||||
}
|
||||
// Remember this stack so we can push to it later
|
||||
requirePush.push_back(&cmpStack);
|
||||
}
|
||||
break;
|
||||
}
|
||||
cmpFrame.next();
|
||||
}
|
||||
}
|
||||
|
||||
// If mainEntry matched any of the cmpEntries, we should skip mainEntry.
|
||||
if (alreadyExists) {
|
||||
assert(mainEntry->node != NULL);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Otherwise, push to the main stack
|
||||
mainEntry->appendtopath(this->path);
|
||||
Manifest *mainManifest = mainEntry->get_manifest(this->fetcher,
|
||||
this->path.c_str(), this->path.size());
|
||||
this->mainStack.push_back(stackframe(mainManifest, false));
|
||||
|
||||
// And push all cmp stacks we remembered that have the same directory.
|
||||
for (size_t i = 0; i < requirePush.size(); i++) {
|
||||
std::vector<stackframe> *cmpStack = requirePush[i];
|
||||
ManifestEntry *cmpEntry = cmpStack->back().currentvalue();
|
||||
Manifest *cmpManifest = cmpEntry->get_manifest(this->fetcher,
|
||||
this->path.c_str(), this->path.size());
|
||||
cmpStack->push_back(stackframe(cmpManifest, false));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NewTreeIterator::next(std::string **path, Manifest **result, std::string **node) {
|
||||
// Pop the last returned directory off the path
|
||||
size_t slashoffset = this->path.find_last_of('/', this->path.size() - 2);
|
||||
if (slashoffset == std::string::npos) {
|
||||
this->path.erase();
|
||||
} else {
|
||||
this->path.erase(slashoffset + 1);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
if (this->mainStack.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
stackframe &mainFrame = this->mainStack.back();
|
||||
|
||||
// If we've reached the end of this manifest, we've processed all the
|
||||
// children, so we can now return it.
|
||||
if (mainFrame.isfinished()) {
|
||||
// This can return false if this manifest ended up being equivalent to
|
||||
// a cmp parent manifest, which means we should skip it.
|
||||
if (this->popResult(path, result, node)) {
|
||||
if (this->mainStack.size() > 0) {
|
||||
this->mainStack.back().next();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (this->mainStack.size() > 0) {
|
||||
this->mainStack.back().next();
|
||||
}
|
||||
} else {
|
||||
// Use currentvalue instead of next so that the stack of frames match the
|
||||
// actual current filepath.
|
||||
ManifestEntry *mainEntry = mainFrame.currentvalue();
|
||||
if (mainEntry->isdirectory()) {
|
||||
// If we're at a directory, process it, either by pushing it on the
|
||||
// stack, or by skipping it if it already matches a cmp parent.
|
||||
if (!this->processDirectory(mainEntry)) {
|
||||
mainFrame.next();
|
||||
}
|
||||
} else {
|
||||
mainFrame.next();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -232,6 +232,46 @@ struct stackframe {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* An iterator that takes a main treemanifest and a vector of comparison
|
||||
* treemanifests and iterates over the Manifests that only exist in the main
|
||||
* treemanifest.
|
||||
*/
|
||||
class NewTreeIterator {
|
||||
private:
|
||||
Manifest *mainRoot;
|
||||
std::vector<stackframe> mainStack;
|
||||
std::vector<char*> cmpNodes;
|
||||
std::vector<std::vector<stackframe> > cmpStacks;
|
||||
std::string path;
|
||||
std::string node;
|
||||
ManifestFetcher fetcher;
|
||||
public:
|
||||
NewTreeIterator(Manifest *mainRoot,
|
||||
const std::vector<char*> &cmpNodes,
|
||||
const std::vector<Manifest*> &cmpRoots,
|
||||
const ManifestFetcher &fetcher);
|
||||
|
||||
/**
|
||||
* Outputs the next new Manifest and its corresponding path and node.
|
||||
*
|
||||
* Return true if a manifest was returned, or false if we've reached the
|
||||
* end.
|
||||
*/
|
||||
bool next(std::string **path, Manifest **result, std::string **node);
|
||||
private:
|
||||
/**
|
||||
* Pops the current Manifest, populating the output values and returning true
|
||||
* if the current Manifest is different from all comparison manifests.
|
||||
*/
|
||||
bool popResult(std::string **path, Manifest **result, std::string **node);
|
||||
|
||||
/** Pushes the given Manifest onto the stacks. If the given Manifest equals
|
||||
* one of the comparison Manifests, the function does nothing.
|
||||
*/
|
||||
bool processDirectory(ManifestEntry *mainEntry);
|
||||
};
|
||||
|
||||
/**
|
||||
* A helper struct representing the state of an iterator recursing over a tree.
|
||||
*/
|
||||
|
Loading…
Reference in New Issue
Block a user