fastmanifest: remove it

Summary: This is dead code and is no longer used. Kill it.

Reviewed By: quark-zju

Differential Revision: D18139945

fbshipit-source-id: 502286f73ddd4689082932af747c5b7858604aad
Xavier Deguillard authored on 2019-10-28 19:18:47 -07:00; committed by Facebook Github Bot
parent 6e4c2f38d0
commit 65fc3c7c40
54 changed files with 11 additions and 9317 deletions


@@ -1,864 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// cfastmanifest.c: CPython interface for fastmanifest
//
// no-check-code
#include <Python.h>
#if defined(_MSC_VER) || __STDC_VERSION__ < 199901L
#define true 1
#define false 0
typedef unsigned char bool;
#else
#include <stdbool.h>
#endif
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
// clang-format off
// clang thinks that PyObject_HEAD should be on the same line as the next line
// since there is no semicolon after it. There is no semicolon because the
// PyObject_HEAD macro already contains one and MSVC does not support
// extra semicolons.
typedef struct {
PyObject_HEAD
tree_t *tree;
} fastmanifest;
// clang-format on
// clang-format off
typedef struct {
PyObject_HEAD
iterator_t *iterator;
} fmIter;
// clang-format on
static PyTypeObject fastmanifestType;
static PyTypeObject fastmanifestKeysIterator;
static PyTypeObject fastmanifestEntriesIterator;
/* Fastmanifest: CPython helpers */
static bool fastmanifest_is_valid_manifest_key(PyObject* key) {
return PyString_Check(key);
}
static bool fastmanifest_is_valid_manifest_value(PyObject* value) {
if (!PyTuple_Check(value) || PyTuple_Size(value) != 2) {
PyErr_Format(
PyExc_TypeError, "Manifest values must be a tuple of (node, flags).");
return false;
}
return true;
}
static PyObject* fastmanifest_formatfile(
const uint8_t* checksum,
const uint8_t checksum_sz,
const uint8_t flags) {
PyObject* py_checksum =
PyString_FromStringAndSize((const char*)checksum, checksum_sz);
if (!py_checksum) {
return NULL;
}
PyObject* py_flags;
PyObject* tup;
py_flags =
PyString_FromStringAndSize((const char*)&flags, (flags == 0) ? 0 : 1);
if (!py_flags) {
Py_DECREF(py_checksum);
return NULL;
}
tup = PyTuple_Pack(2, py_checksum, py_flags);
Py_DECREF(py_flags);
Py_DECREF(py_checksum);
return tup;
}
/* ================================== */
/* Fastmanifest: CPython Interface */
/* ================================== */
static int fastmanifest_init(fastmanifest* self, PyObject* args) {
PyObject* pydata = NULL;
char* data;
ssize_t len;
if (!PyArg_ParseTuple(args, "|S", &pydata)) {
return -1;
}
if (pydata == NULL) {
// no string. initialize it to an empty tree.
self->tree = alloc_tree();
if (self->tree == NULL) {
PyErr_NoMemory();
return -1;
}
return 0;
}
int err = PyString_AsStringAndSize(pydata, &data, &len);
if (err == -1)
return -1;
convert_from_flat_result_t from_result = convert_from_flat(data, len);
if (from_result.code == CONVERT_FROM_FLAT_OK) {
tree_t* tree = from_result.tree;
self->tree = tree;
} else {
self->tree = NULL;
}
switch (from_result.code) {
case CONVERT_FROM_FLAT_OOM:
PyErr_NoMemory();
return -1;
case CONVERT_FROM_FLAT_WTF:
PyErr_Format(PyExc_ValueError, "Manifest did not end in a newline.");
return -1;
default:
return 0;
}
}
static void fastmanifest_dealloc(fastmanifest* self) {
destroy_tree(self->tree);
PyObject_Del(self);
}
static PyObject* fastmanifest_getkeysiter(fastmanifest* self) {
fmIter* i = NULL;
iterator_t* iterator = create_iterator(self->tree, true);
if (!iterator) {
PyErr_NoMemory();
return NULL;
}
i = PyObject_New(fmIter, &fastmanifestKeysIterator);
if (i) {
i->iterator = iterator;
} else {
destroy_iterator(iterator);
PyErr_NoMemory();
}
return (PyObject*)i;
}
static PyObject* fastmanifest_getentriesiter(fastmanifest* self) {
fmIter* i = NULL;
iterator_t* iterator = create_iterator(self->tree, true);
if (!iterator) {
PyErr_NoMemory();
return NULL;
}
i = PyObject_New(fmIter, &fastmanifestEntriesIterator);
if (i) {
i->iterator = iterator;
} else {
destroy_iterator(iterator);
PyErr_NoMemory();
}
return (PyObject*)i;
}
static PyObject* fastmanifest_save(fastmanifest* self, PyObject* args) {
PyObject* pydata = NULL;
char* data;
ssize_t len;
if (!PyArg_ParseTuple(args, "S", &pydata)) {
return NULL;
}
int err = PyString_AsStringAndSize(pydata, &data, &len);
if (err == -1 || len < 0) {
PyErr_Format(PyExc_ValueError, "Illegal filepath");
return NULL;
}
write_to_file_result_t result = write_to_file(self->tree, data, (size_t)len);
switch (result) {
case WRITE_TO_FILE_OK:
Py_RETURN_NONE;
case WRITE_TO_FILE_OOM:
PyErr_NoMemory();
return NULL;
default:
PyErr_Format(PyExc_ValueError, "Unexpected error saving manifest");
return NULL;
}
}
static PyObject* fastmanifest_load(PyObject* cls, PyObject* args) {
PyObject* pydata = NULL;
char* data;
ssize_t len;
if (!PyArg_ParseTuple(args, "S", &pydata)) {
return NULL;
}
int err = PyString_AsStringAndSize(pydata, &data, &len);
if (err == -1 || len < 0) {
PyErr_Format(PyExc_ValueError, "Illegal filepath");
return NULL;
}
read_from_file_result_t result = read_from_file(data, (size_t)len);
switch (result.code) {
case READ_FROM_FILE_OK: {
fastmanifest* read_manifest =
PyObject_New(fastmanifest, &fastmanifestType);
read_manifest->tree = result.tree;
return (PyObject*)read_manifest;
}
case READ_FROM_FILE_OOM:
PyErr_NoMemory();
return NULL;
case READ_FROM_FILE_NOT_READABLE:
errno = result.err;
PyErr_SetFromErrno(PyExc_IOError);
return NULL;
default:
PyErr_Format(PyExc_ValueError, "Unexpected error loading manifest");
return NULL;
}
}
static fastmanifest* fastmanifest_copy(fastmanifest* self) {
fastmanifest* copy = PyObject_New(fastmanifest, &fastmanifestType);
if (copy) {
copy->tree = copy_tree(self->tree);
}
if (!copy)
PyErr_NoMemory();
return copy;
}
typedef struct {
PyObject* matchfn;
bool filter_error_occurred;
} filter_copy_context_t;
bool filter_callback(char* path, size_t path_sz, void* callback_context) {
filter_copy_context_t* context = (filter_copy_context_t*)callback_context;
PyObject *arglist = NULL, *result = NULL;
arglist = Py_BuildValue("(s#)", path, (int)path_sz);
if (!arglist) {
context->filter_error_occurred = true;
return false;
}
result = PyObject_CallObject(context->matchfn, arglist);
Py_DECREF(arglist);
if (!result) {
context->filter_error_occurred = true;
return false;
}
bool bool_result = PyObject_IsTrue(result);
Py_DECREF(result);
return bool_result;
}
static fastmanifest* fastmanifest_filtercopy(
fastmanifest* self,
PyObject* matchfn) {
fastmanifest* py_copy = PyObject_New(fastmanifest, &fastmanifestType);
tree_t* copy = NULL;
if (py_copy) {
filter_copy_context_t context;
context.matchfn = matchfn;
context.filter_error_occurred = false;
copy = filter_copy(self->tree, filter_callback, &context);
if (copy == NULL) {
goto cleanup;
}
py_copy->tree = copy;
return py_copy;
}
cleanup:
if (copy != NULL) {
destroy_tree(copy);
}
if (py_copy != NULL) {
Py_DECREF(py_copy);
}
PyErr_NoMemory();
return NULL;
}
static PyObject* hashflags(
const uint8_t* checksum,
const uint8_t checksum_sz,
const uint8_t flags) {
PyObject *ret = NULL, *py_hash, *py_flags;
py_hash = PyString_FromStringAndSize((const char*)checksum, checksum_sz);
py_flags =
PyString_FromStringAndSize((const char*)&flags, flags == 0 ? 0 : 1);
if (!py_hash || !py_flags) {
goto cleanup;
}
ret = PyTuple_Pack(2, py_hash, py_flags);
cleanup:
Py_XDECREF(py_hash);
Py_XDECREF(py_flags);
return ret;
}
typedef struct _fastmanifest_diff_context_t {
PyObject* result;
PyObject* emptyTuple;
bool error_occurred;
bool listclean;
} fastmanifest_diff_context_t;
static void fastmanifest_diff_callback(
const char* path,
const size_t path_sz,
const bool left_present,
const uint8_t* left_checksum,
const uint8_t left_checksum_sz,
const uint8_t left_flags,
const bool right_present,
const uint8_t* right_checksum,
const uint8_t right_checksum_sz,
const uint8_t right_flags,
void* context) {
fastmanifest_diff_context_t* diff_context =
(fastmanifest_diff_context_t*)context;
PyObject *key, *outer = NULL, *py_left = NULL, *py_right = NULL;
key = PyString_FromStringAndSize(path, path_sz);
if (!key) {
diff_context->error_occurred = true;
goto cleanup;
}
if (left_present && right_present && left_flags == right_flags &&
left_checksum_sz == right_checksum_sz &&
memcmp(left_checksum, right_checksum, left_checksum_sz) == 0) {
Py_INCREF(Py_None);
outer = Py_None;
} else {
if (left_present) {
py_left = hashflags(left_checksum, left_checksum_sz, left_flags);
} else {
py_left = diff_context->emptyTuple;
}
if (right_present) {
py_right = hashflags(right_checksum, right_checksum_sz, right_flags);
} else {
py_right = diff_context->emptyTuple;
}
if (!py_left || !py_right) {
diff_context->error_occurred = true;
goto cleanup;
}
outer = PyTuple_Pack(2, py_left, py_right);
if (outer == NULL) {
diff_context->error_occurred = true;
goto cleanup;
}
}
if (PyDict_SetItem(diff_context->result, key, outer) != 0) {
diff_context->error_occurred = true;
}
cleanup:
Py_XDECREF(outer);
Py_XDECREF(key);
if (left_present) {
Py_XDECREF(py_left);
}
if (right_present) {
Py_XDECREF(py_right);
}
}
static PyObject*
fastmanifest_diff(fastmanifest* self, PyObject* args, PyObject* kwargs) {
fastmanifest* other;
PyObject *match = NULL, *pyclean = NULL;
PyObject *emptyTuple = NULL, *ret = NULL;
PyObject* es;
fastmanifest_diff_context_t context;
context.error_occurred = false;
static char const* kwlist[] = {"m2", "match", "clean", NULL};
if (!PyArg_ParseTupleAndKeywords(
args,
kwargs,
"O!|OO",
(char**)kwlist,
&fastmanifestType,
&other,
&match,
&pyclean)) {
return NULL;
}
if (match && match != Py_None) {
PyErr_Format(
PyExc_ValueError,
"fastmanifest.diff does not support the match argument");
return NULL;
}
context.listclean = (!pyclean) ? false : PyObject_IsTrue(pyclean);
es = PyString_FromString("");
if (!es) {
goto nomem;
}
emptyTuple = PyTuple_Pack(2, Py_None, es);
Py_CLEAR(es);
if (!emptyTuple) {
goto nomem;
}
ret = PyDict_New();
if (!ret) {
goto nomem;
}
context.result = ret;
context.emptyTuple = emptyTuple;
diff_result_t diff_result = diff_trees(
self->tree,
other->tree,
context.listclean,
&fastmanifest_diff_callback,
&context);
Py_CLEAR(emptyTuple);
switch (diff_result) {
case DIFF_OK:
if (context.error_occurred) {
// error occurred in the callback, i.e., our code.
Py_XDECREF(ret);
if (PyErr_Occurred() == NULL) {
PyErr_Format(
PyExc_ValueError,
"ignore_fastmanifest_errcode set but no exception detected.");
}
return NULL;
}
return ret;
case DIFF_OOM:
goto nomem;
case DIFF_WTF:
PyErr_Format(PyExc_ValueError, "Unexpected error diffing manifests.");
goto cleanup;
}
nomem:
PyErr_NoMemory();
cleanup:
Py_XDECREF(ret);
Py_XDECREF(emptyTuple);
Py_XDECREF(es);
return NULL;
}
static PyObject* fastmanifest_text(fastmanifest* self) {
convert_to_flat_result_t to_flat = convert_to_flat(self->tree);
switch (to_flat.code) {
case CONVERT_TO_FLAT_OK:
return PyString_FromStringAndSize(
to_flat.flat_manifest, to_flat.flat_manifest_sz);
case CONVERT_TO_FLAT_OOM:
PyErr_NoMemory();
return NULL;
case CONVERT_TO_FLAT_WTF:
PyErr_Format(PyExc_ValueError, "Error converting manifest");
return NULL;
default:
PyErr_Format(PyExc_ValueError, "Unknown result code");
return NULL;
}
}
static Py_ssize_t fastmanifest_size(fastmanifest* self) {
return self->tree->num_leaf_nodes;
}
static PyObject* fastmanifest_bytes(fastmanifest* self) {
return PyInt_FromSize_t(self->tree->consumed_memory);
}
static PyObject* fastmanifest_getitem(fastmanifest* self, PyObject* key) {
if (!fastmanifest_is_valid_manifest_key(key)) {
PyErr_Format(PyExc_TypeError, "Manifest keys must be strings.");
return NULL;
}
char* ckey;
ssize_t clen;
int err = PyString_AsStringAndSize(key, &ckey, &clen);
if (err == -1) {
PyErr_Format(PyExc_TypeError, "Error decoding path");
return NULL;
}
get_path_result_t query = get_path(self->tree, ckey, clen);
switch (query.code) {
case GET_PATH_NOT_FOUND:
PyErr_Format(PyExc_KeyError, "File not found");
return NULL;
case GET_PATH_WTF:
PyErr_Format(PyExc_ValueError, "tree corrupt");
return NULL;
default:
break;
}
PyObject* ret =
fastmanifest_formatfile(query.checksum, query.checksum_sz, query.flags);
if (ret == NULL) {
PyErr_Format(PyExc_ValueError, "Error formatting file");
}
return ret;
}
static int
fastmanifest_setitem(fastmanifest* self, PyObject* key, PyObject* value) {
char *path, *hash, *flags;
ssize_t plen, hlen, flen;
int err;
/* Decode path */
if (!fastmanifest_is_valid_manifest_key(key)) {
PyErr_Format(PyExc_TypeError, "Manifest keys must be strings.");
return -1;
}
err = PyString_AsStringAndSize(key, &path, &plen);
if (err == -1 || plen < 0) {
PyErr_Format(PyExc_TypeError, "Error decoding path");
return -1;
}
if (!value) {
remove_path_result_t remove_path_result =
remove_path(self->tree, path, (size_t)plen);
switch (remove_path_result) {
case REMOVE_PATH_OK:
return 0;
case REMOVE_PATH_NOT_FOUND:
PyErr_Format(PyExc_KeyError, "Not found");
return -1;
case REMOVE_PATH_WTF:
PyErr_Format(PyExc_KeyError, "tree corrupt");
return -1;
}
}
/* Decode node and flags*/
if (!fastmanifest_is_valid_manifest_value(value)) {
return -1;
}
PyObject* pyhash = PyTuple_GetItem(value, 0);
err = PyString_AsStringAndSize(pyhash, &hash, &hlen);
if (err == -1) {
PyErr_Format(PyExc_TypeError, "Error decoding hash");
return -1;
}
PyObject* pyflags = PyTuple_GetItem(value, 1);
err = PyString_AsStringAndSize(pyflags, &flags, &flen);
if (err == -1) {
PyErr_Format(PyExc_TypeError, "Error decoding flags");
return -1;
}
add_update_path_result_t add_update_path_result = add_or_update_path(
self->tree, path, plen, (unsigned char*)hash, hlen, *flags);
switch (add_update_path_result) {
case ADD_UPDATE_PATH_OOM: {
PyErr_NoMemory();
return -1;
}
case ADD_UPDATE_PATH_OK:
return 0;
default: {
PyErr_Format(PyExc_TypeError, "unexpected stuff happened");
return -1;
}
}
}
static PyMappingMethods fastmanifest_mapping_methods = {
(lenfunc)fastmanifest_size, /* mp_length */
(binaryfunc)fastmanifest_getitem, /* mp_subscript */
(objobjargproc)fastmanifest_setitem, /* mp_ass_subscript */
};
/* sequence methods (important or __contains__ builds an iterator) */
static int fastmanifest_contains(fastmanifest* self, PyObject* key) {
if (!fastmanifest_is_valid_manifest_key(key)) {
/* Our keys are always strings, so if the contains
* check is for a non-string, just return false. */
return 0;
}
char* path;
ssize_t plen;
int err = PyString_AsStringAndSize(key, &path, &plen);
if (err == -1) {
PyErr_Format(PyExc_TypeError, "Error decoding path");
return -1;
}
return contains_path(self->tree, path, plen) ? 1 : 0;
}
static PySequenceMethods fastmanifest_seq_meths = {
(lenfunc)fastmanifest_size, /* sq_length */
0, /* sq_concat */
0, /* sq_repeat */
0, /* sq_item */
0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)fastmanifest_contains, /* sq_contains */
0, /* sq_inplace_concat */
0, /* sq_inplace_repeat */
};
static PyMethodDef fastmanifest_methods[] = {
{"iterkeys",
(PyCFunction)fastmanifest_getkeysiter,
METH_NOARGS,
"Iterate over file names in this fastmanifest."},
{"iterentries",
(PyCFunction)fastmanifest_getentriesiter,
METH_NOARGS,
"Iterate over (path, nodeid, flags) tuples in this fastmanifest."},
{"copy",
(PyCFunction)fastmanifest_copy,
METH_NOARGS,
"Make a copy of this fastmanifest."},
{"filtercopy",
(PyCFunction)fastmanifest_filtercopy,
METH_O,
"Make a copy of this manifest filtered by matchfn."},
{"_save",
(PyCFunction)fastmanifest_save,
METH_VARARGS,
"Save a fastmanifest to a file"},
{"load",
(PyCFunction)fastmanifest_load,
METH_VARARGS | METH_CLASS,
"Load a tree manifest from a file"},
{"diff",
(PyCFunction)fastmanifest_diff,
METH_VARARGS | METH_KEYWORDS,
"Compare this fastmanifest to another one."},
{"text",
(PyCFunction)fastmanifest_text,
METH_NOARGS,
"Encode this manifest to text."},
{"bytes",
(PyCFunction)fastmanifest_bytes,
METH_NOARGS,
"Returns an upper bound on the number of bytes required "
"to represent this manifest."},
{NULL},
};
static PyTypeObject fastmanifestType = {
PyObject_HEAD_INIT(NULL) 0, /* ob_size */
"parsers.fastmanifest", /* tp_name */
sizeof(fastmanifest), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)fastmanifest_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
&fastmanifest_seq_meths, /* tp_as_sequence */
&fastmanifest_mapping_methods, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_SEQUENCE_IN, /* tp_flags */
"TODO(augie)", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
(getiterfunc)fastmanifest_getkeysiter, /* tp_iter */
0, /* tp_iternext */
fastmanifest_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)fastmanifest_init, /* tp_init */
0, /* tp_alloc */
};
/* iteration support */
static void fmiter_dealloc(PyObject* o) {
fmIter* self = (fmIter*)o;
destroy_iterator(self->iterator);
PyObject_Del(self);
}
static PyObject* fmiter_iterkeysnext(PyObject* o) {
fmIter* self = (fmIter*)o;
iterator_result_t iterator_result = iterator_next(self->iterator);
if (!iterator_result.valid) {
return NULL;
}
return PyString_FromStringAndSize(
iterator_result.path, iterator_result.path_sz);
}
static PyObject* fmiter_iterentriesnext(PyObject* o) {
fmIter* self = (fmIter*)o;
iterator_result_t iterator_result = iterator_next(self->iterator);
if (!iterator_result.valid) {
return NULL;
}
PyObject *ret = NULL, *path, *hash, *flags;
path =
PyString_FromStringAndSize(iterator_result.path, iterator_result.path_sz);
hash = PyString_FromStringAndSize(
(const char*)iterator_result.checksum, iterator_result.checksum_sz);
flags = PyString_FromStringAndSize(
(const char*)&iterator_result.flags, iterator_result.flags == 0 ? 0 : 1);
if (!path || !hash || !flags) {
goto done;
}
ret = PyTuple_Pack(3, path, hash, flags);
done:
Py_XDECREF(path);
Py_XDECREF(hash);
Py_XDECREF(flags);
return ret;
}
static PyTypeObject fastmanifestKeysIterator = {
PyObject_HEAD_INIT(NULL) 0, /*ob_size */
"parsers.fastmanifest.keysiterator", /*tp_name */
sizeof(fmIter), /*tp_basicsize */
0, /*tp_itemsize */
fmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
/* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
use tp_iter and tp_iternext fields. */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
"Keys iterator for a fastmanifest.", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
fmiter_iterkeysnext, /* tp_iternext: next() method */
};
static PyTypeObject fastmanifestEntriesIterator = {
PyObject_HEAD_INIT(NULL) 0, /*ob_size */
"parsers.fastmanifest.entriesiterator", /*tp_name */
sizeof(fmIter), /*tp_basicsize */
0, /*tp_itemsize */
fmiter_dealloc, /*tp_dealloc */
0, /*tp_print */
0, /*tp_getattr */
0, /*tp_setattr */
0, /*tp_compare */
0, /*tp_repr */
0, /*tp_as_number */
0, /*tp_as_sequence */
0, /*tp_as_mapping */
0, /*tp_hash */
0, /*tp_call */
0, /*tp_str */
0, /*tp_getattro */
0, /*tp_setattro */
0, /*tp_as_buffer */
/* tp_flags: Py_TPFLAGS_HAVE_ITER tells python to
use tp_iter and tp_iternext fields. */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER,
"Iterator for 3-tuples in a fastmanifest.", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter: __iter__() method */
fmiter_iterentriesnext, /* tp_iternext: next() method */
};
static PyMethodDef methods[] = {{NULL, NULL, 0, NULL}};
PyMODINIT_FUNC initcfastmanifest(void) {
PyObject* m;
fastmanifestType.tp_new = PyType_GenericNew;
if (PyType_Ready(&fastmanifestType) < 0)
return;
m = Py_InitModule3("cfastmanifest", methods, "Wrapper around fast_manifest");
Py_INCREF(&fastmanifestType);
PyModule_AddObject(m, "fastmanifest", (PyObject*)&fastmanifestType);
}


@@ -1 +0,0 @@
.idea


@@ -1,4 +0,0 @@
fastmanifest is a tree-based implementation to speed up manifest
operations in Mercurial. Its design is optimized for quick
deserialization from a persistent store. In compact form, the entire tree
is relocatable without any traversals.
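
To make the removed interface concrete, here is a minimal C sketch (editorial, not part of this commit) of how the tree API from tree.h was typically driven. The call shapes are taken from their uses in cfastmanifest.c above and the tests below; the exact prototypes live in the deleted tree.h, which this excerpt does not show, so treat the sketch as an approximation.

// sketch: build a tiny manifest tree, query it, and flatten it back out.
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
int main(void) {
  uint8_t node[20]; // a 20-byte (SHA1_BYTES) checksum
  memset(node, 0xab, sizeof(node));
  tree_t* tree = alloc_tree(); // empty tree, as fastmanifest_init does
  if (tree == NULL) {
    return 1;
  }
  // insert a path with its checksum and flags (cf. fastmanifest_setitem).
  if (add_or_update_path(
          tree, "dir/file.c", strlen("dir/file.c"), node, sizeof(node), 0) !=
      ADD_UPDATE_PATH_OK) {
    return 1;
  }
  // look the path back up (cf. fastmanifest_getitem).
  get_path_result_t found = get_path(tree, "dir/file.c", strlen("dir/file.c"));
  if (found.code == GET_PATH_OK) {
    printf(
        "flags=%u checksum_sz=%u\n",
        (unsigned)found.flags,
        (unsigned)found.checksum_sz);
  }
  // serialize to the flat manifest format (cf. fastmanifest_text).
  convert_to_flat_result_t flat = convert_to_flat(tree);
  if (flat.code == CONVERT_TO_FLAT_OK) {
    fwrite(flat.flat_manifest, 1, flat.flat_manifest_sz, stdout);
  }
  destroy_tree(tree);
  return 0;
}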


@@ -1,46 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// bsearch.c: binary search implementation with context-aware callback.
//
// no-check-code
#include <stddef.h>
#include <stdio.h>
#include "bsearch.h"
size_t bsearch_between(
const void* needle,
const void* base,
const size_t nel,
const size_t width,
int (*compare)(
const void* needle,
const void* fromarray,
const void* context),
const void* context) {
ptrdiff_t start = 0;
ptrdiff_t end = nel;
while (start < end) {
ptrdiff_t midpoint = start + ((end - start) / 2);
if (midpoint == nel) {
return nel;
}
const void* ptr = (const void*)((char*)base + (midpoint * width));
int cmp = compare(needle, ptr, context);
if (cmp == 0) {
return midpoint;
} else if (cmp < 0) {
end = midpoint;
} else {
start = midpoint + 1;
}
}
return start;
}


@@ -1,60 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// bsearch.h: binary search declarations with context-aware callback. this
// is a standalone library.
//
// no-check-code
#ifndef __BSEARCH_BSEARCH_H__
#define __BSEARCH_BSEARCH_H__
#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>
/**
* A generic binary search that allows a comparator to evaluate the placement of
* a needle relative to its possible neighbors.
*
* Returns a value from 0 to nel, representing the position at which the
* needle is found or would be inserted to keep the array ordered.
*
* The comparator should return:
* <0 if the element should be placed before `left`.
* =0 if the element should be placed between `left` and `right`.
* >0 if the element should be placed after `right`.
*/
extern size_t bsearch_between(
const void* needle,
const void* base,
const size_t nel,
const size_t width,
int (*compare)(
const void* needle,
const void* fromarray,
const void* context),
const void* context);
/**
* A convenient macro to build comparators for `bsearch_between`. Callers
* should provide a LEFT_COMPARE, which is used to compare the left neighbor and
* the needle, and RIGHT_COMPARE, which is used to compare the needle and the
* right neighbor.
*
* Each comparator will be passed two void pointers and a context object. It is
* the responsibility of the caller to ensure that it can properly cast the
* values to sane pointers.
*/
#define COMPARATOR_BUILDER(COMPARATOR_NAME, COMPARE) \
int COMPARATOR_NAME( \
const void* needle, const void* fromarray, const void* context) { \
return COMPARE(needle, fromarray); \
}
#define CONTEXTUAL_COMPARATOR_BUILDER(COMPARATOR_NAME, COMPARE) \
int COMPARATOR_NAME( \
const void* needle, const void* fromarray, const void* context) { \
return COMPARE(needle, fromarray, context); \
}
#endif /* #ifndef __BSEARCH_BSEARCH_H__ */


@@ -1,46 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// bsearch_test.c: tests for binary search with a context-aware callback.
//
// no-check-code
#include "bsearch.h"
#include "tests.h"
#define CMP(left, right) ((int)(*((intptr_t*)left) - *((intptr_t*)right)))
COMPARATOR_BUILDER(intptr_cmp, CMP)
#define BSEARCH_TEST(needle, expected, ...) \
{ \
size_t result; \
intptr_t _needle = needle; \
intptr_t* array = (intptr_t[]){__VA_ARGS__}; \
\
result = bsearch_between( \
&_needle, \
array, \
sizeof((intptr_t[]){__VA_ARGS__}) / sizeof(intptr_t), \
sizeof(intptr_t), \
&intptr_cmp, \
NULL); \
ASSERT(result == expected); \
}
void test_bsearch() {
BSEARCH_TEST(20, 1, 18, 21);
BSEARCH_TEST(20, 2, 15, 18, 21, );
BSEARCH_TEST(20, 2, 15, 18, 20, 21, );
BSEARCH_TEST(10, 0, 15, 18, 20, 21, );
BSEARCH_TEST(30, 4, 15, 18, 20, 21, );
}
int main(int argc, char* argv[]) {
test_bsearch();
return 0;
}


@@ -1,35 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// checksum.c: implementation for recalculating the checksums for
// intermediate nodes in a tree.
//
// no-check-code
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/sha1.h"
#include "node.h"
static void update_checksum(node_t* node) {
fbhg_sha1_ctx_t ctx;
fbhg_sha1_init(&ctx);
// find all the children and make sure their checksums are up-to-date.
for (child_num_t ix = 0; ix < node->num_children; ix++) {
node_t* child = get_child_by_index(node, ix);
if (child->checksum_valid == false) {
update_checksum(child);
}
fbhg_sha1_update(&ctx, child->name, child->name_sz);
fbhg_sha1_update(&ctx, child->checksum, child->checksum_sz);
fbhg_sha1_update(&ctx, &child->flags, 1);
}
fbhg_sha1_final(node->checksum, &ctx);
node->checksum_sz = SHA1_BYTES;
node->checksum_valid = true;
}
void update_checksums(tree_t* tree) {
update_checksum(tree->shadow_root);
}


@@ -1,15 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// checksum.h: declarations for recalculating the checksums for intermediate
// nodes in a tree. this is for internal use only.
//
// no-check-code
#ifndef __FASTMANIFEST_CHECKSUM_H__
#define __FASTMANIFEST_CHECKSUM_H__
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
extern void update_checksums(tree_t* tree);
#endif /* #ifndef __FASTMANIFEST_CHECKSUM_H__ */


@@ -1,126 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// checksum_test.c: tests for recalculating the checksums for intermediate
// nodes in a tree.
//
// no-check-code
#include "checksum.h"
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tests.h"
static void test_empty_tree() {
tree_t* tree = alloc_tree();
ASSERT(get_child_by_index(tree->shadow_root, 0)->checksum_valid == false);
update_checksums(tree);
ASSERT(get_child_by_index(tree->shadow_root, 0)->checksum_valid == true);
}
typedef struct {
char* path;
bool expected_checksum_valid;
} path_checksum_t;
/**
* Verify that when a path is added or removed, the affected paths have their
* checksums invalidated.
*/
static void test_updates_reset_checksums() {
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
tree_t* tree = alloc_tree();
char* paths_to_add[] = {
"abc",
"ab/def",
"ab/defg/hi",
"ab/defg/h/ij/kl",
"ab/defg/h/ijk",
"ab/defg/h/i/jkl/mn/op/qr",
"ab/defg/h/i/jkl/mn/op/qrs",
};
const size_t num_paths = sizeof(paths_to_add) / sizeof(*paths_to_add);
for (size_t ix = 0; ix < num_paths; ix++) {
add_update_path_result_t add_result = add_or_update_path(
tree, STRPLUSLEN(paths_to_add[ix]), checksum, SHA1_BYTES, 0);
ASSERT(add_result == ADD_UPDATE_PATH_OK);
}
update_checksums(tree);
ASSERT(get_child_by_index(tree->shadow_root, 0)->checksum_valid == true);
ASSERT(
add_or_update_path(
tree, STRPLUSLEN("ab/defg/h/ijk"), checksum, SHA1_BYTES, 0) ==
ADD_UPDATE_PATH_OK);
path_checksum_t dirs_to_check_after_add[] = {
{"abc", true},
{"ab/", false},
{"ab/defg/", false},
{"ab/defg/h/", false},
{"ab/defg/h/i/", true},
{"ab/defg/h/i/jkl/", true},
{"ab/defg/h/i/jkl/mn/", true},
{"ab/defg/h/i/jkl/mn/op/", true},
{"ab/defg/h/ij/", true},
};
size_t num_dirs =
sizeof(dirs_to_check_after_add) / sizeof(*dirs_to_check_after_add);
for (size_t ix = 0; ix < num_dirs; ix++) {
get_path_unfiltered_result_t get_result =
get_path_unfiltered(tree, STRPLUSLEN(dirs_to_check_after_add[ix].path));
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(
get_result.node->checksum_valid ==
dirs_to_check_after_add[ix].expected_checksum_valid);
}
ASSERT(get_child_by_index(tree->shadow_root, 0)->checksum_valid == false);
update_checksums(tree);
ASSERT(get_child_by_index(tree->shadow_root, 0)->checksum_valid == true);
ASSERT(
remove_path(tree, STRPLUSLEN("ab/defg/h/i/jkl/mn/op/qrs")) ==
REMOVE_PATH_OK);
path_checksum_t dirs_to_check_after_remove[] = {
{"abc", true},
{"ab/", false},
{"ab/defg/", false},
{"ab/defg/h/", false},
{"ab/defg/h/i/", false},
{"ab/defg/h/i/jkl/", false},
{"ab/defg/h/i/jkl/mn/", false},
{"ab/defg/h/i/jkl/mn/op/", false},
{"ab/defg/h/ij/", true},
};
num_dirs =
sizeof(dirs_to_check_after_remove) / sizeof(*dirs_to_check_after_remove);
for (size_t ix = 0; ix < num_dirs; ix++) {
get_path_unfiltered_result_t get_result = get_path_unfiltered(
tree, STRPLUSLEN(dirs_to_check_after_remove[ix].path));
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(
get_result.node->checksum_valid ==
dirs_to_check_after_remove[ix].expected_checksum_valid);
}
ASSERT(get_child_by_index(tree->shadow_root, 0)->checksum_valid == false);
}
int main(int argc, char* argv[]) {
test_empty_tree();
test_updates_reset_checksums();
return 0;
}


@@ -1,44 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// internal_result.h: result codes for internal APIs. obviously, this is for
// internal use only.
//
// no-check-code
#ifndef FASTMANIFEST_INTERNAL_RESULT_H
#define FASTMANIFEST_INTERNAL_RESULT_H
#include <stdint.h>
typedef enum {
ADD_CHILD_OK,
ADD_CHILD_ILLEGAL_PARENT,
ADD_CHILD_ILLEGAL_CHILD,
CONFLICTING_ENTRY_PRESENT,
NEEDS_LARGER_NODE,
} node_add_child_result_t;
typedef enum {
REMOVE_CHILD_OK,
REMOVE_CHILD_ILLEGAL_PARENT,
REMOVE_CHILD_ILLEGAL_INDEX,
} node_remove_child_result_t;
typedef enum {
ENLARGE_OK,
ENLARGE_OOM,
ENLARGE_ILLEGAL_PARENT,
ENLARGE_ILLEGAL_INDEX,
} node_enlarge_child_capacity_code_t;
typedef struct _node_enlarge_child_capacity_result_t {
node_enlarge_child_capacity_code_t code;
struct _node_t* old_child;
struct _node_t* new_child;
} node_enlarge_child_capacity_result_t;
typedef struct _node_search_children_result_t {
struct _node_t* child;
uint32_t child_num;
} node_search_children_result_t;
#endif // FASTMANIFEST_INTERNAL_RESULT_H


@@ -1,288 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// node.c: implementation for representing a node in a tree.
//
// no-check-code
#include <stdlib.h>
#include "bsearch.h"
#include "node.h"
static size_t calculate_required_size(uint16_t name_sz, uint32_t num_children) {
intptr_t address = get_child_ptr_base_offset(name_sz);
return address + (sizeof(ptrdiff_t) * num_children);
}
static void initialize_node(
node_t* node,
size_t block_sz,
const char* name,
uint16_t name_sz) {
if (!VERIFY_BLOCK_SZ(block_sz)) {
abort();
}
node->block_sz = (uint32_t)block_sz;
node->num_children = 0;
node->name_sz = name_sz;
node->in_use = true;
node->type = TYPE_UNDEFINED;
node->checksum_valid = false;
memcpy(&node->name, name, name_sz);
}
node_t* alloc_node(const char* name, uint16_t name_sz, uint32_t max_children) {
size_t size = calculate_required_size(name_sz, max_children);
node_t* result = (node_t*)malloc(size);
if (result == NULL) {
return result;
}
initialize_node(result, size, name, name_sz);
return result;
}
void* setup_node(
void* ptr,
size_t ptr_size_limit,
const char* name,
uint16_t name_sz,
uint32_t max_children) {
size_t size = calculate_required_size(name_sz, max_children);
if (size > ptr_size_limit) {
return NULL;
}
node_t* node = (node_t*)ptr;
intptr_t next = (intptr_t)ptr;
next += size;
initialize_node(node, size, name, name_sz);
return (void*)next;
}
node_t* clone_node(const node_t* node) {
uint32_t old_capacity = max_children(node);
uint64_t desired_new_capacity =
(((uint64_t)old_capacity) * (100 + STORAGE_INCREMENT_PERCENTAGE)) / 100;
if (desired_new_capacity - old_capacity < MIN_STORAGE_INCREMENT) {
desired_new_capacity = old_capacity + MIN_STORAGE_INCREMENT;
} else if (desired_new_capacity - old_capacity > MAX_STORAGE_INCREMENT) {
desired_new_capacity = old_capacity + MAX_STORAGE_INCREMENT;
}
uint32_t new_capacity;
if (desired_new_capacity > UINT32_MAX) {
new_capacity = UINT32_MAX;
} else {
new_capacity = (uint32_t)desired_new_capacity;
}
node_t* clone = alloc_node(node->name, node->name_sz, new_capacity);
if (clone == NULL) {
return NULL;
}
// copy metadata over.
clone->num_children = node->num_children;
if (node->checksum_valid) {
memcpy(clone->checksum, node->checksum, sizeof(node->checksum));
clone->checksum_sz = node->checksum_sz;
}
clone->type = node->type;
clone->checksum_valid = node->checksum_valid;
clone->flags = node->flags;
// calculate the difference we need to apply to the relative pointers.
ptrdiff_t delta = ((intptr_t)node) - ((intptr_t)clone);
// get the child pointer base of each node.
const ptrdiff_t* node_base = get_child_ptr_base_const(node);
ptrdiff_t* clone_base = get_child_ptr_base(clone);
for (int ix = 0; ix < node->num_children; ix++) {
clone_base[ix] = node_base[ix] + delta;
}
return clone;
}
typedef struct {
const char* name;
uint16_t name_sz;
} find_child_struct_t;
#define NAME_NODE_COMPARE(nameobject, relptr, context) \
(name_compare( \
((const find_child_struct_t*)nameobject)->name, \
((const find_child_struct_t*)nameobject)->name_sz, \
get_child_from_diff((node_t*)context, *((ptrdiff_t*)relptr))))
static CONTEXTUAL_COMPARATOR_BUILDER(name_node_cmp, NAME_NODE_COMPARE);
node_add_child_result_t add_child(node_t* node, const node_t* child) {
// verify parent node.
if (!node->in_use ||
!(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT)) {
return ADD_CHILD_ILLEGAL_PARENT;
}
// do we have enough space? if not, we need to request a new space.
if (node->num_children + 1 > max_children(node)) {
return NEEDS_LARGER_NODE;
}
// verify child node.
if (!child->in_use) {
return ADD_CHILD_ILLEGAL_CHILD;
}
ptrdiff_t* base = get_child_ptr_base(node);
find_child_struct_t needle = {child->name, child->name_sz};
size_t offset = bsearch_between(
&needle,
get_child_ptr_base(node),
node->num_children,
sizeof(ptrdiff_t),
name_node_cmp,
node);
if (offset < node->num_children) {
// displacing something. ensure we don't have a conflict.
ptrdiff_t diff = base[offset];
node_t* old_child = get_child_from_diff(node, diff);
if (name_compare(child->name, child->name_sz, old_child) == 0) {
return CONFLICTING_ENTRY_PRESENT;
}
}
if (offset < node->num_children) {
// move the remaining entries down to make space. let's say we have 3
// elements. if we're supposed to insert at offset 1, then we need to move
// elements at offset 1 & 2 down.
memmove(
&base[offset + 1],
&base[offset],
sizeof(ptrdiff_t) * (node->num_children - offset));
}
// bump the number of children we have.
node->num_children++;
// write the entry
set_child_by_index(node, offset, child);
return ADD_CHILD_OK;
}
node_remove_child_result_t remove_child(node_t* node, uint32_t child_num) {
// verify parent node.
if (!node->in_use ||
!(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT)) {
return REMOVE_CHILD_ILLEGAL_PARENT;
}
// do we have enough space? if not, we need to request a new space.
if (child_num >= node->num_children) {
return REMOVE_CHILD_ILLEGAL_INDEX;
}
if (child_num < node->num_children - 1) {
// we need to compact the existing entries.
ptrdiff_t* base = get_child_ptr_base(node);
memmove(
&base[child_num],
&base[child_num + 1],
sizeof(ptrdiff_t) * (node->num_children - 1 - child_num));
}
// decrement the number of children we have.
node->num_children--;
return REMOVE_CHILD_OK;
}
node_enlarge_child_capacity_result_t enlarge_child_capacity(
node_t* node,
uint32_t child_num) {
node_enlarge_child_capacity_result_t result;
// strictly these shouldn't be necessary, because we only read these fields
// if we succeed, and that code path does set the fields. however, gcc
// doesn't know that and throws a fit.
result.old_child = NULL;
result.new_child = NULL;
// verify parent node.
if (!node->in_use) {
result.code = ENLARGE_ILLEGAL_PARENT;
return result;
}
// verify child index.
if (child_num >= node->num_children) {
result.code = ENLARGE_ILLEGAL_INDEX;
return result;
}
node_t* old_child = get_child_by_index(node, child_num);
node_t* new_child = clone_node(old_child);
if (new_child == NULL) {
result.code = ENLARGE_OOM;
return result;
}
// write the entry
set_child_by_index(node, child_num, new_child);
result.code = ENLARGE_OK;
result.old_child = old_child;
result.new_child = new_child;
return result;
}
node_search_children_result_t
search_children(const node_t* node, const char* name, const uint16_t name_sz) {
const ptrdiff_t* base = get_child_ptr_base_const(node);
find_child_struct_t needle = {name, name_sz};
size_t offset = bsearch_between(
&needle,
get_child_ptr_base_const(node),
node->num_children,
sizeof(ptrdiff_t),
name_node_cmp,
node);
if (offset >= node->num_children) {
return COMPOUND_LITERAL(node_search_children_result_t){NULL, UINT32_MAX};
}
// ensure the spot we found is an exact match.
ptrdiff_t diff = base[offset];
node_t* child = get_child_from_diff(node, diff);
if (name_compare(name, name_sz, child) == 0) {
// huzzah, we found it.
return COMPOUND_LITERAL(node_search_children_result_t){child,
(uint32_t)offset};
}
return COMPOUND_LITERAL(node_search_children_result_t){NULL, UINT32_MAX};
}
uint32_t get_child_index(
const node_t* const parent,
const node_t* const child) {
const ptrdiff_t* base = get_child_ptr_base_const(parent);
for (uint32_t child_num = 0; child_num < parent->num_children; child_num++) {
if (((intptr_t)parent) + base[child_num] == (intptr_t)child) {
return child_num;
}
}
return UINT32_MAX;
}


@@ -1,256 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// node.h: declarations for representing a node in a tree. for internal use
// only.
//
// no-check-code
#ifndef __FASTMANIFEST_NODE_H__
#define __FASTMANIFEST_NODE_H__
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "internal_result.h"
#include "lib/clib/portability/portability.h"
#define STORAGE_INCREMENT_PERCENTAGE 20
#define MIN_STORAGE_INCREMENT 10
#define MAX_STORAGE_INCREMENT 100
#define CHECKSUM_BYTES 21
#define SHA1_BYTES 20
#define PTR_ALIGN_MASK (~((ptrdiff_t)(sizeof(ptrdiff_t) - 1)))
#define TYPE_UNDEFINED 0
#define TYPE_IMPLICIT 1
#define TYPE_LEAF 2
#define TYPE_ROOT 3
// the start of each of these nodes must be 32-bit aligned.
typedef struct _node_t {
uint32_t block_sz;
uint32_t num_children;
uint16_t name_sz;
uint8_t checksum[CHECKSUM_BYTES];
uint8_t checksum_sz;
uint8_t flags;
bool in_use : 1;
unsigned int type : 2;
bool checksum_valid : 1;
unsigned int unused : 4;
char name[0];
// padding to the nearest ptrdiff_t boundary.
// then a series of ptrdiff_t-sized pointers to the children.
} node_t;
/**
* Define some macros for users to test if their values are within the
* restrictions of our node implementation.
*/
#define VERIFY_BLOCK_SZ(block_sz) ((uintmax_t)(block_sz) < UINT32_MAX)
#define VERIFY_NAME_SZ(name_sz) ((uintmax_t)(name_sz) < UINT16_MAX)
#define VERIFY_CHILD_NUM(child_num) ((uintmax_t)(child_num) < UINT32_MAX)
#define block_sz_t uint32_t
#define name_sz_t uint16_t
#define child_num_t uint32_t
/**
* Returns <0 if (`name`, `name_sz`) is lexicographically less than the name in
* node.
*
* Returns =0 if (`name`, `name_sz`) is lexicographically equal to the name in
* node.
*
* Returns >0 if (`name`, `name_sz`) is lexicographically greater than the name
* in node.
*/
static inline int
name_compare(const char* name, uint16_t name_sz, const node_t* node) {
uint32_t min_sz = (name_sz < node->name_sz) ? name_sz : node->name_sz;
int sz_compare = name_sz - node->name_sz;
int cmp = strncmp(name, node->name, min_sz);
if (cmp) {
return cmp;
} else {
return sz_compare;
}
}
/**
* Returns the offset of the first child pointer, given a node with name size
* `name_sz`.
*/
static inline ptrdiff_t get_child_ptr_base_offset(uint16_t name_sz) {
intptr_t ptr = offsetof(node_t, name);
ptr += name_sz;
// this aligns to ptrdiff_t, since some platforms do not support unaligned
// loads.
ptr = (ptr + sizeof(intptr_t) - 1) & PTR_ALIGN_MASK;
return (ptrdiff_t)ptr;
}
/**
* Returns the address of the first child pointer. Since a child pointer is an
* ptrdiff_t, the type returned is an ptrdiff_t. Note that this is *not* the
* value of the first child pointer.
*/
static inline ptrdiff_t* get_child_ptr_base(node_t* node) {
assert(node->in_use);
intptr_t address = (intptr_t)node;
ptrdiff_t offset = get_child_ptr_base_offset(node->name_sz);
return (ptrdiff_t*)(address + offset);
}
/**
* Const version of get_child_ptr_base
*/
static inline const ptrdiff_t* get_child_ptr_base_const(const node_t* node) {
return get_child_ptr_base((node_t*)node);
}
static inline uint32_t max_children(const node_t* node) {
ptrdiff_t bytes_avail = node->block_sz;
bytes_avail -= ((intptr_t)get_child_ptr_base_const(node)) - ((intptr_t)node);
// if it requires > 32b, then we're kind of hosed.
if (!VERIFY_CHILD_NUM(bytes_avail)) {
abort();
}
return ((uint32_t)(bytes_avail / sizeof(intptr_t)));
}
static inline node_t* get_child_by_index(
const node_t* node,
uint32_t child_num) {
assert(node->in_use);
assert(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT);
assert(child_num < node->num_children);
intptr_t address = (intptr_t)get_child_ptr_base_const(node);
address += sizeof(ptrdiff_t) * child_num;
intptr_t base = (intptr_t)node;
ptrdiff_t offset = *((ptrdiff_t*)address);
base += offset;
return (node_t*)base;
}
static inline node_t* get_child_from_diff(const node_t* node, ptrdiff_t diff) {
assert(node->in_use);
assert(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT);
intptr_t base = (intptr_t)node;
base += diff;
return (node_t*)base;
}
static inline void
set_child_by_index(node_t* node, size_t child_num, const node_t* child) {
assert(node->in_use);
assert(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT);
assert(child_num < node->num_children);
assert(child->in_use);
ptrdiff_t* base = get_child_ptr_base(node);
ptrdiff_t delta = ((intptr_t)child) - ((intptr_t)node);
base[child_num] = delta;
}
/**
* Allocate a node on the heap suitably sized for a given name and a given
* number of children. Initialize the node as unused, but copy the name to the
* node.
*/
extern node_t*
alloc_node(const char* name, uint16_t name_sz, uint32_t max_children);
/**
* Given a block of memory, attempt to place a node at the start of the block.
* The node will be suitably sized for a given name and a given number of children.
* Initialize the node as unused, but copy the name to the node.
*
* Returns the address following the end of the node if the block is large
* enough to accommodate the node, or NULL if the block is too small.
*/
extern void* setup_node(
void* ptr,
size_t ptr_size_limit,
const char* name,
uint16_t name_sz,
uint32_t max_children);
/**
* Clone a node and increase the storage capacity by
* STORAGE_INCREMENT_PERCENTAGE, but by at least MIN_STORAGE_INCREMENT and no
* more than MAX_STORAGE_INCREMENT.
*/
extern node_t* clone_node(const node_t* node);
/**
* Adds a child to the node. A child with the same name must not already exist.
*
* The caller is responsible for going up the chain and updating metadata, such
* as the total number of leaf nodes in tree_t and marking the checksum bit
* dirty recursively up the tree.
*/
extern node_add_child_result_t add_child(node_t* node, const node_t* child);
/**
* Remove a child of a node, given a child index.
*
* The caller is responsible for going up the chain and updating metadata, such
* as the total number of leaf nodes in tree_t and marking the checksum bit
* dirty recursively up the tree.
*/
extern node_remove_child_result_t remove_child(
node_t* node,
uint32_t child_num);
/**
* Enlarge a child of a node, given a child index. By itself, this operation
* should not affect things like the total number of leaf nodes in the tree and
* the freshness of the checksums. However, it may affect total allocation.
*/
extern node_enlarge_child_capacity_result_t enlarge_child_capacity(
node_t* node,
uint32_t child_num);
/**
* Find the index of a child given a name. Returns true iff the child was
* found.
*
* If the child was found, return the index and the pointer to the child.
*/
extern node_search_children_result_t
search_children(const node_t* node, const char* name, const uint16_t name_sz);
/**
* Find the index of a child given a node. If the node is found, return its
* index. Otherwise return UINT32_MAX.
*/
extern uint32_t get_child_index(
const node_t* const parent,
const node_t* const child);
/**
* Convenience function just to find a child.
*/
static inline node_t*
get_child_by_name(const node_t* node, const char* name, uint16_t name_sz) {
node_search_children_result_t result = search_children(node, name, name_sz);
return result.child;
}
#endif /* #ifndef __FASTMANIFEST_NODE_H__ */


@@ -1,355 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// node_test.c: unit tests for the node.c
//
// no-check-code
#include "node.h"
#include "tests.h"
#define ALLOC_NODE_STR(name, max_children) \
alloc_node(name, strlen(name), max_children)
#define GET_CHILD_BY_NAME_STR(node, name) \
get_child_by_name(node, name, strlen(name))
/**
* Add a child and ensure that it can be found.
*/
void test_simple_parent_child() {
node_t* parent = ALLOC_NODE_STR("parent", 1);
node_t* child = ALLOC_NODE_STR("child", 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
node_t* lookup_child = GET_CHILD_BY_NAME_STR(parent, "child");
ASSERT(lookup_child == child);
}
/**
* Ensure that our size calculations are reasonably accurate by allocating a
* bunch of differently sized parents and adding a child.
*/
void test_space() {
for (uint16_t name_sz = 1; name_sz <= 8; name_sz++) {
node_t* parent = alloc_node("abcdefgh", name_sz, 1);
node_t* child = ALLOC_NODE_STR("child", 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
node_t* lookup_child = GET_CHILD_BY_NAME_STR(parent, "child");
ASSERT(lookup_child == child);
}
}
/**
* Try to add a child to a node that does not have enough space.
*/
void test_insufficient_space() {
node_t* parent = ALLOC_NODE_STR("parent", 1);
node_t* child1 = ALLOC_NODE_STR("child1", 0);
node_t* child2 = ALLOC_NODE_STR("child2", 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child1->in_use = true;
child1->type = TYPE_LEAF;
child2->in_use = true;
child2->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child1);
ASSERT(result == ADD_CHILD_OK);
result = add_child(parent, child2);
ASSERT(result == NEEDS_LARGER_NODE);
node_t* lookup_child = GET_CHILD_BY_NAME_STR(parent, "child1");
ASSERT(lookup_child == child1);
lookup_child = GET_CHILD_BY_NAME_STR(parent, "child2");
ASSERT(lookup_child == NULL);
}
/**
* Call `add_child` with a bunch of different arguments and verify the results
* are reasonable.
*/
typedef struct {
bool parent_in_use;
int parent_type;
bool child_in_use;
int child_type;
node_add_child_result_t expected_result;
} parent_child_test_cases_t;
void test_add_child_combinations() {
parent_child_test_cases_t cases[] = {
// parent or child not in use.
{false, TYPE_IMPLICIT, true, TYPE_LEAF, ADD_CHILD_ILLEGAL_PARENT},
{true, TYPE_IMPLICIT, false, TYPE_LEAF, ADD_CHILD_ILLEGAL_CHILD},
// parent type invalid.
{true, TYPE_LEAF, true, TYPE_LEAF, ADD_CHILD_ILLEGAL_PARENT},
// child type invalid.
{true, TYPE_IMPLICIT, false, TYPE_UNDEFINED, ADD_CHILD_ILLEGAL_CHILD},
// some good outcomes.
{true, TYPE_IMPLICIT, true, TYPE_LEAF, ADD_CHILD_OK},
{true, TYPE_IMPLICIT, true, TYPE_IMPLICIT, ADD_CHILD_OK},
};
for (int ix = 0; ix < sizeof(cases) / sizeof(parent_child_test_cases_t);
ix++) {
node_t* parent;
node_t* child;
parent = ALLOC_NODE_STR("parent", 1);
child = ALLOC_NODE_STR("child", 0);
parent->in_use = cases[ix].parent_in_use;
parent->type = cases[ix].parent_type;
child->in_use = cases[ix].child_in_use;
child->type = cases[ix].child_type;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == cases[ix].expected_result);
}
}
/**
* Insert children in lexicographical order. Ensure that we can find them.
*
* requirement: strlen(TEST_MANY_CHILDREN_NAME_STR) >=
* TEST_MANY_CHILDREN_COUNT
*/
#define TEST_MANY_CHILDREN_NAME_STR "abcdefgh"
#define TEST_MANY_CHILDREN_COUNT 8
void test_many_children() {
node_t* parent = ALLOC_NODE_STR("parent", TEST_MANY_CHILDREN_COUNT);
node_t* children[TEST_MANY_CHILDREN_COUNT]; // this should be ordered as we
// expect to find them in the
// parent's list of children.
for (uint16_t name_sz = 1; name_sz <= TEST_MANY_CHILDREN_COUNT; name_sz++) {
node_t* child = alloc_node(TEST_MANY_CHILDREN_NAME_STR, name_sz, 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
children[name_sz - 1] = child;
}
for (uint16_t name_sz = 1; name_sz <= TEST_MANY_CHILDREN_COUNT; name_sz++) {
node_t* result =
get_child_by_name(parent, TEST_MANY_CHILDREN_NAME_STR, name_sz);
ASSERT(result == children[name_sz - 1]);
}
}
/**
* Insert children in reverse lexicographical order. Ensure that we can find
* them.
*
* requirement: strlen(TEST_MANY_CHILDREN_NAME_STR) >=
* TEST_MANY_CHILDREN_COUNT
*/
void test_many_children_reverse() {
node_t* parent = ALLOC_NODE_STR("parent", TEST_MANY_CHILDREN_COUNT);
node_t* children[TEST_MANY_CHILDREN_COUNT]; // this should be ordered as we
// expect to find them in the
// parent's list of children.
for (uint16_t name_sz = TEST_MANY_CHILDREN_COUNT; name_sz > 0; name_sz--) {
node_t* child = alloc_node(TEST_MANY_CHILDREN_NAME_STR, name_sz, 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
children[name_sz - 1] = child;
}
for (uint16_t name_sz = 1; name_sz <= TEST_MANY_CHILDREN_COUNT; name_sz++) {
node_t* result =
get_child_by_name(parent, TEST_MANY_CHILDREN_NAME_STR, name_sz);
ASSERT(result == children[name_sz - 1]);
}
}
/**
* Create a node with many children. Clone the node. Ensure we can locate all
* of the children.
*
* requirement: strlen(TEST_CLONE_NAME_STR) >=
* TEST_CLONE_COUNT
*/
#define TEST_CLONE_NAME_STR "abcdefgh"
#define TEST_CLONE_COUNT 8
void test_clone() {
node_t* parent = ALLOC_NODE_STR("parent", TEST_CLONE_COUNT);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
memset(parent->checksum, 0x2e, SHA1_BYTES);
parent->checksum_valid = true;
parent->checksum_sz = SHA1_BYTES;
parent->flags = 0x3e;
node_t* children[TEST_CLONE_COUNT]; // this should be ordered as we
// expect to find them in the
// parent's list of children.
for (uint16_t name_sz = 1; name_sz <= TEST_CLONE_COUNT; name_sz++) {
node_t* child = alloc_node(TEST_CLONE_NAME_STR, name_sz, 0);
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
children[name_sz - 1] = child;
}
node_t* clone = clone_node(parent);
for (uint16_t name_sz = 1; name_sz <= TEST_CLONE_COUNT; name_sz++) {
node_t* result = get_child_by_name(clone, TEST_CLONE_NAME_STR, name_sz);
ASSERT(result == children[name_sz - 1]);
}
ASSERT(clone->checksum_sz == SHA1_BYTES);
for (uint8_t ix = 0; ix < SHA1_BYTES; ix++) {
ASSERT(clone->checksum[ix] == 0x2e);
}
ASSERT(clone->flags == 0x3e);
ASSERT(max_children(clone) > max_children(parent));
}
/**
* Create a node with many children. Remove them in a pseudorandom fashion.
* Ensure that the remaining children can be correctly found.
*
* requirement: strlen(TEST_REMOVE_CHILD_NAME_STR) >=
* TEST_REMOVE_CHILD_COUNT
*/
#define TEST_REMOVE_CHILD_NAME_STR "1234ffgg"
#define TEST_REMOVE_CHILD_COUNT 8
void test_remove_child() {
node_t* parent = ALLOC_NODE_STR("parent", TEST_REMOVE_CHILD_COUNT);
node_t* children[TEST_REMOVE_CHILD_COUNT]; // this should be ordered as we
// expect to find them in the
// parent's list of children.
bool valid[TEST_REMOVE_CHILD_COUNT];
for (uint16_t name_sz = 1; name_sz <= TEST_REMOVE_CHILD_COUNT; name_sz++) {
node_t* child = alloc_node(TEST_REMOVE_CHILD_NAME_STR, name_sz, 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
children[name_sz - 1] = child;
valid[name_sz - 1] = true;
}
for (uint16_t ix = 0; ix < TEST_REMOVE_CHILD_COUNT; ix++) {
uint16_t victim_index = 0;
for (uint16_t jx = 0; jx < TEST_REMOVE_CHILD_COUNT + 1; jx++) {
do {
victim_index = (victim_index + 1) % TEST_REMOVE_CHILD_COUNT;
} while (valid[victim_index] == false);
}
// ok, we found our victim. remove it.
node_search_children_result_t search_result =
search_children(parent, TEST_REMOVE_CHILD_NAME_STR, victim_index + 1);
ASSERT(search_result.child == children[victim_index]);
valid[victim_index] = false;
ASSERT(remove_child(parent, search_result.child_num) == REMOVE_CHILD_OK);
// go through the items that should still be children, and make sure they're
// still reachable.
for (uint16_t name_sz = 1; name_sz <= TEST_REMOVE_CHILD_COUNT; name_sz++) {
node_t* child =
get_child_by_name(parent, TEST_REMOVE_CHILD_NAME_STR, name_sz);
if (valid[name_sz - 1]) {
ASSERT(child != NULL);
} else {
ASSERT(child == NULL);
}
}
}
}
/**
* Create a node and add many children. Enlarge one of the children.
*
* requirement: strlen(TEST_ENLARGE_CHILD_CAPACITY_NAME_STR) >=
* TEST_ENLARGE_CHILD_CAPACITY_COUNT
*/
#define TEST_ENLARGE_CHILD_CAPACITY_NAME_STR "abcdefgh"
#define TEST_ENLARGE_CHILD_CAPACITY_COUNT 8
void test_enlarge_child_capacity() {
node_t* parent = ALLOC_NODE_STR("parent", TEST_MANY_CHILDREN_COUNT);
node_t* children[TEST_MANY_CHILDREN_COUNT]; // this should be ordered as we
// expect to find them in the
// parent's list of children.
for (uint16_t name_sz = 1; name_sz <= TEST_MANY_CHILDREN_COUNT; name_sz++) {
node_t* child =
alloc_node(TEST_ENLARGE_CHILD_CAPACITY_NAME_STR, name_sz, 0);
parent->in_use = true;
parent->type = TYPE_IMPLICIT;
child->in_use = true;
child->type = TYPE_LEAF;
node_add_child_result_t result = add_child(parent, child);
ASSERT(result == ADD_CHILD_OK);
children[name_sz - 1] = child;
}
node_enlarge_child_capacity_result_t enlarge_child_capacity_result =
enlarge_child_capacity(parent, 0);
ASSERT(enlarge_child_capacity_result.code == ENLARGE_OK);
ASSERT(enlarge_child_capacity_result.old_child == children[0]);
node_t* enlarged = get_child_by_index(parent, 0);
ASSERT(max_children(enlarged) > 0);
ASSERT(
name_compare(
enlarged->name,
enlarged->name_sz,
enlarge_child_capacity_result.old_child) == 0);
}
int main(int argc, char* argv[]) {
test_simple_parent_child();
test_space();
test_insufficient_space();
test_add_child_combinations();
test_many_children();
test_many_children_reverse();
test_clone();
test_remove_child();
test_enlarge_child_capacity();
return 0;
}


@@ -1,30 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// path_buffer.h: macros for managing a path buffer.
//
// no-check-code
#ifndef CFASTMANIFEST_PATH_BUFFER_H
#define CFASTMANIFEST_PATH_BUFFER_H
#include "lib/clib/buffer.h"
// a common usage pattern for this module is to store a path. the path can
// be of any length, theoretically, so we have to support expansion.
#define DEFAULT_PATH_BUFFER_SZ 16384
#define PATH_BUFFER_GROWTH_FACTOR 1.2
#define PATH_BUFFER_MINIMUM_GROWTH 65536
#define PATH_BUFFER_MAXIMUM_GROWTH (1024 * 1024)
#define PATH_APPEND(buffer, buffer_idx, buffer_sz, input, input_sz) \
buffer_append( \
buffer, \
buffer_idx, \
buffer_sz, \
input, \
input_sz, \
PATH_BUFFER_GROWTH_FACTOR, \
PATH_BUFFER_MINIMUM_GROWTH, \
PATH_BUFFER_MAXIMUM_GROWTH)
#endif // CFASTMANIFEST_PATH_BUFFER_H


@@ -1,102 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// result.h: return types for publicly accessible methods. this is
// indirectly exposed through tree.h.
//
// no-check-code
#ifndef __FASTMANIFEST_RESULT_H__
#define __FASTMANIFEST_RESULT_H__
typedef enum {
GET_PATH_OK,
GET_PATH_NOT_FOUND,
GET_PATH_WTF,
} get_path_code_t;
typedef struct _get_path_result_t {
get_path_code_t code;
const uint8_t* checksum;
const uint8_t checksum_sz;
const uint8_t flags;
} get_path_result_t;
typedef enum {
ADD_UPDATE_PATH_OK,
ADD_UPDATE_PATH_OOM,
ADD_UPDATE_PATH_CONFLICT,
ADD_UPDATE_PATH_WTF,
} add_update_path_result_t;
typedef enum {
SET_METADATA_OK,
} set_metadata_result_t;
typedef enum {
REMOVE_PATH_OK,
REMOVE_PATH_WTF,
REMOVE_PATH_NOT_FOUND,
} remove_path_result_t;
typedef enum {
READ_FROM_FILE_OK,
READ_FROM_FILE_OOM,
// consult the err field in read_from_file_result_t for more details.
READ_FROM_FILE_NOT_READABLE,
// should nuke this file. it doesn't parse logically.
READ_FROM_FILE_WTF,
// should nuke this file. it might parse logically, but not on this host.
READ_FROM_FILE_NOT_USABLE,
} read_from_file_code_t;
typedef struct _read_from_file_result_t {
read_from_file_code_t code;
int err;
struct _tree_t* tree;
} read_from_file_result_t;
typedef enum {
WRITE_TO_FILE_OK,
WRITE_TO_FILE_OOM,
WRITE_TO_FILE_WTF,
} write_to_file_result_t;
typedef enum {
CONVERT_FROM_FLAT_OK,
CONVERT_FROM_FLAT_OOM,
CONVERT_FROM_FLAT_WTF,
} convert_from_flat_code_t;
typedef struct _convert_from_flat_result_t {
convert_from_flat_code_t code;
struct _tree_t* tree;
} convert_from_flat_result_t;
typedef enum {
CONVERT_TO_FLAT_OK,
CONVERT_TO_FLAT_OOM,
CONVERT_TO_FLAT_WTF,
} convert_to_flat_code_t;
typedef struct _convert_to_flat_result_t {
convert_to_flat_code_t code;
char* flat_manifest;
size_t flat_manifest_sz;
} convert_to_flat_result_t;
typedef enum {
DIFF_OK,
DIFF_OOM,
DIFF_WTF,
} diff_result_t;
typedef struct _iterator_result_t {
bool valid;
const char* path;
size_t path_sz;
const uint8_t* checksum;
uint8_t checksum_sz;
uint8_t flags;
} iterator_result_t;
#endif /* #ifndef __FASTMANIFEST_RESULT_H__ */

View File

@@ -1,79 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tests.c: convenience functions for unit tests.
//
// no-check-code
#include "tests.h"
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tree_path.h"
typedef struct _get_path_unfiltered_metadata_t {
node_t* node;
} get_path_unfiltered_metadata_t;
static find_path_callback_result_t get_path_unfiltered_callback(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* name,
const size_t name_sz,
tree_state_changes_t* changes,
void* context) {
get_path_unfiltered_metadata_t* metadata =
(get_path_unfiltered_metadata_t*)context;
// does the path already exist?
node_t* child = get_child_by_name(root, name, name_sz);
if (child == NULL) {
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_NOT_FOUND,
root};
}
metadata->node = child;
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_OK, root};
}
get_path_unfiltered_result_t
get_path_unfiltered(tree_t* tree, const char* path, const size_t path_sz) {
tree_state_changes_t changes = {0};
get_path_unfiltered_metadata_t metadata;
node_t* shadow_root = tree->shadow_root;
node_t* real_root = get_child_by_index(shadow_root, 0);
if (real_root == NULL) {
return COMPOUND_LITERAL(get_path_unfiltered_result_t){GET_PATH_WTF, NULL};
}
find_path_result_t result = find_path(
tree,
shadow_root,
real_root,
path,
path_sz,
BASIC_WALK_ALLOW_IMPLICIT_NODES,
&changes,
get_path_unfiltered_callback,
&metadata);
assert(changes.size_change == 0);
assert(changes.num_leaf_node_change == 0);
assert(changes.non_arena_allocations == false);
switch (result) {
case FIND_PATH_OK:
return COMPOUND_LITERAL(get_path_unfiltered_result_t){GET_PATH_OK,
metadata.node};
case FIND_PATH_NOT_FOUND:
case FIND_PATH_CONFLICT:
// `FIND_PATH_CONFLICT` is returned if there is a leaf node where we
// expect a directory node. this is treated the same as a NOT_FOUND.
return COMPOUND_LITERAL(get_path_unfiltered_result_t){GET_PATH_NOT_FOUND,
NULL};
default:
return COMPOUND_LITERAL(get_path_unfiltered_result_t){GET_PATH_WTF, NULL};
}
}

View File

@@ -1,74 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tests.h: convenience functions for unit tests.
//
// no-check-code
#ifndef __TESTLIB_TESTS_H__
#define __TESTLIB_TESTS_H__
#include <stdio.h>
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/portability/portability.h"
#include "node.h"
#include "result.h"
#define ASSERT(cond) \
if (!(cond)) { \
printf("failed on line %d\n", __LINE__); \
exit(37); \
}
#define STRPLUSLEN(__str__) __str__, strlen(__str__)
typedef struct _get_path_unfiltered_result_t {
get_path_code_t code;
const node_t* node;
} get_path_unfiltered_result_t;
extern get_path_unfiltered_result_t
get_path_unfiltered(tree_t* const tree, const char* path, const size_t path_sz);
/**
* Computes a hash based on a value. It's not a great checksum, but it's enough
* for basic tests.
*/
static inline uint8_t* int2sha1hash(uint32_t value, uint8_t* sha1hash) {
for (size_t ix = 0; ix < SHA1_BYTES; ix += sizeof(value), value++) {
size_t left = SHA1_BYTES - ix;
size_t bytes_to_copy = left > sizeof(value) ? sizeof(value) : left;
memcpy(&sha1hash[ix], &value, bytes_to_copy);
}
return sha1hash;
}
typedef struct {
char* path;
size_t path_sz;
uint32_t checksum_seed;
uint8_t flags;
} add_to_tree_t;
/**
* Adds a bunch of paths to a tree.
*/
static inline void
add_to_tree(tree_t* tree, add_to_tree_t* requests, size_t request_sz) {
uint8_t buffer[CHECKSUM_BYTES];
for (size_t ix = 0; ix < request_sz; ix++) {
add_to_tree_t* request = &requests[ix];
add_update_path_result_t result = add_or_update_path(
tree,
request->path,
request->path_sz,
int2sha1hash(request->checksum_seed, buffer),
SHA1_BYTES,
request->flags);
ASSERT(result == ADD_UPDATE_PATH_OK);
}
}
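// a minimal usage sketch (mirroring how the convert and copy tests drive
// add_to_tree):
//
//   add_to_tree_t to_add[] = {
//       {STRPLUSLEN("dir/file"), 12345, 0},
//   };
//   add_to_tree(tree, to_add, sizeof(to_add) / sizeof(add_to_tree_t));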
#endif /* #ifndef __TESTLIB_TESTS_H__ */

View File

@@ -1,412 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree.c: core methods for tree creation and manipulation. to keep this file
// a reasonable length, some of the more complicated methods have
// been split off into their own .c files (tree_arena.c, tree_convert.c,
// tree_copy.c, checksum.c).
//
// no-check-code
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "tree_arena.h"
#include "tree_path.h"
bool valid_path(const char* path, const size_t path_sz) {
  if (path_sz > 0 && (path[0] == '/' || path[path_sz - 1] == '/')) {
return false;
}
size_t last_slash = (size_t)-1;
for (size_t off = 0; off < path_sz; off++) {
if (path[off] == '/') {
if (last_slash == off - 1) {
return false;
}
last_slash = off;
}
}
return true;
}
/**
* Given a path, return the size of the string that would yield just the
* directory name. The path must be valid according to `valid_path`, but
* otherwise the semantics are like os.path.dirname on python.
*
* dirname('abc/def/ghi') => 'abc/def'
* dirname('abc/def') => 'abc'
* dirname('abc') => ''
*/
/*static size_t dirname(const char* path, size_t path_sz) {
for (size_t off = path_sz; off > 0; off --) {
if (path[off - 1] == '/') {
if (off == 1) {
return 1;
} else {
return off - 1;
}
}
}
return 0;
}
*/
tree_t* alloc_tree() {
// do all the memory allocations.
node_t* shadow_root = alloc_node("/", 1, 1);
node_t* real_root = alloc_node("/", 1, 0);
tree_t* tree = (tree_t*)calloc(1, sizeof(tree_t));
if (shadow_root == NULL || real_root == NULL || tree == NULL) {
goto fail;
}
shadow_root->type = TYPE_ROOT;
real_root->type = TYPE_ROOT;
node_add_child_result_t add_result = add_child(shadow_root, real_root);
if (add_result != ADD_CHILD_OK) {
goto fail;
}
tree->shadow_root = shadow_root;
tree->consumed_memory = 0;
tree->consumed_memory += real_root->block_sz;
tree->arena = NULL;
tree->arena_free_start = NULL;
tree->arena_sz = 0;
tree->compacted = false;
return tree;
fail:
free(shadow_root);
free(real_root);
free(tree);
return NULL;
}
static void destroy_tree_helper(tree_t* tree, node_t* node) {
for (int ix = 0; ix < node->num_children; ix++) {
destroy_tree_helper(tree, get_child_by_index(node, ix));
}
if (!in_arena(tree, node)) {
free(node);
}
}
void destroy_tree(tree_t* tree) {
if (tree == NULL) {
return;
}
if (tree->compacted == false) {
destroy_tree_helper(tree, tree->shadow_root);
} else {
free(tree->shadow_root);
}
free(tree->arena);
free(tree);
}
typedef struct _get_path_metadata_t {
const node_t* node;
} get_path_metadata_t;
find_path_callback_result_t get_path_callback(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* name,
const size_t name_sz,
tree_state_changes_t* changes,
void* context) {
get_path_metadata_t* metadata = (get_path_metadata_t*)context;
// does the path already exist?
node_t* child = get_child_by_name(root, name, name_sz);
if (child == NULL || child->type != TYPE_LEAF) {
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_NOT_FOUND,
root};
}
metadata->node = child;
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_OK, root};
}
get_path_result_t
get_path(tree_t* tree, const char* path, const size_t path_sz) {
tree_state_changes_t changes = {0};
get_path_metadata_t metadata;
node_t* shadow_root = tree->shadow_root;
node_t* real_root = get_child_by_index(shadow_root, 0);
if (real_root == NULL) {
return COMPOUND_LITERAL(get_path_result_t){GET_PATH_WTF, NULL};
}
find_path_result_t result = find_path(
tree,
shadow_root,
real_root,
path,
path_sz,
BASIC_WALK,
&changes,
get_path_callback,
&metadata);
assert(changes.size_change == 0);
assert(changes.num_leaf_node_change == 0);
assert(changes.non_arena_allocations == false);
switch (result) {
case FIND_PATH_OK:
return COMPOUND_LITERAL(get_path_result_t){GET_PATH_OK,
metadata.node->checksum,
metadata.node->checksum_sz,
metadata.node->flags};
case FIND_PATH_NOT_FOUND:
case FIND_PATH_CONFLICT:
// `FIND_PATH_CONFLICT` is returned if there is a leaf node where we
// expect a directory node. this is treated the same as a NOT_FOUND.
return COMPOUND_LITERAL(get_path_result_t){GET_PATH_NOT_FOUND, NULL};
default:
return COMPOUND_LITERAL(get_path_result_t){GET_PATH_WTF, NULL};
}
}
typedef struct _add_or_update_path_metadata_t {
const uint8_t* checksum;
const uint8_t checksum_sz;
const uint8_t flags;
} add_or_update_path_metadata_t;
find_path_callback_result_t add_or_update_path_callback(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* name,
const size_t name_sz,
tree_state_changes_t* changes,
void* context) {
add_or_update_path_metadata_t* metadata =
(add_or_update_path_metadata_t*)context;
// does the path already exist?
node_t* child = get_child_by_name(root, name, name_sz);
if (child == NULL) {
tree_add_child_result_t tree_add_child_result = tree_add_child(
tree,
root_parent,
root,
name,
name_sz,
0, // leaf nodes don't have children.
changes);
switch (tree_add_child_result.code) {
case TREE_ADD_CHILD_OOM:
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_OOM,
NULL};
case TREE_ADD_CHILD_WTF:
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_WTF,
NULL};
case TREE_ADD_CHILD_OK:
break;
}
root = tree_add_child_result.newroot;
child = tree_add_child_result.newchild;
// it's a leaf node.
child->type = TYPE_LEAF;
// update the accounting.
changes->num_leaf_node_change++;
} else {
if (child->type == TYPE_IMPLICIT) {
// was previously a directory
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_CONFLICT,
NULL};
}
}
// update the node.
if (metadata->checksum_sz > CHECKSUM_BYTES) {
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_WTF, NULL};
}
memcpy(child->checksum, metadata->checksum, metadata->checksum_sz);
child->checksum_sz = metadata->checksum_sz;
child->checksum_valid = true;
child->flags = metadata->flags;
changes->checksum_dirty = true;
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_OK, root};
}
add_update_path_result_t add_or_update_path(
tree_t* tree,
const char* path,
const size_t path_sz,
const uint8_t* checksum,
const uint8_t checksum_sz,
const uint8_t flags) {
tree_state_changes_t changes = {0};
add_or_update_path_metadata_t metadata = {
checksum,
checksum_sz,
flags,
};
node_t* shadow_root = tree->shadow_root;
node_t* real_root = get_child_by_index(shadow_root, 0);
if (real_root == NULL) {
return ADD_UPDATE_PATH_WTF;
}
find_path_result_t result = find_path(
tree,
shadow_root,
real_root,
path,
path_sz,
CREATE_IF_MISSING,
&changes,
add_or_update_path_callback,
&metadata);
// apply the changes back to the tree struct
tree->consumed_memory += changes.size_change;
tree->num_leaf_nodes += changes.num_leaf_node_change;
if (changes.non_arena_allocations) {
tree->compacted = false;
}
switch (result) {
case FIND_PATH_OK:
return ADD_UPDATE_PATH_OK;
case FIND_PATH_OOM:
return ADD_UPDATE_PATH_OOM;
case FIND_PATH_CONFLICT:
return ADD_UPDATE_PATH_CONFLICT;
default:
return ADD_UPDATE_PATH_WTF;
}
}
find_path_callback_result_t remove_path_callback(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* name,
const size_t name_sz,
tree_state_changes_t* changes,
void* context) {
// does the path already exist?
node_search_children_result_t search_result =
search_children(root, name, name_sz);
if (search_result.child == NULL) {
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_NOT_FOUND,
NULL};
}
// record the metadata changes.
changes->checksum_dirty = true;
changes->num_leaf_node_change--;
changes->size_change -= search_result.child->block_sz;
node_remove_child_result_t remove_result =
remove_child(root, search_result.child_num);
if (remove_result == REMOVE_CHILD_OK) {
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_OK, root};
} else {
return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_WTF, root};
}
}
remove_path_result_t
remove_path(tree_t* tree, const char* path, const size_t path_sz) {
tree_state_changes_t changes = {0};
node_t* shadow_root = tree->shadow_root;
node_t* real_root = get_child_by_index(shadow_root, 0);
if (real_root == NULL) {
return REMOVE_PATH_WTF;
}
find_path_result_t result = find_path(
tree,
shadow_root,
real_root,
path,
path_sz,
REMOVE_EMPTY_IMPLICIT_NODES,
&changes,
remove_path_callback,
NULL);
// apply the changes back to the tree struct
tree->consumed_memory += changes.size_change;
tree->num_leaf_nodes += changes.num_leaf_node_change;
if (changes.non_arena_allocations) {
tree->compacted = false;
}
switch (result) {
case FIND_PATH_OK:
return REMOVE_PATH_OK;
case FIND_PATH_NOT_FOUND:
return REMOVE_PATH_NOT_FOUND;
default:
return REMOVE_PATH_WTF;
}
}
bool contains_path(tree_t* tree, const char* path, const size_t path_sz) {
tree_state_changes_t changes = {0};
get_path_metadata_t metadata;
node_t* shadow_root = tree->shadow_root;
node_t* real_root = get_child_by_index(shadow_root, 0);
if (real_root == NULL) {
return false;
}
find_path_result_t result = find_path(
(tree_t*)tree,
shadow_root,
real_root,
path,
path_sz,
BASIC_WALK,
&changes,
get_path_callback,
&metadata);
assert(changes.size_change == 0);
assert(changes.num_leaf_node_change == 0);
assert(changes.non_arena_allocations == false);
switch (result) {
case FIND_PATH_OK:
return true;
default:
return false;
}
}

View File

@@ -1,147 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree.h: publicly accessible functions for tree manipulation and
// conversions. this should be the only header file directly exposed
// to users.
//
// no-check-code
#ifndef __FASTMANIFEST_TREE_H__
#define __FASTMANIFEST_TREE_H__
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "lib/clib/portability/portability.h"
#include "result.h"
#if 0 // FIXME: (ttung) probably remove this
typedef enum {
ARENA_MODE, /* all allocations should come from the
* arena. this is to produce a
* compact and relocatable tree. */
STANDARD_MODE, /* all allocations should come from the
* standard system allocator, i.e.,
* malloc. */
} allocation_mode_t;
#endif /* #if 0 */
typedef struct _tree_t {
// these fields are preserved during serialization.
size_t consumed_memory;
uint32_t num_leaf_nodes;
// these fields are not preserved during serialization.
struct _node_t* shadow_root;
/* this is a literal pointer. */
void* arena;
/* this is also a literal pointer. */
void* arena_free_start;
/* this is also a literal pointer. */
size_t arena_sz;
bool compacted;
#if 0 // FIXME: (ttung) probably remove this
allocation_mode_t mode;
#endif /* #if 0 */
} tree_t;
typedef struct _iterator_t iterator_t;
/**
* Returns true iff the path is something digestible by this tree library. The
* rules are:
*
* 1) The path must be of nonzero length.
* 2) The path must not start nor end with the path separator '/'.
* 3) The path must not have consecutive path separators.
*/
extern bool valid_path(const char* path, const size_t path_sz);
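// expected behavior, per the rules above (a sketch, not exhaustive):
//
//   valid_path("abc/def", 7)   -> true
//   valid_path("/abc", 4)      -> false  (leading separator)
//   valid_path("abc/", 4)      -> false  (trailing separator)
//   valid_path("abc//def", 8)  -> false  (consecutive separators)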
extern tree_t* alloc_tree(void);
extern void destroy_tree(tree_t* tree);
extern tree_t* copy_tree(const tree_t* src);
extern tree_t* filter_copy(
const tree_t* src,
bool (*filter)(char* path, size_t path_sz, void* callback_context),
void* callback_context);
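// a minimal sketch of a filter callback (a hypothetical prefix filter; the
// copy tests use similar callbacks): keep only leaf paths under "keep/".
//
//   static bool keep_prefix(char* path, size_t path_sz, void* callback_context) {
//     return path_sz >= 5 && strncmp(path, "keep/", 5) == 0;
//   }
//
//   tree_t* pruned = filter_copy(src, keep_prefix, NULL);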
extern get_path_result_t
get_path(tree_t* const tree, const char* path, const size_t path_sz);
extern add_update_path_result_t add_or_update_path(
tree_t* const tree,
const char* path,
const size_t path_sz,
const uint8_t* checksum,
const uint8_t checksum_sz,
const uint8_t flags);
extern remove_path_result_t
remove_path(tree_t* const tree, const char* path, const size_t path_sz);
extern bool contains_path(
// we ought to be able to do this as a const, but we can't propagate
// const-ness through a method call. so unless we dupe the code to create
// a const-version of find_path, we cannot enforce this programmatically.
/* const */ tree_t* tree,
const char* path,
const size_t path_sz);
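// a minimal usage sketch of the path operations above (assuming `sha` holds
// a SHA1_BYTES-long checksum):
//
//   add_or_update_path(tree, "dir/file", 8, sha, SHA1_BYTES, 0);
//   get_path_result_t r = get_path(tree, "dir/file", 8);
//   // r.code == GET_PATH_OK; r.checksum, r.checksum_sz and r.flags are set.
//   contains_path(tree, "dir/file", 8);   // true
//   remove_path(tree, "dir/file", 8);     // subsequent lookups report NOT_FOUND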
extern read_from_file_result_t read_from_file(char* fname, size_t fname_sz);
extern write_to_file_result_t
write_to_file(tree_t* tree, char* fname, size_t fname_sz);
extern convert_from_flat_result_t convert_from_flat(
char* manifest,
size_t manifest_sz);
extern convert_to_flat_result_t convert_to_flat(tree_t* tree);
/**
* Calculate the difference between two trees, and call a given function with
* information about the nodes.
*
* If `include_all` is true, then the callback is called with all the nodes,
* regardless of whether a difference exists or not.
*
* If `include_all` is false, then the callback is only called on the nodes
* where there is a difference.
*
* To maintain compatibility with flat manifests, the nodes are traversed in
* lexicographical order. If the caller wishes to maintain a reference to
* the path beyond the scope of the immediate callback, it must save a copy
* of the path. It is *not* guaranteed to be valid once the callback
* function terminates.
*/
extern diff_result_t diff_trees(
tree_t* const left,
tree_t* const right,
bool include_all,
void (*callback)(
const char* path,
const size_t path_sz,
const bool left_present,
const uint8_t* left_checksum,
const uint8_t left_checksum_sz,
const uint8_t left_flags,
const bool right_present,
const uint8_t* right_checksum,
const uint8_t right_checksum_sz,
const uint8_t right_flags,
void* context),
void* context);
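// a minimal sketch of a diff callback matching the signature above (the
// printf body is only illustrative):
//
//   static void print_diff(
//       const char* path, const size_t path_sz,
//       const bool left_present, const uint8_t* left_checksum,
//       const uint8_t left_checksum_sz, const uint8_t left_flags,
//       const bool right_present, const uint8_t* right_checksum,
//       const uint8_t right_checksum_sz, const uint8_t right_flags,
//       void* context) {
//     printf("%.*s differs\n", (int)path_sz, path);
//   }
//
//   diff_trees(left, right, false, print_diff, NULL);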
extern iterator_t* create_iterator(const tree_t* tree, bool construct_paths);
extern iterator_result_t iterator_next(iterator_t* iterator);
extern void destroy_iterator(iterator_t* iterator);
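// a minimal usage sketch (assuming a populated tree): walk every file in
// lexicographical order, with paths constructed for each entry.
//
//   iterator_t* it = create_iterator(tree, true);
//   for (iterator_result_t r = iterator_next(it); r.valid;
//        r = iterator_next(it)) {
//     // use r.path / r.path_sz, r.checksum / r.checksum_sz, r.flags here.
//   }
//   destroy_iterator(it);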
#endif /* #ifndef __FASTMANIFEST_TREE_H__ */

View File

@@ -1,135 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_arena.c: methods to create a tree with a fixed memory arena and to
// allocate nodes from the fixed memory arena.
//
// no-check-code
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tree_arena.h"
#define ARENA_INCREMENT_PERCENTAGE 20
#define ARENA_MIN_STORAGE_INCREMENT (1024 * 1024)
#define ARENA_MAX_STORAGE_INCREMENT (16 * 1024 * 1024)
static inline size_t calculate_arena_free(const tree_t* tree) {
intptr_t arena_start = (intptr_t)tree->arena;
intptr_t arena_free_start = (intptr_t)tree->arena_free_start;
intptr_t arena_end = arena_start + tree->arena_sz;
size_t arena_free = arena_end - arena_free_start;
return arena_free;
}
arena_alloc_node_result_t arena_alloc_node_helper(
arena_policy_t policy,
tree_t* tree,
const char* name,
size_t name_sz,
size_t max_children) {
  // since name_sz and max_children are going to be downcast, we should verify
// that they're not too large for the types in node.h
if (!VERIFY_NAME_SZ(name_sz) || !VERIFY_CHILD_NUM(max_children)) {
return COMPOUND_LITERAL(arena_alloc_node_result_t){
ARENA_ALLOC_EXCEEDED_LIMITS, NULL};
}
do {
size_t arena_free = calculate_arena_free(tree);
node_t* candidate = (node_t*)tree->arena_free_start;
void* next = setup_node(
tree->arena_free_start,
arena_free,
name,
(name_sz_t)name_sz,
(child_num_t)max_children);
if (next == NULL) {
if (policy == ARENA_POLICY_FAIL) {
return COMPOUND_LITERAL(arena_alloc_node_result_t){ARENA_ALLOC_OOM,
NULL};
} else {
size_t new_arena_sz =
(tree->arena_sz * (100 + ARENA_INCREMENT_PERCENTAGE)) / 100;
// TODO: optimization opportunity!
// we can calculate how much free space we need and set that as another
// minimum. in the unlikely scenario we need a huge node, just setting
// the lower bound on ARENA_MIN_STORAGE_INCREMENT may require multiple
// rounds of realloc.
if (new_arena_sz - tree->arena_sz < ARENA_MIN_STORAGE_INCREMENT) {
new_arena_sz = tree->arena_sz + ARENA_MIN_STORAGE_INCREMENT;
}
if (new_arena_sz - tree->arena_sz > ARENA_MAX_STORAGE_INCREMENT) {
new_arena_sz = tree->arena_sz + ARENA_MAX_STORAGE_INCREMENT;
}
// resize the arena so it's bigger.
void* new_arena = realloc(tree->arena, new_arena_sz);
if (new_arena == NULL) {
return COMPOUND_LITERAL(arena_alloc_node_result_t){ARENA_ALLOC_OOM,
NULL};
}
// success! update the pointers.
if (new_arena != tree->arena) {
intptr_t arena_start = (intptr_t)tree->arena;
intptr_t arena_free_start = (intptr_t)tree->arena_free_start;
intptr_t new_arena_start = (intptr_t)new_arena;
// if the shadow root is inside the arena, we need to relocate it.
if (in_arena(tree, tree->shadow_root)) {
intptr_t shadow_root = (intptr_t)tree->shadow_root;
ptrdiff_t shadow_root_offset = shadow_root - arena_start;
tree->shadow_root = (node_t*)(new_arena_start + shadow_root_offset);
}
intptr_t new_arena_free_start = new_arena_start;
new_arena_free_start += (arena_free_start - arena_start);
tree->arena_free_start = (void*)new_arena_free_start;
tree->arena = new_arena;
}
tree->arena_sz = new_arena_sz;
}
} else {
tree->arena_free_start = next;
tree->consumed_memory += candidate->block_sz;
return COMPOUND_LITERAL(arena_alloc_node_result_t){ARENA_ALLOC_OK,
candidate};
}
} while (true);
}
tree_t* alloc_tree_with_arena(size_t arena_sz) {
void* arena = malloc(arena_sz);
tree_t* tree = (tree_t*)calloc(1, sizeof(tree_t));
node_t* shadow_root = alloc_node("/", 1, 1);
if (arena == NULL || tree == NULL || shadow_root == NULL) {
free(arena);
free(tree);
free(shadow_root);
return NULL;
}
#if 0 // FIXME: (ttung) probably remove this
tree->mode = STANDARD_MODE;
#endif /* #if 0 */
tree->arena = tree->arena_free_start = arena;
tree->arena_sz = arena_sz;
tree->compacted = true;
tree->shadow_root = NULL;
tree->consumed_memory = 0;
tree->num_leaf_nodes = 0;
shadow_root->type = TYPE_ROOT;
tree->shadow_root = shadow_root;
return tree;
}

View File

@@ -1,80 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_arena.h: declarations for methods to create a tree with a fixed
// memory arena and to allocate nodes from the fixed memory
// arena. for internal use only.
//
// no-check-code
#ifndef __FASTMANIFEST_TREE_ARENA_H__
#define __FASTMANIFEST_TREE_ARENA_H__
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
typedef enum {
ARENA_POLICY_FAIL, // fail immediately when there is
// insufficient space
ARENA_POLICY_REALLOC, // attempt to realloc until realloc
// fails.
} arena_policy_t;
typedef enum {
ARENA_ALLOC_OK,
ARENA_ALLOC_OOM,
ARENA_ALLOC_EXCEEDED_LIMITS,
} arena_alloc_node_code_t;
typedef struct _arena_alloc_node_result_t {
arena_alloc_node_code_t code;
node_t* node;
} arena_alloc_node_result_t;
static inline bool in_arena(const tree_t* tree, void* _ptr) {
intptr_t arena_start = (intptr_t)tree->arena;
intptr_t arena_end = arena_start + tree->arena_sz - 1;
intptr_t ptr = (intptr_t)_ptr;
if (ptr >= arena_start && ptr < arena_end) {
return true;
}
return false;
}
/**
* Allocate space for a node within a heap-allocated arena. If the arena does
* not have enough space for the node, consult the policy to determine what to
* do next.
*/
extern arena_alloc_node_result_t arena_alloc_node_helper(
arena_policy_t policy,
tree_t* tree,
const char* name,
size_t name_sz,
size_t max_children);
static inline arena_alloc_node_result_t arena_alloc_node(
tree_t* tree,
const char* name,
size_t name_sz,
size_t max_children) {
return arena_alloc_node_helper(
ARENA_POLICY_REALLOC, tree, name, name_sz, max_children);
}
static inline arena_alloc_node_result_t arena_alloc_node_strict(
tree_t* tree,
const char* name,
size_t name_sz,
size_t max_children) {
return arena_alloc_node_helper(
ARENA_POLICY_FAIL, tree, name, name_sz, max_children);
}
/**
* Creates a tree and sets up the shadow root node. This does *not* initialize
* the real root node. It is the responsibility of the caller to do so.
*/
extern tree_t* alloc_tree_with_arena(size_t arena_sz);
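// a minimal sketch of the caller's responsibility (roughly what
// tree_convert.c does): allocate the real root from the arena, mark it
// TYPE_ROOT, and attach it to the shadow root.
//
//   tree_t* tree = alloc_tree_with_arena(arena_sz);
//   arena_alloc_node_result_t r = arena_alloc_node(tree, "/", 1, num_children);
//   if (r.code == ARENA_ALLOC_OK) {
//     r.node->type = TYPE_ROOT;
//     add_child(tree->shadow_root, r.node);
//   }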
#endif /* #ifndef __FASTMANIFEST_TREE_ARENA_H__ */

View File

@@ -1,593 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert.c: methods to convert flat manifests to and from a tree.
//
// no-check-code
#include <stdlib.h>
#include <sys/types.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/buffer.h"
#include "lib/clib/convert.h"
#include "lib/clib/portability/portability.h"
#include "edenscm/mercurial/compat.h"
#include "path_buffer.h"
#include "tree_arena.h"
#define MAX_FOLDER_DEPTH 1024
#define DEFAULT_CHILDREN_CAPACITY 4096
#define BUFFER_GROWTH_FACTOR 1.2
#define BUFFER_MINIMUM_GROWTH 1048576
#define BUFFER_MAXIMUM_GROWTH (32 * 1024 * 1024)
#define CONVERT_EXPAND_TO_FIT(buffer, buffer_idx, buffer_sz, input_sz) \
expand_to_fit( \
(void**)buffer, \
buffer_idx, \
buffer_sz, \
input_sz, \
sizeof(char), \
BUFFER_GROWTH_FACTOR, \
BUFFER_MINIMUM_GROWTH, \
BUFFER_MAXIMUM_GROWTH)
typedef struct _open_folder_t {
const char* subfolder_name;
/* this is a reference to the flat
* manifest's memory. we do not own
* this memory, and we must copy it
* before the conversion completes. */
size_t subfolder_name_sz;
// readers may wonder why we store a relative pointer. this is because
// storing node_t* pointers is UNSAFE. they are allocated on the arena, and
// can be moved at a moment's notice. the only thing that's safe to do is to
// store an offset from the start of the arena.
ptrdiff_t closed_children_prealloc[DEFAULT_CHILDREN_CAPACITY];
ptrdiff_t* closed_children;
size_t closed_children_count;
size_t closed_children_capacity;
bool in_use;
} open_folder_t;
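// a minimal sketch of the offset scheme described above (mirroring
// folder_add_child and close_folder below):
//
//   ptrdiff_t offset = (intptr_t)child - (intptr_t)state->tree->arena; // store
//   node_t* node = (node_t*)((intptr_t)state->tree->arena + offset);   // resolve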
typedef struct _from_flat_state_t {
tree_t* tree;
open_folder_t folders[MAX_FOLDER_DEPTH];
size_t open_folder_count;
} from_flat_state_t;
typedef struct _to_flat_state_t {
const tree_t* tree;
char* dirpath_build_buffer;
size_t dirpath_build_buffer_idx;
size_t dirpath_build_buffer_sz;
char* output_buffer;
size_t output_buffer_idx;
size_t output_buffer_sz;
} to_flat_state_t;
/**
* Returns <0 if (`name`, `name_sz`) is lexicographically less than the name in
* folder.
*
* Returns =0 if (`name`, `name_sz`) is lexicographically equal to the name in
* folder.
*
* Returns >0 if (`name`, `name_sz`) is lexicographically greater than the name
* in folder.
*/
static inline int folder_name_compare(
const char* name,
size_t name_sz,
const open_folder_t* folder) {
size_t min_sz = (name_sz < folder->subfolder_name_sz)
? name_sz
: folder->subfolder_name_sz;
ssize_t sz_compare = name_sz - folder->subfolder_name_sz;
int cmp = strncmp(name, folder->subfolder_name, min_sz);
if (cmp) {
return cmp;
} else if (sz_compare < 0) {
return -1;
} else if (sz_compare > 0) {
return 1;
} else {
return 0;
}
}
static void init_open_folder(open_folder_t* folder) {
folder->in_use = false;
folder->closed_children = folder->closed_children_prealloc;
folder->closed_children_count = 0;
folder->closed_children_capacity = DEFAULT_CHILDREN_CAPACITY;
}
static from_flat_state_t* init_from_state(size_t flat_sz) {
from_flat_state_t* state = malloc(sizeof(from_flat_state_t));
if (state == NULL) {
return NULL;
}
for (int ix = 0; ix < MAX_FOLDER_DEPTH; ix++) {
init_open_folder(&state->folders[ix]);
}
state->open_folder_count = 0;
state->tree = alloc_tree_with_arena(flat_sz * 2);
return state;
}
/**
* Adds a child to a folder, expanding it as needed.
*/
static bool folder_add_child(
from_flat_state_t* state,
open_folder_t* folder,
node_t* child) {
if (folder->closed_children_count + 1 == folder->closed_children_capacity) {
// time to expand the folder
size_t new_capacity = folder->closed_children_capacity * 2;
// is the current zone the prealloc zone? if so, we need to allocate a new
// zone.
if (folder->closed_children == folder->closed_children_prealloc) {
folder->closed_children = malloc(sizeof(ptrdiff_t) * new_capacity);
if (folder->closed_children == NULL) {
return false;
}
// copy over.
memcpy(
folder->closed_children,
folder->closed_children_prealloc,
sizeof(ptrdiff_t) * folder->closed_children_count);
} else {
// realloc
folder->closed_children =
realloc(folder->closed_children, sizeof(ptrdiff_t) * new_capacity);
if (folder->closed_children == NULL) {
return false;
}
}
folder->closed_children_capacity = new_capacity;
}
// we need to store the delta between the start of the arena and the child.
intptr_t arena_start = (intptr_t)state->tree->arena;
intptr_t child_start = (intptr_t)child;
folder->closed_children[folder->closed_children_count] =
child_start - arena_start;
folder->closed_children_count++;
return true;
}
typedef enum {
CLOSE_FOLDER_OK,
CLOSE_FOLDER_OOM,
} close_folder_code_t;
typedef struct _close_folder_result_t {
close_folder_code_t code;
node_t* node;
} close_folder_result_t;
/**
* Close the folder at index `folder_index`. This may require closing nested
* folders. If folder_index is > 0, then add the closed folder to its parent.
* If the folder_index is 0, it is responsibility of the caller to attach the
* returned node to the shadow root.
*/
static close_folder_result_t close_folder(
from_flat_state_t* state,
size_t folder_index) {
open_folder_t* folder = &state->folders[folder_index];
assert(folder->in_use == true);
if (folder_index < MAX_FOLDER_DEPTH - 1) {
// maybe a nested folder needs to be closed?
if (state->folders[folder_index + 1].in_use) {
// yup, it needs to be closed.
close_folder_result_t close_folder_result =
close_folder(state, folder_index + 1);
if (close_folder_result.code != CLOSE_FOLDER_OK) {
return COMPOUND_LITERAL(close_folder_result_t){close_folder_result.code,
NULL};
}
}
}
// allocate a node and set it up.
arena_alloc_node_result_t arena_alloc_node_result = arena_alloc_node(
state->tree,
folder->subfolder_name,
folder->subfolder_name_sz,
folder->closed_children_count);
if (arena_alloc_node_result.code == ARENA_ALLOC_OOM) {
return COMPOUND_LITERAL(close_folder_result_t){CLOSE_FOLDER_OOM, NULL};
}
node_t* node = arena_alloc_node_result.node;
node->type = TYPE_IMPLICIT;
// we must initialize flags to a known value, even if it's not used
// because it participates in checksum calculation.
node->flags = 0;
if (!VERIFY_CHILD_NUM(folder->closed_children_count)) {
abort();
}
// this is a huge abstraction violation, but it allows us to use
// `set_child_by_index`, which is significantly more efficient.
node->num_children = (child_num_t)folder->closed_children_count;
// node is set up. now add all the children!
intptr_t arena_start = (intptr_t)state->tree->arena;
for (size_t ix = 0; ix < folder->closed_children_count; ix++) {
ptrdiff_t child_offset = (intptr_t)folder->closed_children[ix];
intptr_t address = arena_start + child_offset;
set_child_by_index(node, ix, (node_t*)address);
}
init_open_folder(folder); // zap the folder so it can be reused.
state->open_folder_count--;
// attach to parent folder if it's not the root folder.
assert(folder_index == state->open_folder_count);
if (folder_index > 0) {
open_folder_t* parent_folder = &state->folders[folder_index - 1];
if (folder_add_child(state, parent_folder, node) == false) {
return COMPOUND_LITERAL(close_folder_result_t){CLOSE_FOLDER_OOM, NULL};
}
}
return COMPOUND_LITERAL(close_folder_result_t){CLOSE_FOLDER_OK, node};
}
typedef enum {
PROCESS_PATH_OK,
PROCESS_PATH_OOM,
PROCESS_PATH_CORRUPT,
} process_path_code_t;
typedef struct _process_path_result_t {
process_path_code_t code;
// the following are only set when the code is `PROCESS_PATH_OK`.
node_t* node; // do *NOT* save this pointer.
// immediately do what is needed with
// this pointer and discard. the reason
// is that it's part of the arena, and
// can be moved if the arena is resized.
size_t bytes_consumed; // this is the number of bytes consumed,
// including the null pointer.
} process_path_result_t;
/**
* Process a null-terminated path, closing any directories and building the
* nodes as needed, and opening the new directories to support the current path.
*
* Once the proper set of folders are open, create a node and write it into
* the folder.
*/
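// for example (a walk-through of the steps described above): if the open
// folders are "/", "a/" and "b/" (i.e. the previous path was under "a/b/")
// and the next path is "a/c/d", then the folder for "b/" is closed (its node
// is built and attached to "a/"), a folder for "c/" is opened under "a/",
// and a leaf node for "d" is allocated and recorded in it. the reported
// bytes_consumed is strlen("a/c/d") + 1.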
static process_path_result_t
process_path(from_flat_state_t* state, const char* path, size_t max_len) {
size_t path_scan_index;
size_t current_path_start;
size_t open_folder_index;
// match as many path components as we can
for (path_scan_index = 0, current_path_start = 0, open_folder_index = 0;
path[path_scan_index] != 0;
path_scan_index++) {
if (path_scan_index == max_len) {
return COMPOUND_LITERAL(process_path_result_t){
PROCESS_PATH_CORRUPT, NULL, 0};
}
// check for a path separator.
if (path[path_scan_index] != '/') {
continue;
}
size_t path_len =
path_scan_index + 1 /* to include the / */ - current_path_start;
bool open_new_folder = true;
// check if the *next* open folder is valid, and if it matches the path
// component we just found.
if (open_folder_index + 1 < state->open_folder_count) {
if (folder_name_compare(
&path[current_path_start],
path_len,
&state->folders[open_folder_index + 1]) == 0) {
// we found the folder we needed, so we can just reuse it.
open_new_folder = false;
open_folder_index++;
} else {
close_folder_result_t close_folder_result =
close_folder(state, open_folder_index + 1);
if (close_folder_result.code == CLOSE_FOLDER_OOM) {
return COMPOUND_LITERAL(process_path_result_t){
PROCESS_PATH_OOM, NULL, 0};
}
}
}
if (open_new_folder == true) {
// if we're opening a new folder, that means there should be no child
// folders open.
assert(state->open_folder_count == open_folder_index + 1);
open_folder_index++;
state->open_folder_count++;
open_folder_t* folder = &state->folders[open_folder_index];
assert(folder->in_use == false);
assert(folder->closed_children == folder->closed_children_prealloc);
assert(folder->closed_children_count == 0);
// link the name in. remember, we don't own the memory!!
folder->in_use = true;
folder->subfolder_name = &path[current_path_start];
folder->subfolder_name_sz = path_len;
}
// path starts after the /
current_path_start = path_scan_index + 1;
}
// close path components that are not matched, building their nodes.
if (open_folder_index + 1 < state->open_folder_count) {
close_folder_result_t close_folder_result =
close_folder(state, open_folder_index + 1);
if (close_folder_result.code == CLOSE_FOLDER_OOM) {
return COMPOUND_LITERAL(process_path_result_t){PROCESS_PATH_OOM, NULL, 0};
}
}
// build a node for the remaining path (which should just be the
// filename). add it to the currently open folder.
arena_alloc_node_result_t arena_alloc_node_result = arena_alloc_node(
state->tree,
&path[current_path_start],
path_scan_index - current_path_start,
0);
if (arena_alloc_node_result.code == ARENA_ALLOC_OOM) {
return COMPOUND_LITERAL(process_path_result_t){PROCESS_PATH_OOM, NULL, 0};
}
arena_alloc_node_result.node->type = TYPE_LEAF;
// jam the new node into the currently open folder.
open_folder_t* folder = &state->folders[open_folder_index];
folder_add_child(state, folder, arena_alloc_node_result.node);
return COMPOUND_LITERAL(process_path_result_t){
PROCESS_PATH_OK, arena_alloc_node_result.node, path_scan_index + 1};
}
static convert_from_flat_result_t convert_from_flat_helper(
from_flat_state_t* state,
char* manifest,
size_t manifest_sz) {
// open the root directory node.
open_folder_t* folder = &state->folders[0];
folder->subfolder_name = "/";
folder->subfolder_name_sz = 1;
folder->in_use = true;
state->open_folder_count++;
for (size_t ptr = 0; ptr < manifest_sz;) {
// filename is up to the first null.
process_path_result_t pp_result =
process_path(state, &manifest[ptr], manifest_sz - ptr);
switch (pp_result.code) {
case PROCESS_PATH_OOM:
return COMPOUND_LITERAL(convert_from_flat_result_t){
CONVERT_FROM_FLAT_OOM, NULL};
case PROCESS_PATH_CORRUPT:
return COMPOUND_LITERAL(convert_from_flat_result_t){
CONVERT_FROM_FLAT_WTF, NULL};
case PROCESS_PATH_OK:
break;
}
assert(pp_result.code == PROCESS_PATH_OK);
node_t* node = pp_result.node;
ptr += pp_result.bytes_consumed;
size_t remaining = manifest_sz - ptr;
if (remaining <= SHA1_BYTES * 2) {
// not enough characters for the checksum and the NL. well, that's a
// fail.
return COMPOUND_LITERAL(convert_from_flat_result_t){CONVERT_FROM_FLAT_WTF,
NULL};
}
if (unhexlify(&manifest[ptr], SHA1_BYTES * 2, node->checksum) == false) {
return COMPOUND_LITERAL(convert_from_flat_result_t){CONVERT_FROM_FLAT_WTF,
NULL};
}
node->checksum_sz = SHA1_BYTES;
node->checksum_valid = true;
ptr += SHA1_BYTES * 2;
// is the next character a NL? if so, then we're done. otherwise, retrieve
// it as the flags field.
if (manifest[ptr] != '\n') {
node->flags = manifest[ptr];
ptr++;
} else {
node->flags = 0;
}
ptr++;
state->tree->num_leaf_nodes++;
}
// close the root folder.
close_folder_result_t close_result = close_folder(state, 0);
if (close_result.code == CLOSE_FOLDER_OOM) {
return COMPOUND_LITERAL(convert_from_flat_result_t){CONVERT_FROM_FLAT_OOM,
NULL};
}
close_result.node->type = TYPE_ROOT;
add_child(state->tree->shadow_root, close_result.node);
return COMPOUND_LITERAL(convert_from_flat_result_t){CONVERT_FROM_FLAT_OK,
state->tree};
}
static convert_to_flat_code_t convert_to_flat_iterator(
to_flat_state_t* state,
const node_t* node) {
assert(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT);
for (uint32_t ix = 0; ix < node->num_children; ix++) {
node_t* child = get_child_by_index(node, ix);
if (child->type == TYPE_LEAF) {
size_t space_needed = state->dirpath_build_buffer_idx + child->name_sz +
1 /* null character */ + (SHA1_BYTES * 2) +
(child->flags != '\000' ? 1 : 0) + 1 /* NL */;
if (CONVERT_EXPAND_TO_FIT(
&state->output_buffer,
state->output_buffer_idx,
&state->output_buffer_sz,
space_needed) == false) {
return CONVERT_TO_FLAT_OOM;
}
// copy the dirpath over to the output buffer.
memcpy(
&state->output_buffer[state->output_buffer_idx],
state->dirpath_build_buffer,
state->dirpath_build_buffer_idx);
state->output_buffer_idx += state->dirpath_build_buffer_idx;
// copy the filename over to the output buffer.
memcpy(
&state->output_buffer[state->output_buffer_idx],
child->name,
child->name_sz);
state->output_buffer_idx += child->name_sz;
      // write the null separator between the filename and the checksum.
state->output_buffer[state->output_buffer_idx] = '\000';
state->output_buffer_idx++;
// transcribe the sha over.
hexlify(
child->checksum,
SHA1_BYTES,
&state->output_buffer[state->output_buffer_idx]);
state->output_buffer_idx += (SHA1_BYTES * 2);
if (child->flags != '\000') {
state->output_buffer[state->output_buffer_idx] = child->flags;
state->output_buffer_idx++;
}
state->output_buffer[state->output_buffer_idx] = '\n';
state->output_buffer_idx++;
assert(state->output_buffer_idx < state->output_buffer_sz);
} else {
// save the old value...
size_t previous_dirpath_build_buffer_idx =
state->dirpath_build_buffer_idx;
if (PATH_APPEND(
&state->dirpath_build_buffer,
&state->dirpath_build_buffer_idx,
&state->dirpath_build_buffer_sz,
child->name,
child->name_sz) == false) {
return CONVERT_TO_FLAT_OOM;
}
convert_to_flat_iterator(state, child);
state->dirpath_build_buffer_idx = previous_dirpath_build_buffer_idx;
}
}
return CONVERT_TO_FLAT_OK;
}
static convert_to_flat_code_t convert_to_flat_helper(
to_flat_state_t* state,
const tree_t* tree) {
// get the real root.
node_t* shadow_root = tree->shadow_root;
if (shadow_root->num_children != 1) {
return CONVERT_TO_FLAT_WTF;
}
node_t* real_root = get_child_by_index(shadow_root, 0);
return convert_to_flat_iterator(state, real_root);
}
convert_from_flat_result_t convert_from_flat(
char* manifest,
size_t manifest_sz) {
from_flat_state_t* state = init_from_state(manifest_sz);
  if (state != NULL && state->tree == NULL) {
free(state);
state = NULL;
}
if (state == NULL) {
return COMPOUND_LITERAL(convert_from_flat_result_t){CONVERT_FROM_FLAT_OOM,
NULL};
}
convert_from_flat_result_t result =
convert_from_flat_helper(state, manifest, manifest_sz);
if (result.code != CONVERT_FROM_FLAT_OK) {
free(state->tree);
}
free(state);
return result;
}
convert_to_flat_result_t convert_to_flat(tree_t* tree) {
to_flat_state_t state;
state.dirpath_build_buffer = malloc(DEFAULT_PATH_BUFFER_SZ);
state.dirpath_build_buffer_idx = 0;
state.dirpath_build_buffer_sz = DEFAULT_PATH_BUFFER_SZ;
// guestimate as to how much space we need. this could probably be
// fine-tuned a bit.
state.output_buffer = malloc(tree->consumed_memory);
state.output_buffer_idx = 0;
state.output_buffer_sz = tree->consumed_memory;
convert_to_flat_code_t result = CONVERT_TO_FLAT_OOM;
if (state.dirpath_build_buffer != NULL && state.output_buffer != NULL) {
result = convert_to_flat_helper(&state, tree);
}
free(state.dirpath_build_buffer);
if (result != CONVERT_TO_FLAT_OK) {
// free the buffer if any error occurred.
free(state.output_buffer);
return COMPOUND_LITERAL(convert_to_flat_result_t){result, NULL, 0};
} else {
return COMPOUND_LITERAL(convert_to_flat_result_t){
CONVERT_TO_FLAT_OK, state.output_buffer, state.output_buffer_idx};
}
}

View File

@@ -1,102 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert_rt.c: simple benchmark for converting a flat manifest to a tree
// and back. the output can be diff'ed against the input as
// for more sophisticated testing than the unit tests
// provide.
//
// no-check-code
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include "checksum.h"
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
fprintf(stderr, "Usage: %s <manifestfile> <outputfile>\n", argv[0]);
exit(1);
}
FILE* fh = fopen(argv[1], "r");
if (fh == NULL) {
fprintf(stderr, "Error: cannot open %s\n", argv[1]);
exit(1);
}
FILE* ofh = fopen(argv[2], "w");
if (ofh == NULL) {
fprintf(stderr, "Error: cannot open %s\n", argv[2]);
exit(1);
}
fseeko(fh, 0, SEEK_END);
off_t length = ftello(fh);
rewind(fh);
char* flatmanifest = malloc(length);
if (flatmanifest == NULL) {
fprintf(stderr, "Error: cannot allocate memory for reading %s\n", argv[1]);
exit(1);
}
if (fread(flatmanifest, length, 1, fh) != 1) {
fprintf(stderr, "Error: cannot read %s\n", argv[1]);
exit(1);
}
struct timeval before_from, after_from;
gettimeofday(&before_from, NULL);
convert_from_flat_result_t from_flat =
convert_from_flat(flatmanifest, length);
gettimeofday(&after_from, NULL);
if (from_flat.code != CONVERT_FROM_FLAT_OK) {
fprintf(stderr, "Error: converting from flat manifest\n");
exit(1);
}
struct timeval before_checksum, after_checksum;
gettimeofday(&before_checksum, NULL);
update_checksums(from_flat.tree);
gettimeofday(&after_checksum, NULL);
struct timeval before_to, after_to;
gettimeofday(&before_to, NULL);
convert_to_flat_result_t to_flat = convert_to_flat(from_flat.tree);
gettimeofday(&after_to, NULL);
if (to_flat.code != CONVERT_TO_FLAT_OK) {
fprintf(stderr, "Error: converting to flat manifest\n");
exit(1);
}
if (fwrite(to_flat.flat_manifest, to_flat.flat_manifest_sz, 1, ofh) != 1) {
fprintf(stderr, "Error: writing flat manifest\n");
exit(1);
}
fclose(ofh);
uint64_t usecs_before_from =
before_from.tv_sec * 1000000 + before_from.tv_usec;
uint64_t usecs_after_from = after_from.tv_sec * 1000000 + after_from.tv_usec;
uint64_t usecs_before_checksum =
before_checksum.tv_sec * 1000000 + before_checksum.tv_usec;
uint64_t usecs_after_checksum =
after_checksum.tv_sec * 1000000 + after_checksum.tv_usec;
uint64_t usecs_before_to = before_to.tv_sec * 1000000 + before_to.tv_usec;
uint64_t usecs_after_to = after_to.tv_sec * 1000000 + after_to.tv_usec;
printf(
"flat -> tree: %" PRIu64 " us\n", (usecs_after_from - usecs_before_from));
printf(
"checksum: %" PRIu64 " us\n",
(usecs_after_checksum - usecs_before_checksum));
printf("tree -> flat: %" PRIu64 " us\n", (usecs_after_to - usecs_before_to));
printf(
"tree consumed memory: %" PRIuPTR "\n", from_flat.tree->consumed_memory);
}

View File

@@ -1,160 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert_test.c: tests for methods to convert flat manifests to and
// from a tree.
//
// no-check-code
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tests.h"
#define SIMPLE_CONVERT_INPUT \
"abc\000b80de5d138758541c5f05265ad144ab9fa86d1db\n" \
"def\000f6d864039d10a8934d0d581d342780298aa9fb28l\n" \
"ghi\0000f421b102b0baa760a5d4c5759f339cfc1f7d01b\n"
void test_simple_convert() {
char input[] = SIMPLE_CONVERT_INPUT;
  size_t size = sizeof(input) - 1; // exclude the trailing null terminator
convert_from_flat_result_t convert_result = convert_from_flat(input, size);
ASSERT(convert_result.code == CONVERT_FROM_FLAT_OK);
tree_t* tree = convert_result.tree;
ASSERT(tree->compacted == true);
ASSERT(tree->num_leaf_nodes == 3);
get_path_result_t get_result;
get_result = get_path(tree, STRPLUSLEN("abc"));
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(get_result.checksum_sz == SHA1_BYTES);
ASSERT(
memcmp(
get_result.checksum,
"\xb8\x0d\xe5\xd1\x38\x75\x85\x41\xc5\xf0\x52\x65\xad\x14\x4a\xb9\xfa"
"\x86\xd1"
"\xdb",
SHA1_BYTES) == 0);
ASSERT(get_result.flags == 0);
get_result = get_path(tree, STRPLUSLEN("def"));
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(get_result.checksum_sz == SHA1_BYTES);
ASSERT(
memcmp(
get_result.checksum,
"\xf6\xd8\x64\x03\x9d\x10\xa8\x93\x4d\x0d\x58\x1d\x34\x27\x80\x29\x8a"
"\xa9\xfb\x28",
SHA1_BYTES) == 0);
ASSERT(get_result.flags == 'l');
get_result = get_path(tree, STRPLUSLEN("ghi"));
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(get_result.checksum_sz == SHA1_BYTES);
ASSERT(
memcmp(
get_result.checksum,
"\x0f\x42\x1b\x10\x2b\x0b\xaa\x76\x0a\x5d\x4c\x57\x59\xf3\x39\xcf\xc1"
"\xf7\xd0\x1b",
SHA1_BYTES) == 0);
ASSERT(get_result.flags == 0);
destroy_tree(convert_result.tree);
}
#define CONVERT_TREE_INPUT \
"abc\0007a091c781cf86fc5b7c2e93eb9f233c4220026a2\n" \
"abcd/efg\000f33dcd6a4ef633eb1fa02ec72cb76c4043390a50\n" \
"abcd/efgh/ijk\000b6fb5f7b2f3b499ad04b6e97f78904d5314ec690\n" \
"abcd/efghi\00042aece97c3e7db21fbc7559918aba6b6e925a64d\n" \
"abcdefghi\000c4c71e7b43d108fb869c28107c39d21c166be837\n"
#define GET_TEST(tree, path_const, expected_result) \
{ \
get_path_result_t get_result = get_path(tree, STRPLUSLEN(path_const)); \
ASSERT(get_result.code == expected_result); \
}
void test_convert_tree() {
char input[] = CONVERT_TREE_INPUT;
  size_t size = sizeof(input) - 1; // exclude the trailing null terminator
convert_from_flat_result_t convert_result = convert_from_flat(input, size);
ASSERT(convert_result.code == CONVERT_FROM_FLAT_OK);
tree_t* tree = convert_result.tree;
ASSERT(tree->compacted == true);
ASSERT(tree->num_leaf_nodes == 5);
GET_TEST(tree, "abc", GET_PATH_OK);
GET_TEST(tree, "abcd/efg", GET_PATH_OK);
GET_TEST(tree, "abcd/efgh/ijk", GET_PATH_OK);
GET_TEST(tree, "abcd/efghi", GET_PATH_OK);
GET_TEST(tree, "abcdefghi", GET_PATH_OK);
GET_TEST(tree, "abcdefghij", GET_PATH_NOT_FOUND);
destroy_tree(convert_result.tree);
}
#define CONVERT_BIDIRECTIONALLY_INPUT \
"abc\0007a091c781cf86fc5b7c2e93eb9f233c4220026a2\n" \
"abcd/efg\000f33dcd6a4ef633eb1fa02ec72cb76c4043390a50\n" \
"abcd/efgh/ijk/lm\000b6fb5f7b2f3b499ad04b6e97f78904d5314ec690\n" \
"abcd/efghi\00042aece97c3e7db21fbc7559918aba6b6e925a64d\n" \
"abcdefghi\000c4c71e7b43d108fb869c28107c39d21c166be837\n"
void test_convert_bidirectionally() {
char input[] = CONVERT_BIDIRECTIONALLY_INPUT;
  size_t size = sizeof(input) - 1; // exclude the trailing null terminator
convert_from_flat_result_t from_result = convert_from_flat(input, size);
ASSERT(from_result.code == CONVERT_FROM_FLAT_OK);
tree_t* tree = from_result.tree;
convert_to_flat_result_t to_result = convert_to_flat(tree);
ASSERT(to_result.flat_manifest_sz == size);
ASSERT(
memcmp(input, to_result.flat_manifest, to_result.flat_manifest_sz) == 0);
}
// this was exposed in #11145050
void test_remove_after_convert_from_flat() {
convert_from_flat_result_t convert_result = convert_from_flat("", 0);
ASSERT(convert_result.code == CONVERT_FROM_FLAT_OK);
tree_t* tree = convert_result.tree;
add_to_tree_t toadd[] = {
{STRPLUSLEN("abc"), 12345, 5},
};
add_to_tree(tree, toadd, sizeof(toadd) / sizeof(add_to_tree_t));
remove_path(tree, STRPLUSLEN("abc"));
convert_to_flat_result_t to_result = convert_to_flat(tree);
ASSERT(to_result.code == CONVERT_TO_FLAT_OK);
ASSERT(to_result.flat_manifest_sz == 0);
}
void test_empty_convert_to_flat() {
tree_t* empty_tree = alloc_tree();
convert_to_flat_result_t to_result = convert_to_flat(empty_tree);
ASSERT(to_result.flat_manifest_sz == 0);
}
int main(int argc, char* argv[]) {
test_simple_convert();
test_convert_tree();
test_convert_bidirectionally();
test_remove_after_convert_from_flat();
test_empty_convert_to_flat();
return 0;
}

View File

@@ -1,318 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_copy.c: methods to make a copy of a tree. the new instance is compacted
// into an arena.
//
// no-check-code
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "internal_result.h"
#include "node.h"
#include "path_buffer.h"
#include "tree_arena.h"
typedef enum {
COPY_OK,
COPY_OOM,
COPY_WTF,
} copy_helper_result_t;
/**
* Clones `src` and adds it as the Nth child of `dst_parent`, where N ==
* `child_num`.
*
 * `child_num` must be < `dst_parent->num_children`.
*/
copy_helper_result_t copy_helper(
tree_t* dst_tree,
const node_t* src,
node_t* dst_parent,
size_t child_num) {
arena_alloc_node_result_t alloc_result = arena_alloc_node_strict(
dst_tree, src->name, src->name_sz, src->num_children);
switch (alloc_result.code) {
case ARENA_ALLOC_OK:
break;
case ARENA_ALLOC_OOM:
return COPY_OOM;
case ARENA_ALLOC_EXCEEDED_LIMITS:
return COPY_WTF;
}
// copy the attributes
node_t* dst = alloc_result.node;
if (src->checksum_valid) {
memcpy(dst->checksum, src->checksum, src->checksum_sz);
dst->checksum_sz = src->checksum_sz;
}
dst->checksum_valid = src->checksum_valid;
dst->flags = src->flags;
dst->type = src->type;
  // typically we don't like touching this field manually, but
  // `set_child_by_index` requires the index to be < num_children.
dst->num_children = src->num_children;
if (dst->type == TYPE_LEAF) {
dst_tree->num_leaf_nodes++;
} else {
for (int ix = 0; ix < src->num_children; ix++) {
copy_helper_result_t copy_result =
copy_helper(dst_tree, get_child_by_index(src, ix), dst, ix);
if (copy_result != COPY_OK) {
return copy_result;
}
}
}
set_child_by_index(dst_parent, child_num, dst);
return COPY_OK;
}
tree_t* copy_tree(const tree_t* src) {
tree_t* dst = alloc_tree_with_arena(src->consumed_memory);
// prerequisite for using copy_helper is that child_num must be <
  // dst_parent->num_children, so we artificially bump up the num_children
// for the shadow root.
assert(max_children(dst->shadow_root) > 0);
dst->shadow_root->num_children = 1;
copy_helper_result_t copy_result = copy_helper(
dst, get_child_by_index(src->shadow_root, 0), dst->shadow_root, 0);
switch (copy_result) {
case COPY_OK:
dst->compacted = true;
return dst;
default:
destroy_tree(dst);
return NULL;
}
}
typedef enum {
  // returned if the child was copied, but not all of its descendants were
  // copied.
FILTER_COPY_OK,
// returned if the child was copied, and all of its descendants are also
// copied.
FILTER_COPY_OK_RECURSIVELY,
// returned if the child was not copied.
FILTER_COPY_NOT_COPIED,
FILTER_COPY_OOM,
FILTER_COPY_WTF,
} filter_copy_helper_result_t;
typedef struct {
bool (*filter)(char* path, size_t path_sz, void* callback_context);
// use this buffer to construct the paths.
char* path;
size_t path_idx;
size_t path_sz;
void* callback_context;
} filter_copy_context_t;
/**
* Clones `src` and adds it as the Nth child of `dst_parent`, where N ==
* `child_num`, but only iff the clone has children.
*
 * `child_num` must be < `dst_parent->num_children`.
*/
filter_copy_helper_result_t filter_copy_helper(
tree_t* dst_tree,
filter_copy_context_t* context,
const node_t* src,
node_t* dst_parent,
size_t child_num) {
filter_copy_helper_result_t result;
// save the old path size so we can restore when we exit.
size_t prev_path_idx = context->path_idx;
// construct the path.
if (src->type != TYPE_ROOT) {
if (PATH_APPEND(
&context->path,
&context->path_idx,
&context->path_sz,
(char*)src->name,
src->name_sz) == false) {
return FILTER_COPY_OOM;
}
}
if (src->type == TYPE_LEAF) {
// call the filter and determine whether this node should be added.
if (context->filter(
context->path, context->path_idx, context->callback_context)) {
dst_tree->num_leaf_nodes++;
arena_alloc_node_result_t alloc_result = arena_alloc_node_strict(
dst_tree, src->name, src->name_sz, src->num_children);
switch (alloc_result.code) {
case ARENA_ALLOC_OK:
break;
case ARENA_ALLOC_OOM:
return FILTER_COPY_OOM;
case ARENA_ALLOC_EXCEEDED_LIMITS:
return FILTER_COPY_WTF;
}
// copy the attributes
node_t* dst = alloc_result.node;
if (src->checksum_valid) {
memcpy(dst->checksum, src->checksum, src->checksum_sz);
dst->checksum_sz = src->checksum_sz;
}
dst->checksum_valid = src->checksum_valid;
dst->flags = src->flags;
dst->type = src->type;
set_child_by_index(dst_parent, child_num, dst);
result = FILTER_COPY_OK_RECURSIVELY;
} else {
result = FILTER_COPY_NOT_COPIED;
}
// restore the path and exit.
context->path_idx = prev_path_idx;
return result;
}
// allocate a temporary node to hold the entries.
node_t* temp_node = alloc_node(NULL, 0, src->num_children);
if (temp_node == NULL) {
return FILTER_COPY_OOM;
}
// set enough fields such that we can write children.
temp_node->type = src->type;
  // typically we don't like touching this field manually, but
  // `set_child_by_index` requires the index to be < num_children.
temp_node->num_children = src->num_children;
child_num_t dst_child_index = 0;
// assume everything gets copied.
bool recursive = true;
for (child_num_t ix = 0; ix < src->num_children; ix++) {
filter_copy_helper_result_t filter_copy_result = filter_copy_helper(
dst_tree,
context,
get_child_by_index(src, ix),
temp_node,
dst_child_index);
switch (filter_copy_result) {
case FILTER_COPY_OK:
recursive = false;
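        // fall through: in both cases the child itself was copied.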
case FILTER_COPY_OK_RECURSIVELY:
dst_child_index++;
break;
case FILTER_COPY_NOT_COPIED:
recursive = false;
break;
case FILTER_COPY_OOM:
case FILTER_COPY_WTF:
return filter_copy_result;
}
}
// how many children were written?
if (dst_child_index == 0 && src->type != TYPE_ROOT) {
// none, that means we shouldn't write to our parent either.
result = FILTER_COPY_NOT_COPIED;
} else {
// hey, we wrote something. allocate within the arena and copy the
// entries over.
arena_alloc_node_result_t alloc_result = arena_alloc_node_strict(
dst_tree, src->name, src->name_sz, src->num_children);
switch (alloc_result.code) {
case ARENA_ALLOC_OK:
break;
case ARENA_ALLOC_OOM:
return FILTER_COPY_OOM;
case ARENA_ALLOC_EXCEEDED_LIMITS:
return FILTER_COPY_WTF;
}
// copy the attributes
node_t* dst = alloc_result.node;
if (src->checksum_valid && recursive) {
memcpy(dst->checksum, src->checksum, src->checksum_sz);
dst->checksum_sz = src->checksum_sz;
dst->checksum_valid = true;
} else {
dst->checksum_valid = false;
}
dst->flags = src->flags;
dst->type = src->type;
    // typically we don't like touching this field manually, but
    // `set_child_by_index` requires the index to be < num_children.
dst->num_children = dst_child_index;
for (child_num_t ix = 0; ix < dst_child_index; ix++) {
const node_t* child = get_child_by_index(temp_node, ix);
set_child_by_index(dst, ix, child);
}
set_child_by_index(dst_parent, child_num, dst);
result = recursive ? FILTER_COPY_OK_RECURSIVELY : FILTER_COPY_OK;
}
free(temp_node);
context->path_idx = prev_path_idx;
return result;
}
tree_t* filter_copy(
const tree_t* src,
bool (*filter)(char* path, size_t path_sz, void* callback_context),
void* context) {
tree_t* dst = alloc_tree_with_arena(src->consumed_memory);
filter_copy_context_t filter_copy_context;
filter_copy_context.path = malloc(DEFAULT_PATH_BUFFER_SZ);
filter_copy_context.path_idx = 0;
filter_copy_context.path_sz = DEFAULT_PATH_BUFFER_SZ;
filter_copy_context.filter = filter;
filter_copy_context.callback_context = context;
// prerequisite for using filter_copy_helper is that child_num must be <
  // dst_parent->num_children, so we artificially bump up the num_children
// for the shadow root.
assert(max_children(dst->shadow_root) > 0);
dst->shadow_root->num_children = 1;
filter_copy_helper_result_t filter_copy_result = filter_copy_helper(
dst,
&filter_copy_context,
get_child_by_index(src->shadow_root, 0),
dst->shadow_root,
0);
switch (filter_copy_result) {
case FILTER_COPY_OK:
case FILTER_COPY_OK_RECURSIVELY:
dst->compacted = true;
return dst;
default:
destroy_tree(dst);
return NULL;
}
}

View File

@@ -1,268 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_copy_test.c: tests to verify methods to make a copy of a tree.
//
// no-check-code
#include "checksum.h"
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tests.h"
void test_copy_empty() {
tree_t* src = alloc_tree();
tree_t* dst = copy_tree(src);
ASSERT(dst != NULL);
ASSERT(dst->compacted == true);
ASSERT(dst->num_leaf_nodes == 0);
ASSERT(dst->consumed_memory == src->consumed_memory);
destroy_tree(src);
destroy_tree(dst);
}
void test_copy_empty_chain() {
tree_t* original = alloc_tree();
tree_t* src = original;
for (int ix = 0; ix < 10; ix++) {
tree_t* dst = copy_tree(src);
ASSERT(dst != NULL);
ASSERT(dst->compacted == true);
ASSERT(dst->num_leaf_nodes == 0);
ASSERT(dst->consumed_memory == src->consumed_memory);
tree_t* old_src = src;
src = dst;
destroy_tree(old_src);
}
}
typedef struct {
char* path;
size_t path_sz;
uint8_t* checksum;
uint8_t flags;
} copy_tree_data_t;
#define COPY_TREE_DATA(path, checksum, flags) \
(copy_tree_data_t) { \
path, sizeof(path) - 1, (uint8_t*)checksum, flags, \
}
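// Worked expansion: COPY_TREE_DATA("abc", "\xe7...", 0x23) becomes the
// initializer { "abc", 3, (uint8_t*)"\xe7...", 0x23 }; sizeof("abc") - 1
// evaluates to 3 because it drops the string literal's trailing NUL.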
void test_copy_normal_tree() {
copy_tree_data_t input[] = {
COPY_TREE_DATA(
"abc",
"\xe7\xf5\xdd\xad\x5e\x13\x86\x4e\x25\x30\x41\x3a\x69\x8e\x19\xd4\x25"
"\xc8\x12\x02",
0x23),
COPY_TREE_DATA(
"ab/cde",
"\x7c\x6a\x4b\x0a\x05\x91\x6c\x89\x9d\x8a\xe6\x38\xcf\x38\x93\x2e"
"\x4f\x09\xed\x57",
0x9b),
COPY_TREE_DATA(
"abcd/ef",
"\x3e\x4d\xf1\xe0\x46\x4a\x3e\xb9\x6b\x8d\x55\x6c\x3b\x6b\x00\xee"
"\x4f\x77\x71\x9e",
0xda),
COPY_TREE_DATA(
"abcd/efg/hi",
"\x98\x2f\x46\x90\xfe\xc1\xbc\xe0\x8b\xf7\xa5\x47\x65\xe3\xf4\x16"
"\x5b\xf4\xba\x7c",
0x44),
};
size_t input_sz = sizeof(input) / sizeof(copy_tree_data_t);
tree_t* src = alloc_tree();
for (size_t ix = 0; ix < input_sz; ix++) {
add_update_path_result_t result = add_or_update_path(
src,
input[ix].path,
input[ix].path_sz,
input[ix].checksum,
SHA1_BYTES,
input[ix].flags);
ASSERT(result == ADD_UPDATE_PATH_OK);
}
ASSERT(src->compacted == false);
ASSERT(src->num_leaf_nodes == input_sz);
tree_t* dst = copy_tree(src);
for (size_t ix = 0; ix < input_sz; ix++) {
get_path_result_t get_result =
get_path(dst, input[ix].path, input[ix].path_sz);
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(get_result.checksum_sz == SHA1_BYTES);
ASSERT(memcmp(get_result.checksum, input[ix].checksum, SHA1_BYTES) == 0);
ASSERT(get_result.flags == input[ix].flags);
}
}
static bool
filter_prune_all(char* path, size_t path_sz, void* callback_context) {
return false;
}
void test_filter_copy_prune_all() {
copy_tree_data_t input[] = {
COPY_TREE_DATA(
"abc",
"\xe7\xf5\xdd\xad\x5e\x13\x86\x4e\x25\x30\x41\x3a\x69\x8e\x19\xd4\x25"
"\xc8\x12\x02",
0x23),
COPY_TREE_DATA(
"ab/cde",
"\x7c\x6a\x4b\x0a\x05\x91\x6c\x89\x9d\x8a\xe6\x38\xcf\x38\x93\x2e"
"\x4f\x09\xed\x57",
0x9b),
COPY_TREE_DATA(
"abcd/ef",
"\x3e\x4d\xf1\xe0\x46\x4a\x3e\xb9\x6b\x8d\x55\x6c\x3b\x6b\x00\xee"
"\x4f\x77\x71\x9e",
0xda),
COPY_TREE_DATA(
"abcd/efg/hi",
"\x98\x2f\x46\x90\xfe\xc1\xbc\xe0\x8b\xf7\xa5\x47\x65\xe3\xf4\x16"
"\x5b\xf4\xba\x7c",
0x44),
};
size_t input_sz = sizeof(input) / sizeof(copy_tree_data_t);
tree_t* src = alloc_tree();
for (size_t ix = 0; ix < input_sz; ix++) {
add_update_path_result_t result = add_or_update_path(
src,
input[ix].path,
input[ix].path_sz,
input[ix].checksum,
SHA1_BYTES,
input[ix].flags);
ASSERT(result == ADD_UPDATE_PATH_OK);
}
ASSERT(src->compacted == false);
ASSERT(src->num_leaf_nodes == input_sz);
tree_t* dst = filter_copy(src, filter_prune_all, NULL);
ASSERT(dst != NULL);
ASSERT(dst->compacted == true);
ASSERT(dst->num_leaf_nodes == 0);
for (size_t ix = 0; ix < input_sz; ix++) {
get_path_result_t get_result =
get_path(dst, input[ix].path, input[ix].path_sz);
ASSERT(get_result.code == GET_PATH_NOT_FOUND);
}
}
typedef struct {
char* path;
bool present;
bool expected_checksum_valid;
} path_verify_t;
static bool
filter_prune_some(char* path, size_t path_sz, void* callback_context) {
char prefix[] = "abcd/ef";
if (path_sz == sizeof(prefix) - 1 &&
strncmp(path, prefix, sizeof(prefix) - 1) == 0) {
return false;
}
return true;
}
void test_filter_copy_prune_some() {
copy_tree_data_t input[] = {
COPY_TREE_DATA(
"abc",
"\xe7\xf5\xdd\xad\x5e\x13\x86\x4e\x25\x30\x41\x3a\x69\x8e\x19\xd4\x25"
"\xc8\x12\x02",
0x23),
COPY_TREE_DATA(
"ab/cde",
"\x7c\x6a\x4b\x0a\x05\x91\x6c\x89\x9d\x8a\xe6\x38\xcf\x38\x93\x2e"
"\x4f\x09\xed\x57",
0x9b),
COPY_TREE_DATA(
"abcd/ef",
"\x3e\x4d\xf1\xe0\x46\x4a\x3e\xb9\x6b\x8d\x55\x6c\x3b\x6b\x00\xee"
"\x4f\x77\x71\x9e",
0xda),
COPY_TREE_DATA(
"abcd/efg/hi",
"\x98\x2f\x46\x90\xfe\xc1\xbc\xe0\x8b\xf7\xa5\x47\x65\xe3\xf4\x16"
"\x5b\xf4\xba\x7c",
0x44),
};
size_t input_sz = sizeof(input) / sizeof(copy_tree_data_t);
tree_t* src = alloc_tree();
for (size_t ix = 0; ix < input_sz; ix++) {
add_update_path_result_t result = add_or_update_path(
src,
input[ix].path,
input[ix].path_sz,
input[ix].checksum,
SHA1_BYTES,
input[ix].flags);
ASSERT(result == ADD_UPDATE_PATH_OK);
}
ASSERT(src->compacted == false);
ASSERT(src->num_leaf_nodes == input_sz);
update_checksums(src);
tree_t* dst = filter_copy(src, filter_prune_some, NULL);
ASSERT(dst != NULL);
ASSERT(dst->compacted == true);
ASSERT(dst->num_leaf_nodes == 3);
path_verify_t dirs_to_check_after_filter[] = {
{"abc", true, true},
{"ab/", true, true},
{"ab/cde", true, true},
{"abcd/", true, false},
{"abcd/ef", false, false},
{"abcd/efg/", true, true},
{"abcd/efg/hi", true, true},
};
for (size_t ix = 0; ix < (sizeof(dirs_to_check_after_filter) /
sizeof(*dirs_to_check_after_filter));
ix++) {
path_verify_t* verify_entry = &dirs_to_check_after_filter[ix];
get_path_unfiltered_result_t get_result =
get_path_unfiltered(dst, STRPLUSLEN(verify_entry->path));
    ASSERT(
        get_result.code ==
        (verify_entry->present ? GET_PATH_OK : GET_PATH_NOT_FOUND));
if (verify_entry->present) {
ASSERT(
get_result.node->checksum_valid ==
verify_entry->expected_checksum_valid);
}
}
}
int main(int argc, char* argv[]) {
test_copy_empty();
test_copy_empty_chain();
test_copy_normal_tree();
test_filter_copy_prune_all();
test_filter_copy_prune_some();
}

View File

@ -1,321 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_diff.c: methods to diff two trees.
//
// no-check-code
#include <stdbool.h>
#include <stdlib.h>
#include "checksum.h"
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/buffer.h"
#include "node.h"
#define DEFAULT_BUILD_BUFFER_SZ 16384
#define BUFFER_GROWTH_FACTOR 2.0
#define BUFFER_MINIMUM_GROWTH 16384
#define BUFFER_MAXIMUM_GROWTH 65536
#define DIFF_EXPAND_TO_FIT(buffer, buffer_idx, buffer_sz, input_sz) \
expand_to_fit( \
(void**)buffer, \
buffer_idx, \
buffer_sz, \
input_sz, \
BUFFER_GROWTH_FACTOR, \
sizeof(char), \
BUFFER_MINIMUM_GROWTH, \
BUFFER_MAXIMUM_GROWTH)
typedef struct _diff_context_t {
bool include_all;
void (*callback)(
const char* path,
const size_t path_sz,
const bool left_present,
const uint8_t* left_checksum,
const uint8_t left_checksum_sz,
const uint8_t left_flags,
const bool right_present,
const uint8_t* right_checksum,
const uint8_t right_checksum_sz,
const uint8_t right_flags,
void* context);
void* context;
// used to build up the path
char* path_build_buffer;
size_t path_build_buffer_idx;
size_t path_build_buffer_sz;
} diff_context_t;
static diff_result_t diff_tree_helper(
const node_t* left,
const node_t* right,
diff_context_t* diff_context);
typedef enum {
CONSIDER_PROCESSED_LEFT,
CONSIDER_PROCESSED_RIGHT,
CONSIDER_PROCESSED_BOTH,
CONSIDER_PROCESSED_OOM,
CONSIDER_PROCESSED_WTF,
} consider_children_result_t;
/**
* Consider two nodes for diff. If one precedes the other lexicographically,
* then the later one is not processed. If they are identical
 * lexicographically, but are of different types, then the leaf node is
* processed and the implicit node is not processed.
*
* If they are lexicographically identical and both are leaf nodes, then the
* callback is called. If they are lexicographically identical and both are
* implicit nodes, then we call diff_tree_helper(..).
*
* Returns a code indicating which node(s) are processed.
*/
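// Worked examples (the names are illustrative only):
//   left name "abc" (leaf) vs right name "abd" (leaf): "abc" orders first, so
//   only the left node is reported to the callback (present on the left only)
//   and CONSIDER_PROCESSED_LEFT is returned.
//   left name "abc" (leaf) vs right name "abc" (leaf): both are handled; the
//   callback fires if their checksums/flags differ (or include_all is set),
//   and CONSIDER_PROCESSED_BOTH is returned.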
consider_children_result_t consider_children(
const node_t* left_candidate,
const node_t* right_candidate,
diff_context_t* diff_context) {
// if there's two, then zero out the one that comes later in
// lexicographical order. if they are the same and they're of identical
// types, then both will continue on.
if (left_candidate != NULL && right_candidate != NULL) {
int order = name_compare(
left_candidate->name, left_candidate->name_sz, right_candidate);
if (order < 0) {
// left goes first, clear right
right_candidate = NULL;
} else if (order > 0) {
// right goes first, clear left
left_candidate = NULL;
} else if (
left_candidate->type == TYPE_LEAF &&
right_candidate->type != TYPE_LEAF) {
      // identical names, left is a leaf node and right is not, so clear right.
right_candidate = NULL;
} else if (
left_candidate->type != TYPE_LEAF &&
right_candidate->type == TYPE_LEAF) {
      // identical names, right is a leaf node and left is not, so clear left.
left_candidate = NULL;
}
}
// save the path index
size_t previous_path_index = diff_context->path_build_buffer_idx;
char* name;
size_t name_sz;
if (left_candidate != NULL) {
name = (char*)left_candidate->name;
name_sz = left_candidate->name_sz;
} else {
name = (char*)right_candidate->name;
name_sz = right_candidate->name_sz;
}
if (DIFF_EXPAND_TO_FIT(
&diff_context->path_build_buffer,
diff_context->path_build_buffer_idx,
&diff_context->path_build_buffer_sz,
name_sz) == false) {
return CONSIDER_PROCESSED_OOM;
}
memcpy(
&diff_context->path_build_buffer[diff_context->path_build_buffer_idx],
name,
name_sz);
diff_context->path_build_buffer_idx += name_sz;
if ((left_candidate != NULL && left_candidate->type == TYPE_IMPLICIT) ||
(right_candidate != NULL && right_candidate->type == TYPE_IMPLICIT)) {
// if one is a directory node, either the other one is NULL or
// also a directory node. in that case, descend into the subdirectory.
diff_result_t result =
diff_tree_helper(left_candidate, right_candidate, diff_context);
switch (result) {
case DIFF_OOM:
return CONSIDER_PROCESSED_OOM;
case DIFF_WTF:
return CONSIDER_PROCESSED_WTF;
default:;
}
} else if (
diff_context->include_all != false || left_candidate == NULL ||
right_candidate == NULL ||
left_candidate->flags != right_candidate->flags ||
left_candidate->checksum_sz != right_candidate->checksum_sz ||
memcmp(
left_candidate->checksum,
right_candidate->checksum,
left_candidate->checksum_sz) != 0) {
const uint8_t* left_checksum =
left_candidate != NULL ? left_candidate->checksum : NULL;
const uint8_t left_checksum_sz =
left_candidate != NULL ? left_candidate->checksum_sz : 0;
const uint8_t left_flags =
left_candidate != NULL ? left_candidate->flags : 0;
const uint8_t* right_checksum =
right_candidate != NULL ? right_candidate->checksum : NULL;
const uint8_t right_checksum_sz =
right_candidate != NULL ? right_candidate->checksum_sz : 0;
const uint8_t right_flags =
right_candidate != NULL ? right_candidate->flags : 0;
// either the two nodes are not identical, or we're being requested to
// include all the nodes.
diff_context->callback(
diff_context->path_build_buffer,
diff_context->path_build_buffer_idx,
left_candidate != NULL,
left_checksum,
left_checksum_sz,
left_flags,
right_candidate != NULL,
right_checksum,
right_checksum_sz,
right_flags,
diff_context->context);
}
// restore the old path write point.
diff_context->path_build_buffer_idx = previous_path_index;
if (left_candidate != NULL && right_candidate != NULL) {
return CONSIDER_PROCESSED_BOTH;
} else if (left_candidate != NULL) {
return CONSIDER_PROCESSED_LEFT;
} else {
return CONSIDER_PROCESSED_RIGHT;
}
}
/**
* Diff two nodes. One of the nodes may be NULL, and we must accommodate that
* possibility.
*/
static diff_result_t diff_tree_helper(
const node_t* left,
const node_t* right,
diff_context_t* diff_context) {
assert(
left == NULL || left->type == TYPE_ROOT || left->type == TYPE_IMPLICIT);
assert(
right == NULL || right->type == TYPE_ROOT ||
right->type == TYPE_IMPLICIT);
// if the two nodes have identical checksums and include_all is false, then
// we can return immediately.
if (diff_context->include_all == false && left != NULL &&
left->checksum_valid && right != NULL && right->checksum_valid &&
left->checksum_sz == right->checksum_sz &&
memcmp(left->checksum, right->checksum, left->checksum_sz) == 0) {
return DIFF_OK;
}
// now we need to merge the two nodes' children in lexicographical order.
for (size_t left_idx = 0, right_idx = 0;
(left != NULL && left_idx < left->num_children) ||
(right != NULL && right_idx < right->num_children);) {
// grab the candidates.
node_t* left_candidate = NULL;
node_t* right_candidate = NULL;
if (left != NULL && left_idx < left->num_children) {
if (!VERIFY_CHILD_NUM(left_idx)) {
return DIFF_WTF;
}
left_candidate = get_child_by_index(left, (child_num_t)left_idx);
assert(left_candidate->checksum_valid == true);
}
if (right != NULL && right_idx < right->num_children) {
if (!VERIFY_CHILD_NUM(right_idx)) {
return DIFF_WTF;
}
right_candidate = get_child_by_index(right, (child_num_t)right_idx);
assert(right_candidate->checksum_valid == true);
}
consider_children_result_t consider_children_result =
consider_children(left_candidate, right_candidate, diff_context);
switch (consider_children_result) {
case CONSIDER_PROCESSED_OOM:
return DIFF_OOM;
case CONSIDER_PROCESSED_WTF:
return DIFF_WTF;
default:
break;
}
if (consider_children_result == CONSIDER_PROCESSED_BOTH ||
consider_children_result == CONSIDER_PROCESSED_LEFT) {
left_idx++;
}
if (consider_children_result == CONSIDER_PROCESSED_BOTH ||
consider_children_result == CONSIDER_PROCESSED_RIGHT) {
right_idx++;
}
}
return DIFF_OK;
}
diff_result_t diff_trees(
tree_t* const left,
tree_t* const right,
bool include_all,
void (*callback)(
const char* path,
const size_t path_sz,
const bool left_present,
const uint8_t* left_checksum,
const uint8_t left_checksum_sz,
const uint8_t left_flags,
const bool right_present,
const uint8_t* right_checksum,
const uint8_t right_checksum_sz,
const uint8_t right_flags,
void* context),
void* context) {
update_checksums(left);
update_checksums(right);
node_t *left_shadow_root, *right_shadow_root;
node_t *left_real_root, *right_real_root;
left_shadow_root = left->shadow_root;
right_shadow_root = right->shadow_root;
if (left_shadow_root->num_children != 1 ||
right_shadow_root->num_children != 1) {
return DIFF_WTF;
}
left_real_root = get_child_by_index(left_shadow_root, 0);
right_real_root = get_child_by_index(right_shadow_root, 0);
diff_context_t diff_context = {include_all, callback, context};
diff_context.path_build_buffer = malloc(DEFAULT_BUILD_BUFFER_SZ);
diff_context.path_build_buffer_idx = 0;
diff_context.path_build_buffer_sz = DEFAULT_BUILD_BUFFER_SZ;
if (diff_context.path_build_buffer == NULL) {
return DIFF_OOM;
}
assert(left_real_root->checksum_valid == true);
assert(right_real_root->checksum_valid == true);
diff_result_t result =
diff_tree_helper(left_real_root, right_real_root, &diff_context);
free(diff_context.path_build_buffer);
return result;
}
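// A usage sketch; `count_diff_entry` and the size_t counter are hypothetical.
// The callback just counts differing paths; a real caller would typically
// record or print them instead.
static void count_diff_entry(
    const char* path,
    const size_t path_sz,
    const bool left_present,
    const uint8_t* left_checksum,
    const uint8_t left_checksum_sz,
    const uint8_t left_flags,
    const bool right_present,
    const uint8_t* right_checksum,
    const uint8_t right_checksum_sz,
    const uint8_t right_flags,
    void* context) {
  (void)path;
  (void)path_sz;
  (void)left_present;
  (void)left_checksum;
  (void)left_checksum_sz;
  (void)left_flags;
  (void)right_present;
  (void)right_checksum;
  (void)right_checksum_sz;
  (void)right_flags;
  (*(size_t*)context)++;
}
// Hypothetical call site:
//   size_t num_differences = 0;
//   if (diff_trees(left, right, false, count_diff_entry, &num_differences) ==
//       DIFF_OK) {
//     /* num_differences now holds the count of differing paths. */
//   }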

View File

@ -1,446 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_diff_test.c: tests to verify tree_diff
//
// no-check-code
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tests.h"
typedef struct {
const char* path;
const size_t path_sz;
const bool left_present;
uint32_t left_checksum_seed;
const uint8_t left_flags;
const bool right_present;
uint32_t right_checksum_seed;
const uint8_t right_flags;
} diff_expectation_t;
typedef struct {
diff_expectation_t* expectations;
size_t expectations_idx;
size_t expectations_sz;
} diff_expectations_t;
static void expectations_matcher(
const char* path,
const size_t path_sz,
const bool left_present,
const uint8_t* left_checksum,
const uint8_t left_checksum_sz,
const uint8_t left_flags,
const bool right_present,
const uint8_t* right_checksum,
const uint8_t right_checksum_sz,
const uint8_t right_flags,
void* context) {
uint8_t buffer[CHECKSUM_BYTES];
diff_expectations_t* expectations = (diff_expectations_t*)context;
ASSERT(expectations->expectations_idx < expectations->expectations_sz);
diff_expectation_t* expectation =
&expectations->expectations[expectations->expectations_idx];
ASSERT(expectation->path_sz == path_sz);
ASSERT(memcmp(expectation->path, path, path_sz) == 0);
ASSERT(expectation->left_present == left_present);
if (left_present) {
ASSERT(SHA1_BYTES == left_checksum_sz);
ASSERT(
memcmp(
int2sha1hash(expectation->left_checksum_seed, buffer),
left_checksum,
left_checksum_sz) == 0);
ASSERT(expectation->left_flags == left_flags);
}
ASSERT(expectation->right_present == right_present);
if (right_present) {
ASSERT(SHA1_BYTES == right_checksum_sz);
ASSERT(
memcmp(
int2sha1hash(expectation->right_checksum_seed, buffer),
right_checksum,
right_checksum_sz) == 0);
ASSERT(expectation->right_flags == right_flags);
}
expectations->expectations_idx++;
}
static void diff_empty_trees() {
tree_t* left = alloc_tree();
tree_t* right = alloc_tree();
diff_expectation_t expectation_array[] = {};
diff_expectations_t expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(left, right, false, expectations_matcher, &expectations) ==
DIFF_OK);
ASSERT(expectations.expectations_idx == expectations.expectations_sz);
ASSERT(
diff_trees(left, right, true, expectations_matcher, &expectations) ==
DIFF_OK);
ASSERT(expectations.expectations_idx == expectations.expectations_sz);
}
/**
* Diff two identical trees.
*/
static void diff_identical_trees() {
tree_t* left = alloc_tree();
tree_t* right = alloc_tree();
add_to_tree_t toadd[] = {
{STRPLUSLEN("abc"), 12345, 5},
{STRPLUSLEN("ab/cdef/ghi"), 44252, 22},
{STRPLUSLEN("ab/cdef/g/hi"), 112123, 64},
{STRPLUSLEN("ab/cdef/g/hij"), 54654, 58},
{STRPLUSLEN("ab/cdef/gh/ijk"), 45645105, 65},
{STRPLUSLEN("ab/cdef/gh/i"), 5464154, 4},
};
add_to_tree(left, toadd, sizeof(toadd) / sizeof(add_to_tree_t));
add_to_tree(right, toadd, sizeof(toadd) / sizeof(add_to_tree_t));
diff_expectation_t normal_expectation_array[] = {};
diff_expectations_t normal_expectations = {
normal_expectation_array,
0,
sizeof(normal_expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, false, expectations_matcher, &normal_expectations) ==
DIFF_OK);
ASSERT(
normal_expectations.expectations_idx ==
normal_expectations.expectations_sz);
diff_expectation_t include_all_expectation_array[] = {
{
STRPLUSLEN("ab/cdef/g/hi"),
true,
112123,
64,
true,
112123,
64,
},
{
STRPLUSLEN("ab/cdef/g/hij"),
true,
54654,
58,
true,
54654,
58,
},
{
STRPLUSLEN("ab/cdef/gh/i"),
true,
5464154,
4,
true,
5464154,
4,
},
{
STRPLUSLEN("ab/cdef/gh/ijk"),
true,
45645105,
65,
true,
45645105,
65,
},
{
STRPLUSLEN("ab/cdef/ghi"),
true,
44252,
22,
true,
44252,
22,
},
{
STRPLUSLEN("abc"),
true,
12345,
5,
true,
12345,
5,
},
};
diff_expectations_t include_all_expectations = {
include_all_expectation_array,
0,
sizeof(include_all_expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, true, expectations_matcher, &include_all_expectations) ==
DIFF_OK);
ASSERT(
include_all_expectations.expectations_idx ==
include_all_expectations.expectations_sz);
}
/**
* Diff two trees with no identical names in the same directory.
*/
static void diff_no_identical_trees() {
tree_t* left = alloc_tree();
tree_t* right = alloc_tree();
add_to_tree_t toadd_left[] = {
{STRPLUSLEN("ab/cdef/ghi_left"), 44252, 22},
{STRPLUSLEN("ab/cdef/g/hi_left"), 112123, 64},
{STRPLUSLEN("ab/cdef/g/hij_left"), 54654, 58},
};
add_to_tree_t toadd_right[] = {
{STRPLUSLEN("ab/cdef/ghi_right"), 44252, 22},
{STRPLUSLEN("ab/cdef/g/hi_right"), 112123, 64},
{STRPLUSLEN("ab/cdef/g/hij_right"), 54654, 58},
};
add_to_tree(left, toadd_left, sizeof(toadd_left) / sizeof(add_to_tree_t));
add_to_tree(right, toadd_right, sizeof(toadd_right) / sizeof(add_to_tree_t));
diff_expectation_t expectation_array[] = {
{
STRPLUSLEN("ab/cdef/g/hi_left"),
true,
112123,
64,
false,
0,
0,
},
{
STRPLUSLEN("ab/cdef/g/hi_right"),
false,
0,
0,
true,
112123,
64,
},
{
STRPLUSLEN("ab/cdef/g/hij_left"),
true,
54654,
58,
false,
0,
0,
},
{
STRPLUSLEN("ab/cdef/g/hij_right"),
false,
0,
0,
true,
54654,
58,
},
{
STRPLUSLEN("ab/cdef/ghi_left"),
true,
44252,
22,
false,
0,
0,
},
{
STRPLUSLEN("ab/cdef/ghi_right"),
false,
0,
0,
true,
44252,
22,
},
};
diff_expectations_t normal_expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, false, expectations_matcher, &normal_expectations) ==
DIFF_OK);
ASSERT(
normal_expectations.expectations_idx ==
normal_expectations.expectations_sz);
diff_expectations_t include_all_expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, true, expectations_matcher, &include_all_expectations) ==
DIFF_OK);
ASSERT(
include_all_expectations.expectations_idx ==
include_all_expectations.expectations_sz);
}
/**
* Diff two trees with a leaf vs implicit node difference.
*/
static void diff_different_types_trees() {
tree_t* left = alloc_tree();
tree_t* right = alloc_tree();
add_to_tree_t toadd_left[] = {
{STRPLUSLEN("ab/cdef/ghi_left"), 44252, 22},
};
add_to_tree_t toadd_right[] = {
{STRPLUSLEN("ab/cdef"), 44252, 22},
};
add_to_tree(left, toadd_left, sizeof(toadd_left) / sizeof(add_to_tree_t));
add_to_tree(right, toadd_right, sizeof(toadd_right) / sizeof(add_to_tree_t));
diff_expectation_t expectation_array[] = {
{
STRPLUSLEN("ab/cdef"),
false,
0,
0,
true,
44252,
22,
},
{
STRPLUSLEN("ab/cdef/ghi_left"),
true,
44252,
22,
false,
0,
0,
},
};
diff_expectations_t normal_expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, false, expectations_matcher, &normal_expectations) ==
DIFF_OK);
ASSERT(
normal_expectations.expectations_idx ==
normal_expectations.expectations_sz);
diff_expectations_t include_all_expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, true, expectations_matcher, &include_all_expectations) ==
DIFF_OK);
ASSERT(
include_all_expectations.expectations_idx ==
include_all_expectations.expectations_sz);
}
/**
* Diff two trees with differences in the metadata.
*/
static void diff_different_metadata() {
tree_t* left = alloc_tree();
tree_t* right = alloc_tree();
add_to_tree_t toadd_left[] = {
{STRPLUSLEN("ab/cdef"), 44253, 22},
{STRPLUSLEN("ab/cdefg"), 44252, 23},
};
add_to_tree_t toadd_right[] = {
{STRPLUSLEN("ab/cdef"), 44252, 22},
{STRPLUSLEN("ab/cdefg"), 44252, 22},
};
add_to_tree(left, toadd_left, sizeof(toadd_left) / sizeof(add_to_tree_t));
add_to_tree(right, toadd_right, sizeof(toadd_right) / sizeof(add_to_tree_t));
diff_expectation_t expectation_array[] = {
{
STRPLUSLEN("ab/cdef"),
true,
44253,
22,
true,
44252,
22,
},
{
STRPLUSLEN("ab/cdefg"),
true,
44252,
23,
true,
44252,
22,
},
};
diff_expectations_t normal_expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, false, expectations_matcher, &normal_expectations) ==
DIFF_OK);
ASSERT(
normal_expectations.expectations_idx ==
normal_expectations.expectations_sz);
diff_expectations_t include_all_expectations = {
expectation_array,
0,
sizeof(expectation_array) / sizeof(diff_expectation_t)};
ASSERT(
diff_trees(
left, right, true, expectations_matcher, &include_all_expectations) ==
DIFF_OK);
ASSERT(
include_all_expectations.expectations_idx ==
include_all_expectations.expectations_sz);
}
int main(int argc, char* argv[]) {
diff_empty_trees();
diff_identical_trees();
diff_no_identical_trees();
diff_different_types_trees();
diff_different_metadata();
return 0;
}

View File

@ -1,372 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_disk.c: methods to persist to and restore from disk.
//
// no-check-code
#include <errno.h>
#include <memory.h>
#include <stdio.h>
#include <stdlib.h>
#include "checksum.h"
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/portability/inet.h"
#include "node.h"
#include "tree_arena.h"
// FILE FORMAT
//
// UNLESS OTHERWISE NOTED, NUMERICAL FIELDS ARE IN HOST WORD ORDER.
//
// offset       length     description
// 0            9          fasttree\0
// 9            1          byte order (1 = little endian, 2 = big endian)
// 10           1          address size
// 11           1          <unused>
// 12           4          file format version
// 16           8          file length in bytes
// 24           4          header length in bytes
// 28           4          num_leaf_nodes (see tree.h)
// 32           size_t     consumed_memory (see tree.h)
// 32+size_t    ptrdiff_t  offset to find the true root
// 32+size_t+   *          tree data
//   ptrdiff_t
//
// the arena must be allocated with at least <file length> - <header length>
// bytes.
typedef struct _v0_header_t {
#define MAGIC "fasttree"
char magic[sizeof(MAGIC)];
#define BYTE_ORDER_LITTLE_ENDIAN 1
#define BYTE_ORDER_BIG_ENDIAN 2
uint8_t byte_order;
uint8_t address_size;
#define FILE_VERSION 0
uint32_t file_version;
uint64_t file_sz;
uint32_t header_sz;
uint32_t num_leaf_nodes;
size_t consumed_memory;
ptrdiff_t root_offset;
} v0_header_t;
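// A hypothetical layout sanity check, offered as a sketch: it assumes a C11
// compiler and a typical ABI (no padding between the uint8_t fields, 4-byte
// alignment for uint32_t, 8-byte alignment for uint64_t). Only the fixed-width
// offsets documented above are asserted; fields from consumed_memory onward
// depend on sizeof(size_t) and are left out.
#include <stddef.h> /* offsetof */
_Static_assert(offsetof(v0_header_t, byte_order) == 9, "byte order at 9");
_Static_assert(offsetof(v0_header_t, address_size) == 10, "address size at 10");
_Static_assert(offsetof(v0_header_t, file_version) == 12, "file version at 12");
_Static_assert(offsetof(v0_header_t, file_sz) == 16, "file length at 16");
_Static_assert(offsetof(v0_header_t, header_sz) == 24, "header length at 24");
_Static_assert(offsetof(v0_header_t, num_leaf_nodes) == 28, "num_leaf_nodes at 28");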
#define LITTLE_ENDIAN_TEST_VALUE 0x01020304
/**
* Returns true iff the host is little endian.
*/
static inline bool little_endian(void) {
int foo = LITTLE_ENDIAN_TEST_VALUE;
if (ntohl(foo) == LITTLE_ENDIAN_TEST_VALUE) {
return false;
}
return true;
}
/**
* Returns the size, in bytes, of the host pointer.
*/
static inline uint8_t host_pointer_size(void) {
return sizeof(void*);
}
static inline size_t read_noint(FILE* fh, void* _buf, size_t nbytes) {
char* buf = (char*)_buf;
size_t read_so_far = 0;
while (read_so_far < nbytes) {
size_t read_this_iteration =
fread(buf + read_so_far, 1, nbytes - read_so_far, fh);
read_so_far += read_this_iteration;
if (read_so_far != nbytes) {
      if (feof(fh) || (ferror(fh) && errno != EINTR)) {
        // we reached the end of the file, or we hit an error that was not an
        // interrupted read (EINTR).
break;
}
}
}
return read_so_far;
}
#define CHECKED_READ(fh, ptr, nbytes) \
{ \
size_t __nbytes = nbytes; \
if (read_noint(fh, ptr, __nbytes) != __nbytes) { \
result.code = READ_FROM_FILE_WTF; \
goto cleanup; \
} \
}
read_from_file_result_t read_from_file(char* fname, size_t fname_sz) {
char* fname_dst = malloc(fname_sz + 1);
if (fname_dst == NULL) {
return COMPOUND_LITERAL(read_from_file_result_t){
READ_FROM_FILE_OOM, 0, NULL};
}
memcpy(fname_dst, fname, fname_sz);
fname_dst[fname_sz] = '\x00';
read_from_file_result_t result = {0};
FILE* fh = fopen(fname_dst, "rb");
if (fh == NULL) {
result.err = errno;
result.code = READ_FROM_FILE_NOT_READABLE;
goto cleanup;
}
v0_header_t header;
CHECKED_READ(fh, &header, sizeof(v0_header_t));
if (memcmp(header.magic, MAGIC, sizeof(MAGIC)) != 0) {
result.code = READ_FROM_FILE_WTF;
goto cleanup;
}
// endianness
if (little_endian()) {
if (header.byte_order != BYTE_ORDER_LITTLE_ENDIAN) {
result.code = READ_FROM_FILE_NOT_USABLE;
goto cleanup;
}
} else {
if (header.byte_order != BYTE_ORDER_BIG_ENDIAN) {
result.code = READ_FROM_FILE_NOT_USABLE;
goto cleanup;
}
}
// host pointer size
if (header.address_size != host_pointer_size()) {
result.code = READ_FROM_FILE_NOT_USABLE;
goto cleanup;
}
// file version.
if (header.file_version != FILE_VERSION) {
result.code = READ_FROM_FILE_NOT_USABLE;
goto cleanup;
}
// at this point, the file offset should == header_sz
if (ftell(fh) != header.header_sz) {
result.code = READ_FROM_FILE_WTF;
goto cleanup;
}
if (header.file_sz - header.header_sz > SIZE_MAX) {
result.code = READ_FROM_FILE_WTF;
goto cleanup;
}
size_t arena_sz = (size_t)(header.file_sz - header.header_sz);
// allocate the tree
result.tree = alloc_tree_with_arena(arena_sz);
if (result.tree == NULL) {
result.code = READ_FROM_FILE_OOM;
goto cleanup;
}
// read the tree
CHECKED_READ(fh, result.tree->arena, arena_sz);
// find the real root and parent it to shadow root.
node_t* real_root =
(node_t*)(((intptr_t)result.tree->arena) + header.root_offset);
add_child(result.tree->shadow_root, real_root);
// write all the stats into place.
result.tree->arena_sz = arena_sz;
result.tree->arena_free_start =
(void*)((char*)result.tree->arena + result.tree->arena_sz);
result.tree->compacted = true;
result.tree->consumed_memory = header.consumed_memory;
result.tree->num_leaf_nodes = header.num_leaf_nodes;
result.code = READ_FROM_FILE_OK;
cleanup:
if (result.code != READ_FROM_FILE_OK && result.tree != NULL) {
destroy_tree(result.tree);
}
if (fh != NULL) {
fclose(fh);
}
free(fname_dst);
return result;
}
static inline size_t write_noint(FILE* fh, void* _buf, size_t nbytes) {
char* buf = (char*)_buf;
size_t written_so_far = 0;
while (written_so_far < nbytes) {
size_t written_this_iteration =
fwrite(buf + written_so_far, 1, nbytes - written_so_far, fh);
written_so_far += written_this_iteration;
if (written_so_far != nbytes) {
// came up short. it has to be some sort of error. if it's not EINTR,
// we give up.
      if (ferror(fh) && errno != EINTR) {
break;
}
}
}
return written_so_far;
}
#define CHECKED_WRITE(fh, ptr, nbytes) \
{ \
size_t __nbytes = (size_t)nbytes; \
if (write_noint(fh, ptr, __nbytes) != __nbytes) { \
result = WRITE_TO_FILE_WTF; \
goto cleanup; \
} \
}
static write_to_file_result_t
write_compact_tree_to_file(tree_t* tree, char* fname, size_t fname_sz) {
if (tree->compacted == false) {
return WRITE_TO_FILE_WTF;
}
char* fname_dst = malloc(fname_sz + 1);
if (fname_dst == NULL) {
return WRITE_TO_FILE_OOM;
}
memcpy(fname_dst, fname, fname_sz);
fname_dst[fname_sz] = '\x00';
FILE* fh = fopen(fname_dst, "wb");
write_to_file_result_t result;
if (fh == NULL) {
result = WRITE_TO_FILE_WTF;
goto cleanup;
}
v0_header_t header;
memset(&header, 0, sizeof(header)); // keeping valgrind happy.
header.header_sz = sizeof(v0_header_t);
size_t used_size = (char*)tree->arena_free_start - (char*)tree->arena;
header.file_sz = header.header_sz + used_size;
memcpy(header.magic, MAGIC, sizeof(MAGIC));
header.byte_order =
little_endian() ? BYTE_ORDER_LITTLE_ENDIAN : BYTE_ORDER_BIG_ENDIAN;
header.address_size = host_pointer_size();
header.file_version = FILE_VERSION;
header.num_leaf_nodes = tree->num_leaf_nodes;
header.consumed_memory = tree->consumed_memory;
intptr_t real_root_ptr = (intptr_t)get_child_by_index(tree->shadow_root, 0);
ptrdiff_t ptrdiff = real_root_ptr - ((intptr_t)tree->arena);
header.root_offset = ptrdiff;
CHECKED_WRITE(fh, &header, sizeof(v0_header_t));
CHECKED_WRITE(fh, tree->arena, used_size);
result = WRITE_TO_FILE_OK;
cleanup:
if (fh != NULL) {
fclose(fh);
}
free(fname_dst);
return result;
}
/**
* This is a highly implementation dependent mechanism to initialize the
* padding bytes. Otherwise valgrind will freak out over the uninitialized
* padding bytes getting written to disk.
*/
static void initialize_unused_bytes(node_t* node) {
// initializes any unused checksum bytes.
memset(
&node->checksum[node->checksum_sz],
0,
CHECKSUM_BYTES - node->checksum_sz);
// flags for root nodes are not typically initialized
if (node->type == TYPE_ROOT) {
node->flags = 0;
}
// initialize the remaining bits in the bitfield
node->unused = 0;
void* name_start = &node->name;
intptr_t start_address = (intptr_t)name_start;
start_address += node->name_sz;
intptr_t end_address = start_address + sizeof(ptrdiff_t) - 1;
end_address &= ~((intptr_t)(sizeof(ptrdiff_t) - 1));
// initializes the padding between the end of the name and the start of the
// child pointers.
memset((void*)start_address, 0, end_address - start_address);
  // recurse into the children and initialize their unused bytes as well.
for (child_num_t ix = 0; ix < node->num_children; ix++) {
node_t* child = get_child_by_index(node, ix);
initialize_unused_bytes(child);
}
}
/**
* Writes a tree to a file.
*/
write_to_file_result_t write_to_file_helper(
tree_t* tree,
char* fname,
size_t fname_sz,
bool initialize_padding) {
// update the checksums first.
update_checksums(tree);
if (tree->compacted) {
if (initialize_padding) {
initialize_unused_bytes(get_child_by_index(tree->shadow_root, 0));
}
return write_compact_tree_to_file(tree, fname, fname_sz);
}
  // Note that there is probably a significant opportunity for improving the
// performance by using the bottom-up tree construction strategy used in
// convert_from_flat(..) to write a non-compact tree straight to disk. This
// is the naive implementation that simply does a tree_copy to construct a
// compact tree.
tree_t* compact_copy = copy_tree(tree);
if (compact_copy == NULL) {
return WRITE_TO_FILE_OOM;
}
if (initialize_padding) {
initialize_unused_bytes(get_child_by_index(compact_copy->shadow_root, 0));
}
write_to_file_result_t result =
write_compact_tree_to_file(compact_copy, fname, fname_sz);
destroy_tree(compact_copy);
return result;
}
write_to_file_result_t
write_to_file(tree_t* tree, char* fname, size_t fname_sz) {
return write_to_file_helper(tree, fname, fname_sz, false);
}
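// A usage sketch; `example_round_trip` and the file name are hypothetical. It
// persists `tree` and loads the saved copy back, which yields a compacted,
// arena-backed tree.
static void example_round_trip(tree_t* tree) {
  char fname[] = "mytree.bin";
  write_to_file_result_t wr = write_to_file(tree, fname, sizeof(fname) - 1);
  if (wr != WRITE_TO_FILE_OK) {
    return;
  }
  read_from_file_result_t rr = read_from_file(fname, sizeof(fname) - 1);
  if (rr.code == READ_FROM_FILE_OK) {
    // rr.tree is a compacted, arena-backed reconstruction of `tree`.
    destroy_tree(rr.tree);
  }
}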

View File

@ -1,103 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_disk_test.c: tests to verify tree_disk
//
// no-check-code
#include <stdlib.h>
#include <unistd.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "tests.h"
#define TMPFILE_TEMPLATE "/tmp/tree_disk_test.XXXXXX"
// this is defined in tree_disk.c. it's public but not advertised.
extern write_to_file_result_t write_to_file_helper(
tree_t* tree,
char* fname,
size_t fname_sz,
bool initialize_padding);
static char* get_tempfile() {
char* template = strdup(TMPFILE_TEMPLATE);
ASSERT(template != NULL);
memcpy(template, TMPFILE_TEMPLATE, sizeof(TMPFILE_TEMPLATE));
int fd = mkstemp(template);
ASSERT(fd != -1);
close(fd);
return template;
}
/**
* A diff callback that should never be called.
*/
static void never_called_callback(
const char* path,
const size_t path_sz,
const bool left_present,
const uint8_t* left_checksum,
const uint8_t left_checksum_sz,
const uint8_t left_flags,
const bool right_present,
const uint8_t* right_checksum,
const uint8_t right_checksum_sz,
const uint8_t right_flags,
void* context) {
ASSERT(false);
}
static void save_load_empty_tree() {
tree_t* tree = alloc_tree();
char* tempfile = get_tempfile();
write_to_file_result_t write_result =
write_to_file_helper(tree, STRPLUSLEN(tempfile), true);
ASSERT(write_result == WRITE_TO_FILE_OK);
read_from_file_result_t read_result = read_from_file(STRPLUSLEN(tempfile));
ASSERT(read_result.code == READ_FROM_FILE_OK);
diff_result_t diff_result =
diff_trees(tree, read_result.tree, false, never_called_callback, NULL);
ASSERT(diff_result == DIFF_OK);
}
static void save_load_small_tree() {
tree_t* tree = alloc_tree();
add_to_tree_t toadd[] = {
{STRPLUSLEN("abc"), 12345, 5},
{STRPLUSLEN("ab/cdef/gh"), 64342, 55},
{STRPLUSLEN("ab/cdef/ghi/jkl"), 51545, 57},
{STRPLUSLEN("ab/cdef/ghi/jklm"), 54774, 12},
{STRPLUSLEN("ab/cdef/ghi/jklmn"), 48477, 252},
{STRPLUSLEN("a"), 577, 14},
};
add_to_tree(tree, toadd, sizeof(toadd) / sizeof(add_to_tree_t));
char* tempfile = get_tempfile();
write_to_file_result_t write_result =
write_to_file_helper(tree, STRPLUSLEN(tempfile), true);
ASSERT(write_result == WRITE_TO_FILE_OK);
read_from_file_result_t read_result = read_from_file(STRPLUSLEN(tempfile));
ASSERT(read_result.code == READ_FROM_FILE_OK);
diff_result_t diff_result =
diff_trees(tree, read_result.tree, false, never_called_callback, NULL);
ASSERT(diff_result == DIFF_OK);
}
int main(int argc, char* argv[]) {
save_load_empty_tree();
save_load_small_tree();
return 0;
}

View File

@ -1,50 +0,0 @@
//
// tree_dump: Load a tree from disk. Then print all the node hashes along
// with the length of the name and the number of children.
//
// no-check-code
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/convert.h"
#include "tests.h"
static char buffer[SHA1_BYTES * 2];
void print_subtree(node_t* node) {
hexlify(node->checksum, node->checksum_sz, buffer);
printf(
"%.*s\t%d\t%d\n",
node->checksum_sz * 2,
buffer,
node->name_sz,
node->num_children);
for (uint32_t ix = 0; ix < node->num_children; ix++) {
print_subtree(get_child_by_index(node, ix));
}
}
int main(int argc, char* argv[]) {
if (argc < 2) {
fprintf(stderr, "Usage: %s <tree-save-file>\n", argv[0]);
exit(1);
}
read_from_file_result_t read_from_file_result =
read_from_file(argv[1], strlen(argv[1]));
if (read_from_file_result.code != READ_FROM_FILE_OK) {
fprintf(stderr, "Unable to read tree file %s\n", argv[1]);
exit(1);
}
tree_t* tree = read_from_file_result.tree;
node_t* shadow_root = tree->shadow_root;
node_t* real_root = get_child_by_index(shadow_root, 0);
print_subtree(real_root);
return 0;
}
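// Example invocation (file name and output values are illustrative only):
//
//   $ tree_dump tree-save-file
//   <hex node hash><TAB><name length><TAB><number of children>
//   ...one line per node, starting at the real root, each node printed before
//   its children (depth-first)...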

View File

@ -1,101 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterate_rt.c: simple benchmark for converting a flat manifest to a tree
// and then back into a flat manifest through iteration.
//
// no-check-code
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "lib/clib/convert.h"
#include "node.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
fprintf(stderr, "Usage: %s <manifestfile> <outputfile>\n", argv[0]);
exit(1);
}
FILE* fh = fopen(argv[1], "r");
if (fh == NULL) {
fprintf(stderr, "Error: cannot open %s\n", argv[1]);
exit(1);
}
FILE* ofh = fopen(argv[2], "w");
if (ofh == NULL) {
fprintf(stderr, "Error: cannot open %s\n", argv[2]);
exit(1);
}
fseeko(fh, 0, SEEK_END);
off_t length = ftello(fh);
rewind(fh);
char* flatmanifest = malloc(length);
if (flatmanifest == NULL) {
fprintf(stderr, "Error: cannot allocate memory for reading %s\n", argv[1]);
exit(1);
}
if (fread(flatmanifest, length, 1, fh) != 1) {
fprintf(stderr, "Error: cannot read %s\n", argv[1]);
exit(1);
}
struct timeval before_from, after_from;
gettimeofday(&before_from, NULL);
convert_from_flat_result_t from_flat =
convert_from_flat(flatmanifest, length);
gettimeofday(&after_from, NULL);
if (from_flat.code != CONVERT_FROM_FLAT_OK) {
fprintf(stderr, "Error: converting from flat manifest\n");
exit(1);
}
struct timeval before_to, after_to;
gettimeofday(&before_to, NULL);
iterator_t* iterator = create_iterator(from_flat.tree, true);
char sha_ascii[SHA1_BYTES * 2];
while (true) {
iterator_result_t iterator_result = iterator_next(iterator);
if (iterator_result.valid == false) {
break;
}
hexlify(iterator_result.checksum, SHA1_BYTES, sha_ascii);
fwrite(iterator_result.path, iterator_result.path_sz, 1, ofh);
fputc(0, ofh);
fwrite(sha_ascii, SHA1_BYTES * 2, 1, ofh);
if (iterator_result.flags != 0) {
fputc(iterator_result.flags, ofh);
}
fputc('\n', ofh);
}
gettimeofday(&after_to, NULL);
fclose(ofh);
uint64_t usecs_before_from =
before_from.tv_sec * 1000000 + before_from.tv_usec;
uint64_t usecs_after_from = after_from.tv_sec * 1000000 + after_from.tv_usec;
uint64_t usecs_before_to = before_to.tv_sec * 1000000 + before_to.tv_usec;
uint64_t usecs_after_to = after_to.tv_sec * 1000000 + after_to.tv_usec;
printf(
"flat -> tree: %" PRIu64 " us\n", (usecs_after_from - usecs_before_from));
printf(
"tree -> iterater -> flat: %" PRIu64 " us\n",
(usecs_after_to - usecs_before_to));
printf(
"tree consumed memory: %" PRIuPTR "\n", from_flat.tree->consumed_memory);
}

View File

@ -1,198 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterator.c: implementation for traversing all the nodes of a tree
// in-order.
//
// no-check-code
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "path_buffer.h"
#include "tree_iterator.h"
#define DEFAULT_PATH_RECORDS_SZ 1024
iterator_t* create_iterator(const tree_t* tree, bool construct_paths) {
  iterator_t* result = malloc(sizeof(iterator_t));
  path_record_t* path_records =
      malloc(sizeof(path_record_t) * DEFAULT_PATH_RECORDS_SZ);
  char* path = malloc(DEFAULT_PATH_BUFFER_SZ);
  if (result != NULL) {
    // make sure the failure path below never reads an uninitialized pointer.
    result->copy = NULL;
  }
  if (result == NULL || path_records == NULL || path == NULL ||
      (result->copy = copy_tree(tree)) == NULL) {
    goto fail;
  }
// success!
result->path_records = path_records;
result->path_records_idx = 0;
result->path = path;
result->path_idx = 0;
result->path_sz = DEFAULT_PATH_BUFFER_SZ;
result->construct_paths = construct_paths;
return result;
fail:
if (result != NULL) {
if (result->copy != NULL) {
destroy_tree(result->copy);
}
free(result);
}
free(path_records);
free(path);
return NULL;
}
typedef enum {
ITERATOR_FOUND,
ITERATOR_NOT_FOUND,
ITERATOR_OOM,
ITERATOR_ERROR,
} iterator_progress_t;
static iterator_progress_t iterator_find_next(iterator_t* iterator) {
if (iterator->path_records_idx == DEFAULT_PATH_RECORDS_SZ) {
// we've traversed too deep.
abort();
}
while (iterator->path_records_idx > 0) {
size_t read_idx = iterator->path_records_idx - 1;
if (iterator->path_records[read_idx].child_idx <
iterator->path_records[read_idx].node->num_children) {
if (!VERIFY_CHILD_NUM(iterator->path_records[read_idx].child_idx)) {
return ITERATOR_ERROR;
}
node_t* candidate = get_child_by_index(
iterator->path_records[read_idx].node,
(child_num_t)iterator->path_records[read_idx].child_idx);
if (iterator->construct_paths && candidate->type != TYPE_ROOT) {
// if it's not a root node, we need to slap on the name.
if (PATH_APPEND(
&iterator->path,
&iterator->path_idx,
&iterator->path_sz,
candidate->name,
candidate->name_sz) == false) {
return ITERATOR_OOM;
}
}
// if it's a leaf node, we have the name already added to the path if
// required. remember where we are so we can continue.
if (candidate->type == TYPE_LEAF) {
return ITERATOR_FOUND;
}
// has to either be TYPE_IMPLICIT or TYPE_ROOT at this point. set up
// the next path record and descend into the directory.
iterator->path_records[iterator->path_records_idx].node = candidate;
iterator->path_records[iterator->path_records_idx].child_idx = 0;
iterator->path_records[iterator->path_records_idx].previous_path_idx =
iterator->path_idx;
iterator->path_records_idx++;
// start at the top of the while loop again.
continue;
}
// done considering all the children at this level, pop off a path record
// and continue.
iterator->path_records_idx--;
// if we have parents, we should restore the state
if (iterator->path_records_idx > 0) {
// path_record_idx is where we write the *next* record, so we have to go
// back up one more record.
size_t parent_idx = iterator->path_records_idx - 1;
iterator->path_idx = iterator->path_records[parent_idx].previous_path_idx;
iterator->path_records[parent_idx].child_idx++;
}
}
return ITERATOR_NOT_FOUND;
}
iterator_result_t iterator_next(iterator_t* iterator) {
// special case: if we haven't started iterating yet, then there will be no
// path records.
if (iterator->path_records_idx == 0) {
// search for the first leaf node.
const node_t* search_start =
get_child_by_index(iterator->copy->shadow_root, 0);
// record the progress into the iterator struct
iterator->path_records[0].node = search_start;
iterator->path_records[0].child_idx = 0;
iterator->path_records[0].previous_path_idx = 0;
// at the start, reads come from 0, writes go to 1.
iterator->path_records_idx = 1;
} else {
size_t read_idx = iterator->path_records_idx - 1;
iterator->path_records[read_idx].child_idx++;
// truncate the path up to the last directory.
iterator->path_idx = iterator->path_records[read_idx].previous_path_idx;
}
iterator_progress_t progress = iterator_find_next(iterator);
iterator_result_t result;
if (progress == ITERATOR_FOUND) {
size_t read_idx = iterator->path_records_idx - 1;
path_record_t* record = &iterator->path_records[read_idx];
if (!VERIFY_CHILD_NUM(record->child_idx)) {
abort();
}
node_t* child =
get_child_by_index(record->node, (child_num_t)record->child_idx);
result.valid = true;
if (iterator->construct_paths) {
result.path = iterator->path;
result.path_sz = iterator->path_idx;
} else {
// strictly these shouldn't be necessary, because we only read these
// fields if we succeed, and that code path does set the fields. however,
// gcc doesn't know that and throws a fit.
result.path = NULL;
result.path_sz = 0;
}
result.checksum = child->checksum;
result.checksum_sz = child->checksum_sz;
result.flags = child->flags;
} else {
result.valid = false;
// strictly these shouldn't be necessary, because we only read these fields
// if we succeed, and that code path does set the fields. however, gcc
// doesn't know that and throws a fit.
result.path = NULL;
result.path_sz = 0;
result.checksum = NULL;
result.checksum_sz = 0;
result.flags = 0;
}
return result;
}
void destroy_iterator(iterator_t* iterator) {
destroy_tree(iterator->copy);
free(iterator->path_records);
free(iterator->path);
free(iterator);
}
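// A usage sketch; `count_leaves` is hypothetical. It walks every leaf in path
// order and counts them. Passing construct_paths=false skips building the
// path string for callers that only need checksums/flags.
static size_t count_leaves(const tree_t* tree) {
  iterator_t* iterator = create_iterator(tree, false);
  if (iterator == NULL) {
    return 0;
  }
  size_t num_leaves = 0;
  while (true) {
    iterator_result_t result = iterator_next(iterator);
    if (result.valid == false) {
      break;
    }
    num_leaves++;
  }
  destroy_iterator(iterator);
  return num_leaves;
}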

View File

@ -1,43 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterator.h: declarations for traversing all the nodes of a tree
// in-order.
//
// no-check-code
#ifndef __FASTMANIFEST_TREE_ITERATOR_H__
#define __FASTMANIFEST_TREE_ITERATOR_H__
#include <stdbool.h>
#include <stdlib.h>
#include "node.h"
typedef struct _path_record_t {
const node_t* node;
size_t child_idx;
// this is how much of the path was already present when we started walking
// this node. once we close this path, we should restore the iterator's
  // path_idx to this value.
size_t previous_path_idx;
} path_record_t;
struct _iterator_t {
tree_t* copy;
bool construct_paths;
// track where we are in the iteration process.
path_record_t* path_records;
// this is where the next path record should be written to.
size_t path_records_idx;
// track the path, if path construction is requested.
char* path;
size_t path_idx;
size_t path_sz;
};
#endif // #ifndef __FASTMANIFEST_TREE_ITERATOR_H__

View File

@ -1,135 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterator_test.c: tests for traversing all the nodes of a tree in-order.
//
// no-check-code
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "tests.h"
#include "tree_iterator.h"
typedef struct _iterator_expectations_t {
char* path;
size_t path_sz;
bool path_present;
uint32_t checksum_primer;
uint8_t flags;
} iterator_expectations_t;
static bool match_expectations(
iterator_t* iterator,
iterator_expectations_t* expectations,
size_t expectations_sz) {
size_t ix = 0;
uint8_t expected_checksum[SHA1_BYTES];
while (true) {
iterator_result_t result = iterator_next(iterator);
if (result.valid == false) {
break;
}
if (ix >= expectations_sz) {
return false;
}
iterator_expectations_t* expectation = &expectations[ix];
ix++;
if (expectation->path_present &&
(expectation->path_sz != result.path_sz ||
memcmp(expectation->path, result.path, expectation->path_sz) != 0)) {
return false;
}
// prime the expected checksum
int2sha1hash(expectation->checksum_primer, expected_checksum);
if (SHA1_BYTES != result.checksum_sz ||
memcmp(expected_checksum, result.checksum, SHA1_BYTES) != 0) {
return false;
}
}
return (ix == expectations_sz);
}
void test_empty_tree() {
tree_t* tree = alloc_tree();
iterator_t* iterator = create_iterator(tree, false);
iterator_expectations_t expectations[] = {};
ASSERT(match_expectations(
iterator,
expectations,
sizeof(expectations) / sizeof(iterator_expectations_t)));
destroy_iterator(iterator);
destroy_tree(tree);
}
void test_simple_tree() {
tree_t* tree = alloc_tree();
add_to_tree_t toadd[] = {
{STRPLUSLEN("abc"), 12345, 5},
};
add_to_tree(tree, toadd, sizeof(toadd) / sizeof(add_to_tree_t));
iterator_t* iterator = create_iterator(tree, true);
iterator_expectations_t expectations[] = {
{STRPLUSLEN("abc"), true, 12345, 5}};
ASSERT(match_expectations(
iterator,
expectations,
sizeof(expectations) / sizeof(iterator_expectations_t)));
destroy_iterator(iterator);
destroy_tree(tree);
}
void test_complicated_tree() {
tree_t* tree = alloc_tree();
add_to_tree_t toadd[] = {
{STRPLUSLEN("abc"), 12345, 5},
{STRPLUSLEN("ab/cdef/gh"), 64342, 55},
{STRPLUSLEN("ab/cdef/ghi/jkl"), 51545, 57},
{STRPLUSLEN("ab/cdef/ghi/jklm"), 54774, 12},
{STRPLUSLEN("ab/cdef/ghi/jklmn"), 48477, 252},
{STRPLUSLEN("a"), 577, 14},
};
add_to_tree(tree, toadd, sizeof(toadd) / sizeof(add_to_tree_t));
iterator_t* iterator = create_iterator(tree, true);
iterator_expectations_t expectations[] = {
{STRPLUSLEN("a"), true, 577, 14},
{STRPLUSLEN("ab/cdef/gh"), true, 64342, 55},
{STRPLUSLEN("ab/cdef/ghi/jkl"), true, 51545, 57},
{STRPLUSLEN("ab/cdef/ghi/jklm"), true, 54774, 12},
{STRPLUSLEN("ab/cdef/ghi/jklmn"), true, 48477, 252},
{STRPLUSLEN("abc"), true, 12345, 5},
};
ASSERT(match_expectations(
iterator,
expectations,
sizeof(expectations) / sizeof(iterator_expectations_t)));
destroy_iterator(iterator);
}
int main(int argc, char* argv[]) {
test_empty_tree();
test_simple_tree();
test_complicated_tree();
return 0;
}

View File

@ -1,250 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_path.c: implementation for the core path function for parsing and
// traversing a path through a tree.
//
// no-check-code
#include <stdlib.h>
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "tree_arena.h"
#include "tree_path.h"
/**
 * Given a path, return the length of its first directory component,
 * including the trailing path separator. Returns 0 when the path contains no
 * separator, i.e. when the path is a plain leaf name. The path must be valid
 * according to `valid_path`.
 *
 * first_component("abc/def", 7) => 4  (the "abc/" prefix)
 * first_component("abc", 3)     => 0
*/
static size_t first_component(const char* path, size_t path_sz) {
for (size_t off = 0; off < path_sz; off++) {
if (path[off] == '/') {
return off + 1;
}
}
return 0;
}
/**
* Adds a child to `root`. Because `root` may need to be resized to accommodate
* the new child, we need the *parent* of `root`. On success (`result.code` ==
* TREE_ADD_CHILD_OK), `result.newchild` will be set to the new node created.
* Because the root may also have been moved, `result.newroot` will be set to
* the new root. Be sure to save BOTH.
*
 * Updates the size delta and the non-arena-allocations flag in the tree state
 * change accounting structure.
*/
tree_add_child_result_t tree_add_child(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* name,
const size_t name_sz,
size_t num_children_hint,
tree_state_changes_t* changes) {
tree_add_child_result_t result;
if (!VERIFY_CHILD_NUM(num_children_hint) || !VERIFY_NAME_SZ(name_sz)) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_WTF, NULL, NULL};
}
// create a new child node, and record the deltas in the change
// register.
//
// NOTE: OPTIMIZATION OPPORTUNITY!
//
// this is a potential optimization opportunity. we could theoretically try
// to allocate the new node in the arena and maintain compacted state of the
// tree.
node_t* node =
alloc_node(name, (name_sz_t)name_sz, (child_num_t)num_children_hint);
if (node == NULL) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_OOM, NULL, NULL};
}
// accounting changes.
changes->size_change += node->block_sz;
changes->non_arena_allocations = true;
result.newchild = node;
// attempt to add a child to `root` with the name `name`.
node_add_child_result_t add_child_result = add_child(root, node);
if (add_child_result == NEEDS_LARGER_NODE) {
// NOTE: OPTIMIZATION OPPORTUNITY!
//
// this is a linear scan. it's unclear whether a linear scan for a pointer
// is better or worse than a binary search that has to chase a pointer. the
// answer is probably to do the linear scan for nodes with a small number of
// children, and a binary search for nodes with a lot of children.
uint32_t index = get_child_index(root_parent, root);
if (index == UINT32_MAX) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_WTF, NULL, NULL};
}
node_enlarge_child_capacity_result_t enlarge_result =
enlarge_child_capacity(root_parent, index);
if (enlarge_result.code == ENLARGE_OOM) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_OOM, NULL, NULL};
} else if (enlarge_result.code != ENLARGE_OK) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_WTF, NULL, NULL};
}
// update accounting.
if (!in_arena(tree, enlarge_result.old_child)) {
// not in arena, free the memory.
uint32_t block_sz = enlarge_result.old_child->block_sz;
free(enlarge_result.old_child);
changes->size_change -= block_sz;
}
changes->size_change += enlarge_result.new_child->block_sz;
root = enlarge_result.new_child;
// add the child again.
add_child_result = add_child(root, node);
if (add_child_result != ADD_CHILD_OK) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_WTF, NULL, NULL};
}
} else if (add_child_result != ADD_CHILD_OK) {
return COMPOUND_LITERAL(tree_add_child_result_t){
TREE_ADD_CHILD_WTF, NULL, NULL};
}
result.code = TREE_ADD_CHILD_OK;
result.newroot = root;
return result;
}
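// Hypothetical call site illustrating the "save BOTH" warning above: adding a
// child can resize and therefore move `dir`, so the caller has to adopt
// `newroot` before touching `dir` again.
//
//   tree_add_child_result_t r =
//       tree_add_child(tree, dir_parent, dir, name, name_sz, 1, &changes);
//   if (r.code == TREE_ADD_CHILD_OK) {
//     dir = r.newroot;             // `dir` may have been reallocated.
//     node_t* added = r.newchild;  // the freshly created child.
//   }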
/**
 * Find the directory node enclosing `path`. If `operation_type` is
 * `CREATE_IF_MISSING`, then any intermediate directories that do not exist
 * will be created. Once
* the directory enclosing the object at `path` is located, `callback` will be
* invoked. It should do whatever operation is desired and mark up how the tree
* has been modified.
*
* On exit, `find_path` will examine the state changes and use them to update
* the nodes it has encountered walking to this node.
*
* The path must be valid according to `valid_path`, but since it is not checked
* internally, the caller is responsible for ensuring it.
*/
find_path_result_t find_path(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* path,
const size_t path_sz,
find_path_operation_type operation_type,
tree_state_changes_t* changes,
find_path_callback_result_t (*callback)(
tree_t* tree,
node_t* const dir_parent,
node_t* dir,
const char* path,
const size_t path_sz,
tree_state_changes_t* changes,
void* context),
void* context) {
size_t first_component_sz = first_component(path, path_sz);
find_path_result_t result;
if (first_component_sz == 0 ||
(operation_type == BASIC_WALK_ALLOW_IMPLICIT_NODES &&
first_component_sz == path_sz)) {
// found it! apply the magic function.
find_path_callback_result_t callback_result =
callback(tree, root_parent, root, path, path_sz, changes, context);
result = callback_result.code;
root = callback_result.newroot;
} else {
// resolve the first component.
node_t* child = get_child_by_name(root, path, first_component_sz);
if (child == NULL) {
if (operation_type == CREATE_IF_MISSING) {
// create the new child.
tree_add_child_result_t tree_add_child_result = tree_add_child(
tree,
root_parent,
root,
path,
first_component_sz,
// since we're creating the intermediate nodes that lead to a
// leaf node, we'll have at least one child.
1,
changes);
switch (tree_add_child_result.code) {
case TREE_ADD_CHILD_OOM:
return FIND_PATH_OOM;
case TREE_ADD_CHILD_WTF:
return FIND_PATH_WTF;
case TREE_ADD_CHILD_OK:
break;
}
root = tree_add_child_result.newroot;
child = tree_add_child_result.newchild;
// it's an implicit node.
child->type = TYPE_IMPLICIT;
// we must initialize flags to a known value, even if it's not used
// because it participates in checksum calculation.
child->flags = 0;
} else {
// didn't find it, return.
return FIND_PATH_NOT_FOUND;
}
} else if (child->type == TYPE_LEAF) {
// throw an error.
return FIND_PATH_CONFLICT;
}
result = find_path(
tree,
root,
child,
path + first_component_sz,
path_sz - first_component_sz,
operation_type,
changes,
callback,
context);
}
if (result == FIND_PATH_OK) {
// is the checksum still valid? mark up the nodes as we pop off the stack.
if (changes->checksum_dirty == true) {
root->checksum_valid = false;
}
if (operation_type == REMOVE_EMPTY_IMPLICIT_NODES &&
root->type == TYPE_IMPLICIT && root->num_children == 0) {
// update metadata before we free the node.
changes->size_change -= root->block_sz;
node_remove_child_result_t remove_result =
remove_child(root_parent, get_child_index(root_parent, root));
if (remove_result != REMOVE_CHILD_OK) {
result = FIND_PATH_WTF;
} else {
if (!in_arena(tree, root)) {
free(root);
}
}
}
}
return result;
}
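// A usage sketch; `noop_dir_callback` is hypothetical. With BASIC_WALK it
// makes find_path() answer "does the directory chain enclosing this path
// exist?" without modifying the tree: the callback runs once the enclosing
// directory has been reached and simply reports success.
static find_path_callback_result_t noop_dir_callback(
    tree_t* tree,
    node_t* const dir_parent,
    node_t* dir,
    const char* path,
    const size_t path_sz,
    tree_state_changes_t* changes,
    void* context) {
  (void)tree;
  (void)dir_parent;
  (void)path;
  (void)path_sz;
  (void)changes;
  (void)context;
  return COMPOUND_LITERAL(find_path_callback_result_t){FIND_PATH_OK, dir};
}
// Hypothetical call site:
//   tree_state_changes_t changes = {0};
//   find_path_result_t rc = find_path(
//       tree,
//       tree->shadow_root,
//       get_child_by_index(tree->shadow_root, 0),
//       path,
//       path_sz,
//       BASIC_WALK,
//       &changes,
//       noop_dir_callback,
//       NULL);
//   // rc == FIND_PATH_OK iff every directory leading up to `path` exists.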

View File

@ -1,91 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_path.h: declarations for the core path function for parsing and
// traversing a path through a tree.
//
// no-check-code
#ifndef __FASTMANIFEST_TREE_PATH_H__
#define __FASTMANIFEST_TREE_PATH_H__
#include "lib/clib/portability/portability.h"
#include "node.h"
typedef struct _tree_state_changes_t {
ptrdiff_t size_change;
int32_t num_leaf_node_change;
bool non_arena_allocations;
bool checksum_dirty;
} tree_state_changes_t;
typedef enum {
TREE_ADD_CHILD_OK,
TREE_ADD_CHILD_OOM,
TREE_ADD_CHILD_WTF,
} tree_add_child_code_t;
typedef struct _tree_add_child_result_t {
tree_add_child_code_t code;
node_t* newroot;
node_t* newchild;
} tree_add_child_result_t;
typedef enum {
// walks the tree and searches for a leaf node. if the path cannot be found,
// exit with `FIND_PATH_NOT_FOUND`.
BASIC_WALK,
// walks the tree and searches for any node (including implicit nodes). if
// the path cannot be found, exit with `FIND_PATH_NOT_FOUND`.
BASIC_WALK_ALLOW_IMPLICIT_NODES,
// walks the tree. if the intermediate paths cannot be found, create them.
// if a leaf node exists where an intermediate path node needs to be
// created, then return `FIND_PATH_CONFLICT`.
CREATE_IF_MISSING,
// walks the tree. if the path cannot be found, exit with
// `FIND_PATH_NOT_FOUND`. if the operation is successful, then check
// intermediate nodes to ensure that they still have children. any nodes
// that do not should be removed.
REMOVE_EMPTY_IMPLICIT_NODES,
} find_path_operation_type;
typedef enum {
FIND_PATH_OK,
FIND_PATH_NOT_FOUND,
FIND_PATH_OOM,
FIND_PATH_CONFLICT,
FIND_PATH_WTF,
} find_path_result_t;
typedef struct _find_path_callback_result_t {
find_path_result_t code;
node_t* newroot;
} find_path_callback_result_t;
extern tree_add_child_result_t tree_add_child(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* name,
const size_t name_sz,
size_t num_children_hint,
tree_state_changes_t* changes);
extern find_path_result_t find_path(
tree_t* tree,
node_t* const root_parent,
node_t* root,
const char* path,
const size_t path_sz,
find_path_operation_type operation_type,
tree_state_changes_t* changes,
find_path_callback_result_t (*callback)(
tree_t* tree,
node_t* const dir_parent,
node_t* dir,
const char* path,
const size_t path_sz,
tree_state_changes_t* changes,
void* context),
void* context);
#endif // #ifndef __FASTMANIFEST_TREE_PATH_H__

View File

@ -1,335 +0,0 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_test.c: tests for core methods for tree creation and manipulation.
//
// no-check-code
#include "edenscm/hgext/extlib/cfastmanifest/tree.h"
#include "node.h"
#include "tests.h"
/**
* Initializes a tree and verifies that the initial two nodes are created
* correctly.
*/
void tree_init_test() {
tree_t* tree = alloc_tree();
node_t* shadow_root = tree->shadow_root;
ASSERT(shadow_root != NULL);
ASSERT(shadow_root->num_children == 1);
node_t* real_root = get_child_by_index(shadow_root, 0);
ASSERT(real_root != NULL);
ASSERT(real_root->num_children == 0);
ASSERT(tree->consumed_memory == real_root->block_sz);
}
/**
* Initializes a tree and adds a node.
*/
void tree_add_single_child() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t result =
add_or_update_path(tree, STRPLUSLEN("abc"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
ASSERT(tree->compacted == false);
ASSERT(tree->num_leaf_nodes == 1);
}
/**
* Initializes a tree and adds a file and a directory containing a file.
*/
void tree_add_0_cousin_once_removed() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t result;
result = add_or_update_path(tree, STRPLUSLEN("ab"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
result =
add_or_update_path(tree, STRPLUSLEN("abc/de"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
// verify the shadow root.
ASSERT(tree->shadow_root->num_children == 1);
// obtain the true root, verify that.
node_t* real_root = get_child_by_index(tree->shadow_root, 0);
// verify the real root.
ASSERT(real_root->num_children == 2);
// first child should be 'ab'
node_t* root_first_child = get_child_by_index(real_root, 0);
ASSERT(root_first_child->num_children == 0);
ASSERT(root_first_child->type == TYPE_LEAF);
ASSERT(name_compare("ab", 2, root_first_child) == 0);
// second child should be 'abc'
node_t* root_second_child = get_child_by_index(real_root, 1);
ASSERT(root_second_child->num_children == 1);
ASSERT(root_second_child->type == TYPE_IMPLICIT);
ASSERT(name_compare("abc/", 4, root_second_child) == 0);
}
/**
* Initializes a tree and adds a long skinny branch.
*/
void tree_add_long_skinny_branch() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t result;
result = add_or_update_path(tree, STRPLUSLEN("ab"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
result =
add_or_update_path(tree, STRPLUSLEN("abc/de"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
result = add_or_update_path(
tree, STRPLUSLEN("abc/def/gh"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
result = add_or_update_path(
tree, STRPLUSLEN("abc/def/ghi/jkl"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
ASSERT(tree->compacted == false);
ASSERT(tree->num_leaf_nodes == 4);
}
/**
* Initializes a tree and adds a bushy branch.
*/
void tree_add_bushy_branch() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t result;
result = add_or_update_path(tree, STRPLUSLEN("ab"), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
char tempbuffer[] = "abc/de?";
for (int ix = 0; ix < 26; ix++) {
tempbuffer[6] = 'a' + ix;
result = add_or_update_path(
tree, STRPLUSLEN(tempbuffer), checksum, SHA1_BYTES, 0);
ASSERT(result == ADD_UPDATE_PATH_OK);
}
ASSERT(tree->compacted == false);
ASSERT(tree->num_leaf_nodes == 27);
}
/**
 * Initializes a tree and attempts to retrieve a couple of paths that are not there.
*/
void tree_get_empty() {
tree_t* tree = alloc_tree();
get_path_result_t result = get_path(tree, STRPLUSLEN("abc"));
ASSERT(result.code == GET_PATH_NOT_FOUND);
result = get_path(tree, STRPLUSLEN("abc/def"));
ASSERT(result.code == GET_PATH_NOT_FOUND);
}
/**
 * Initializes a tree, adds a single path, and attempts to retrieve it.
*/
#define ADD_GET_SIMPLE_FLAGS 0x2e
void tree_add_get_simple() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t add_result = add_or_update_path(
tree, STRPLUSLEN("abc"), checksum, SHA1_BYTES, ADD_GET_SIMPLE_FLAGS);
ASSERT(add_result == ADD_UPDATE_PATH_OK);
ASSERT(tree->compacted == false);
ASSERT(tree->num_leaf_nodes == 1);
get_path_result_t get_result = get_path(tree, STRPLUSLEN("abc"));
ASSERT(get_result.code == GET_PATH_OK);
ASSERT(get_result.checksum_sz == SHA1_BYTES);
ASSERT(memcmp(checksum, get_result.checksum, SHA1_BYTES) == 0);
ASSERT(get_result.flags == ADD_GET_SIMPLE_FLAGS);
get_result = get_path(tree, STRPLUSLEN("abc/def"));
ASSERT(get_result.code == GET_PATH_NOT_FOUND);
}
/**
 * Initializes a tree, adds a single path, and attempts to retrieve a
* valid directory node.
*/
#define ADD_GET_SIMPLE_FLAGS 0x2e
void tree_add_get_implicit_node() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t add_result = add_or_update_path(
tree, STRPLUSLEN("abc/def"), checksum, SHA1_BYTES, ADD_GET_SIMPLE_FLAGS);
ASSERT(add_result == ADD_UPDATE_PATH_OK);
ASSERT(tree->compacted == false);
ASSERT(tree->num_leaf_nodes == 1);
get_path_result_t get_result = get_path(tree, STRPLUSLEN("abc"));
ASSERT(get_result.code == GET_PATH_NOT_FOUND);
}
/**
* Removes a non-existent path.
*/
void tree_remove_nonexistent() {
tree_t* tree = alloc_tree();
remove_path_result_t remove_result = remove_path(tree, STRPLUSLEN("abc"));
ASSERT(remove_result == REMOVE_PATH_NOT_FOUND);
}
/**
 * Adds a path and removes it. Then calls get to verify that it was
* removed.
*/
void tree_add_remove() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
add_update_path_result_t add_result =
add_or_update_path(tree, STRPLUSLEN("abc"), checksum, SHA1_BYTES, 0);
ASSERT(add_result == ADD_UPDATE_PATH_OK);
ASSERT(tree->compacted == false);
ASSERT(tree->num_leaf_nodes == 1);
remove_path_result_t remove_result = remove_path(tree, STRPLUSLEN("abc"));
ASSERT(remove_result == REMOVE_PATH_OK);
ASSERT(tree->num_leaf_nodes == 0);
ASSERT(tree->compacted == false);
get_path_result_t get_result = get_path(tree, STRPLUSLEN("abc"));
ASSERT(get_result.code == GET_PATH_NOT_FOUND);
node_t* shadow_root = tree->shadow_root;
ASSERT(shadow_root->num_children == 1);
node_t* real_root = get_child_by_index(shadow_root, 0);
ASSERT(real_root->num_children == 0);
// because the directory nodes may have undergone expansion, we may not
// have the exact same memory requirement.
ASSERT(tree->consumed_memory == real_root->block_sz);
}
/**
 * Adds multiple paths and then removes them.
*/
void tree_add_remove_multi() {
tree_t* tree = alloc_tree();
uint8_t checksum[SHA1_BYTES];
for (int ix = 0; ix < SHA1_BYTES; ix++) {
checksum[ix] = (uint8_t)ix;
}
char* paths_to_add[] = {
"abc",
"ab/def",
"ab/defg/hi",
"ab/defg/h/ijk",
"ab/defg/h/i/jkl/mn/op/qr",
"ab/defg/h/i/jkl/mn/op/qrs",
};
const size_t num_paths = sizeof(paths_to_add) / sizeof(*paths_to_add);
for (size_t ix = 0; ix < num_paths; ix++) {
add_update_path_result_t add_result = add_or_update_path(
tree, STRPLUSLEN(paths_to_add[ix]), checksum, SHA1_BYTES, 0);
ASSERT(add_result == ADD_UPDATE_PATH_OK);
}
for (size_t ix = 0; ix < num_paths; ix++) {
remove_path_result_t remove_result =
remove_path(tree, STRPLUSLEN(paths_to_add[num_paths - ix - 1]));
    ASSERT(remove_result == REMOVE_PATH_OK);
for (size_t jx = 0; jx < num_paths - ix - 1; jx++) {
get_path_result_t get_result =
get_path(tree, STRPLUSLEN(paths_to_add[jx]));
ASSERT(get_result.code == GET_PATH_OK);
}
}
node_t* shadow_root = tree->shadow_root;
ASSERT(shadow_root->num_children == 1);
node_t* real_root = get_child_by_index(shadow_root, 0);
ASSERT(real_root->num_children == 0);
ASSERT(tree->num_leaf_nodes == 0);
ASSERT(tree->compacted == false);
// because the directory nodes may have undergone expansion, we may not
// have the exact same memory requirement. however, we should only have
// two nodes left, so we can just sum them up directly.
ASSERT(tree->consumed_memory == real_root->block_sz);
}
int main(int argc, char* argv[]) {
tree_init_test();
tree_add_single_child();
tree_add_0_cousin_once_removed();
tree_add_long_skinny_branch();
tree_add_bushy_branch();
tree_get_empty();
tree_add_get_simple();
tree_add_get_implicit_node();
tree_remove_nonexistent();
tree_add_remove();
tree_add_remove_multi();
return 0;
}

View File

@ -1,267 +0,0 @@
# fastmanifest.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""a treemanifest disk cache for speeding up manifest comparison
This extension adds fastmanifest, a treemanifest disk cache for speeding up
manifest comparison. It also contains utilities to investigate manifest access
patterns.
Configuration options and default values:
[fastmanifest]
# If true, disable all logging; used for running the mercurial test suite
# without changing the output.
silent = False
# If true, suppress all logging from worker processes.
silentworker = True
# If true, materializes every manifest as a fastmanifest. Used to test that
# fastmanifest passes the mercurial test suite. This happens in memory only;
# the on-disk file format is still a revlog of flat manifests.
debugcachemanifest = False
# Filename; if not empty, access to any manifest will be logged to it.
logfile = ""
# Cache fastmanifest if remotenames or bookmarks change, or on a commit.
cacheonchange = False
# Make cacheonchange (see above) work in the background.
cacheonchangebackground = True
# Maximum number of fastmanifests kept in volatile memory
maxinmemoryentries = 10
# Dump metrics after each command, see metrics.py
debugmetrics = False
# If False, cache entries in a deterministic order; otherwise use a random
# order by batches.
randomorder = True
# Cache properties, see systemawarecachelimit.
lowgrowththresholdgb = 20
lowgrowthslope = 0.1
highgrowthslope = 0.2
maxcachesizegb = 6
# Cutoff date: revisions older than the cutoff won't be cached. The default is
# 60 days. -1 means no limit.
cachecutoffdays = 60
# List of relevant remotenames whose manifests are to be included in the cache.
# The list is comma- or space-separated.
relevantremotenames = master
# Enables the creation and use of fast cache manifests (defaults to True)
usecache=False
# Enables the use of treemanifests (defaults to False)
usetree=True
Description:
`manifestaccesslogger` logs manifest accesses to the logfile specified with
the option fastmanifest.logfile
`fastmanifesttocache` is a revset of relevant manifests to cache
`hybridmanifest` is a proxy class for flat and cached manifests that loads
a manifest from the cache or from disk.
It chooses which kind of manifest to create based on the operation,
ideally the fastest one.
TODO instantiate fastmanifest when they are more suitable
`manifestcache` is the class handling the interface with the cache; it supports
caching flat and fast manifests and retrieving them.
TODO logic for loading fastmanifest
TODO logic for saving fastmanifest
TODO garbage collection
`manifestfactory` is a class whose methods wrap the manifest-creating methods of
manifest.manifest. It intercepts the calls to build hybridmanifest instead of
regular manifests. We use a class for that to allow sharing the ui object, which
is not normally accessible to manifests.
`debugcachemanifest` is a command calling `_cachemanifest`, a function to add
manifests to the cache and manipulate what is cached. It allows caching fast
and flat manifests, asynchronously and synchronously.
"""
from __future__ import absolute_import
import sys
from edenscm.mercurial import (
bookmarks,
dispatch,
error,
extensions,
localrepo,
manifest,
registrar,
revset as revsetmod,
)
from edenscm.mercurial.i18n import _
from . import cachemanager, debug, implementation, metrics
metricscollector = metrics.metricscollector
manifestfactory = implementation.manifestfactory
fastmanifestcache = implementation.fastmanifestcache
cmdtable = {}
command = registrar.command(cmdtable)
configtable = {}
configitem = registrar.configitem(configtable)
configitem("fastmanifest", "logfile", default="")
configitem("fastmanifest", "debugmetrics", default=False)
configitem("fastmanifest", "usecache", default=True)
configitem("fastmanifest", "usetree", default=False)
@command(
"debugcachemanifest",
[
("r", "rev", [], "cache the manifest for revs", "REV"),
("a", "all", False, "cache all relevant revisions", ""),
(
"l",
"limit",
0,
"limit size of total rev in bytes (<0: unlimited; 0: default policy)",
"BYTES",
),
("p", "pruneall", False, "prune all the entries"),
("e", "list", False, "list the content of the cache and its size", ""),
],
"hg debugcachemanifest",
)
def debugcachemanifest(ui, repo, *pats, **opts):
pruneall = opts["pruneall"]
displaylist = opts["list"]
if opts["all"]:
revset = ["fastmanifesttocache()"]
elif opts["rev"]:
revset = opts["rev"]
else:
revset = []
ui.debug(
("[FM] caching revset: %s, pruneall(%s), list(%s)\n")
% (revset, pruneall, displaylist)
)
if displaylist and pruneall:
raise error.Abort("can only use --pruneall or --list not both")
if pruneall:
cachemanager.cachemanifestpruneall(ui, repo)
return
if displaylist:
cachemanager.cachemanifestlist(ui, repo)
return
if opts["limit"] != 0:
if opts["limit"] < 0:
limitbytes = sys.maxint
else:
limitbytes = opts["limit"]
cache = fastmanifestcache.getinstance(repo.store.opener, ui)
cache.overridelimit(debug.fixedcachelimit(limitbytes))
cachemanager.cachemanifestfillandtrim(ui, repo, revset)
@command("cachemanifest", [], "hg cachemanifest")
def cachemanifest(ui, repo, *pats, **opts):
cachemanager.cacher.cachemanifest(repo)
class FastManifestExtension(object):
initialized = False
@staticmethod
def _logonexit(orig, ui, repo, cmd, fullargs, *args):
r = orig(ui, repo, cmd, fullargs, *args)
metricscollector.get().logsamples(ui)
return r
@staticmethod
def setup(ui):
logger = debug.manifestaccesslogger(ui)
extensions.wrapfunction(manifest.manifestrevlog, "rev", logger.revwrap)
factory = manifestfactory(ui)
extensions.wrapfunction(manifest.manifestlog, "__getitem__", factory.newgetitem)
extensions.wrapfunction(
manifest.manifestlog, "get", factory.newgetdirmanifestctx
)
extensions.wrapfunction(manifest.memmanifestctx, "write", factory.ctxwrite)
extensions.wrapfunction(manifest.manifestrevlog, "add", factory.add)
if ui.configbool("fastmanifest", "usecache"):
revsetmod.symbols["fastmanifesttocache"] = cachemanager.fastmanifesttocache
revsetmod.safesymbols.add("fastmanifesttocache")
revsetmod.symbols["fastmanifestcached"] = cachemanager.fastmanifestcached
revsetmod.safesymbols.add("fastmanifestcached")
# Trigger to enable caching of relevant manifests
extensions.wrapfunction(
bookmarks.bmstore, "_write", cachemanager.triggers.onbookmarkchange
)
extensions.wrapfunction(
localrepo.localrepository, "commitctx", cachemanager.triggers.oncommit
)
try:
remotenames = extensions.find("remotenames")
except KeyError:
pass
else:
if remotenames:
extensions.wrapfunction(
remotenames,
"saveremotenames",
cachemanager.triggers.onremotenameschange,
)
extensions.wrapfunction(
dispatch, "runcommand", cachemanager.triggers.runcommandtrigger
)
extensions.wrapfunction(
dispatch, "runcommand", FastManifestExtension._logonexit
)
def extsetup(ui):
FastManifestExtension.setup(ui)
def reposetup(ui, repo):
# Don't update the ui for remote peer repos, since they won't have the local
# configs.
if repo.local() is None:
return
if ui.configbool("fastmanifest", "usetree"):
try:
extensions.find("treemanifest")
except KeyError:
raise error.Abort(
_(
"fastmanifest.usetree cannot be enabled without"
" enabling treemanifest"
)
)

View File

@ -1,377 +0,0 @@
# cachemanager.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import errno
import os
from edenscm.mercurial import encoding, error, extensions, revlog, scmutil, util
from edenscmnative import cfastmanifest
from . import concurrency, constants
from .implementation import CacheFullException, fastmanifestcache
from .metrics import metricscollector
def _relevantremonamesrevs(repo):
revs = set()
remotenames = None
try:
remotenames = extensions.find("remotenames")
except KeyError: # remotenames not loaded
pass
if remotenames is not None:
# interesting remotenames to fetch
relevantnames = set(
repo.ui.configlist("fastmanifest", "relevantremotenames", ["master"])
)
names = remotenames.readremotenames(repo)
for rev, kind, prefix, name in names:
if name in relevantnames and kind == "bookmarks":
revs.add(repo[rev].rev())
return revs
def fastmanifestcached(repo, subset, x):
"""Revset encompassing all revisions whose manifests are cached"""
    # At a high level, we look at what is cached, and go from manifest nodes
    # to changelog revs.
    #
    # 1) We look at all the cached manifests; for each of them we find the first
    #    changelog rev that introduced it, thanks to linkrevs.
    # 2) We compute the minimum of those changelog revs. It is guaranteed that
    #    all the changelog revs whose manifests are cached are above that
    #    minimum rev in the changelog.
    # 3) From this minimum, we inspect all the more recent and visible changelog
    #    revisions and keep track of the ones whose manifest is cached.
cache = fastmanifestcache.getinstance(repo.store.opener, repo.ui)
manifestsbinnodes = set(
[revlog.bin(u.replace("fast", "")) for u in cache.ondiskcache]
)
mfrevlog = repo.manifestlog._revlog
manifestslinkrevs = [mfrevlog.linkrev(mfrevlog.rev(k)) for k in manifestsbinnodes]
cachedrevs = set()
if manifestslinkrevs:
for u in repo.changelog.revs(min(manifestslinkrevs)):
revmanifestbin = repo.changelog.changelogrevision(u).manifest
if revmanifestbin in manifestsbinnodes:
cachedrevs.add(u)
return subset & cachedrevs
def fastmanifesttocache(repo, subset, x):
"""Revset of the interesting revisions to cache. This returns:
- Drafts
    - Revisions with a bookmark
- Revisions with some selected remote bookmarks (master, stable ...)
- Their parents (to make diff -c faster)
- TODO The base of potential rebase operations
- Filtering all of the above to only include recent changes
"""
# Add relevant remotenames to the list of interesting revs
revs = _relevantremonamesrevs(repo)
# Add all the other relevant revs
query = "(not public() & not hidden()) + bookmark()"
cutoff = repo.ui.configint("fastmanifest", "cachecutoffdays", 60)
if cutoff == -1: # no cutoff
datelimit = ""
else:
datelimit = "and date(-%d)" % cutoff
revs.update(
scmutil.revrange(repo, ["(%s + parents(%s)) %s" % (query, query, datelimit)])
)
metricscollector.get().recordsample("revsetsize", size=len(revs))
return subset & revs
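For concreteness, with the default 60-day cutoff the revrange query assembled above expands to the following revset string (a worked illustration of the code above, not new behaviour):

# query     = "(not public() & not hidden()) + bookmark()"
# datelimit = "and date(-60)"
# revrange argument:
#   "((not public() & not hidden()) + bookmark()
#     + parents((not public() & not hidden()) + bookmark())) and date(-60)"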
GB = 1024 ** 3
MB = 1024 ** 2
class _systemawarecachelimit(object):
"""A limit that will be tighter as the free disk space reduces"""
def parseconfig(self, ui):
configkeys = set(
[
"lowgrowthslope",
"lowgrowththresholdgb",
"maxcachesizegb",
"highgrowthslope",
]
)
configs = {}
for configkey in configkeys:
strconfig = ui.config("fastmanifest", configkey)
if strconfig is None:
continue
try:
configs[configkey] = float(strconfig)
except ValueError:
                # Keep the default value and print a warning when config is invalid
                msg = "Invalid config for fastmanifest.%s, expected a number"
                ui.warn((msg % configkey))
return configs
def __init__(self, repo=None, opener=None, ui=None):
# Probe the system root partition to know what is available
try:
if repo is None and (opener is None or ui is None):
raise error.Abort("Need to specify repo or (opener and ui)")
st = None
if not util.safehasattr(os, "statvfs"):
self.free = 0
self.total = 0
elif repo is not None:
st = os.statvfs(repo.root)
else:
st = os.statvfs(opener.join(None))
if st is not None:
self.free = st.f_bavail * st.f_frsize
self.total = st.f_blocks * st.f_frsize
except (OSError, IOError) as ex:
if ex.errno == errno.EACCES:
self.free = 0
self.total = 0
return
raise
# Read parameters from config
if repo is not None:
self.config = self.parseconfig(repo.ui)
else:
self.config = self.parseconfig(ui)
def bytes(self):
return _systemawarecachelimit.cacheallocation(self.free, **self.config)
@staticmethod
def cacheallocation(
freespace,
lowgrowththresholdgb=constants.DEFAULT_LOWGROWTH_TRESHOLDGB,
lowgrowthslope=constants.DEFAULT_LOWGROWTH_SLOPE,
maxcachesizegb=constants.DEFAULT_MAXCACHESIZEGB,
highgrowthslope=constants.DEFAULT_HIGHGROWTHSLOPE,
):
"""Given the free space available in bytes, return the size of the cache
When disk space is limited (less than lowgrowththreshold), we increase
the cache size linearly: lowgrowthslope * freespace. Over
lowgrowththreshold, we increase the cache size linearly but faster:
highgrowthslope * freespace until we hit maxcachesize.
These values are configurable, default values are:
[fastmanifest]
lowgrowththresholdgb = 20
lowgrowthslope = 0.1
highgrowthslope = 0.2
maxcachesizegb = 6
^ Cache Size
|
| /------------------- <- maxcachesize
| |
| / <- slope is highgrowthslope
| | <- lowgrowththreshold
| /
| / <- slope is lowgrowslope
|/
-------------------------> Free Space
"""
if freespace < lowgrowththresholdgb * GB:
return min(maxcachesizegb * GB, lowgrowthslope * freespace)
else:
return min(maxcachesizegb * GB, highgrowthslope * freespace)
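A worked illustration of the policy above with the default coefficients; this snippet is not part of the original module and only exercises the static method defined above:

for freegb in (10, 25, 100):
    budget = _systemawarecachelimit.cacheallocation(freegb * GB)
    print("%d GB free -> %.1f GB cache budget" % (freegb, budget / float(GB)))
# 10 GB free -> 1.0 GB cache budget    (low-growth region: 0.1 * freespace)
# 25 GB free -> 5.0 GB cache budget    (high-growth region: 0.2 * freespace)
# 100 GB free -> 6.0 GB cache budget   (capped at maxcachesizegb)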
def cachemanifestpruneall(ui, repo):
cache = fastmanifestcache.getinstance(repo.store.opener, ui)
cache.pruneall()
def cachemanifestlist(ui, repo):
cache = fastmanifestcache.getinstance(repo.store.opener, ui)
total, numentries = cache.ondiskcache.totalsize(silent=False)
ui.status(("cache size is: %s\n" % util.bytecount(total)))
ui.status(("number of entries is: %s\n" % numentries))
    if ui.debugflag:
revs = set(repo.revs("fastmanifestcached()"))
import collections
revstoman = collections.defaultdict(list)
for r in revs:
mannode = revlog.hex(repo.changelog.changelogrevision(r).manifest)
revstoman[mannode].append(str(r))
if revs:
ui.status(("Most relevant cache entries appear first\n"))
ui.status(("=" * 80))
ui.status(("\nmanifest node |revs\n"))
for h in cache.ondiskcache:
l = h.replace("fast", "")
ui.status("%s|%s\n" % (l, ",".join(revstoman.get(l, []))))
def cachemanifestfillandtrim(ui, repo, revset):
"""Cache the manifests described by `revset`. This priming is subject to
limits imposed by the cache, and thus not all the entries may be written.
"""
try:
with concurrency.looselock(
repo.localvfs, "fastmanifest", constants.WORKER_SPAWN_LOCK_STEAL_TIMEOUT
):
cache = fastmanifestcache.getinstance(repo.store.opener, ui)
computedrevs = scmutil.revrange(repo, revset)
sortedrevs = sorted(computedrevs, key=lambda x: -x)
if len(sortedrevs) == 0:
# normally, we prune as we make space for new revisions to add
# to the cache. however, if we're not adding any new elements,
# we'll never check the disk cache size. this is an explicit
# check for that particular scenario.
cache.prune()
else:
revstomannodes = {}
mannodesprocessed = set()
for rev in sortedrevs:
mannode = revlog.hex(repo.changelog.changelogrevision(rev).manifest)
revstomannodes[rev] = mannode
mannodesprocessed.add(mannode)
if mannode in cache.ondiskcache:
ui.debug(
"[FM] skipped %s, already cached "
"(fast path)\n" % (mannode,)
)
# Account for the fact that we access this manifest
cache.ondiskcache.touch(mannode)
continue
manifest = repo[rev].manifest()
fastmanifest = cfastmanifest.fastmanifest(manifest.text())
cache.makeroomfor(fastmanifest.bytes(), mannodesprocessed)
try:
cache[mannode] = fastmanifest
except CacheFullException:
break
# Make the least relevant entries have an artificially older
# mtime than the more relevant ones. We use a resolution of 2
                # for time to work across all platforms and ensure that the
# order is marked.
#
# Note that we use sortedrevs and not revs because here we
# don't care about the shuffling, we just want the most relevant
# revisions to have more recent mtime.
mtimemultiplier = 2
for offset, rev in enumerate(sortedrevs):
if rev in revstomannodes:
hexnode = revstomannodes[rev]
cache.ondiskcache.touch(hexnode, delay=offset * mtimemultiplier)
else:
metricscollector.get().recordsample("cacheoverflow", hit=True)
# We didn't have enough space for that rev
except error.LockHeld:
return
except (OSError, IOError) as ex:
if ex.errno == errno.EACCES:
# permission issue
ui.warn(("warning: not using fastmanifest\n"))
ui.warn(("(make sure that .hg/store is writeable)\n"))
return
raise
total, numentries = cache.ondiskcache.totalsize()
if isinstance(cache.limit, _systemawarecachelimit):
free = cache.limit.free / 1024 ** 2
else:
free = -1
metricscollector.get().recordsample(
"ondiskcachestats",
bytes=total,
numentries=numentries,
limit=(cache.limit.bytes() / 1024 ** 2),
freespace=free,
)
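To make the mtime staggering above concrete (an illustration only, not additional behaviour): with mtimemultiplier = 2 and three revisions in sortedrevs, the entries are touched with delays of 0, 2 and 4 seconds, so the most relevant manifest keeps the newest mtime:

# offset 0 -> cache.ondiskcache.touch(hexnode, delay=0)  # most relevant, newest mtime
# offset 1 -> cache.ondiskcache.touch(hexnode, delay=2)
# offset 2 -> cache.ondiskcache.touch(hexnode, delay=4)  # least relevant, oldest mtime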
class cacher(object):
@staticmethod
def cachemanifest(repo):
revset = ["fastmanifesttocache()"]
cachemanifestfillandtrim(repo.ui, repo, revset)
class triggers(object):
repos_to_update = set()
@staticmethod
def runcommandtrigger(orig, *args, **kwargs):
result = orig(*args, **kwargs)
for repo in triggers.repos_to_update:
bg = repo.ui.configbool("fastmanifest", "cacheonchangebackground", True)
if bg:
silent_worker = repo.ui.configbool("fastmanifest", "silentworker", True)
# see if the user wants us to invoke a specific instance of
# mercurial.
workerexe = encoding.environ.get("SCM_WORKER_EXE")
cmd = util.hgcmd()[:]
if workerexe is not None:
cmd[0] = workerexe
cmd.extend(["--repository", repo.root, "cachemanifest"])
concurrency.runshellcommand(cmd, silent_worker=silent_worker)
else:
cacher.cachemanifest(repo)
return result
@staticmethod
def onbookmarkchange(orig, self, *args, **kwargs):
repo = self._repo
ui = repo.ui
if ui.configbool("fastmanifest", "cacheonchange", False):
triggers.repos_to_update.add(repo)
metricscollector.get().recordsample("trigger", source="bookmark")
return orig(self, *args, **kwargs)
@staticmethod
def oncommit(orig, self, *args, **kwargs):
repo = self
ui = repo.ui
if ui.configbool("fastmanifest", "cacheonchange", False):
triggers.repos_to_update.add(repo)
metricscollector.get().recordsample("trigger", source="commit")
return orig(self, *args, **kwargs)
@staticmethod
def onremotenameschange(orig, repo, *args, **kwargs):
ui = repo.ui
if ui.configbool("fastmanifest", "cacheonchange", False):
triggers.repos_to_update.add(repo)
metricscollector.get().recordsample("trigger", source="remotenames")
return orig(repo, *args, **kwargs)

View File

@ -1,206 +0,0 @@
# concurrency.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
import errno
import os
import socket
import stat
import subprocess
import sys
import time
import traceback
from edenscm.mercurial import error, pycompat
class looselock(object):
"""A loose lock. If the lock is held and the lockfile is recent, then we
immediately fail. If the lockfile is older than X seconds, where
X=stealtime, then we touch the lockfile and proceed. This is slightly
vulnerable to a thundering herd, as a bunch of callers that arrive at the
expiration may all proceed."""
_host = None
def __init__(self, vfs, lockname, stealtime=10.0):
self.vfs = vfs
self.lockname = lockname
self.stealtime = stealtime
self.refcount = 0
self.stealcount = 0
def _trylock(self, lockcontents):
"""Attempt to acquire a lock.
Raise error.LockHeld if the lock is already held.
Raises error.LockUnavailable if the lock could not be acquired for any
other reason.
This is an internal API, and shouldn't be called externally.
"""
try:
self.vfs.makelock(lockcontents, self.lockname)
except (OSError, IOError) as ex:
if ex.errno in (errno.EEXIST, errno.EAGAIN):
raise error.LockHeld(
ex.errno,
self.vfs.join(self.lockname),
self.lockname,
"unimplemented",
)
raise error.LockUnavailable(
ex.errno, ex.strerror, self.vfs.join(self.lockname), self.lockname
)
def lock(self):
"""Attempt to acquire a lock.
Raise error.LockHeld if the lock is already held and the lock is too
recent to be stolen.
Raises error.LockUnavailable if the lock could not be acquired for any
other reason.
"""
if self.stealcount > 0:
# we stole the lock, so we should continue stealing.
self.stealcount += 1
return self
if looselock._host is None:
looselock._host = socket.gethostname()
lockcontents = "%s:%s" % (looselock._host, os.getpid())
try:
self._trylock(lockcontents)
except error.LockHeld:
# how old is the file?
steal = False
try:
fstat = self.vfs.lstat(self.lockname)
mtime = fstat[stat.ST_MTIME]
if time.time() - mtime > self.stealtime:
# touch the file
self.vfs.utime(self.lockname)
steal = True
else:
raise
except OSError as ex:
if ex.errno == errno.ENOENT:
steal = True
else:
raise
if steal:
# we shouldn't have any hard references
assert self.refcount == 0
# bump the stealcount
self.stealcount += 1
else:
self.refcount += 1
return self
def unlock(self):
"""Releases a lock."""
if self.stealcount > 1:
self.stealcount -= 1
return
elif self.refcount > 1:
self.refcount -= 1
return
elif self.refcount == 1 or self.stealcount == 1:
# delete the file
try:
self.vfs.unlink(self.lockname)
except OSError as ex:
if ex.errno == errno.ENOENT:
pass
else:
raise
self.refcount = 0
self.stealcount = 0
def held(self):
return self.stealcount != 0 or self.refcount != 0
def __enter__(self):
return self.lock()
def __exit__(self, exc_type, exc_value, exc_tb):
return self.unlock()
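A minimal usage sketch for the class above, mirroring how cachemanager takes this lock elsewhere in this diff; repo stands in for the caller's repository object, and 300.0 matches WORKER_SPAWN_LOCK_STEAL_TIMEOUT:

try:
    with looselock(repo.localvfs, "fastmanifest", stealtime=300.0):
        pass  # prime the cache while holding (or having stolen) the lock
except error.LockHeld:
    pass  # a recent lock is held elsewhere; skip this run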
# This originated in hgext/logtoprocess.py, was copied to
# remotefilelog/shallowutil.py, and now here.
if pycompat.iswindows:
# no fork on Windows, but we can create a detached process
# https://msdn.microsoft.com/en-us/library/windows/desktop/ms684863.aspx
# No stdlib constant exists for this value
DETACHED_PROCESS = 0x00000008
_creationflags = DETACHED_PROCESS | subprocess.CREATE_NEW_PROCESS_GROUP
def runshellcommand(script, env=None, silent_worker=True):
if not silent_worker:
raise NotImplementedError("support for non-silent workers not yet built.")
# we can't use close_fds *and* redirect stdin. I'm not sure that we
# need to because the detached process has no console connection.
subprocess.Popen(script, env=env, close_fds=True, creationflags=_creationflags)
else:
def runshellcommand(script, env=None, silent_worker=True):
# double-fork to completely detach from the parent process
# based on http://code.activestate.com/recipes/278731
pid = os.fork()
if pid:
# parent
return
        # subprocess.Popen() forks again; all we need to do is flag the new
        # process as a new session.
newsession = {}
if silent_worker:
if sys.version_info < (3, 2):
newsession["preexec_fn"] = os.setsid
else:
newsession["start_new_session"] = True
try:
# connect stdin to devnull to make sure the subprocess can't
# muck up that stream for mercurial.
if silent_worker:
stderr = stdout = open(os.devnull, "w")
else:
stderr = stdout = None
subprocess.Popen(
script,
stdout=stdout,
stderr=stderr,
stdin=open(os.devnull, "r"),
env=env,
close_fds=True,
**newsession
)
except Exception:
if not silent_worker:
sys.stderr.write("Error spawning worker\n")
traceback.print_exc(file=sys.stderr)
finally:
# mission accomplished, this child needs to exit and not
# continue the hg process here.
if not silent_worker:
sys.stdout.flush()
sys.stderr.flush()
os._exit(0)
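For reference, a sketch of how the cachemanager call site drives this helper to spawn a detached worker; the cmd construction is copied from that caller, and util and repo are assumed to be in scope there:

cmd = util.hgcmd()[:]
cmd.extend(["--repository", repo.root, "cachemanifest"])
runshellcommand(cmd, silent_worker=True)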

View File

@ -1,26 +0,0 @@
# constants.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
# fastmanifests are cached in .hg/store/<CACHE_SUBDIR>
from __future__ import absolute_import
CACHE_SUBDIR = "manifestcache"
# See _systemawarecachelimit in cachemanager
# for an explanation of these coefficients
DEFAULT_LOWGROWTH_TRESHOLDGB = 20
DEFAULT_MAXCACHESIZEGB = 6
DEFAULT_LOWGROWTH_SLOPE = 0.1
DEFAULT_HIGHGROWTHSLOPE = 0.2
# How old of a lock do we tolerate before we spawn off a new worker to populate
# the cache
WORKER_SPAWN_LOCK_STEAL_TIMEOUT = 300
# How many entries do we keep in the in-memory cache?
DEFAULT_MAX_MEMORY_ENTRIES = 10

View File

@ -1,36 +0,0 @@
# debug.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
from __future__ import absolute_import
class manifestaccesslogger(object):
"""Class to log manifest access and confirm our assumptions"""
def __init__(self, ui):
self._ui = ui
def revwrap(self, orig, *args, **kwargs):
"""Wraps manifest.rev and log access"""
r = orig(*args, **kwargs)
logfile = self._ui.config("fastmanifest", "logfile")
if logfile:
try:
with open(logfile, "a") as f:
f.write("%s\n" % r)
except EnvironmentError:
pass
return r
class fixedcachelimit(object):
"""A fix cache limit expressed as a number of bytes"""
def __init__(self, bytes):
self._bytes = bytes
def bytes(self):
return self._bytes

File diff suppressed because it is too large Load Diff

View File

@ -1,146 +0,0 @@
# metrics.py
#
# Copyright 2016 Facebook, Inc.
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
#
# To log a new metric, add it to the list FASTMANIFEST_METRICS
# Then from the code, use metrics.metricscollector.get() to obtain the shared
# metrics `collector`.
# Call collector.recordsample(metricsname, key=value, key2=value2, ...) to
# record a sample.
#
# When the command ends, the samples will be relayed with ui.log unless
# they are in the list FASTMANIFEST_DONOTREPORT_METRICS.
# You would put a metric in that list if you do some computation with its
# samples and are not interested in the individual samples but only in their
# aggregation.
# For example, if you want to record the cache hit ratio, you can record
# all the cache hits and cache misses, not report them individually, but
# compute and report their ratio.
#
# To debug metrics, set fastmanifest.debugmetrics = True; this will print
# the metrics collected for each command with ui.status at the end of each
# command.
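A minimal sketch of the recording pattern described above, using metric names and keyword arguments that appear at call sites elsewhere in this diff:

collector = metricscollector.get()
collector.recordsample("trigger", source="commit")
collector.recordsample("revsetsize", size=42)  # 42 is an illustrative value
collector.logsamples(ui)  # ui as passed to the running command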
from __future__ import absolute_import
FASTMANIFEST_DONOTREPORT_METRICS = set(
["cachehit", "diffcachehit", "filesnotincachehit"]
)
FASTMANIFEST_METRICS = set(
[
## Individual Metrics
# ondiskcachestats has information about the cache on disk
# => keys are "bytes", "entries", "limit" and "freespace", all numbers,
# freespace and limit are in MB
"ondiskcachestats",
        # revsetsize is the number of revisions in the 'fastmanifesttocache()' revset
# => key is "size", a number
"revsetsize",
# trigger is what caused caching to trigger
# => keys is "source", one of ("commit", "remotenames", "bookmark")
"trigger",
        # cacheoverflow logs a cache overflow event: not enough space in the
        # cache to store revisions; it will inform us on how to resize the
        # cache if needed
# => key is "hit", always True
"cacheoverflow",
        # The following three metrics will be aggregated as ratios.
        # They register cache hits and misses at different levels: global, diff,
        # and during filesnotin operations
# => key is "hit", True or False, True is a cache hit, False a cache miss
"cachehit",
"diffcachehit",
"filesnotincachehit",
## Aggregate Metrics
# Cache hit ratio (global, diff and filesnotin), expressed as a percentage
# so between 0 and 100. -1 means no operations.
# => keys is "ratio", a number
# examples:
# -1 for cachehitratio => we never accessed a manifest for the command
# 30 for cachehitratio => 30% of manifest access hit the cache
# 45 for diffcachehitratio => 45% of manifest diffs hit the cache
"cachehitratio",
"diffcachehitratio",
"filesnotincachehitratio",
]
)
class metricscollector(object):
_instance = None
@classmethod
def get(cls):
if not cls._instance:
cls._instance = metricscollector()
return cls._instance
def __init__(self):
self.samples = []
def recordsample(self, kind, **kwargs):
assert kind in FASTMANIFEST_METRICS
self.samples.append((kind, kwargs))
def mergesamples(self, collector):
if collector is not self:
self.samples.extend(collector.samples)
return self
def _addaggregatesamples(self):
def _addhitratio(key, aggkey, dedupe=False):
            # Aggregate the cache hits and misses to build a hit ratio and
            # store the ratio as aggkey : {ratio: ratio} in self.samples.
            # If dedupe is set, dedupe using the node field of each sample.
hitlist = (s for s in self.samples if s[0] == key and s[1]["hit"])
misslist = (s for s in self.samples if s[0] == key and not s[1]["hit"])
if dedupe:
hit = len(set(s[1]["node"] for s in hitlist))
miss = len(set(s[1]["node"] for s in misslist))
else:
hit = len(list(hitlist))
miss = len(list(misslist))
if miss + hit == 0:
ratio = -1
else:
ratio = float(hit) * 100 / (miss + hit)
data = {aggkey: int(ratio)}
self.recordsample(aggkey, **data)
_addhitratio("cachehit", "cachehitratio", dedupe=True)
_addhitratio("diffcachehit", "diffcachehitratio")
_addhitratio("filesnotincachehit", "filesnotincachehitratio")
def logsamples(self, ui):
self._addaggregatesamples()
debug = ui.configbool("fastmanifest", "debugmetrics")
if debug:
ui.status(("[FM-METRICS] Begin metrics\n"))
for kind, kwargs in self.samples:
if kind in FASTMANIFEST_DONOTREPORT_METRICS:
continue
if debug:
dispkw = kwargs
                # Not removing freespace and limit would make the test output
                # machine-dependent
if "freespace" in kwargs:
del dispkw["freespace"]
if "limit" in kwargs:
del dispkw["limit"]
# Here we sort to make test output stable
ui.status(
(
"[FM-METRICS] kind: %s, kwargs: %s\n"
% (kind, sorted(dispkw.items()))
)
)
if debug:
ui.status(("[FM-METRICS] End metrics\n"))

View File

@ -353,15 +353,6 @@ def _makerage(ui, repo, **opts):
)
)
# This is quite slow, so we don't want to do it by default
if ui.configbool("rage", "fastmanifestcached", False):
detailed.append(
(
'hg sl -r "fastmanifestcached()"',
(lambda: hgcmd("smartlog", rev=["fastmanifestcached()"])),
)
)
footnotes = []
timeout = opts.get("timeout") or 20

View File

@ -466,14 +466,6 @@ def clientreposetup(repo):
if not repo.name:
raise error.Abort(_("remotefilelog.reponame must be configured"))
if not repo.ui.configbool("treemanifest", "treeonly"):
# If we're not a pure-tree repo, we must be using fastmanifest to
# provide the hybrid manifest implementation.
try:
extensions.find("fastmanifest")
except KeyError:
raise error.Abort(_("cannot use treemanifest without fastmanifest"))
repo.ui.setconfig("verify", "skipmanifests", "True")

View File

@ -257,7 +257,6 @@ def _preimportmodules():
"extutil",
"fastannotate",
"fastlog",
"fastmanifest",
"fbconduit",
"fbhistedit",
"fixcorrupt",

View File

@ -36,6 +36,7 @@ _ignoreextensions = {
"configwarn",
"eden",
"factotum",
"fastmanifest",
"fastpartialmatch",
"fbsparse",
"graphlog",

View File

@ -1461,7 +1461,6 @@ packages = [
"edenscm.hgext.extlib.pywatchman",
"edenscm.hgext.extlib.watchmanclient",
"edenscm.hgext.fastannotate",
"edenscm.hgext.fastmanifest",
"edenscm.hgext.fsmonitor",
"edenscm.hgext.hgevents",
"edenscm.hgext.hggit",
@ -1709,41 +1708,6 @@ extmodules = [
libraries=["datapack", "lz4", "mpatch", SHA1_LIBRARY],
extra_compile_args=filter(None, [STDCPP0X, WALL] + cflags),
),
Extension(
"edenscmnative.cfastmanifest",
sources=[
"edenscm/hgext/extlib/cfastmanifest.c",
"edenscm/hgext/extlib/cfastmanifest/bsearch.c",
"lib/clib/buffer.c",
"edenscm/hgext/extlib/cfastmanifest/checksum.c",
"edenscm/hgext/extlib/cfastmanifest/node.c",
"edenscm/hgext/extlib/cfastmanifest/tree.c",
"edenscm/hgext/extlib/cfastmanifest/tree_arena.c",
"edenscm/hgext/extlib/cfastmanifest/tree_convert.c",
"edenscm/hgext/extlib/cfastmanifest/tree_copy.c",
"edenscm/hgext/extlib/cfastmanifest/tree_diff.c",
"edenscm/hgext/extlib/cfastmanifest/tree_disk.c",
"edenscm/hgext/extlib/cfastmanifest/tree_iterator.c",
"edenscm/hgext/extlib/cfastmanifest/tree_path.c",
],
depends=[
"edenscm/hgext/extlib/cfastmanifest/bsearch.h",
"edenscm/hgext/extlib/cfastmanifest/checksum.h",
"edenscm/hgext/extlib/cfastmanifest/internal_result.h",
"edenscm/hgext/extlib/cfastmanifest/node.h",
"edenscm/hgext/extlib/cfastmanifest/path_buffer.h",
"edenscm/hgext/extlib/cfastmanifest/result.h",
"edenscm/hgext/extlib/cfastmanifest/tests.h",
"edenscm/hgext/extlib/cfastmanifest/tree_arena.h",
"edenscm/hgext/extlib/cfastmanifest/tree.h",
"edenscm/hgext/extlib/cfastmanifest/tree_iterator.h",
"edenscm/hgext/extlib/cfastmanifest/tree_path.h",
],
include_dirs=include_dirs,
library_dirs=library_dirs,
libraries=[SHA1_LIBRARY],
extra_compile_args=filter(None, [STDC99, WALL, WSTRICTPROTOTYPES] + cflags),
),
]

View File

@ -12,42 +12,6 @@ New errors are not allowed. Warnings are strongly discouraged.
$ NPROC=`python -c 'import multiprocessing; print(multiprocessing.cpu_count())'`
$ cat $TESTTMP/files.txt | xargs -n64 -P $NPROC contrib/check-code.py --warnings --per-file=0 | sort
Skipping edenscm/hgext/extlib/cfastmanifest.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/bsearch.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/bsearch.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/bsearch_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/checksum.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/checksum.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/checksum_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/internal_result.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/node.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/node.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/node_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/path_buffer.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/result.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tests.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tests.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_arena.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_arena.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_convert.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_convert_rt.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_convert_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_copy.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_copy_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_diff.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_diff_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_disk.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_disk_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_dump.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_iterate_rt.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_iterator.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_iterator.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_iterator_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_path.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_path.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cfastmanifest/tree_test.c it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cstore/datapackstore.cpp it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cstore/datapackstore.h it has no-che?k-code (glob)
Skipping edenscm/hgext/extlib/cstore/datastore.h it has no-che?k-code (glob)

View File

@ -53,16 +53,6 @@ New errors are not allowed. Warnings are strongly discouraged.
undocumented: extensions.remotenames (str)
undocumented: extensions.treemanifest (str)
undocumented: fastlog.enabled (bool)
undocumented: fastmanifest.cachecutoffdays (int) [60]
undocumented: fastmanifest.cacheonchange (bool)
undocumented: fastmanifest.cacheonchangebackground (bool) [True]
undocumented: fastmanifest.debugmetrics (bool)
undocumented: fastmanifest.logfile (str)
undocumented: fastmanifest.relevantremotenames (list) [["master"]]
undocumented: fastmanifest.silent (bool)
undocumented: fastmanifest.silentworker (bool) [True]
undocumented: fastmanifest.usecache (bool)
undocumented: fastmanifest.usetree (bool)
undocumented: fbconduit.backingrepos (list) [[reponame]]
undocumented: fbconduit.gitcallsigns (list)
undocumented: fbconduit.host (str)
@ -103,7 +93,6 @@ New errors are not allowed. Warnings are strongly discouraged.
undocumented: phrevset.callsign (str)
undocumented: pushrebase.blocknonpushrebase (bool)
undocumented: pushrebase.rewritedates (bool)
undocumented: rage.fastmanifestcached (bool)
undocumented: remotefilelog.backgroundrepack (bool)
undocumented: remotefilelog.cachegroup (str)
undocumented: remotefilelog.debug (bool)

View File

@ -124,7 +124,8 @@ Make a local tree-only draft commit
\s*8 (re)
# No manifest revlog revision was added
$ hg debugindex -m --config treemanifest.treeonly=False
abort: cannot use treemanifest without fastmanifest
hg debugindex: invalid arguments
(use 'hg debugindex -h' to get help)
[255]
Tree-only amend
@ -136,7 +137,8 @@ Tree-only amend
\s*12 (re)
# No manifest revlog revision was added
$ hg debugindex -m --config treemanifest.treeonly=False
abort: cannot use treemanifest without fastmanifest
hg debugindex: invalid arguments
(use 'hg debugindex -h' to get help)
[255]
# Delete the original commits packs
@ -206,7 +208,8 @@ Test pulling new commits from a hybrid server
new changesets 098a163f13ea
$ hg debugindex -m --config treemanifest.treeonly=False
abort: cannot use treemanifest without fastmanifest
hg debugindex: invalid arguments
(use 'hg debugindex -h' to get help)
[255]
$ hg log -r tip --stat --pager=off
fetching tree '' 7e265a5dc5229c2b237874c6bd19f6ef4120f949, based on 5fbe397e5ac6cb7ee263c5c67613c4665306d143* (glob)
@ -263,7 +266,8 @@ Test turning treeonly off and making sure we can still commit on top of treeonly
commits
$ echo >> subdir/x
$ hg debugindex -m --config treemanifest.treeonly=False | tail -1
abort: cannot use treemanifest without fastmanifest
hg debugindex: invalid arguments
(use 'hg debugindex -h' to get help)
$ hg commit -m 'treeonly from hybrid repo'
$ hg log -r . -T '{desc}\n' --stat
treeonly from hybrid repo
@ -276,7 +280,8 @@ commits
1 files changed, 1 insertions(+), 0 deletions(-)
$ hg debugindex -m --config treemanifest.treeonly=False | tail -1
abort: cannot use treemanifest without fastmanifest
hg debugindex: invalid arguments
(use 'hg debugindex -h' to get help)
$ hg debugstrip -r .
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
saved backup bundle to $TESTTMP/client/.hg/strip-backup/41373853bc69-c732668d-backup.hg

View File

@ -310,8 +310,6 @@ Test extension help:
extutil (no help text available)
fastannotate yet another annotate implementation that might be faster
fastlog
fastmanifest a treemanifest disk cache for speeding up manifest
comparison
fbconduit (no help text available)
fbhistedit extends the existing histedit functionality
fixcorrupt (no help text available)