[fastmanifest] initial checkin

Summary:
Still missing:

== Code and tests: ==
* remove path
* contains path (but this is essentially the same as get path)
* read/write from file

== Tests: ==
* checksumming directories

Test Plan: well, it kind of builds.

Reviewers: durham, simpkins, lcharignon

Reviewed By: lcharignon

Subscribers: net-systems-diffs@, mitrandir, mjpieters, akushner, rmcelroy

Differential Revision: https://phabricator.fb.com/D3120166

Signature: t1:3120166:1459464981:60f19dd1e36d62776a69fa88c018122a4be27d87
This commit is contained in:
Tony Tung 2016-04-01 00:46:28 -07:00
parent c640cac22d
commit 14a8f27f55
31 changed files with 3596 additions and 0 deletions

View File

@ -6,3 +6,4 @@
^dist/
tests/getdb\.sh
tests/.testtimes*
subinclude:fastmanifest/.hgignore

1
fastmanifest/.hgignore Normal file
View File

@ -0,0 +1 @@
.idea

View File

@ -0,0 +1,98 @@
# Copyright 2016-present Facebook. All Rights Reserved.
#
# Build file.
cmake_minimum_required(VERSION 3.4)
project(fastmanifest)
#set(CMAKE_VERBOSE_MAKEFILE on)
SET(CMAKE_C_FLAGS "-Wall -Werror")
SET(CMAKE_C_FLAGS_DEBUG "-O0 -g")
SET(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
SET(CMAKE_C_FLAGS_RELWITHDEBINFO "-O0 -g")
add_library(bsearch
bsearch.c
bsearch.h
)
add_executable(bsearch_test bsearch_test.c)
target_link_libraries(bsearch_test bsearch)
add_library(fastmanifest
checksum.c
checksum.h
internal_result.h
node.c
result.h
tree.c
tree_arena.c
tree_arena.h
tree_convert.c
buffer.h
buffer.c
tree_copy.c)
# Locate the OpenSSL crypto library that checksum.c's SHA-1 calls link against.
# /opt/local/lib is searched in addition to CMake's default locations.
# Note: the keyword is NAMES; a bare `NAME` is not a keyword and is treated as
# just another candidate library name.
find_library(OPENSSL
  NAMES crypto
  PATHS /opt/local/lib)
target_include_directories(fastmanifest PUBLIC /opt/local/include)
target_link_libraries(fastmanifest PUBLIC ${OPENSSL})
SET(TEST_HEADER_FILES tests.h)
add_executable(checksum_test checksum_test.c ${TEST_HEADER_FILES})
target_link_libraries(checksum_test bsearch fastmanifest)
add_executable(node_test node_test.c ${TEST_HEADER_FILES})
target_link_libraries(node_test bsearch fastmanifest)
add_executable(tree_test tree_test.c ${TEST_HEADER_FILES})
target_link_libraries(tree_test bsearch fastmanifest)
add_executable(tree_convert_test tree_convert_test.c ${TEST_HEADER_FILES})
target_link_libraries(tree_convert_test bsearch fastmanifest)
add_executable(tree_copy_test tree_copy_test.c)
target_link_libraries(tree_copy_test bsearch fastmanifest)
add_executable(tree_convert_rt tree_convert_rt.c)
target_link_libraries(tree_convert_rt bsearch fastmanifest)
add_executable(null_test null_test.c)
# For release-flavoured builds (Release or RelWithDebInfo), make a successful
# build depend on the unit tests passing: every test binary is run under
# valgrind as a post-build step. A strict Release build compiles out assert.h
# assertions, but the ASSERTs in the unit tests themselves still fire.
if(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo OR CMAKE_BUILD_TYPE MATCHES Release)
  foreach(unit_test
      bsearch_test
      checksum_test
      node_test
      tree_test
      tree_convert_test
      tree_copy_test)
    add_custom_command(
      TARGET ${unit_test}
      POST_BUILD
      COMMAND valgrind -q --error-exitcode=127 $<TARGET_FILE:${unit_test}>)
  endforeach()
endif()

4
fastmanifest/README Normal file
View File

@ -0,0 +1,4 @@
fastmanifest is a tree-based implementation to speed up manifest
operations in Mercurial. Its design is optimized for quick
deserialization from a persistent store. In compact form, the entire tree
is relocatable without any traversals.

40
fastmanifest/bsearch.c Normal file
View File

@ -0,0 +1,40 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// bsearch.c: binary search implementation with context-aware callback.
#include <stdio.h>
#include <stddef.h>
#include "bsearch.h"
/**
 * Binary search over `nel` elements of `width` bytes each, starting at `base`.
 *
 * `compare` is called with the needle, a pointer to one array element, and
 * `context`. A negative result continues the search before that element, a
 * positive result continues after it, and zero stops the search.
 *
 * Returns the index of the element for which `compare` returned 0. If there is
 * no such element, returns the index (0..nel) at which the search converged,
 * i.e. the first element that compared greater than the needle, or `nel`.
 */
size_t bsearch_between(const void* needle,
                       const void* base, const size_t nel, const size_t width,
                       int (*compare) (const void* needle,
                                       const void* fromarray,
                                       const void* context),
                       const void* context) {
  // address elements through a byte pointer; arithmetic on `void*` is a
  // compiler extension rather than standard C.
  const char* bytes = (const char*) base;
  size_t start = 0;
  size_t end = nel;

  while (start < end) {
    // start < end <= nel, so midpoint is always a valid element index.
    const size_t midpoint = start + ((end - start) / 2);
    const int cmp = compare(needle, bytes + (midpoint * width), context);

    if (cmp == 0) {
      return midpoint;
    } else if (cmp < 0) {
      end = midpoint;
    } else {
      start = midpoint + 1;
    }
  }

  return start;
}

60
fastmanifest/bsearch.h Normal file
View File

@ -0,0 +1,60 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// bsearch.h: binary search declarations with context-aware callback. this
// is a standalone library.
#ifndef __BSEARCH_BSEARCH_H__
#define __BSEARCH_BSEARCH_H__
#include <stdbool.h>
#include <stddef.h>
#include <sys/types.h>
/**
 * A generic binary search that allows a comparator to evaluate the placement of
 * a needle relative to the elements of a sorted array.
 *
 * `base` points to `nel` elements of `width` bytes each. The comparator is
 * called with the needle, a pointer to one element of the array, and the
 * caller-supplied `context`.
 *
 * Returns a value from 0 to nel: the index of the element for which the
 * comparator returned 0, or, if no element matched, the index at which the
 * search converged (the position where the needle would be inserted).
 *
 * The comparator should return:
 * <0 if the needle should be placed before the element.
 * =0 if the needle matches the element.
 * >0 if the needle should be placed after the element.
 */
extern size_t bsearch_between(
const void* needle,
const void* base, const size_t nel, const size_t width,
int (*compare) (const void* needle,
const void* fromarray,
const void* context),
const void* context
);
/**
 * A convenient macro to build comparators for `bsearch_between`. Callers
 * provide COMPARATOR_NAME, the name of the function to define, and COMPARE, a
 * function or function-like macro taking (needle, fromarray).
 *
 * The generated function has the three-argument signature that
 * `bsearch_between` expects; its `context` argument is accepted but ignored.
 * It is the responsibility of the caller to ensure that COMPARE can properly
 * cast the two void pointers to sane pointers.
 */
#define COMPARATOR_BUILDER(COMPARATOR_NAME, COMPARE) \
int COMPARATOR_NAME( \
const void* needle, \
const void* fromarray, \
const void* context) { \
return COMPARE(needle, fromarray); \
}
/**
 * Same as COMPARATOR_BUILDER, except that COMPARE takes three arguments
 * (needle, fromarray, context): the context pointer given to
 * `bsearch_between` is forwarded to COMPARE instead of being dropped.
 */
#define CONTEXTUAL_COMPARATOR_BUILDER(COMPARATOR_NAME, COMPARE) \
int COMPARATOR_NAME( \
const void* needle, \
const void* fromarray, \
const void* context) { \
return COMPARE(needle, fromarray, context); \
}
#endif /* #ifndef __BSEARCH_BSEARCH_H__ */

View File

@ -0,0 +1,63 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// bsearch_test.c: tests for binary search with a context-aware callback.
#include "bsearch.h"
#include "tests.h"
// Three-way comparison of the intptr_t values that `left` and `right` point
// to: yields -1, 0 or 1. Written as two comparisons rather than a subtraction
// so the result cannot overflow, and cannot be truncated to the wrong sign
// when it is returned as an `int`. Both arguments are evaluated twice, so
// they must not have side effects.
#define CMP(left, right)                                               \
  ((*((const intptr_t*) (left)) > *((const intptr_t*) (right))) -      \
   (*((const intptr_t*) (left)) < *((const intptr_t*) (right))))
COMPARATOR_BUILDER(intptr_cmp, CMP)
// Runs one bsearch_between case: builds an intptr_t array from the variadic
// arguments, searches it for `needle` with intptr_cmp (no context), and
// ASSERTs that the returned index equals `expected`. The element count is
// derived from the size of the array literal, so it always matches the
// arguments given.
#define BSEARCH_TEST(needle, expected, ...) \
{ \
size_t result; \
intptr_t _needle = needle; \
intptr_t* array = (intptr_t[]) {__VA_ARGS__}; \
\
result = bsearch_between( \
&_needle, \
array, \
sizeof((intptr_t[]) {__VA_ARGS__}) / sizeof(intptr_t), \
sizeof(intptr_t), \
&intptr_cmp, \
NULL); \
ASSERT(result == expected); \
}
// Checks the index returned by bsearch_between for needles that fall between
// elements, match an element exactly, precede the whole array, and follow the
// whole array. Each case reads: needle, expected index, array contents.
void test_bsearch() {
  // needle lies between the only two elements.
  BSEARCH_TEST(20, 1, 18, 21);

  // needle lies between the last two of three elements.
  BSEARCH_TEST(20, 2, 15, 18, 21);

  // needle is present in the array.
  BSEARCH_TEST(20, 2, 15, 18, 20, 21);

  // needle sorts before every element.
  BSEARCH_TEST(10, 0, 15, 18, 20, 21);

  // needle sorts after every element.
  BSEARCH_TEST(30, 4, 15, 18, 20, 21);
}
// Test entry point: runs test_bsearch and returns 0. The command-line
// arguments are not used.
int main(int argc, char* argv[]) {
test_bsearch();
return 0;
}

26
fastmanifest/buffer.c Normal file
View File

@ -0,0 +1,26 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// buffer.c: implementation for a generic mechanism to expand a heap-allocated
// buffer.
#include <stdlib.h>
#include <string.h>
#include "buffer.h"
/**
 * Appends `input_sz` bytes from `input` to the heap buffer `*buffer`.
 *
 * The buffer is first grown through expand_to_fit (using `factor`,
 * `min_increment` and `max_increment`) when it cannot hold the new bytes.
 * The data is then copied to offset `*buffer_idx`, and `*buffer_idx` is
 * advanced past it.
 *
 * Returns false, leaving the buffer contents and index untouched, if the
 * buffer could not be grown; returns true otherwise.
 */
bool buffer_append(
    char **buffer, size_t *buffer_idx, size_t *buffer_sz,
    char *input, size_t input_sz,
    const float factor,
    const size_t min_increment,
    const size_t max_increment) {
  const bool has_room = expand_to_fit(
      buffer, buffer_idx, buffer_sz, input_sz,
      factor, min_increment, max_increment);
  if (!has_room) {
    return false;
  }

  char *write_pos = *buffer + *buffer_idx;
  memcpy(write_pos, input, input_sz);
  *buffer_idx = *buffer_idx + input_sz;

  return true;
}

51
fastmanifest/buffer.h Normal file
View File

@ -0,0 +1,51 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// buffer.h: declarations for a generic mechanism to expand a heap-allocated
// buffer. this is for internal use only.
#ifndef __FASTMANIFEST_BUFFER_H__
#define __FASTMANIFEST_BUFFER_H__
#include <stdbool.h>
#include <stddef.h>
/**
 * Ensures that `*buffer` has room for `input_sz` more bytes after offset
 * `*buffer_idx`, reallocating it if necessary.
 *
 * When the free space (`*buffer_sz - *buffer_idx`) is too small, the new size
 * starts at `factor * *buffer_sz`, is raised to at least
 * `*buffer_sz + min_increment`, capped at `*buffer_sz + max_increment`, and
 * finally raised to at least `*buffer_sz + input_sz` so the input always fits.
 *
 * On success `*buffer` and `*buffer_sz` describe the (possibly moved) buffer
 * and true is returned. If realloc fails, false is returned and both are left
 * unchanged.
 */
static inline bool expand_to_fit(
    char **buffer, size_t *buffer_idx, size_t *buffer_sz,
    size_t input_sz,
    const float factor,
    const size_t min_increment,
    const size_t max_increment) {
  const size_t current_sz = *buffer_sz;

  // enough free space already: nothing to do.
  if (input_sz <= current_sz - *buffer_idx) {
    return true;
  }

  const size_t floor_sz = min_increment + current_sz;
  const size_t ceiling_sz = max_increment + current_sz;
  const size_t required_sz = input_sz + current_sz;

  size_t target_sz = factor * ((float) current_sz);
  if (target_sz < floor_sz) {
    target_sz = floor_sz;
  }
  if (target_sz > ceiling_sz) {
    target_sz = ceiling_sz;
  }
  // fitting the input takes precedence over the growth cap.
  if (target_sz < required_sz) {
    target_sz = required_sz;
  }

  void* grown = realloc(*buffer, target_sz);
  if (grown == NULL) {
    return false;
  }

  *buffer = grown;
  *buffer_sz = target_sz;
  return true;
}
/**
 * Appends `input_sz` bytes from `input` at offset `*buffer_idx` of the heap
 * buffer `*buffer`, growing the buffer via expand_to_fit if it is too small,
 * and advances `*buffer_idx` by `input_sz`.
 *
 * `factor`, `min_increment` and `max_increment` are passed through to
 * expand_to_fit to control how much the buffer grows.
 *
 * Returns false if the buffer could not be grown, true otherwise.
 */
extern bool buffer_append(
char **buffer, size_t *buffer_idx, size_t *buffer_sz,
char *input, size_t input_sz,
const float factor,
const size_t min_increment,
const size_t max_increment);
#endif /* __FASTMANIFEST_BUFFER_H__ */

34
fastmanifest/checksum.c Normal file
View File

@ -0,0 +1,34 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// checksum.c: implementation for recalculating the checksums for
// intermediate nodes in a tree.
#include <openssl/sha.h>
#include "node.h"
#include "tree.h"
/**
 * Recomputes the checksum of `node` from its children.
 *
 * Every child whose checksum is not marked valid is brought up to date first
 * (recursively). The node's checksum is then the SHA-1 over, for each child in
 * order: the child's name, the child's checksum, and one byte of its flags.
 * On return the node's checksum is SHA1_BYTES long and marked valid.
 */
static void update_checksum(node_t* node) {
  SHA_CTX ctx;
  SHA1_Init(&ctx);

  // find all the children and make sure their checksums are up-to-date.
  // the loop must advance the child index; `node` itself stays fixed.
  for (uint32_t ix = 0; ix < node->num_children; ix ++) {
    node_t* child = get_child_by_index(node, ix);
    if (child->checksum_valid == false) {
      update_checksum(child);
    }

    SHA1_Update(&ctx, child->name, child->name_sz);
    SHA1_Update(&ctx, child->checksum, child->checksum_sz);
    SHA1_Update(&ctx, &child->flags, 1);
  }

  SHA1_Final(node->checksum, &ctx);
  node->checksum_sz = SHA1_BYTES;
  node->checksum_valid = true;
}
// Recomputes the checksum of the tree's shadow root. Children of a node are
// recomputed along the way only if their checksum is not marked valid.
void update_checksums(tree_t* tree) {
update_checksum(tree->shadow_root);
}

13
fastmanifest/checksum.h Normal file
View File

@ -0,0 +1,13 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// checksum.h: declarations for recalculating the checksums for intermediate
// nodes in a tree. this is for internal use only.
#ifndef __FASTMANIFEST_CHECKSUM_H__
#define __FASTMANIFEST_CHECKSUM_H__
#include "tree.h"
/**
 * Recalculates the checksum of the tree's shadow root, first refreshing any
 * descendant checksums that are not marked valid.
 */
void update_checksums(tree_t* tree);
#endif /* #ifndef __FASTMANIFEST_CHECKSUM_H__ */

View File

@ -0,0 +1,10 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// checksum_test.c: tests for recalculating the checksums for intermediate
// nodes in a tree.
#include "checksum.h"
// Placeholder entry point: no checksum tests are implemented yet, so this
// always succeeds.
int main(int argc, char* argv[]) {
return 0;
}

View File

@ -0,0 +1,42 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// internal_result.h: result codes for internal APIs. obviously, this is for
// internal use only.
#ifndef FASTMANIFEST_INTERNAL_RESULT_H
#define FASTMANIFEST_INTERNAL_RESULT_H
#include <stdint.h>
// Result codes returned by add_child.
typedef enum _node_add_child_result_t {
// the child was inserted.
ADD_CHILD_OK,
// the parent is not in use or is not of type TYPE_IMPLICIT.
ADD_CHILD_ILLEGAL_PARENT,
// the child is not in use.
ADD_CHILD_ILLEGAL_CHILD,
// the parent already has a child with the same name.
CONFLICTING_ENTRY_PRESENT,
// the parent has no spare child slot; it must be enlarged first.
NEEDS_LARGER_NODE,
} node_add_child_result_t;
// Result codes returned by remove_child.
typedef enum _node_remove_child_result_t {
// the child slot was removed.
REMOVE_CHILD_OK,
// the parent is not in use or is not of type TYPE_IMPLICIT.
REMOVE_CHILD_ILLEGAL_PARENT,
// the child index is not smaller than the parent's number of children.
REMOVE_CHILD_ILLEGAL_INDEX,
} node_remove_child_result_t;
// Status codes reported by enlarge_child_capacity.
typedef enum {
// the child was replaced by a larger clone.
ENLARGE_OK,
// the larger clone could not be allocated.
ENLARGE_OOM,
// the parent is not in use.
ENLARGE_ILLEGAL_PARENT,
// the child index is not smaller than the parent's number of children.
ENLARGE_ILLEGAL_INDEX,
} node_enlarge_child_capacity_code_t;
// Result of enlarge_child_capacity. `old_child` and `new_child` are only
// filled in when `code` is ENLARGE_OK.
typedef struct _node_enlarge_child_capacity_result_t {
// outcome of the operation.
node_enlarge_child_capacity_code_t code;
// the child that used to occupy the slot.
struct _node_t* old_child;
// the larger clone that now occupies the slot.
struct _node_t* new_child;
} node_enlarge_child_capacity_result_t;
// Result of search_children.
typedef struct _node_search_children_result_t {
// the matching child, or NULL if no child has the requested name.
struct _node_t* child;
// index of the matching child; 0 when `child` is NULL.
uint32_t child_num;
} node_search_children_result_t;
#endif //FASTMANIFEST_INTERNAL_RESULT_H

13
fastmanifest/iterator.c Normal file
View File

@ -0,0 +1,13 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// iterator.c: implementation for traversing all the nodes of a tree in-order.
// One entry of an iterator's traversal path.
// NOTE(review): presumably `next_node` is the node being visited at this
// depth and `next_child` the index of its next child to descend into --
// nothing in this file uses the struct yet; confirm once the iterator exists.
// NOTE(review): this file includes no headers, so node_t, tree_t and the
// fixed-width integer types are undeclared here as it stands.
typedef struct _path_record_t {
node_t* next_node;
uint32_t next_child;
} path_record_t;
// State of a tree iterator (the definition behind the opaque iterator_t).
// NOTE(review): `copy` looks like a private copy of the tree being iterated
// and `path_alloc_cnt` like the allocated length of a path_record_t array --
// neither is used yet; confirm against the implementation when it lands.
struct _iterator_t {
tree_t* copy;
uint16_t path_alloc_cnt;
};

11
fastmanifest/iterator.h Normal file
View File

@ -0,0 +1,11 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// iterator.h: declarations for traversing all the nodes of a tree in-order.
// for internal use only.
#ifndef __FASTMANIFEST_ITERATOR_H__
#define __FASTMANIFEST_ITERATOR_H__
// Opaque handle for a tree iterator; the struct itself is defined in
// iterator.c.
typedef struct _iterator_t iterator_t;
#endif /* #ifndef __FASTMANIFEST_ITERATOR_H__ */

271
fastmanifest/node.c Normal file
View File

@ -0,0 +1,271 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// node.c: implementation for representing a node in a tree.
#include <stdlib.h>
#include "bsearch.h"
#include "node.h"
/**
 * Returns the number of bytes a node needs in order to hold a name of
 * `name_sz` bytes and `num_children` child pointers.
 *
 * The layout is: the fixed node_t header, the name, padding up to the next
 * ptrdiff_t boundary, then one ptrdiff_t per child. The name is counted once
 * only, which keeps the result a multiple of sizeof(ptrdiff_t) and consistent
 * with get_child_ptr_base_offset.
 */
static size_t calculate_required_size(
    uint16_t name_sz,
    uint32_t num_children) {
  const size_t align = sizeof(ptrdiff_t);

  // end of the name, rounded up to where the child pointers start.
  size_t child_ptr_offset = offsetof(node_t, name) + name_sz;
  child_ptr_offset = (child_ptr_offset + align - 1) & ~(align - 1);

  return child_ptr_offset + (sizeof(ptrdiff_t) * num_children);
}
/**
 * Fills in the header of a freshly obtained node block.
 *
 * The node records its block size and name, starts with no children, is
 * marked in use, has type TYPE_UNDEFINED and no valid checksum. `flags` and
 * `checksum_sz` are zeroed so that no field of the header is left with
 * whatever the allocator handed back. `num_children` (the requested capacity)
 * is not stored: the capacity is derived from `block_sz` by max_children.
 */
static void initialize_node(
    node_t* node, size_t block_sz,
    const char* name, uint16_t name_sz,
    uint32_t num_children) {
  node->block_sz = block_sz;
  node->num_children = 0;
  node->name_sz = name_sz;
  node->checksum_sz = 0;
  node->flags = 0;
  node->in_use = true;
  node->type = TYPE_UNDEFINED;
  node->checksum_valid = false;

  memcpy(&node->name, name, name_sz);
}
node_t* alloc_node(
const char* name, uint16_t name_sz,
uint32_t max_children) {
size_t size = calculate_required_size(name_sz, max_children);
node_t* result = (node_t*) malloc(size);
if (result == NULL) {
return result;
}
initialize_node(result, size, name, name_sz, max_children);
return result;
}
void* setup_node(
void* ptr, size_t ptr_size_limit,
const char* name, uint16_t name_sz,
uint32_t max_children) {
size_t size = calculate_required_size(name_sz, max_children);
if (size > ptr_size_limit) {
return NULL;
}
node_t* node = (node_t*) ptr;
intptr_t next = (intptr_t) ptr;
next += size;
initialize_node(node, size, name, name_sz, max_children);
return (void*) next;
}
node_t* clone_node(const node_t* node) {
uint32_t old_capacity = max_children(node);
uint32_t new_capacity = (((uint64_t) old_capacity) *
(100 + STORAGE_INCREMENT_PERCENTAGE)) /
100;
if (new_capacity - old_capacity < MIN_STORAGE_INCREMENT) {
new_capacity = old_capacity + MIN_STORAGE_INCREMENT;
} else if (new_capacity - old_capacity > MAX_STORAGE_INCREMENT) {
new_capacity = old_capacity + MAX_STORAGE_INCREMENT;
}
node_t* clone = alloc_node(
node->name, node->name_sz,
new_capacity);
if (clone == NULL) {
return NULL;
}
// copy metadata over.
clone->num_children = node->num_children;
if (node->checksum_valid) {
memcpy(clone->checksum, node->checksum, sizeof(node->checksum));
}
clone->type = node->type;
clone->checksum_valid = node->checksum_valid;
// calculate the difference we need to apply to the relative pointers.
ptrdiff_t delta = ((intptr_t) node) - ((intptr_t) clone);
// get the child pointer base of each node.
const ptrdiff_t* node_base = get_child_ptr_base_const(node);
ptrdiff_t* clone_base = get_child_ptr_base(clone);
for (int ix = 0; ix < node->num_children; ix ++) {
clone_base[ix] = node_base[ix] + delta;
}
return clone;
}
// Needle handed to bsearch_between when searching a node's children: a name
// given as a pointer plus an explicit length.
typedef struct {
const char* name;
uint16_t name_sz;
} find_child_struct_t;
// Comparator body for searching a node's child pointer array.
// `nameobject` is the find_child_struct_t needle, `relptr` points at one
// ptrdiff_t slot of the array, and `context` is the parent node that the slot
// is relative to. The slot is resolved to the child node, whose name is then
// compared against the needle with name_compare.
#define NAME_NODE_COMPARE(nameobject, relptr, context) \
(name_compare( \
((const find_child_struct_t*) nameobject)->name, \
((const find_child_struct_t*) nameobject)->name_sz, \
get_child_from_diff((node_t*) context, *((ptrdiff_t*) relptr))))
// Defines the file-local function name_node_cmp with the signature expected
// by bsearch_between.
static CONTEXTUAL_COMPARATOR_BUILDER(name_node_cmp, NAME_NODE_COMPARE);
/**
 * Inserts `child` into `node`'s list of children, keeping the list sorted by
 * name.
 *
 * Returns:
 *   ADD_CHILD_ILLEGAL_PARENT   if `node` is not in use or not TYPE_IMPLICIT.
 *   NEEDS_LARGER_NODE          if `node` has no free child slot.
 *   ADD_CHILD_ILLEGAL_CHILD    if `child` is not in use.
 *   CONFLICTING_ENTRY_PRESENT  if a child with the same name already exists.
 *   ADD_CHILD_OK               if the child was inserted.
 *
 * The checks are made in the order listed; `node` is only modified when
 * ADD_CHILD_OK is returned.
 */
node_add_child_result_t add_child(node_t* node, const node_t* child) {
  // the parent must be a live implicit node.
  if (!(node->in_use && node->type == TYPE_IMPLICIT)) {
    return ADD_CHILD_ILLEGAL_PARENT;
  }

  // the parent must have a free slot; otherwise the caller has to enlarge it.
  if (node->num_children + 1 > max_children(node)) {
    return NEEDS_LARGER_NODE;
  }

  // the child must be live.
  if (!child->in_use) {
    return ADD_CHILD_ILLEGAL_CHILD;
  }

  ptrdiff_t* slots = get_child_ptr_base(node);
  find_child_struct_t key = { child->name, child->name_sz };
  const size_t slot = bsearch_between(
      &key,
      slots,
      node->num_children,
      sizeof(ptrdiff_t),
      name_node_cmp,
      node);

  if (slot < node->num_children) {
    // an existing entry sits where the child belongs: refuse a duplicate name.
    node_t* occupant = get_child_from_diff(node, slots[slot]);
    if (name_compare(child->name, child->name_sz, occupant) == 0) {
      return CONFLICTING_ENTRY_PRESENT;
    }

    // shift the entries from `slot` onwards one position towards the end to
    // open a gap. e.g. with 3 entries and slot 1, entries 1 and 2 move.
    memmove(&slots[slot + 1], &slots[slot],
            sizeof(ptrdiff_t) * (node->num_children - slot));
  }

  // the count must be bumped before the write, since set_child_by_index
  // checks the index against it.
  node->num_children ++;
  set_child_by_index(node, slot, child);

  return ADD_CHILD_OK;
}
/**
 * Removes the child at index `child_num` from `node`'s list of children,
 * closing the gap it leaves. The child node itself is not freed.
 *
 * Returns:
 *   REMOVE_CHILD_ILLEGAL_PARENT  if `node` is not in use or not TYPE_IMPLICIT.
 *   REMOVE_CHILD_ILLEGAL_INDEX   if `child_num` is not a valid child index.
 *   REMOVE_CHILD_OK              if the entry was removed.
 */
node_remove_child_result_t remove_child(node_t* node, uint32_t child_num) {
  // the parent must be a live implicit node.
  if (!(node->in_use && node->type == TYPE_IMPLICIT)) {
    return REMOVE_CHILD_ILLEGAL_PARENT;
  }

  // the index must refer to an existing child.
  if (child_num >= node->num_children) {
    return REMOVE_CHILD_ILLEGAL_INDEX;
  }

  const uint32_t last = node->num_children - 1;
  if (child_num < last) {
    // slide the entries that follow the removed one up by one position.
    ptrdiff_t* slots = get_child_ptr_base(node);
    memmove(&slots[child_num], &slots[child_num + 1],
            sizeof(ptrdiff_t) * (last - child_num));
  }

  node->num_children = last;

  return REMOVE_CHILD_OK;
}
/**
 * Replaces the child at index `child_num` of `node` with a larger clone (see
 * clone_node) and points the parent's slot at the clone.
 *
 * On success the result code is ENLARGE_OK and the result carries both the
 * old child and its replacement; the old child is not freed here. On failure
 * the code is ENLARGE_ILLEGAL_PARENT (parent not in use), ENLARGE_ILLEGAL_INDEX
 * (no such child) or ENLARGE_OOM (clone allocation failed); both pointers in
 * the result are NULL and the parent is unchanged.
 */
node_enlarge_child_capacity_result_t enlarge_child_capacity(
    node_t* node,
    uint32_t child_num) {
  // start from a fully defined result so that the failure paths never hand
  // indeterminate pointers back to the caller.
  node_enlarge_child_capacity_result_t result = { ENLARGE_OK, NULL, NULL };

  // verify parent node.
  if (!node->in_use) {
    result.code = ENLARGE_ILLEGAL_PARENT;
    return result;
  }

  // verify child index.
  if (child_num >= node->num_children) {
    result.code = ENLARGE_ILLEGAL_INDEX;
    return result;
  }

  node_t* old_child = get_child_by_index(node, child_num);
  node_t* new_child = clone_node(old_child);
  if (new_child == NULL) {
    result.code = ENLARGE_OOM;
    return result;
  }

  // write the entry
  set_child_by_index(node, child_num, new_child);

  result.code = ENLARGE_OK;
  result.old_child = old_child;
  result.new_child = new_child;

  return result;
}
/**
 * Looks for a child of `node` whose name is exactly (`name`, `name_sz`).
 *
 * Returns the child and its index if found; otherwise returns a result whose
 * child is NULL and whose index is 0.
 */
node_search_children_result_t search_children(
    const node_t* node,
    const char* name,
    const uint16_t name_sz) {
  const node_search_children_result_t not_found = { NULL, 0 };

  const ptrdiff_t* slots = get_child_ptr_base_const(node);
  find_child_struct_t key = { name, name_sz };
  const size_t slot = bsearch_between(
      &key,
      slots,
      node->num_children,
      sizeof(ptrdiff_t),
      name_node_cmp,
      node);

  // the search converged past the last child: nothing can match.
  if (slot >= node->num_children) {
    return not_found;
  }

  // the search may have stopped at an insertion point rather than a match, so
  // verify that the entry there really has the requested name.
  node_t* candidate = get_child_from_diff(node, slots[slot]);
  if (name_compare(name, name_sz, candidate) != 0) {
    return not_found;
  }

  return (node_search_children_result_t) { candidate, slot };
}
/**
 * Finds which slot of `parent` points at `child` by scanning the child
 * pointers in order.
 *
 * Returns the index of the first matching slot, or UINT32_MAX if none of the
 * parent's children is `child`.
 */
uint32_t get_child_index(
    const node_t* const parent,
    const node_t* const child) {
  const ptrdiff_t* slots = get_child_ptr_base_const(parent);
  const uint32_t count = parent->num_children;

  uint32_t slot = 0;
  while (slot < count) {
    // each slot holds the child's address relative to the parent.
    if (((intptr_t) parent) + slots[slot] == (intptr_t) child) {
      return slot;
    }
    slot ++;
  }

  return UINT32_MAX;
}

246
fastmanifest/node.h Normal file
View File

@ -0,0 +1,246 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// node.h: declarations for representing a node in a tree. for internal use
// only.
#ifndef __FASTMANIFEST_NODE_H__
#define __FASTMANIFEST_NODE_H__
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "internal_result.h"
// Growth policy used by clone_node: child capacity grows by this percentage,
// but by no fewer than MIN_STORAGE_INCREMENT and no more than
// MAX_STORAGE_INCREMENT slots.
#define STORAGE_INCREMENT_PERCENTAGE 20
#define MIN_STORAGE_INCREMENT 10
#define MAX_STORAGE_INCREMENT 100
// Size in bytes of the checksum field of node_t.
#define CHECKSUM_BYTES 21
// Length in bytes of a SHA-1 digest.
#define SHA1_BYTES 20
// Mask that clears the low bits of an offset, rounding it down to a multiple
// of sizeof(ptrdiff_t).
#define PTR_ALIGN_MASK (~ ((ptrdiff_t) (sizeof(ptrdiff_t) - 1)))
// Values for node_t.type. Only TYPE_IMPLICIT nodes may have children.
#define TYPE_UNDEFINED 0
#define TYPE_IMPLICIT 1
#define TYPE_LEAF 2
// A node of the tree. The fixed header below is followed, in the same memory
// block, by the name bytes and then by an array of child pointers.
//
// the start of each of these nodes must be 32-bit aligned.
// NOTE(review): the child pointer array is accessed as ptrdiff_t, so nodes
// presumably need ptrdiff_t alignment rather than just 32 bits -- confirm.
typedef struct _node_t {
// total size in bytes of the memory block holding this node.
uint32_t block_sz;
// number of child pointers currently in use.
uint32_t num_children;
// length of the name in bytes (the name is not NUL-terminated).
uint16_t name_sz;
// checksum bytes; only the first `checksum_sz` are meaningful.
uint8_t checksum[CHECKSUM_BYTES];
uint8_t checksum_sz;
// per-node flags; one byte of flags goes into the parent's checksum.
uint8_t flags;
// whether this node is live.
bool in_use : 1;
// one of TYPE_UNDEFINED, TYPE_IMPLICIT, TYPE_LEAF.
unsigned int type : 2;
// whether `checksum` is up to date.
bool checksum_valid : 1;
// the name starts here and runs for `name_sz` bytes.
char name[0];
// padding to the nearest ptrdiff_t boundary.
// then a series of ptrdiff_t-sized pointers to the children. each one holds
// the child's address relative to the address of this node.
} node_t;
/**
 * Compares the name (`name`, `name_sz`) with the name stored in `node`.
 *
 * Names are length-delimited byte strings, not NUL-terminated strings, so the
 * common prefix is compared byte-wise with memcmp (a comparison that stopped
 * at a NUL byte would treat names that differ after it as equal). If the
 * common prefix is identical, the shorter name sorts first.
 *
 * Returns <0 if (`name`, `name_sz`) is lexicographically less than the name in
 * node.
 *
 * Returns =0 if (`name`, `name_sz`) is lexicographically equal to the name in
 * node.
 *
 * Returns >0 if (`name`, `name_sz`) is lexicographically greater than the name
 * in node.
 */
static inline int name_compare(
    const char* name,
    uint16_t name_sz,
    const node_t* node) {
  const size_t common_sz = (name_sz < node->name_sz) ? name_sz : node->name_sz;

  const int cmp = memcmp(name, node->name, common_sz);
  if (cmp != 0) {
    return cmp;
  }

  return ((int) name_sz) - ((int) node->name_sz);
}
/**
 * Returns the offset, in bytes from the start of a node, of the first child
 * pointer, given a node with name size `name_sz`.
 *
 * The child pointers start at the first ptrdiff_t boundary at or after the
 * end of the name. The offset is computed with offsetof rather than by
 * indexing through a null node pointer.
 */
static inline ptrdiff_t get_child_ptr_base_offset(
    uint16_t name_sz) {
  ptrdiff_t offset = (ptrdiff_t) (offsetof(node_t, name) + name_sz);

  // round up to the next multiple of sizeof(ptrdiff_t).
  offset = (offset + (ptrdiff_t) (sizeof(ptrdiff_t) - 1)) & PTR_ALIGN_MASK;

  return offset;
}
/**
 * Returns the address of the first slot of the node's child pointer array.
 * Each slot is a ptrdiff_t, hence the return type. Note that this is the
 * location of the first child pointer, *not* its value.
 *
 * The node must be in use.
 */
static inline ptrdiff_t* get_child_ptr_base(node_t* node) {
  assert(node->in_use);

  const ptrdiff_t skip = get_child_ptr_base_offset(node->name_sz);
  return (ptrdiff_t*) (((intptr_t) node) + skip);
}
/**
 * Same as get_child_ptr_base, for callers that only hold a const node; the
 * returned slots are const as well.
 */
static inline const ptrdiff_t* get_child_ptr_base_const(const node_t* node) {
  node_t* unqualified = (node_t*) node;
  return get_child_ptr_base(unqualified);
}
/**
 * Returns how many child pointers fit in the node's memory block: the bytes
 * between the start of the child pointer array and the end of the block,
 * divided by the size of one pointer slot.
 */
static inline uint32_t max_children(const node_t* node) {
  const intptr_t slots_start = (intptr_t) get_child_ptr_base_const(node);

  ptrdiff_t slot_bytes = node->block_sz;
  slot_bytes -= slots_start - ((intptr_t) node);

  return slot_bytes / sizeof(intptr_t);
}
/**
 * Returns the child stored in slot `child_num` of `node`.
 *
 * The node must be in use and of type TYPE_IMPLICIT, and `child_num` must be
 * smaller than the node's number of children.
 */
static inline node_t* get_child_by_index(
    const node_t* node,
    uint32_t child_num) {
  assert(node->in_use);
  assert(node->type == TYPE_IMPLICIT);
  assert(child_num < node->num_children);

  // the slot holds the child's address relative to the parent's address.
  const ptrdiff_t* slots = get_child_ptr_base_const(node);
  const ptrdiff_t relative = slots[child_num];

  return (node_t*) (((intptr_t) node) + relative);
}
/**
 * Resolves a child pointer value `diff` (a child's address relative to its
 * parent) against the parent `node` and returns the child.
 *
 * The node must be in use and of type TYPE_IMPLICIT.
 */
static inline node_t* get_child_from_diff(const node_t* node, ptrdiff_t diff) {
  assert(node->in_use);
  assert(node->type == TYPE_IMPLICIT);

  const intptr_t child_address = ((intptr_t) node) + diff;
  return (node_t*) child_address;
}
/**
 * Points slot `child_num` of `node` at `child`.
 *
 * The node must be in use and of type TYPE_IMPLICIT, `child_num` must be
 * smaller than the node's number of children, and the child must be in use.
 */
static inline void set_child_by_index(
    node_t *node,
    size_t child_num,
    const node_t *child) {
  assert(node->in_use);
  assert(node->type == TYPE_IMPLICIT);
  assert(child_num < node->num_children);
  assert(child->in_use);

  // store the child's address relative to the parent's address.
  ptrdiff_t* slots = get_child_ptr_base(node);
  slots[child_num] = ((intptr_t) child) - ((intptr_t) node);
}
/**
 * Define some macros for users to test if their values are within the
 * restrictions of our node implementation. Each evaluates to true when the
 * value is strictly below the maximum of the field's type (uint16_t for name
 * sizes, uint32_t for child counts).
 * NOTE(review): the maximum value itself is rejected; for child numbers this
 * matches get_child_index using UINT32_MAX as "not found" -- confirm that the
 * exclusion is intended for name sizes too.
 */
#define VERIFY_NAME_SZ(name_sz) ((uintmax_t) (name_sz) < UINT16_MAX)
#define VERIFY_CHILD_NUM(child_num) ((uintmax_t) (child_num) < UINT32_MAX)
/**
 * Allocate a node on the heap suitably sized for a given name and a given
 * number of children. The name is copied into the node, which starts out in
 * use, with type TYPE_UNDEFINED, no children and no valid checksum.
 *
 * Returns NULL if the allocation fails.
 */
extern node_t* alloc_node(
const char* name, uint16_t name_sz,
uint32_t max_children
);
/**
 * Given a block of memory, attempt to place a node at the start of the block.
 * The node will be suitably sized for a given name and a given number of
 * children. The name is copied into the node, which starts out in use, with
 * type TYPE_UNDEFINED, no children and no valid checksum.
 *
 * Returns the address following the end of the node if the block is large
 * enough to accommodate the node, or NULL if the block is too small.
 */
extern void* setup_node(
void* ptr, size_t ptr_size_limit,
const char* name, uint16_t name_sz,
uint32_t max_children);
/**
 * Clone a node onto the heap and increase the storage capacity by
 * STORAGE_INCREMENT_PERCENTAGE, but by at least MIN_STORAGE_INCREMENT and no
 * more than MAX_STORAGE_INCREMENT.
 *
 * The clone refers to the same children as the original. The original is not
 * freed. Returns NULL if the clone could not be allocated.
 */
extern node_t* clone_node(const node_t* node);
/**
 * Adds a child to the node, keeping the children sorted by name. A child with
 * the same name must not already exist (CONFLICTING_ENTRY_PRESENT is returned
 * if one does), and the node must have a free child slot (NEEDS_LARGER_NODE is
 * returned if it does not).
 *
 * The caller is responsible for going up the chain and updating metadata, such
 * as the total number of leaf nodes in tree_t and marking the checksum bit
 * dirty recursively up the tree.
 */
extern node_add_child_result_t add_child(node_t* node, const node_t* child);
/**
 * Remove a child of a node, given a child index. Only the parent's pointer to
 * the child is removed; the child node itself is not freed.
 *
 * The caller is responsible for going up the chain and updating metadata, such
 * as the total number of leaf nodes in tree_t and marking the checksum bit
 * dirty recursively up the tree.
 */
extern node_remove_child_result_t remove_child(
node_t* node,
uint32_t child_num);
/**
 * Enlarge a child of a node, given a child index. The child is replaced by a
 * larger clone; on success the result holds both the old child and the new
 * one, and the old child is left for the caller to dispose of.
 *
 * By itself, this operation should not affect things like the total number of
 * leaf nodes in the tree and the freshness of the checksums. However, it may
 * affect total allocation.
 */
extern node_enlarge_child_capacity_result_t enlarge_child_capacity(
node_t* node,
uint32_t child_num);
/**
 * Find a child given a name.
 *
 * If the child was found, the result holds the pointer to the child and its
 * index. If it was not found, the result's child pointer is NULL and its
 * index is 0.
 */
extern node_search_children_result_t search_children(
const node_t* node,
const char* name,
const uint16_t name_sz);
/**
 * Find the index of a child given the child node itself (compared by
 * address). If the node is found among the parent's children, return its
 * index. Otherwise return UINT32_MAX.
 */
extern uint32_t get_child_index(
const node_t* const parent,
const node_t* const child);
/**
 * Convenience wrapper around search_children for callers that only need the
 * child itself: returns the child with the given name, or NULL if there is
 * none.
 */
static inline node_t* get_child_by_name(
    const node_t* node,
    const char* name,
    uint16_t name_sz) {
  return search_children(node, name, name_sz).child;
}
#endif /* #ifndef __FASTMANIFEST_NODE_H__ */

365
fastmanifest/node_test.c Normal file
View File

@ -0,0 +1,365 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// node_test.c: unit tests for the node.c
#include "node.h"
#include "tests.h"
// alloc_node for a NUL-terminated name: the name length is taken with strlen.
#define ALLOC_NODE_STR(name, max_children) \
alloc_node(name, strlen(name), max_children)
// get_child_by_name for a NUL-terminated name: the name length is taken with
// strlen.
#define GET_CHILD_BY_NAME_STR(node, name) \
get_child_by_name(node, name, strlen(name))
/**
 * Attach one leaf to an implicit parent and check that looking it up by name
 * returns the very node that was added.
 */
void test_simple_parent_child() {
  node_t* dir = ALLOC_NODE_STR("parent", 1);
  node_t* leaf = ALLOC_NODE_STR("child", 0);

  dir->in_use = true;
  dir->type = TYPE_IMPLICIT;
  leaf->in_use = true;
  leaf->type = TYPE_LEAF;

  node_add_child_result_t add_result = add_child(dir, leaf);
  ASSERT(add_result == ADD_CHILD_OK);

  node_t* found = GET_CHILD_BY_NAME_STR(dir, "child");
  ASSERT(found == leaf);
}
/**
 * Check that the size calculations leave room for exactly what was asked for:
 * for parent names of 1 to 8 bytes, a parent allocated with one child slot
 * must accept one child and be able to find it again.
 */
void test_space() {
  for (uint16_t parent_name_sz = 1; parent_name_sz <= 8; parent_name_sz ++) {
    node_t* dir = alloc_node("abcdefgh", parent_name_sz, 1);
    node_t* leaf = ALLOC_NODE_STR("child", 0);

    dir->in_use = true;
    dir->type = TYPE_IMPLICIT;
    leaf->in_use = true;
    leaf->type = TYPE_LEAF;

    node_add_child_result_t add_result = add_child(dir, leaf);
    ASSERT(add_result == ADD_CHILD_OK);

    node_t* found = GET_CHILD_BY_NAME_STR(dir, "child");
    ASSERT(found == leaf);
  }
}
/**
 * Add two children to a parent that only has room for one. The first must be
 * accepted, the second must be rejected with NEEDS_LARGER_NODE, and only the
 * first may be found afterwards.
 */
void test_insufficient_space() {
  node_t* dir = ALLOC_NODE_STR("parent", 1);
  node_t* first = ALLOC_NODE_STR("child1", 0);
  node_t* second = ALLOC_NODE_STR("child2", 0);

  dir->in_use = true;
  dir->type = TYPE_IMPLICIT;
  first->in_use = true;
  first->type = TYPE_LEAF;
  second->in_use = true;
  second->type = TYPE_LEAF;

  node_add_child_result_t add_result = add_child(dir, first);
  ASSERT(add_result == ADD_CHILD_OK);

  add_result = add_child(dir, second);
  ASSERT(add_result == NEEDS_LARGER_NODE);

  node_t* found = GET_CHILD_BY_NAME_STR(dir, "child1");
  ASSERT(found == first);

  found = GET_CHILD_BY_NAME_STR(dir, "child2");
  ASSERT(found == NULL);
}
/**
 * One add_child scenario: the state to give the parent and the child before
 * the call, and the result code the call is expected to produce.
 */
typedef struct {
  bool parent_in_use;
  int parent_type;
  bool child_in_use;
  int child_type;
  node_add_child_result_t expected_result;
} parent_child_test_cases_t;

/**
 * Call `add_child` with a table of parent/child states and verify that each
 * combination yields the expected result code.
 */
void test_add_child_combinations() {
  parent_child_test_cases_t cases[] =
      {
          // parent or child not in use.
          {false, TYPE_IMPLICIT, true, TYPE_LEAF, ADD_CHILD_ILLEGAL_PARENT},
          {true, TYPE_IMPLICIT, false, TYPE_LEAF, ADD_CHILD_ILLEGAL_CHILD},

          // parent type invalid.
          {true, TYPE_LEAF, true, TYPE_LEAF, ADD_CHILD_ILLEGAL_PARENT},

          // child type invalid.
          {true, TYPE_IMPLICIT, false, TYPE_UNDEFINED, ADD_CHILD_ILLEGAL_CHILD},

          // some good outcomes.
          {true, TYPE_IMPLICIT, true, TYPE_LEAF, ADD_CHILD_OK},
          {true, TYPE_IMPLICIT, true, TYPE_IMPLICIT, ADD_CHILD_OK},
      };
  const size_t num_cases = sizeof(cases) / sizeof(parent_child_test_cases_t);

  for (size_t case_num = 0; case_num < num_cases; case_num ++) {
    const parent_child_test_cases_t* scenario = &cases[case_num];

    // every scenario gets a fresh parent with room for one child.
    node_t* dir = ALLOC_NODE_STR("parent", 1);
    node_t* leaf = ALLOC_NODE_STR("child", 0);

    dir->in_use = scenario->parent_in_use;
    dir->type = scenario->parent_type;
    leaf->in_use = scenario->child_in_use;
    leaf->type = scenario->child_type;

    node_add_child_result_t add_result = add_child(dir, leaf);
    ASSERT(add_result == scenario->expected_result);
  }
}
/**
 * Insert children in lexicographical order. Ensure that we can find them.
 *
 * The children are named after the prefixes of TEST_MANY_CHILDREN_NAME_STR
 * ("a", "ab", "abc", ...), so that a longer prefix sorts after a shorter one.
 *
 * requirement: strlen(TEST_MANY_CHILDREN_NAME_STR) >=
 *              TEST_MANY_CHILDREN_COUNT
 */
#define TEST_MANY_CHILDREN_NAME_STR "abcdefgh"
#define TEST_MANY_CHILDREN_COUNT 8

void test_many_children() {
  node_t* dir = ALLOC_NODE_STR("parent", TEST_MANY_CHILDREN_COUNT);
  dir->in_use = true;
  dir->type = TYPE_IMPLICIT;

  // added[n - 1] is the child whose name is the first n bytes of the string.
  node_t* added[TEST_MANY_CHILDREN_COUNT];

  for (uint16_t prefix_sz = 1;
       prefix_sz <= TEST_MANY_CHILDREN_COUNT;
       prefix_sz ++) {
    node_t* leaf = alloc_node(TEST_MANY_CHILDREN_NAME_STR, prefix_sz, 0);
    leaf->in_use = true;
    leaf->type = TYPE_LEAF;

    node_add_child_result_t add_result = add_child(dir, leaf);
    ASSERT(add_result == ADD_CHILD_OK);

    added[prefix_sz - 1] = leaf;
  }

  for (uint16_t prefix_sz = 1;
       prefix_sz <= TEST_MANY_CHILDREN_COUNT;
       prefix_sz ++) {
    node_t* found = get_child_by_name(
        dir, TEST_MANY_CHILDREN_NAME_STR, prefix_sz);
    ASSERT(found == added[prefix_sz - 1]);
  }
}
/**
 * Insert children in reverse lexicographical order (longest prefix of
 * TEST_MANY_CHILDREN_NAME_STR first), so every insertion lands in front of
 * the existing entries. Ensure that we can find them.
 *
 * requirement: strlen(TEST_MANY_CHILDREN_NAME_STR) >=
 *              TEST_MANY_CHILDREN_COUNT
 */
void test_many_children_reverse() {
  node_t* dir = ALLOC_NODE_STR("parent", TEST_MANY_CHILDREN_COUNT);
  dir->in_use = true;
  dir->type = TYPE_IMPLICIT;

  // added[n - 1] is the child whose name is the first n bytes of the string.
  node_t* added[TEST_MANY_CHILDREN_COUNT];

  for (uint16_t prefix_sz = TEST_MANY_CHILDREN_COUNT;
       prefix_sz > 0;
       prefix_sz --) {
    node_t* leaf = alloc_node(TEST_MANY_CHILDREN_NAME_STR, prefix_sz, 0);
    leaf->in_use = true;
    leaf->type = TYPE_LEAF;

    node_add_child_result_t add_result = add_child(dir, leaf);
    ASSERT(add_result == ADD_CHILD_OK);

    added[prefix_sz - 1] = leaf;
  }

  for (uint16_t prefix_sz = 1;
       prefix_sz <= TEST_MANY_CHILDREN_COUNT;
       prefix_sz ++) {
    node_t* found = get_child_by_name(
        dir, TEST_MANY_CHILDREN_NAME_STR, prefix_sz);
    ASSERT(found == added[prefix_sz - 1]);
  }
}
/**
 * Create a node with many children. Clone the node. Ensure we can locate all
 * of the children through the clone, and that the clone has more capacity
 * than the original.
 *
 * requirement: strlen(TEST_CLONE_NAME_STR) >=
 *              TEST_CLONE_COUNT
 */
#define TEST_CLONE_NAME_STR "abcdefgh"
#define TEST_CLONE_COUNT 8

void test_clone() {
  node_t* dir = ALLOC_NODE_STR("parent", TEST_CLONE_COUNT);
  dir->in_use = true;
  dir->type = TYPE_IMPLICIT;

  // added[n - 1] is the child whose name is the first n bytes of the string.
  node_t* added[TEST_CLONE_COUNT];

  for (uint16_t prefix_sz = 1; prefix_sz <= TEST_CLONE_COUNT; prefix_sz ++) {
    node_t* leaf = alloc_node(TEST_CLONE_NAME_STR, prefix_sz, 0);
    leaf->in_use = true;
    leaf->type = TYPE_LEAF;

    node_add_child_result_t add_result = add_child(dir, leaf);
    ASSERT(add_result == ADD_CHILD_OK);

    added[prefix_sz - 1] = leaf;
  }

  node_t* clone = clone_node(dir);

  // the clone must resolve every name to the original child nodes.
  for (uint16_t prefix_sz = 1; prefix_sz <= TEST_CLONE_COUNT; prefix_sz ++) {
    node_t* found = get_child_by_name(clone, TEST_CLONE_NAME_STR, prefix_sz);
    ASSERT(found == added[prefix_sz - 1]);
  }

  ASSERT(max_children(clone) > max_children(dir));
}
/**
 * Populate a node with children, then remove them one at a time in a
 * deterministic but scattered order. After each removal, verify that the
 * removed children are gone and the remaining ones are still reachable.
 *
 * requirement: strlen(TEST_REMOVE_CHILD_NAME_STR) >=
 *              TEST_REMOVE_CHILD_COUNT
 */
#define TEST_REMOVE_CHILD_NAME_STR "1234ffgg"
#define TEST_REMOVE_CHILD_COUNT 8
void test_remove_child() {
  node_t* parent = ALLOC_NODE_STR("parent", TEST_REMOVE_CHILD_COUNT);
  parent->in_use = true;
  parent->type = TYPE_IMPLICIT;

  // expected[k] is the leaf named by the first (k + 1) bytes of
  // TEST_REMOVE_CHILD_NAME_STR; present[k] tracks whether it is still a child.
  node_t* expected[TEST_REMOVE_CHILD_COUNT];
  bool present[TEST_REMOVE_CHILD_COUNT];

  for (uint16_t slot = 0; slot < TEST_REMOVE_CHILD_COUNT; slot ++) {
    node_t* leaf = alloc_node(TEST_REMOVE_CHILD_NAME_STR, slot + 1, 0);
    leaf->in_use = true;
    leaf->type = TYPE_LEAF;

    node_add_child_result_t add_result = add_child(parent, leaf);
    ASSERT(add_result == ADD_CHILD_OK);
    expected[slot] = leaf;
    present[slot] = true;
  }

  for (uint16_t round = 0; round < TEST_REMOVE_CHILD_COUNT; round ++) {
    // pick the victim: starting at index 0, step to the next still-present
    // index (wrapping around) TEST_REMOVE_CHILD_COUNT + 1 times.
    uint16_t victim = 0;
    for (uint16_t hop = 0; hop <= TEST_REMOVE_CHILD_COUNT; hop ++) {
      do {
        victim = (victim + 1) % TEST_REMOVE_CHILD_COUNT;
      } while (!present[victim]);
    }

    // locate the victim in the parent and remove it by child index.
    node_search_children_result_t hit = search_children(
        parent, TEST_REMOVE_CHILD_NAME_STR, victim + 1);
    ASSERT(hit.child == expected[victim]);
    present[victim] = false;
    ASSERT(remove_child(parent, hit.child_num) == REMOVE_CHILD_OK);

    // a child must be found if and only if it has not been removed yet.
    for (uint16_t slot = 0; slot < TEST_REMOVE_CHILD_COUNT; slot ++) {
      node_t* found = get_child_by_name(
          parent, TEST_REMOVE_CHILD_NAME_STR, slot + 1);
      if (present[slot]) {
        ASSERT(found != NULL);
      } else {
        ASSERT(found == NULL);
      }
    }
  }
}
/**
 * Create a node and fill it with children, then enlarge the child capacity of
 * the child at index 0. Verify that the call reports the original node as the
 * old child, and that the node now stored at index 0 has a non-zero child
 * capacity and the same name as the node it replaced.
 *
 * requirement: strlen(TEST_ENLARGE_CHILD_CAPACITY_NAME_STR) >=
 *              TEST_ENLARGE_CHILD_CAPACITY_COUNT
 */
#define TEST_ENLARGE_CHILD_CAPACITY_NAME_STR "abcdefgh"
#define TEST_ENLARGE_CHILD_CAPACITY_COUNT 8
void test_enlarge_child_capacity() {
  // size everything with this test's own constant rather than borrowing
  // TEST_MANY_CHILDREN_COUNT from an unrelated test.
  node_t* parent = ALLOC_NODE_STR(
      "parent",
      TEST_ENLARGE_CHILD_CAPACITY_COUNT);
  node_t* children[TEST_ENLARGE_CHILD_CAPACITY_COUNT];  // ordered as we expect
                                                        // to find them in the
                                                        // parent's child list.

  for (uint16_t name_sz = 1;
       name_sz <= TEST_ENLARGE_CHILD_CAPACITY_COUNT;
       name_sz ++) {
    node_t* child = alloc_node(
        TEST_ENLARGE_CHILD_CAPACITY_NAME_STR,
        name_sz,
        0);
    parent->in_use = true;
    parent->type = TYPE_IMPLICIT;
    child->in_use = true;
    child->type = TYPE_LEAF;

    node_add_child_result_t result = add_child(parent, child);
    ASSERT(result == ADD_CHILD_OK);
    children[name_sz - 1] = child;
  }

  node_enlarge_child_capacity_result_t enlarge_child_capacity_result =
      enlarge_child_capacity(parent, 0);
  ASSERT(enlarge_child_capacity_result.code == ENLARGE_OK);
  ASSERT(enlarge_child_capacity_result.old_child ==
         children[0]);

  // the children were created with a capacity of 0, so any non-zero capacity
  // shows that the node at index 0 was enlarged.
  node_t* enlarged = get_child_by_index(parent, 0);
  ASSERT(max_children(enlarged) > 0);
  ASSERT(name_compare(enlarged->name, enlarged->name_sz,
                      enlarge_child_capacity_result.old_child) == 0);
}
// Test driver: runs each test function in turn and returns 0 once all of
// them have returned. `argc` and `argv` are not used.
int main(int argc, char* argv[]) {
test_simple_parent_child();
test_space();
test_insufficient_space();
test_add_child_combinations();
test_many_children();
test_many_children_reverse();
test_clone();
test_remove_child();
test_enlarge_child_capacity();
return 0;
}

7
fastmanifest/null_test.c Normal file
View File

@ -0,0 +1,7 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// null_test.c: garbage test to make CLion happy.
int main(int argc, char* argv[]) {
  // placeholder program: ignores its arguments and always succeeds.
  (void) argc;
  (void) argv;
  return 0;
}

62
fastmanifest/result.h Normal file
View File

@ -0,0 +1,62 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// result.h: return types for publicly accessible methods. this is
// indirectly exposed through tree.h.
#ifndef __FASTMANIFEST_RESULT_H__
#define __FASTMANIFEST_RESULT_H__
// Status codes reported by `get_path`.
typedef enum {
// the path was found; the result's `node` field is set.
GET_PATH_OK,
// the path does not exist in the tree.
GET_PATH_NOT_FOUND,
// unexpected internal failure.
GET_PATH_WTF,
} get_path_code_t;
// Result of `get_path`: a status code plus the node that was found. tree.c
// sets `node` to NULL for every code other than GET_PATH_OK.
typedef struct _get_path_result_t {
get_path_code_t code;
struct _node_t* node;
} get_path_result_t;
// Status codes reported by `add_or_update_path`.
typedef enum _add_update_path_result_t {
// the path was added or updated.
ADD_UPDATE_PATH_OK,
// a memory allocation failed.
ADD_UPDATE_PATH_OOM,
// a file is present where a directory is needed, or vice versa.
ADD_UPDATE_PATH_CONFLICT,
// unexpected internal failure.
ADD_UPDATE_PATH_WTF,
} add_update_path_result_t;
// Status codes for setting node metadata. Only a success code exists so far.
typedef enum _set_metadata_result_t {
SET_METADATA_OK,
} set_metadata_result_t;
// Status codes for `remove_path`. Only a success code exists so far.
typedef enum _remove_path_result_t {
REMOVE_PATH_OK,
} remove_path_result_t;
// Status codes for `write_to_file`. Only a success code exists so far.
typedef enum _write_to_file_result_t {
WRITE_TO_FILE_OK,
} write_to_file_result_t;
// Status codes reported by `convert_from_flat`.
typedef enum {
CONVERT_FROM_FLAT_OK,
// a memory allocation failed.
CONVERT_FROM_FLAT_OOM,
// unexpected internal failure.
CONVERT_FROM_FLAT_WTF,
} convert_from_flat_code_t;
// Result of `convert_from_flat`: a status code plus the resulting tree.
// NOTE(review): presumably `tree` is only meaningful when `code` is
// CONVERT_FROM_FLAT_OK -- confirm against tree_convert.c.
typedef struct _convert_from_flat_result_t {
convert_from_flat_code_t code;
struct _tree_t* tree;
} convert_from_flat_result_t;
// Status codes reported by `convert_to_flat`.
typedef enum {
CONVERT_TO_FLAT_OK,
// a memory allocation failed.
CONVERT_TO_FLAT_OOM,
// unexpected internal failure.
CONVERT_TO_FLAT_WTF,
} convert_to_flat_code_t;
// Result of `convert_to_flat`: a status code plus the flat manifest buffer
// and its size in bytes.
// NOTE(review): `size_t` is used below but this header includes nothing; it
// relies on the includer (tree.h includes <stddef.h> first) -- confirm that
// is intended.
typedef struct _convert_to_flat_result_t {
convert_to_flat_code_t code;
char* flat_manifest;
size_t flat_manifest_sz;
} convert_to_flat_result_t;
#endif /* #ifndef __FASTMANIFEST_RESULT_H__ */

18
fastmanifest/tests.h Normal file
View File

@ -0,0 +1,18 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tests.h: convenience macros for unit tests.
#ifndef __TESTLIB_TESTS_H__
#define __TESTLIB_TESTS_H__
#include <stdio.h>
#include <stdlib.h>
// ASSERT(cond): when `cond` is false, print the line number of the failing
// assertion to stdout and terminate the process with exit status 37.
//
// The body is wrapped in do { ... } while (0) so that `ASSERT(x);` is exactly
// one statement. A bare `if (...) { ... }` expansion breaks when the macro is
// used as the unbraced body of an if/else.
#define ASSERT(cond) do { \
    if (!(cond)) { \
      printf("failed on line %d\n", __LINE__); \
      exit(37); \
    } \
  } while (0)
// STRPLUSLEN(s): expands to the two arguments `s, strlen(s)`, for calling
// functions that take a (pointer, length) pair. The argument is evaluated
// twice. NOTE(review): `strlen` needs <string.h>, which this header does not
// include -- users must include it themselves.
#define STRPLUSLEN(__str__) __str__, strlen(__str__)
#endif /* #ifndef __TESTLIB_TESTS_H__ */

523
fastmanifest/tree.c Normal file
View File

@ -0,0 +1,523 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree.c: core methods for tree creation and manipulation. to keep this file
// a reasonable length, some of the more complicated methods have
// been split off into their own .c files (tree_arena.c, tree_convert.c,
// tree_copy.c, checksum.c).
#include <stdlib.h>
#include "tree.h"
#include "tree_arena.h"
/**
 * Returns true if the `path_sz` bytes at `path` form a path this library can
 * handle: no leading '/', no trailing '/', and no two consecutive '/'.
 *
 * `path` does not need to be NUL-terminated; only bytes in
 * [path, path + path_sz) are examined.
 *
 * NOTE(review): a zero-length path is accepted here, although the rules
 * documented in tree.h say the path must be of nonzero length. The existing
 * behavior is kept; confirm which one is intended.
 */
bool valid_path(const char* path, const size_t path_sz) {
  // the last byte of the path is at index path_sz - 1, not path_sz.
  if (path_sz > 0 && (path[0] == '/' || path[path_sz - 1] == '/')) {
    return false;
  }

  // a leading slash was rejected above, so every slash at `off` >= 1 has a
  // predecessor to compare against.
  for (size_t off = 1; off < path_sz; off ++) {
    if (path[off] == '/' && path[off - 1] == '/') {
      return false;
    }
  }

  return true;
}
/**
 * Returns the length of the first component of `path`, i.e. the offset of the
 * first '/' within the first `path_sz` bytes. Returns 0 when there is no '/'
 * at all. The path must be valid according to `valid_path`.
 *
 * first_component('abc/def') => 3 ('abc')
 * first_component('abc')     => 0 ('')
 */
static size_t first_component(const char* path, size_t path_sz) {
  size_t pos = 0;

  // advance to the first separator, or to the end of the path.
  while (pos < path_sz && path[pos] != '/') {
    pos ++;
  }

  // reaching the end means no separator was found.
  return (pos < path_sz) ? pos : 0;
}
/**
* Given a path, return the size of the string that would yield just the
* directory name. The path must be valid according to `valid_path`, but
* otherwise the semantics are like os.path.dirname on python.
*
* dirname('abc/def/ghi') => 'abc/def'
* dirname('abc/def') => 'abc'
* dirname('abc') => ''
*/
/*static size_t dirname(const char* path, size_t path_sz) {
for (size_t off = path_sz; off > 0; off --) {
if (path[off - 1] == '/') {
if (off == 1) {
return 1;
} else {
return off - 1;
}
}
}
return 0;
}
*/
// Status codes reported by `tree_add_child`.
typedef enum {
TREE_ADD_CHILD_OK,
// a memory allocation failed.
TREE_ADD_CHILD_OOM,
// unexpected internal failure.
TREE_ADD_CHILD_WTF,
} tree_add_child_code_t;
// Result of `tree_add_child`. On TREE_ADD_CHILD_OK, `newroot` is the node
// that now holds the child (it may differ from the node passed in) and
// `newchild` is the node that was created. Both are NULL on failure.
typedef struct _tree_add_child_result_t {
tree_add_child_code_t code;
node_t* newroot;
node_t* newchild;
} tree_add_child_result_t;
/**
 * Adds a new child named (`name`, `name_sz`) to `root`. Because `root` may
 * need to be replaced by a larger node to accommodate the new child, the
 * *parent* of `root` is required as well.
 *
 * On success (`result.code` == TREE_ADD_CHILD_OK), `result.newchild` is the
 * node that was created and `result.newroot` is the node that now holds it,
 * which differs from `root` if it had to be enlarged. Be sure to save BOTH.
 *
 * On failure both pointers are NULL and the freshly allocated child is
 * released again. If `root` had already been enlarged when the failure
 * occurred, the replacement stays in `root_parent` and is still recorded in
 * `changes`, but is not reported back to the caller.
 *
 * Updates the size change and the non-arena-allocations flag in the tree
 * state change accounting structure. The new child is only accounted for when
 * it was actually added.
 */
static tree_add_child_result_t tree_add_child(
    tree_t* tree,
    node_t* const root_parent,
    node_t* root,
    const char* name, const size_t name_sz,
    tree_state_changes_t* changes) {
  const tree_add_child_result_t oom = { TREE_ADD_CHILD_OOM, NULL, NULL };
  const tree_add_child_result_t wtf = { TREE_ADD_CHILD_WTF, NULL, NULL };

  // create the new child node.
  //
  // NOTE: OPTIMIZATION OPPORTUNITY!
  //
  // we could theoretically try to allocate the new node in the arena and
  // maintain the compacted state of the tree.
  node_t* node = alloc_node(name, name_sz, 0);
  if (node == NULL) {
    return oom;
  }

  // attempt to add the child to `root`.
  node_add_child_result_t add_child_result = add_child(root, node);

  if (add_child_result == NEEDS_LARGER_NODE) {
    // NOTE: OPTIMIZATION OPPORTUNITY!
    //
    // this is a linear scan. it's unclear whether a linear scan for a pointer
    // is better or worse than a binary search that has to chase a pointer. the
    // answer is probably to do the linear scan for nodes with a small number
    // of children, and a binary search for nodes with a lot of children.
    uint32_t index = get_child_index(root_parent, root);
    if (index == UINT32_MAX) {
      free(node);
      return wtf;
    }

    node_enlarge_child_capacity_result_t enlarge_result =
        enlarge_child_capacity(root_parent, index);
    if (enlarge_result.code != ENLARGE_OK) {
      free(node);
      return (enlarge_result.code == ENLARGE_OOM) ? oom : wtf;
    }

    // account for the replaced root: drop the old block if we own it, and add
    // the new one.
    if (!in_arena(tree, enlarge_result.old_child)) {
      // not in arena, free the memory.
      uint32_t block_sz = enlarge_result.old_child->block_sz;
      free(enlarge_result.old_child);
      changes->size_change -= block_sz;
    }
    changes->size_change += enlarge_result.new_child->block_sz;
    changes->non_arena_allocations = true;

    root = enlarge_result.new_child;

    // add the child again, this time to the enlarged root.
    add_child_result = add_child(root, node);
  }

  if (add_child_result != ADD_CHILD_OK) {
    // assumes `add_child` does not keep a reference to `node` when it fails.
    free(node);
    return wtf;
  }

  // the child is now part of the tree; record it.
  changes->size_change += node->block_sz;
  changes->non_arena_allocations = true;

  return (tree_add_child_result_t) { TREE_ADD_CHILD_OK, root, node };
}
// Selects how `find_path` reacts when part of the path is missing.
// NOTE(review): within tree.c only CREATE_IF_MISSING changes the behavior of
// `find_path`; REMOVE_EMPTY_IMPLICIT_NODES is currently handled the same as
// BASIC_WALK.
typedef enum {
// walks the tree. if the path cannot be found, exit with
// `FIND_PATH_NOT_FOUND`.
BASIC_WALK,
// walks the tree. if the intermediate paths cannot be found, create them.
// if a leaf node exists where an intermediate path node needs to be
// created, then return `FIND_PATH_CONFLICT`.
CREATE_IF_MISSING,
// walks the tree. if the path cannot be found, exit with
// `FIND_PATH_NOT_FOUND`. if the operation is successful, then check
// intermediate nodes to ensure that they still have children. any nodes
// that do not should be removed.
REMOVE_EMPTY_IMPLICIT_NODES,
} find_path_operation_type;
// Status codes reported by `find_path` and by its callbacks.
typedef enum _find_path_result_t {
FIND_PATH_OK,
// a component of the path does not exist.
FIND_PATH_NOT_FOUND,
// a memory allocation failed.
FIND_PATH_OOM,
// a leaf node is present where a directory is needed, or vice versa.
FIND_PATH_CONFLICT,
// unexpected internal failure.
FIND_PATH_WTF,
} find_path_result_t;
// Value returned by a `find_path` callback: a status code plus the directory
// node the callback operated on. `find_path` reads `newroot` after the
// callback returns, so it must be valid whenever `code` is FIND_PATH_OK (the
// node may have been replaced while the callback ran).
typedef struct _find_path_callback_result_t {
find_path_result_t code;
node_t* newroot;
} find_path_callback_result_t;
/**
 * Walk from `root` down to the directory node enclosing `path`, one path
 * component per recursion level, and invoke `callback` on that directory with
 * the final path component.
 *
 * If `operation_type` is CREATE_IF_MISSING, intermediate directories that do
 * not exist are created as implicit nodes. Otherwise a missing directory
 * ends the walk with FIND_PATH_NOT_FOUND. A leaf node found where a directory
 * is required ends the walk with FIND_PATH_CONFLICT.
 *
 * `callback` should perform the desired operation and record how the tree was
 * modified in `changes`. While unwinding from a successful walk, every
 * directory on the path has its checksum invalidated if
 * `changes->checksum_dirty` is set.
 *
 * `root_parent` is the parent of `root`; it is needed because `root` may have
 * to be replaced by a larger node when a child is added to it.
 *
 * The path must be valid according to `valid_path`, but since it is not
 * checked internally, the caller is responsible for ensuring it.
 */
static find_path_result_t find_path(
    tree_t *tree,
    node_t *const root_parent,
    node_t *root,
    const char *path, const size_t path_sz,
    find_path_operation_type operation_type,
    tree_state_changes_t *changes,
    find_path_callback_result_t (*callback)(
        tree_t *tree,
        node_t *const dir_parent,
        node_t *dir,
        const char *path, const size_t path_sz,
        tree_state_changes_t *changes,
        void *context),
    void *context) {
  const size_t component_sz = first_component(path, path_sz);
  find_path_result_t outcome;

  if (component_sz != 0) {
    // there is at least one more directory level to descend through.
    node_t* next = get_child_by_name(root, path, component_sz);

    if (next != NULL) {
      if (next->type == TYPE_LEAF) {
        // a file is in the way of the directory we need.
        return FIND_PATH_CONFLICT;
      }
    } else {
      if (operation_type != CREATE_IF_MISSING) {
        // didn't find it, and we're not allowed to create it.
        return FIND_PATH_NOT_FOUND;
      }

      // create the missing directory.
      tree_add_child_result_t added = tree_add_child(
          tree, root_parent, root, path, component_sz, changes);
      if (added.code == TREE_ADD_CHILD_OOM) {
        return FIND_PATH_OOM;
      }
      if (added.code == TREE_ADD_CHILD_WTF) {
        return FIND_PATH_WTF;
      }

      // adding the child may have replaced `root`; pick up both pointers.
      root = added.newroot;
      next = added.newchild;
      next->type = TYPE_IMPLICIT;
    }

    // skip over the component and its trailing separator.
    outcome = find_path(
        tree,
        root,
        next,
        path + component_sz + 1,
        path_sz - component_sz - 1,
        operation_type,
        changes,
        callback,
        context);
  } else {
    // no separator left: `root` is the enclosing directory. hand over to the
    // callback, which may replace `root`.
    find_path_callback_result_t callback_result = callback(
        tree,
        root_parent, root,
        path, path_sz,
        changes,
        context);

    outcome = callback_result.code;
    root = callback_result.newroot;
  }

  // mark up the nodes as we pop off the stack.
  if (outcome == FIND_PATH_OK && changes->checksum_dirty == true) {
    root->checksum_valid = false;
  }

  return outcome;
}
tree_t* alloc_tree() {
// set up the shadow root and the real root.
node_t* shadow_root = alloc_node("/", 1, 1);
shadow_root->type = TYPE_IMPLICIT;
node_t* real_root = alloc_node("/", 1, 0);
real_root->type = TYPE_IMPLICIT;
add_child(shadow_root, real_root);
tree_t* tree = (tree_t*) calloc(1, sizeof(tree_t));
#if 0 // FIXME: (ttung) probably remove this
tree->mode = STANDARD_MODE;
#endif /* #if 0 */
tree->shadow_root = shadow_root;
tree->consumed_memory = 0;
tree->consumed_memory += shadow_root->block_sz;
tree->consumed_memory += real_root->block_sz;
tree->arena = NULL;
tree->arena_free_start = NULL;
tree->arena_sz = 0;
tree->compacted = false;
return tree;
}
// Recursively releases `node` and everything below it. Children are visited
// before the node itself; only nodes that live outside the tree's arena are
// passed to free().
static void destroy_tree_helper(tree_t* tree, node_t* node) {
  int child_ix = 0;
  while (child_ix < node->num_children) {
    destroy_tree_helper(tree, get_child_by_index(node, child_ix));
    child_ix ++;
  }

  if (in_arena(tree, node)) {
    // arena memory is released in one piece by the caller.
    return;
  }
  free(node);
}
// Releases a tree: its individually allocated nodes (skipped when the tree is
// marked as compacted), its arena if it has one, and the tree struct itself.
void destroy_tree(tree_t* tree) {
  if (!tree->compacted) {
    // some nodes may live outside the arena; walk the tree to free them.
    destroy_tree_helper(tree, tree->shadow_root);
  }

  void* arena = tree->arena;
  if (arena != NULL) {
    free(arena);
  }

  free(tree);
}
// Context passed to `get_path_callback`; the callback stores the node it
// found in `node`. The field is only written when the lookup succeeds.
typedef struct _get_path_metadata_t {
node_t* node;
} get_path_metadata_t;
// `find_path` callback for `get_path`: looks up (`name`, `name_sz`) among the
// children of `root`. On a hit, the node is stored in the
// `get_path_metadata_t` that `context` points to and FIND_PATH_OK is
// returned. Otherwise FIND_PATH_NOT_FOUND is returned. `root` is never
// replaced, so it is always handed back as the new root.
find_path_callback_result_t get_path_callback(
    tree_t* tree,
    node_t* const root_parent,
    node_t* root,
    const char* name, const size_t name_sz,
    tree_state_changes_t* changes,
    void* context) {
  node_t *found = get_child_by_name(root, name, name_sz);

  if (found != NULL) {
    ((get_path_metadata_t *) context)->node = found;
    return (find_path_callback_result_t) { FIND_PATH_OK, root };
  }

  return (find_path_callback_result_t) { FIND_PATH_NOT_FOUND, root };
}
// Looks up `path` (`path_sz` bytes) in `tree` without modifying it.
//
// Returns GET_PATH_OK together with the node when the path exists,
// GET_PATH_NOT_FOUND when it does not (including the case where a file is
// found where a directory was expected), and GET_PATH_WTF on any other
// failure, e.g. when the tree has no real root.
get_path_result_t get_path(
    tree_t* tree,
    const char* path,
    const size_t path_sz) {
  // the real root is the first (and only) child of the shadow root.
  node_t* shadow_root = tree->shadow_root;
  node_t* real_root = get_child_by_index(shadow_root, 0);
  if (real_root == NULL) {
    return (get_path_result_t) { GET_PATH_WTF, NULL };
  }

  tree_state_changes_t changes = { 0 };
  get_path_metadata_t metadata = { NULL };

  const find_path_result_t walk_result = find_path(
      tree,
      shadow_root,
      real_root,
      path, path_sz,
      BASIC_WALK,
      &changes,
      get_path_callback,
      &metadata);

  // a read-only walk must not have recorded any modification.
  assert(changes.size_change == 0);
  assert(changes.num_leaf_node_change == 0);
  assert(changes.non_arena_allocations == false);

  if (walk_result == FIND_PATH_OK) {
    return (get_path_result_t) { GET_PATH_OK, metadata.node };
  }

  if (walk_result == FIND_PATH_NOT_FOUND ||
      walk_result == FIND_PATH_CONFLICT) {
    // `FIND_PATH_CONFLICT` means a leaf node sits where we expected a
    // directory node. this is treated the same as a NOT_FOUND.
    return (get_path_result_t) { GET_PATH_NOT_FOUND, NULL };
  }

  return (get_path_result_t) { GET_PATH_WTF, NULL };
}
// Context passed to `add_or_update_path_callback`: the values to store in the
// leaf node being added or updated.
typedef struct _add_or_update_path_metadata_t {
// checksum bytes to copy into the node, and how many of them there are.
const uint8_t* checksum;
const uint8_t checksum_sz;
// flags to store in the node.
const uint8_t flags;
} add_or_update_path_metadata_t;
// `find_path` callback for `add_or_update_path`. `root` is the directory that
// encloses the file and (`name`, `name_sz`) is the file's name. `context`
// points to an `add_or_update_path_metadata_t` holding the checksum and flags
// to store.
//
// If the file does not exist yet, a leaf node is created for it, which may
// replace `root` with a larger node. If a directory with that name exists,
// FIND_PATH_CONFLICT is returned. On success the leaf's checksum and flags
// are overwritten and `changes->checksum_dirty` is set, so that `find_path`
// invalidates the checksums of the enclosing directories.
//
// The checksum size is validated before the tree is touched, so an oversized
// checksum yields FIND_PATH_WTF without leaving a half-initialized node
// behind.
find_path_callback_result_t add_or_update_path_callback(
    tree_t* tree,
    node_t* const root_parent,
    node_t* root,
    const char* name, const size_t name_sz,
    tree_state_changes_t* changes,
    void* context) {
  add_or_update_path_metadata_t* metadata =
      (add_or_update_path_metadata_t*) context;

  // reject bad input before making any modification.
  if (metadata->checksum_sz > CHECKSUM_BYTES) {
    return (find_path_callback_result_t) {
        FIND_PATH_WTF, NULL };
  }

  // does the path already exist?
  node_t* child = get_child_by_name(root, name, name_sz);
  if (child == NULL) {
    tree_add_child_result_t tree_add_child_result =
        tree_add_child(
            tree,
            root_parent,
            root,
            name, name_sz,
            changes);
    switch (tree_add_child_result.code) {
      case TREE_ADD_CHILD_OOM:
        return (find_path_callback_result_t) {
            FIND_PATH_OOM, NULL };
      case TREE_ADD_CHILD_WTF:
        return (find_path_callback_result_t) {
            FIND_PATH_WTF, NULL };
      case TREE_ADD_CHILD_OK:
        break;
    }

    // adding the child may have replaced `root`; pick up both pointers.
    root = tree_add_child_result.newroot;
    child = tree_add_child_result.newchild;

    // it's a leaf node.
    child->type = TYPE_LEAF;

    // update the accounting.
    changes->num_leaf_node_change ++;
  } else if (child->type == TYPE_IMPLICIT) {
    // was previously a directory
    return (find_path_callback_result_t) {
        FIND_PATH_CONFLICT, NULL };
  }

  // update the node.
  memcpy(child->checksum, metadata->checksum, metadata->checksum_sz);
  child->checksum_sz = metadata->checksum_sz;
  child->checksum_valid = true;
  child->flags = metadata->flags;

  // the contents of every enclosing directory changed, so their checksums
  // must be recomputed.
  changes->checksum_dirty = true;

  return (find_path_callback_result_t) { FIND_PATH_OK, root };
}
// Adds the file at `path` to `tree`, or updates it if it already exists,
// storing `checksum` (`checksum_sz` bytes) and `flags` in its node. Missing
// intermediate directories are created.
//
// Whatever the outcome, the changes recorded during the walk (memory
// consumption, number of leaf nodes, loss of the compacted state) are applied
// to the tree struct.
//
// Returns ADD_UPDATE_PATH_OK on success, ADD_UPDATE_PATH_OOM if an allocation
// failed, ADD_UPDATE_PATH_CONFLICT if a file and a directory collide, and
// ADD_UPDATE_PATH_WTF on any other failure.
add_update_path_result_t add_or_update_path(
    tree_t* tree,
    const char* path,
    const size_t path_sz,
    const uint8_t* checksum,
    const uint8_t checksum_sz,
    const uint8_t flags) {
  // the real root is the first (and only) child of the shadow root.
  node_t* shadow_root = tree->shadow_root;
  node_t* real_root = get_child_by_index(shadow_root, 0);
  if (real_root == NULL) {
    return ADD_UPDATE_PATH_WTF;
  }

  add_or_update_path_metadata_t metadata = { checksum, checksum_sz, flags };
  tree_state_changes_t changes = { 0 };

  const find_path_result_t walk_result = find_path(
      tree,
      shadow_root,
      real_root,
      path, path_sz,
      CREATE_IF_MISSING,
      &changes,
      add_or_update_path_callback,
      &metadata);

  // apply the changes back to the tree struct
  tree->consumed_memory += changes.size_change;
  tree->num_leaf_nodes += changes.num_leaf_node_change;
  if (changes.non_arena_allocations) {
    tree->compacted = false;
  }

  if (walk_result == FIND_PATH_OK) {
    return ADD_UPDATE_PATH_OK;
  }
  if (walk_result == FIND_PATH_OOM) {
    return ADD_UPDATE_PATH_OOM;
  }
  if (walk_result == FIND_PATH_CONFLICT) {
    return ADD_UPDATE_PATH_CONFLICT;
  }
  return ADD_UPDATE_PATH_WTF;
}

92
fastmanifest/tree.h Normal file
View File

@ -0,0 +1,92 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree.h: publicly accessible functions for tree manipulation and
// conversions. this should be the only header file directly exposed
// to users.
#ifndef __FASTMANIFEST_TREE_H__
#define __FASTMANIFEST_TREE_H__
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
#include "result.h"
#if 0 // FIXME: (ttung) probably remove this
typedef enum _allocation_mode_t {
ARENA_MODE, /* all allocations should come from the
* arena. this is to produce a
* compact and relocatable tree. */
STANDARD_MODE, /* all allocations should come from the
* standard system allocator, i.e.,
* malloc. */
} allocation_mode_t;
#endif /* #if 0 */
// Accumulates the effects of one tree operation so that they can be applied
// to the `tree_t` bookkeeping fields once the operation has finished.
typedef struct _tree_state_changes_t {
// net change in bytes consumed by nodes (may be negative).
ptrdiff_t size_change;
// net change in the number of leaf nodes.
int32_t num_leaf_node_change;
// set when a node was allocated outside of the arena.
bool non_arena_allocations;
// when set, `find_path` invalidates the checksum of every directory node on
// the path it walked.
bool checksum_dirty;
} tree_state_changes_t;
// A manifest tree: bookkeeping totals plus pointers to the shadow root and to
// the optional memory arena that nodes are allocated from.
typedef struct _tree_t {
// these fields are preserved during serialization.
// total of the `block_sz` of the nodes accounted to this tree.
size_t consumed_memory;
uint32_t num_leaf_nodes;
// these fields are not preserved during serialization.
// the shadow root's single child is the real root of the tree.
struct _node_t* shadow_root; /* this is a literal pointer. */
// arena storage (NULL if the tree has no arena), the start of its unused
// part, and its total size in bytes.
void* arena; /* this is also a literal pointer. */
void* arena_free_start; /* this is also a literal pointer. */
size_t arena_sz;
// true for a tree freshly created with an arena; cleared once a node is
// allocated outside of the arena.
bool compacted;
#if 0 // FIXME: (ttung) probably remove this
allocation_mode_t mode;
#endif /* #if 0 */
} tree_t;
/**
* Returns true iff the path is something digestible by this tree library. The
* rules are:
*
* 1) The path must be of nonzero length.
* 2) The path must not start nor end with the path separator '/'.
* 3) The path must not have consecutive path separators.
*/
extern bool valid_path(const char* path, const size_t path_sz);
// Allocates an empty tree (a shadow root plus the real root) using the system
// allocator.
extern tree_t* alloc_tree();
// Releases a tree, its nodes and its arena.
extern void destroy_tree(tree_t* tree);
// NOTE(review): not defined in tree.c; presumably implemented in tree_copy.c.
extern tree_t* copy(const tree_t* src);
// Looks up `path` in `tree`. The path must satisfy `valid_path`; this is not
// checked. On GET_PATH_OK the result carries the node that was found.
extern get_path_result_t get_path(
tree_t* const tree,
const char* path,
const size_t path_sz);
// Adds the file at `path`, or updates it if it exists, storing the given
// checksum and flags. Missing intermediate directories are created. The path
// must satisfy `valid_path`; this is not checked.
extern add_update_path_result_t add_or_update_path(
tree_t* const tree,
const char* path,
const size_t path_sz,
const uint8_t* checksum,
const uint8_t checksum_sz,
const uint8_t flags);
// NOTE(review): no definition of `remove_path`, `contains_path`,
// `read_from_file` or `write_to_file` appears in tree.c.
extern remove_path_result_t remove_path(
tree_t* const tree,
const char* path,
const size_t path_sz);
extern bool contains_path(
const tree_t* tree,
const char* path,
const size_t path_sz);
extern tree_t* read_from_file(char* fname);
extern write_to_file_result_t write_to_file(tree_t* tree, char* fname);
// Conversions between a flat manifest (`manifest_sz` bytes at `manifest`) and
// a tree.
extern convert_from_flat_result_t convert_from_flat(
char* manifest, size_t manifest_sz);
extern convert_to_flat_result_t convert_to_flat(tree_t* tree);
#endif /* #ifndef __FASTMANIFEST_TREE_H__ */

142
fastmanifest/tree_arena.c Normal file
View File

@ -0,0 +1,142 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_arena.c: methods to create a tree with a fixed memory arena and to
// allocate nodes from the fixed memory arena.
#include <stdlib.h>
#include "node.h"
#include "tree.h"
#include "tree_arena.h"
#define ARENA_INCREMENT_PERCENTAGE 20
#define ARENA_MIN_STORAGE_INCREMENT (1024 * 1024)
#define ARENA_MAX_STORAGE_INCREMENT (16 * 1024 * 1024)
// Returns the number of bytes between the start of the unused part of the
// tree's arena and the end of the arena.
static inline size_t calculate_arena_free(const tree_t* tree) {
  const intptr_t end_of_arena = (intptr_t) tree->arena + tree->arena_sz;
  const intptr_t free_start = (intptr_t) tree->arena_free_start;

  return end_of_arena - free_start;
}
// Sets up a node named (`name`, `name_sz`) with room for `max_children`
// children at the start of the unused part of the tree's arena.
//
// If the node does not fit, `policy` decides what happens: with
// ARENA_POLICY_FAIL the call returns ARENA_ALLOC_OOM right away; otherwise the
// arena is grown with realloc and the attempt is repeated until the node fits
// or realloc fails (ARENA_ALLOC_OOM).
//
// Returns ARENA_ALLOC_EXCEEDED_LIMITS if `name_sz` or `max_children` fail the
// VERIFY_NAME_SZ / VERIFY_CHILD_NUM checks. On success the tree's
// `arena_free_start` and `consumed_memory` are advanced and the new node is
// returned.
//
// NOTE: growing the arena may move it. `tree->arena`,
// `tree->arena_free_start` and (if it lives in the arena) `tree->shadow_root`
// are updated here, but any other pointer into the arena held by the caller
// must be considered stale after this call.
arena_alloc_node_result_t arena_alloc_node_helper(
arena_policy_t policy,
tree_t* tree,
const char* name, size_t name_sz,
size_t max_children) {
// since name_sz and max_children are going to be downcasted, we should verify
// that they're not too large for the types in node.h
if (!VERIFY_NAME_SZ(name_sz) ||
!VERIFY_CHILD_NUM(max_children)) {
return (arena_alloc_node_result_t) {
ARENA_ALLOC_EXCEEDED_LIMITS, NULL };
}
do {
size_t arena_free = calculate_arena_free(tree);
// if the node fits, it will start exactly where the free space starts.
node_t* candidate = (node_t*) tree->arena_free_start;
// a NULL return is treated as "the node does not fit in the free space".
void* next = setup_node(
tree->arena_free_start, arena_free,
name, name_sz,
max_children);
if (next == NULL) {
if (policy == ARENA_POLICY_FAIL) {
return (arena_alloc_node_result_t) {
ARENA_ALLOC_OOM, NULL };
} else {
// grow by ARENA_INCREMENT_PERCENTAGE percent, with the increment clamped
// to [ARENA_MIN_STORAGE_INCREMENT, ARENA_MAX_STORAGE_INCREMENT].
size_t new_arena_sz =
(tree->arena_sz * (100 + ARENA_INCREMENT_PERCENTAGE)) / 100;
// TODO: optimization opportunity!
// we can calculate how much free space we need and set that as another
// minimum. in the unlikely scenario we need a huge node, just setting
// the lower bound on ARENA_MIN_STORAGE_INCREMENT may require multiple
// rounds of realloc.
if (new_arena_sz - tree->arena_sz < ARENA_MIN_STORAGE_INCREMENT) {
new_arena_sz = tree->arena_sz + ARENA_MIN_STORAGE_INCREMENT;
}
if (new_arena_sz - tree->arena_sz > ARENA_MAX_STORAGE_INCREMENT) {
new_arena_sz = tree->arena_sz + ARENA_MAX_STORAGE_INCREMENT;
}
// resize the arena so it's bigger.
void *new_arena = realloc(tree->arena, new_arena_sz);
if (new_arena == NULL) {
// the old arena is untouched and still owned by the tree.
return (arena_alloc_node_result_t) {
ARENA_ALLOC_OOM, NULL};
}
// success! update the pointers.
if (new_arena != tree->arena) {
// the arena moved: rebase everything that pointed into the old block.
// `tree->arena` must keep its old value until the in_arena() check below
// has been made.
intptr_t arena_start = (intptr_t) tree->arena;
intptr_t arena_free_start = (intptr_t) tree->arena_free_start;
intptr_t new_arena_start = (intptr_t) new_arena;
// if the shadow root is inside the arena, we need to relocate it.
if (in_arena(tree, tree->shadow_root)) {
intptr_t shadow_root = (intptr_t) tree->shadow_root;
ptrdiff_t shadow_root_offset = shadow_root - arena_start;
tree->shadow_root = (node_t*) (new_arena_start +
shadow_root_offset);
}
// keep the free-space offset, relative to the new arena start.
intptr_t new_arena_free_start = new_arena_start;
new_arena_free_start += (arena_free_start - arena_start);
tree->arena_free_start = (void *) new_arena_free_start;
tree->arena = new_arena;
}
tree->arena_sz = new_arena_sz;
}
} else {
// the node fit: `next` is where the remaining free space starts.
tree->arena_free_start = next;
tree->consumed_memory += candidate->block_sz;
return (arena_alloc_node_result_t) {
ARENA_ALLOC_OK, candidate };
}
} while (true);
}
// Creates a tree backed by an arena of `arena_sz` bytes and allocates ONLY
// the shadow root in it; the caller is responsible for creating the real
// root. The tree starts out marked as compacted.
//
// Returns NULL if the arena, the tree struct or the shadow root cannot be
// allocated. Nothing is leaked in that case.
tree_t* alloc_tree_with_arena(size_t arena_sz) {
  void* arena = malloc(arena_sz);
  tree_t* tree = (tree_t*) calloc(1, sizeof(tree_t));

  if (arena == NULL || tree == NULL) {
    // free(NULL) is a no-op, so whichever allocation succeeded is released.
    free(arena);
    free(tree);
    return NULL;
  }

#if 0 // FIXME: (ttung) probably remove this
  tree->mode = STANDARD_MODE;
#endif /* #if 0 */
  tree->arena = tree->arena_free_start = arena;
  tree->arena_sz = arena_sz;
  tree->compacted = true;
  tree->shadow_root = NULL;

  tree->consumed_memory = 0;
  tree->num_leaf_nodes = 0;

  // set up ONLY the shadow root.
  arena_alloc_node_result_t alloc_result =
      arena_alloc_node(tree, "/", 1, 1);
  if (alloc_result.code != ARENA_ALLOC_OK) {
    // the allocation attempt may have moved the arena with realloc, so
    // release it through the tree rather than through `arena`.
    free(tree->arena);
    free(tree);
    return NULL;
  }

  node_t* shadow_root = alloc_result.node;
  shadow_root->type = TYPE_IMPLICIT;
  tree->shadow_root = shadow_root;

  return tree;
}

81
fastmanifest/tree_arena.h Normal file
View File

@ -0,0 +1,81 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_arena.h: declarations for methods to create a tree with a fixed
// memory arena and to allocate nodes from the fixed memory
// arena. for internal use only.
#ifndef __FASTMANIFEST_TREE_ARENA_H__
#define __FASTMANIFEST_TREE_ARENA_H__
#include "node.h"
#include "tree.h"
// What `arena_alloc_node_helper` does when the arena is too small for the
// requested node.
typedef enum _arena_policy_t {
ARENA_POLICY_FAIL, // fail immediately when there is
// insufficient space
ARENA_POLICY_REALLOC, // attempt to realloc until realloc
// fails.
} arena_policy_t;
// Status codes reported by the arena node allocation functions.
typedef enum {
ARENA_ALLOC_OK,
// the node does not fit and the arena could not (or must not) be grown.
ARENA_ALLOC_OOM,
// the name size or child count is too large for the node's field types.
ARENA_ALLOC_EXCEEDED_LIMITS,
} arena_alloc_node_code_t;
// Result of an arena node allocation: a status code plus the new node, which
// is NULL unless `code` is ARENA_ALLOC_OK.
typedef struct _arena_alloc_node_result_t {
arena_alloc_node_code_t code;
node_t* node;
} arena_alloc_node_result_t;
/**
 * Returns true if `_ptr` points into the tree's arena, i.e. into the
 * half-open address range [arena, arena + arena_sz). Always returns false for
 * a tree without an arena (`arena` == NULL, `arena_sz` == 0).
 *
 * Only the numeric address values are compared; nothing is dereferenced.
 */
static inline bool in_arena(const tree_t* tree, void* _ptr) {
  // unsigned arithmetic avoids signed overflow for high addresses.
  const uintptr_t arena_start = (uintptr_t) tree->arena;
  // one past the last byte of the arena, so that the last byte itself is
  // still considered part of it.
  const uintptr_t arena_end = arena_start + tree->arena_sz;
  const uintptr_t ptr = (uintptr_t) _ptr;

  return ptr >= arena_start && ptr < arena_end;
}
/**
* Allocate space for a node within a heap-allocated arena. If the arena does
* not have enough space for the node, consult the policy to determine what to
* do next.
*/
extern arena_alloc_node_result_t arena_alloc_node_helper(
arena_policy_t policy,
tree_t* tree,
const char* name, size_t name_sz,
size_t max_children);
// Allocates a node in the tree's arena, growing the arena with realloc as
// often as needed for the node to fit.
static inline arena_alloc_node_result_t arena_alloc_node(
    tree_t* tree,
    const char* name, size_t name_sz,
    size_t max_children) {
  const arena_policy_t on_full = ARENA_POLICY_REALLOC;

  return arena_alloc_node_helper(
      on_full, tree, name, name_sz, max_children);
}
// Allocates a node in the tree's arena, failing immediately if the arena does
// not have enough free space. The arena is never grown.
static inline arena_alloc_node_result_t arena_alloc_node_strict(
    tree_t* tree,
    const char* name, size_t name_sz,
    size_t max_children) {
  const arena_policy_t on_full = ARENA_POLICY_FAIL;

  return arena_alloc_node_helper(
      on_full, tree, name, name_sz, max_children);
}
/**
* Creates a tree and sets up the shadow root node. This does *not* initialize
* the real root node. It is the responsibility of the caller to do so.
*/
extern tree_t* alloc_tree_with_arena(size_t arena_sz);
#endif /* #ifndef __FASTMANIFEST_TREE_ARENA_H__ */

662
fastmanifest/tree_convert.c Normal file
View File

@ -0,0 +1,662 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert.c: methods to convert flat manifests to and from a tree.
#include <stdlib.h>
#include "buffer.h"
#include "tree.h"
#include "tree_arena.h"
#define MAX_FOLDER_DEPTH 1024
#define DEFAULT_CHILDREN_CAPACITY 4096
#define DEFAULT_BUILD_BUFFER_SZ 16384
#define BUFFER_GROWTH_FACTOR 1.2
#define BUFFER_MINIMUM_GROWTH 1048576
#define BUFFER_MAXIMUM_GROWTH (32 * 1024 * 1024)
// CONVERT_BUFFER_APPEND: calls `buffer_append` with this file's growth
// parameters (BUFFER_GROWTH_FACTOR, BUFFER_MINIMUM_GROWTH,
// BUFFER_MAXIMUM_GROWTH) appended to the argument list.
#define CONVERT_BUFFER_APPEND(buffer, buffer_idx, buffer_sz, \
input, input_sz) \
buffer_append(buffer, buffer_idx, buffer_sz, input, input_sz, \
BUFFER_GROWTH_FACTOR, \
BUFFER_MINIMUM_GROWTH, \
BUFFER_MAXIMUM_GROWTH)
// CONVERT_EXPAND_TO_FIT: calls `expand_to_fit` with the same growth
// parameters appended to the argument list.
#define CONVERT_EXPAND_TO_FIT(buffer, buffer_idx, buffer_sz, input_sz) \
expand_to_fit(buffer, buffer_idx, buffer_sz, input_sz, \
BUFFER_GROWTH_FACTOR, \
BUFFER_MINIMUM_GROWTH, \
BUFFER_MAXIMUM_GROWTH)
// Bookkeeping for one folder used while converting a flat manifest to a
// tree: the folder's name and the list of children collected for it so far.
typedef struct _open_folder_t {
const char* subfolder_name; /* this is a reference to the flat
* manifest's memory. we do not own
* this memory, and we must copy it
* before the conversion completes. */
size_t subfolder_name_sz;
// readers may wonder why we store a relative pointer. this is because
// storing node_t* pointers is UNSAFE. they are allocated on the arena, and
// can be moved at a moment's notice. the only thing that's safe to do is to
// store an offset from the start of the arena.
ptrdiff_t closed_children_prealloc[DEFAULT_CHILDREN_CAPACITY];
// `init_open_folder` points `closed_children` at the preallocated array
// above and sets the count to 0 and the capacity to
// DEFAULT_CHILDREN_CAPACITY.
ptrdiff_t* closed_children;
size_t closed_children_count;
size_t closed_children_capacity;
bool in_use;
} open_folder_t;
// State of a flat-manifest-to-tree conversion: the tree being built and a
// fixed-size array of folder records, of which `open_folder_count` are
// counted as open.
typedef struct _from_flat_state_t {
tree_t* tree;
open_folder_t folders[MAX_FOLDER_DEPTH];
size_t open_folder_count;
} from_flat_state_t;
// State of a tree-to-flat-manifest conversion: the source tree and two
// buffers, each described by a pointer, an index and a size.
// NOTE(review): presumably `*_idx` is the number of bytes used and `*_sz` the
// allocated size -- confirm against buffer.h.
typedef struct _to_flat_state_t {
const tree_t* tree;
// buffer in which the directory path is built.
char* dirpath_build_buffer;
size_t dirpath_build_buffer_idx;
size_t dirpath_build_buffer_sz;
// buffer receiving the flat manifest.
char* output_buffer;
size_t output_buffer_idx;
size_t output_buffer_sz;
} to_flat_state_t;
/*
 * Maps a byte value to the value of the hex digit it represents (0-15), or to
 * -1 if the byte is not a hex digit. Both upper- and lowercase digits are
 * accepted. Each row covers 16 byte values. The table is read-only.
 */
static const int8_t hextable[256] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
/*
 * Maps a value in the range 0-15 to its lowercase hex digit. The table is
 * read-only.
 */
static const char chartable[16] = {
  '0', '1', '2', '3', '4', '5', '6', '7',
  '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};
/*
 * Decode `len` hex digits from `input` into `len / 2` bytes at `dst`.
 *
 * Returns false if `len` is not exactly twice SHA1_BYTES, or if a character
 * that is not a hex digit is encountered. In the latter case the bytes
 * decoded up to that point have already been written to `dst`.
 */
static bool unhexlify(const char* input, int len, uint8_t* dst) {
  if (len != SHA1_BYTES * 2) {
    // only hex-encoded SHA-1 hashes are supported.
    return false;
  }

  const unsigned char* digits = (const unsigned char*) input;

  for (size_t pos = 0; pos < len; pos += 2) {
    const int hi = hextable[digits[pos]];
    const int lo = hextable[digits[pos + 1]];

    if (hi < 0 || lo < 0) {
      return false;
    }

    // two hex digits make up one output byte.
    dst[pos / 2] = (hi << 4) | lo;
  }

  return true;
}
/*
 * Encode `len` bytes from `input` as lower-case hex digits.
 *
 * `dst` receives two characters per input byte and is *not* null-terminated.
 */
static void hexlify(const uint8_t* input, int len, char* dst) {
  size_t remaining = len;

  while (remaining > 0) {
    unsigned char byte = (unsigned char) *input;

    dst[0] = chartable[byte >> 4];
    dst[1] = chartable[byte & 0xf];

    input ++;
    dst += 2;
    remaining --;
  }
}
/**
 * Orders (`name`, `name_sz`) against the name stored in `folder`.
 *
 * The shared prefix is compared first; if it is identical, the shorter name
 * sorts first.
 *
 * Returns <0 if the given name sorts before the folder's name, 0 if the two
 * are equal, and >0 if the given name sorts after the folder's name.
 */
static inline int folder_name_compare(
    const char* name,
    size_t name_sz,
    const open_folder_t* folder) {
  uint32_t shared_sz = folder->subfolder_name_sz;
  if (name_sz < folder->subfolder_name_sz) {
    shared_sz = name_sz;
  }

  int prefix_order = strncmp(name, folder->subfolder_name, shared_sz);
  if (prefix_order != 0) {
    return prefix_order;
  }

  // identical prefix: the difference in lengths decides.
  int length_order = name_sz - folder->subfolder_name_sz;
  return length_order;
}
/**
 * Puts `folder` into the "unused" state: no children recorded, and the child
 * list pointing at the folder's own preallocated storage.
 *
 * Whatever `closed_children` pointed at before is simply overwritten, not
 * freed.
 */
static void init_open_folder(open_folder_t* folder) {
  folder->closed_children_capacity = DEFAULT_CHILDREN_CAPACITY;
  folder->closed_children_count = 0;
  folder->closed_children = folder->closed_children_prealloc;
  folder->in_use = false;
}
/**
 * Allocates and initializes the state for a flat -> tree conversion.
 *
 * `flat_sz` is the size of the flat manifest; the arena is requested at twice
 * that size.
 *
 * Returns NULL if the state itself cannot be allocated.  If the tree or its
 * shadow root cannot be allocated, the state is still returned but with
 * `tree` set to NULL; the caller must check for that and free the state.
 */
static from_flat_state_t* init_from_state(size_t flat_sz) {
  from_flat_state_t *state = malloc(sizeof(from_flat_state_t));
  if (state == NULL) {
    return NULL;
  }

  for (int ix = 0; ix < MAX_FOLDER_DEPTH; ix ++) {
    init_open_folder(&state->folders[ix]);
  }
  state->open_folder_count = 0;

  state->tree = alloc_tree_with_arena(flat_sz * 2);
  if (state->tree == NULL) {
    // nothing to allocate the shadow root from.  hand back the state with a
    // NULL tree so the caller can report the failure.
    return state;
  }

  // set up ONLY the shadow root.  the real root will be created by the
  // conversion process.
  arena_alloc_node_result_t alloc_result =
      arena_alloc_node(state->tree, "/", 1, 1);
  if (alloc_result.code != ARENA_ALLOC_OK) {
    // release the tree the same way convert_from_flat does on failure, and
    // signal the failure through the NULL tree rather than leaking the state.
    free(state->tree);
    state->tree = NULL;
    return state;
  }

  node_t* shadow_root = alloc_result.node;
  shadow_root->type = TYPE_IMPLICIT;
  state->tree->shadow_root = shadow_root;
  state->tree->consumed_memory += shadow_root->block_sz;

  return state;
}
/**
 * Appends `child` to the list of closed children of `folder`, growing the
 * list as needed.
 *
 * The child is recorded as an offset from the start of the arena rather than
 * as a pointer, because the arena can be moved when it is resized.
 *
 * Returns false if the list could not be grown; in that case the folder is
 * left unchanged.
 */
static bool folder_add_child(
    from_flat_state_t* state,
    open_folder_t* folder,
    node_t* child) {
  if (folder->closed_children_count >= folder->closed_children_capacity) {
    // time to expand the folder.
    size_t new_capacity = folder->closed_children_capacity * 2;
    if (new_capacity <= folder->closed_children_capacity ||
        new_capacity > SIZE_MAX / sizeof(ptrdiff_t)) {
      // the byte count would overflow.
      return false;
    }

    ptrdiff_t* grown;
    if (folder->closed_children == folder->closed_children_prealloc) {
      // the current zone is the prealloc zone, which cannot be realloc'ed.
      // allocate a fresh zone and copy the existing entries over.
      grown = malloc(sizeof(ptrdiff_t) * new_capacity);
      if (grown == NULL) {
        return false;
      }
      memcpy(grown, folder->closed_children_prealloc,
          sizeof(ptrdiff_t) * folder->closed_children_count);
    } else {
      // realloc takes a size in bytes.  keep the old pointer until we know
      // the call succeeded, since the old zone stays valid on failure.
      grown = realloc(folder->closed_children,
          sizeof(ptrdiff_t) * new_capacity);
      if (grown == NULL) {
        return false;
      }
    }

    folder->closed_children = grown;
    folder->closed_children_capacity = new_capacity;
  }

  // we need to store the delta between the start of the arena and the child.
  intptr_t arena_start = (intptr_t) state->tree->arena;
  intptr_t child_start = (intptr_t) child;
  folder->closed_children[folder->closed_children_count] =
      child_start - arena_start;
  folder->closed_children_count ++;

  return true;
}
// Status codes reported by `close_folder`.
typedef enum {
// the folder was closed and its node was built.
CLOSE_FOLDER_OK,
// a node could not be allocated, or the node could not be recorded in its
// parent folder.
CLOSE_FOLDER_OOM,
} close_folder_code_t;
// Result of `close_folder`.
typedef struct _close_folder_result_t {
close_folder_code_t code;
// the node built for the closed folder.  NULL unless `code` is
// `CLOSE_FOLDER_OK`.
node_t* node;
} close_folder_result_t;
/**
 * Close the folder at index `folder_index`.  This may require closing nested
 * folders.  If folder_index is > 0, then add the closed folder to its parent.
 * If the folder_index is 0, it is responsibility of the caller to attach the
 * returned node to the shadow root.
 *
 * Closing a folder allocates its node from the arena, attaches every child
 * recorded so far, and resets the folder slot so it can be reused.
 *
 * Returns `CLOSE_FOLDER_OK` and the new node on success, or
 * `CLOSE_FOLDER_OOM` and a NULL node if an allocation failed.
 */
static close_folder_result_t close_folder(
    from_flat_state_t* state,
    size_t folder_index) {
  open_folder_t* folder = &state->folders[folder_index];
  assert(folder->in_use == true);

  // a nested folder has to be closed first, so that it is recorded as a child
  // of this folder before this folder's node is sized.
  if (folder_index < MAX_FOLDER_DEPTH - 1 &&
      state->folders[folder_index + 1].in_use) {
    close_folder_result_t nested_result =
        close_folder(state, folder_index + 1);
    if (nested_result.code != CLOSE_FOLDER_OK) {
      return (close_folder_result_t) { nested_result.code, NULL };
    }
  }

  // allocate a node and set it up.
  arena_alloc_node_result_t alloc_result =
      arena_alloc_node(
          state->tree,
          folder->subfolder_name,
          folder->subfolder_name_sz,
          folder->closed_children_count);
  if (alloc_result.code != ARENA_ALLOC_OK) {
    // any failure leaves us without a usable node.
    return (close_folder_result_t) { CLOSE_FOLDER_OOM, NULL };
  }

  node_t* node = alloc_result.node;
  node->type = TYPE_IMPLICIT;
  // this is a huge abstraction violation, but it allows us to use
  // `set_child_by_index`, which is significantly more efficient.
  node->num_children = folder->closed_children_count;

  // node is set up.  now add all the children!  they were stored as offsets
  // from the start of the arena, so turn them back into pointers.
  intptr_t arena_start = (intptr_t) state->tree->arena;
  for (size_t ix = 0; ix < folder->closed_children_count; ix ++) {
    ptrdiff_t child_offset = (intptr_t) folder->closed_children[ix];
    intptr_t address = arena_start + child_offset;

    set_child_by_index(node, ix, (node_t *) address);
  }

  // the child list may have outgrown the preallocated storage.  release it
  // before the slot is reset, otherwise the heap block is lost.
  if (folder->closed_children != folder->closed_children_prealloc) {
    free(folder->closed_children);
  }
  init_open_folder(folder);           // zap the folder so it can be reused.
  state->open_folder_count --;

  // attach to parent folder if it's not the root folder.
  assert(folder_index == state->open_folder_count);
  if (folder_index > 0) {
    open_folder_t* parent_folder = &state->folders[folder_index - 1];
    if (folder_add_child(state, parent_folder, node) == false) {
      return (close_folder_result_t) { CLOSE_FOLDER_OOM, NULL };
    }
  }

  return (close_folder_result_t) { CLOSE_FOLDER_OK, node };
}
// Status codes reported by `process_path`.
typedef enum {
// the path was consumed and a leaf node was created for it.
PROCESS_PATH_OK,
// a node or folder could not be allocated.
PROCESS_PATH_OOM,
// the path is malformed, e.g. it is not terminated within the given length.
PROCESS_PATH_CORRUPT,
} process_path_code_t;
// Result of `process_path`.
typedef struct _process_path_result_t {
process_path_code_t code;
// the following are only set when the code is `PROCESS_PATH_OK`.
// do *NOT* save this pointer.  immediately do what is needed with this
// pointer and discard.  the reason is that it's part of the arena, and can be
// moved if the arena is resized.
node_t* node;
// this is the number of bytes consumed, including the path's null
// terminator.
size_t bytes_consumed;
} process_path_result_t;
/**
 * Process a null-terminated path, closing any directories and building the
 * nodes as needed, and opening the new directories to support the current path.
 *
 * Once the proper set of folders are open, create a node and write it into
 * the folder.
 *
 * `path` points at the start of the path and `max_len` is the number of bytes
 * that may be read from it.  Folder names are linked into `path` rather than
 * copied, so the memory behind `path` must outlive the conversion.
 *
 * Returns `PROCESS_PATH_CORRUPT` if no null terminator is found within
 * `max_len` bytes or the path nests deeper than `MAX_FOLDER_DEPTH`, and
 * `PROCESS_PATH_OOM` if any allocation fails.
 */
static process_path_result_t process_path(
    from_flat_state_t* state,
    const char* path, size_t max_len) {
  size_t path_scan_index = 0;
  size_t current_path_start = 0;
  size_t open_folder_index = 0;

  // match as many path components as we can.  the bound is tested before the
  // byte is read, so an unterminated path never reads past `max_len`.
  for (;
       path_scan_index < max_len && path[path_scan_index] != 0;
       path_scan_index ++) {
    // check for a path separator.
    if (path[path_scan_index] != '/') {
      continue;
    }

    bool open_new_folder = true;

    // check if the *next* open folder is valid, and if it matches the path
    // component we just found.
    if (open_folder_index + 1 < state->open_folder_count) {
      if (folder_name_compare(
          &path[current_path_start],
          path_scan_index - current_path_start,
          &state->folders[open_folder_index + 1]) == 0) {
        // we found the folder we needed, so we can just reuse it.
        open_new_folder = false;
        open_folder_index ++;
      } else {
        // a different folder is open at this depth.  close it (and anything
        // nested inside it) to make room for the new one.
        close_folder_result_t close_folder_result =
            close_folder(state, open_folder_index + 1);
        if (close_folder_result.code != CLOSE_FOLDER_OK) {
          return (process_path_result_t) { PROCESS_PATH_OOM, NULL, 0 };
        }
      }
    }

    if (open_new_folder == true) {
      // the folder stack has a fixed size.  refuse paths that nest deeper
      // than it can represent instead of writing past the end of it.
      if (open_folder_index + 1 >= MAX_FOLDER_DEPTH) {
        return (process_path_result_t) { PROCESS_PATH_CORRUPT, NULL, 0 };
      }

      // if we're opening a new folder, that means there should be no child
      // folders open.
      assert(state->open_folder_count == open_folder_index + 1);
      open_folder_index ++;
      state->open_folder_count ++;
      open_folder_t* folder = &state->folders[open_folder_index];

      assert(folder->in_use == false);
      assert(folder->closed_children == folder->closed_children_prealloc);
      assert(folder->closed_children_count == 0);

      // link the name in.  remember, we don't own the memory!!
      folder->in_use = true;
      folder->subfolder_name = &path[current_path_start];
      folder->subfolder_name_sz = (path_scan_index - current_path_start);
    }

    // path starts after the /
    current_path_start = path_scan_index + 1;
  }

  if (path_scan_index == max_len) {
    // ran out of input before finding the null terminator.
    return (process_path_result_t) { PROCESS_PATH_CORRUPT, NULL, 0 };
  }

  // close path components that are not matched, building their nodes.
  if (open_folder_index + 1 < state->open_folder_count) {
    close_folder_result_t close_folder_result =
        close_folder(state, open_folder_index + 1);
    if (close_folder_result.code != CLOSE_FOLDER_OK) {
      return (process_path_result_t) { PROCESS_PATH_OOM, NULL, 0 };
    }
  }

  // build a node for the remaining path (which should just be the
  // filename).  add it to the currently open folder.
  arena_alloc_node_result_t arena_alloc_node_result =
      arena_alloc_node(
          state->tree,
          &path[current_path_start],
          path_scan_index - current_path_start,
          0);
  if (arena_alloc_node_result.code != ARENA_ALLOC_OK) {
    return (process_path_result_t) { PROCESS_PATH_OOM, NULL, 0 };
  }

  arena_alloc_node_result.node->type = TYPE_LEAF;

  // jam the new node into the currently open folder.  if that fails the node
  // would be silently missing from the tree, so report it.
  open_folder_t* folder = &state->folders[open_folder_index];
  if (folder_add_child(state, folder, arena_alloc_node_result.node) == false) {
    return (process_path_result_t) { PROCESS_PATH_OOM, NULL, 0 };
  }

  return (process_path_result_t) {
      PROCESS_PATH_OK, arena_alloc_node_result.node, path_scan_index + 1 };
}
/**
 * Walks the flat manifest and builds the tree in `state`.
 *
 * Each entry is expected to be laid out as:
 *
 *   <path> NUL <SHA1_BYTES * 2 hex digits> [<one flag character>] NL
 *
 * On success the real root is attached to the shadow root and the tree is
 * returned.  Any entry that does not follow the layout, including one that is
 * cut short by the end of the manifest, yields `CONVERT_FROM_FLAT_WTF`.
 * Nothing is released on failure; that is left to the caller.
 */
static convert_from_flat_result_t convert_from_flat_helper(
    from_flat_state_t *state, char* manifest, size_t manifest_sz) {
  // open the root directory node.
  open_folder_t* folder = &state->folders[0];
  folder->subfolder_name = "/";
  folder->subfolder_name_sz = 1;
  folder->in_use = true;
  state->open_folder_count ++;

  for (size_t ptr = 0; ptr < manifest_sz; ) {
    // filename is up to the first null.
    process_path_result_t pp_result = process_path(
        state, &manifest[ptr], manifest_sz - ptr);

    switch (pp_result.code) {
      case PROCESS_PATH_OOM:
        return (convert_from_flat_result_t) {
            CONVERT_FROM_FLAT_OOM, NULL };
      case PROCESS_PATH_CORRUPT:
        return (convert_from_flat_result_t) {
            CONVERT_FROM_FLAT_WTF, NULL };
      case PROCESS_PATH_OK:
        break;
    }

    assert(pp_result.code == PROCESS_PATH_OK);

    // the node lives in the arena, so it is only used until the next
    // allocation (i.e. the next call to process_path).
    node_t* node = pp_result.node;
    ptr += pp_result.bytes_consumed;

    // the first test keeps the subtraction in the second from wrapping
    // around.
    if (ptr > manifest_sz || manifest_sz - ptr <= SHA1_BYTES * 2) {
      // not enough characters for the checksum and the NL.  well, that's a
      // fail.
      return (convert_from_flat_result_t) {
          CONVERT_FROM_FLAT_WTF, NULL };
    }

    if (unhexlify(&manifest[ptr], SHA1_BYTES * 2, node->checksum) ==
        false) {
      return (convert_from_flat_result_t) {
          CONVERT_FROM_FLAT_WTF, NULL };
    }
    node->checksum_sz = SHA1_BYTES;
    node->checksum_valid = true;
    ptr += SHA1_BYTES * 2;

    // is the next character a NL?  if so, there are no flags.  otherwise,
    // retrieve it as the flags field.
    node->flags = 0;
    if (manifest[ptr] != '\n') {
      node->flags = manifest[ptr];
      ptr ++;
    }

    // whether or not there was a flag, the entry must now end with a NL that
    // lies inside the manifest.
    if (ptr >= manifest_sz || manifest[ptr] != '\n') {
      return (convert_from_flat_result_t) {
          CONVERT_FROM_FLAT_WTF, NULL };
    }
    ptr ++;

    state->tree->num_leaf_nodes ++;
  }

  // close the root folder.
  close_folder_result_t close_result = close_folder(state, 0);
  if (close_result.code != CLOSE_FOLDER_OK) {
    return (convert_from_flat_result_t) {
        CONVERT_FROM_FLAT_OOM, NULL};
  }

  add_child(state->tree->shadow_root, close_result.node);

  return (convert_from_flat_result_t) {
      CONVERT_FROM_FLAT_OK, state->tree };
}
/**
 * Writes the flat-manifest entries for every leaf below `node`, which must be
 * a directory (`TYPE_IMPLICIT`).
 *
 * Leaves are written as the current directory prefix followed by the
 * filename, a null character, the hex-encoded checksum, an optional flag
 * character, and a NL.  Directories extend the prefix with "<name>/" for the
 * duration of the recursive call.
 *
 * Returns `CONVERT_TO_FLAT_OOM` if a buffer could not be expanded, including
 * when that happens while walking a nested directory.
 */
static convert_to_flat_code_t convert_to_flat_iterator(
    to_flat_state_t* state,
    const node_t* node) {
  assert(node->type == TYPE_IMPLICIT);

  for (uint32_t ix = 0; ix < node->num_children; ix ++) {
    node_t *child = get_child_by_index(node, ix);

    if (child->type == TYPE_LEAF) {
      size_t space_needed = state->dirpath_build_buffer_idx +
                            child->name_sz +
                            1 /* null character */ +
                            (SHA1_BYTES * 2) +
                            (child->flags != '\000' ? 1 : 0) +
                            1 /* NL */;

      if (CONVERT_EXPAND_TO_FIT(
              &state->output_buffer,
              &state->output_buffer_idx,
              &state->output_buffer_sz,
              space_needed) == false) {
        return CONVERT_TO_FLAT_OOM;
      }

      // copy the dirpath over to the output buffer.
      memcpy(&state->output_buffer[state->output_buffer_idx],
          state->dirpath_build_buffer,
          state->dirpath_build_buffer_idx);
      state->output_buffer_idx += state->dirpath_build_buffer_idx;

      // copy the filename over to the output buffer.
      memcpy(&state->output_buffer[state->output_buffer_idx],
          child->name, child->name_sz);
      state->output_buffer_idx += child->name_sz;

      // terminate the path with a null character.
      state->output_buffer[state->output_buffer_idx] = '\000';
      state->output_buffer_idx ++;

      // transcribe the sha over.
      hexlify(child->checksum, SHA1_BYTES,
          &state->output_buffer[state->output_buffer_idx]);
      state->output_buffer_idx += (SHA1_BYTES * 2);

      // the flag character is only present when it is set.
      if (child->flags != '\000') {
        state->output_buffer[state->output_buffer_idx] = child->flags;
        state->output_buffer_idx ++;
      }

      state->output_buffer[state->output_buffer_idx] = '\n';
      state->output_buffer_idx ++;

      assert(state->output_buffer_idx < state->output_buffer_sz);
    } else {
      // save the old value...
      size_t previous_dirpath_build_buffer_idx =
          state->dirpath_build_buffer_idx;

      size_t space_needed = child->name_sz +
                            1 /* '/' character */;

      if (CONVERT_EXPAND_TO_FIT(
              &state->dirpath_build_buffer,
              &state->dirpath_build_buffer_idx,
              &state->dirpath_build_buffer_sz,
              space_needed) == false) {
        return CONVERT_TO_FLAT_OOM;
      }

      // append this directory's name to the dirpath.
      memcpy(&state->dirpath_build_buffer[state->dirpath_build_buffer_idx],
          child->name, child->name_sz);
      state->dirpath_build_buffer_idx += child->name_sz;

      state->dirpath_build_buffer[state->dirpath_build_buffer_idx] = '/';
      state->dirpath_build_buffer_idx ++;

      // a failure below this directory must abort the whole conversion,
      // otherwise a truncated manifest would be reported as a success.
      convert_to_flat_code_t nested_result =
          convert_to_flat_iterator(state, child);
      if (nested_result != CONVERT_TO_FLAT_OK) {
        return nested_result;
      }

      // ...and drop this directory from the dirpath again.
      state->dirpath_build_buffer_idx = previous_dirpath_build_buffer_idx;
    }
  }

  return CONVERT_TO_FLAT_OK;
}
/**
 * Locates the real root of `tree` (the only child of the shadow root) and
 * writes every entry below it into `state`.
 *
 * Returns `CONVERT_TO_FLAT_WTF` if the shadow root does not have exactly one
 * child.
 */
static convert_to_flat_code_t convert_to_flat_helper(
    to_flat_state_t* state,
    const tree_t* tree) {
  if (tree->shadow_root->num_children == 1) {
    return convert_to_flat_iterator(
        state,
        get_child_by_index(tree->shadow_root, 0));
  }

  return CONVERT_TO_FLAT_WTF;
}
convert_from_flat_result_t convert_from_flat(
char* manifest, size_t manifest_sz) {
from_flat_state_t *state = init_from_state(manifest_sz);
if (state->tree == NULL) {
free(state);
state = NULL;
}
if (state == NULL) {
return (convert_from_flat_result_t) {
CONVERT_FROM_FLAT_OOM, NULL };
}
convert_from_flat_result_t result =
convert_from_flat_helper(state, manifest, manifest_sz);
if (result.code != CONVERT_FROM_FLAT_OK) {
free(state->tree);
}
free(state);
return result;
}
/**
 * Converts `tree` into a flat manifest.
 *
 * On success the result carries `CONVERT_TO_FLAT_OK`, a malloc'ed buffer and
 * the number of bytes written to it.  On any failure, including failing to
 * allocate the working buffers, the result carries the error code, a NULL
 * buffer and a size of 0.
 */
convert_to_flat_result_t convert_to_flat(tree_t* tree) {
  to_flat_state_t state;

  state.tree = tree;
  state.dirpath_build_buffer = malloc(DEFAULT_BUILD_BUFFER_SZ);
  state.dirpath_build_buffer_idx = 0;
  state.dirpath_build_buffer_sz = DEFAULT_BUILD_BUFFER_SZ;

  // guestimate as to how much space we need.  this could probably be
  // fine-tuned a bit.
  state.output_buffer = malloc(tree->consumed_memory);
  state.output_buffer_idx = 0;
  state.output_buffer_sz = tree->consumed_memory;

  // stays at OOM unless both buffers exist and the conversion gets to run.
  convert_to_flat_code_t result = CONVERT_TO_FLAT_OOM;
  if (state.dirpath_build_buffer != NULL &&
      state.output_buffer != NULL) {
    result = convert_to_flat_helper(&state, tree);
  }

  // the dirpath buffer is scratch space only.
  free(state.dirpath_build_buffer);

  if (result != CONVERT_TO_FLAT_OK) {
    // free the buffer if any error occurred.  the error code is reported
    // even when the output buffer is the allocation that failed.
    free(state.output_buffer);
    return (convert_to_flat_result_t) { result, NULL, 0 };
  }

  return (convert_to_flat_result_t) {
      CONVERT_TO_FLAT_OK, state.output_buffer, state.output_buffer_idx };
}

View File

@ -0,0 +1,87 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert_rt.c: simple benchmark for converting a flat manifest to a tree
//                    and back.  the output can be diff'ed against the input
//                    for more sophisticated testing than the unit tests
//                    provide.
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include "tree.h"
// Converts a `struct timeval` into a number of microseconds.  The seconds are
// widened first so the multiplication cannot overflow a narrow time_t.
static uint64_t timeval_to_usecs(const struct timeval* tv) {
  return ((uint64_t) tv->tv_sec) * 1000000 + (uint64_t) tv->tv_usec;
}

/**
 * Reads the flat manifest named by argv[1], converts it to a tree and back,
 * writes the regenerated manifest to argv[2], and prints how long each
 * conversion took along with the memory consumed by the tree.
 *
 * Exits with status 1 on any error.
 */
int main(int argc, char* argv[]) {
  if (argc < 3) {
    fprintf(stderr, "Usage: %s <manifestfile> <outputfile>\n", argv[0]);
    exit(1);
  }

  FILE *fh = fopen(argv[1], "r");
  if (fh == NULL) {
    fprintf(stderr, "Error: cannot open %s\n", argv[1]);
    exit(1);
  }

  FILE *ofh = fopen(argv[2], "w");
  if (ofh == NULL) {
    fprintf(stderr, "Error: cannot open %s\n", argv[2]);
    exit(1);
  }

  // find out how large the manifest is.  ftello reports failure as -1, which
  // must not be used as an allocation size.
  if (fseeko(fh, 0, SEEK_END) != 0) {
    fprintf(stderr, "Error: cannot read %s\n", argv[1]);
    exit(1);
  }
  off_t length = ftello(fh);
  if (length < 0) {
    fprintf(stderr, "Error: cannot read %s\n", argv[1]);
    exit(1);
  }
  rewind(fh);

  // always allocate at least one byte so that an empty manifest does not
  // look like an allocation failure.
  char *flatmanifest = malloc(length > 0 ? (size_t) length : 1);
  if (flatmanifest == NULL) {
    fprintf(stderr, "Error: cannot allocate memory for reading %s\n", argv[1]);
    exit(1);
  }

  if (length > 0 && fread(flatmanifest, length, 1, fh) != 1) {
    fprintf(stderr, "Error: cannot read %s\n", argv[1]);
    exit(1);
  }
  fclose(fh);

  struct timeval before_from, after_from;
  gettimeofday(&before_from, NULL);
  convert_from_flat_result_t from_flat = convert_from_flat(
      flatmanifest, length);
  gettimeofday(&after_from, NULL);

  if (from_flat.code != CONVERT_FROM_FLAT_OK) {
    fprintf(stderr, "Error: converting from flat manifest\n");
    exit(1);
  }

  struct timeval before_to, after_to;
  gettimeofday(&before_to, NULL);
  convert_to_flat_result_t to_flat = convert_to_flat(from_flat.tree);
  gettimeofday(&after_to, NULL);

  if (to_flat.code != CONVERT_TO_FLAT_OK) {
    fprintf(stderr, "Error: converting to flat manifest\n");
    exit(1);
  }

  // fwrite with a zero-sized item always reports 0 items written, so only
  // write when there is something to write.
  if (to_flat.flat_manifest_sz > 0 &&
      fwrite(to_flat.flat_manifest, to_flat.flat_manifest_sz, 1, ofh) != 1) {
    fprintf(stderr, "Error: writing flat manifest\n");
    exit(1);
  }

  // buffered data is only flushed on close, so a failure here means the
  // output is incomplete.
  if (fclose(ofh) != 0) {
    fprintf(stderr, "Error: writing flat manifest\n");
    exit(1);
  }

  uint64_t usecs_from =
      timeval_to_usecs(&after_from) - timeval_to_usecs(&before_from);
  uint64_t usecs_to =
      timeval_to_usecs(&after_to) - timeval_to_usecs(&before_to);

  // the casts make the arguments match the conversion specifiers on every
  // platform.
  printf("flat -> tree: %llu us\n", (unsigned long long) usecs_from);
  printf("tree -> flat: %llu us\n", (unsigned long long) usecs_to);
  printf("tree consumed memory: %zu\n",
      (size_t) from_flat.tree->consumed_memory);

  free(to_flat.flat_manifest);
  free(flatmanifest);

  return 0;
}

View File

@ -0,0 +1,131 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert_test.c: tests for methods to convert flat manifests to and
// from a tree.
#include "node.h"
#include "tree.h"
#include "tests.h"
// Flat manifest with three files in the root directory.  Each entry is the
// path, a null character, 40 hex digits, an optional flag character and a NL.
// Only "def" carries a flag ('l').
#define SIMPLE_CONVERT_INPUT \
"abc\000b80de5d138758541c5f05265ad144ab9fa86d1db\n" \
"def\000f6d864039d10a8934d0d581d342780298aa9fb28l\n" \
"ghi\0000f421b102b0baa760a5d4c5759f339cfc1f7d01b\n"
/**
 * Converts a flat manifest with three top-level files and verifies that each
 * file can be looked up with the expected checksum and flags.
 */
void test_simple_convert() {
  char manifest[] = SIMPLE_CONVERT_INPUT;
  // sizeof counts the implicit trailing null, which is not manifest content.
  size_t manifest_sz = sizeof(manifest) - 1;

  convert_from_flat_result_t from_flat = convert_from_flat(
      manifest, manifest_sz);
  ASSERT(from_flat.code == CONVERT_FROM_FLAT_OK);

  tree_t* tree = from_flat.tree;
  ASSERT(tree->compacted == true);
  ASSERT(tree->num_leaf_nodes == 3);

  // "abc": no flags.
  const char* abc_checksum =
      "\xb8\x0d\xe5\xd1\x38\x75\x85\x41\xc5\xf0\x52\x65\xad\x14\x4a\xb9\xfa"
      "\x86\xd1"
      "\xdb";
  get_path_result_t abc = get_path(tree, STRPLUSLEN("abc"));
  ASSERT(abc.code == GET_PATH_OK);
  ASSERT(abc.node->checksum_valid == true);
  ASSERT(abc.node->checksum_sz == SHA1_BYTES);
  ASSERT(memcmp(abc.node->checksum, abc_checksum, SHA1_BYTES) == 0);
  ASSERT(abc.node->flags == 0);

  // "def": carries the 'l' flag.
  const char* def_checksum =
      "\xf6\xd8\x64\x03\x9d\x10\xa8\x93\x4d\x0d\x58\x1d\x34\x27\x80\x29\x8a"
      "\xa9\xfb\x28";
  get_path_result_t def = get_path(tree, STRPLUSLEN("def"));
  ASSERT(def.code == GET_PATH_OK);
  ASSERT(def.node->checksum_valid == true);
  ASSERT(def.node->checksum_sz == SHA1_BYTES);
  ASSERT(memcmp(def.node->checksum, def_checksum, SHA1_BYTES) == 0);
  ASSERT(def.node->flags == 'l');

  // "ghi": no flags.
  const char* ghi_checksum =
      "\x0f\x42\x1b\x10\x2b\x0b\xaa\x76\x0a\x5d\x4c\x57\x59\xf3\x39\xcf\xc1"
      "\xf7\xd0\x1b";
  get_path_result_t ghi = get_path(tree, STRPLUSLEN("ghi"));
  ASSERT(ghi.code == GET_PATH_OK);
  ASSERT(ghi.node->checksum_valid == true);
  ASSERT(ghi.node->checksum_sz == SHA1_BYTES);
  ASSERT(memcmp(ghi.node->checksum, ghi_checksum, SHA1_BYTES) == 0);
  ASSERT(ghi.node->flags == 0);

  destroy_tree(from_flat.tree);
}
// Flat manifest with files at several directory depths, including names that
// are prefixes of one another ("abc", "abcd/...", "abcdefghi").
//
// The last line deliberately has no trailing backslash: a line continuation
// there would splice whatever follows the macro into its definition.
#define CONVERT_TREE_INPUT                                          \
  "abc\0007a091c781cf86fc5b7c2e93eb9f233c4220026a2\n"               \
  "abcd/efg\000f33dcd6a4ef633eb1fa02ec72cb76c4043390a50\n"          \
  "abcd/efgh/ijk\000b6fb5f7b2f3b499ad04b6e97f78904d5314ec690\n"     \
  "abcd/efghi\00042aece97c3e7db21fbc7559918aba6b6e925a64d\n"        \
  "abcdefghi\000c4c71e7b43d108fb869c28107c39d21c166be837\n"
// Looks up `path_const` (passed through STRPLUSLEN) in `tree` and asserts that
// the lookup finishes with the code `expected_result`.
//
// Wrapped in do/while (0) so that the macro behaves like a single statement
// wherever it is used, e.g. as the body of an unbraced if/else.
#define GET_TEST(tree, path_const, expected_result)                           \
  do {                                                                        \
    get_path_result_t get_result = get_path((tree), STRPLUSLEN(path_const));  \
    ASSERT(get_result.code == (expected_result));                             \
  } while (0)
/**
 * Converts a flat manifest containing nested directories and verifies that
 * every path in it can be found, and that a path that is not in it cannot.
 */
void test_convert_tree() {
  char manifest[] = CONVERT_TREE_INPUT;
  // sizeof counts the implicit trailing null, which is not manifest content.
  size_t manifest_sz = sizeof(manifest) - 1;

  convert_from_flat_result_t from_flat = convert_from_flat(
      manifest, manifest_sz);
  ASSERT(from_flat.code == CONVERT_FROM_FLAT_OK);

  tree_t *converted = from_flat.tree;
  ASSERT(converted->compacted == true);
  ASSERT(converted->num_leaf_nodes == 5);

  // everything that went in must be found...
  GET_TEST(converted, "abc", GET_PATH_OK);
  GET_TEST(converted, "abcd/efg", GET_PATH_OK);
  GET_TEST(converted, "abcd/efgh/ijk", GET_PATH_OK);
  GET_TEST(converted, "abcd/efghi", GET_PATH_OK);
  GET_TEST(converted, "abcdefghi", GET_PATH_OK);

  // ...and a near miss must not be.
  GET_TEST(converted, "abcdefghij", GET_PATH_NOT_FOUND);

  destroy_tree(from_flat.tree);
}
// Flat manifest used for the round-trip test.  It includes a file nested
// three directories deep ("abcd/efgh/ijk/lm").
//
// The last line deliberately has no trailing backslash: a line continuation
// there would splice whatever follows the macro into its definition.
#define CONVERT_BIDIRECTIONALLY_INPUT                                 \
  "abc\0007a091c781cf86fc5b7c2e93eb9f233c4220026a2\n"                 \
  "abcd/efg\000f33dcd6a4ef633eb1fa02ec72cb76c4043390a50\n"            \
  "abcd/efgh/ijk/lm\000b6fb5f7b2f3b499ad04b6e97f78904d5314ec690\n"    \
  "abcd/efghi\00042aece97c3e7db21fbc7559918aba6b6e925a64d\n"          \
  "abcdefghi\000c4c71e7b43d108fb869c28107c39d21c166be837\n"
void test_convert_bidirectionally() {
char input[] = CONVERT_BIDIRECTIONALLY_INPUT;
size_t size = sizeof(input) - 1; // exempt the final null
convert_from_flat_result_t from_result = convert_from_flat(
input, size);
ASSERT(from_result.code == CONVERT_FROM_FLAT_OK);
tree_t *tree = from_result.tree;
convert_to_flat_result_t to_result = convert_to_flat(tree);
ASSERT(to_result.flat_manifest_sz == size);
ASSERT(
memcmp(input, to_result.flat_manifest, to_result.flat_manifest_sz) == 0);
}
int main(int argc, char* argv[]) {
test_simple_convert();
test_convert_tree();
test_convert_bidirectionally();
return 0;
}

98
fastmanifest/tree_copy.c Normal file
View File

@ -0,0 +1,98 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_copy.c: methods to make a copy of a tree. the new instance is compacted
// into an arena.
#include "internal_result.h"
#include "node.h"
#include "tree.h"
#include "tree_arena.h"
// Status codes reported by `copy_helper`.
typedef enum {
// the node and everything below it was copied.
COPY_OK,
// the arena reported that it is out of memory.
COPY_OOM,
// the arena reported that an allocation exceeded its limits.
COPY_WTF,
} copy_helper_result_t;
/**
 * Recursively clones `src` (and everything below it) into `dst_tree`, then
 * installs the clone as child number `child_num` of `dst_parent`.
 *
 * `child_num` must be <= `dst_parent->num_children`.
 *
 * Returns `COPY_OK` on success.  If the arena cannot provide a node, the
 * corresponding error is returned and the clone is not attached to
 * `dst_parent`.
 */
copy_helper_result_t copy_helper(
    tree_t* dst_tree,
    const node_t* src,
    node_t* dst_parent,
    size_t child_num) {
  arena_alloc_node_result_t allocation = arena_alloc_node_strict(
      dst_tree, src->name, src->name_sz, src->num_children);
  switch (allocation.code) {
    case ARENA_ALLOC_OOM:
      return COPY_OOM;
    case ARENA_ALLOC_EXCEEDED_LIMITS:
      return COPY_WTF;
    case ARENA_ALLOC_OK:
      break;
  }

  // carry the attributes over to the clone.
  node_t* clone = allocation.node;
  clone->type = src->type;
  clone->flags = src->flags;
  clone->checksum_valid = src->checksum_valid;
  if (src->checksum_valid) {
    clone->checksum_sz = src->checksum_sz;
    memcpy(clone->checksum, src->checksum, src->checksum_sz);
  }

  // typically we don't like touching this field manually, but
  // `set_child_by_index` requires the index to be < num_children.
  clone->num_children = src->num_children;

  if (clone->type != TYPE_LEAF) {
    // a directory: clone each child into the matching slot.
    int ix = 0;
    while (ix < src->num_children) {
      copy_helper_result_t child_result =
          copy_helper(
              dst_tree,
              get_child_by_index(src, ix),
              clone,
              ix);

      if (child_result != COPY_OK) {
        return child_result;
      }
      ix ++;
    }
  } else {
    dst_tree->num_leaf_nodes ++;
  }

  set_child_by_index(dst_parent, child_num, clone);

  return COPY_OK;
}
/**
 * Makes a copy of `src`.  The copy is allocated in an arena sized from the
 * memory consumed by `src`, and is marked as compacted.
 *
 * Returns the new tree, or NULL if the tree could not be allocated or copied.
 */
tree_t* copy(const tree_t* src) {
  tree_t* dst = alloc_tree_with_arena(src->consumed_memory);
  if (dst == NULL) {
    // nothing was allocated, so there is nothing to clean up.
    return NULL;
  }

  // prerequisite for using copy_helper is that child_num must be <
  // dst_parent->num_children, so we artificially bump up the num_children
  // for the shadow root.
  assert(max_children(dst->shadow_root) > 0);
  dst->shadow_root->num_children = 1;

  copy_helper_result_t copy_result = copy_helper(
      dst,
      get_child_by_index(src->shadow_root, 0),
      dst->shadow_root,
      0);

  if (copy_result != COPY_OK) {
    destroy_tree(dst);
    return NULL;
  }

  dst->compacted = true;
  return dst;
}

View File

@ -0,0 +1,110 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_copy_test.c: tests to verify methods to make a copy of a tree.
#include "node.h"
#include "tests.h"
#include "tree.h"
/**
 * Copies an empty tree and verifies that the copy is compacted, has no leaf
 * nodes, and consumes as much memory as the tree it was copied from.
 */
void test_copy_empty() {
  tree_t* empty = alloc_tree();
  tree_t* duplicate = copy(empty);

  ASSERT(duplicate != NULL);
  ASSERT(duplicate->compacted == true);
  ASSERT(duplicate->num_leaf_nodes == 0);
  ASSERT(duplicate->consumed_memory == empty->consumed_memory);

  destroy_tree(empty);
  destroy_tree(duplicate);
}
/**
 * Copies an empty tree, then copies the copy, and so on for ten generations.
 * Every generation must be compacted, have no leaf nodes, and consume as much
 * memory as the tree it was copied from.
 */
void test_copy_empty_chain() {
  tree_t* src = alloc_tree();

  for (int ix = 0; ix < 10; ix ++) {
    tree_t* dst = copy(src);

    ASSERT(dst != NULL);
    ASSERT(dst->compacted == true);
    ASSERT(dst->num_leaf_nodes == 0);
    ASSERT(dst->consumed_memory == src->consumed_memory);

    // the copy becomes the source of the next generation.
    tree_t* old_src = src;
    src = dst;

    destroy_tree(old_src);
  }

  // the last generation is never used as a source, so release it here.
  destroy_tree(src);
}
// One file to add to the tree in `test_copy_normal_tree`.
typedef struct {
// path of the file.
char* path;
// length of `path`, not counting the null terminator.
size_t path_sz;
// checksum of the file; the test reads SHA1_BYTES bytes from it.
uint8_t* checksum;
// flags of the file.
uint8_t flags;
} copy_normal_tree_data_t;
// Builds a `copy_normal_tree_data_t` from a path, a checksum and flags.  The
// path length is computed with sizeof, so `path` has to be a string literal
// (or a char array) rather than a pointer.
#define COPY_NORMAL_TREE_DATA(path, checksum, flags) \
(copy_normal_tree_data_t) \
{ \
path, \
sizeof(path) - 1, \
(uint8_t*) checksum, \
flags, \
}
/**
 * Builds a tree with files at several directory depths, copies it, and
 * verifies that every file can be found in the copy with its checksum and
 * flags intact.
 */
void test_copy_normal_tree() {
  copy_normal_tree_data_t input[] = {
      COPY_NORMAL_TREE_DATA("abc",
          "\xe7\xf5\xdd\xad\x5e\x13\x86\x4e\x25\x30\x41\x3a\x69\x8e\x19\xd4\x25"
              "\xc8\x12\x02",
          0x23),
      COPY_NORMAL_TREE_DATA("ab/cde",
          "\x7c\x6a\x4b\x0a\x05\x91\x6c\x89\x9d\x8a\xe6\x38\xcf\x38\x93\x2e"
              "\x4f\x09\xed\x57",
          0x9b),
      COPY_NORMAL_TREE_DATA("abcd/ef",
          "\x3e\x4d\xf1\xe0\x46\x4a\x3e\xb9\x6b\x8d\x55\x6c\x3b\x6b\x00\xee"
              "\x4f\x77\x71\x9e",
          0xda),
      COPY_NORMAL_TREE_DATA("abcd/efg/hi",
          "\x98\x2f\x46\x90\xfe\xc1\xbc\xe0\x8b\xf7\xa5\x47\x65\xe3\xf4\x16"
              "\x5b\xf4\xba\x7c",
          0x44),
  };
  size_t input_sz = sizeof(input) / sizeof(copy_normal_tree_data_t);

  // build the source tree.
  tree_t* src = alloc_tree();
  for (size_t ix = 0; ix < input_sz; ix ++) {
    add_update_path_result_t result =
        add_or_update_path(
            src,
            input[ix].path, input[ix].path_sz,
            input[ix].checksum, SHA1_BYTES,
            input[ix].flags);
    ASSERT(result == ADD_UPDATE_PATH_OK);
  }
  ASSERT(src->compacted == false);
  ASSERT(src->num_leaf_nodes == input_sz);

  // copy() reports failure by returning NULL, so check before using it.
  tree_t* dst = copy(src);
  ASSERT(dst != NULL);

  for (size_t ix = 0; ix < input_sz; ix ++) {
    get_path_result_t get_result =
        get_path(dst, input[ix].path, input[ix].path_sz);
    ASSERT(get_result.code == GET_PATH_OK);
    ASSERT(get_result.node->checksum_valid == true);
    ASSERT(get_result.node->checksum_sz == SHA1_BYTES);
    ASSERT(memcmp(
        get_result.node->checksum, input[ix].checksum, SHA1_BYTES) == 0);
    ASSERT(get_result.node->flags == input[ix].flags);
  }

  destroy_tree(src);
  destroy_tree(dst);
}
int main(int argc, char* argv[]) {
test_copy_empty();
test_copy_empty_chain();
test_copy_normal_tree();
}

234
fastmanifest/tree_test.c Normal file
View File

@ -0,0 +1,234 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_test.c: tests for core methods for tree creation and manipulation.
#include "node.h"
#include "tree.h"
#include "tests.h"
/**
 * Initializes a tree and verifies that the initial two nodes are created
 * correctly: a shadow root with exactly one child, and a real root with no
 * children.  The memory consumed by the tree must be the sum of the two.
 */
void tree_init_test() {
  tree_t* tree = alloc_tree();
  ASSERT(tree != NULL);

  node_t* shadow_root = tree->shadow_root;
  ASSERT(shadow_root != NULL);
  ASSERT(shadow_root->num_children == 1);

  node_t* real_root = get_child_by_index(shadow_root, 0);
  ASSERT(real_root != NULL);
  ASSERT(real_root->num_children == 0);

  ASSERT(tree->consumed_memory == shadow_root->block_sz + real_root->block_sz);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
/**
 * Initializes a tree and adds a node.  Afterwards the tree must hold one leaf
 * node and must not be marked as compacted.
 */
void tree_add_child() {
  tree_t* tree = alloc_tree();
  uint8_t checksum[SHA1_BYTES];

  // any recognizable pattern will do for the checksum.
  for (int ix = 0; ix < SHA1_BYTES; ix ++) {
    checksum[ix] = (uint8_t) ix;
  }

  add_update_path_result_t result =
      add_or_update_path(tree, STRPLUSLEN("abc"),
          checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);
  ASSERT(tree->compacted == false);
  ASSERT(tree->num_leaf_nodes == 1);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
/**
 * Initializes a tree and adds a file and a directory containing a file.  The
 * real root must end up with the file "ab" as its first child and the
 * directory "abc" as its second.
 */
void tree_add_0_cousin_once_removed() {
  tree_t* tree = alloc_tree();
  uint8_t checksum[SHA1_BYTES];

  // any recognizable pattern will do for the checksum.
  for (int ix = 0; ix < SHA1_BYTES; ix ++) {
    checksum[ix] = (uint8_t) ix;
  }

  add_update_path_result_t result;

  result = add_or_update_path(tree, STRPLUSLEN("ab"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  result = add_or_update_path(tree, STRPLUSLEN("abc/de"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  // verify the shadow root.
  ASSERT(tree->shadow_root->num_children == 1);

  // obtain the true root, verify that.
  node_t* real_root = get_child_by_index(tree->shadow_root, 0);

  // verify the real root.
  ASSERT(real_root->num_children == 2);

  // first child should be 'ab'
  node_t* root_first_child = get_child_by_index(real_root, 0);
  ASSERT(root_first_child->num_children == 0);
  ASSERT(root_first_child->type == TYPE_LEAF);
  ASSERT(name_compare("ab", 2, root_first_child) == 0);

  // second child should be 'abc'
  node_t* root_second_child = get_child_by_index(real_root, 1);
  ASSERT(root_second_child->num_children == 1);
  ASSERT(root_second_child->type == TYPE_IMPLICIT);
  ASSERT(name_compare("abc", 3, root_second_child) == 0);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
/**
 * Initializes a tree and adds a long skinny branch: four files, each one a
 * directory level deeper than the previous one.
 */
void tree_add_long_skinny_branch() {
  tree_t* tree = alloc_tree();
  uint8_t checksum[SHA1_BYTES];

  // any recognizable pattern will do for the checksum.
  for (int ix = 0; ix < SHA1_BYTES; ix ++) {
    checksum[ix] = (uint8_t) ix;
  }

  add_update_path_result_t result;

  result = add_or_update_path(tree, STRPLUSLEN("ab"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  result = add_or_update_path(tree, STRPLUSLEN("abc/de"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  result = add_or_update_path(tree, STRPLUSLEN("abc/def/gh"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  result = add_or_update_path(tree, STRPLUSLEN("abc/def/ghi/jkl"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  ASSERT(tree->compacted == false);
  ASSERT(tree->num_leaf_nodes == 4);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
/**
 * Initializes a tree and adds a bushy branch: one file in the root directory
 * and 26 files ("abc/dea" through "abc/dez") in a single subdirectory.
 */
void tree_add_bushy_branch() {
  tree_t* tree = alloc_tree();
  uint8_t checksum[SHA1_BYTES];

  // any recognizable pattern will do for the checksum.
  for (int ix = 0; ix < SHA1_BYTES; ix ++) {
    checksum[ix] = (uint8_t) ix;
  }

  add_update_path_result_t result;

  result = add_or_update_path(tree, STRPLUSLEN("ab"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);

  // the '?' is a placeholder that is replaced with each letter in turn.
  char tempbuffer[] = "abc/de?";

  for (int ix = 0; ix < 26; ix ++) {
    tempbuffer[6] = 'a' + ix;
    result = add_or_update_path(tree, STRPLUSLEN(tempbuffer),
        checksum, SHA1_BYTES, 0);
    ASSERT(result == ADD_UPDATE_PATH_OK);
  }

  ASSERT(tree->compacted == false);
  ASSERT(tree->num_leaf_nodes == 27);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
/**
 * Initializes a tree, adds a node (N0), and then adds a second node (N1) that
 * would require a directory be created where N0 is.  The second add must be
 * rejected as a conflict and must leave the tree unchanged.
 */
void tree_add_conflict() {
  tree_t* tree = alloc_tree();
  uint8_t checksum[SHA1_BYTES];

  // any recognizable pattern will do for the checksum.
  for (int ix = 0; ix < SHA1_BYTES; ix ++) {
    checksum[ix] = (uint8_t) ix;
  }

  add_update_path_result_t result =
      add_or_update_path(tree, STRPLUSLEN("abc/def"),
          checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_OK);
  ASSERT(tree->compacted == false);
  ASSERT(tree->num_leaf_nodes == 1);

  // "abc/def" is a file, so it cannot also be a directory.
  result = add_or_update_path(tree, STRPLUSLEN("abc/def/ghi"),
      checksum, SHA1_BYTES, 0);
  ASSERT(result == ADD_UPDATE_PATH_CONFLICT);
  ASSERT(tree->compacted == false);
  ASSERT(tree->num_leaf_nodes == 1);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
/**
 * Initializes a tree and attempts to retrieve a couple of paths that are not
 * there.  Both lookups must report that the path was not found.
 */
void tree_get_empty() {
  tree_t* tree = alloc_tree();

  get_path_result_t result = get_path(tree, STRPLUSLEN("abc"));
  ASSERT(result.code == GET_PATH_NOT_FOUND);

  result = get_path(tree, STRPLUSLEN("abc/def"));
  ASSERT(result.code == GET_PATH_NOT_FOUND);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
// Flags value stored and then read back by `tree_add_get_simple`.
#define ADD_GET_SIMPLE_FLAGS 0x2e

/**
 * Initializes a tree, adds a single path, and attempts to retrieve it.  The
 * retrieved node must carry the checksum and flags that were stored.  A
 * lookup that treats the file as a directory must not find anything.
 */
void tree_add_get_simple() {
  tree_t* tree = alloc_tree();

  uint8_t checksum[SHA1_BYTES];

  // any recognizable pattern will do for the checksum.
  for (int ix = 0; ix < SHA1_BYTES; ix ++) {
    checksum[ix] = (uint8_t) ix;
  }

  add_update_path_result_t add_result =
      add_or_update_path(tree, STRPLUSLEN("abc"),
          checksum, SHA1_BYTES, ADD_GET_SIMPLE_FLAGS);
  ASSERT(add_result == ADD_UPDATE_PATH_OK);
  ASSERT(tree->compacted == false);
  ASSERT(tree->num_leaf_nodes == 1);

  get_path_result_t get_result = get_path(tree, STRPLUSLEN("abc"));
  ASSERT(get_result.code == GET_PATH_OK);
  ASSERT(get_result.node != NULL);
  ASSERT(get_result.node->checksum_valid == true);
  ASSERT(get_result.node->checksum_sz == SHA1_BYTES);
  ASSERT(memcmp(checksum, get_result.node->checksum, SHA1_BYTES) == 0);
  ASSERT(get_result.node->flags == ADD_GET_SIMPLE_FLAGS);

  get_result = get_path(tree, STRPLUSLEN("abc/def"));
  ASSERT(get_result.code == GET_PATH_NOT_FOUND);
  ASSERT(get_result.node == NULL);

  // release the tree so the test does not leak it.
  destroy_tree(tree);
}
int main(int argc, char* argv[]) {
tree_init_test();
tree_add_child();
tree_add_0_cousin_once_removed();
tree_add_long_skinny_branch();
tree_add_bushy_branch();
tree_add_conflict();
tree_get_empty();
tree_add_get_simple();
return 0;
}