[fastmanifest] tree iteration

Summary: We build our own stack to keep track of where we've been.

Test Plan: Built unit tests to exercise the code; they pass.

Reviewers: #sourcecontrol, lcharignon

Reviewed By: lcharignon

Subscribers: lcharignon, mitrandir, mjpieters

Differential Revision: https://phabricator.fb.com/D3206746

Tasks: 10589038

Signature: t1:3206746:1461704702:f738a41050f19eacdeb83dce6ff536b64896eabc
This commit is contained in:
Tony Tung 2016-04-26 15:39:42 -07:00
parent 1b552da9ef
commit 8a65d97ff2
6 changed files with 416 additions and 0 deletions

View File

@ -37,6 +37,8 @@ add_library(fastmanifest
tree_arena.h
tree_convert.c
tree_copy.c
tree_iterator.c
tree_iterator.h
tree_path.c
tree_path.h
)
@ -63,6 +65,9 @@ target_link_libraries(tree_convert_test bsearch fastmanifest)
add_executable(tree_copy_test tree_copy_test.c)
target_link_libraries(tree_copy_test bsearch fastmanifest)
# Unit-test executable for the tree iterator, built from tree_iterator_test.c
# and linked against the bsearch and fastmanifest libraries.
add_executable(tree_iterator_test tree_iterator_test.c)
target_link_libraries(tree_iterator_test bsearch fastmanifest)
add_executable(tree_convert_rt tree_convert_rt.c)
target_link_libraries(tree_convert_rt bsearch fastmanifest)
@ -102,4 +107,9 @@ IF(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo OR CMAKE_BUILD_TYPE MATCHES Release)
TARGET tree_copy_test
POST_BUILD
COMMAND valgrind -q --error-exitcode=127 $<TARGET_FILE:tree_copy_test>)
# After tree_iterator_test is built, run it under valgrind. `-q` keeps
# valgrind quiet unless it has something to report, and
# `--error-exitcode=127` makes the command exit with 127 when valgrind
# detects errors.
add_custom_command(
TARGET tree_iterator_test
POST_BUILD
COMMAND valgrind -q --error-exitcode=127 $<TARGET_FILE:tree_iterator_test>)
ENDIF(CMAKE_BUILD_TYPE MATCHES RelWithDebInfo OR CMAKE_BUILD_TYPE MATCHES Release)

View File

@ -64,4 +64,13 @@ typedef struct _convert_to_flat_result_t {
size_t flat_manifest_sz;
} convert_to_flat_result_t;
/**
 * One step of a tree iteration, as returned by iterator_next().
 *
 * Only `valid` is meaningful when it is false; the remaining fields should be
 * read only when `valid` is true.
 */
typedef struct _iterator_result_t {
// true when iterator_next() located another leaf entry, false otherwise.
bool valid;
// path of the entry.  iterator_next() only fills in `path` and `path_sz` when
// the iterator was created with construct_paths == true.  the pointer refers
// to the iterator's own path buffer, and the visible iterator code does not
// write a NUL terminator -- use `path_sz` for the length.
const char *path;
size_t path_sz;
// checksum bytes and their length, taken from the node held in the iterator's
// copy of the tree.
// NOTE(review): presumably this pointer stays usable only until
// destroy_iterator() releases that copy -- confirm.
const uint8_t *checksum;
uint8_t checksum_sz;
// flags value taken from the same node.
uint8_t flags;
} iterator_result_t;
#endif /* #ifndef __FASTMANIFEST_RESULT_H__ */

View File

@ -47,6 +47,8 @@ typedef struct _tree_t {
#endif /* #if 0 */
} tree_t;
// opaque handle for an in-progress tree iteration.  created by
// create_iterator() and released by destroy_iterator().
typedef struct _iterator_t iterator_t;
/**
* Returns true iff the path is something digestible by this tree library. The
* rules are:
@ -98,4 +100,10 @@ extern convert_from_flat_result_t convert_from_flat(
extern convert_to_flat_result_t convert_to_flat(tree_t *tree);
/**
 * Creates an iterator over `tree`.  The iterator works on its own copy of the
 * tree.  `construct_paths` selects whether iterator_next() reports the path of
 * each entry.  Returns NULL if an allocation or the tree copy fails.
 */
extern iterator_t *create_iterator(const tree_t *tree, bool construct_paths);
/**
 * Advances the iterator to the next leaf entry.  `valid` is false in the
 * returned struct once no further entry can be produced.
 */
extern iterator_result_t iterator_next(iterator_t *iterator);
/**
 * Releases the iterator along with its copy of the tree, its path records and
 * its path buffer.
 */
extern void destroy_iterator(iterator_t *iterator);
#endif /* #ifndef __FASTMANIFEST_TREE_H__ */

View File

@ -0,0 +1,213 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterator.c: implementation for traversing all the nodes of a tree
// in-order.
//
// no-check-code
#include <stdlib.h>
#include "buffer.h"
#include "node.h"
#include "tree.h"
#include "tree_iterator.h"
// number of path_record_t slots allocated for the traversal stack.  the stack
// is never resized in this file, so this bounds how deep a traversal can go.
#define DEFAULT_PATH_RECORDS_SZ 1024
// initial size, in bytes, of the buffer used to assemble paths.
#define DEFAULT_PATH_BUFFER_SZ 16384
// growth parameters handed to expand_to_fit() whenever the path buffer has to
// be enlarged.
// NOTE(review): presumably a multiplicative factor plus lower/upper bounds on
// each growth step, as the names suggest -- confirm against buffer.h.
#define PATH_BUFFER_GROWTH_FACTOR 1.2
#define PATH_BUFFER_MINIMUM_GROWTH 65536
#define PATH_BUFFER_MAXIMUM_GROWTH (1024 * 1024)
// convenience wrapper: calls expand_to_fit() with the path buffer's growth
// parameters filled in.
#define ITERATE_EXPAND_TO_FIT(buffer, buffer_idx, buffer_sz, input_sz) \
expand_to_fit(buffer, buffer_idx, buffer_sz, input_sz, \
PATH_BUFFER_GROWTH_FACTOR, \
PATH_BUFFER_MINIMUM_GROWTH, \
PATH_BUFFER_MAXIMUM_GROWTH)
/**
 * Creates an iterator over `tree`.
 *
 * The iterator owns a private copy of the tree (made with copy_tree()), a
 * fixed-size stack of DEFAULT_PATH_RECORDS_SZ path records, and a path buffer
 * that starts out at DEFAULT_PATH_BUFFER_SZ bytes.
 *
 * @param tree            the tree to iterate over; it is copied.
 * @param construct_paths whether iterator_next() should assemble the path of
 *                        each entry it returns.
 * @return the new iterator, or NULL if any allocation or the tree copy failed.
 *         Release a non-NULL result with destroy_iterator().
 */
iterator_t *create_iterator(const tree_t *tree, bool construct_paths) {
  iterator_t *result = malloc(sizeof(*result));
  path_record_t *path_records =
      malloc(sizeof(*path_records) * DEFAULT_PATH_RECORDS_SZ);
  char *path = malloc(DEFAULT_PATH_BUFFER_SZ);

  // the tree copy is attempted last, and only once every allocation above has
  // succeeded.  the assignment lives inside the condition so that it is
  // skipped (via short-circuiting) when `result` could not be allocated.
  if (result == NULL || path_records == NULL || path == NULL ||
      (result->copy = copy_tree(tree)) == NULL) {
    goto fail;
  }

  // success!
  result->path_records = path_records;
  result->path_records_idx = 0;
  result->path = path;
  result->path_idx = 0;
  result->path_sz = DEFAULT_PATH_BUFFER_SZ;
  result->construct_paths = construct_paths;

  return result;

fail:
  // no tree copy exists on this path: either copy_tree() was never reached or
  // it returned NULL.  `result->copy` may therefore be uninitialized and must
  // not be inspected here.  free(NULL) is a no-op, so no guards are needed.
  free(result);
  free(path_records);
  free(path);

  return NULL;
}
// outcome of a single iterator_find_next() search.
typedef enum {
// a leaf node was reached.
ITERATOR_FOUND,
// every path record was popped without reaching another leaf.
ITERATOR_NOT_FOUND,
// a child index failed VERIFY_CHILD_NUM.
ITERATOR_ERROR,
} iterator_progress_t;
/**
 * Drives the traversal stack forward until the next leaf is reached.
 *
 * The record on top of the stack (index path_records_idx - 1) names a
 * directory node and the index of the child to examine next.  Non-leaf
 * children are pushed onto the stack and descended into; exhausted records are
 * popped and the parent's child index is advanced.
 *
 * @return ITERATOR_FOUND when the top record's child_idx designates a leaf
 *         (with the leaf's name appended to the path if paths are being
 *         constructed), ITERATOR_NOT_FOUND once the stack is empty, or
 *         ITERATOR_ERROR if a child index fails VERIFY_CHILD_NUM.
 *
 * Calls abort() if descending would need more than DEFAULT_PATH_RECORDS_SZ
 * path records.
 */
static iterator_progress_t iterator_find_next(iterator_t *iterator) {
  while (iterator->path_records_idx > 0) {
    size_t read_idx = iterator->path_records_idx - 1;
    path_record_t *record = &iterator->path_records[read_idx];

    if (record->child_idx < record->node->num_children) {
      if (!VERIFY_CHILD_NUM(record->child_idx)) {
        return ITERATOR_ERROR;
      }

      node_t *candidate = get_child_by_index(
          record->node, (child_num_t) record->child_idx);

      if (iterator->construct_paths &&
          candidate->type != TYPE_ROOT) {
        // if it's not a root node, we need to slap on the name.  reserve one
        // extra byte so a trailing '/' can be added below without another
        // expansion.
        // NOTE(review): the outcome of the expansion is not checked here; if
        // expand_to_fit() can report failure, it should be handled -- confirm
        // against buffer.h.
        ITERATE_EXPAND_TO_FIT(
            &iterator->path,
            &iterator->path_idx,
            &iterator->path_sz,
            candidate->name_sz + 1
        );

        memcpy(&iterator->path[iterator->path_idx],
            candidate->name, candidate->name_sz);
        iterator->path_idx += candidate->name_sz;
      }

      // if it's a leaf node, we have the name already added to the path if
      // required.  the stack is left untouched so the caller can read the
      // leaf through the top record and later resume from it.
      if (candidate->type == TYPE_LEAF) {
        return ITERATOR_FOUND;
      }

      // if it's an implicit node, and we want to construct paths, we need to
      // add a '/' to the constructed path.
      if (iterator->construct_paths &&
          candidate->type == TYPE_IMPLICIT) {
        iterator->path[iterator->path_idx] = '/';
        iterator->path_idx++;
      }

      // has to either be TYPE_IMPLICIT or TYPE_ROOT at this point, so we are
      // about to push a record.  the stack has a fixed size, so make sure the
      // slot exists *before* every push; several pushes can happen within a
      // single call.
      if (iterator->path_records_idx >= DEFAULT_PATH_RECORDS_SZ) {
        // we've traversed too deep.
        abort();
      }

      path_record_t *pushed =
          &iterator->path_records[iterator->path_records_idx];
      pushed->node = candidate;
      pushed->child_idx = 0;
      pushed->previous_path_idx = iterator->path_idx;
      iterator->path_records_idx++;

      // start at the top of the while loop again.
      continue;
    }

    // done considering all the children at this level, pop off a path record
    // and continue.
    iterator->path_records_idx--;

    // if we have parents, we should restore the state
    if (iterator->path_records_idx > 0) {
      // path_records_idx is where we write the *next* record, so we have to
      // go back up one more record.
      path_record_t *parent =
          &iterator->path_records[iterator->path_records_idx - 1];

      iterator->path_idx = parent->previous_path_idx;
      parent->child_idx++;
    }
  }

  return ITERATOR_NOT_FOUND;
}
/**
 * Advances the iterator and reports the next leaf entry.
 *
 * @param iterator an iterator obtained from create_iterator().
 * @return a result whose `valid` field is true when another leaf was found.
 *         In that case `checksum`, `checksum_sz` and `flags` come from the
 *         leaf node, and `path`/`path_sz` are filled in only if the iterator
 *         was created with construct_paths == true (they refer to the
 *         iterator's own path buffer).  When no leaf is found, or the search
 *         reports an error, `valid` is false.  Every field that is not
 *         explicitly filled in is zero / NULL.
 *
 * Calls abort() if the child index of the located leaf fails
 * VERIFY_CHILD_NUM.
 *
 * Note: once the traversal is exhausted the record stack is empty again, which
 * is the same state as a fresh iterator, so a further call starts a new
 * traversal.
 */
iterator_result_t iterator_next(iterator_t *iterator) {
  // zero everything up front so that callers never observe indeterminate
  // fields, whichever branch is taken below.
  iterator_result_t result = {0};

  // special case: if we haven't started iterating yet, then there will be no
  // path records.
  if (iterator->path_records_idx == 0) {
    // search for the first leaf node, starting from the first child of the
    // shadow root.
    const node_t *search_start =
        get_child_by_index(iterator->copy->shadow_root, 0);

    // record the progress into the iterator struct
    iterator->path_records[0].node = search_start;
    iterator->path_records[0].child_idx = 0;
    iterator->path_records[0].previous_path_idx = 0;

    // at the start, reads come from 0, writes go to 1.
    iterator->path_records_idx = 1;
  } else {
    // resume after the leaf returned by the previous call: step past it and
    // truncate the path up to the last directory.
    path_record_t *top =
        &iterator->path_records[iterator->path_records_idx - 1];

    top->child_idx++;
    iterator->path_idx = top->previous_path_idx;
  }

  iterator_progress_t progress = iterator_find_next(iterator);

  if (progress != ITERATOR_FOUND) {
    result.valid = false;
    return result;
  }

  // the top record's child_idx designates the leaf that was just found.
  path_record_t *record =
      &iterator->path_records[iterator->path_records_idx - 1];

  if (!VERIFY_CHILD_NUM(record->child_idx)) {
    abort();
  }

  node_t *child = get_child_by_index(
      record->node, (child_num_t) record->child_idx);

  result.valid = true;
  if (iterator->construct_paths) {
    result.path = iterator->path;
    result.path_sz = iterator->path_idx;
  }
  result.checksum = child->checksum;
  result.checksum_sz = child->checksum_sz;
  result.flags = child->flags;

  return result;
}
/**
 * Releases an iterator created by create_iterator(): its copy of the tree, its
 * path record stack, its path buffer, and the iterator struct itself.
 *
 * Passing NULL is a no-op, mirroring free(), so callers can clean up
 * unconditionally even when create_iterator() failed.
 */
void destroy_iterator(iterator_t *iterator) {
  if (iterator == NULL) {
    return;
  }

  destroy_tree(iterator->copy);
  free(iterator->path_records);
  free(iterator->path);
  free(iterator);
}

View File

@ -0,0 +1,43 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterator.h: declarations for traversing all the nodes of a tree
// in-order.
//
// no-check-code
#ifndef __FASTMANIFEST_TREE_ITERATOR_H__
#define __FASTMANIFEST_TREE_ITERATOR_H__
#include <stdbool.h>
#include <stdlib.h>
#include "node.h"
// one frame of the iterator's traversal stack: a directory node currently
// being walked and the position reached within it.
typedef struct _path_record_t {
// the node whose children are being enumerated.
const node_t* node;
// index of the child of `node` that is currently being considered.
size_t child_idx;
// this is how much of the path was already present when we started walking
// this node.  once we close this path, we should restore the iterator's
// path_idx to this value.
size_t previous_path_idx;
} path_record_t;
// state of an in-progress tree iteration.
// NOTE(review): tree_t is not declared by the includes visible in this
// header; presumably the including file pulls in tree.h first -- confirm.
typedef struct _iterator_t {
// private copy of the tree being iterated; released by destroy_iterator().
tree_t* copy;
// whether paths should be assembled while iterating.
bool construct_paths;
// track where we are in the iteration process.
path_record_t *path_records;
// this is where the next path record should be written to.
size_t path_records_idx;
// track the path, if path construction is requested.  `path` is the buffer,
// `path_idx` is the number of bytes currently in use, and `path_sz` is the
// buffer's allocated size.
char *path;
size_t path_idx;
size_t path_sz;
} iterator_t;
#endif // #ifndef __FASTMANIFEST_TREE_ITERATOR_H__

View File

@ -0,0 +1,133 @@
// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_iterator_test.c: tests for traversing all the nodes of a tree in-order.
//
// no-check-code
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include "tree.h"
#include "tree_iterator.h"
#include "tests.h"
// describes one entry that an iteration is expected to produce.
typedef struct _iterator_expectations_t {
// expected path and its length; only compared when `path_present` is true.
char *path;
size_t path_sz;
bool path_present;
// seed handed to int2sha1hash() to generate the expected checksum.
uint32_t checksum_primer;
// expected flags value for the entry.
uint8_t flags;
} iterator_expectations_t;
/**
 * Drains `iterator` and compares each result, in order, against
 * `expectations`.
 *
 * For every entry the checksum and the flags are compared; the path is
 * compared only when the expectation has `path_present` set.
 *
 * @param iterator        the iterator to drain.
 * @param expectations    array of expected entries; not dereferenced when
 *                        `expectations_sz` is 0.
 * @param expectations_sz number of elements in `expectations`.
 * @return true iff the iterator produced exactly `expectations_sz` entries and
 *         each of them matched its expectation.
 */
static bool match_expectations(
    iterator_t *iterator,
    iterator_expectations_t *expectations,
    size_t expectations_sz) {
  size_t ix = 0;
  uint8_t expected_checksum[SHA1_BYTES];

  while (true) {
    iterator_result_t result = iterator_next(iterator);

    if (result.valid == false) {
      break;
    }

    // the iterator produced more entries than we were told to expect.
    if (ix >= expectations_sz) {
      return false;
    }

    iterator_expectations_t *expectation = &expectations[ix];
    ix++;

    if (expectation->path_present &&
        (expectation->path_sz != result.path_sz ||
         memcmp(expectation->path, result.path, expectation->path_sz) !=
         0)) {
      return false;
    }

    // prime the expected checksum
    int2sha1hash(expectation->checksum_primer, expected_checksum);
    if (SHA1_BYTES != result.checksum_sz ||
        memcmp(expected_checksum, result.checksum, SHA1_BYTES) != 0) {
      return false;
    }

    // every expectation carries a flags value, so make sure it is honored.
    if (expectation->flags != result.flags) {
      return false;
    }
  }

  // the iterator must not stop short of the expected number of entries.
  return (ix == expectations_sz);
}
/**
 * Iterating over a freshly allocated tree must not yield any entries.
 */
void test_empty_tree() {
  tree_t *tree = alloc_tree();
  iterator_t *iterator = create_iterator(tree, false);

  // no entries are expected.  a zero-length array with an empty initializer
  // is not valid standard C, so pass an empty expectation list explicitly;
  // match_expectations() never dereferences the array when the count is 0.
  ASSERT(match_expectations(iterator, NULL, 0));

  destroy_iterator(iterator);
  destroy_tree(tree);
}
/**
 * A tree holding a single file must yield exactly that file, with its path,
 * when iterated with path construction enabled.
 */
void test_simple_tree() {
  tree_t *tree = alloc_tree();

  // populate the tree with one top-level entry.
  add_to_tree_t entries[] = {
      {STRPLUSLEN("abc"), 12345, 5},
  };
  size_t entries_count = sizeof(entries) / sizeof(add_to_tree_t);
  add_to_tree(tree, entries, entries_count);

  iterator_t *iterator = create_iterator(tree, true);

  // the lone entry should come back unchanged.
  iterator_expectations_t expected[] = {
      {STRPLUSLEN("abc"), true, 12345, 5},
  };
  size_t expected_count = sizeof(expected) / sizeof(iterator_expectations_t);

  ASSERT(match_expectations(iterator, expected, expected_count));

  destroy_iterator(iterator);
  destroy_tree(tree);
}
/**
 * A tree with nested directories and entries added out of order must be
 * iterated in the order listed in `expectations`, with full paths
 * reconstructed.
 */
void test_complicated_tree() {
  tree_t *tree = alloc_tree();

  add_to_tree_t toadd[] = {
      {STRPLUSLEN("abc"),               12345, 5},
      {STRPLUSLEN("ab/cdef/gh"),        64342, 55},
      {STRPLUSLEN("ab/cdef/ghi/jkl"),   51545, 57},
      {STRPLUSLEN("ab/cdef/ghi/jklm"),  54774, 12},
      {STRPLUSLEN("ab/cdef/ghi/jklmn"), 48477, 252},
      {STRPLUSLEN("a"),                 577,   14},
  };

  add_to_tree(tree, toadd, sizeof(toadd) / sizeof(add_to_tree_t));

  iterator_t *iterator = create_iterator(tree, true);
  iterator_expectations_t expectations[] =
      {
          {STRPLUSLEN("a"),                 true, 577,   14},
          {STRPLUSLEN("ab/cdef/gh"),        true, 64342, 55},
          {STRPLUSLEN("ab/cdef/ghi/jkl"),   true, 51545, 57},
          {STRPLUSLEN("ab/cdef/ghi/jklm"),  true, 54774, 12},
          {STRPLUSLEN("ab/cdef/ghi/jklmn"), true, 48477, 252},
          {STRPLUSLEN("abc"),               true, 12345, 5},
      };

  ASSERT(match_expectations(iterator, expectations,
      sizeof(expectations) / sizeof(iterator_expectations_t)));

  destroy_iterator(iterator);
  // the iterator only owns its private copy; the original tree still has to
  // be released here, as the other tests do.
  destroy_tree(tree);
}
/**
 * Test driver: runs each iterator test in turn.  A failing ASSERT is expected
 * to stop the run before the final return.
 */
int main(int argc, char *argv[]) {
  // the command line is not consulted.
  (void) argc;
  (void) argv;

  test_empty_tree();
  test_simple_tree();
  test_complicated_tree();

  return 0;
}