sapling/cfastmanifest/tree_convert.c
Tony Tung 2b3e7ac198 port upgrades of buffer.h into clib
Summary:
buffer.h gained the ability to deal with non-char-sized buffers when I built cdatapack.  We need to update the callers in ctreemanifest to be aware of this.  Most of this is done with macro magic.

Some functionality was dropped from cdatapack's buffer.h (macro definitions to deal with paths).  Those are moved to path_buffer.h

Test Plan:
make local && clion build.
pass cfastmanifest unit tests.

Reviewers: #fastmanifest, durham

Reviewed By: durham

Subscribers: mitrandir, mjpieters

Differential Revision: https://phabricator.intern.facebook.com/D3780767

Signature: t1:3780767:1472255278:40a19edfd171df5804e9cdfa4444d5c6386f00e8
2016-08-26 17:14:52 -07:00

591 lines
18 KiB
C

// Copyright 2016-present Facebook. All Rights Reserved.
//
// tree_convert.c: methods to convert flat manifests to and from a tree.
//
// no-check-code
#include <stdlib.h>
#include <sys/types.h>
#include "buffer.h"
#include "convert.h"
#include "path_buffer.h"
#include "tree.h"
#include "tree_arena.h"
// number of open_folder_t slots in from_flat_state_t, i.e. the deepest chain
// of simultaneously open folders the flat -> tree conversion can track.
#define MAX_FOLDER_DEPTH 1024
// number of child offsets stored inline in every open_folder_t before the
// child list has to move to the heap.
#define DEFAULT_CHILDREN_CAPACITY 4096
// growth parameters forwarded to expand_to_fit() by CONVERT_EXPAND_TO_FIT.
// NOTE(review): presumably a multiplicative factor bounded by a minimum and a
// maximum growth step -- confirm against buffer.h.
#define BUFFER_GROWTH_FACTOR 1.2
#define BUFFER_MINIMUM_GROWTH 1048576
#define BUFFER_MAXIMUM_GROWTH (32 * 1024 * 1024)
// convenience wrapper around expand_to_fit() for char-sized buffers that
// applies this file's growth parameters.  every argument is parenthesized so
// that arbitrary expressions can be passed without precedence surprises.
// evaluates to the value returned by expand_to_fit(); callers in this file
// treat `false` as an out-of-memory condition.
#define CONVERT_EXPAND_TO_FIT(buffer, buffer_idx, buffer_sz, input_sz) \
  expand_to_fit((void **) (buffer), (buffer_idx), (buffer_sz), (input_sz), \
                sizeof(char), \
                BUFFER_GROWTH_FACTOR, \
                BUFFER_MINIMUM_GROWTH, \
                BUFFER_MAXIMUM_GROWTH)
// bookkeeping for one folder that is currently "open" while a flat manifest
// is being converted into a tree.  children that have already been built are
// collected here until the folder itself is closed and turned into a node.
typedef struct _open_folder_t {
// name of this folder.  this is a reference to the flat manifest's memory.
// we do not own this memory, and we must copy it before the conversion
// completes.
const char *subfolder_name;
// length of `subfolder_name` in bytes.  for folders opened while scanning a
// path this includes the trailing '/'.
size_t subfolder_name_sz;
// readers may wonder why we store a relative pointer. this is because
// storing node_t* pointers is UNSAFE. they are allocated on the arena, and
// can be moved at a moment's notice. the only thing that's safe to do is to
// store an offset from the start of the arena.
ptrdiff_t closed_children_prealloc[DEFAULT_CHILDREN_CAPACITY];
// the active child-offset array: either `closed_children_prealloc` or a
// heap allocation made once the preallocated area fills up.
ptrdiff_t *closed_children;
// number of valid entries in `closed_children`.
size_t closed_children_count;
// number of entries `closed_children` has room for.
size_t closed_children_capacity;
// true while this slot represents a folder that has been opened and not yet
// closed.
bool in_use;
} open_folder_t;
// working state for a flat manifest -> tree conversion.
typedef struct _from_flat_state_t {
// the tree being built; nodes are allocated from its arena.
tree_t *tree;
// stack of open folders.  slot 0 is the root folder and slot N + 1 is a
// subfolder of slot N.
open_folder_t folders[MAX_FOLDER_DEPTH];
// number of slots in `folders` that are currently open.
size_t open_folder_count;
} from_flat_state_t;
// working state for a tree -> flat manifest conversion.
typedef struct _to_flat_state_t {
// NOTE(review): no code visible in this file reads this field -- confirm
// whether it is still needed.
const tree_t *tree;
// scratch buffer holding the directory path of the node currently being
// visited, together with its used length and allocated size.
char *dirpath_build_buffer;
size_t dirpath_build_buffer_idx;
size_t dirpath_build_buffer_sz;
// the flat manifest under construction, together with the number of bytes
// written so far and the allocated size.
char *output_buffer;
size_t output_buffer_idx;
size_t output_buffer_sz;
} to_flat_state_t;
/**
 * Compares (`name`, `name_sz`) against the name recorded in `folder`.
 *
 * The common prefix is compared with strncmp(); if it is identical, the
 * shorter name sorts first.
 *
 * Returns <0 if (`name`, `name_sz`) is lexicographically less than the name in
 * folder.
 *
 * Returns =0 if (`name`, `name_sz`) is lexicographically equal to the name in
 * folder.
 *
 * Returns >0 if (`name`, `name_sz`) is lexicographically greater than the name
 * in folder.
 */
static inline int folder_name_compare(
    const char *name,
    size_t name_sz,
    const open_folder_t *folder) {
  size_t folder_sz = folder->subfolder_name_sz;
  size_t min_sz = (name_sz < folder_sz) ? name_sz : folder_sz;

  int cmp = strncmp(name, folder->subfolder_name, min_sz);
  if (cmp != 0) {
    return cmp;
  }

  // the lengths are compared directly rather than by subtracting two size_t
  // values and converting the (possibly wrapped) result to a signed type.
  if (name_sz < folder_sz) {
    return -1;
  }
  if (name_sz > folder_sz) {
    return 1;
  }
  return 0;
}
/**
 * Puts `folder` into the pristine "not in use" state: no recorded children,
 * and the child list pointing at the preallocated in-struct storage.
 *
 * This does not release anything the folder may have pointed at before; it
 * only overwrites the bookkeeping fields.
 */
static void init_open_folder(open_folder_t *folder) {
  folder->closed_children_capacity = DEFAULT_CHILDREN_CAPACITY;
  folder->closed_children_count = 0;
  folder->closed_children = &folder->closed_children_prealloc[0];
  folder->in_use = false;
}
static from_flat_state_t *init_from_state(size_t flat_sz) {
from_flat_state_t *state = malloc(sizeof(from_flat_state_t));
if (state == NULL) {
return NULL;
}
for (int ix = 0; ix < MAX_FOLDER_DEPTH; ix++) {
init_open_folder(&state->folders[ix]);
}
state->open_folder_count = 0;
state->tree = alloc_tree_with_arena(flat_sz * 2);
return state;
}
/**
 * Adds a child to a folder, expanding the folder's child list as needed.
 *
 * The child is recorded as a byte offset from the start of the tree's arena,
 * never as a raw pointer, because arena nodes can move when the arena is
 * resized.
 *
 * Returns true on success.  Returns false if the child list had to grow and
 * the allocation failed; in that case `folder` is left exactly as it was
 * (the existing child list stays valid and owned by the folder).
 */
static bool folder_add_child(
    from_flat_state_t *state,
    open_folder_t *folder,
    node_t *child) {
  if (folder->closed_children_count + 1 >= folder->closed_children_capacity) {
    // time to expand the folder
    size_t new_capacity = folder->closed_children_capacity * 2;
    ptrdiff_t *grown;

    // is the current zone the prealloc zone?  if so, we need to allocate a
    // new zone and copy the existing entries over.
    if (folder->closed_children == folder->closed_children_prealloc) {
      grown = malloc(sizeof(ptrdiff_t) * new_capacity);
      if (grown == NULL) {
        return false;
      }

      memcpy(grown, folder->closed_children_prealloc,
             sizeof(ptrdiff_t) * folder->closed_children_count);
    } else {
      // the result goes through a temporary so that a failed realloc does
      // not overwrite (and thereby leak) the still-valid original block.
      grown = realloc(folder->closed_children,
                      sizeof(ptrdiff_t) * new_capacity);
      if (grown == NULL) {
        return false;
      }
    }

    folder->closed_children = grown;
    folder->closed_children_capacity = new_capacity;
  }

  // we need to store the delta between the start of the arena and the child.
  intptr_t arena_start = (intptr_t) state->tree->arena;
  intptr_t child_start = (intptr_t) child;

  folder->closed_children[folder->closed_children_count] =
      child_start - arena_start;
  folder->closed_children_count++;

  return true;
}
// status codes reported by close_folder().
typedef enum {
// the folder was closed and its node was built.
CLOSE_FOLDER_OK,
// an allocation failed while closing the folder.
CLOSE_FOLDER_OOM,
} close_folder_code_t;
// result of close_folder().
typedef struct _close_folder_result_t {
close_folder_code_t code;
// the node built for the closed folder; NULL unless `code` is
// CLOSE_FOLDER_OK.
node_t *node;
} close_folder_result_t;
/**
 * Close the folder at index `folder_index`.  This may require closing nested
 * folders.  If folder_index is > 0, then add the closed folder to its parent.
 * If the folder_index is 0, it is responsibility of the caller to attach the
 * returned node to the shadow root.
 *
 * Closing a folder allocates a node for it on the tree's arena, attaches all
 * the children collected so far, releases any heap storage the folder's
 * child list had grown into, and resets the slot so it can be reused.
 *
 * Returns {CLOSE_FOLDER_OK, node} on success, or {CLOSE_FOLDER_OOM, NULL} if
 * an allocation failed.  Aborts the process if the number of children does
 * not pass VERIFY_CHILD_NUM.
 */
static close_folder_result_t close_folder(
    from_flat_state_t *state,
    size_t folder_index) {
  open_folder_t *folder = &state->folders[folder_index];
  assert(folder->in_use == true);

  if (folder_index < MAX_FOLDER_DEPTH - 1) {
    // maybe a nested folder needs to be closed?
    if (state->folders[folder_index + 1].in_use) {
      // yup, it needs to be closed.
      close_folder_result_t close_folder_result =
          close_folder(state, folder_index + 1);

      if (close_folder_result.code != CLOSE_FOLDER_OK) {
        return (close_folder_result_t) {
            close_folder_result.code, NULL};
      }
    }
  }

  // allocate a node and set it up.
  arena_alloc_node_result_t arena_alloc_node_result =
      arena_alloc_node(
          state->tree,
          folder->subfolder_name,
          folder->subfolder_name_sz,
          folder->closed_children_count);
  if (arena_alloc_node_result.code == ARENA_ALLOC_OOM) {
    return (close_folder_result_t) {
        CLOSE_FOLDER_OOM, NULL};
  }
  node_t *node = arena_alloc_node_result.node;
  node->type = TYPE_IMPLICIT;
  // we must initialize flags to a known value, even if it's not used
  // because it participates in checksum calculation.
  node->flags = 0;

  if (!VERIFY_CHILD_NUM(folder->closed_children_count)) {
    abort();
  }

  // this is a huge abstraction violation, but it allows us to use
  // `set_child_by_index`, which is significantly more efficient.
  node->num_children = (child_num_t) folder->closed_children_count;

  // node is set up.  now add all the children!  the arena base is read only
  // now, after the allocation above, because children are stored as offsets
  // from it.
  intptr_t arena_start = (intptr_t) state->tree->arena;
  for (size_t ix = 0; ix < folder->closed_children_count; ix++) {
    ptrdiff_t child_offset = folder->closed_children[ix];
    intptr_t address = arena_start + child_offset;

    set_child_by_index(node, ix, (node_t *) address);
  }

  // if the child list outgrew the preallocated area it lives on the heap.
  // release it before the slot is reset, otherwise the only pointer to it is
  // overwritten and the memory is leaked.
  if (folder->closed_children != folder->closed_children_prealloc) {
    free(folder->closed_children);
  }

  init_open_folder(folder);  // zap the folder so it can be reused.
  state->open_folder_count--;

  // attach to parent folder if it's not the root folder.
  assert(folder_index == state->open_folder_count);
  if (folder_index > 0) {
    open_folder_t *parent_folder = &state->folders[folder_index - 1];
    if (folder_add_child(state, parent_folder, node) == false) {
      return (close_folder_result_t) {
          CLOSE_FOLDER_OOM, NULL};
    }
  }

  return (close_folder_result_t) {
      CLOSE_FOLDER_OK, node};
}
// status codes reported by process_path().
typedef enum {
// the path was consumed and a leaf node was created for it.
PROCESS_PATH_OK,
// an allocation failed while processing the path.
PROCESS_PATH_OOM,
// the path is malformed (for example, it is not terminated within the
// available bytes).
PROCESS_PATH_CORRUPT,
} process_path_code_t;
// result of process_path().
typedef struct _process_path_result_t {
process_path_code_t code;
// the following are only set when the code is `PROCESS_PATH_OK`.
// do *NOT* save this pointer.  immediately do what is needed with this
// pointer and discard.  the reason is that it's part of the arena, and can
// be moved if the arena is resized.
node_t *node;
// this is the number of bytes consumed, including the null terminator.
size_t bytes_consumed;
} process_path_result_t;
/**
 * Process a null-terminated path, closing any directories and building the
 * nodes as needed, and opening the new directories to support the current path.
 *
 * Once the proper set of folders are open, create a leaf node and write it
 * into the innermost open folder.
 *
 * `path` points into the flat manifest and `max_len` is the number of bytes
 * available from `path`; no byte at or beyond `path[max_len]` is ever read.
 *
 * Returns:
 *  - PROCESS_PATH_OK with the new leaf node and the number of bytes consumed
 *    (including the null terminator);
 *  - PROCESS_PATH_CORRUPT if no null terminator is found within `max_len`
 *    bytes, or if the path nests deeper than MAX_FOLDER_DEPTH allows;
 *  - PROCESS_PATH_OOM if any allocation fails.
 */
static process_path_result_t process_path(
    from_flat_state_t *state,
    const char *path, size_t max_len) {
  size_t path_scan_index = 0;
  size_t current_path_start = 0;
  size_t open_folder_index = 0;

  // match as many path components as we can
  for (;; path_scan_index++) {
    // the bound is checked *before* the byte is read, so a path that is not
    // terminated inside the available window is rejected without touching
    // memory past it.
    if (path_scan_index == max_len) {
      return (process_path_result_t) {
          PROCESS_PATH_CORRUPT, NULL, 0};
    }
    if (path[path_scan_index] == 0) {
      break;
    }

    // check for a path separator.
    if (path[path_scan_index] != '/') {
      continue;
    }

    size_t path_len =
        path_scan_index + 1 /* to include the / */ - current_path_start;

    bool open_new_folder = true;

    // check if the *next* open folder is valid, and if it matches the path
    // component we just found.
    if (open_folder_index + 1 < state->open_folder_count) {
      if (folder_name_compare(
          &path[current_path_start],
          path_len,
          &state->folders[open_folder_index + 1]) == 0) {
        // we found the folder we needed, so we can just reuse it.
        open_new_folder = false;
        open_folder_index++;
      } else {
        close_folder_result_t close_folder_result =
            close_folder(state, open_folder_index + 1);
        if (close_folder_result.code == CLOSE_FOLDER_OOM) {
          return (process_path_result_t) {PROCESS_PATH_OOM, NULL, 0};
        }
      }
    }

    if (open_new_folder == true) {
      // there is a fixed number of folder slots.  a path that nests deeper
      // than that cannot be represented, so reject it instead of writing
      // past the end of the folder array.
      if (open_folder_index + 1 >= MAX_FOLDER_DEPTH) {
        return (process_path_result_t) {
            PROCESS_PATH_CORRUPT, NULL, 0};
      }

      // if we're opening a new folder, that means there should be no child
      // folders open.
      assert(state->open_folder_count == open_folder_index + 1);
      open_folder_index++;
      state->open_folder_count++;
      open_folder_t *folder = &state->folders[open_folder_index];

      assert(folder->in_use == false);
      assert(folder->closed_children == folder->closed_children_prealloc);
      assert(folder->closed_children_count == 0);

      // link the name in.  remember, we don't own the memory!!
      folder->in_use = true;
      folder->subfolder_name = &path[current_path_start];
      folder->subfolder_name_sz = path_len;
    }

    // path starts after the /
    current_path_start = path_scan_index + 1;
  }

  // close path components that are not matched, building their nodes.
  if (open_folder_index + 1 < state->open_folder_count) {
    close_folder_result_t close_folder_result =
        close_folder(state, open_folder_index + 1);
    if (close_folder_result.code == CLOSE_FOLDER_OOM) {
      return (process_path_result_t) {PROCESS_PATH_OOM, NULL, 0};
    }
  }

  // build a node for the remaining path (which should just be the
  // filename).  add it to the currently open folder.
  arena_alloc_node_result_t arena_alloc_node_result =
      arena_alloc_node(
          state->tree,
          &path[current_path_start],
          path_scan_index - current_path_start,
          0);
  if (arena_alloc_node_result.code == ARENA_ALLOC_OOM) {
    return (process_path_result_t) {PROCESS_PATH_OOM, NULL, 0};
  }

  arena_alloc_node_result.node->type = TYPE_LEAF;

  // jam the new node into the currently open folder.  a failure here means
  // the folder's child list could not grow, which must be reported rather
  // than silently dropping the file from the tree.
  open_folder_t *folder = &state->folders[open_folder_index];
  if (folder_add_child(state, folder, arena_alloc_node_result.node) ==
      false) {
    return (process_path_result_t) {PROCESS_PATH_OOM, NULL, 0};
  }

  return (process_path_result_t) {
      PROCESS_PATH_OK, arena_alloc_node_result.node, path_scan_index + 1};
}
/**
 * Walks a flat manifest and builds the corresponding tree inside `state`.
 *
 * Each entry of the flat manifest is expected to look like
 *
 *   <path> NUL <SHA1_BYTES * 2 hex characters> [<flag character>] NL
 *
 * The root folder is opened first, every entry is fed through
 * process_path(), and the checksum and optional flag are stored on the leaf
 * node it returns.  Finally the root folder is closed and attached to the
 * tree's shadow root.
 *
 * Returns {CONVERT_FROM_FLAT_OK, tree} on success, CONVERT_FROM_FLAT_OOM if
 * an allocation failed, or CONVERT_FROM_FLAT_WTF if the manifest is
 * malformed.  The state is not cleaned up here; that is the caller's job.
 */
static convert_from_flat_result_t convert_from_flat_helper(
    from_flat_state_t *state, char *manifest, size_t manifest_sz) {
  // open the root directory node.
  open_folder_t *folder = &state->folders[0];
  folder->subfolder_name = "/";
  folder->subfolder_name_sz = 1;
  folder->in_use = true;
  state->open_folder_count++;

  for (size_t ptr = 0; ptr < manifest_sz;) {
    // filename is up to the first null.
    process_path_result_t pp_result = process_path(
        state, &manifest[ptr], manifest_sz - ptr);

    switch (pp_result.code) {
      case PROCESS_PATH_OOM:
        return (convert_from_flat_result_t) {
            CONVERT_FROM_FLAT_OOM, NULL};
      case PROCESS_PATH_CORRUPT:
        return (convert_from_flat_result_t) {
            CONVERT_FROM_FLAT_WTF, NULL};
      case PROCESS_PATH_OK:
        break;
    }

    assert(pp_result.code == PROCESS_PATH_OK);
    // this pointer lives on the arena; it is only used until the next
    // allocation (the next process_path() call).
    node_t *node = pp_result.node;

    // never advance past the end of the manifest: the subtraction below is
    // unsigned and would wrap around to a huge "remaining" value.
    if (pp_result.bytes_consumed > manifest_sz - ptr) {
      return (convert_from_flat_result_t) {
          CONVERT_FROM_FLAT_WTF, NULL};
    }
    ptr += pp_result.bytes_consumed;
    size_t remaining = manifest_sz - ptr;
    if (remaining <= SHA1_BYTES * 2) {
      // not enough characters for the checksum and the NL.  well, that's a
      // fail.
      return (convert_from_flat_result_t) {
          CONVERT_FROM_FLAT_WTF, NULL};
    }

    if (unhexlify(&manifest[ptr], SHA1_BYTES * 2, node->checksum) ==
        false) {
      return (convert_from_flat_result_t) {
          CONVERT_FROM_FLAT_WTF, NULL};
    }
    node->checksum_sz = SHA1_BYTES;
    node->checksum_valid = true;
    ptr += SHA1_BYTES * 2;

    // is the next character a NL?  if so, then we're done.  otherwise,
    // retrieve it as the flags field.
    if (manifest[ptr] != '\n') {
      node->flags = manifest[ptr];
      ptr++;

      // the entry still has to be terminated by a NL.  without this check a
      // truncated or garbled entry would be accepted and the byte after the
      // flag silently skipped.
      if (ptr >= manifest_sz || manifest[ptr] != '\n') {
        return (convert_from_flat_result_t) {
            CONVERT_FROM_FLAT_WTF, NULL};
      }
    } else {
      node->flags = 0;
    }
    ptr++;  // skip the NL.

    state->tree->num_leaf_nodes++;
  }

  // close the root folder.
  close_folder_result_t close_result = close_folder(state, 0);
  if (close_result.code == CLOSE_FOLDER_OOM) {
    return (convert_from_flat_result_t) {
        CONVERT_FROM_FLAT_OOM, NULL};
  }

  close_result.node->type = TYPE_ROOT;

  // NOTE(review): the result of add_child() is not inspected here -- confirm
  // against tree.h whether it can fail and should be checked.
  add_child(state->tree->shadow_root, close_result.node);

  return (convert_from_flat_result_t) {
      CONVERT_FROM_FLAT_OK, state->tree};
}
/**
 * Recursively serializes the subtree rooted at `node` into the flat manifest
 * held in `state->output_buffer`.
 *
 * `node` must be a folder (TYPE_IMPLICIT or TYPE_ROOT).  For every leaf child
 * one entry is emitted:
 *
 *   <dirpath><name> NUL <hex checksum> [<flag character>] NL
 *
 * For every other child its name is appended to the directory path buffer,
 * the child is visited recursively, and the directory path is then restored.
 *
 * Returns CONVERT_TO_FLAT_OK on success.  Any failure (from growing a buffer
 * or from a recursive visit) is returned to the caller immediately.
 */
static convert_to_flat_code_t convert_to_flat_iterator(
    to_flat_state_t *state,
    const node_t *node) {
  assert(node->type == TYPE_IMPLICIT || node->type == TYPE_ROOT);

  for (uint32_t ix = 0; ix < node->num_children; ix++) {
    node_t *child = get_child_by_index(node, ix);

    if (child->type == TYPE_LEAF) {
      size_t space_needed = state->dirpath_build_buffer_idx +
                            child->name_sz +
                            1 /* null character */ +
                            (SHA1_BYTES * 2) +
                            (child->flags != '\000' ? 1 : 0) +
                            1 /* NL */;

      if (CONVERT_EXPAND_TO_FIT(
          &state->output_buffer,
          state->output_buffer_idx,
          &state->output_buffer_sz,
          space_needed) == false) {
        return CONVERT_TO_FLAT_OOM;
      }

      // copy the dirpath over to the output buffer.
      memcpy(&state->output_buffer[state->output_buffer_idx],
             state->dirpath_build_buffer,
             state->dirpath_build_buffer_idx);
      state->output_buffer_idx += state->dirpath_build_buffer_idx;

      // copy the filename over to the output buffer.
      memcpy(&state->output_buffer[state->output_buffer_idx],
             child->name, child->name_sz);
      state->output_buffer_idx += child->name_sz;

      // terminate the path with a null character.
      state->output_buffer[state->output_buffer_idx] = '\000';
      state->output_buffer_idx++;

      // transcribe the sha over.
      hexlify(child->checksum, SHA1_BYTES,
              &state->output_buffer[state->output_buffer_idx]);
      state->output_buffer_idx += (SHA1_BYTES * 2);

      if (child->flags != '\000') {
        state->output_buffer[state->output_buffer_idx] = child->flags;
        state->output_buffer_idx++;
      }

      state->output_buffer[state->output_buffer_idx] = '\n';
      state->output_buffer_idx++;

      // exactly `space_needed` bytes were written, so filling the buffer to
      // the brim is legitimate; only going past the end is a bug.
      assert(state->output_buffer_idx <= state->output_buffer_sz);
    } else {
      // save the old value...
      size_t previous_dirpath_build_buffer_idx =
          state->dirpath_build_buffer_idx;

      if (PATH_APPEND(
          &state->dirpath_build_buffer,
          &state->dirpath_build_buffer_idx,
          &state->dirpath_build_buffer_sz,
          child->name,
          child->name_sz) == false) {
        return CONVERT_TO_FLAT_OOM;
      }

      // a failure deeper in the tree must not be swallowed; otherwise the
      // caller would receive a truncated manifest reported as a success.
      convert_to_flat_code_t child_result =
          convert_to_flat_iterator(state, child);
      if (child_result != CONVERT_TO_FLAT_OK) {
        return child_result;
      }

      // ...and restore it, dropping the child's name from the dirpath.
      state->dirpath_build_buffer_idx = previous_dirpath_build_buffer_idx;
    }
  }

  return CONVERT_TO_FLAT_OK;
}
/**
 * Serializes `tree` into the buffers held by `state`.
 *
 * The tree's shadow root is expected to have exactly one child, the real
 * root, which is handed to convert_to_flat_iterator().  Any other child count
 * yields CONVERT_TO_FLAT_WTF; otherwise the iterator's result is returned.
 */
static convert_to_flat_code_t convert_to_flat_helper(
    to_flat_state_t *state,
    const tree_t *tree) {
  node_t *shadow = tree->shadow_root;

  if (shadow->num_children == 1) {
    // the single child of the shadow root is the real root.
    return convert_to_flat_iterator(state, get_child_by_index(shadow, 0));
  }

  return CONVERT_TO_FLAT_WTF;
}
convert_from_flat_result_t convert_from_flat(
char *manifest, size_t manifest_sz) {
from_flat_state_t *state = init_from_state(manifest_sz);
if (state->tree == NULL) {
free(state);
state = NULL;
}
if (state == NULL) {
return (convert_from_flat_result_t) {
CONVERT_FROM_FLAT_OOM, NULL};
}
convert_from_flat_result_t result =
convert_from_flat_helper(state, manifest, manifest_sz);
if (result.code != CONVERT_FROM_FLAT_OK) {
free(state->tree);
}
free(state);
return result;
}
/**
 * Converts `tree` into a flat manifest.
 *
 * On success the result carries CONVERT_TO_FLAT_OK, a malloc'ed buffer
 * holding the flat manifest, and the number of bytes written to it.
 * NOTE(review): the buffer is presumably owned (and freed) by the caller --
 * confirm against convert.h.
 *
 * On failure the result carries the error code, a NULL buffer and a size of
 * 0; every buffer allocated here has been released.
 */
convert_to_flat_result_t convert_to_flat(tree_t *tree) {
  // every field is given a defined value, including `tree`.
  to_flat_state_t state;
  state.tree = tree;

  state.dirpath_build_buffer = malloc(DEFAULT_PATH_BUFFER_SZ);
  state.dirpath_build_buffer_idx = 0;
  state.dirpath_build_buffer_sz = DEFAULT_PATH_BUFFER_SZ;

  // guestimate as to how much space we need.  this could probably be
  // fine-tuned a bit.
  state.output_buffer = malloc(tree->consumed_memory);
  state.output_buffer_idx = 0;
  state.output_buffer_sz = tree->consumed_memory;

  // if either allocation failed, the conversion is never attempted and the
  // result stays at OOM.
  convert_to_flat_code_t result = CONVERT_TO_FLAT_OOM;
  if (state.dirpath_build_buffer != NULL &&
      state.output_buffer != NULL) {
    result = convert_to_flat_helper(&state, tree);
  }

  // the dirpath buffer is scratch space only.
  free(state.dirpath_build_buffer);

  if (result != CONVERT_TO_FLAT_OK) {
    // free the buffer if any error occurred.
    free(state.output_buffer);
    return (convert_to_flat_result_t) {result, NULL, 0};
  }

  return (convert_to_flat_result_t) {
      CONVERT_TO_FLAT_OK, state.output_buffer, state.output_buffer_idx};
}