mirror of
https://github.com/facebook/sapling.git
synced 2024-10-10 08:47:12 +03:00
[remotefilelog] initial checkin of a c datapack parser
Summary: This is not yet complete, but seems to be able to parse a data file. Test Plan: `/Users/tonytung/Library/Caches/CLion2016.2/cmake/generated/cdatapack-64b7828e/64b7828e/Debug/cdatapack_dump d864669a5651d04505ec6e5e9dba1319cde71f7b > /tmp/2` compare it with the output of `hg debugdatapack --long d864669a5651d04505ec6e5e9dba1319cde71f7b > /tmp/1` and it exactly matches. Reviewers: durham Reviewed By: durham Subscribers: mitrandir Differential Revision: https://phabricator.intern.facebook.com/D3627122 Signature: t1:3627122:1470085301:c9b9e8b2fa57bb7a09dd56d3c811ff8eadbb85ba
This commit is contained in:
parent
9e557758b0
commit
705c0731b6
27
remotefilelog/cdatapack/CMakeLists.txt
Normal file
27
remotefilelog/cdatapack/CMakeLists.txt
Normal file
@ -0,0 +1,27 @@
|
||||
# Copyright 2016-present Facebook. All Rights Reserved.
|
||||
#
|
||||
# Build file.
|
||||
#
|
||||
# no-check-code
|
||||
|
||||
cmake_minimum_required(VERSION 3.5)
project(cdatapack)

# Flags applied to every configuration, followed by the per-configuration
# optimisation / debug-info settings.
set(CMAKE_C_FLAGS "-std=c99 -Wall -Wshorten-64-to-32 -Wincompatible-pointer-types-discards-qualifiers -Werror")
set(CMAKE_C_FLAGS_DEBUG "-O0 -g")
set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O0 -g")

# The datapack reader library.
add_library(cdatapack buffer.h cdatapack.c cdatapack.h convert.h)

# Command-line tools, each linked against the library.
add_executable(cdatapack_dump cdatapack_dump.c)
target_link_libraries(cdatapack_dump cdatapack)

add_executable(cdatapack_get cdatapack_get.c)
target_link_libraries(cdatapack_get cdatapack)

# Stand-alone placeholder executable; links nothing.
add_executable(null_test null_test.c)
|
47
remotefilelog/cdatapack/buffer.h
Normal file
47
remotefilelog/cdatapack/buffer.h
Normal file
@ -0,0 +1,47 @@
|
||||
// Copyright 2016-present Facebook. All Rights Reserved.
|
||||
//
|
||||
// buffer.h: declarations for a generic mechanism to expand a heap-allocated
|
||||
// buffer. this is for internal use only.
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
#ifndef __FASTMANIFEST_BUFFER_H__
|
||||
#define __FASTMANIFEST_BUFFER_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Make sure a heap-allocated array has room for `input_count` more items,
 * growing it with realloc() when it does not.
 *
 * @param buffer          in/out: address of the array pointer; updated when
 *                        the array is reallocated.
 * @param num_slots_used  number of slots already occupied.
 * @param num_slots_total in/out: current capacity in slots; updated when the
 *                        array is reallocated.
 * @param input_count     number of additional slots the caller needs.
 * @param item_sz         size in bytes of one slot.
 * @param factor          multiplicative growth factor applied to the current
 *                        capacity.
 * @param min_increment   the capacity grows by at least this many slots.
 * @param max_increment   the capacity grows by at most this many slots,
 *                        unless more are needed to satisfy `input_count`.
 *
 * @return true if the array can now hold the extra items.  false if the
 *         required size is not representable in a size_t or realloc() failed;
 *         in that case *buffer and *num_slots_total are left untouched and
 *         the original allocation is still valid.
 */
static inline bool expand_to_fit(
    void **buffer, size_t num_slots_used, size_t *num_slots_total,
    size_t input_count, size_t item_sz,
    const float factor,
    const size_t min_increment,
    const size_t max_increment) {
  const size_t size_max = (size_t) -1;
  const size_t current_total = *num_slots_total;
  const size_t remaining = current_total - num_slots_used;

  if (input_count <= remaining) {
    // already enough room, nothing to do.
    return true;
  }

  // the capacity we must reach has to be representable at all.
  if (input_count > size_max - current_total) {
    return false;
  }
  const size_t required_total = current_total + input_count;

  // proportional growth.  guard the float -> size_t conversion, which is
  // undefined for values outside the range of size_t.
  const float scaled = factor * ((float) current_total);
  size_t new_slots_total = 0;
  if (scaled >= (float) size_max) {
    new_slots_total = size_max;
  } else if (scaled > 0.0f) {
    new_slots_total = (size_t) scaled;
  }

  // clamp the growth to [min_increment, max_increment], saturating rather
  // than wrapping when the bounds themselves would overflow.
  const size_t lowest = (min_increment > size_max - current_total)
      ? size_max : current_total + min_increment;
  const size_t highest = (max_increment > size_max - current_total)
      ? size_max : current_total + max_increment;
  if (new_slots_total < lowest) {
    new_slots_total = lowest;
  }
  if (new_slots_total > highest) {
    new_slots_total = highest;
  }

  // the caller's request always wins over the maximum increment.
  if (new_slots_total < required_total) {
    new_slots_total = required_total;
  }

  // refuse byte counts that would wrap and silently under-allocate.
  if (item_sz != 0 && new_slots_total > size_max / item_sz) {
    return false;
  }

  void *newbuffer = realloc(*buffer, item_sz * new_slots_total);
  if (newbuffer == NULL) {
    return false;
  }

  *buffer = newbuffer;
  *num_slots_total = new_slots_total;

  return true;
}
|
||||
|
||||
#endif /* __FASTMANIFEST_BUFFER_H__ */
|
412
remotefilelog/cdatapack/cdatapack.c
Normal file
412
remotefilelog/cdatapack/cdatapack.c
Normal file
@ -0,0 +1,412 @@
|
||||
// Copyright 2016-present Facebook. All Rights Reserved.
|
||||
//
|
||||
// cdatapack.c: Datapack implementation in C.
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <memory.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
#include "cdatapack.h"
|
||||
#include "buffer.h"
|
||||
|
||||
/**
|
||||
* This is an exact representation of an index entry on disk. Do not consume
|
||||
* the fields directly, as they may need processing.
|
||||
*
|
||||
* NOTE: this uses gcc's __attribute__((packed)) syntax to indicate a packed
|
||||
* data structure, which obviously has potential portability issues.
|
||||
*/
|
||||
typedef struct _disk_index_entry_t {
|
||||
uint8_t node[NODE_SZ];
|
||||
|
||||
// offset of the next element in the delta chain in the index file
|
||||
index_offset_t deltabase_index_offset;
|
||||
|
||||
// offset and size of this current element in the delta chain in the data
|
||||
// file.
|
||||
data_offset_t data_offset;
|
||||
data_offset_t data_sz;
|
||||
} __attribute__((packed)) disk_index_entry_t;
|
||||
|
||||
/**
|
||||
* This represents offsets into the index indicating the range of a fanout
|
||||
* bucket. This is calculated upon opening the file.
|
||||
*/
|
||||
typedef struct _fanout_table_entry_t {
|
||||
index_offset_t start_index;
|
||||
index_offset_t end_index;
|
||||
} fanout_table_entry_t;
|
||||
|
||||
/**
|
||||
* This is a post-processed index entry. The node pointer is valid only if
|
||||
* the handle that generated this entry hasn't been closed.
|
||||
*
|
||||
* This is the counterpart of disk_index_entry_t.
|
||||
*/
|
||||
typedef struct _pack_index_entry_t {
|
||||
const uint8_t *node;
|
||||
|
||||
// offset and size of this current element in the delta chain in the data
|
||||
// file.
|
||||
data_offset_t data_offset;
|
||||
data_offset_t data_sz;
|
||||
|
||||
// offset of the next element in the delta chain in the index file
|
||||
index_offset_t deltabase_index_offset;
|
||||
} pack_index_entry_t;
|
||||
|
||||
/**
|
||||
* This is a chain of index entries.
|
||||
*/
|
||||
typedef struct _pack_chain_t {
|
||||
pack_index_entry_t *pack_chain_links;
|
||||
size_t links_idx;
|
||||
size_t links_sz;
|
||||
} pack_chain_t;
|
||||
|
||||
// the only index version this reader accepts (disk_index_header_t.version).
#define VERSION 0

// flag bit in disk_index_header_t.config.
#define LARGE_FANOUT 0x80

/**
 * The header of an index file, laid out byte for byte as it is stored on
 * disk.  The fields may need processing before use, so do not consume them
 * directly.
 *
 * NOTE: this relies on gcc's __attribute__((packed)) syntax to get a packed
 * data structure, which obviously has potential portability issues.
 */
struct _disk_index_header_t {
  uint8_t version;
  uint8_t config;
} __attribute__((packed));

typedef struct _disk_index_header_t disk_index_header_t;
|
||||
|
||||
static void unpack_disk_deltachunk(
|
||||
const disk_index_entry_t *disk_deltachunk,
|
||||
pack_index_entry_t *packindex) {
|
||||
packindex->node = disk_deltachunk->node;
|
||||
packindex->data_offset = ntoh_data_offset(
|
||||
disk_deltachunk->data_offset);
|
||||
packindex->data_sz = ntoh_data_offset(
|
||||
disk_deltachunk->data_sz);
|
||||
packindex->deltabase_index_offset = ntoh_index_offset(
|
||||
disk_deltachunk->deltabase_index_offset);
|
||||
}
|
||||
|
||||
static bool find(
|
||||
const datapack_handle_t * handle,
|
||||
uint8_t node[NODE_SZ],
|
||||
pack_index_entry_t *packindex) {
|
||||
uint16_t fanout_idx;
|
||||
if (handle->large_fanout) {
|
||||
uint16_t* fanout_idx_ptr = (uint16_t*) &node[0];
|
||||
fanout_idx = ntohs(*fanout_idx_ptr);
|
||||
} else {
|
||||
fanout_idx = node[0];
|
||||
}
|
||||
|
||||
index_offset_t start = handle->fanout_table[fanout_idx].start_index,
|
||||
end = handle->fanout_table[fanout_idx].end_index;
|
||||
|
||||
// indices are INCLUSIVE, so the search is <=
|
||||
while (start <= end) {
|
||||
index_offset_t middle = start + ((end - start) / 2);
|
||||
|
||||
// peek at the hash at that location.
|
||||
int cmp = memcmp(node, handle->index_table[middle].node, NODE_SZ);
|
||||
if (cmp < 0) {
|
||||
end = middle - 1;
|
||||
} else if (cmp > 0) {
|
||||
start = middle + 1;
|
||||
} else {
|
||||
// exact match!
|
||||
unpack_disk_deltachunk(&handle->index_table[middle], packindex);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// nope, no good.
|
||||
return false;
|
||||
}
|
||||
|
||||
datapack_handle_t *open_datapack(
|
||||
char *indexfp, size_t indexfp_sz,
|
||||
char *datafp, size_t datafp_sz) {
|
||||
datapack_handle_t *handle = NULL;
|
||||
char *buffer = NULL;
|
||||
|
||||
handle = malloc(sizeof(datapack_handle_t));
|
||||
if (handle == NULL) {
|
||||
// TODO: at some future point in time, it might be nice to add some
|
||||
// better error reporting like we have in cfastmanifest.
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
// can't just use memset because MAP_FAILED is the error result code, not
|
||||
// NULL.
|
||||
memset(handle, 0, sizeof(datapack_handle_t));
|
||||
handle->data_mmap = MAP_FAILED;
|
||||
handle->index_mmap = MAP_FAILED;
|
||||
|
||||
buffer = malloc(1 + (indexfp_sz > datafp_sz ? indexfp_sz : datafp_sz));
|
||||
if (buffer == NULL) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
memcpy(buffer, indexfp, indexfp_sz);
|
||||
buffer[indexfp_sz] = '\0';
|
||||
handle->indexfd = open(buffer, O_RDONLY);
|
||||
if (handle->indexfd < 0) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
handle->index_file_sz = lseek(handle->indexfd, 0, SEEK_END);
|
||||
lseek(handle->indexfd, 0, SEEK_SET);
|
||||
|
||||
memcpy(buffer, datafp, datafp_sz);
|
||||
buffer[datafp_sz] = '\0';
|
||||
handle->datafd = open(buffer, O_RDONLY);
|
||||
if (handle->datafd < 0) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
handle->data_file_sz = lseek(handle->datafd, 0, SEEK_END);
|
||||
lseek(handle->datafd, 0, SEEK_SET);
|
||||
|
||||
handle->index_mmap = mmap(NULL, (size_t) handle->index_file_sz, PROT_READ,
|
||||
MAP_FILE | MAP_PRIVATE, handle->indexfd, (off_t) 0);
|
||||
if (handle->index_mmap == MAP_FAILED) {
|
||||
int er = errno;
|
||||
(void) er;
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
handle->data_mmap = mmap(NULL, (size_t) handle->data_file_sz, PROT_READ,
|
||||
MAP_FILE | MAP_PRIVATE, handle->datafd, (off_t) 0);
|
||||
if (handle->data_mmap == MAP_FAILED) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
// read the headers and ensure that the file length is at least somewhat
|
||||
// sane.
|
||||
if (handle->index_file_sz < sizeof(disk_index_header_t)) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
const disk_index_header_t *header = (const disk_index_header_t *)
|
||||
handle->index_mmap;
|
||||
if (header->version != VERSION) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
handle->large_fanout = ((header->config | LARGE_FANOUT) != 0);
|
||||
int fanout_count = 1 << (handle->large_fanout ? 16 : 8);
|
||||
handle->fanout_table = (fanout_table_entry_t *) calloc(
|
||||
fanout_count, sizeof(fanout_table_entry_t));
|
||||
if (handle->fanout_table == NULL) {
|
||||
goto error_cleanup;
|
||||
}
|
||||
handle->index_table = (disk_index_entry_t *)
|
||||
(((const char *) handle->index_mmap) +
|
||||
sizeof(disk_index_header_t) +
|
||||
(sizeof(index_offset_t) * fanout_count));
|
||||
disk_index_entry_t *index_end = (disk_index_entry_t *)
|
||||
(((const char *) handle->index_mmap) + handle->index_file_sz);
|
||||
if (handle->index_table > index_end) {
|
||||
// ensure the file is at least big enough to include the fanout table.
|
||||
goto error_cleanup;
|
||||
}
|
||||
|
||||
// build a clean and easy table to bisect.
|
||||
index_offset_t *index = (index_offset_t *)
|
||||
(((const char *) handle->index_mmap) +
|
||||
sizeof(disk_index_header_t));
|
||||
index_offset_t prev_index_offset = 0;
|
||||
int last_fanout_increment = 0;
|
||||
|
||||
for (int ix = 0; ix < fanout_count; ix++) {
|
||||
index_offset_t index_offset = ntoh_index_offset(index[ix]);
|
||||
if (index_offset != prev_index_offset) {
|
||||
// backfill the start & end offsets
|
||||
for (int jx = last_fanout_increment; jx < ix; jx ++) {
|
||||
// fill the "start" except for the last time we changed the index
|
||||
// offset.
|
||||
if (jx != last_fanout_increment) {
|
||||
handle->fanout_table[jx].start_index = index_offset;
|
||||
}
|
||||
handle->fanout_table[jx].end_index = index_offset;
|
||||
}
|
||||
|
||||
handle->fanout_table[ix].start_index = index_offset;
|
||||
last_fanout_increment = ix;
|
||||
|
||||
prev_index_offset = index_offset;
|
||||
}
|
||||
}
|
||||
|
||||
// we may need to backfill the remaining offsets.
|
||||
index_offset_t last_offset = (index_offset_t)
|
||||
(index_end - handle->index_table - 1);
|
||||
for (int jx = last_fanout_increment; jx < fanout_count; jx ++) {
|
||||
// fill the "start" except for the last time we changed the index
|
||||
// offset.
|
||||
if (jx != last_fanout_increment) {
|
||||
handle->fanout_table[jx].start_index = last_offset;
|
||||
}
|
||||
handle->fanout_table[jx].end_index = last_offset;
|
||||
}
|
||||
|
||||
goto success_cleanup;
|
||||
|
||||
error_cleanup:
|
||||
|
||||
if (handle->index_mmap != MAP_FAILED) {
|
||||
munmap(handle->index_mmap, handle->index_file_sz);
|
||||
}
|
||||
|
||||
if (handle->data_mmap != MAP_FAILED) {
|
||||
munmap(handle->data_mmap, handle->data_file_sz);
|
||||
}
|
||||
|
||||
if (handle && handle->indexfd != 0) {
|
||||
close(handle->indexfd);
|
||||
}
|
||||
|
||||
if (handle && handle->datafd != 0) {
|
||||
close(handle->datafd);
|
||||
}
|
||||
free(handle->index_table);
|
||||
free(handle);
|
||||
|
||||
handle = NULL;
|
||||
|
||||
success_cleanup:
|
||||
|
||||
free(buffer);
|
||||
|
||||
return handle;
|
||||
}
|
||||
|
||||
/**
 * Release everything owned by a handle returned from open_datapack(): both
 * file mappings, both file descriptors, the computed fanout table and the
 * handle itself.  Passing NULL is a no-op.
 *
 * Pointers into the mapped files obtained through this handle become invalid
 * once the mappings are removed.
 */
void close_datapack(datapack_handle_t *handle) {
  if (handle == NULL) {
    return;
  }

  munmap(handle->index_mmap, (size_t) handle->index_file_sz);
  munmap(handle->data_mmap, (size_t) handle->data_file_sz);
  close(handle->indexfd);
  close(handle->datafd);

  // index_table points into the index mapping released above, so it must
  // not be passed to free(); the heap allocation owned by the handle is the
  // fanout table.
  free(handle->fanout_table);
  free(handle);
}
|
||||
|
||||
// sizing policy for the array of links held by a pack_chain_t: the initial
// number of slots, followed by the growth parameters passed to
// expand_to_fit().
#define DEFAULT_PACK_CHAIN_CAPACITY 64
#define PACK_CHAIN_GROWTH_FACTOR 2.0
#define PACK_CHAIN_MINIMUM_GROWTH 1024
#define PACK_CHAIN_MAXIMUM_GROWTH 65536

// make room for exactly one more pack_index_entry_t in a pack chain's
// array, applying the growth policy above.
#define PACK_CHAIN_EXPAND_TO_FIT(links, used, capacity)         \
  expand_to_fit((links), (used), (capacity),                    \
                1, sizeof(pack_index_entry_t),                  \
                PACK_CHAIN_GROWTH_FACTOR,                       \
                PACK_CHAIN_MINIMUM_GROWTH,                      \
                PACK_CHAIN_MAXIMUM_GROWTH)
|
||||
|
||||
static pack_chain_t *build_pack_chain(
|
||||
const datapack_handle_t *handle,
|
||||
uint8_t node[NODE_SZ]) {
|
||||
pack_chain_t *result = malloc(sizeof(pack_chain_t));
|
||||
result->links_idx = 0;
|
||||
result->links_sz = DEFAULT_PACK_CHAIN_CAPACITY;
|
||||
result->pack_chain_links = malloc(
|
||||
result->links_sz * sizeof(pack_index_entry_t));
|
||||
// TODO: error handling.
|
||||
|
||||
pack_index_entry_t entry;
|
||||
|
||||
// find the first entry.
|
||||
if (find(handle, node, &entry) == false) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PACK_CHAIN_EXPAND_TO_FIT(
|
||||
(void **)&result->pack_chain_links,
|
||||
result->links_idx,
|
||||
&result->links_sz);
|
||||
// TODO: yeah, this desperately needs some error handling.
|
||||
|
||||
result->pack_chain_links[result->links_idx] = entry;
|
||||
|
||||
while (entry.deltabase_index_offset != FULLTEXTINDEXMARK &&
|
||||
entry.deltabase_index_offset != NOBASEINDEXMARK) {
|
||||
unpack_disk_deltachunk(
|
||||
&handle->index_table[entry.deltabase_index_offset], &entry);
|
||||
|
||||
PACK_CHAIN_EXPAND_TO_FIT(
|
||||
(void **)&result->pack_chain_links,
|
||||
result->links_idx,
|
||||
&result->links_sz);
|
||||
// TODO: yeah, this desperately needs some error handling.
|
||||
|
||||
result->pack_chain_links[result->links_idx] = entry;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
 * Decode one delta chain link starting at `ptr`.
 *
 * The layout read is: a 16-bit big-endian filename length, the filename
 * bytes, the node (NODE_SZ bytes), the delta base node (NODE_SZ bytes), a
 * 64-bit big-endian delta length, and the delta bytes.
 *
 * Nothing is copied: the pointers stored in `link` refer to the buffer that
 * `ptr` points into.  No bounds checking is performed here; the caller must
 * compare the returned pointer against the end of its buffer.
 *
 * @param ptr  start of the encoded link.
 * @param link out: receives the decoded fields.
 *
 * @return a pointer to the first byte after the link.
 */
const uint8_t *getdeltachainlink(
    const uint8_t *ptr, delta_chain_link_t *link) {
  // decode the integers byte by byte: the fields sit at arbitrary offsets,
  // so loading them through a casted uint16_t / uint64_t pointer would be a
  // misaligned access.
  link->filename_sz = (uint16_t) (((uint16_t) ptr[0] << 8) | ptr[1]);
  ptr += sizeof(uint16_t);

  link->filename = (const char *) ptr;
  ptr += link->filename_sz;

  link->node = ptr;
  ptr += NODE_SZ;

  link->deltabase_node = ptr;
  ptr += NODE_SZ;

  uint64_t delta_sz = 0;
  for (size_t ix = 0; ix < sizeof(uint64_t); ix++) {
    delta_sz = (delta_sz << 8) | ptr[ix];
  }
  link->delta_sz = delta_sz;
  ptr += sizeof(uint64_t);

  link->delta = ptr;
  ptr += link->delta_sz;

  return ptr;
}
|
||||
|
||||
delta_chain_t *getdeltachain(
|
||||
const datapack_handle_t *handle,
|
||||
uint8_t node[NODE_SZ]) {
|
||||
pack_chain_t *pack_chain = build_pack_chain(handle, node);
|
||||
// TODO: error handling
|
||||
|
||||
delta_chain_t *delta_chain = malloc(sizeof(delta_chain_t));
|
||||
delta_chain->links_count = pack_chain->links_idx;
|
||||
delta_chain->delta_chain_links = malloc(
|
||||
delta_chain->links_count * sizeof(delta_chain_link_t));
|
||||
// TODO: error handling
|
||||
|
||||
|
||||
for (int ix = 0; ix < pack_chain->links_sz; ix ++) {
|
||||
const uint8_t *ptr = (const uint8_t *)
|
||||
pack_chain->pack_chain_links[ix].data_offset;
|
||||
const uint8_t *end = ptr +
|
||||
pack_chain->pack_chain_links[ix].data_sz;
|
||||
|
||||
delta_chain_link_t *link = &delta_chain->delta_chain_links[ix];
|
||||
|
||||
ptr = getdeltachainlink(ptr, link);
|
||||
|
||||
if (ptr > end) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// free pack chain.
|
||||
if (pack_chain != NULL) {
|
||||
free(pack_chain->pack_chain_links);
|
||||
free(pack_chain);
|
||||
}
|
||||
|
||||
return delta_chain;
|
||||
}
|
94
remotefilelog/cdatapack/cdatapack.h
Normal file
94
remotefilelog/cdatapack/cdatapack.h
Normal file
@ -0,0 +1,94 @@
|
||||
// Copyright 2016-present Facebook. All Rights Reserved.
|
||||
//
|
||||
// cdatapack.h: public interface for opening datapack files and reading
// delta chains from them.
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
#ifndef CDATAPACK_CDATAPACK_H
|
||||
#define CDATAPACK_CDATAPACK_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// number of bytes in a node.
#define NODE_SZ 20

// integer types used for positions in the index and in the data file, each
// with the conversion applied to values read from disk.
typedef uint32_t index_offset_t;
typedef uint64_t data_offset_t;
#define ntoh_index_offset ntohl
#define ntoh_data_offset ntohll

// values of a delta base index offset that stop a delta chain walk instead
// of naming another index entry.
#define FULLTEXTINDEXMARK ((index_offset_t) -1)
#define NOBASEINDEXMARK ((index_offset_t) -2)

// defined privately by the implementation; only pointers to these appear in
// this header.
struct _disk_index_entry_t;
struct _fanout_table_entry_t;
|
||||
|
||||
/**
 * State of an open datapack + index pair, created by open_datapack() and
 * released by close_datapack().
 */
struct _datapack_handle_t {
  // file descriptors of the index file and the data file.
  int indexfd;
  int datafd;

  // read-only mappings of the two files.
  void* index_mmap;
  void* data_mmap;

  // sizes of the two files, which are also the lengths of the mappings.
  off_t index_file_sz;
  off_t data_file_sz;

  // whether the index uses the 16-bit (rather than 8-bit) fanout.
  bool large_fanout;

  // this is the computed fanout table.
  struct _fanout_table_entry_t *fanout_table;

  // this points to the first index entry.
  struct _disk_index_entry_t* index_table;

  // this points to the entry one past the last.
  struct _disk_index_entry_t* index_end;
};

typedef struct _datapack_handle_t datapack_handle_t;
|
||||
|
||||
/**
|
||||
* This represents a single entry in a delta chain.
|
||||
*/
|
||||
typedef struct _delta_chain_link_t {
|
||||
uint16_t filename_sz;
|
||||
const char *filename;
|
||||
const uint8_t *node;
|
||||
const uint8_t *deltabase_node;
|
||||
|
||||
data_offset_t delta_sz;
|
||||
const uint8_t *delta;
|
||||
} delta_chain_link_t;
|
||||
|
||||
/**
|
||||
* This represents an entire delta chain.
|
||||
*/
|
||||
typedef struct _delta_chain_t {
|
||||
delta_chain_link_t *delta_chain_links;
|
||||
size_t links_count;
|
||||
} delta_chain_t;
|
||||
|
||||
/**
|
||||
* Open a datapack + index file. The fanout table is read and processed at
|
||||
* this point.
|
||||
*
|
||||
* Returns a handle for subsequent operations.
|
||||
*/
|
||||
extern datapack_handle_t *open_datapack(
|
||||
char *indexfp, size_t indexfp_sz,
|
||||
char *datafp, size_t datafp_sz);
|
||||
|
||||
/**
|
||||
* Release a datapack + index file handle.
|
||||
*/
|
||||
extern void close_datapack(datapack_handle_t *);
|
||||
|
||||
/**
|
||||
* Retrieves a delta chain for a given node.
|
||||
*/
|
||||
extern delta_chain_t *getdeltachain(
|
||||
const datapack_handle_t *handle,
|
||||
uint8_t node[NODE_SZ]);
|
||||
|
||||
// this should really be private, but we need it for the cdatapack_dump tool.
|
||||
extern const uint8_t *getdeltachainlink(
|
||||
const uint8_t *ptr, delta_chain_link_t *link);
|
||||
|
||||
#endif //CDATAPACK_CDATAPACK_H
|
69
remotefilelog/cdatapack/cdatapack_dump.c
Normal file
69
remotefilelog/cdatapack/cdatapack_dump.c
Normal file
@ -0,0 +1,69 @@
|
||||
// Copyright 2016-present Facebook. All Rights Reserved.
|
||||
//
|
||||
// cdatapack_dump.c: Dump the entire contents of a datapack file by walking
|
||||
// the datapack file.
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
#include <memory.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <inttypes.h>
|
||||
#include "convert.h"
|
||||
#include "cdatapack.h"
|
||||
|
||||
#define DATAIDX_EXT ".dataidx"
|
||||
#define DATAPACK_EXT ".datapack"
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc < 2) {
|
||||
fprintf(stderr, "%s <path>\n", argv[0]);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
long len = strlen(argv[1]);
|
||||
char idx_path[len + sizeof(DATAIDX_EXT)];
|
||||
char data_path[len + sizeof(DATAPACK_EXT)];
|
||||
|
||||
sprintf(idx_path, "%s%s", argv[1], DATAIDX_EXT);
|
||||
sprintf(data_path, "%s%s", argv[1], DATAPACK_EXT);
|
||||
|
||||
datapack_handle_t *handle = open_datapack(
|
||||
idx_path, strlen(idx_path),
|
||||
data_path, strlen(data_path));
|
||||
|
||||
const uint8_t *ptr = handle->data_mmap;
|
||||
const uint8_t *end = ptr + handle->data_file_sz;
|
||||
|
||||
ptr += 1; // for the version field.
|
||||
|
||||
const char *last_filename = NULL;
|
||||
uint16_t last_filename_sz = 0;
|
||||
|
||||
char node_buffer[NODE_SZ * 2];
|
||||
char deltabase_buffer[NODE_SZ * 2];
|
||||
|
||||
while (ptr < end) {
|
||||
delta_chain_link_t link;
|
||||
|
||||
ptr = getdeltachainlink(ptr, &link);
|
||||
|
||||
if (last_filename_sz != link.filename_sz ||
|
||||
memcmp(last_filename, link.filename, last_filename_sz) != 0) {
|
||||
// print the filename
|
||||
printf("\n%-.*s\n", (int) link.filename_sz, link.filename);
|
||||
last_filename_sz = link.filename_sz;
|
||||
last_filename = link.filename;
|
||||
}
|
||||
|
||||
hexlify(link.node, NODE_SZ, node_buffer);
|
||||
hexlify(link.deltabase_node, NODE_SZ, deltabase_buffer);
|
||||
|
||||
printf("%-*s %-*s %s\n",
|
||||
NODE_SZ * 2, "Node", NODE_SZ * 2, "Delta Base",
|
||||
"Delta Length");
|
||||
printf("%-.*s %-.*s %" PRIu64 "\n",
|
||||
NODE_SZ * 2, node_buffer, NODE_SZ * 2, deltabase_buffer,
|
||||
link.delta_sz);
|
||||
}
|
||||
}
|
8
remotefilelog/cdatapack/cdatapack_get.c
Normal file
8
remotefilelog/cdatapack/cdatapack_get.c
Normal file
@ -0,0 +1,8 @@
|
||||
//
|
||||
// cdatapack_get:
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
// placeholder entry point: performs no work and reports success.
int main(void) {
  return 0;
}
|
75
remotefilelog/cdatapack/convert.h
Normal file
75
remotefilelog/cdatapack/convert.h
Normal file
@ -0,0 +1,75 @@
|
||||
// Copyright 2016-present Facebook. All Rights Reserved.
|
||||
//
|
||||
// convert.h: hex-string conversions
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
#ifndef __FASTMANIFEST_CONVERT_H__
|
||||
#define __FASTMANIFEST_CONVERT_H__
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// maps a character code to the value of the hex digit it represents, or -1
// if the character is not a hex digit.  both upper- and lower-case letters
// are accepted.
static const int8_t hextable[256] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

// maps a 4-bit value to its lower-case hex digit.
static const char chartable[16] = {
  '0', '1', '2', '3', '4', '5', '6', '7',
  '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
};

/*
 * Turn a hex-encoded string into binary.
 *
 * `input` holds `len` hex digits (no terminator is required) and `dst` must
 * have room for len / 2 bytes.  Returns false on failure, i.e. if `len` is
 * negative or odd, or if a character is not a hex digit; in the latter case
 * the bytes decoded before the bad character have already been written to
 * `dst`.
 */
static inline bool unhexlify(const char *input, int len, uint8_t *dst) {
  // a negative length must be rejected before it is compared against an
  // unsigned index, where it would turn into an enormous bound.
  if (len < 0 || len % 2 != 0) {
    return false;
  }

  const size_t digit_count = (size_t) len;
  for (size_t ix = 0; ix < digit_count; ix += 2, dst++) {
    int hi = hextable[(unsigned char) input[ix]];
    int lo = hextable[(unsigned char) input[ix + 1]];

    if (hi < 0 || lo < 0) {
      return false;
    }
    *dst = (uint8_t) ((hi << 4) | lo);
  }

  return true;
}

/*
 * Turn binary data into a hex-encoded string.
 *
 * Writes 2 * len lower-case hex digits to `dst` and does NOT append a
 * terminating NUL.  Nothing is written if `len` is zero or negative.
 */
static inline void hexlify(const uint8_t *input, int len, char *dst) {
  if (len <= 0) {
    return;
  }

  const size_t byte_count = (size_t) len;
  for (size_t ix = 0; ix < byte_count; ix++, dst += 2) {
    unsigned char ch = (unsigned char) input[ix];

    dst[0] = chartable[ch >> 4];
    dst[1] = chartable[ch & 0xf];
  }
}
|
||||
|
||||
|
||||
#endif /* #ifndef __FASTMANIFEST_CONVERT_H__ */
|
9
remotefilelog/cdatapack/null_test.c
Normal file
9
remotefilelog/cdatapack/null_test.c
Normal file
@ -0,0 +1,9 @@
|
||||
// Copyright 2016-present Facebook. All Rights Reserved.
|
||||
//
|
||||
// null_test.c: garbage test to make CLion happy.
|
||||
//
|
||||
// no-check-code
|
||||
|
||||
// placeholder entry point: ignores its arguments and reports success.
int main(int argc, char *argv[]) {
  (void) argc;
  (void) argv;

  return 0;
}
|
Loading…
Reference in New Issue
Block a user