mirror of
https://github.com/urbit/ares.git
synced 2024-11-26 09:57:56 +03:00
pma: fix regression on restore
There was a regression, introduced with partition striping, that broke restore: node partitions were incidentally being mapped anonymous rather than to the backing file.
parent a245328266
commit 93a788ec76
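
Why this breaks restore: a stripe mapped with MAP_ANONYMOUS is process-private scratch memory, so writes to it never reach the backing file and a later open reads back zeroes. A minimal stand-alone sketch of the bug class (hypothetical file name and helper; this is not code from this repo):

    #include <assert.h>
    #include <fcntl.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <sys/types.h>
    #include <unistd.h>

    /* Map one "stripe" of the file. With anon set we reproduce the bug: the
       mapping ignores fd entirely, so writes through it can never persist. */
    static void *map_stripe(int fd, off_t off, size_t len, int anon)
    {
      return anon
        ? mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0)
        : mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, off);
    }

    int main(void)
    {
      size_t len = 4096;
      int fd = open("stripe.dat", O_RDWR | O_CREAT, 0644);
      if (fd == -1 || ftruncate(fd, len) != 0)
        return 1;

      char *p = map_stripe(fd, 0, len, 1);   /* the regression: anonymous */
      assert(p != MAP_FAILED);
      memcpy(p, "persist me", 10);
      msync(p, len, MS_SYNC);                /* flushes nothing to the file */
      munmap(p, len);

      p = map_stripe(fd, 0, len, 0);         /* "restore" from the file */
      assert(p != MAP_FAILED);
      assert(p[0] == '\0');                  /* the write is gone */
      return 0;
    }

The hunks below restructure metapage and root-page initialization so the root is allocated from the node freelist after the node segment is mapped file-backed, and add an explicit msync of the root page on sync.
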
@@ -157,30 +157,6 @@ int main(int argc, char *argv[])
   BT_findpath path = {0};
   int rc = 0;
 
-  /* broken with recent changes. Maybe because we aren't mmapping the data
-     ranges (pure _bt_insert) */
-#if 0
-
-  DPUTS("== test 1: insert");
-
-  bt_state_new(&state1);
-  if (mkdir("./pmatest1", 0774) == -1)
-    return errno;
-  assert(SUCC(bt_state_open(state1, "./pmatest1", 0, 0644)));
-
-#define LOWEST_ADDR 0x2aaa80;
-  vaof_t lo = LOWEST_ADDR;
-  vaof_t hi = 0xDEADBEEF;
-  pgno_t pg = 1; /* dummy value */
-  for (size_t i = 0; i < BT_DAT_MAXKEYS * 4; ++i) {
-    _bt_insert(state1, lo, hi, pg);
-    _test_nodeinteg(state1, &path, lo, hi, pg);
-    lo++; pg++;
-  }
-
-  bt_state_close(state1);
-#endif
-
 
   DPUTS("== test 2: malloc");
   BT_state *state2;
@@ -340,7 +316,7 @@ int main(int argc, char *argv[])
   assert(SUCC(bt_state_open(state4, "./pmatest4", 0, 0644)));
 
   assert(state4->file_size_p == PMA_INITIAL_SIZE_p + PMA_GROW_SIZE_p * 2);
-  assert(state4->flist->hi == state4->file_size_p);
+  assert(state4->flist->next->hi == state4->file_size_p);
 
 
   DPUTS("== test 5: partition striping");
@@ -699,6 +699,9 @@ _bt_root_new(BT_meta *meta, BT_page *root)
   root->datk[0].fo = 0;
   root->datk[1].va = UINT32_MAX;
   root->datk[1].fo = 0;
+  /* though we've modified the data segment, we shouldn't mark these default
+     values dirty because when we attempt to sync them, we'll obviously run into
+     problems since they aren't mapped */
 }
 
 static int
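
The new comment is worth spelling out: the sync path msyncs whatever ranges the dirty bitset names, and msync fails with ENOMEM when any page in the range is unmapped. The default datk entries describe addresses with no mapping behind them, so dirtying them would poison the next sync. A tiny stand-alone illustration of that msync behavior (the probe address is an assumption; nothing here is from this repo):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/mman.h>

    int main(void)
    {
      /* an address we assume nothing is mapped at -- analogous to a default,
         never-mapped data range in the tree */
      void *unmapped = (void *)0x500000000000UL;
      if (msync(unmapped, 4096, MS_SYNC) != 0)
        printf("msync: %s\n", strerror(errno)); /* ENOMEM: pages not mapped */
      return 0;
    }
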
@@ -1513,7 +1516,7 @@ _bt_insert2(BT_state *state, vaof_t lo, vaof_t hi, pgno_t fo,
   /* nullcond: node is a leaf */
   if (meta->depth == depth) {
     /* dirty the data range */
-    _bt_dirtydata(node, childidx);
+    _bt_dirtydata(node, childidx); /* ;;: I believe this is incorrect. We should just directly modify the dirty bitset in _bt_insertdat */
     /* guaranteed non-full and dirty by n-1 recursive call, so just insert */
     return _bt_insertdat(lo, hi, fo, node, childidx);
   }
@@ -2209,7 +2212,7 @@ _bt_state_restore_maps(BT_state *state)
 
 static int
 _bt_state_meta_which(BT_state *state)
-{
+{ /* ;;: TODO you need to mprotect writable the current metapage */
   BT_meta *m1 = state->meta_pages[0];
   BT_meta *m2 = state->meta_pages[1];
   int which = -1;
@@ -2307,9 +2310,8 @@ _bt_state_read_header(BT_state *state)
 
 static int
 _bt_state_meta_new(BT_state *state)
-#define INITIAL_ROOTPG 2
 {
-  BT_page *p1, *p2, *root;
+  BT_page *p1, *p2;
   BT_meta meta = {0};
 
   TRACE();
@@ -2324,9 +2326,6 @@ _bt_state_meta_new(BT_state *state)
   /* initialize the block base array */
   meta.blk_base[0] = BT_NUMMETAS;
 
-  root = _bt_nalloc(state);
-  _bt_root_new(&meta, root);
-
   /* initialize meta struct */
   meta.magic = BT_MAGIC;
   meta.version = BT_VERSION;
@@ -2335,8 +2334,6 @@ _bt_state_meta_new(BT_state *state)
   meta.fix_addr = BT_MAPADDR;
   meta.depth = 1;
   meta.flags = BP_META;
-  meta.root = _fo_get(state, root);
-  assert(meta.root == INITIAL_ROOTPG); /* ;;: remove?? */
 
   /* initialize the metapages */
   p1 = &((BT_page *)state->map)[0];
@@ -2344,9 +2341,8 @@ _bt_state_meta_new(BT_state *state)
 
   /* copy the metadata into the metapages */
   memcpy(METADATA(p1), &meta, sizeof meta);
-  /* ;;: todo, should the second metapage actually share a .root with the
-     first?? */
-  memcpy(METADATA(p2), &meta, sizeof meta);
+  /* ;;: writing to the second metapage really isn't necessary and it's probably better to leave it zeroed */
+  /* memcpy(METADATA(p2), &meta, sizeof meta); */
 
   /* only the active metapage should be writable (first page) */
   if (mprotect(BT_MAPADDR, BT_META_SECTION_WIDTH, BT_PROT_CLEAN) != 0) {
@@ -2361,6 +2357,19 @@ _bt_state_meta_new(BT_state *state)
 
   return BT_SUCC;
 }
 
+static int
+_bt_state_meta_inject_root(BT_state *state)
+#define INITIAL_ROOTPG 2
+{
+  assert(state->nlist);
+  BT_meta *meta = state->meta_pages[state->which];
+  BT_page *root = _bt_nalloc(state);
+  _bt_root_new(meta, root);
+  meta->root = _fo_get(state, root);
+  assert(meta->root == INITIAL_ROOTPG);
+  return BT_SUCC;
+}
 #undef INITIAL_ROOTPG
 
 static void
@@ -2422,6 +2431,8 @@ _bt_state_map_node_segment(BT_state *state)
   BYTE *targ = BT_MAPADDR + BT_META_SECTION_WIDTH;
   size_t i;
 
+  assert(meta->blk_base[0] == BT_NUMMETAS);
+
   /* map all allocated node stripes as clean */
   for (i = 0
        ; i < BT_NUMPARTS && meta->blk_base[i] != 0
@@ -2500,19 +2511,18 @@ _bt_state_load(BT_state *state)
     }
   }
 
+  if (new) {
+    assert(SUCC(_bt_state_meta_new(state)));
+  }
+
   /* map the node segment */
-  _bt_state_map_node_segment(state);
+  _bt_state_map_node_segment(state); /* ;;: this should follow a call to _bt_state_meta_new. hmm... but that leads to a bad dependency graph. We may need to separately initialize the first partition and only call map_node_segment on restore. */
 
   /* new db, so populate metadata */
   if (new) {
     assert(SUCC(_flist_new(state, PMA_GROW_SIZE_p)));
     assert(SUCC(_nlist_new(state)));
+    assert(SUCC(_bt_state_meta_inject_root(state)));
-    if (!SUCC(rc = _bt_state_meta_new(state))) {
-      munmap(state->map, BT_ADDRSIZE);
-      return rc;
-    }
-
     assert(SUCC(_mlist_new(state)));
   }
   else {
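
Taken together, the last few hunks change the boot order rather than just the code layout: _bt_state_meta_new now only populates the meta struct, and the root page is created later by _bt_state_meta_inject_root, which allocates it with _bt_nalloc and therefore needs the node freelist to exist. A condensed sketch of the resulting fresh-store sequence (names from the hunks above; error handling elided):

    /* fresh store ("new" path), condensed from _bt_state_load above */
    _bt_state_meta_new(state);            /* metapages only; no root yet     */
    _bt_state_map_node_segment(state);    /* node stripes mapped file-backed */
    _flist_new(state, PMA_GROW_SIZE_p);   /* file freelist                   */
    _nlist_new(state);                    /* node freelist                   */
    _bt_state_meta_inject_root(state);    /* _bt_nalloc draws on nlist, so   */
                                          /* the root can only be made here  */
    _mlist_new(state);
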
@@ -2729,7 +2739,7 @@ _bt_sync(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth)
 
   /* do dfs */
   for (size_t i = 0; i < N-1; i++) {
-    if (!_bt_ischilddirty(node, i))
+    if (!_bt_ischilddirty(node, i)) /* ;;: consider removing case until dirty logic is foolproof */
       continue; /* not dirty. nothing to do */
 
     BT_page *child = _node_get(state, node->datk[i].fo);
@@ -2931,9 +2941,16 @@ bt_sync(BT_state *state)
   BT_page *root = _node_get(state, meta->root);
   int rc = 0;
 
+  /* sync root subtrees */
   if ((rc = _bt_sync(state, root, 1, meta->depth)))
     return rc;
 
+  /* sync root page itself */
+  if (msync(root, sizeof(BT_page), MS_SYNC) != 0) {
+    DPRINTF("msync of root node: %p failed with %s", root, strerror(errno));
+    abort();
+  }
+
   /* merge the pending freelists */
   _pending_nlist_merge(state);
   _pending_flist_merge(state);
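
A note on the last hunk: judging by the "sync root page itself" comment and the dirty-bit check in _bt_sync above, the recursive sync appears to flush only children flagged dirty, never the node it is handed, so the root page's own datk updates were previously never msynced. The added block closes that gap before the pending freelists are merged. Condensed shape (names from the hunk; sketch only):

    /* bt_sync after this commit, condensed (error handling as in the hunk) */
    if ((rc = _bt_sync(state, root, 1, meta->depth)))  /* dirty subtrees     */
      return rc;
    if (msync(root, sizeof(BT_page), MS_SYNC) != 0)    /* then the root page */
      abort();                                         /* itself, explicitly */
    _pending_nlist_merge(state);                       /* only then reclaim  */
    _pending_flist_merge(state);
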