mirror of
https://github.com/urbit/ares.git
synced 2024-11-22 15:08:54 +03:00
pma: fix regression on restore
There was a regression introduced with partition striping that caused problems on restore due to node partitions incidentally getting mapped anonymous rather than to the backing file
This commit is contained in:
parent
a245328266
commit
93a788ec76
@ -157,30 +157,6 @@ int main(int argc, char *argv[])
|
||||
BT_findpath path = {0};
|
||||
int rc = 0;
|
||||
|
||||
/* broken with recent changes. Maybe because we aren't mmapping the data
|
||||
ranges (pure _bt_insert) */
|
||||
#if 0
|
||||
|
||||
DPUTS("== test 1: insert");
|
||||
|
||||
bt_state_new(&state1);
|
||||
if (mkdir("./pmatest1", 0774) == -1)
|
||||
return errno;
|
||||
assert(SUCC(bt_state_open(state1, "./pmatest1", 0, 0644)));
|
||||
|
||||
#define LOWEST_ADDR 0x2aaa80;
|
||||
vaof_t lo = LOWEST_ADDR;
|
||||
vaof_t hi = 0xDEADBEEF;
|
||||
pgno_t pg = 1; /* dummy value */
|
||||
for (size_t i = 0; i < BT_DAT_MAXKEYS * 4; ++i) {
|
||||
_bt_insert(state1, lo, hi, pg);
|
||||
_test_nodeinteg(state1, &path, lo, hi, pg);
|
||||
lo++; pg++;
|
||||
}
|
||||
|
||||
bt_state_close(state1);
|
||||
#endif
|
||||
|
||||
|
||||
DPUTS("== test 2: malloc");
|
||||
BT_state *state2;
|
||||
@ -340,7 +316,7 @@ int main(int argc, char *argv[])
|
||||
assert(SUCC(bt_state_open(state4, "./pmatest4", 0, 0644)));
|
||||
|
||||
assert(state4->file_size_p == PMA_INITIAL_SIZE_p + PMA_GROW_SIZE_p * 2);
|
||||
assert(state4->flist->hi == state4->file_size_p);
|
||||
assert(state4->flist->next->hi == state4->file_size_p);
|
||||
|
||||
|
||||
DPUTS("== test 5: partition striping");
|
||||
|
@ -699,6 +699,9 @@ _bt_root_new(BT_meta *meta, BT_page *root)
|
||||
root->datk[0].fo = 0;
|
||||
root->datk[1].va = UINT32_MAX;
|
||||
root->datk[1].fo = 0;
|
||||
/* though we've modified the data segment, we shouldn't mark these default
|
||||
values dirty because when we attempt to sync them, we'll obviously run into
|
||||
problems since they aren't mapped */
|
||||
}
|
||||
|
||||
static int
|
||||
@ -1513,7 +1516,7 @@ _bt_insert2(BT_state *state, vaof_t lo, vaof_t hi, pgno_t fo,
|
||||
/* nullcond: node is a leaf */
|
||||
if (meta->depth == depth) {
|
||||
/* dirty the data range */
|
||||
_bt_dirtydata(node, childidx);
|
||||
_bt_dirtydata(node, childidx); /* ;;: I believe this is incorrect. We should just directly modify the dirty bitset in _bt_insertdat */
|
||||
/* guaranteed non-full and dirty by n-1 recursive call, so just insert */
|
||||
return _bt_insertdat(lo, hi, fo, node, childidx);
|
||||
}
|
||||
@ -2209,7 +2212,7 @@ _bt_state_restore_maps(BT_state *state)
|
||||
|
||||
static int
|
||||
_bt_state_meta_which(BT_state *state)
|
||||
{
|
||||
{ /* ;;: TODO you need to mprotect writable the current metapage */
|
||||
BT_meta *m1 = state->meta_pages[0];
|
||||
BT_meta *m2 = state->meta_pages[1];
|
||||
int which = -1;
|
||||
@ -2307,9 +2310,8 @@ _bt_state_read_header(BT_state *state)
|
||||
|
||||
static int
|
||||
_bt_state_meta_new(BT_state *state)
|
||||
#define INITIAL_ROOTPG 2
|
||||
{
|
||||
BT_page *p1, *p2, *root;
|
||||
BT_page *p1, *p2;
|
||||
BT_meta meta = {0};
|
||||
|
||||
TRACE();
|
||||
@ -2324,9 +2326,6 @@ _bt_state_meta_new(BT_state *state)
|
||||
/* initialize the block base array */
|
||||
meta.blk_base[0] = BT_NUMMETAS;
|
||||
|
||||
root = _bt_nalloc(state);
|
||||
_bt_root_new(&meta, root);
|
||||
|
||||
/* initialize meta struct */
|
||||
meta.magic = BT_MAGIC;
|
||||
meta.version = BT_VERSION;
|
||||
@ -2335,8 +2334,6 @@ _bt_state_meta_new(BT_state *state)
|
||||
meta.fix_addr = BT_MAPADDR;
|
||||
meta.depth = 1;
|
||||
meta.flags = BP_META;
|
||||
meta.root = _fo_get(state, root);
|
||||
assert(meta.root == INITIAL_ROOTPG); /* ;;: remove?? */
|
||||
|
||||
/* initialize the metapages */
|
||||
p1 = &((BT_page *)state->map)[0];
|
||||
@ -2344,9 +2341,8 @@ _bt_state_meta_new(BT_state *state)
|
||||
|
||||
/* copy the metadata into the metapages */
|
||||
memcpy(METADATA(p1), &meta, sizeof meta);
|
||||
/* ;;: todo, should the second metapage actually share a .root with the
|
||||
first?? */
|
||||
memcpy(METADATA(p2), &meta, sizeof meta);
|
||||
/* ;;: writing to the second metapage really isn't necessary and it's probably better to leave it zeroed */
|
||||
/* memcpy(METADATA(p2), &meta, sizeof meta); */
|
||||
|
||||
/* only the active metapage should be writable (first page) */
|
||||
if (mprotect(BT_MAPADDR, BT_META_SECTION_WIDTH, BT_PROT_CLEAN) != 0) {
|
||||
@ -2361,6 +2357,19 @@ _bt_state_meta_new(BT_state *state)
|
||||
|
||||
return BT_SUCC;
|
||||
}
|
||||
|
||||
static int
|
||||
_bt_state_meta_inject_root(BT_state *state)
|
||||
#define INITIAL_ROOTPG 2
|
||||
{
|
||||
assert(state->nlist);
|
||||
BT_meta *meta = state->meta_pages[state->which];
|
||||
BT_page *root = _bt_nalloc(state);
|
||||
_bt_root_new(meta, root);
|
||||
meta->root = _fo_get(state, root);
|
||||
assert(meta->root == INITIAL_ROOTPG);
|
||||
return BT_SUCC;
|
||||
}
|
||||
#undef INITIAL_ROOTPG
|
||||
|
||||
static void
|
||||
@ -2422,6 +2431,8 @@ _bt_state_map_node_segment(BT_state *state)
|
||||
BYTE *targ = BT_MAPADDR + BT_META_SECTION_WIDTH;
|
||||
size_t i;
|
||||
|
||||
assert(meta->blk_base[0] == BT_NUMMETAS);
|
||||
|
||||
/* map all allocated node stripes as clean */
|
||||
for (i = 0
|
||||
; i < BT_NUMPARTS && meta->blk_base[i] != 0
|
||||
@ -2500,19 +2511,18 @@ _bt_state_load(BT_state *state)
|
||||
}
|
||||
}
|
||||
|
||||
if (new) {
|
||||
assert(SUCC(_bt_state_meta_new(state)));
|
||||
}
|
||||
|
||||
/* map the node segment */
|
||||
_bt_state_map_node_segment(state);
|
||||
_bt_state_map_node_segment(state); /* ;;: this should follow a call to _bt_state_meta_new. hmm... but that leads to a bad dependency graph. We may need to separately initialize the first partition and only call map_node_segment on restore. */
|
||||
|
||||
/* new db, so populate metadata */
|
||||
if (new) {
|
||||
assert(SUCC(_flist_new(state, PMA_GROW_SIZE_p)));
|
||||
assert(SUCC(_nlist_new(state)));
|
||||
|
||||
if (!SUCC(rc = _bt_state_meta_new(state))) {
|
||||
munmap(state->map, BT_ADDRSIZE);
|
||||
return rc;
|
||||
}
|
||||
|
||||
assert(SUCC(_bt_state_meta_inject_root(state)));
|
||||
assert(SUCC(_mlist_new(state)));
|
||||
}
|
||||
else {
|
||||
@ -2729,7 +2739,7 @@ _bt_sync(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth)
|
||||
|
||||
/* do dfs */
|
||||
for (size_t i = 0; i < N-1; i++) {
|
||||
if (!_bt_ischilddirty(node, i))
|
||||
if (!_bt_ischilddirty(node, i)) /* ;;: consider removing case until dirty logic is foolproof */
|
||||
continue; /* not dirty. nothing to do */
|
||||
|
||||
BT_page *child = _node_get(state, node->datk[i].fo);
|
||||
@ -2931,9 +2941,16 @@ bt_sync(BT_state *state)
|
||||
BT_page *root = _node_get(state, meta->root);
|
||||
int rc = 0;
|
||||
|
||||
/* sync root subtrees */
|
||||
if ((rc = _bt_sync(state, root, 1, meta->depth)))
|
||||
return rc;
|
||||
|
||||
/* sync root page itself */
|
||||
if (msync(root, sizeof(BT_page), MS_SYNC) != 0) {
|
||||
DPRINTF("msync of root node: %p failed with %s", root, strerror(errno));
|
||||
abort();
|
||||
}
|
||||
|
||||
/* merge the pending freelists */
|
||||
_pending_nlist_merge(state);
|
||||
_pending_flist_merge(state);
|
||||
|
Loading…
Reference in New Issue
Block a user