pma: mmap and mprotect revisions wip

This commit is contained in:
barter-simsum 2023-12-12 18:23:37 -05:00
parent 9271deb3d6
commit e96d7ecb4c

View File

@ -382,6 +382,7 @@ _bt_nalloc(BT_state *state)
params to the function and make actual return value an error code. This is params to the function and make actual return value an error code. This is
to avoid forcing some callers to immediately use _fo_get */ to avoid forcing some callers to immediately use _fo_get */
BT_nlistnode **n = &state->nlist; BT_nlistnode **n = &state->nlist;
BT_page *ret = 0;
for (; *n; n = &(*n)->next) { for (; *n; n = &(*n)->next) {
/* ;;: this assert is temporary. When partition striping is /* ;;: this assert is temporary. When partition striping is
@ -396,7 +397,7 @@ _bt_nalloc(BT_state *state)
BT_page *ret; BT_page *ret;
ret = (*n)->va; ret = (*n)->va;
*n = (*n)->next; *n = (*n)->next;
return ret; break;
} }
/* larger than necessary: shrink the node */ /* larger than necessary: shrink the node */
if ((*n)->sz > 1) { if ((*n)->sz > 1) {
@ -404,9 +405,12 @@ _bt_nalloc(BT_state *state)
ret = (*n)->va; ret = (*n)->va;
(*n)->sz -= 1; (*n)->sz -= 1;
(*n)->va = (*n)->va + 1; (*n)->va = (*n)->va + 1;
return ret; break;
} }
} }
/* make node writeable */
mprotect(ret, sizeof(BT_page), PROT_READ | PROT_WRITE);
} }
static int static int
@ -1494,6 +1498,7 @@ _flist_grow(BT_state *state, BT_flistnode *space)
static int static int
_flist_new(BT_state *state) _flist_new(BT_state *state)
#define FLIST_PG_START (((BT_PAGESIZE * BT_NUMMETAS) + BLK_BASE_LEN0) / BT_PAGESIZE)
{ {
BT_meta *meta = state->meta_pages[state->which]; BT_meta *meta = state->meta_pages[state->which];
BT_page *root = _node_get(state, meta->root); BT_page *root = _node_get(state, meta->root);
@ -1507,8 +1512,7 @@ _flist_new(BT_state *state)
head->next = 0; head->next = 0;
head->sz = len; head->sz = len;
head->pg = PMA_GROW_SIZE; /* ;;: should we invoke logic to expand the backing file head->pg = FLIST_PG_START;
here? probably. implement it */ /* */
state->flist = head; state->flist = head;
return BT_SUCC; return BT_SUCC;
@ -2022,15 +2026,30 @@ _bt_state_restore_maps2(BT_state *state, BT_page *node,
size_t bytelen = hiaddr - loaddr; size_t bytelen = hiaddr - loaddr;
off_t offset = P2BYTES(pg); off_t offset = P2BYTES(pg);
if (loaddr != if (pg != 0) {
mmap(loaddr, /* not freespace, map readonly data on disk */
bytelen, if (loaddr !=
PROT_READ | PROT_WRITE, mmap(loaddr,
MAP_FIXED | MAP_SHARED, bytelen,
state->data_fd, PROT_READ,
offset)) { MAP_FIXED | MAP_SHARED,
DPRINTF("mmap: failed to map at addr %p", loaddr); state->data_fd,
abort(); offset)) {
DPRINTF("mmap: failed to map at addr %p", loaddr);
abort();
}
}
else {
/* freespace, map no access */
if (loaddr !=
mmap(loaddr,
bytelen,
PROT_NONE,
MAP_FIXED | MAP_ANONYMOUS | MAP_NORESERVE,
-1, 0)) {
DPRINTF("mmap: failed to map at addr %p", loaddr);
abort();
}
} }
} }
return; return;
@ -2224,8 +2243,10 @@ _bt_state_load(BT_state *state)
} }
state->map = mmap(BT_MAPADDR, state->map = mmap(BT_MAPADDR,
BT_ADDRSIZE, BT_ADDRSIZE, /* should actually just be first 2M
PROT_READ | PROT_WRITE, stripe. and then from there
should map like it's freespace. */
PROT_READ | PROT_WRITE, /* ;;: PROT_READ */
MAP_FIXED | MAP_SHARED, MAP_FIXED | MAP_SHARED,
state->data_fd, state->data_fd,
0); 0);
@ -2392,6 +2413,13 @@ _bt_sync_meta(BT_state *state)
/* zero the new metapage's checksum */ /* zero the new metapage's checksum */
newwhich = state->which ? 0 : 1; newwhich = state->which ? 0 : 1;
newmeta = state->meta_pages[newwhich]; newmeta = state->meta_pages[newwhich];
/* make new metapage writeable */
if (mprotect(newmeta, sizeof(BT_page), PROT_READ | PROT_WRITE) != 0) {
DPRINTF("mprotect of new metapage failed with %s", strerror(errno));
abort();
}
newmeta->chk = 0; newmeta->chk = 0;
/* copy over metapage to new metapage excluding the checksum */ /* copy over metapage to new metapage excluding the checksum */
@ -2407,9 +2435,15 @@ _bt_sync_meta(BT_state *state)
newmeta->root = newrootpg; newmeta->root = newrootpg;
/* finally, switch the metapage we're referring to */ /* switch the metapage we're referring to */
state->which = newwhich; state->which = newwhich;
/* finally, make old metapage read-only */
if (mprotect(meta, sizeof(BT_page), PROT_READ) != 0) {
DPRINTF("mprotect of old metapage failed with %s", strerror(errno));
abort();
}
return BT_SUCC; return BT_SUCC;
} }
@ -2424,7 +2458,7 @@ _bt_sync(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth)
/* leaf */ /* leaf */
if (depth == maxdepth) { if (depth == maxdepth) {
_bt_sync_leaf(state, node); _bt_sync_leaf(state, node);
return BT_SUCC; goto e;
} }
/* do dfs */ /* do dfs */
@ -2442,10 +2476,17 @@ _bt_sync(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth)
if (msync(child, sizeof(BT_page), MS_SYNC)) if (msync(child, sizeof(BT_page), MS_SYNC))
return errno; return errno;
/* clean the child */ /* unset child dirty bit */
_bt_cleanchild(node, i); _bt_cleanchild(node, i);
} }
e:
/* all modifications done in node, mark it read-only */
if (mprotect(node, sizeof(BT_page), PROT_READ) != 0) {
DPRINTF("mprotect of node failed with %s", strerror(errno));
abort();
}
return BT_SUCC; return BT_SUCC;
} }
@ -2572,6 +2613,20 @@ bt_free(BT_state *state, void *lo, void *hi)
vaof_t hioff = addr2off(hi); vaof_t hioff = addr2off(hi);
_bt_insert(state, looff, hioff, 0); _bt_insert(state, looff, hioff, 0);
_mlist_insert(state, lo, hi); _mlist_insert(state, lo, hi);
/* ;;: is this correct? Shouldn't this actually happen when we merge the
pending_mlist on sync? */
size_t bytelen = (BYTE *)hi - (BYTE *)lo;
if (lo !=
mmap(lo,
bytelen,
PROT_NONE,
MAP_ANONYMOUS | MAP_FIXED | MAP_NORESERVE,
-1, 0)) {
DPRINTF("mmap: failed to map at addr %p", lo);
abort();
}
} }
// XX need to mprotect PROT_READ all ranges synced including root/meta // XX need to mprotect PROT_READ all ranges synced including root/meta
@ -2595,6 +2650,12 @@ bt_sync(BT_state *state)
if (msync(root, sizeof(BT_page), MS_SYNC)) if (msync(root, sizeof(BT_page), MS_SYNC))
return errno; return errno;
/* make root read-only */
if (mprotect(root, sizeof(BT_page), PROT_READ) != 0) {
DPRINTF("mprotect of root failed with %s", strerror(errno));
abort();
}
/* then sync the metapage */ /* then sync the metapage */
if (rc = _bt_sync_meta(state)) if (rc = _bt_sync_meta(state))
return rc; return rc;
@ -2920,3 +2981,49 @@ _bt_printnode(BT_page *node)
printf("[%5zu] %10x %10x\n", i, node->datk[i].va, node->datk[i].fo); printf("[%5zu] %10x %10x\n", i, node->datk[i].va, node->datk[i].fo);
} }
} }
/*
_bt_state_restore_maps2
if pg 0:
mmap MAP_ANONYMOUS | MAP_FIXED | MAP_NO_RESERVE
PROT_NONE
if pg !0:
mmap MAP_SHARED | MAP_FIXED
PROT_READ
------------------
the three routines that make modification to the data maps are:
bt_malloc:
MAP_SHARED | MAP_FIXED
PROT_READ | PROT_WRITE
_bt_data_cow:
MAP_SHARED | MAP_FIXED
PROT_READ | PROT_WRITE
bt_sync:
(mprotect)
PROT_READ
bt_free:
MAP_ANONYMOUS | MAP_FIXED | MAP_NO_RESERVE
PROT_NONE
-----------------
8 linear mappings (striping)
when we _bt_nalloc, mprotect(PROT_READ | PROT_WRITE)
when we free a node: mprotect(PROT_NONE)
additionally, when we sync, all allocated nodes: mprotect(PROT_READ)
*/