DWORD aligned heap allocs

This commit is contained in:
barter-simsum 2023-02-17 14:43:32 -05:00
parent abfd14c72a
commit d6261e914f
4 changed files with 233 additions and 62 deletions

View File

@ -186,4 +186,38 @@
/* c3_rename: forward (a, b) to rename(). written as a GNU statement
   expression, so the macro evaluates to rename()'s return value.
*/
# define c3_rename(a, b) ({ \
rename(a, b);})
/* c3_align: hi or lo align x to al

   Type-generic front end. _Generic selects the helper from the type of x:
     c3_w      -> c3_align_w
     c3_d      -> c3_align_d
     otherwise -> c3_align_p
   so unless the effective type of x is c3_w or c3_d, x is assumed to be a
   pointer.

   x    - value (or pointer) to align
   al   - alignment. the helpers build their mask as (al - 1), so al must be
          a power of two. for pointers, c3_align_p aligns the byte address,
          so al is in bytes.
   hilo - ALHI aligns upward (smallest multiple of al >= x),
          ALLO aligns downward (largest multiple of al <= x).

   Evaluates to the selected helper's return type: c3_w, c3_d, or void*.
*/
#define c3_align(x, al, hilo) \
_Generic((x), \
c3_w : c3_align_w, \
c3_d : c3_align_d, \
default : c3_align_p) \
(x, al, hilo)
/* align_dir: alignment direction for the c3_align_* helpers.
   the numeric values matter: the helpers multiply (al - 1) by the direction,
   so ALHI (1) bumps the value before masking (align up) and ALLO (0) leaves
   it untouched (align down).
*/
typedef enum { ALHI=1, ALLO=0 } align_dir;
/* c3_align_w(): align the c3_w value [x] to [al].

   x    - value to align
   al   - alignment; used as a bit mask (al - 1), so must be a power of two
   hilo - ALHI: round up to a multiple of al; ALLO: round down

   returns the aligned value.
*/
inline c3_w
c3_align_w(c3_w x, c3_w al, align_dir hilo)
{
  const c3_w mas_w = al - 1;  /* low bits that must end up clear */

  c3_dessert(hilo <= ALHI && hilo >= ALLO);

  /* hilo is 0 or 1: only ALHI pushes x past the previous boundary
  ** before the low bits are masked off.
  */
  return (x + (hilo * mas_w)) & ~mas_w;
}
/* c3_align_d(): align the c3_d value [x] to [al].

   x    - value to align
   al   - alignment; used as a bit mask (al - 1), so must be a power of two
   hilo - ALHI: round up to a multiple of al; ALLO: round down

   returns the aligned value.
*/
inline c3_d
c3_align_d(c3_d x, c3_d al, align_dir hilo)
{
  const c3_d mas_d = al - 1;  /* low bits that must end up clear */

  c3_dessert(hilo <= ALHI && hilo >= ALLO);

  /* hilo is 0 or 1: only ALHI pushes x past the previous boundary
  ** before the low bits are masked off.
  */
  return (x + (hilo * mas_d)) & ~mas_d;
}
/* c3_align_p(): align the address held in [p] to [al] bytes.

   p    - pointer whose numeric (uintptr_t) value is aligned
   al   - alignment in bytes; used as a bit mask (al - 1), so must be a
          power of two
   hilo - ALHI: round the address up; ALLO: round it down

   returns the aligned address as a non-const void*.
*/
inline void*
c3_align_p(void const * p, size_t al, align_dir hilo)
{
  const size_t mas_i = al - 1;        /* low address bits to clear */
  uintptr_t    adr_i = (uintptr_t)p;  /* do the math on the integer value */

  c3_dessert(hilo <= ALHI && hilo >= ALLO);

  /* hilo is 0 or 1: only ALHI bumps the address before masking */
  adr_i = (adr_i + (hilo * mas_i)) & ~mas_i;

  return (void*)adr_i;
}
#endif /* ifndef C3_DEFS_H */

View File

@ -18,6 +18,9 @@ c3_w u3_Code;
// declarations of inline functions
//
c3_w c3_align_w(c3_w x, c3_w al, align_dir hilo);
c3_d c3_align_d(c3_d x, c3_d al, align_dir hilo);
void *c3_align_p(void const * p, size_t al, align_dir hilo);
void *u3a_into(c3_w x);
c3_w u3a_outa(void *p);
c3_w u3a_to_off(c3_w som);
@ -57,6 +60,20 @@ static void
_box_count(c3_ws siz_ws) { }
#endif
/* _box_vaal(): validate box alignment. no-op without C3DBG

   Asserts (via c3_dessert) two things about [box_u]:
     - the address u3a_boxto(box_u) is a multiple of u3a_balign
     - the box's siz_w field is a multiple of u3a_walign

   N.B: `&` binds more loosely than `-`, so `x & u3a_balign-1` is
   `x & (u3a_balign - 1)`. [box_u] is expanded twice; don't pass an
   expression with side effects.

   TODO: I think validation code that might be compiled out like this,
   _box_count, (others?) should have perhaps its own header and certainly its
   own prefix. having to remind yourself that _box_count doesn't actually do
   anything unless U3_CPU_DEBUG is defined is annoying. */
#define _box_vaal(box_u) \
do { \
c3_dessert(((uintptr_t)u3a_boxto(box_u) \
& u3a_balign-1) == 0); \
c3_dessert((((u3a_box*)(box_u))->siz_w \
& u3a_walign-1) == 0); \
} while(0)
/* _box_slot(): select the right free list to search for a block.
TODO: do we really need a loop to do this?
@ -99,6 +116,8 @@ _box_make(void* box_v, c3_w siz_w, c3_w use_w)
box_w[siz_w - 1] = siz_w; /* stor size at end of allocation as well */
box_u->use_w = use_w;
_box_vaal(box_u);
# ifdef U3_MEMORY_DEBUG
box_u->cod_w = u3_Code;
box_u->eus_w = 0;
@ -189,6 +208,8 @@ _box_free(u3a_box* box_u)
return;
}
_box_vaal(box_u);
#if 0
/* Clear the contents of the block, for debugging.
*/
@ -201,12 +222,12 @@ _box_free(u3a_box* box_u)
}
#endif
if ( c3y == u3a_is_north(u3R) ) {
if ( c3y == u3a_is_north(u3R) ) { /* north */
/* Try to coalesce with the block below.
*/
if ( box_w != u3a_into(u3R->rut_p) ) {
c3_w laz_w = *(box_w - 1);
u3a_box* pox_u = (u3a_box*)(void *)(box_w - laz_w);
c3_w laz_w = *(box_w - 1); /* the size of a box stored at the end of its allocation */
u3a_box* pox_u = (u3a_box*)(void *)(box_w - laz_w); /* the head of the adjacent box below */
if ( 0 == pox_u->use_w ) {
_box_detach(pox_u);
@ -231,8 +252,8 @@ _box_free(u3a_box* box_u)
}
_box_attach(box_u);
}
}
else {
} /* end north */
else { /* south */
/* Try to coalesce with the block above.
*/
if ( (box_w + box_u->siz_w) != u3a_into(u3R->rut_p) ) {
@ -260,7 +281,7 @@ _box_free(u3a_box* box_u)
}
_box_attach(box_u);
}
}
} /* end south */
}
/* _me_align_pad(): pad to first point after pos_p aligned at (ald_w, alp_w).
@ -301,29 +322,29 @@ _ca_box_make_hat(c3_w len_w, c3_w ald_w, c3_w alp_w, c3_w use_w)
if ( c3y == u3a_is_north(u3R) ) {
all_p = u3R->hat_p;
pad_w = _me_align_pad(all_p, ald_w, alp_w);
siz_w = (len_w + pad_w);
pad_w = c3_align(all_p, ald_w, ALHI) - all_p;
siz_w = c3_align(len_w + pad_w, u3a_walign, ALHI);
// hand-inlined: siz_w >= u3a_open(u3R)
//
if ( (siz_w >= (u3R->cap_p - u3R->hat_p)) ) {
return 0;
}
u3R->hat_p = (all_p + siz_w);
u3R->hat_p += siz_w;
}
else {
all_p = (u3R->hat_p - len_w);
pad_w = _me_align_dap(all_p, ald_w, alp_w);
siz_w = (len_w + pad_w);
all_p -= pad_w;
all_p = u3R->hat_p - len_w;
pad_w = all_p - c3_align(all_p, ald_w, ALLO);
siz_w = c3_align(len_w + pad_w, u3a_walign, ALHI);
// hand-inlined: siz_w >= u3a_open(u3R)
//
if ( siz_w >= (u3R->hat_p - u3R->cap_p) ) {
return 0;
}
u3R->hat_p = all_p;
all_p = u3R->hat_p -= siz_w;
}
return _box_make(u3a_into(all_p), siz_w, use_w);
}
@ -503,10 +524,11 @@ _ca_willoc(c3_w len_w, c3_w ald_w, c3_w alp_w)
}
else { /* we got a non-null freelist */
c3_w pad_w = _me_align_pad(*pfr_p, ald_w, alp_w);
c3_w des_w = c3_align(siz_w + pad_w, u3a_walign, ALHI);
if ( 1 == ald_w ) c3_assert(0 == pad_w);
if ( (siz_w + pad_w) > u3to(u3a_fbox, *pfr_p)->box_u.siz_w ) {
if ( (des_w) > u3to(u3a_fbox, *pfr_p)->box_u.siz_w ) {
/* This free block is too small. Continue searching.
*/
pfr_p = &(u3to(u3a_fbox, *pfr_p)->nex_p);
@ -518,7 +540,7 @@ _ca_willoc(c3_w len_w, c3_w ald_w, c3_w alp_w)
/* We have found a free block of adequate size. Remove it
** from the free list.
*/
siz_w += pad_w;
_box_count(-(box_u->siz_w));
/* misc free list consistency checks.
TODO: in the future should probably only run for C3DBG builds */
@ -549,15 +571,21 @@ _ca_willoc(c3_w len_w, c3_w ald_w, c3_w alp_w)
/* If we can chop off another block, do it.
*/
if ( (siz_w + u3a_minimum) <= box_u->siz_w ) {
if ( (des_w + u3a_minimum) <= box_u->siz_w ) {
/* Split the block.
*/
/* XXX: Despite the fact that we're making a box here, we don't
actually have to ensure it's aligned, since siz_w and all boxes
already on the loom /are/ aligned. A debug break here implies
that you broke those conditions, not that this needs to handle
alignment. abandon hope. */
c3_w* box_w = ((c3_w *)(void *)box_u);
c3_w* end_w = box_w + siz_w;
c3_w lef_w = (box_u->siz_w - siz_w);
c3_w* end_w = box_w + des_w;
c3_w lef_w = (box_u->siz_w - des_w);
_box_attach(_box_make(end_w, lef_w, 0));
return u3a_boxto(_box_make(box_w, siz_w, 1));
return u3a_boxto(_box_make(box_w, des_w, 1));
}
else {
c3_assert(0 == box_u->use_w);
@ -580,14 +608,31 @@ static void*
_ca_walloc(c3_w len_w, c3_w ald_w, c3_w alp_w)
{
void* ptr_v;
c3_w req_w; /* allocation request length */
while ( 1 ) {
ptr_v = _ca_willoc(len_w, ald_w, alp_w);
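/* N.B: This odd looking logic is to generalize correct allocation lengths
requested from _ca_willoc to alignments other than DWORD. For DWORD (8
byte) aligned references, this is intended to be eq to `req_w = len_w | 1`;
ie we request an odd allocation length because c3_wiseof(u3a_box) + 1 is 3
and (3 + {1,3,5,...}) % 2 == 0
This works only because when we strip off mem from the hat or from a larger
fbox, we don't strip off any more than what was requested (+ padding)
NOTE(review): in C, `&` binds more loosely than `+` and `-`, so the
expression below parses as
(len_w + ... + u3a_walign) & (~(u3a_walign - 1) - ...)
which is not "round up, then subtract". Confirm the intent and parenthesize.
*/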
req_w = len_w
+ (c3_wiseof(u3a_box) + 1)
+ u3a_walign
& ~(u3a_walign - 1)
- (c3_wiseof(u3a_box) + 1);
for (;;) {
ptr_v = _ca_willoc(req_w, ald_w, alp_w);
if ( 0 != ptr_v ) {
break;
}
_ca_reclaim_half();
}
_box_vaal(u3a_botox(ptr_v));
return ptr_v;
}
@ -614,6 +659,7 @@ u3a_walloc(c3_w len_w)
xuc_i++;
}
#endif
_box_vaal(u3a_botox(ptr_v));
return ptr_v;
}
@ -622,15 +668,24 @@ u3a_walloc(c3_w len_w)
void*
u3a_wealloc(void* lag_v, c3_w len_w)
{
c3_w req_w; /* allocation request length */
/* N.B: see related note in _ca_walloc */
req_w = len_w
+ (c3_wiseof(u3a_box) + 1)
+ u3a_walign
& ~(u3a_walign - 1)
- (c3_wiseof(u3a_box) + 1);
if ( !lag_v ) {
return u3a_malloc(len_w);
return u3a_malloc(req_w);
}
else {
u3a_box* box_u = u3a_botox(lag_v);
c3_w* old_w = lag_v;
c3_w tiz_w = c3_min(box_u->siz_w, len_w);
c3_w tiz_w = c3_min(box_u->siz_w, req_w);
{
c3_w* new_w = u3a_walloc(len_w);
c3_w* new_w = u3a_walloc(req_w);
c3_w i_w;
for ( i_w = 0; i_w < tiz_w; i_w++ ) {
@ -670,23 +725,37 @@ u3a_wfree(void* tox_v)
}
/* u3a_wtrim(): trim storage.
old_w - old length
len_w - new length
*/
void
u3a_wtrim(void* tox_v, c3_w old_w, c3_w len_w)
{
c3_w* nov_w = tox_v;
if ( (old_w > len_w)
&& ((old_w - len_w) >= u3a_minimum) )
{
c3_w* box_w = (void *)u3a_botox(nov_w);
c3_w* end_w = (nov_w + len_w + 1);
c3_w asz_w = (end_w - box_w);
c3_w bsz_w = box_w[0] - asz_w;
if ( (old_w > len_w)
&& ((old_w - len_w) >= u3a_minimum) )
{
u3a_box* box_u = u3a_botox(nov_w);
c3_w* box_w = (void*)u3a_botox(nov_w);
_box_attach(_box_make(end_w, bsz_w, 0));
c3_w* end_w = c3_align(nov_w + len_w + 1, /* +1 for trailing allocation size */
u3a_balign,
ALHI);
box_w[0] = asz_w;
c3_w asz_w = (end_w - box_w); /* total size in words of new allocation */
if (box_u->siz_w <= asz_w) return;
c3_w bsz_w = box_u->siz_w - asz_w; /* size diff in words between old and new */
c3_dessert(asz_w && ((asz_w & u3a_walign-1) == 0)); /* new allocation size must be non-zero and DWORD multiple */
c3_dessert(end_w < (box_w + box_u->siz_w)); /* desired alloc end must not exceed existing boundaries */
c3_dessert(((uintptr_t)end_w & u3a_balign-1) == 0); /* address of box getting freed must be DWORD aligned */
c3_dessert((bsz_w & u3a_walign-1) == 0); /* size of box getting freed must be DWORD multiple */
_box_attach(_box_make(end_w, bsz_w, 0)); /* free the unneeded space */
box_u->siz_w = asz_w;
box_w[asz_w - 1] = asz_w;
}
}
@ -732,6 +801,9 @@ u3a_malloc(size_t len_i)
}
/* u3a_cellblock(): allocate a block of cells on the hat.
XXX beware when we stop boxing cells and QWORD align references. Alignment
not guaranteed to be preserved after a call.
*/
static c3_o
u3a_cellblock(c3_w num_w)
@ -756,7 +828,7 @@ u3a_cellblock(c3_w num_w)
// hand inline of _box_make(u3a_into(all_p), u3a_minimum, 1)
{
box_w[0] = u3a_minimum;
box_u->siz_w = u3a_minimum;
box_w[u3a_minimum - 1] = u3a_minimum;
box_u->use_w = 1;
#ifdef U3_MEMORY_DEBUG
@ -791,7 +863,7 @@ u3a_cellblock(c3_w num_w)
// hand inline of _box_make(u3a_into(all_p), u3a_minimum, 1);
{
box_w[0] = u3a_minimum;
box_u->siz_w = u3a_minimum;
box_w[u3a_minimum - 1] = u3a_minimum;
box_u->use_w = 1;
# ifdef U3_MEMORY_DEBUG
@ -812,6 +884,7 @@ u3a_cellblock(c3_w num_w)
}
/* u3a_celloc(): allocate a cell.
XXX beware when we stop boxing cells and QWORD align references
*/
c3_w*
u3a_celloc(void)

View File

@ -9,6 +9,19 @@
*/
# define u3a_bits U3_OS_LoomBits
/* u3a_vits: number of virtual bits in a noun reference
gained via alignment + shifting
(currently 1)
*/
# define u3a_vits ((c3_y)1)
/* u3a_walign: references into the loom are guaranteed to be word-aligned to:
(1 << u3a_vits) words, i.e. 2 words with u3a_vits == 1. always a power of
two, so (u3a_walign - 1) is usable as a low-bit mask.
*/
# define u3a_walign ((c3_y)1 << u3a_vits)
/* u3a_balign: u3a_walign in bytes
(sizeof(c3_w) bytes per word times u3a_walign words)
*/
# define u3a_balign (sizeof(c3_w)*u3a_walign)
/* u3a_page: number of bits in word-addressed page. 12 == 16Kbyte page.
*/
# define u3a_page 12
@ -40,7 +53,7 @@
/* u3a_fbox_no: number of free lists per size.
*/
# define u3a_fbox_no 27
# define u3a_fbox_no 27 /* why 27? Perhaps because 16 = 1 << 4 and 31 - 4 = 27 */
/** Structures.
@ -335,6 +348,30 @@
: (u3a_botox(u3a_to_ptr(som))->use_w == 1) \
? c3y : c3n )
/* _rod_vaal(): validate road alignment. like _box_vaal but for rods.

   Asserts (via c3_dessert) that the hat_p field of [rod_u] is a multiple of
   u3a_walign. [rod_u] is cast to u3a_road* before the field is read.
   N.B: `&` binds more loosely than `-`, so `x & u3a_walign-1` is
   `x & (u3a_walign - 1)`.

   Again, probably want to prefix validation functions at the very least.
   Maybe they can be defined in their own header.
   ps. while arguably cooler to have this compile to
   do {(void(0));(void(0));} while(0)
   It may be nicer to just wrap an inline function in #ifdef C3DBG guards. You
   could even return the then validated road like
   u3a_road f() {
   u3a_road rod_u;
   ...
   return _rod_vaal(rod_u);
   }
*/
# define _rod_vaal(rod_u) \
do { \
c3_dessert(((uintptr_t)((u3a_road*)(rod_u))->hat_p \
& u3a_walign-1) == 0); \
} while(0)
/** Globals.
**/
/// Current road (thread-local).

View File

@ -483,11 +483,12 @@ _pave_parts(void)
u3R->byc.har_p = u3h_new();
}
/* _pave_road(): initialize road boundaries
/* _pave_road(): writes road boundaries to loom mem (stored at mat_w)
*/
static u3_road*
_pave_road(c3_w* rut_w, c3_w* mat_w, c3_w* cap_w, c3_w siz_w)
{
c3_dessert(((uintptr_t)rut_w & u3a_balign-1) == 0);
u3_road* rod_u = (void*) mat_w;
// enable in case of corruption
@ -504,10 +505,17 @@ _pave_road(c3_w* rut_w, c3_w* mat_w, c3_w* cap_w, c3_w siz_w)
rod_u->mat_p = u3of(c3_w, mat_w); // stack bottom
rod_u->cap_p = u3of(c3_w, cap_w); // stack top
_rod_vaal(rod_u);
return rod_u;
}
/* _pave_north(): calculate boundaries and initialize north road.
mem_w - the "beginning" of your loom (its lowest address). Corresponds to rut
in a north road.
siz_w - the size in words of your road record (or home record in the case of
paving home).
len_w - size of your loom in words
*/
static u3_road*
_pave_north(c3_w* mem_w, c3_w siz_w, c3_w len_w)
@ -518,14 +526,23 @@ _pave_north(c3_w* mem_w, c3_w siz_w, c3_w len_w)
// the stack starts at the end of the memory segment,
// minus space for the road structure [siz_w]
//
c3_w* rut_w = mem_w;
c3_w* mat_w = ((mem_w + len_w) - siz_w);
// 00~~~|R|---|H|######|C|+++|M|~~~FF
// ^--u3R which _pave_road returns (u3H for home road)
//
c3_w* mat_w = c3_align(mem_w + len_w - siz_w, u3a_balign, ALLO);
c3_w* rut_w = c3_align(mem_w, u3a_balign, ALHI);
c3_w* cap_w = mat_w;
return _pave_road(rut_w, mat_w, cap_w, siz_w);
}
/* _pave_south(): calculate boundaries and initialize south road.
mem_w - the "beginning" of your loom (its lowest address). Corresponds to mat
in a south road.
siz_w - the size in words of your road record (or home record in the case of
paving home).
len_w - size of your loom in words
*/
static u3_road*
_pave_south(c3_w* mem_w, c3_w siz_w, c3_w len_w)
@ -536,8 +553,10 @@ _pave_south(c3_w* mem_w, c3_w siz_w, c3_w len_w)
// the stack starts at the base memory pointer [mem_w],
// and ends after the space for the road structure [siz_w]
//
c3_w* rut_w = (mem_w + len_w);
c3_w* mat_w = mem_w;
// 00~~~|M|+++|C|######|H|---|R|~~~FFF
// ^---u3R which _pave_road returns
c3_w* mat_w = c3_align(mem_w, u3a_balign, ALHI);
c3_w* rut_w = c3_align(mem_w + len_w, u3a_balign, ALLO);
c3_w* cap_w = mat_w + siz_w;
return _pave_road(rut_w, mat_w, cap_w, siz_w);
@ -548,9 +567,9 @@ _pave_south(c3_w* mem_w, c3_w siz_w, c3_w len_w)
static void
_pave_home(void)
{
c3_w* mem_w = u3_Loom + 1;
c3_w* mem_w = u3_Loom + u3a_walign;
c3_w siz_w = c3_wiseof(u3v_home);
c3_w len_w = u3C.wor_i - 1;
c3_w len_w = u3C.wor_i - u3a_walign;
u3H = (void *)_pave_north(mem_w, siz_w, len_w);
u3H->ver_w = u3v_version;
@ -569,9 +588,10 @@ _find_home(void)
{
// NB: the home road is always north
//
c3_w* mem_w = u3_Loom + 1;
c3_w* mem_w = u3_Loom + u3a_walign;
c3_w siz_w = c3_wiseof(u3v_home);
c3_w len_w = u3C.wor_i - 1;
c3_w len_w = u3C.wor_i - u3a_walign;
c3_w* mat_w;
{
c3_w ver_w = *((mem_w + len_w) - 1);
@ -585,13 +605,16 @@ _find_home(void)
}
}
u3H = (void *)((mem_w + len_w) - siz_w);
mat_w = c3_align(mem_w + len_w - siz_w, u3a_balign, ALLO);
u3H = (void *)mat_w;
u3R = &u3H->rod_u;
// this looks risky, but there are no legitimate scenarios
// where it's wrong
//
u3R->cap_p = u3R->mat_p = u3C.wor_i - c3_wiseof(*u3H);
_rod_vaal(u3R);
}
/* u3m_pave(): instantiate or activate image.
@ -780,9 +803,11 @@ u3m_error(c3_c* str_c)
void
u3m_leap(c3_w pad_w)
{
c3_w len_w;
c3_w len_w; /* the length of the new road (avail - (pad [4M] + wiseof(u3a_road))) */
u3_road* rod_u;
_rod_vaal(u3R);
/* Measure the pad - we'll need it.
*/
{
@ -795,40 +820,40 @@ u3m_leap(c3_w pad_w)
}
#endif
if ( (pad_w + c3_wiseof(u3a_road)) >= u3a_open(u3R) ) {
/* not enough storage to leap */
u3m_bail(c3__meme);
}
len_w = u3a_open(u3R) - (pad_w + c3_wiseof(u3a_road));
pad_w += c3_wiseof(u3a_road);
len_w = u3a_open(u3R) - pad_w;
c3_align(len_w, u3a_walign, ALHI);
}
/* Allocate a region on the cap.
*/
{
u3p(c3_w) bot_p;
u3p(c3_w) bot_p; /* S: bot_p = new mat. N: bot_p = new rut */
if ( c3y == u3a_is_north(u3R) ) {
bot_p = (u3R->cap_p - len_w);
u3R->cap_p -= len_w;
bot_p = u3R->hat_p + pad_w;
rod_u = _pave_south(u3a_into(bot_p), c3_wiseof(u3a_road), len_w);
u3e_ward(rod_u->cap_p, rod_u->hat_p);
#if 0
fprintf(stderr, "leap: from north %p (cap 0x%x), to south %p\r\n",
u3R,
u3R->cap_p + len_w,
rod_u);
fprintf(stderr, "NPAR.hat_p: 0x%x %p, SKID.hat_p: 0x%x %p\r\n",
u3R->hat_p, u3a_into(u3R->hat_p),
rod_u->hat_p, u3a_into(rod_u->hat_p));
#endif
}
else {
bot_p = u3R->cap_p;
u3R->cap_p += len_w;
rod_u = _pave_north(u3a_into(bot_p), c3_wiseof(u3a_road), len_w);
u3e_ward(rod_u->hat_p, rod_u->cap_p);
#if 0
fprintf(stderr, "leap: from south %p (cap 0x%x), to north %p\r\n",
u3R,
u3R->cap_p - len_w,
rod_u);
fprintf(stderr, "SPAR.hat_p: 0x%x %p, NKID.hat_p: 0x%x %p\r\n",
u3R->hat_p, u3a_into(u3R->hat_p),
rod_u->hat_p, u3a_into(rod_u->hat_p));
#endif
}
}
@ -850,6 +875,8 @@ u3m_leap(c3_w pad_w)
#ifdef U3_MEMORY_DEBUG
rod_u->all.fre_w = 0;
#endif
_rod_vaal(u3R);
}
void
@ -1304,7 +1331,7 @@ u3m_soft(c3_w mil_w,
{
u3_noun why;
why = u3m_soft_top(mil_w, (1 << 20), fun_f, arg); // 2MB pad
why = u3m_soft_top(mil_w, (1 << 20), fun_f, arg); // 4M pad
if ( 0 == u3h(why) ) {
return why;