diff --git a/Makefile b/Makefile index def67dfa67..6e0770f39f 100644 --- a/Makefile +++ b/Makefile @@ -59,7 +59,7 @@ INCLUDE=i MDEFINES=-DU3_OS_$(OS) -DU3_OS_ENDIAN_$(ENDIAN) -D U3_LIB=\"$(LIB)\" # NOTFORCHECKIN - restore -O3 -CFLAGS= $(COSFLAGS) -g -msse3 -ffast-math \ +CFLAGS= $(COSFLAGS) -O3 -msse3 -ffast-math \ -funsigned-char \ -I/usr/local/include \ -I/opt/local/include \ diff --git a/i/n/a.h b/i/n/a.h index 1e24a9c47b..f13590230f 100644 --- a/i/n/a.h +++ b/i/n/a.h @@ -311,29 +311,31 @@ **/ /** Allocation. **/ - /* Basic allocation. + /* Word-aligned allocation. */ /* u3a_walloc(): allocate storage measured in words. */ void* u3a_walloc(c3_w len_w); - /* u3a_malloc(): allocate storage measured in bytes. - */ - void* - u3a_malloc(size_t len_i); - - /* u3a_free(): free storage. + /* u3a_drop(): free storage. */ void - u3a_free(void* lag_v); + u3a_drop(void* lag_v); /* u3a_wealloc(): word realloc. */ void* u3a_wealloc(void* lag_v, c3_w len_w); - /* u3a_realloc(): byte realloc. + /* C-style aligned allocation - *not* compatible with above. + */ + /* u3a_malloc(): aligned storage measured in bytes. + */ + void* + u3a_malloc(size_t len_i); + + /* u3a_realloc(): aligned realloc in bytes. */ void* u3a_realloc(void* lag_v, size_t len_i); @@ -343,12 +345,16 @@ void* u3a_realloc2(void* lag_v, size_t old_i, size_t new_i); + /* u3a_free(): free for aligned malloc. + */ + void + u3a_free(void* tox_v); + /* u3a_free2(): gmp-shaped free. */ void u3a_free2(void* tox_v, size_t siz_i); - /* Reference and arena control. */ /* u3a_gain(): gain a reference count in normal space. diff --git a/n/a.c b/n/a.c index 1cb256fcbd..00d2c2f7e4 100644 --- a/n/a.c +++ b/n/a.c @@ -115,29 +115,75 @@ _box_detach(u3a_box* box_u) } } -/* _me_road_all_hat(): in u3R, allocate directly on the hat. +/* _me_align_pad(): pad to first point after pos_p aligned at (ald_w, alp_w). */ -static c3_w* -_me_road_all_hat(c3_w len_w) +static __inline__ c3_w +_me_align_pad(u3_post pos_p, c3_w ald_w, c3_w alp_w) { - if ( len_w > u3a_open(u3R) ) { - u3m_bail(c3__meme); return 0; - } + c3_w adj_w = (ald_w - (alp_w + 1)); + c3_p off_p = (pos_p + adj_w); + c3_p orp_p = off_p &~ (ald_w - 1); + c3_p fin_p = orp_p + alp_w; + c3_w pad_w = (fin_p - pos_p); - if ( c3y == u3a_is_north(u3R) ) { - u3_post all_p; + return pad_w; +} + +/* _me_align_dap(): pad to last point before pos_p aligned at (ald_w, alp_w). +*/ +static __inline__ c3_w +_me_align_dap(u3_post pos_p, c3_w ald_w, c3_w alp_w) +{ + c3_w adj_w = alp_w; + c3_p off_p = (pos_p - adj_w); + c3_p orp_p = (off_p &~ (ald_w - 1)); + c3_p fin_p = orp_p + alp_w; + c3_w pad_w = (pos_p - fin_p); + + return pad_w; +} + +/* _ca_box_make_hat(): in u3R, allocate directly on the hat. +*/ +static u3a_box* +_ca_box_make_hat(c3_w len_w, c3_w ald_w, c3_w alp_w, c3_w use_w) +{ + c3_w pad_w; + u3_post all_p; + if ( c3y == u3a_is_north(u3R) ) { all_p = u3R->hat_p; - u3R->hat_p += len_w; + pad_w = _me_align_pad(all_p, ald_w, alp_w); - return u3a_into(all_p); + u3R->hat_p += (len_w + pad_w); + + if ( u3R->hat_p >= u3R->cap_p ) { + u3m_bail(c3__meme); return 0; + } } else { - u3R->hat_p -= len_w; - return u3a_into(u3R->hat_p); + all_p = (u3R->hat_p - len_w); + pad_w = _me_align_dap(all_p, ald_w, alp_w); + all_p -= pad_w; + u3R->hat_p = all_p; + + if ( u3R->hat_p <= u3R->cap_p ) { + u3m_bail(c3__meme); return 0; + } } + return _box_make(u3a_into(all_p), (len_w + pad_w), use_w); } +#if 0 +/* _me_road_all_hat(): in u3R, allocate directly on the hat. +*/ +static u3a_box* +_ca_box_make_hat(c3_w len_w, c3_w alm_w, c3_w use_w) +{ + return _box_make(_me_road_all_hat(len_w), len_w, use_w); +} +#endif + #if 0 // not yet used /* _me_road_all_cap(): in u3R, allocate directly on the cap. */ @@ -193,11 +239,13 @@ u3a_sane(void) /* _ca_walloc(): u3a_walloc() internals. */ static void* -_ca_walloc(c3_w len_w) +_ca_walloc(c3_w len_w, c3_w ald_w, c3_w alp_w) { c3_w siz_w = c3_max(u3a_minimum, u3a_boxed(len_w)); c3_w sel_w = _box_slot(siz_w); + alp_w = (alp_w + c3_wiseof(u3a_box)) % ald_w; + // XX: this logic is totally bizarre, but preserve it. // if ( (sel_w != 0) && (sel_w != u3a_fbox_no - 1) ) { @@ -217,11 +265,15 @@ _ca_walloc(c3_w len_w) else { /* Nothing in top free list. Chip away at the hat. */ - return u3a_boxto(_box_make(_me_road_all_hat(siz_w), siz_w, 1)); + return u3a_boxto(_ca_box_make_hat(siz_w, ald_w, alp_w, 1)); } } else { - if ( siz_w > u3to(u3a_fbox, *pfr_p)->box_u.siz_w ) { + c3_w pad_w = _me_align_pad(*pfr_p, ald_w, alp_w); + + if ( 1 == ald_w ) c3_assert(0 == pad_w); + + if ( (siz_w + pad_w) > u3to(u3a_fbox, *pfr_p)->box_u.siz_w ) { /* This free block is too small. Continue searching. */ pfr_p = &(u3to(u3a_fbox, *pfr_p)->nex_p); @@ -233,6 +285,7 @@ _ca_walloc(c3_w len_w) /* We have found a free block of adequate size. Remove it ** from the free list. */ + siz_w += pad_w; { { c3_assert((0 == u3to(u3a_fbox, *pfr_p)->pre_p) || @@ -285,7 +338,7 @@ int FOO; void* u3a_walloc(c3_w len_w) { - void* ptr_v = _ca_walloc(len_w); + void* ptr_v = _ca_walloc(len_w, 1, 0); #if 0 if ( (703 == u3_Code) && @@ -325,16 +378,16 @@ u3a_wealloc(void* lag_v, c3_w len_w) for ( i_w = 0; i_w < tiz_w; i_w++ ) { new_w[i_w] = old_w[i_w]; } - u3a_free(lag_v); + u3a_drop(lag_v); return new_w; } } } -/* u3a_free(): free storage. +/* u3a_drop(): free storage. */ void -u3a_free(void* tox_v) +u3a_drop(void* tox_v) { u3a_box* box_u = u3a_botox(tox_v); c3_w* box_w = (c3_w *)(void *)box_u; @@ -417,6 +470,7 @@ u3a_free(void* tox_v) } } +#if 0 /* u3a_malloc(): allocate storage measured in bytes. */ void* @@ -437,6 +491,54 @@ u3a_realloc(void* lag_v, size_t len_i) return u3a_wealloc(lag_v, (len_w + 3) >> 2); } +#else +/* u3a_malloc(): aligned storage measured in bytes. +*/ +void* +u3a_malloc(size_t len_i) +{ + c3_w len_w = (c3_w)((len_i + 3) >> 2); + c3_w* ptr_w = _ca_walloc(len_w + 1, 4, 3); + u3_post ptr_p = u3a_outa(ptr_w); + c3_w pad_w = _me_align_pad(ptr_p, 4, 3); + c3_w* out_w = u3a_into(ptr_p + pad_w + 1); + + out_w[-1] = pad_w; + return out_w; +} + +/* u3a_realloc(): aligned realloc in bytes. +*/ +void* +u3a_realloc(void* lag_v, size_t len_i) +{ + if ( !lag_v ) { + return u3a_malloc(len_i); + } + else { + c3_w len_w = (c3_w)((len_i + 3) >> 2); + c3_w* lag_w = lag_v; + c3_w pad_w = lag_w[-1]; + c3_w* org_w = lag_w - (pad_w + 1); + u3a_box* box_u = u3a_botox((void *)org_w); + c3_w* old_w = lag_v; + c3_w tiz_w = c3_min(box_u->siz_w, len_w); + { + c3_w* new_w = u3a_malloc(len_i); + c3_w i_w; + + for ( i_w = 0; i_w < tiz_w; i_w++ ) { + new_w[i_w] = old_w[i_w]; + } + u3a_drop(org_w); + return new_w; + } + } + c3_w len_w = (c3_w)len_i; + + return u3a_wealloc(lag_v, (len_w + 3) >> 2); +} + /* u3a_realloc2(): gmp-shaped realloc. */ void* @@ -445,13 +547,27 @@ u3a_realloc2(void* lag_v, size_t old_i, size_t new_i) return u3a_realloc(lag_v, new_i); } +/* u3a_free(): free for aligned malloc. +*/ +void +u3a_free(void* tox_v) +{ + c3_w* tox_w = tox_v; + c3_w pad_w = tox_w[-1]; + c3_w* org_w = tox_w - (pad_w + 1); + + // printf("free %p %p\r\n", org_w, tox_w); + u3a_drop(org_w); +} + /* u3a_free2(): gmp-shaped free. */ void u3a_free2(void* tox_v, size_t siz_i) { - u3a_free(tox_v); + return u3a_free(tox_v); } +#endif #if 1 /* _me_wash_north(): clean up mug slots after copy. @@ -946,14 +1062,14 @@ top: if ( !_(u3a_is_cat(h_dog)) ) { _me_lose_north(h_dog); } - u3a_free(dog_w); + u3a_drop(dog_w); if ( !_(u3a_is_cat(t_dog)) ) { dog = t_dog; goto top; } } else { - u3a_free(dog_w); + u3a_drop(dog_w); } } } @@ -986,14 +1102,14 @@ top: if ( !_(u3a_is_cat(h_dog)) ) { _me_lose_south(h_dog); } - u3a_free(dog_w); + u3a_drop(dog_w); if ( !_(u3a_is_cat(t_dog)) ) { dog = t_dog; goto top; } } else { - u3a_free(dog_w); + u3a_drop(dog_w); } } } @@ -1340,7 +1456,7 @@ u3a_moot(c3_w* sal_w) if ( 1 == len_w ) { if ( _(u3a_is_cat(las_w)) ) { - u3a_free(nov_w); + u3a_drop(nov_w); return las_w; } @@ -1405,7 +1521,7 @@ u3a_mint(c3_w* sal_w, c3_w len_w) /* See if we can free the slab entirely. */ if ( len_w == 0 ) { - u3a_free(nov_w); + u3a_drop(nov_w); return 0; } @@ -1413,7 +1529,7 @@ u3a_mint(c3_w* sal_w, c3_w len_w) c3_w low_w = nov_u->buf_w[0]; if ( _(u3a_is_cat(low_w)) ) { - u3a_free(nov_w); + u3a_drop(nov_w); return low_w; } diff --git a/n/e.c b/n/e.c index eaf4800c8b..557434388e 100644 --- a/n/e.c +++ b/n/e.c @@ -883,7 +883,7 @@ u3e_init(c3_o chk_o) /* Make sure GMP uses our malloc. */ - // mp_set_memory_functions(u3a_malloc, u3a_realloc2, u3a_free2); + mp_set_memory_functions(u3a_malloc, u3a_realloc2, u3a_free2); /* Map at fixed address. */ diff --git a/n/h.c b/n/h.c index 4595c96c4e..6e77782186 100644 --- a/n/h.c +++ b/n/h.c @@ -72,7 +72,7 @@ _ch_buck_add(u3h_buck* hab_u, u3_noun kev) bah_u->kev[i_w + 1] = hab_u->kev[i_w]; } - u3a_free(hab_u); + u3a_drop(hab_u); return bah_u; } } @@ -150,7 +150,7 @@ _ch_node_add(u3h_node* han_u, c3_w lef_w, c3_w rem_w, u3_noun kev) for ( i_w = inx_w; i_w < len_w; i_w++ ) { nah_u->sot_w[i_w + 1] = han_u->sot_w[i_w]; } - u3a_free(han_u); + u3a_drop(han_u); return nah_u; } } @@ -489,7 +489,7 @@ _ch_free_buck(u3h_buck* hab_u) for ( i_w = 0; i_w < hab_u->len_w; i_w++ ) { u3a_lose(hab_u->kev[i_w]); } - u3a_free(hab_u); + u3a_drop(hab_u); } /* _ch_free_node(): free node. @@ -520,7 +520,7 @@ _ch_free_node(u3h_node* han_u, c3_w lef_w) } } } - u3a_free(han_u); + u3a_drop(han_u); } /* u3h_free(): free hashtable. @@ -545,7 +545,7 @@ u3h_free(u3p(u3h_root) har_p) _ch_free_node(han_u, 25); } } - u3a_free(har_u); + u3a_drop(har_u); } /* _ch_walk_buck(): walk bucket for gc. diff --git a/n/v.c b/n/v.c index a1a12c7f35..562706b910 100644 --- a/n/v.c +++ b/n/v.c @@ -64,7 +64,7 @@ u3v_hose(void) u3p(u3v_cart) nex_p = egg_u->nex_p; u3a_lose(egg_u->vir); - u3a_free(egg_u); + u3a_drop(egg_u); egg_p = nex_p; }