diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h
index 374c520754..b0ccd3de20 100644
--- a/pkg/urbit/include/ur/hashcons.h
+++ b/pkg/urbit/include/ur/hashcons.h
@@ -170,3 +170,6 @@ ur_walk_fore(ur_root_t *r,
              void *v,
              void (*atom)(ur_root_t*, ur_nref, void*),
              ur_bool_t (*cell)(ur_root_t*, ur_nref, void*));
+
+uint64_t
+ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt);
diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c
new file mode 100644
index 0000000000..0653ced129
--- /dev/null
+++ b/pkg/urbit/tests/ur_tests.c
@@ -0,0 +1,174 @@
+#include "all.h"
+#include "ur/hashcons.h"
+
+//  _test_jam(): jam [ref] with ur_jam() and compare the result against
+//  the [len_w] expected bytes in [res_y]. on mismatch, print [cap_c]
+//  and both byte sequences to stderr.
+//
+//  returns 1 if the output matches, 0 otherwise.
+//
+static c3_i
+_test_jam(const c3_c* cap_c,
+          ur_root_t* rot_u,
+          ur_nref ref,
+          c3_w len_w,
+          const c3_y* res_y)
+{
+  c3_d i_d, len_d;
+  c3_y* out_y;
+  c3_i ret_i;
+
+  ur_jam(rot_u, ref, &len_d, &out_y);
+
+  //  check the length before the contents: memcmp() over [len_w] bytes
+  //  alone would accept output that merely begins with the expected bytes
+  //
+  if ( (len_d != len_w) || (0 != memcmp(out_y, res_y, len_w)) ) {
+    fprintf(stderr, "\033[31m%s fail\033[0m\r\n", cap_c);
+
+    fprintf(stderr, " actual: { ");
+    for ( i_d = 0; i_d < len_d; i_d++ ) {
+      fprintf(stderr, "0x%x, ", out_y[i_d]);
+    }
+    fprintf(stderr, "}\r\n");
+    fprintf(stderr, " expect: { ");
+    for ( i_d = 0; i_d < len_w; i_d++ ) {
+      fprintf(stderr, "0x%x, ", res_y[i_d]);
+    }
+    fprintf(stderr, "}\r\n");
+
+    ret_i = 0;
+  }
+  else {
+    ret_i = 1;
+  }
+
+  //  ur_jam() hands back its output buffer without freeing it
+  //
+  c3_free(out_y);
+
+  return ret_i;
+}
+
+//  _test_ur(): run every jam test case against a fresh root.
+//
+//  returns 1 if all cases pass, 0 if any case fails.
+//
+static c3_i
+_test_ur(void)
+{
+  ur_root_t* rot_u = ur_hcon_init();
+  c3_i res_i = 1;
+
+  //  shorthand constructors for cells and 3-tuples in [rot_u]
+  //
+# define nc(a, b) ur_cons(rot_u, a, b)
+# define nt(a, b, c) nc(a, nc(b, c))
+
+  {
+    c3_c* cap_c = "jam 0";
+    c3_y res_y[1] = { 0x2 };
+    ur_nref ref = 0;
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam 1";
+    c3_y res_y[1] = { 0xc };
+    ur_nref ref = 1;
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam 2";
+    c3_y res_y[1] = { 0x48 };
+    ur_nref ref = 2;
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam %fast";
+    c3_y res_y[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 };
+    ur_nref ref = c3__fast;
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam %full";
+    c3_y res_y[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 };
+    ur_nref ref = c3__full;
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [0 0]";
+    c3_y res_y[1] = { 0x29 };
+    ur_nref ref = nc(0, 0);
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [1 1]";
+    c3_y res_y[2] = { 0x31, 0x3 };
+    ur_nref ref = nc(1, 1);
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [2 3]";
+    c3_y res_y[2] = { 0x21, 0xd1 };
+    ur_nref ref = nc(2, 3);
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [%fast %full]";
+    c3_y res_y[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 };
+    ur_nref ref = nc(c3__fast, c3__full);
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [1 1 1]";
+    c3_y res_y[2] = { 0x71, 0xcc };
+    ur_nref ref = nc(1, nc(1, 1));
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [%fast %full %fast]";
+    c3_y res_y[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 };
+    ur_nref ref = nc(c3__fast, nc(c3__full, c3__fast));
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam [[0 0] [[0 0] 1 1] 1 1]";
+    c3_y res_y[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 };
+    ur_nref ref = nc(nc(0, 0), nc(nc(nc(0, 0), nc(1, 1)), nc(1, 1)));
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  {
+    c3_c* cap_c = "jam big";
+    c3_y res_y[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 };
+    ur_nref ref = nc(nc(nc(1, nc(nc(2, nc(nc(3, nc(nc(4, nc(nt(5, 6, nc(7, nc(nc(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0);
+    res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y);
+  }
+
+  return res_i;
+}
+
+//  main(): run the ur tests; exit status is 0 on success, 1 on failure.
+//
+int
+main(int argc, char* argv[])
+{
+  if ( !_test_ur() ) {
+    fprintf(stderr, "ur test failed\r\n");
+    return 1;
+  }
+
+  fprintf(stderr, "ur ok\n");
+  return 0;
+}
diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c
index fd544957f4..96966762fc 100644
--- a/pkg/urbit/ur/hashcons.c
+++ b/pkg/urbit/ur/hashcons.c
@@ -883,3 +883,422 @@ ur_walk_fore(ur_root_t *r,
 
   free(don);
 }
+
+//  ur_bsw_t: bitstream writer over a growable, zero-filled byte buffer
+//
+//    prev:  previous buffer size; the increment used by the next growth
+//    size:  bytes allocated in [bytes]
+//    fill:  index of the byte currently being written
+//    bits:  total number of bits written
+//    off:   offset (0-7) of the next bit within bytes[fill]
+//    bytes: output buffer
+//
+typedef struct ur_bsw_s {
+  uint64_t prev;
+  uint64_t size;
+  uint64_t fill;
+  uint64_t bits;
+  uint8_t off;
+  uint8_t *bytes;
+} ur_bsw_t;
+
+//  ur_bsw_grow(): enlarge the buffer by [prev] bytes (fibonacci-style:
+//  the new size is the sum of the previous two) and zero the new tail.
+//  aborts via assert() if the reallocation fails.
+//
+static inline void
+ur_bsw_grow(ur_bsw_t *bsw)
+{
+  uint64_t prev = bsw->prev;
+  uint64_t size = bsw->size;
+  uint64_t next = prev + size;
+
+  // fprintf(stderr, "bsw: grow: %" PRIu64 "-%" PRIu64" fill: %" PRIu64 "\r\n", size, next, bsw->fill);
+
+  bsw->bytes = realloc(bsw->bytes, next);
+  assert(bsw->bytes);
+  memset(bsw->bytes + size, 0, prev);
+
+  bsw->prev = size;
+  bsw->size = next;
+}
+
+//  ur_bsw_bit(): write the low bit of [bit] at the current position.
+//
+//    bits are stored by xor into the current byte, which relies on
+//    unwritten buffer space being zero.
+//
+static void
+ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit)
+{
+  uint64_t fill = bsw->fill;
+  uint8_t off = bsw->off;
+  uint8_t old = bsw->bytes[fill];
+
+  bsw->bytes[fill] = old ^ ((bit & 1) << off);
+
+  if ( 7 == off ) {
+    if ( ++fill == bsw->size ) {
+      ur_bsw_grow(bsw);
+    }
+
+    bsw->fill = fill;
+    bsw->off = 0;
+  }
+  else {
+    bsw->off = 1 + off;
+  }
+
+  bsw->bits++;
+
+  // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) {
+  //   fprintf(stderr, "bit fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n",
+  //     bsw->fill,
+  //     bsw->off,
+  //     bsw->bits,
+  //     (bsw->fill << 3) + bsw->off);
+  //   assert(0);
+  // }
+}
+
+//  ur_bsw_bits(): write the low [len] bits of [byt]; [len] must be
+//  less than 8.
+//
+static void
+ur_bsw_bits(ur_bsw_t *bsw, uint8_t len, uint8_t byt)
+{
+  uint64_t fill = bsw->fill;
+  uint8_t off = bsw->off;
+
+  // XX
+  assert( 8 > len );
+
+  {
+    uint8_t rest = 8 - off;
+    uint8_t old = bsw->bytes[fill];
+
+    //  the bits fit in the current byte without filling it
+    //
+    if ( len < rest ) {
+      uint8_t left = (byt & ((1 << len) - 1)) << off;
+
+      bsw->bytes[fill] = old ^ left;
+      bsw->off = off + len;
+    }
+    //  the bits complete the current byte; any remainder starts the next
+    //
+    else {
+      uint8_t left, right;
+
+      left = (byt & ((1 << rest) - 1)) << off;
+      off = len - rest;
+      right = (byt >> rest) & ((1 << off) - 1);
+
+      if ( (fill + 1 + !!off) >= bsw->size ) {
+        ur_bsw_grow(bsw);
+      }
+
+      bsw->bytes[fill] = old ^ left;
+      fill++;
+      bsw->bytes[fill] = right;
+
+      bsw->fill = fill;
+      bsw->off = off;
+    }
+  }
+
+  bsw->bits += len;
+
+  // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) {
+  //   fprintf(stderr, "bits fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n",
+  //     bsw->fill,
+  //     bsw->off,
+  //     bsw->bits,
+  //     (bsw->fill << 3) + bsw->off);
+  //   assert(0);
+  // }
+}
+
+//  ur_bsw_bytes(): write [len] whole bytes from [byt] at the current
+//  bit position; [len] must be nonzero.
+//
+static void
+ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt)
+{
+  uint64_t fill = bsw->fill;
+  uint64_t full = fill + len;
+  uint8_t off = bsw->off;
+
+  // XX
+  assert(len);
+
+  if ( (full + !!off) >= bsw->size ) {
+    uint64_t prev = bsw->prev;
+
+    // be sure to grow sufficiently
+    //
+    if ( len > prev ) {
+      bsw->prev = len;
+    }
+
+    ur_bsw_grow(bsw);
+  }
+
+  //  byte-aligned: copy the input directly
+  //
+  if ( !off ) {
+    memcpy(bsw->bytes + fill, byt, len);
+  }
+  //  unaligned: each input byte straddles two output bytes
+  //
+  else {
+    uint8_t rest = 8 - off;
+    uint8_t left, right, old = bsw->bytes[fill];
+    uint64_t i;
+
+    for ( i = 0; i < len; i++ ) {
+      left = (byt[i] & ((1 << rest) - 1)) << off;
+      right = (byt[i] >> rest) & ((1 << off) - 1);
+
+      bsw->bytes[fill++] = old ^ left;
+      old = right;
+    }
+
+    bsw->bytes[fill] = old;
+
+    assert( full == fill );
+  }
+
+  bsw->fill = full;
+  bsw->bits += len << 3;
+
+  // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) {
+  //   fprintf(stderr, "bytes fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n",
+  //     bsw->fill,
+  //     bsw->off,
+  //     bsw->bits,
+  //     (bsw->fill << 3) + bsw->off);
+  //   assert(0);
+  // }
+}
+
+//  ur_bsw64(): write the low [len_bit] bits of [val], least-significant
+//  bit first. [len_bit] is assumed to be at most 64.
+//
+static void
+ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val)
+{
+  uint8_t len_byt = len_bit >> 3;
+  uint8_t low = ur_mask_3(len_bit);
+  uint8_t byt[8];
+  uint8_t i;
+
+  //  serialize [val] low byte first, independent of host endianness
+  //
+  for ( i = 0; i < 8; i++ ) {
+    byt[i] = (uint8_t)(val >> (i << 3));
+  }
+
+  if ( len_byt ) {
+    ur_bsw_bytes(bsw, len_byt, byt);
+  }
+
+  if ( low ) {
+    ur_bsw_bits(bsw, low, byt[len_byt]);
+  }
+}
+
+//  ur_bsw_mat64(): write the length-prefixed ("mat") form of [val].
+//  zero is a single 1 bit. otherwise, with len_len = ur_met0_64(len_bit):
+//  [len_len] zero bits and a 1 bit, the low (len_len - 1) bits of
+//  [len_bit], then the low [len_bit] bits of [val].
+//
+static inline void
+ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val)
+{
+  if ( 0 == val ) {
+    ur_bsw_bit(bsw, 1);
+  }
+  else {
+    uint8_t len_len = ur_met0_64(len_bit);
+
+    ur_bsw64(bsw, len_len + 1, 1ULL << len_len);
+    ur_bsw64(bsw, len_len - 1, len_bit);
+    ur_bsw64(bsw, len_bit, val);
+  }
+}
+
+//  ur_bsw_mat_bytes(): write the length-prefixed ("mat") form of the
+//  atom held in the [len] bytes at [byt], using [len_bit] as its length
+//  in the prefix. when ur_mask_3(len_bit) is nonzero, the final byte
+//  contributes only that many low bits.
+//
+static inline void
+ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt)
+{
+  // write run-length
+  //
+  {
+    uint8_t len_len = ur_met0_64(len_bit);
+
+    // XX
+    assert( 64 > len_len );
+
+    ur_bsw64(bsw, len_len + 1, 1ULL << len_len);
+    ur_bsw64(bsw, len_len - 1, len_bit);
+  }
+
+  // write bytes
+  //
+  {
+    uint8_t low = ur_mask_3(len_bit);
+
+    if ( !low ) {
+      ur_bsw_bytes(bsw, len, byt);
+    }
+    else {
+      uint64_t last = len - 1;
+
+      //  skip the whole-byte write when there are none: ur_bsw_bytes()
+      //  asserts a nonzero length
+      //
+      if ( last ) {
+        ur_bsw_bytes(bsw, last, byt);
+      }
+      ur_bsw_bits(bsw, low, byt[last]);
+    }
+  }
+}
+
+//  _jam_mat(): write the mat encoding of the atom [ref], which is
+//  [len_bit] bits wide, dispatching on its representation.
+//
+static inline void
+_jam_mat(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len_bit)
+{
+  switch ( ur_nref_tag(ref) ) {
+    default: assert(0);
+
+    case ur_direct: {
+      ur_bsw_mat64(bsw, len_bit, ref);
+    } break;
+
+    case ur_iatom: {
+      uint64_t len;
+      uint8_t *byt;
+      ur_bytes(r, ref, &byt, &len);
+      ur_bsw_mat_bytes(bsw, len_bit, len, byt);
+    } break;
+  }
+}
+
+//  _jam_t: jam traversal state
+//
+//    dict: maps each noun already written to its bit offset in the output
+//    bsw:  output bitstream
+//
+typedef struct _jam_s {
+  ur_dict64_t dict;
+  ur_bsw_t bsw;
+} _jam_t;
+
+//  _jam_atom(): atom callback for ur_walk_fore().
+//
+//    first occurrence: record the bit offset, write a 0 bit and the atom.
+//    repeat: write the atom again (0 bit + mat) if its bit-length is no
+//    more than ur_met0_64() of the offset; else bits 1, 1 + mat(offset).
+//
+static void
+_jam_atom(ur_root_t *r, ur_nref ref, void *ptr)
+{
+  _jam_t *j = ptr;
+  ur_dict64_t *dict = &(j->dict);
+  ur_bsw_t *bsw = &j->bsw;
+  uint64_t bak, len_bit;
+
+  len_bit = ur_met(r, 0, ref);
+
+  if ( !ur_dict64_get(r, dict, ref, &bak) ) {
+    ur_dict64_put(r, dict, ref, bsw->bits);
+
+    ur_bsw_bit(bsw, 0);
+    _jam_mat(r, ref, bsw, len_bit);
+  }
+  else {
+    uint64_t bak_bit = ur_met0_64(bak);
+
+    if ( len_bit <= bak_bit ) {
+      ur_bsw_bit(bsw, 0);
+      _jam_mat(r, ref, bsw, len_bit);
+    }
+    else {
+      ur_bsw_bit(bsw, 1);
+      ur_bsw_bit(bsw, 1);
+      ur_bsw_mat64(bsw, bak_bit, bak);
+    }
+  }
+}
+
+//  _jam_cell(): cell callback for ur_walk_fore().
+//
+//    first occurrence: record the bit offset, write bits 1, 0 and
+//    return 1.
+//    repeat: write bits 1, 1 and the mat-encoded offset, and return 0.
+//    NOTE(review): 1 presumably tells the walk to descend -- confirm.
+//
+static ur_bool_t
+_jam_cell(ur_root_t *r, ur_nref ref, void *ptr)
+{
+  _jam_t *j = ptr;
+  ur_dict64_t *dict = &(j->dict);
+  ur_bsw_t *bsw = &j->bsw;
+  uint64_t bak;
+
+  if ( !ur_dict64_get(r, dict, ref, &bak) ) {
+    ur_dict64_put(r, dict, ref, bsw->bits);
+
+    ur_bsw_bit(bsw, 1);
+    ur_bsw_bit(bsw, 0);
+
+    return 1; // true
+  }
+  else {
+    ur_bsw_bit(bsw, 1);
+    ur_bsw_bit(bsw, 1);
+    ur_bsw_mat64(bsw, ur_met0_64(bak), bak);
+
+    return 0; // false
+  }
+}
+
+//  ur_jam(): serialize the noun [ref] to a bitstream.
+//
+//    len: out, length of the result in bytes (a partial final byte counts)
+//    byt: out, heap buffer holding the result; not freed here, so the
+//         caller is responsible for releasing it
+//
+//    returns the length of the result in bits.
+//
+uint64_t
+ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt)
+{
+  _jam_t j = {0};
+  {
+    uint64_t fib11 = 89, fib12 = 144;
+
+    j.bsw.prev = fib11;
+    j.bsw.size = fib12;
+    j.bsw.bytes = calloc(j.bsw.size, 1);
+    assert(j.bsw.bytes);
+
+    ur_dict64_grow(r, &j.dict, fib11, fib12);
+  }
+
+  ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell);
+  ur_dict_free((ur_dict_t*)&j.dict);
+
+  *len = j.bsw.fill + !!j.bsw.off;
+  *byt = j.bsw.bytes;
+
+  return j.bsw.bits;
+}