From 89967db876784502e6259d91e3bf0ec8f6b39c35 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 28 Jul 2020 14:06:00 -0700 Subject: [PATCH 001/123] ur: adds initial, untested hash-consing module --- pkg/urbit/include/ur/hashcons.h | 95 ++++++++ pkg/urbit/ur/hashcons.c | 390 ++++++++++++++++++++++++++++++++ 2 files changed, 485 insertions(+) create mode 100644 pkg/urbit/include/ur/hashcons.h create mode 100644 pkg/urbit/ur/hashcons.c diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h new file mode 100644 index 0000000000..c3f7143b19 --- /dev/null +++ b/pkg/urbit/include/ur/hashcons.h @@ -0,0 +1,95 @@ +#include +#include +#include + +typedef uint8_t ur_bool_t; + +#if (32 == (CHAR_BIT * __SIZEOF_INT__)) +# define ur_lz32 __builtin_ctz +#elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) +# define ur_lz32 __builtin_ctzl +#else +# error "port me" +#endif + +#if (64 == (CHAR_BIT * __SIZEOF_LONG__)) +# define ur_lz64 __builtin_ctzl +#elif (64 == (CHAR_BIT * __SIZEOF_LONG_LONG__)) +# define ur_lz64 __builtin_ctzll +#else +# error "port me" +#endif + +#define ur_mask_3(a) (a & 0x7) +#define ur_mask_8(a) (a & 0xff) +#define ur_mask_31(a) (a & 0x7fffffff) +#define ur_mask_62(a) (a & 0x3fffffffffffffffULL) + +#define ur_met0_32(a) ( 32 - ur_lz32(a) ) +#define ur_met0_64(a) ( 64 - ur_lz64(a) ) + +#define ur_met3_32(a) \ + ({ uint8_t _a = ur_met0_32(a); \ + ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ + +#define ur_met3_64(a) \ + ({ uint8_t _a = ur_met0_64(a); \ + ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ + +#define ur_nref_tag(ref) ( ref >> 62 ) +#define ur_nref_idx(ref) ur_mask_62(ref) + +typedef struct ur_pail32_s { + uint8_t fill; + uint32_t data[10]; +} ur_pail32_t; + +typedef struct ur_dict32_s { + uint64_t prev; + uint64_t size; + ur_pail32_t *buckets; +} ur_dict32_t; + +typedef struct ur_pail64_s { + uint8_t fill; + uint64_t data[10]; +} ur_pail64_t; + +typedef struct ur_dict64_s { + uint64_t prev; + uint64_t size; + ur_pail64_t *buckets; +} ur_dict64_t; + 
+typedef uint32_t ur_mug; +typedef uint64_t ur_nref; +typedef enum { + ur_direct = 0, + ur_iatom = 1, + ur_icell = 2, +} ur_tag; + +typedef struct ur_cells_s { + ur_dict64_t dict; + uint64_t prev; + uint64_t size; + uint64_t fill; + ur_mug *mugs; + ur_nref *heads; + ur_nref *tails; +} ur_cells_t; + +typedef struct ur_atoms_s { + ur_dict64_t dict; + uint64_t prev; + uint64_t size; + uint64_t fill; + ur_mug *mugs; + uint8_t **bytes; + uint64_t *lens; +} ur_atoms_t; + +typedef struct ur_root_s { + ur_cells_t cells; + ur_atoms_t atoms; +} ur_root_t; diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c new file mode 100644 index 0000000000..f97e727671 --- /dev/null +++ b/pkg/urbit/ur/hashcons.c @@ -0,0 +1,390 @@ +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "noun/hashcons.h" + +ur_mug +ur_mug_bytes(const uint8_t *byt, uint64_t len) +{ + uint32_t seed = 0xcafebabe; + ur_mug mug; + + while ( 1 ) { + uint32_t raw; + MurmurHash3_x86_32(byt, len, seed, &raw); + mug = (raw >> 31) ^ ( ur_mask_31(raw) ); + + if ( 0 == mug ) { + seed++; + } + else { + return mug; + } + } +} + +ur_mug +ur_mug32(uint32_t x) +{ + uint8_t byt[4] = { + ur_mask_8(x >> 0), + ur_mask_8(x >> 8), + ur_mask_8(x >> 16), + ur_mask_8(x >> 24) + }; + + return ur_mug_bytes(byt, ur_met3_32(x)); +} + +ur_mug +ur_mug64(uint64_t x) +{ + uint8_t byt[8] = { + ur_mask_8(x >> 0), + ur_mask_8(x >> 8), + ur_mask_8(x >> 16), + ur_mask_8(x >> 24), + ur_mask_8(x >> 32), + ur_mask_8(x >> 40), + ur_mask_8(x >> 48), + ur_mask_8(x >> 56) + }; + + return ur_mug_bytes(byt, ur_met3_64(x)); +} + +ur_mug +ur_mug_both(ur_mug hed, ur_mug tal) +{ + return ur_mug32(hed ^ (0x7fffffff ^ tal)); +} + +ur_mug +ur_nref_mug(ur_root_t *r, ur_nref ref) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: return ur_mug64(ref); + case ur_iatom: return r->atoms.mugs[ur_nref_idx(ref)]; + case ur_icell: return r->cells.mugs[ur_nref_idx(ref)]; + } +} + +ur_bool_t 
+ur_deep(ur_nref ref) +{ + return ur_icell == ur_nref_tag(ref); +} + +ur_nref +ur_head(ur_root_t *r, ur_nref ref) +{ + assert( ur_deep(ref) ); + return r->cells.heads[ur_nref_idx(ref)]; +} + +ur_nref +ur_tail(ur_root_t *r, ur_nref ref) +{ + assert( ur_deep(ref) ); + return r->cells.tails[ur_nref_idx(ref)]; +} + +void +ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) +{ + ur_pail64_t *buckets, *old_buckets = dict->buckets; + uint64_t old_size = dict->size; + uint64_t next = prev + size; + uint64_t i; + + buckets = calloc(next, sizeof(*buckets)); + + for ( i = 0; i < old_size; i++ ) { + ur_pail64_t *old_bucket = &(old_buckets[i]); + uint64_t old_fill = old_bucket->fill; + uint64_t j; + + for ( j = 0; j < old_fill; j++ ) { + ur_nref ref = (ur_nref)old_bucket->data[old_fill]; + ur_mug mug = ur_nref_mug(r, ref); + + ur_pail64_t *bucket = &(buckets[ mug % next ]); + uint64_t new_fill = bucket->fill; + + if( 10 == new_fill ) { + free(buckets); + return ur_dict64_grow(r, dict, size, next); + } + + bucket->data[new_fill] = (uint64_t)ref; + bucket->fill = 1 + new_fill; + } + } + + free(old_buckets); + + dict->prev = size; + dict->size = next; + dict->buckets = buckets; +} + +void +ur_atoms_grow(ur_atoms_t *atoms) +{ + uint64_t prev = atoms->prev; + uint64_t size = atoms->size; + uint64_t next = prev + size; + uint8_t **bytes = atoms->bytes; + uint64_t *lens = atoms->lens; + ur_mug *mugs = atoms->mugs; + + atoms->bytes = malloc(next * (sizeof(*atoms->bytes) + + sizeof(*atoms->lens) + + sizeof(*atoms->mugs))); + assert( atoms->bytes ); + + atoms->lens = (void*)(atoms->bytes + (next * sizeof(*atoms->bytes))); + atoms->mugs = (void*)(atoms->lens + (next * sizeof(*atoms->lens))); + + if ( bytes ) { + memcpy(atoms->bytes, bytes, size * (sizeof(*bytes))); + memcpy(atoms->lens, lens, size * (sizeof(*lens))); + memcpy(atoms->mugs, mugs, size * (sizeof(*mugs))); + + free(bytes); + } +} + +void +ur_cells_grow(ur_cells_t *cells) +{ + uint64_t prev = 
cells->prev; + uint64_t size = cells->size; + uint64_t next = prev + size; + ur_nref *heads = cells->heads; + ur_nref *tails = cells->tails; + ur_mug *mugs = cells->mugs; + + cells->heads = malloc(next * (sizeof(*cells->heads) + + sizeof(*cells->heads) + + sizeof(*cells->mugs))); + assert( cells->heads ); + + cells->tails = (void*)(cells->heads + (next * sizeof(*cells->heads))); + cells->mugs = (void*)(cells->tails + (next * sizeof(*cells->tails))); + + if ( heads ) { + memcpy(cells->heads, heads, size * (sizeof(*heads))); + memcpy(cells->tails, tails, size * (sizeof(*tails))); + memcpy(cells->mugs, mugs, size * (sizeof(*mugs))); + + free(heads); + } +} + +void +ur_bytes(ur_root_t *r, ur_nref ref, uint8_t **byt, uint64_t *len) +{ + assert( !ur_deep(ref) ); + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: { + *len = ur_met3_64(ref); + // XX little-endian + // + *byt = (uint8_t*)&ref; + } break; + + case ur_iatom: { + uint64_t idx = ur_nref_idx(ref); + *len = r->atoms.lens[idx]; + *byt = r->atoms.bytes[idx]; + } break; + } +} + +static ur_nref +_coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint8_t *byt, uint64_t len) +{ + uint64_t fill = atoms->fill; + ur_nref tom = ( fill & ((uint64_t)ur_iatom << 62) ); + uint8_t *copy = malloc(len); + + // XX necessary? + // + assert( 62 >= ur_met0_64(fill) ); + + assert(copy); + memcpy(copy, byt, len); + + atoms->bytes[fill] = copy; + atoms->lens[fill] = len; + atoms->mugs[fill] = mug; + + return tom; +} + +static ur_nref +_cons_unsafe(ur_cells_t *cells, ur_mug mug, ur_nref hed, ur_nref tal) +{ + uint64_t fill = cells->fill; + ur_nref cel = ( fill & ((uint64_t)ur_icell << 62) ); + + // XX necessary? 
+ // + assert( 62 >= ur_met0_64(fill) ); + + cells->mugs[fill] = mug; + cells->heads[fill] = hed; + cells->tails[fill] = tal; + cells->fill = 1 + fill; + + return cel; +} + +ur_nref +ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) +{ + ur_atoms_t *atoms = &(r->atoms); + ur_dict64_t *dict = &(atoms->dict); + ur_mug mug = ur_mug_bytes(byt, len); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail64_t *bucket = &(dict->buckets[idx]); + uint8_t i, b_fill = bucket->fill; + ur_nref tom; + + for ( i = 0; i < b_fill; i++ ) { + uint8_t *t_byt; + uint64_t t_len; + tom = (ur_nref)bucket->data[i]; + + ur_bytes(r, tom, &t_byt, &t_len); + + if ( (t_len == len) + && (0 == memcmp(t_byt, byt, len)) ) + { + return tom; + } + } + + if ( 10 == b_fill ) { + ur_dict64_grow(r, dict, dict->prev, dict->size); + continue; + } + + if ( atoms->fill == atoms->size ) { + ur_atoms_grow(atoms); + } + + tom = _coin_unsafe(atoms, mug, byt, len); + + bucket->data[b_fill] = (uint64_t)tom; + bucket->fill = 1 + b_fill; + + return tom; + } +} + +ur_nref +ur_coin64(ur_root_t *r, uint64_t n) +{ + if ( ur_direct == ur_nref_tag(n) ) { + return n; + } + else { + // XX little-endian + // + return ur_coin_bytes(r, (uint8_t*)&n, ur_met3_64(n)); + } +} + +ur_nref +ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) +{ + ur_cells_t *cells = &(r->cells); + ur_dict64_t *dict = &(cells->dict); + ur_mug mug = ur_mug_both(ur_nref_mug(r, hed), + ur_nref_mug(r, tal)); + + while ( 1 ) { + uint64_t idx = ( mug % cells->size ); + ur_pail64_t *bucket = &(dict->buckets[idx]); + uint8_t i, b_fill = bucket->fill; + ur_nref cel; + + for ( i = 0; i < b_fill; i++ ) { + cel = (ur_nref)bucket->data[i]; + + if ( (hed == ur_head(r, cel)) + && (tal == ur_tail(r, cel)) ) + { + return cel; + } + } + + if ( 10 == b_fill ) { + ur_dict64_grow(r, dict, dict->prev, dict->size); + continue; + } + + if ( cells->fill == cells->size ) { + ur_cells_grow(cells); + } + + cel = _cons_unsafe(cells, mug, hed, tal); + + 
bucket->data[b_fill] = (uint64_t)cel; + bucket->fill = 1 + b_fill; + + return cel; + } +} + +ur_root_t* +ur_hcon_init(void) +{ + ur_root_t *r = calloc(1, sizeof(*r)); + assert( r ); + + { + ur_dict64_t *dict; + uint64_t fib11 = 89, fib12 = 144; + + // allocate atom storage + // + r->atoms.prev = fib11; + r->atoms.size = fib12; + ur_atoms_grow(&(r->atoms)); + + // allocate atom hashtable + // + dict = &(r->atoms.dict); + ur_dict64_grow(r, dict, fib11, fib12); + + // allocate cell storage + // + r->cells.prev = fib11; + r->cells.size = fib12; + ur_cells_grow(&(r->cells)); + + // allocate cell hashtable + // + dict = &(r->cells.dict); + ur_dict64_grow(r, dict, fib11, fib12); + } + + return r; +} From 24d87239171c7016d32fc157029d75ae0430559b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 28 Jul 2020 14:20:37 -0700 Subject: [PATCH 002/123] ur: fixes a variety of bugs uncovered in local testing --- pkg/urbit/include/ur/hashcons.h | 12 +++---- pkg/urbit/ur/hashcons.c | 60 ++++++++++++++++++++------------- 2 files changed, 42 insertions(+), 30 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index c3f7143b19..69e34b378b 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -5,17 +5,17 @@ typedef uint8_t ur_bool_t; #if (32 == (CHAR_BIT * __SIZEOF_INT__)) -# define ur_lz32 __builtin_ctz +# define ur_lz32 __builtin_clz #elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) -# define ur_lz32 __builtin_ctzl +# define ur_lz32 __builtin_clzl #else # error "port me" #endif #if (64 == (CHAR_BIT * __SIZEOF_LONG__)) -# define ur_lz64 __builtin_ctzl +# define ur_lz64 __builtin_clzl #elif (64 == (CHAR_BIT * __SIZEOF_LONG_LONG__)) -# define ur_lz64 __builtin_ctzll +# define ur_lz64 __builtin_clzll #else # error "port me" #endif @@ -25,8 +25,8 @@ typedef uint8_t ur_bool_t; #define ur_mask_31(a) (a & 0x7fffffff) #define ur_mask_62(a) (a & 0x3fffffffffffffffULL) -#define ur_met0_32(a) ( 32 - ur_lz32(a) ) -#define 
ur_met0_64(a) ( 64 - ur_lz64(a) ) +#define ur_met0_32(a) ( (a) ? 32 - ur_lz32(a) : 0 ) +#define ur_met0_64(a) ( (a) ? 64 - ur_lz64(a) : 0 ) #define ur_met3_32(a) \ ({ uint8_t _a = ur_met0_32(a); \ diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index f97e727671..c321d8fc2b 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -9,7 +9,7 @@ #include -#include "noun/hashcons.h" +#include "ur/hashcons.h" ur_mug ur_mug_bytes(const uint8_t *byt, uint64_t len) @@ -64,7 +64,9 @@ ur_mug64(uint64_t x) ur_mug ur_mug_both(ur_mug hed, ur_mug tal) { - return ur_mug32(hed ^ (0x7fffffff ^ tal)); + // XX not correct per u3r_mug, but necessary to avoid collisions + // + return ur_mug32(hed ^ (0x7fffffff ^ ur_mug32(tal))); } ur_mug @@ -111,17 +113,18 @@ ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) for ( i = 0; i < old_size; i++ ) { ur_pail64_t *old_bucket = &(old_buckets[i]); - uint64_t old_fill = old_bucket->fill; - uint64_t j; + uint8_t old_fill = old_bucket->fill; + uint8_t j; for ( j = 0; j < old_fill; j++ ) { - ur_nref ref = (ur_nref)old_bucket->data[old_fill]; + ur_nref ref = (ur_nref)old_bucket->data[j]; ur_mug mug = ur_nref_mug(r, ref); - ur_pail64_t *bucket = &(buckets[ mug % next ]); - uint64_t new_fill = bucket->fill; + uint64_t idx = ( mug % next ); + ur_pail64_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; - if( 10 == new_fill ) { + if ( 10 == new_fill ) { free(buckets); return ur_dict64_grow(r, dict, size, next); } @@ -148,21 +151,24 @@ ur_atoms_grow(ur_atoms_t *atoms) uint64_t *lens = atoms->lens; ur_mug *mugs = atoms->mugs; - atoms->bytes = malloc(next * (sizeof(*atoms->bytes) - + sizeof(*atoms->lens) - + sizeof(*atoms->mugs))); + atoms->bytes = malloc(next * ( sizeof(*atoms->bytes) + + sizeof(*atoms->lens) + + sizeof(*atoms->mugs) )); assert( atoms->bytes ); - atoms->lens = (void*)(atoms->bytes + (next * sizeof(*atoms->bytes))); - atoms->mugs = (void*)(atoms->lens + (next * 
sizeof(*atoms->lens))); + atoms->lens = (void*)((char*)atoms->bytes + (next * sizeof(*atoms->bytes))); + atoms->mugs = (void*)((char*)atoms->lens + (next * sizeof(*atoms->lens))); if ( bytes ) { memcpy(atoms->bytes, bytes, size * (sizeof(*bytes))); - memcpy(atoms->lens, lens, size * (sizeof(*lens))); - memcpy(atoms->mugs, mugs, size * (sizeof(*mugs))); + memcpy(atoms->lens, lens, size * (sizeof(*lens))); + memcpy(atoms->mugs, mugs, size * (sizeof(*mugs))); free(bytes); } + + atoms->prev = size; + atoms->size = next; } void @@ -175,21 +181,24 @@ ur_cells_grow(ur_cells_t *cells) ur_nref *tails = cells->tails; ur_mug *mugs = cells->mugs; - cells->heads = malloc(next * (sizeof(*cells->heads) - + sizeof(*cells->heads) - + sizeof(*cells->mugs))); + cells->heads = malloc(next * ( sizeof(*cells->heads) + + sizeof(*cells->heads) + + sizeof(*cells->mugs) )); assert( cells->heads ); - cells->tails = (void*)(cells->heads + (next * sizeof(*cells->heads))); - cells->mugs = (void*)(cells->tails + (next * sizeof(*cells->tails))); + cells->tails = (void*)((char*)cells->heads + (next * sizeof(*cells->heads))); + cells->mugs = (void*)((char*)cells->tails + (next * sizeof(*cells->tails))); if ( heads ) { memcpy(cells->heads, heads, size * (sizeof(*heads))); memcpy(cells->tails, tails, size * (sizeof(*tails))); - memcpy(cells->mugs, mugs, size * (sizeof(*mugs))); + memcpy(cells->mugs, mugs, size * (sizeof(*mugs))); free(heads); } + + cells->prev = size; + cells->size = next; } void @@ -218,7 +227,8 @@ static ur_nref _coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint8_t *byt, uint64_t len) { uint64_t fill = atoms->fill; - ur_nref tom = ( fill & ((uint64_t)ur_iatom << 62) ); + ur_tag tag = ur_iatom; + ur_nref tom = ( fill | ((uint64_t)tag << 62) ); uint8_t *copy = malloc(len); // XX necessary? 
@@ -231,6 +241,7 @@ _coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint8_t *byt, uint64_t len) atoms->bytes[fill] = copy; atoms->lens[fill] = len; atoms->mugs[fill] = mug; + atoms->fill = 1 + fill; return tom; } @@ -239,7 +250,8 @@ static ur_nref _cons_unsafe(ur_cells_t *cells, ur_mug mug, ur_nref hed, ur_nref tal) { uint64_t fill = cells->fill; - ur_nref cel = ( fill & ((uint64_t)ur_icell << 62) ); + ur_tag tag = ur_icell; + ur_nref cel = ( fill | ((uint64_t)tag << 62) ); // XX necessary? // @@ -320,7 +332,7 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) ur_nref_mug(r, tal)); while ( 1 ) { - uint64_t idx = ( mug % cells->size ); + uint64_t idx = ( mug % dict->size ); ur_pail64_t *bucket = &(dict->buckets[idx]); uint8_t i, b_fill = bucket->fill; ur_nref cel; From 15f60af86f4c05858b34c38ceea59d368f57de87 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 28 Jul 2020 15:52:32 -0700 Subject: [PATCH 003/123] ur: adds hashcons module to build --- pkg/urbit/Makefile | 3 ++- pkg/urbit/include/ur/hashcons.h | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/Makefile b/pkg/urbit/Makefile index 972310caac..76e65e89e3 100644 --- a/pkg/urbit/Makefile +++ b/pkg/urbit/Makefile @@ -2,12 +2,13 @@ include config.mk jets = jets/tree.c $(wildcard jets/*/*.c) noun = $(wildcard noun/*.c) +ur = $(wildcard ur/*.c) vere = $(wildcard vere/*.c) $(wildcard vere/*/*.c) daemon = $(wildcard daemon/*.c) worker = $(wildcard worker/*.c) tests = $(wildcard tests/*.c) -common = $(jets) $(noun) $(vere) +common = $(jets) $(noun) $(ur) $(vere) headers = $(shell find include -type f) common_objs = $(shell echo $(common) | sed 's/\.c/.o/g') diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 69e34b378b..7c77268b50 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -93,3 +93,15 @@ typedef struct ur_root_s { ur_cells_t cells; ur_atoms_t atoms; } ur_root_t; + +ur_nref +ur_coin_bytes(ur_root_t *r, 
uint8_t *byt, uint64_t len); + +ur_nref +ur_coin64(ur_root_t *r, uint64_t n); + +ur_nref +ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal); + +ur_root_t* +ur_hcon_init(void); From b61c016b93b4b7f7e15b9a5a1da71f84698d0dd9 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 28 Jul 2020 15:52:56 -0700 Subject: [PATCH 004/123] serf: WIP adds uniq: global deduplicator --- pkg/urbit/worker/main.c | 420 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 420 insertions(+) diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 7bc494e269..b5872620fe 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -22,6 +22,8 @@ #include #include +#include "ur/hashcons.h" + static u3_serf u3V; // one serf per process static u3_moat inn_u; // input stream static u3_mojo out_u; // output stream @@ -288,6 +290,421 @@ _cw_queu(c3_i argc, c3_c* argv[]) } } + +// stack frame for recording head vs tail iteration +// +// In Hoon, this structure would be as follows: +// +// $% [%root ~] +// [%head cell=^] +// [%tail cell=^ hed-mug=@] +// == +// + +#define STACK_ROOT 0 +#define STACK_HEAD 1 +#define STACK_TAIL 2 + +typedef struct ur_temp_s +{ + uint64_t prev; + uint64_t size; + void *base; + void *top; +} ur_temp_t; + +void +ur_temp_init(ur_temp_t *t) +{ + uint64_t fib19 = 4181, fib20 = 6765; + + t->prev = fib19; + t->size = fib20; + t->base = malloc(fib20); + t->top = t->base; +} + +void* +ur_temp_push(ur_temp_t *t, size_t wide) +{ + uint64_t fill = t->top - t->base; + uint64_t grow = t->size + wide; + + if ( fill > (t->size + wide) ) { + uint64_t next = t->prev + t->size; + t->base = realloc(t->base, next); + t->top = t->base + fill; + } + + { + void* ptr = t->top; + t->top += wide; + return ptr; + } +} + +void* +ur_temp_peek(ur_temp_t *t, size_t wide) +{ + return t->top - wide; +} + +void +ur_temp_pop(ur_temp_t *t, size_t wide) +{ + t->top -= wide; + assert( t->top >= t->base ); +} + +typedef struct _ur_frame_s +{ + c3_y tag_y; + u3a_cell* cel_u; + ur_nref 
ref; +} _ur_frame_t; + +typedef struct _ur_stack_s +{ + uint32_t prev; + uint32_t size; + uint32_t fill; + _ur_frame_t *entries; +} _ur_stack_t; + +static inline void +_stack_push(_ur_stack_t *s, c3_y tag_y, u3a_cell* cel_u, ur_nref ref) +{ + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->entries = realloc(s->entries, next * sizeof(_ur_frame_t)); + s->prev = s->size; + s->size = next; + } + + _ur_frame_t* fam_u = &(s->entries[s->fill++]); + fam_u->tag_y = tag_y; + fam_u->cel_u = cel_u; + fam_u->ref = ref; +} + +static ur_nref +_from_loom(ur_root_t *r, u3_noun a) +{ + ur_nref ref; + + _ur_stack_t s; + s.prev = 89; + s.size = 144; + s.fill = 0; + s.entries = malloc((s.prev + s.size) * sizeof(_ur_frame_t)); + _stack_push(&s, STACK_ROOT, 0, 0); + + // ur_temp_t t; + // ur_temp_init(&t); + + // { + // _ur_frame_t *fam_u = ur_temp_push(&t, sizeof(_ur_frame_t)); + // fam_u->tag_y = STACK_ROOT; + // } + + advance: { + // u3 direct == ur direct + // + if ( c3y == u3a_is_cat(a) ) { + ref = (ur_nref)a; + goto retreat; + } + else { + u3a_noun* som_u = u3a_to_ptr(a); + u3a_box* box_u = u3a_botox(som_u); + c3_w* box_w = (void*)box_u; + + // all bits set == already reallocated + // + if ( 0xffffffff == box_w[0] ) { + ref = ( ((uint64_t)box_w[2]) << 32 + | ((uint64_t)box_w[1]) ); + goto retreat; + } + else if ( c3y == u3a_is_atom(a) ) { + u3a_atom* vat_u = (u3a_atom*)som_u; + + // coin an nref + // + switch ( vat_u->len_w ) { + case 2: { + ref = ur_coin64(r, ( ((uint64_t)vat_u->buf_w[1]) << 32 + | ((uint64_t)vat_u->buf_w[0]) )); + } break; + + case 1: { + ref = ur_coin64(r, (uint64_t)vat_u->buf_w[0]); + } break; + + + default: { + c3_assert( vat_u->len_w ); + + uint8_t *byt = (uint8_t*)vat_u->buf_w; + uint64_t len = u3r_met(3, a); + + ref = ur_coin_bytes(r, byt, len); + } break; + } + + // overwrite u3a_atom with reallocated reference + // + box_w[0] = 0xffffffff; + box_w[1] = ref & 0xffffffff; + box_w[2] = ref >> 32; + + goto retreat; + } + else { + 
u3a_cell* cel_u = (u3a_cell*)som_u; + _stack_push(&s, STACK_HEAD, cel_u, 0); + // { + // _ur_frame_t *fam_u = ur_temp_push(&t, sizeof(_ur_frame_t)); + // fam_u->tag_y = STACK_HEAD; + // fam_u->cel_u = cel_u; + // } + a = cel_u->hed; + goto advance; + } + } + } + + retreat: { + _ur_frame_t fam_u = s.entries[--s.fill]; + + // c3_y tag_y; + // u3a_cell* cel_u; + // ur_nref hed; + // { + // _ur_frame_t *fam_u = ur_temp_peek(&t, sizeof(_ur_frame_t)); + + // tag_y = fam_u->tag_y; + // cel_u = fam_u->cel_u; + // hed = fam_u->ref; + + // ur_temp_pop(&t, sizeof(_ur_frame_t)); + // } + + switch ( fam_u.tag_y ) { + // switch ( tag_y ) { + default: { + c3_assert(0); + } + + case STACK_ROOT: { + break; + } + + case STACK_HEAD: { + _stack_push(&s, STACK_TAIL, fam_u.cel_u, ref); + // { + // _ur_frame_t *fam_u = ur_temp_push(&t, sizeof(_ur_frame_t)); + // fam_u->tag_y = STACK_TAIL; + // fam_u->cel_u = cel_u; + // fam_u->ref = ref; + // } + + a = fam_u.cel_u->tel; + // a = cel_u->tel; + goto advance; + } + + case STACK_TAIL: { + u3a_cell* cel_u = fam_u.cel_u; + u3a_box* box_u = u3a_botox(cel_u); + c3_w* box_w = (void*)box_u; + + ref = ur_cons(r, fam_u.ref, ref); + // ref = ur_cons(r, hed, ref); + + // overwrite u3a_atom with reallocated reference + // + box_w[0] = 0xffffffff; + box_w[1] = ref & 0xffffffff; + box_w[2] = ref >> 32; + + goto retreat; + } + } + } + + free(s.entries); + // free(t.base); + + return ref; +} + +typedef struct ur_nvec_s { + void* data; + uint64_t fill; + ur_nref* refs; +} ur_nvec_t; + +void +ur_nvec_init(ur_nvec_t *v, uint64_t size, void* ptr) +{ + v->data = ptr; + v->fill = 0; + v->refs = calloc(size, sizeof(ur_nref)); +} + +// XX u3h_use() +static c3_w +_hamt_count(u3p(u3h_root) har_p) +{ + u3h_root* har_u = u3to(u3h_root, har_p); + return har_u->use_w; +} + +static void +_from_hamt(u3_noun kev, void* ptr) +{ + ur_nvec_t *v = (ur_nvec_t*)ptr; + ur_root_t *r = v->data; + + v->refs[v->fill++] = _from_loom(r, kev); +} + +static u3_noun +_ref_to_noun(ur_nref 
ref, u3_noun* vat, u3_noun* cel) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: { + if ( 0x7fffffffULL > ref ) { + return (u3_atom)ref; + } + else { + c3_w wor_w[2]; + + wor_w[0] = ref & 0xffffffff; + wor_w[1] = ref >> 32; + + return u3i_words(2, wor_w); + } + } break; + + case ur_iatom: return vat[ur_nref_idx(ref)]; + + case ur_icell: return cel[ur_nref_idx(ref)]; + } +} + +void +do_stuff(void) +{ + ur_root_t *r = ur_hcon_init(); + + // allow read/write on the whole loom, bypassing page tracking + // + if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, (PROT_READ | PROT_WRITE)) ) { + c3_assert(0); + } + + fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); + fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + + ur_nref ken = _from_loom(r, u3A->roc); + + fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); + fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + + + c3_w cod_w = _hamt_count(u3R->jed.cod_p); + ur_nvec_t v; + + fprintf(stderr, "hc: cold count %u\r\n", cod_w); + + ur_nvec_init(&v, cod_w, r); + u3h_walk_with(u3R->jed.cod_p, _from_hamt, &v); + + fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); + fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + + u3m_pave(c3y, c3n); + // XX wtf? + u3R->jed.hot_p = u3h_new(); + + u3_atom *vat; + u3_noun *cel; + + { + ur_atoms_t *atoms = &(r->atoms); + uint64_t *lens = atoms->lens; + uint8_t **byts = atoms->bytes; + uint64_t i, fill = atoms->fill; + + vat = calloc(fill, sizeof(u3_atom)); + + for ( i = 0; i < fill; i++ ) { + vat[i] = u3i_bytes(lens[i], byts[i]); + // XX mug? 
+ } + } + + { + ur_cells_t *cells = &(r->cells); + ur_nref *heds = cells->heads, *tals = cells->tails; + uint64_t i, fill = cells->fill; + u3_noun hed, tal; + + cel = calloc(fill, sizeof(u3_noun)); + + for ( i = 0; i < fill; i++ ) { + hed = _ref_to_noun(heds[i], vat, cel); + tal = _ref_to_noun(tals[i], vat, cel); + cel[i] = u3nc(hed, tal); + // XX mug? + } + } + + u3A->roc = cel[ur_nref_idx(ken)]; + + { + uint32_t i; + ur_nref ref; + u3_noun kev; + + for ( i = 0; i < cod_w; i++) { + ref = v.refs[i]; + kev = cel[ur_nref_idx(ref)]; + u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); + u3z(kev); + } + } + + // mark all pages dirty + // + memset((void*)u3P.dit_w, 0xff, u3a_pages >> 3); +} + +/* _cw_uniq(); deduplicate persistent nouns +*/ +static void +_cw_uniq(c3_i argc, c3_c* argv[]) +{ + c3_assert( 3 <= argc ); + + c3_c* dir_c = argv[2]; + c3_d eve_d = u3m_boot(dir_c); + + u3_serf_grab(); + + do_stuff(); + + u3_serf_grab(); + + u3A->ent_d = eve_d; + + u3e_save(); +} + /* _cw_pack(); compact memory, save, and exit. 
*/ static void @@ -362,6 +779,9 @@ main(c3_i argc, c3_c* argv[]) else if ( 0 == strcmp("queu", argv[1]) ) { _cw_queu(argc, argv); } + else if ( 0 == strcmp("uniq", argv[1]) ) { + _cw_uniq(argc, argv); + } else if ( 0 == strcmp("pack", argv[1]) ) { _cw_pack(argc, argv); } From 29810b5b1b7bcae68dfd66bd27cea7a12ae18b05 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 29 Jul 2020 12:58:35 -0700 Subject: [PATCH 005/123] ur: removes unusable width-abstraction in ur_dict* types --- pkg/urbit/include/ur/hashcons.h | 61 +++++++++++++----------------- pkg/urbit/ur/hashcons.c | 66 ++++++++++++++++----------------- 2 files changed, 57 insertions(+), 70 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 7c77268b50..bfb1efbb7a 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -39,28 +39,6 @@ typedef uint8_t ur_bool_t; #define ur_nref_tag(ref) ( ref >> 62 ) #define ur_nref_idx(ref) ur_mask_62(ref) -typedef struct ur_pail32_s { - uint8_t fill; - uint32_t data[10]; -} ur_pail32_t; - -typedef struct ur_dict32_s { - uint64_t prev; - uint64_t size; - ur_pail32_t *buckets; -} ur_dict32_t; - -typedef struct ur_pail64_s { - uint8_t fill; - uint64_t data[10]; -} ur_pail64_t; - -typedef struct ur_dict64_s { - uint64_t prev; - uint64_t size; - ur_pail64_t *buckets; -} ur_dict64_t; - typedef uint32_t ur_mug; typedef uint64_t ur_nref; typedef enum { @@ -69,24 +47,35 @@ typedef enum { ur_icell = 2, } ur_tag; +typedef struct ur_pail_s { + uint8_t fill; + ur_nref refs[10]; +} ur_pail_t; + +typedef struct ur_dict_s { + uint64_t prev; + uint64_t size; + ur_pail_t *buckets; +} ur_dict_t; + typedef struct ur_cells_s { - ur_dict64_t dict; - uint64_t prev; - uint64_t size; - uint64_t fill; - ur_mug *mugs; - ur_nref *heads; - ur_nref *tails; + ur_dict_t dict; + uint64_t prev; + uint64_t size; + uint64_t fill; + ur_mug *mugs; + ur_nref *heads; + ur_nref *tails; } ur_cells_t; typedef struct ur_atoms_s { - ur_dict64_t 
dict; - uint64_t prev; - uint64_t size; - uint64_t fill; - ur_mug *mugs; - uint8_t **bytes; - uint64_t *lens; + ur_dict_t dict; + uint64_t prev; + uint64_t size; + uint64_t fill; + ur_mug *mugs; + uint8_t **bytes; + uint64_t *lens; } ur_atoms_t; typedef struct ur_root_s { diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index c321d8fc2b..1017989816 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -102,34 +102,32 @@ ur_tail(ur_root_t *r, ur_nref ref) } void -ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) +ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) { - ur_pail64_t *buckets, *old_buckets = dict->buckets; + ur_pail_t *buckets, *old_buckets = dict->buckets; uint64_t old_size = dict->size; - uint64_t next = prev + size; - uint64_t i; + uint64_t i, next = prev + size; buckets = calloc(next, sizeof(*buckets)); for ( i = 0; i < old_size; i++ ) { - ur_pail64_t *old_bucket = &(old_buckets[i]); - uint8_t old_fill = old_bucket->fill; - uint8_t j; + ur_pail_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_bucket->fill; for ( j = 0; j < old_fill; j++ ) { - ur_nref ref = (ur_nref)old_bucket->data[j]; + ur_nref ref = old_bucket->refs[j]; ur_mug mug = ur_nref_mug(r, ref); - uint64_t idx = ( mug % next ); - ur_pail64_t *bucket = &(buckets[idx]); - uint8_t new_fill = bucket->fill; + uint64_t idx = ( mug % next ); + ur_pail_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; if ( 10 == new_fill ) { free(buckets); - return ur_dict64_grow(r, dict, size, next); + return ur_dict_grow(r, dict, size, next); } - bucket->data[new_fill] = (uint64_t)ref; + bucket->refs[new_fill] = ref; bucket->fill = 1 + new_fill; } } @@ -154,11 +152,11 @@ ur_atoms_grow(ur_atoms_t *atoms) atoms->bytes = malloc(next * ( sizeof(*atoms->bytes) + sizeof(*atoms->lens) + sizeof(*atoms->mugs) )); - assert( atoms->bytes ); - atoms->lens = (void*)((char*)atoms->bytes + (next * 
sizeof(*atoms->bytes))); atoms->mugs = (void*)((char*)atoms->lens + (next * sizeof(*atoms->lens))); + assert( atoms->bytes ); + if ( bytes ) { memcpy(atoms->bytes, bytes, size * (sizeof(*bytes))); memcpy(atoms->lens, lens, size * (sizeof(*lens))); @@ -184,11 +182,11 @@ ur_cells_grow(ur_cells_t *cells) cells->heads = malloc(next * ( sizeof(*cells->heads) + sizeof(*cells->heads) + sizeof(*cells->mugs) )); - assert( cells->heads ); - cells->tails = (void*)((char*)cells->heads + (next * sizeof(*cells->heads))); cells->mugs = (void*)((char*)cells->tails + (next * sizeof(*cells->tails))); + assert( cells->heads ); + if ( heads ) { memcpy(cells->heads, heads, size * (sizeof(*heads))); memcpy(cells->tails, tails, size * (sizeof(*tails))); @@ -269,19 +267,19 @@ ur_nref ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) { ur_atoms_t *atoms = &(r->atoms); - ur_dict64_t *dict = &(atoms->dict); + ur_dict_t *dict = &(atoms->dict); ur_mug mug = ur_mug_bytes(byt, len); while ( 1 ) { - uint64_t idx = ( mug % dict->size ); - ur_pail64_t *bucket = &(dict->buckets[idx]); - uint8_t i, b_fill = bucket->fill; + uint64_t idx = ( mug % dict->size ); + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, b_fill = bucket->fill; ur_nref tom; for ( i = 0; i < b_fill; i++ ) { uint8_t *t_byt; uint64_t t_len; - tom = (ur_nref)bucket->data[i]; + tom = bucket->refs[i]; ur_bytes(r, tom, &t_byt, &t_len); @@ -293,7 +291,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) } if ( 10 == b_fill ) { - ur_dict64_grow(r, dict, dict->prev, dict->size); + ur_dict_grow(r, dict, dict->prev, dict->size); continue; } @@ -303,7 +301,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) tom = _coin_unsafe(atoms, mug, byt, len); - bucket->data[b_fill] = (uint64_t)tom; + bucket->refs[b_fill] = tom; bucket->fill = 1 + b_fill; return tom; @@ -327,18 +325,18 @@ ur_nref ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) { ur_cells_t *cells = &(r->cells); - ur_dict64_t *dict = &(cells->dict); + 
ur_dict_t *dict = &(cells->dict); ur_mug mug = ur_mug_both(ur_nref_mug(r, hed), ur_nref_mug(r, tal)); while ( 1 ) { - uint64_t idx = ( mug % dict->size ); - ur_pail64_t *bucket = &(dict->buckets[idx]); - uint8_t i, b_fill = bucket->fill; + uint64_t idx = ( mug % dict->size ); + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, b_fill = bucket->fill; ur_nref cel; for ( i = 0; i < b_fill; i++ ) { - cel = (ur_nref)bucket->data[i]; + cel = bucket->refs[i]; if ( (hed == ur_head(r, cel)) && (tal == ur_tail(r, cel)) ) @@ -348,7 +346,7 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) } if ( 10 == b_fill ) { - ur_dict64_grow(r, dict, dict->prev, dict->size); + ur_dict_grow(r, dict, dict->prev, dict->size); continue; } @@ -358,7 +356,7 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) cel = _cons_unsafe(cells, mug, hed, tal); - bucket->data[b_fill] = (uint64_t)cel; + bucket->refs[b_fill] = cel; bucket->fill = 1 + b_fill; return cel; @@ -372,7 +370,7 @@ ur_hcon_init(void) assert( r ); { - ur_dict64_t *dict; + ur_dict_t *dict; uint64_t fib11 = 89, fib12 = 144; // allocate atom storage @@ -384,7 +382,7 @@ ur_hcon_init(void) // allocate atom hashtable // dict = &(r->atoms.dict); - ur_dict64_grow(r, dict, fib11, fib12); + ur_dict_grow(r, dict, fib11, fib12); // allocate cell storage // @@ -395,7 +393,7 @@ ur_hcon_init(void) // allocate cell hashtable // dict = &(r->cells.dict); - ur_dict64_grow(r, dict, fib11, fib12); + ur_dict_grow(r, dict, fib11, fib12); } return r; From 072307addde39cb3a31851b5e1b8fe48616ed9cb Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 29 Jul 2020 13:52:57 -0700 Subject: [PATCH 006/123] u3: refactors global deduplicator in u3u_uniq() --- pkg/urbit/include/all.h | 1 + pkg/urbit/include/noun/hashtable.h | 5 + pkg/urbit/include/noun/urth.h | 8 + pkg/urbit/include/ur/hashcons.h | 8 + pkg/urbit/noun/hashtable.c | 9 + pkg/urbit/noun/urth.c | 364 ++++++++++++++++++++++++++ pkg/urbit/ur/hashcons.c | 11 + pkg/urbit/worker/main.c | 396 
+---------------------------- 8 files changed, 407 insertions(+), 395 deletions(-) create mode 100644 pkg/urbit/include/noun/urth.h create mode 100644 pkg/urbit/noun/urth.c diff --git a/pkg/urbit/include/all.h b/pkg/urbit/include/all.h index 04ff196449..b138c6d408 100644 --- a/pkg/urbit/include/all.h +++ b/pkg/urbit/include/all.h @@ -27,6 +27,7 @@ # include "noun/serial.h" // u3s: serialization # include "noun/trace.h" // u3t: profiling / tracing # include "noun/xtract.h" // u3x: noun access (error crashes) +# include "noun/urth.h" // u3u: off-loom integration # include "noun/vortex.h" // u3v: arvo kernel # include "noun/zave.h" // u3z: memoization diff --git a/pkg/urbit/include/noun/hashtable.h b/pkg/urbit/include/noun/hashtable.h index bf9f50e176..64818de921 100644 --- a/pkg/urbit/include/noun/hashtable.h +++ b/pkg/urbit/include/noun/hashtable.h @@ -177,3 +177,8 @@ */ u3p(u3h_root) u3h_take(u3p(u3h_root) har_p); + + /* u3h_wyt(): number of entries + */ + c3_w + u3h_wyt(u3p(u3h_root) har_p); diff --git a/pkg/urbit/include/noun/urth.h b/pkg/urbit/include/noun/urth.h new file mode 100644 index 0000000000..d777debc5e --- /dev/null +++ b/pkg/urbit/include/noun/urth.h @@ -0,0 +1,8 @@ +/* include/noun/urth.h +*/ + /** Functions. + **/ + /* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. 
+ */ + void + u3u_uniq(void); diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index bfb1efbb7a..3eaa61724b 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -47,6 +47,11 @@ typedef enum { ur_icell = 2, } ur_tag; +typedef struct ur_nvec_s { + uint64_t fill; + ur_nref* refs; +} ur_nvec_t; + typedef struct ur_pail_s { uint8_t fill; ur_nref refs[10]; @@ -94,3 +99,6 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal); ur_root_t* ur_hcon_init(void); + +void +ur_nvec_init(ur_nvec_t *v, uint64_t size); diff --git a/pkg/urbit/noun/hashtable.c b/pkg/urbit/noun/hashtable.c index 1838c8945d..8ae60209ea 100644 --- a/pkg/urbit/noun/hashtable.c +++ b/pkg/urbit/noun/hashtable.c @@ -1180,3 +1180,12 @@ u3h_discount(u3p(u3h_root) har_p) return tot_w; } + +/* u3h_wyt(): number of entries +*/ +c3_w +u3h_wyt(u3p(u3h_root) har_p) +{ + u3h_root* har_u = u3to(u3h_root, har_p); + return har_u->use_w; +} diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c new file mode 100644 index 0000000000..c5a40373d8 --- /dev/null +++ b/pkg/urbit/noun/urth.c @@ -0,0 +1,364 @@ +/* noun/urth.c +** +*/ +#include +#include +#include +#include + +#include "all.h" +#include "ur/hashcons.h" + +/* _cu_met_3(): atom bytewidth a la u3r_met(3, ...) +*/ +static inline c3_w +_cu_met_3(u3a_atom* vat_u) +{ + c3_w len_w = vat_u->len_w; + c3_w* buf_w = vat_u->buf_w; + + if ( !len_w ) { + return 0; + } + else { + c3_w gal_w = len_w - 1; + c3_w daz_w = buf_w[gal_w]; + + return (gal_w << 2) + + ((daz_w >> 24) ? 4 : (daz_w >> 16) ? 3 : (daz_w >> 8) ? 
2 : 1); + } +} + +// XX this is morally correct, but not useful +// for deduplicating the loom +// +#if 0 +static inline ur_nref +_cu_atom_to_ref(u3a_atom* vat_u, ur_root_t *r) +{ + ur_nref ref; + + switch ( vat_u->len_w ) { + case 2: { + ref = ur_coin64(r, ( ((c3_d)vat_u->buf_w[1]) << 32 + | ((c3_d)vat_u->buf_w[0]) )); + } break; + + case 1: { + ref = ur_coin64(r, (c3_d)vat_u->buf_w[0]); + } break; + + + default: { + c3_assert( vat_u->len_w ); + + c3_y* byt_y = (c3_y*)vat_u->buf_w; + c3_w len_w = _cu_met_3(vat_u); + + ref = ur_coin_bytes(r, byt_y, (c3_d)len_w); + } break; + } + + return ref; +} +#endif + +/* _cu_atom_to_ref(): indirect u3 atom to ur_nref. +*/ +static inline ur_nref +_cu_atom_to_ref(u3a_atom* vat_u, ur_root_t *r) +{ + c3_assert( vat_u->len_w ); + + c3_y* byt_y = (c3_y*)vat_u->buf_w; + c3_w len_w = _cu_met_3(vat_u); + + return ur_coin_bytes(r, byt_y, (c3_d)len_w); +} + +/* _cu_box_check(): check loom allocation box for relocation pointer. +*/ +static inline c3_o +_cu_box_check(u3a_noun* som_u, ur_nref* ref) +{ + u3a_box* box_u = u3a_botox(som_u); + c3_w* box_w = (void*)box_u; + + if ( 0xffffffff == box_w[0] ) { + *ref = ( ((c3_d)box_w[2]) << 32 + | ((c3_d)box_w[1]) ); + return c3y; + } + + return c3n; +} + +/* _cu_box_stash(): overwrite an allocation box with relocation pointer. 
+*/ +static inline void +_cu_box_stash(u3a_noun* som_u, ur_nref ref) +{ + u3a_box* box_u = u3a_botox(som_u); + c3_w* box_w = (void*)box_u; + + // overwrite u3a_atom with reallocated reference + // + box_w[0] = 0xffffffff; + box_w[1] = ref & 0xffffffff; + box_w[2] = ref >> 32; +} + +// stack frame for recording head vs tail iteration +// +// In Hoon, this structure would be as follows: +// +// $% [%root ~] +// [%head cell=^] +// [%tail cell=^ hed-mug=@] +// == +// + +#define STACK_ROOT 0 +#define STACK_HEAD 1 +#define STACK_TAIL 2 + +typedef struct _cu_frame_s +{ + c3_y tag_y; + u3a_cell* cel_u; + ur_nref ref; +} _cu_frame; + +typedef struct _cu_stack_s +{ + c3_w pre_w; + c3_w siz_w; + c3_w fil_w; + _cu_frame* fam_u; +} _cu_stack; + +/* _cu_stack_push(): push a "stack" frame. +*/ +static inline void +_cu_stack_push(_cu_stack *s, c3_y tag_y, u3a_cell* cel_u, ur_nref ref) +{ + if ( s->fil_w == s->siz_w ) { + c3_w nex_w = s->pre_w + s->siz_w; + s->fam_u = c3_realloc(s->fam_u, nex_w * sizeof(*s->fam_u)); + s->pre_w = s->siz_w; + s->siz_w = nex_w; + } + + _cu_frame* fam_u = &(s->fam_u[s->fil_w++]); + fam_u->tag_y = tag_y; + fam_u->cel_u = cel_u; + fam_u->ref = ref; +} + +/* _cu_from_loom(): reallocate [a] off loom, in [r]. 
+*/ +static ur_nref +_cu_from_loom(ur_root_t *r, u3_noun a) +{ + ur_nref ref; + + _cu_stack s = { .pre_w = 89, .siz_w = 144, .fil_w = 0, .fam_u = 0 }; + s.fam_u = c3_malloc((s.pre_w + s.siz_w) * sizeof(*s.fam_u)); + _cu_stack_push(&s, STACK_ROOT, 0, 0); + + advance: { + // u3 direct == ur direct + // + if ( c3y == u3a_is_cat(a) ) { + ref = (ur_nref)a; + goto retreat; + } + else { + u3a_noun* som_u = u3a_to_ptr(a); + + // all bits set == already reallocated + // + if ( c3y == _cu_box_check(som_u, &ref) ) { + goto retreat; + } + else if ( c3y == u3a_is_atom(a) ) { + ref = _cu_atom_to_ref((u3a_atom*)som_u, r); + _cu_box_stash(som_u, ref); + goto retreat; + } + else { + u3a_cell* cel_u = (u3a_cell*)som_u; + _cu_stack_push(&s, STACK_HEAD, cel_u, 0); + a = cel_u->hed; + goto advance; + } + } + } + + retreat: { + _cu_frame fam_u = s.fam_u[--s.fil_w]; + + switch ( fam_u.tag_y ) { + default: c3_assert(0); + case STACK_ROOT: break; + + case STACK_HEAD: { + _cu_stack_push(&s, STACK_TAIL, fam_u.cel_u, ref); + a = fam_u.cel_u->tel; + goto advance; + } + + case STACK_TAIL: { + u3a_cell* cel_u = fam_u.cel_u; + ref = ur_cons(r, fam_u.ref, ref); + _cu_box_stash((u3a_noun*)cel_u, ref); + goto retreat; + } + } + } + + free(s.fam_u); + + return ref; +} + +/* _cu_ref_to_noun(): lookup/allocate [ref] on the loom. +*/ +static u3_noun +_cu_ref_to_noun(ur_nref ref, u3_noun* vat, u3_noun* cel) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: { + if ( 0x7fffffffULL > ref ) { + return (u3_atom)ref; + } + else { + c3_w wor_w[2]; + + wor_w[0] = ref & 0xffffffff; + wor_w[1] = ref >> 32; + + return u3i_words(2, wor_w); + } + } break; + + case ur_iatom: return vat[ur_nref_idx(ref)]; + + case ur_icell: return cel[ur_nref_idx(ref)]; + } +} + +typedef struct _cu_vec_s { + ur_nvec_t* vec_u; + ur_root_t* rot_u; +} _cu_vec; + +/* _cu_hamt_walk(): reallocate key/value pair in hamt walk. 
+*/ +static void +_cu_hamt_walk(u3_noun kev, void* ptr) +{ + _cu_vec* dat_u = (_cu_vec*)ptr; + ur_nvec_t* vec_u = dat_u->vec_u; + ur_root_t* rot_u = dat_u->rot_u; + + vec_u->refs[vec_u->fill++] = _cu_from_loom(rot_u, kev); +} + +/* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. +*/ +void +u3u_uniq(void) +{ + c3_assert( &(u3H->rod_u) == u3R ); + + ur_root_t *r = ur_hcon_init(); + + // allow read/write on the whole loom, bypassing page tracking + // + if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, (PROT_READ | PROT_WRITE)) ) { + c3_assert(0); + } + + fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); + fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + + ur_nref ken = _cu_from_loom(r, u3A->roc); + + fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); + fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + + + c3_w cod_w = u3h_wyt(u3R->jed.cod_p); + ur_nvec_t v; + + fprintf(stderr, "hc: cold count %u\r\n", cod_w); + + { + _cu_vec dat_u = { .vec_u = &v, .rot_u = r }; + ur_nvec_init(&v, cod_w); + u3h_walk_with(u3R->jed.cod_p, _cu_hamt_walk, &dat_u); + } + + + fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); + fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + + u3m_pave(c3y, c3n); + // XX wtf? + u3R->jed.hot_p = u3h_new(); + + u3_atom *vat; + u3_noun *cel; + + { + ur_atoms_t *atoms = &(r->atoms); + uint64_t *lens = atoms->lens; + uint8_t **byts = atoms->bytes; + uint64_t i, fill = atoms->fill; + + vat = calloc(fill, sizeof(u3_atom)); + + for ( i = 0; i < fill; i++ ) { + vat[i] = u3i_bytes(lens[i], byts[i]); + // XX mug? 
+ } + } + + { + ur_cells_t *cells = &(r->cells); + ur_nref *heds = cells->heads, *tals = cells->tails; + uint64_t i, fill = cells->fill; + u3_noun hed, tal; + + cel = calloc(fill, sizeof(u3_noun)); + + for ( i = 0; i < fill; i++ ) { + hed = _cu_ref_to_noun(heds[i], vat, cel); + tal = _cu_ref_to_noun(tals[i], vat, cel); + cel[i] = u3nc(hed, tal); + // XX mug? + } + } + + u3A->roc = cel[ur_nref_idx(ken)]; + + { + uint32_t i; + ur_nref ref; + u3_noun kev; + + for ( i = 0; i < cod_w; i++) { + ref = v.refs[i]; + kev = cel[ur_nref_idx(ref)]; + u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); + u3z(kev); + } + } + + // mark all pages dirty + // + memset((void*)u3P.dit_w, 0xff, u3a_pages >> 3); +} diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 1017989816..0ca5809758 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -270,6 +270,10 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) ur_dict_t *dict = &(atoms->dict); ur_mug mug = ur_mug_bytes(byt, len); + // XX should check for <= 62 bits, coin direct + // XX conflicts with current u3u_uniq() use-case + // + while ( 1 ) { uint64_t idx = ( mug % dict->size ); ur_pail_t *bucket = &(dict->buckets[idx]); @@ -398,3 +402,10 @@ ur_hcon_init(void) return r; } + +void +ur_nvec_init(ur_nvec_t *v, uint64_t size) +{ + v->fill = 0; + v->refs = calloc(size, sizeof(ur_nref)); +} diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index b5872620fe..512fd908f8 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -290,400 +290,6 @@ _cw_queu(c3_i argc, c3_c* argv[]) } } - -// stack frame for recording head vs tail iteration -// -// In Hoon, this structure would be as follows: -// -// $% [%root ~] -// [%head cell=^] -// [%tail cell=^ hed-mug=@] -// == -// - -#define STACK_ROOT 0 -#define STACK_HEAD 1 -#define STACK_TAIL 2 - -typedef struct ur_temp_s -{ - uint64_t prev; - uint64_t size; - void *base; - void *top; -} ur_temp_t; - -void -ur_temp_init(ur_temp_t *t) -{ - 
uint64_t fib19 = 4181, fib20 = 6765; - - t->prev = fib19; - t->size = fib20; - t->base = malloc(fib20); - t->top = t->base; -} - -void* -ur_temp_push(ur_temp_t *t, size_t wide) -{ - uint64_t fill = t->top - t->base; - uint64_t grow = t->size + wide; - - if ( fill > (t->size + wide) ) { - uint64_t next = t->prev + t->size; - t->base = realloc(t->base, next); - t->top = t->base + fill; - } - - { - void* ptr = t->top; - t->top += wide; - return ptr; - } -} - -void* -ur_temp_peek(ur_temp_t *t, size_t wide) -{ - return t->top - wide; -} - -void -ur_temp_pop(ur_temp_t *t, size_t wide) -{ - t->top -= wide; - assert( t->top >= t->base ); -} - -typedef struct _ur_frame_s -{ - c3_y tag_y; - u3a_cell* cel_u; - ur_nref ref; -} _ur_frame_t; - -typedef struct _ur_stack_s -{ - uint32_t prev; - uint32_t size; - uint32_t fill; - _ur_frame_t *entries; -} _ur_stack_t; - -static inline void -_stack_push(_ur_stack_t *s, c3_y tag_y, u3a_cell* cel_u, ur_nref ref) -{ - if ( s->fill == s->size ) { - uint32_t next = s->prev + s->size; - s->entries = realloc(s->entries, next * sizeof(_ur_frame_t)); - s->prev = s->size; - s->size = next; - } - - _ur_frame_t* fam_u = &(s->entries[s->fill++]); - fam_u->tag_y = tag_y; - fam_u->cel_u = cel_u; - fam_u->ref = ref; -} - -static ur_nref -_from_loom(ur_root_t *r, u3_noun a) -{ - ur_nref ref; - - _ur_stack_t s; - s.prev = 89; - s.size = 144; - s.fill = 0; - s.entries = malloc((s.prev + s.size) * sizeof(_ur_frame_t)); - _stack_push(&s, STACK_ROOT, 0, 0); - - // ur_temp_t t; - // ur_temp_init(&t); - - // { - // _ur_frame_t *fam_u = ur_temp_push(&t, sizeof(_ur_frame_t)); - // fam_u->tag_y = STACK_ROOT; - // } - - advance: { - // u3 direct == ur direct - // - if ( c3y == u3a_is_cat(a) ) { - ref = (ur_nref)a; - goto retreat; - } - else { - u3a_noun* som_u = u3a_to_ptr(a); - u3a_box* box_u = u3a_botox(som_u); - c3_w* box_w = (void*)box_u; - - // all bits set == already reallocated - // - if ( 0xffffffff == box_w[0] ) { - ref = ( ((uint64_t)box_w[2]) << 32 - 
| ((uint64_t)box_w[1]) ); - goto retreat; - } - else if ( c3y == u3a_is_atom(a) ) { - u3a_atom* vat_u = (u3a_atom*)som_u; - - // coin an nref - // - switch ( vat_u->len_w ) { - case 2: { - ref = ur_coin64(r, ( ((uint64_t)vat_u->buf_w[1]) << 32 - | ((uint64_t)vat_u->buf_w[0]) )); - } break; - - case 1: { - ref = ur_coin64(r, (uint64_t)vat_u->buf_w[0]); - } break; - - - default: { - c3_assert( vat_u->len_w ); - - uint8_t *byt = (uint8_t*)vat_u->buf_w; - uint64_t len = u3r_met(3, a); - - ref = ur_coin_bytes(r, byt, len); - } break; - } - - // overwrite u3a_atom with reallocated reference - // - box_w[0] = 0xffffffff; - box_w[1] = ref & 0xffffffff; - box_w[2] = ref >> 32; - - goto retreat; - } - else { - u3a_cell* cel_u = (u3a_cell*)som_u; - _stack_push(&s, STACK_HEAD, cel_u, 0); - // { - // _ur_frame_t *fam_u = ur_temp_push(&t, sizeof(_ur_frame_t)); - // fam_u->tag_y = STACK_HEAD; - // fam_u->cel_u = cel_u; - // } - a = cel_u->hed; - goto advance; - } - } - } - - retreat: { - _ur_frame_t fam_u = s.entries[--s.fill]; - - // c3_y tag_y; - // u3a_cell* cel_u; - // ur_nref hed; - // { - // _ur_frame_t *fam_u = ur_temp_peek(&t, sizeof(_ur_frame_t)); - - // tag_y = fam_u->tag_y; - // cel_u = fam_u->cel_u; - // hed = fam_u->ref; - - // ur_temp_pop(&t, sizeof(_ur_frame_t)); - // } - - switch ( fam_u.tag_y ) { - // switch ( tag_y ) { - default: { - c3_assert(0); - } - - case STACK_ROOT: { - break; - } - - case STACK_HEAD: { - _stack_push(&s, STACK_TAIL, fam_u.cel_u, ref); - // { - // _ur_frame_t *fam_u = ur_temp_push(&t, sizeof(_ur_frame_t)); - // fam_u->tag_y = STACK_TAIL; - // fam_u->cel_u = cel_u; - // fam_u->ref = ref; - // } - - a = fam_u.cel_u->tel; - // a = cel_u->tel; - goto advance; - } - - case STACK_TAIL: { - u3a_cell* cel_u = fam_u.cel_u; - u3a_box* box_u = u3a_botox(cel_u); - c3_w* box_w = (void*)box_u; - - ref = ur_cons(r, fam_u.ref, ref); - // ref = ur_cons(r, hed, ref); - - // overwrite u3a_atom with reallocated reference - // - box_w[0] = 0xffffffff; - 
box_w[1] = ref & 0xffffffff; - box_w[2] = ref >> 32; - - goto retreat; - } - } - } - - free(s.entries); - // free(t.base); - - return ref; -} - -typedef struct ur_nvec_s { - void* data; - uint64_t fill; - ur_nref* refs; -} ur_nvec_t; - -void -ur_nvec_init(ur_nvec_t *v, uint64_t size, void* ptr) -{ - v->data = ptr; - v->fill = 0; - v->refs = calloc(size, sizeof(ur_nref)); -} - -// XX u3h_use() -static c3_w -_hamt_count(u3p(u3h_root) har_p) -{ - u3h_root* har_u = u3to(u3h_root, har_p); - return har_u->use_w; -} - -static void -_from_hamt(u3_noun kev, void* ptr) -{ - ur_nvec_t *v = (ur_nvec_t*)ptr; - ur_root_t *r = v->data; - - v->refs[v->fill++] = _from_loom(r, kev); -} - -static u3_noun -_ref_to_noun(ur_nref ref, u3_noun* vat, u3_noun* cel) -{ - switch ( ur_nref_tag(ref) ) { - default: assert(0); - - case ur_direct: { - if ( 0x7fffffffULL > ref ) { - return (u3_atom)ref; - } - else { - c3_w wor_w[2]; - - wor_w[0] = ref & 0xffffffff; - wor_w[1] = ref >> 32; - - return u3i_words(2, wor_w); - } - } break; - - case ur_iatom: return vat[ur_nref_idx(ref)]; - - case ur_icell: return cel[ur_nref_idx(ref)]; - } -} - -void -do_stuff(void) -{ - ur_root_t *r = ur_hcon_init(); - - // allow read/write on the whole loom, bypassing page tracking - // - if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, (PROT_READ | PROT_WRITE)) ) { - c3_assert(0); - } - - fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); - fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); - - ur_nref ken = _from_loom(r, u3A->roc); - - fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); - fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); - - - c3_w cod_w = _hamt_count(u3R->jed.cod_p); - ur_nvec_t v; - - fprintf(stderr, "hc: cold count %u\r\n", cod_w); - - ur_nvec_init(&v, cod_w, r); - u3h_walk_with(u3R->jed.cod_p, _from_hamt, &v); - 
- fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); - fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); - - u3m_pave(c3y, c3n); - // XX wtf? - u3R->jed.hot_p = u3h_new(); - - u3_atom *vat; - u3_noun *cel; - - { - ur_atoms_t *atoms = &(r->atoms); - uint64_t *lens = atoms->lens; - uint8_t **byts = atoms->bytes; - uint64_t i, fill = atoms->fill; - - vat = calloc(fill, sizeof(u3_atom)); - - for ( i = 0; i < fill; i++ ) { - vat[i] = u3i_bytes(lens[i], byts[i]); - // XX mug? - } - } - - { - ur_cells_t *cells = &(r->cells); - ur_nref *heds = cells->heads, *tals = cells->tails; - uint64_t i, fill = cells->fill; - u3_noun hed, tal; - - cel = calloc(fill, sizeof(u3_noun)); - - for ( i = 0; i < fill; i++ ) { - hed = _ref_to_noun(heds[i], vat, cel); - tal = _ref_to_noun(tals[i], vat, cel); - cel[i] = u3nc(hed, tal); - // XX mug? - } - } - - u3A->roc = cel[ur_nref_idx(ken)]; - - { - uint32_t i; - ur_nref ref; - u3_noun kev; - - for ( i = 0; i < cod_w; i++) { - ref = v.refs[i]; - kev = cel[ur_nref_idx(ref)]; - u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); - u3z(kev); - } - } - - // mark all pages dirty - // - memset((void*)u3P.dit_w, 0xff, u3a_pages >> 3); -} - /* _cw_uniq(); deduplicate persistent nouns */ static void @@ -696,7 +302,7 @@ _cw_uniq(c3_i argc, c3_c* argv[]) u3_serf_grab(); - do_stuff(); + u3u_uniq(); u3_serf_grab(); From 90f2d1e4be4f7e956978e11761d5772588e08512 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 29 Jul 2020 13:56:09 -0700 Subject: [PATCH 007/123] serf: adds uniq command to urbit-worker usage string --- pkg/urbit/worker/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 512fd908f8..b57d38fc39 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -339,13 +339,15 @@ _cw_usage(c3_i argc, c3_c* argv[]) " %s grab \n\n" " compact persistent state:\n" " %s pack
\n\n" + " deduplicate persistent state:\n" + " %s uniq \n\n" " jam persistent state:\n" " %s cram \n\n" " cue persistent state:\n" " %s queu \n\n" " run as a 'serf':\n" " %s serf \n", - argv[0], argv[0], argv[0], argv[0], argv[0], argv[0]); + argv[0], argv[0], argv[0], argv[0], argv[0], argv[0], argv[0]); } /* main(): main() when run as urbit-worker From 9b2a267c1dc3253b199979e84b793f0aa0226dc4 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 29 Jul 2020 15:30:09 -0700 Subject: [PATCH 008/123] u3: refactors on-loom reallocation in u3u_uniq() --- pkg/urbit/noun/urth.c | 138 ++++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 58 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index c5a40373d8..5aa92d1ed9 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -222,10 +222,48 @@ _cu_from_loom(ur_root_t *r, u3_noun a) return ref; } +typedef struct _cu_vec_s { + ur_nvec_t* vec_u; + ur_root_t* rot_u; +} _cu_vec; + +/* _cu_hamt_walk(): reallocate key/value pair in hamt walk. +*/ +static void +_cu_hamt_walk(u3_noun kev, void* ptr) +{ + _cu_vec* dat_u = (_cu_vec*)ptr; + ur_nvec_t* vec_u = dat_u->vec_u; + ur_root_t* rot_u = dat_u->rot_u; + + vec_u->refs[vec_u->fill++] = _cu_from_loom(rot_u, kev); +} + +typedef struct _cu_loom_s { + u3_atom *vat; + u3_noun *cel; +} _cu_loom; + +/* _cu_atoms_to_loom(): allocate all indirect atoms on the loom. +*/ +static void +_cu_atoms_to_loom(ur_root_t* rot_u, _cu_loom* lom_u) +{ + c3_d* len_d = rot_u->atoms.lens; + c3_y** byt_y = rot_u->atoms.bytes; + c3_d fil_d = rot_u->atoms.fill; + u3_atom* vat = lom_u->vat = calloc(fil_d, sizeof(u3_atom)); + c3_d i_d; + + for ( i_d = 0; i_d < fil_d; i_d++ ) { + vat[i_d] = u3i_bytes(len_d[i_d], byt_y[i_d]); + } +} + /* _cu_ref_to_noun(): lookup/allocate [ref] on the loom. 
*/ static u3_noun -_cu_ref_to_noun(ur_nref ref, u3_noun* vat, u3_noun* cel) +_cu_ref_to_noun(ur_nref ref, _cu_loom* lom_u) { switch ( ur_nref_tag(ref) ) { default: assert(0); @@ -244,27 +282,28 @@ _cu_ref_to_noun(ur_nref ref, u3_noun* vat, u3_noun* cel) } } break; - case ur_iatom: return vat[ur_nref_idx(ref)]; + case ur_iatom: return lom_u->vat[ur_nref_idx(ref)]; - case ur_icell: return cel[ur_nref_idx(ref)]; + case ur_icell: return lom_u->cel[ur_nref_idx(ref)]; } } -typedef struct _cu_vec_s { - ur_nvec_t* vec_u; - ur_root_t* rot_u; -} _cu_vec; - -/* _cu_hamt_walk(): reallocate key/value pair in hamt walk. +/* _cu_cells_to_loom(): allocate all cells on the loom. */ static void -_cu_hamt_walk(u3_noun kev, void* ptr) +_cu_cells_to_loom(ur_root_t* rot_u, _cu_loom* lom_u) { - _cu_vec* dat_u = (_cu_vec*)ptr; - ur_nvec_t* vec_u = dat_u->vec_u; - ur_root_t* rot_u = dat_u->rot_u; + ur_nref* hed = rot_u->cells.heads; + ur_nref* tal = rot_u->cells.tails; + c3_d fil_d = rot_u->cells.fill; + u3_noun* cel = lom_u->cel = calloc(fil_d, sizeof(u3_noun)); + c3_d i_d; - vec_u->refs[vec_u->fill++] = _cu_from_loom(rot_u, kev); + for ( i_d = 0; i_d < fil_d; i_d++ ) { + cel[i_d] = u3nc(_cu_ref_to_noun(hed[i_d], lom_u), + _cu_ref_to_noun(tal[i_d], lom_u)); + // XX mug? + } } /* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. @@ -278,6 +317,8 @@ u3u_uniq(void) // allow read/write on the whole loom, bypassing page tracking // + // NB: u3e_save() will reinstate protection flags + // if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, (PROT_READ | PROT_WRITE)) ) { c3_assert(0); } @@ -302,61 +343,42 @@ u3u_uniq(void) u3h_walk_with(u3R->jed.cod_p, _cu_hamt_walk, &dat_u); } - fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + // NB: hot jet state is not yet re-established + // u3m_pave(c3y, c3n); - // XX wtf? 
- u3R->jed.hot_p = u3h_new(); - - u3_atom *vat; - u3_noun *cel; { - ur_atoms_t *atoms = &(r->atoms); - uint64_t *lens = atoms->lens; - uint8_t **byts = atoms->bytes; - uint64_t i, fill = atoms->fill; + _cu_loom lom_u; + _cu_atoms_to_loom(r, &lom_u); + _cu_cells_to_loom(r, &lom_u); - vat = calloc(fill, sizeof(u3_atom)); + // restore kernel reference (always a cell) + // + u3A->roc = lom_u.cel[ur_nref_idx(ken)]; - for ( i = 0; i < fill; i++ ) { - vat[i] = u3i_bytes(lens[i], byts[i]); - // XX mug? + // restore cold jet state (always cells) + // + { + c3_w i_w; + ur_nref ref; + u3_noun kev; + + for ( i_w = 0; i_w < cod_w; i_w++) { + ref = v.refs[i_w]; + kev = lom_u.cel[ur_nref_idx(ref)]; + u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); + u3z(kev); + } } } - { - ur_cells_t *cells = &(r->cells); - ur_nref *heds = cells->heads, *tals = cells->tails; - uint64_t i, fill = cells->fill; - u3_noun hed, tal; - - cel = calloc(fill, sizeof(u3_noun)); - - for ( i = 0; i < fill; i++ ) { - hed = _cu_ref_to_noun(heds[i], vat, cel); - tal = _cu_ref_to_noun(tals[i], vat, cel); - cel[i] = u3nc(hed, tal); - // XX mug? 
- } - } - - u3A->roc = cel[ur_nref_idx(ken)]; - - { - uint32_t i; - ur_nref ref; - u3_noun kev; - - for ( i = 0; i < cod_w; i++) { - ref = v.refs[i]; - kev = cel[ur_nref_idx(ref)]; - u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); - u3z(kev); - } - } + // allocate new hot jet state; re-establish warm + // + u3j_boot(c3y); + u3j_ream(); // mark all pages dirty // From 87b10abee4d9ba27278f87c669e5c883a2fac232 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 30 Jul 2020 14:42:16 -0700 Subject: [PATCH 009/123] ur: adds ur_hcon_info() to print memory measurements --- pkg/urbit/include/ur/hashcons.h | 3 + pkg/urbit/ur/hashcons.c | 101 ++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 3eaa61724b..8f7f451918 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -97,6 +97,9 @@ ur_coin64(ur_root_t *r, uint64_t n); ur_nref ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal); +void +ur_hcon_info(FILE *f, ur_root_t *r); + ur_root_t* ur_hcon_init(void); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 0ca5809758..43cbadc995 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -367,6 +367,107 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) } } +static void +_print_memory(FILE *f, const char *c, uint64_t bytes) +{ + if ( !bytes ) { + fprintf(f, "%s: B/0\r\n", c); + } + else { + uint32_t g = (bytes / 1000000000); + uint32_t m = (bytes % 1000000000) / 1000000; + uint32_t k = (bytes % 1000000) / 1000; + uint32_t b = (bytes % 1000); + + if ( g ) { + fprintf(f, "%s: GB/%d.%03d.%03d.%03d\r\n", c, g, m, k, b); + } + else if ( m ) { + fprintf(f, "%s: MB/%d.%03d.%03d\r\n", c, m, k, b); + } + else if ( k ) { + fprintf(f, "%s: KB/%d.%03d\r\n", c, k, b); + } + else if ( b ) { + fprintf(f, "%s: B/%d\r\n", c, b); + } + } +} + +static uint64_t +_dict_info(FILE *f, ur_dict_t *dict) +{ + uint64_t data = dict->size * 
sizeof(*dict->buckets); + _print_memory(f, " dict", data); + return data; +} + +static uint64_t +_atoms_info(FILE *f, ur_atoms_t *atoms) +{ + uint64_t total = 0; + uint64_t size = atoms->size; + uint64_t fill = atoms->fill; + uint64_t refs = size * ( sizeof(*atoms->bytes) + + sizeof(*atoms->lens) + + sizeof(*atoms->mugs) ); + uint64_t i, data = 0; + + fprintf(f, " atoms (%" PRIu64 "):\r\n", fill); + + _print_memory(f, " refs", refs); + total += refs; + + for ( i = 0; i < fill; i++ ) { + data += atoms->lens[i]; + } + _print_memory(f, " data", data); + total += data; + + total += _dict_info(f, &(atoms->dict)); + _print_memory(f, " total", total); + return total; +} + +static uint64_t +_cells_info(FILE *f, ur_cells_t *cells) +{ + uint64_t total = 0; + uint64_t size = cells->size; + uint64_t fill = cells->fill; + uint64_t refs = size * ( sizeof(*cells->heads) + + sizeof(*cells->heads) + + sizeof(*cells->mugs) ); + + fprintf(f, " cells (%" PRIu64 "):\r\n", fill); + + _print_memory(f, " refs", refs); + total += refs; + + total += _dict_info(f, &(cells->dict)); + _print_memory(f, " total", total); + return total; +} + +void +ur_hcon_info(FILE *f, ur_root_t *r) +{ + uint64_t total = 0; + + fprintf(stderr, "hash-cons arena:\r\n"); + + { + uint64_t root = sizeof(*r); + _print_memory(f, " root", root); + total += root; + } + + total += _atoms_info(f, &(r->atoms)); + total += _cells_info(f, &(r->cells)); + + _print_memory(f, "total", total); +} + ur_root_t* ur_hcon_init(void) { From d7f6e79409368ee70d36e4251476f402865b4c31 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 30 Jul 2020 14:42:34 -0700 Subject: [PATCH 010/123] u3: refactors u3u_uniq, prints memory measurements --- pkg/urbit/noun/urth.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 5aa92d1ed9..1c335e348b 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -323,34 +323,34 @@ 
u3u_uniq(void) c3_assert(0); } - fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); - fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); - + // reallocate kernel + // ur_nref ken = _cu_from_loom(r, u3A->roc); - fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); - fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); - - - c3_w cod_w = u3h_wyt(u3R->jed.cod_p); - ur_nvec_t v; - - fprintf(stderr, "hc: cold count %u\r\n", cod_w); - + // reallocate cold jet state + // + ur_nvec_t cod_u; { - _cu_vec dat_u = { .vec_u = &v, .rot_u = r }; - ur_nvec_init(&v, cod_w); + c3_w cod_w = u3h_wyt(u3R->jed.cod_p); + _cu_vec dat_u = { .vec_u = &cod_u, .rot_u = r }; + ur_nvec_init(&cod_u, cod_w); u3h_walk_with(u3R->jed.cod_p, _cu_hamt_walk, &dat_u); } - fprintf(stderr, "hc: cells fill %" PRIu64 " size %" PRIu64 "\r\n", r->cells.fill, r->cells.size); - fprintf(stderr, "hc: atoms fill %" PRIu64 " size %" PRIu64 "\r\n", r->atoms.fill, r->atoms.size); + // print [rot_u] measurements + // + ur_hcon_info(stderr, r); + fprintf(stderr, "\r\n"); - // NB: hot jet state is not yet re-established + // reinitialize looom + // + // NB: hot jet state is not yet re-established // u3m_pave(c3y, c3n); { + // reallocate all nouns on the loom + // _cu_loom lom_u; _cu_atoms_to_loom(r, &lom_u); _cu_cells_to_loom(r, &lom_u); @@ -362,12 +362,13 @@ u3u_uniq(void) // restore cold jet state (always cells) // { - c3_w i_w; + c3_d max_d = cod_u.fill; + c3_d i_d; ur_nref ref; u3_noun kev; - for ( i_w = 0; i_w < cod_w; i_w++) { - ref = v.refs[i_w]; + for ( i_d = 0; i_d < max_d; i_d++) { + ref = cod_u.refs[i_d]; kev = lom_u.cel[ur_nref_idx(ref)]; u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); u3z(kev); From 52c86946d7c82df9ed6e898b499fefbed3d738a5 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 30 Jul 2020 14:51:39 -0700 
Subject: [PATCH 011/123] u3: moves event-number tracking inside u3u_uniq() --- pkg/urbit/noun/urth.c | 24 +++++++++++++++--------- pkg/urbit/worker/main.c | 5 ++--- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 1c335e348b..adac66eef4 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -313,8 +313,6 @@ u3u_uniq(void) { c3_assert( &(u3H->rod_u) == u3R ); - ur_root_t *r = ur_hcon_init(); - // allow read/write on the whole loom, bypassing page tracking // // NB: u3e_save() will reinstate protection flags @@ -323,23 +321,27 @@ u3u_uniq(void) c3_assert(0); } - // reallocate kernel + // stash event number // - ur_nref ken = _cu_from_loom(r, u3A->roc); + c3_d eve_d = u3A->ent_d; - // reallocate cold jet state + + // reallocate kernel and cold jet state // + ur_root_t* rot_u = ur_hcon_init(); + ur_nref ken = _cu_from_loom(rot_u, u3A->roc); + ur_nvec_t cod_u; { c3_w cod_w = u3h_wyt(u3R->jed.cod_p); - _cu_vec dat_u = { .vec_u = &cod_u, .rot_u = r }; + _cu_vec dat_u = { .vec_u = &cod_u, .rot_u = rot_u }; ur_nvec_init(&cod_u, cod_w); u3h_walk_with(u3R->jed.cod_p, _cu_hamt_walk, &dat_u); } // print [rot_u] measurements // - ur_hcon_info(stderr, r); + ur_hcon_info(stderr, rot_u); fprintf(stderr, "\r\n"); // reinitialize looom @@ -352,8 +354,8 @@ u3u_uniq(void) // reallocate all nouns on the loom // _cu_loom lom_u; - _cu_atoms_to_loom(r, &lom_u); - _cu_cells_to_loom(r, &lom_u); + _cu_atoms_to_loom(rot_u, &lom_u); + _cu_cells_to_loom(rot_u, &lom_u); // restore kernel reference (always a cell) // @@ -381,6 +383,10 @@ u3u_uniq(void) u3j_boot(c3y); u3j_ream(); + // restore event number + // + u3A->ent_d = eve_d; + // mark all pages dirty // memset((void*)u3P.dit_w, 0xff, u3a_pages >> 3); diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index b57d38fc39..76eba3004e 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -298,7 +298,8 @@ _cw_uniq(c3_i argc, c3_c* argv[]) 
c3_assert( 3 <= argc ); c3_c* dir_c = argv[2]; - c3_d eve_d = u3m_boot(dir_c); + + u3m_boot(dir_c); u3_serf_grab(); @@ -306,8 +307,6 @@ _cw_uniq(c3_i argc, c3_c* argv[]) u3_serf_grab(); - u3A->ent_d = eve_d; - u3e_save(); } From cd7f0267a983939db26c99c1b7548399ffce6c77 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 30 Jul 2020 15:19:26 -0700 Subject: [PATCH 012/123] ur: add *_free functions for all dynamic allocations --- pkg/urbit/include/ur/hashcons.h | 6 +++++ pkg/urbit/ur/hashcons.c | 41 +++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 8f7f451918..adf889a469 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -100,8 +100,14 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal); void ur_hcon_info(FILE *f, ur_root_t *r); +void +ur_hcon_free(ur_root_t *r); + ur_root_t* ur_hcon_init(void); +void +ur_nvec_free(ur_nvec_t *v); + void ur_nvec_init(ur_nvec_t *v, uint64_t size); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 43cbadc995..74807a17ad 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -468,6 +468,41 @@ ur_hcon_info(FILE *f, ur_root_t *r) _print_memory(f, "total", total); } +static void +_dict_free(ur_dict_t *dict) +{ + free(dict->buckets); +} + +static void +_atoms_free(ur_atoms_t *atoms) +{ + uint8_t **bytes = atoms->bytes; + uint64_t i, fill = atoms->fill; + + for ( i = 0; i < fill; i++ ) { + free(bytes[i]); + } + + _dict_free(&(atoms->dict)); + free(bytes); +} + +static void +_cells_free(ur_cells_t *cells) +{ + _dict_free(&(cells->dict)); + free(cells->heads); +} + +void +ur_hcon_free(ur_root_t *r) +{ + _atoms_free(&(r->atoms)); + _cells_free(&(r->cells)); + free(r); +} + ur_root_t* ur_hcon_init(void) { @@ -504,6 +539,12 @@ ur_hcon_init(void) return r; } +void +ur_nvec_free(ur_nvec_t *v) +{ + free(v->refs); +} + void ur_nvec_init(ur_nvec_t *v, uint64_t size) { From 
993f4b83f4df7a65f76ce03df93871fbf59f659c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 30 Jul 2020 15:20:42 -0700 Subject: [PATCH 013/123] u3: free all dynamic allocations in u3u_uniq() --- pkg/urbit/noun/urth.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index adac66eef4..fbfe5e6ef0 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -244,6 +244,15 @@ typedef struct _cu_loom_s { u3_noun *cel; } _cu_loom; +/* _cu_loom_free(): dispose loom relocation pointers +*/ +static void +_cu_loom_free(_cu_loom* lom_u) +{ + free(lom_u->vat); + free(lom_u->cel); +} + /* _cu_atoms_to_loom(): allocate all indirect atoms on the loom. */ static void @@ -376,8 +385,15 @@ u3u_uniq(void) u3z(kev); } } + + _cu_loom_free(&lom_u); } + // dispose off-loom structures + // + ur_nvec_free(&cod_u); + ur_hcon_free(rot_u); + // allocate new hot jet state; re-establish warm // u3j_boot(c3y); From 08d22b14d3f2742775b519bc4d278caea740ff1b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 4 Aug 2020 13:52:19 -0700 Subject: [PATCH 014/123] u3: fixes bug in snapshot image truncation --- pkg/urbit/noun/events.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index b010fd13e0..d12dc81ffe 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -621,7 +621,7 @@ _ce_patch_apply(u3_ce_patch* pat_u) if ( u3P.nor_u.pgs_w > pat_u->con_u->nor_w ) { c3_w ret_w; - ret_w = ftruncate(u3P.nor_u.fid_i, u3P.nor_u.pgs_w << (u3a_page + 2)); + ret_w = ftruncate(u3P.nor_u.fid_i, pat_u->con_u->nor_w << (u3a_page + 2)); if (ret_w){ fprintf(stderr, "loom: patch apply truncate north: %s\r\n", strerror(errno)); c3_assert(0); @@ -631,7 +631,7 @@ _ce_patch_apply(u3_ce_patch* pat_u) if ( u3P.sou_u.pgs_w > pat_u->con_u->sou_w ) { c3_w ret_w; - ret_w = ftruncate(u3P.sou_u.fid_i, u3P.sou_u.pgs_w << (u3a_page + 2)); + ret_w = ftruncate(u3P.sou_u.fid_i, 
pat_u->con_u->sou_w << (u3a_page + 2)); if (ret_w){ fprintf(stderr, "loom: patch apply truncate south: %s\r\n", strerror(errno)); c3_assert(0); From 99d205674914f60ac6d4217138d2fe400ceeecef Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 4 Aug 2020 13:56:33 -0700 Subject: [PATCH 015/123] u3: refactors snapshot patch application --- pkg/urbit/noun/events.c | 55 ++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index d12dc81ffe..289c30d399 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -609,44 +609,47 @@ _ce_image_sync(u3e_image* img_u) c3_sync(img_u->fid_i); } -/* _ce_patch_apply(): apply patch to image. +/* _ce_image_resize(): resize image, truncating if it shrunk. +*/ +static void +_ce_image_resize(u3e_image* img_u, c3_w pgs_w) +{ + if ( img_u->pgs_w > pgs_w ) { + if ( ftruncate(img_u->fid_i, pgs_w << (u3a_page + 2)) ) { + fprintf(stderr, "loom: image truncate %s: %s\r\n", + img_u->nam_c, + strerror(errno)); + c3_assert(0); + } + } + + img_u->pgs_w = pgs_w; +} + +/* _ce_patch_apply(): apply patch to images. 
*/ static void _ce_patch_apply(u3_ce_patch* pat_u) { c3_w i_w; - //u3l_log("image: nor_w %d, new %d\r\n", u3P.nor_u.pgs_w, pat_u->con_u->nor_w); - //u3l_log("image: sou_w %d, new %d\r\n", u3P.sou_u.pgs_w, pat_u->con_u->sou_w); + // resize images + // + _ce_image_resize(&u3P.nor_u, pat_u->con_u->nor_w); + _ce_image_resize(&u3P.sou_u, pat_u->con_u->sou_w); - if ( u3P.nor_u.pgs_w > pat_u->con_u->nor_w ) { - c3_w ret_w; - ret_w = ftruncate(u3P.nor_u.fid_i, pat_u->con_u->nor_w << (u3a_page + 2)); - if (ret_w){ - fprintf(stderr, "loom: patch apply truncate north: %s\r\n", strerror(errno)); - c3_assert(0); - } - } - u3P.nor_u.pgs_w = pat_u->con_u->nor_w; - - if ( u3P.sou_u.pgs_w > pat_u->con_u->sou_w ) { - c3_w ret_w; - ret_w = ftruncate(u3P.sou_u.fid_i, pat_u->con_u->sou_w << (u3a_page + 2)); - if (ret_w){ - fprintf(stderr, "loom: patch apply truncate south: %s\r\n", strerror(errno)); - c3_assert(0); - } - } - u3P.sou_u.pgs_w = pat_u->con_u->sou_w; - - if ( (-1 == lseek(pat_u->mem_i, 0, SEEK_SET)) || - (-1 == lseek(u3P.nor_u.fid_i, 0, SEEK_SET)) || - (-1 == lseek(u3P.sou_u.fid_i, 0, SEEK_SET)) ) + // seek to begining of patch and images + // + if ( (-1 == lseek(pat_u->mem_i, 0, SEEK_SET)) + || (-1 == lseek(u3P.nor_u.fid_i, 0, SEEK_SET)) + || (-1 == lseek(u3P.sou_u.fid_i, 0, SEEK_SET)) ) { fprintf(stderr, "loom: patch apply seek 0: %s\r\n", strerror(errno)); c3_assert(0); } + // write patch pages into the appropriate image + // for ( i_w = 0; i_w < pat_u->con_u->pgs_w; i_w++ ) { c3_w pag_w = pat_u->con_u->mem_u[i_w].pag_w; c3_w mem_w[1 << u3a_page]; From c1398c1b4ebc2392fa27291fcc667f16667a4e9a Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 10 Aug 2020 12:44:52 -0700 Subject: [PATCH 016/123] ur: adds atom bloq measurement in ur_met() --- pkg/urbit/include/ur/hashcons.h | 7 ++++ pkg/urbit/noun/urth.c | 2 +- pkg/urbit/ur/hashcons.c | 74 +++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/include/ur/hashcons.h 
b/pkg/urbit/include/ur/hashcons.h index adf889a469..7511f941d7 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -20,14 +20,21 @@ typedef uint8_t ur_bool_t; # error "port me" #endif +#define ur_lz8(a) ( ur_lz32(a) - 24 ) + #define ur_mask_3(a) (a & 0x7) #define ur_mask_8(a) (a & 0xff) #define ur_mask_31(a) (a & 0x7fffffff) #define ur_mask_62(a) (a & 0x3fffffffffffffffULL) +#define ur_met0_8(a) ( (a) ? 8 - ur_lz8(a) : 0 ) #define ur_met0_32(a) ( (a) ? 32 - ur_lz32(a) : 0 ) #define ur_met0_64(a) ( (a) ? 64 - ur_lz64(a) : 0 ) +#define ur_met3_8(a) \ + ({ uint8_t _a = ur_met0_8(a); \ + ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ + #define ur_met3_32(a) \ ({ uint8_t _a = ur_met0_32(a); \ ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index fbfe5e6ef0..facd88ba19 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -353,7 +353,7 @@ u3u_uniq(void) ur_hcon_info(stderr, rot_u); fprintf(stderr, "\r\n"); - // reinitialize looom + // reinitialize loom // // NB: hot jet state is not yet re-established // diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 74807a17ad..142d5a8b7a 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -221,6 +221,80 @@ ur_bytes(ur_root_t *r, ur_nref ref, uint8_t **byt, uint64_t *len) } } +static inline uint64_t +_met0_bytes(uint8_t *byt, uint64_t len) +{ + // XX requires no trailing null bytes + // + uint64_t last = len - 1; + return (last << 3) + ur_met0_8(byt[last]); +} + +uint64_t +ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) +{ + assert( !ur_deep(ref) ); + + // these cases are the same, except for the + // bit-width calculation and the width of their operands + // + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: { + uint8_t m_bit = ur_met0_64(ref); + + switch ( bloq ) { + case 0: return m_bit; + case 1: return (m_bit + 1) >> 1; + case 2: return (m_bit + 3) >> 2; + + { + // hand-inline of 
ur_met3_64 + // + uint8_t m_byt = (m_bit >> 3) + !!ur_mask_3(m_bit); + + case 3: return m_byt; + default: { + uint8_t off = (bloq - 3); + return (m_byt + ((1 << off) - 1)) >> off; + } + } + } + } break; + + case ur_iatom: { + uint64_t m_bit; + + { + uint64_t idx = ur_nref_idx(ref); + uint64_t len = r->atoms.lens[idx]; + uint8_t *byt = r->atoms.bytes[idx]; + + m_bit = _met0_bytes(byt, len); + } + + switch ( bloq ) { + case 0: return m_bit; + case 1: return (m_bit + 1) >> 1; + case 2: return (m_bit + 3) >> 2; + + { + // hand-inline of ur_met3_64 + // + uint64_t m_byt = (m_bit >> 3) + !!ur_mask_3(m_bit); + + case 3: return m_byt; + default: { + uint8_t off = (bloq - 3); + return (m_byt + ((1ULL << off) - 1)) >> off; + } + } + } + } break; + } +} + static ur_nref _coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint8_t *byt, uint64_t len) { From ab217fc8746a43f474e36837ebc3e78ee47bdb28 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 10 Aug 2020 12:54:55 -0700 Subject: [PATCH 017/123] ur: adds dictionaries with noun keys and arbitrary 32/64-bit values --- pkg/urbit/include/ur/hashcons.h | 45 ++++++++ pkg/urbit/ur/hashcons.c | 191 ++++++++++++++++++++++++++++++-- 2 files changed, 228 insertions(+), 8 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 7511f941d7..a6b83f388c 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -59,6 +59,30 @@ typedef struct ur_nvec_s { ur_nref* refs; } ur_nvec_t; +typedef struct ur_pail32_s { + uint8_t fill; + ur_nref refs[10]; + uint32_t vals[10]; +} ur_pail32_t; + +typedef struct ur_dict32_s { + uint64_t prev; + uint64_t size; + ur_pail32_t *buckets; +} ur_dict32_t; + +typedef struct ur_pail64_s { + uint8_t fill; + ur_nref refs[10]; + uint64_t vals[10]; +} ur_pail64_t; + +typedef struct ur_dict64_s { + uint64_t prev; + uint64_t size; + ur_pail64_t *buckets; +} ur_dict64_t; + typedef struct ur_pail_s { uint8_t fill; ur_nref refs[10]; @@ -95,6 +119,27 @@ typedef 
struct ur_root_s { ur_atoms_t atoms; } ur_root_t; +void +ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size); + +ur_bool_t +ur_dict32_get(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t *out); + +void +ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val); + +void +ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size); + +void +ur_dict_free(ur_dict_t *dict); + +ur_bool_t +ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out); + +void +ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val); + ur_nref ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 142d5a8b7a..b4489b58e5 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -100,6 +100,181 @@ ur_tail(ur_root_t *r, ur_nref ref) assert( ur_deep(ref) ); return r->cells.tails[ur_nref_idx(ref)]; } +void +ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) +{ + ur_pail32_t *buckets, *old_buckets = dict->buckets; + uint64_t old_size = dict->size; + uint64_t i, next = prev + size; + + buckets = calloc(next, sizeof(*buckets)); + + for ( i = 0; i < old_size; i++ ) { + ur_pail32_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_bucket->fill; + + for ( j = 0; j < old_fill; j++ ) { + uint32_t val = old_bucket->vals[j]; + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); + + uint64_t idx = ( mug % next ); + ur_pail32_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; + + if ( 10 == new_fill ) { + free(buckets); + return ur_dict32_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + bucket->vals[new_fill] = val; + bucket->fill = 1 + new_fill; + } + } + + free(old_buckets); + + dict->prev = size; + dict->size = next; + dict->buckets = buckets; +} + +ur_bool_t +ur_dict32_get(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t *out) +{ + 
ur_mug mug = ur_nref_mug(r, ref); + uint64_t idx = ( mug % dict->size ); + + ur_pail32_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = bucket->fill; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + *out = bucket->vals[i]; + return 1; + } + } + + return 0; +} + +void +ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val) +{ + ur_mug mug = ur_nref_mug(r, ref); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail32_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = bucket->fill; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + bucket->vals[i] = val; + return; + } + } + + if ( 10 == fill ) { + ur_dict32_grow(r, dict, dict->prev, dict->size); + continue; + } + + bucket->refs[fill] = ref; + bucket->vals[fill] = val; + bucket->fill = 1 + fill; + break; + } +} + +void +ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) +{ + ur_pail64_t *buckets, *old_buckets = dict->buckets; + uint64_t old_size = dict->size; + uint64_t i, next = prev + size; + + buckets = calloc(next, sizeof(*buckets)); + + for ( i = 0; i < old_size; i++ ) { + ur_pail64_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_bucket->fill; + + for ( j = 0; j < old_fill; j++ ) { + uint64_t val = old_bucket->vals[j]; + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); + + uint64_t idx = ( mug % next ); + ur_pail64_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; + + if ( 10 == new_fill ) { + free(buckets); + return ur_dict64_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + bucket->vals[new_fill] = val; + bucket->fill = 1 + new_fill; + } + } + + free(old_buckets); + + dict->prev = size; + dict->size = next; + dict->buckets = buckets; +} + +ur_bool_t +ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out) +{ + ur_mug mug = ur_nref_mug(r, ref); + uint64_t idx = ( mug % dict->size ); + + ur_pail64_t *bucket = 
&(dict->buckets[idx]); + uint8_t i, fill = bucket->fill; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + *out = bucket->vals[i]; + return 1; + } + } + + return 0; +} + +void +ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val) +{ + ur_mug mug = ur_nref_mug(r, ref); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail64_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = bucket->fill; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + bucket->vals[i] = val; + return; + } + } + + if ( 10 == fill ) { + ur_dict64_grow(r, dict, dict->prev, dict->size); + continue; + } + + bucket->refs[fill] = ref; + bucket->vals[fill] = val; + bucket->fill = 1 + fill; + break; + } +} void ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) @@ -139,6 +314,12 @@ ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) dict->buckets = buckets; } +void +ur_dict_free(ur_dict_t *dict) +{ + free(dict->buckets); +} + void ur_atoms_grow(ur_atoms_t *atoms) { @@ -542,12 +723,6 @@ ur_hcon_info(FILE *f, ur_root_t *r) _print_memory(f, "total", total); } -static void -_dict_free(ur_dict_t *dict) -{ - free(dict->buckets); -} - static void _atoms_free(ur_atoms_t *atoms) { @@ -558,14 +733,14 @@ _atoms_free(ur_atoms_t *atoms) free(bytes[i]); } - _dict_free(&(atoms->dict)); + ur_dict_free(&(atoms->dict)); free(bytes); } static void _cells_free(ur_cells_t *cells) { - _dict_free(&(cells->dict)); + ur_dict_free(&(cells->dict)); free(cells->heads); } From d0e5c658a9a5e14eb40d49a1f707257dcfd05fb0 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 10 Aug 2020 12:45:27 -0700 Subject: [PATCH 018/123] ur/u3: enforce ur_coin* invariants, deduplicate 32-61-bit direct atoms --- pkg/urbit/noun/urth.c | 52 ++++++++++++++++++++--------------------- pkg/urbit/ur/hashcons.c | 35 +++++++++++++++++++++------ 2 files changed, 54 insertions(+), 33 deletions(-) diff --git a/pkg/urbit/noun/urth.c 
b/pkg/urbit/noun/urth.c index facd88ba19..674b734c44 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -29,10 +29,6 @@ _cu_met_3(u3a_atom* vat_u) } } -// XX this is morally correct, but not useful -// for deduplicating the loom -// -#if 0 static inline ur_nref _cu_atom_to_ref(u3a_atom* vat_u, ur_root_t *r) { @@ -61,20 +57,6 @@ _cu_atom_to_ref(u3a_atom* vat_u, ur_root_t *r) return ref; } -#endif - -/* _cu_atom_to_ref(): indirect u3 atom to ur_nref. -*/ -static inline ur_nref -_cu_atom_to_ref(u3a_atom* vat_u, ur_root_t *r) -{ - c3_assert( vat_u->len_w ); - - c3_y* byt_y = (c3_y*)vat_u->buf_w; - c3_w len_w = _cu_met_3(vat_u); - - return ur_coin_bytes(r, byt_y, (c3_d)len_w); -} /* _cu_box_check(): check loom allocation box for relocation pointer. */ @@ -240,8 +222,9 @@ _cu_hamt_walk(u3_noun kev, void* ptr) } typedef struct _cu_loom_s { - u3_atom *vat; - u3_noun *cel; + ur_dict32_t map_u; // direct->indirect mapping + u3_atom *vat; // indirect atoms + u3_noun *cel; // cells } _cu_loom; /* _cu_loom_free(): dispose loom relocation pointers @@ -249,6 +232,7 @@ typedef struct _cu_loom_s { static void _cu_loom_free(_cu_loom* lom_u) { + ur_dict_free((ur_dict_t*)&(lom_u->map_u)); free(lom_u->vat); free(lom_u->cel); } @@ -278,16 +262,29 @@ _cu_ref_to_noun(ur_nref ref, _cu_loom* lom_u) default: assert(0); case ur_direct: { - if ( 0x7fffffffULL > ref ) { + if ( 0x7fffffffULL >= ref ) { return (u3_atom)ref; } else { - c3_w wor_w[2]; + c3_w val_w; - wor_w[0] = ref & 0xffffffff; - wor_w[1] = ref >> 32; + // XX the ur_root_t argument here is only used to dereference a mug, + // but these atoms are all direct, so it'll never be used + // + if ( ur_dict32_get(0, &lom_u->map_u, ref, &val_w) ) { + return (u3_atom)val_w; + } + else { + u3_atom vat; + { + c3_w wor_w[2] = { ref & 0xffffffff, ref >> 32 }; + vat = val_w = u3i_words(2, wor_w); + } - return u3i_words(2, wor_w); + ur_dict32_put(0, &lom_u->map_u, ref, val_w); + + return vat; + } } } break; @@ -362,7 +359,10 @@ 
u3u_uniq(void) { // reallocate all nouns on the loom // - _cu_loom lom_u; + _cu_loom lom_u = {0}; + + ur_dict32_grow(0, &lom_u.map_u, 89, 144); + _cu_atoms_to_loom(rot_u, &lom_u); _cu_cells_to_loom(rot_u, &lom_u); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index b4489b58e5..e07e839809 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -518,17 +518,13 @@ _cons_unsafe(ur_cells_t *cells, ur_mug mug, ur_nref hed, ur_nref tal) return cel; } -ur_nref -ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) +static ur_nref +_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) { ur_atoms_t *atoms = &(r->atoms); ur_dict_t *dict = &(atoms->dict); ur_mug mug = ur_mug_bytes(byt, len); - // XX should check for <= 62 bits, coin direct - // XX conflicts with current u3u_uniq() use-case - // - while ( 1 ) { uint64_t idx = ( mug % dict->size ); ur_pail_t *bucket = &(dict->buckets[idx]); @@ -567,6 +563,31 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) } } +ur_nref +ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) +{ + // strip trailing zeroes + // + while ( len && !byt[len - 1] ) { + len--; + } + + // produce a direct atom if possible + // + if ( 62 >= _met0_bytes(byt, len) ) { + uint64_t i, direct = 0; + + for ( i = 0; i < len; i++ ) { + direct |= byt[i] << (8 * i); + } + + return (ur_nref)direct; + } + else { + return _coin_bytes_unsafe(r, byt, len); + } +} + ur_nref ur_coin64(ur_root_t *r, uint64_t n) { @@ -576,7 +597,7 @@ ur_coin64(ur_root_t *r, uint64_t n) else { // XX little-endian // - return ur_coin_bytes(r, (uint8_t*)&n, ur_met3_64(n)); + return _coin_bytes_unsafe(r, (uint8_t*)&n, ur_met3_64(n)); } } From 039ef019b9de7dd425b53abccc241da6999441a1 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 10 Aug 2020 12:56:54 -0700 Subject: [PATCH 019/123] ur: adds ur_walk_fore - higher-ordered pre-order noun traversal --- pkg/urbit/include/ur/hashcons.h | 7 +++++ pkg/urbit/ur/hashcons.c | 51 
+++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index a6b83f388c..374c520754 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -163,3 +163,10 @@ ur_nvec_free(ur_nvec_t *v); void ur_nvec_init(ur_nvec_t *v, uint64_t size); + +void +ur_walk_fore(ur_root_t *r, + ur_nref ref, + void *v, + void (*atom)(ur_root_t*, ur_nref, void*), + ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index e07e839809..ced7cc37d1 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -821,3 +821,54 @@ ur_nvec_init(ur_nvec_t *v, uint64_t size) v->fill = 0; v->refs = calloc(size, sizeof(ur_nref)); } + +void +ur_walk_fore(ur_root_t *r, + ur_nref ref, + void *v, + void (*atom)(ur_root_t*, ur_nref, void*), + ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)) +{ + uint64_t prev = 89, size = 144, fill = 0; + ur_nref *top, *don; + + don = malloc(size * sizeof(*don)); + top = don + ++fill; + *top = ref; + + while ( top != don ) { + // visit atom, pop stack + // + if ( !ur_deep(ref) ) { + atom(r, ref, v); + top--; fill--; + } + // visit cell, pop stack if false + // + else if ( !cell(r, ref, v) ) { + top--; fill--; + } + // push the tail, continue into the head + // + else { + *top = ur_tail(r, ref); + + // reallocate "stack" if full + // + if ( size == fill ) { + uint64_t next = prev + size; + don = realloc(don, next * sizeof(*don)); + top = don + fill; + prev = size; + size = next; + } + + top++; fill++; + *top = ur_head(r, ref); + } + + ref = *top; + } + + free(don); +} From eedb1389750ba0a07f6715ebe856894dc1b49ff7 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 14 Aug 2020 10:56:31 -0700 Subject: [PATCH 020/123] ur: refactors indirect-atom (re-)allocation --- pkg/urbit/ur/hashcons.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git 
a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index ced7cc37d1..fd544957f4 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -482,18 +482,14 @@ _coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint8_t *byt, uint64_t len) uint64_t fill = atoms->fill; ur_tag tag = ur_iatom; ur_nref tom = ( fill | ((uint64_t)tag << 62) ); - uint8_t *copy = malloc(len); // XX necessary? // assert( 62 >= ur_met0_64(fill) ); - assert(copy); - memcpy(copy, byt, len); - - atoms->bytes[fill] = copy; - atoms->lens[fill] = len; - atoms->mugs[fill] = mug; + atoms->bytes[fill] = byt; + atoms->lens[fill] = len; + atoms->mugs[fill] = mug; atoms->fill = 1 + fill; return tom; @@ -584,7 +580,11 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) return (ur_nref)direct; } else { - return _coin_bytes_unsafe(r, byt, len); + uint8_t *copy = malloc(len); + assert( copy ); + memcpy(copy, byt, len); + + return _coin_bytes_unsafe(r, copy, len); } } @@ -595,9 +595,20 @@ ur_coin64(ur_root_t *r, uint64_t n) return n; } else { - // XX little-endian - // - return _coin_bytes_unsafe(r, (uint8_t*)&n, ur_met3_64(n)); + uint8_t *byt = malloc(8); + assert( byt ); + assert( 8 == ur_met3_64(n) ); + + byt[0] = ur_mask_8(n); + byt[1] = ur_mask_8(n >> 8); + byt[2] = ur_mask_8(n >> 16); + byt[3] = ur_mask_8(n >> 24); + byt[4] = ur_mask_8(n >> 32); + byt[5] = ur_mask_8(n >> 40); + byt[6] = ur_mask_8(n >> 48); + byt[7] = ur_mask_8(n >> 56); + + return _coin_bytes_unsafe(r, byt, 8); } } From 377ee490f3cfc2d4593a7797de5f22e56c0ce172 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 14 Aug 2020 11:12:55 -0700 Subject: [PATCH 021/123] u3: corrects _cu_from_loom() stack struct comment --- pkg/urbit/noun/urth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 674b734c44..aa3557bd12 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -95,8 +95,8 @@ _cu_box_stash(u3a_noun* som_u, ur_nref ref) // In Hoon, this 
structure would be as follows: // // $% [%root ~] -// [%head cell=^] -// [%tail cell=^ hed-mug=@] +// [%head loom-cell=^] +// [%tail loom-cell=^ off-loom-head=*] // == // From ee8c8ed1bd32b38be946f25d70b215ba665ed5db Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 11 Aug 2020 19:02:23 -0700 Subject: [PATCH 022/123] ur: implements jam --- pkg/urbit/include/ur/hashcons.h | 3 + pkg/urbit/tests/ur_tests.c | 157 +++++++++++++++ pkg/urbit/ur/hashcons.c | 334 ++++++++++++++++++++++++++++++++ 3 files changed, 494 insertions(+) create mode 100644 pkg/urbit/tests/ur_tests.c diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 374c520754..b0ccd3de20 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -170,3 +170,6 @@ ur_walk_fore(ur_root_t *r, void *v, void (*atom)(ur_root_t*, ur_nref, void*), ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); + +uint64_t +ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c new file mode 100644 index 0000000000..0653ced129 --- /dev/null +++ b/pkg/urbit/tests/ur_tests.c @@ -0,0 +1,157 @@ +#include "all.h" +#include "ur/hashcons.h" + +static c3_i +_test_jam(const c3_c* cap_c, + ur_root_t* rot_u, + ur_nref ref, + c3_w len_w, + const c3_y* res_y) +{ + c3_d i_d, len_d; + c3_y* out_y; + c3_i ret_i; + + ur_jam(rot_u, ref, &len_d, &out_y); + + if ( 0 != memcmp(out_y, res_y, len_w) ) { + fprintf(stderr, "\033[31m%s fail\033[0m\r\n", cap_c); + + fprintf(stderr, " actual: { "); + for ( i_d = 0; i_d < len_d; i_d++ ) { + fprintf(stderr, "0x%x, ", out_y[i_d]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " expect: { "); + for ( i_d = 0; i_d < len_w; i_d++ ) { + fprintf(stderr, "0x%x, ", res_y[i_d]); + } + fprintf(stderr, "}\r\n"); + + ret_i = 0; + } + else { + ret_i = 1; + } + + c3_free(out_y); + + return ret_i; +} + +static c3_i +_test_ur(void) +{ + ur_root_t* rot_u = ur_hcon_init(); + c3_d i_d, len_d; + 
c3_y* byt_y; + c3_i res_i = 1; + +# define nc(a, b) ur_cons(rot_u, a, b) +# define nt(a, b, c) nc(a, nc(b, c)) + + { + c3_c* cap_c = "jam 0"; + c3_y res_y[1] = { 0x2 }; + ur_nref ref = 0; + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam 1"; + c3_y res_y[1] = { 0xc }; + ur_nref ref = 1; + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam 2"; + c3_y res_y[1] = { 0x48 }; + ur_nref ref = 2; + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam %fast"; + c3_y res_y[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 }; + ur_nref ref = c3__fast; + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam %full"; + c3_y res_y[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 }; + ur_nref ref = c3__full; + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [0 0]"; + c3_y res_y[1] = { 0x29 }; + ur_nref ref = nc(0, 0); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [1 1]"; + c3_y res_y[2] = { 0x31, 0x3 }; + ur_nref ref = nc(1, 1); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [2 3]"; + c3_y res_y[2] = { 0x21, 0xd1 }; + ur_nref ref = nc(2, 3); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [%fast %full]"; + c3_y res_y[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; + ur_nref ref = nc(c3__fast, c3__full); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [1 1 1]"; + c3_y res_y[2] = { 0x71, 0xcc }; + ur_nref ref = nc(1, nc(1, 1)); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [%fast %full %fast]"; + c3_y res_y[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 }; + ur_nref ref = nc(c3__fast, nc(c3__full, c3__fast)); + 
res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam [[0 0] [[0 0] 1 1] 1 1]"; + c3_y res_y[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 }; + ur_nref ref = nc(nc(0, 0), nc(nc(nc(0, 0), nc(1, 1)), nc(1, 1))); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + { + c3_c* cap_c = "jam big"; + c3_y res_y[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 }; + ur_nref ref = nc(nc(nc(1, nc(nc(2, nc(nc(3, nc(nc(4, nc(nt(5, 6, nc(7, nc(nc(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0); + res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + } + + return res_i; +} + +int +main(int argc, char* argv[]) +{ + if ( !_test_ur() ) { + fprintf(stderr, "ur test failed\r\n"); + return 1; + } + + fprintf(stderr, "ur ok\n"); + return 0; +} diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index fd544957f4..96966762fc 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -883,3 +883,337 @@ ur_walk_fore(ur_root_t *r, free(don); } + +typedef struct ur_bsw_s { + uint64_t prev; + uint64_t size; + uint64_t fill; + uint64_t bits; + uint8_t off; + uint8_t *bytes; +} ur_bsw_t; + +static inline void +ur_bsw_grow(ur_bsw_t *bsw) +{ + uint64_t prev = bsw->prev; + uint64_t size = bsw->size; + uint64_t next = prev + size; + + // fprintf(stderr, "bsw: grow: %" PRIu64 "-%" PRIu64" fill: %" PRIu64 "\r\n", size, next, bsw->fill); + + bsw->bytes = realloc(bsw->bytes, next); + assert(bsw->bytes); + memset(bsw->bytes + size, 0, prev); + + bsw->prev = size; + bsw->size = next; +} + +static void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t old = bsw->bytes[fill]; + + bsw->bytes[fill] = old ^ ((bit & 1) << off); + + if ( 7 == off ) { + if ( ++fill == bsw->size ) { + ur_bsw_grow(bsw); + } + + bsw->fill = fill; + bsw->off = 0; + } + else { + bsw->off = 1 + off; + } + + bsw->bits++; + + // if ( (bsw->fill << 3) + bsw->off != 
bsw->bits ) { + // fprintf(stderr, "bit fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n", + // bsw->fill, + // bsw->off, + // bsw->bits, + // (bsw->fill << 3) + bsw->off); + // assert(0); + // } +} + +static void +ur_bsw_bits(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + + // XX + assert( 8 > len ); + + { + uint8_t rest = 8 - off; + uint8_t old = bsw->bytes[fill]; + + if ( len < rest ) { + uint8_t left = (byt & ((1 << len) - 1)) << off; + + bsw->bytes[fill] = old ^ left; + bsw->off = off + len; + } + else { + uint8_t left, right; + + left = (byt & ((1 << rest) - 1)) << off; + off = len - rest; + right = (byt >> rest) & ((1 << off) - 1); + + if ( (fill + 1 + !!off) >= bsw->size ) { + ur_bsw_grow(bsw); + } + + bsw->bytes[fill] = old ^ left; + fill++; + bsw->bytes[fill] = right; + + bsw->fill = fill; + bsw->off = off; + } + } + + bsw->bits += len; + + // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) { + // fprintf(stderr, "bits fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n", + // bsw->fill, + // bsw->off, + // bsw->bits, + // (bsw->fill << 3) + bsw->off); + // assert(0); + // } +} + +static void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t fill = bsw->fill; + uint64_t full = fill + len; + uint8_t off = bsw->off; + + // XX + assert(len); + + if ( (full + !!off) >= bsw->size ) { + uint64_t prev = bsw->prev; + + // be sure to grow sufficiently + // + if ( len > prev ) { + bsw->prev = len; + } + + ur_bsw_grow(bsw); + } + + if ( !off ) { + memcpy(bsw->bytes + fill, byt, len); + } + else { + uint8_t rest = 8 - off; + uint8_t left, right, old = bsw->bytes[fill]; + uint64_t i; + + for ( i = 0; i < len; i++ ) { + left = (byt[i] & ((1 << rest) - 1)) << off; + right = (byt[i] >> rest) & ((1 << off) - 1); + + bsw->bytes[fill++] = old ^ left; + old = right; + } + + bsw->bytes[fill] = old; + + assert( full == fill ); + } + + bsw->fill = full; + bsw->bits += len 
<< 3; + + // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) { + // fprintf(stderr, "bytes fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n", + // bsw->fill, + // bsw->off, + // bsw->bits, + // (bsw->fill << 3) + bsw->off); + // assert(0); + // } +} + +static void +ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) +{ + // assumes little-endian + // + uint8_t *byt = (uint8_t*)&val; + uint8_t len_byt = len_bit >> 3; + uint8_t low = ur_mask_3(len_bit); + + if ( len_byt ) { + ur_bsw_bytes(bsw, len_byt, byt); + } + + if ( low ) { + ur_bsw_bits(bsw, low, byt[len_byt]); + } +} + +static inline void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) +{ + if ( 0 == val ) { + ur_bsw_bit(bsw, 1); + } + else { + uint8_t len_len = ur_met0_64(len_bit); + + ur_bsw64(bsw, len_len + 1, 1ULL << len_len); + ur_bsw64(bsw, len_len - 1, len_bit); + ur_bsw64(bsw, len_bit, val); + } +} + + +static inline void +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) +{ + // write run-length + // + { + uint8_t len_len = ur_met0_64(len_bit); + + // XX + assert( 64 > len_len ); + + ur_bsw64(bsw, len_len + 1, 1ULL << len_len); + ur_bsw64(bsw, len_len - 1, len_bit); + } + + // write bytes + // + { + uint8_t low = ur_mask_3(len_bit); + + if ( !low ) { + ur_bsw_bytes(bsw, len, byt); + } + else { + uint64_t last = len - 1; + ur_bsw_bytes(bsw, last, byt); + ur_bsw_bits(bsw, low, byt[last]); + } + } +} + +static inline void +_jam_mat(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len_bit) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: { + ur_bsw_mat64(bsw, len_bit, ref); + } break; + + case ur_iatom: { + uint64_t len; + uint8_t *byt; + ur_bytes(r, ref, &byt, &len); + ur_bsw_mat_bytes(bsw, len_bit, len, byt); + } break; + } +} + +typedef struct _jam_s { + ur_dict64_t dict; + ur_bsw_t bsw; +} _jam_t; + +static void +_jam_atom(ur_root_t *r, ur_nref ref, void *ptr) +{ + _jam_t *j = ptr; + ur_dict64_t *dict = 
&(j->dict); + ur_bsw_t *bsw = &j->bsw; + uint64_t bak, len_bit; + + len_bit = ur_met(r, 0, ref); + + if ( !ur_dict64_get(r, dict, ref, &bak) ) { + ur_dict64_put(r, dict, ref, bsw->bits); + + ur_bsw_bit(bsw, 0); + _jam_mat(r, ref, bsw, len_bit); + } + else { + uint64_t bak_bit = ur_met0_64(bak); + + if ( len_bit <= bak_bit ) { + ur_bsw_bit(bsw, 0); + _jam_mat(r, ref, bsw, len_bit); + } + else { + ur_bsw_bit(bsw, 1); + ur_bsw_bit(bsw, 1); + ur_bsw_mat64(bsw, bak_bit, bak); + } + } +} + +static ur_bool_t +_jam_cell(ur_root_t *r, ur_nref ref, void *ptr) +{ + _jam_t *j = ptr; + ur_dict64_t *dict = &(j->dict); + ur_bsw_t *bsw = &j->bsw; + uint64_t bak; + + if ( !ur_dict64_get(r, dict, ref, &bak) ) { + ur_dict64_put(r, dict, ref, bsw->bits); + + ur_bsw_bit(bsw, 1); + ur_bsw_bit(bsw, 0); + + return 1; // true + } + else { + ur_bsw_bit(bsw, 1); + ur_bsw_bit(bsw, 1); + ur_bsw_mat64(bsw, ur_met0_64(bak), bak); + + return 0; // false + } +} + +uint64_t +ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) +{ + _jam_t j = {0}; + { + uint64_t fib11 = 89, fib12 = 144; + + j.bsw.prev = fib11; + j.bsw.size = fib12; + j.bsw.bytes = calloc(j.bsw.size, 1); + + ur_dict64_grow(r, &j.dict, fib11, fib12); + } + + ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); + ur_dict_free((ur_dict_t*)&j.dict); + + *len = j.bsw.fill + !!j.bsw.off; + *byt = j.bsw.bytes; + + return j.bsw.bits; +} From 509c0744453724a36a96ade022a7848de63d9bbb Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 14 Aug 2020 11:14:32 -0700 Subject: [PATCH 023/123] ur: implements cue --- pkg/urbit/include/ur/hashcons.h | 14 ++ pkg/urbit/tests/ur_tests.c | 62 +++-- pkg/urbit/ur/hashcons.c | 392 ++++++++++++++++++++++++++++++++ 3 files changed, 454 insertions(+), 14 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index b0ccd3de20..17ca35a330 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -173,3 +173,17 @@ ur_walk_fore(ur_root_t *r, 
uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); + +typedef enum { + ur_cue_good = 0, + ur_cue_gone = 1 +} ur_cue_res_e; + +typedef enum { + ur_jam_atom = 0, + ur_jam_cell = 1, + ur_jam_back = 2 +} ur_cue_tag_e; + +ur_cue_res_e +ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 0653ced129..c563bf303f 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -15,7 +15,7 @@ _test_jam(const c3_c* cap_c, ur_jam(rot_u, ref, &len_d, &out_y); if ( 0 != memcmp(out_y, res_y, len_w) ) { - fprintf(stderr, "\033[31m%s fail\033[0m\r\n", cap_c); + fprintf(stderr, "\033[31mjam %s fail\033[0m\r\n", cap_c); fprintf(stderr, " actual: { "); for ( i_d = 0; i_d < len_d; i_d++ ) { @@ -39,6 +39,27 @@ _test_jam(const c3_c* cap_c, return ret_i; } +static c3_i +_test_cue(const c3_c* cap_c, + ur_root_t* rot_u, + ur_nref ref, + c3_w len_w, + const c3_y* res_y) +{ + ur_nref out; + + if ( ur_cue_good != ur_cue(rot_u, len_w, res_y, &out) ) { + fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap_c); + return 0; + } + else if ( ref != out ) { + fprintf(stderr, "\033[31mcue %s fail 2 ref=%" PRIu64 " out=%" PRIu64 " \033[0m\r\n", cap_c, ref, out); + return 0; + } + + return 1; +} + static c3_i _test_ur(void) { @@ -51,94 +72,107 @@ _test_ur(void) # define nt(a, b, c) nc(a, nc(b, c)) { - c3_c* cap_c = "jam 0"; + c3_c* cap_c = "0"; c3_y res_y[1] = { 0x2 }; ur_nref ref = 0; res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam 1"; + c3_c* cap_c = "1"; c3_y res_y[1] = { 0xc }; ur_nref ref = 1; res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam 2"; + c3_c* cap_c = "2"; c3_y res_y[1] = { 0x48 }; ur_nref ref = 2; res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + 
res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam %fast"; + c3_c* cap_c = "%fast"; c3_y res_y[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 }; ur_nref ref = c3__fast; res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam %full"; + c3_c* cap_c = "%full"; c3_y res_y[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 }; ur_nref ref = c3__full; res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [0 0]"; + c3_c* cap_c = "[0 0]"; c3_y res_y[1] = { 0x29 }; ur_nref ref = nc(0, 0); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [1 1]"; + c3_c* cap_c = "[1 1]"; c3_y res_y[2] = { 0x31, 0x3 }; ur_nref ref = nc(1, 1); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [2 3]"; + c3_c* cap_c = "[2 3]"; c3_y res_y[2] = { 0x21, 0xd1 }; ur_nref ref = nc(2, 3); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [%fast %full]"; + c3_c* cap_c = "[%fast %full]"; c3_y res_y[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; ur_nref ref = nc(c3__fast, c3__full); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [1 1 1]"; + c3_c* cap_c = "[1 1 1]"; c3_y res_y[2] = { 0x71, 0xcc }; ur_nref ref = nc(1, nc(1, 1)); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [%fast %full %fast]"; + c3_c* cap_c = "[%fast %full %fast]"; c3_y res_y[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 
0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 }; ur_nref ref = nc(c3__fast, nc(c3__full, c3__fast)); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam [[0 0] [[0 0] 1 1] 1 1]"; + c3_c* cap_c = "[[0 0] [[0 0] 1 1] 1 1]"; c3_y res_y[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 }; ur_nref ref = nc(nc(0, 0), nc(nc(nc(0, 0), nc(1, 1)), nc(1, 1))); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } { - c3_c* cap_c = "jam big"; + c3_c* cap_c = "big"; c3_y res_y[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 }; ur_nref ref = nc(nc(nc(1, nc(nc(2, nc(nc(3, nc(nc(4, nc(nt(5, 6, nc(7, nc(nc(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0); res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); + res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); } return res_i; diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 96966762fc..2b694bc821 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1115,6 +1115,192 @@ ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) } } +typedef struct ur_bsr_s { + uint64_t left; + uint64_t bits; + uint8_t off; + const uint8_t *bytes; +} ur_bsr_t; + +static inline ur_cue_res_e +ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) +{ + uint8_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + uint8_t byt = bsr->bytes[0]; + uint8_t off = bsr->off; + uint8_t bit = (byt >> off) & 1; + + if ( 7 == off ) { + left--; + + if ( left ) { + bsr->bytes++; + bsr->left = left; + } + else { + bsr->bytes = 0; + bsr->left = 0; + } + + bsr->off = 0; + } + else { + bsr->off = 1 + off; + } + + bsr->bits++; + + *out = bit; + + return ur_cue_good; + } +} + +static inline ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) +{ + ur_cue_res_e res; + uint8_t bit; + + if ( ur_cue_good != (res = 
ur_bsr_bit(bsr, &bit)) ) { + return res; + } + else if ( 0 == bit ) { + *out = ur_jam_atom; + return ur_cue_good; + } + else if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + + *out = ( 0 == bit ) ? ur_jam_cell : ur_jam_back; + return ur_cue_good; +} + +static inline ur_cue_res_e +ur_bsr_zeros(ur_bsr_t *bsr, uint8_t *out) +{ + ur_cue_res_e res; + uint8_t bit, len = 0; + + while ( (ur_cue_good == (res = ur_bsr_bit(bsr, &bit))) && (0 == bit) ) { + len++; + } + + if ( ur_cue_good != res ) { + return res; + } + else { + *out = len; + return ur_cue_good; + } +} + +static inline uint64_t +ur_bsr64(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t acc = 0; + uint64_t i; + uint8_t bit; + + for ( i = 0; i < len; i++ ) { + if ( ur_cue_good != ur_bsr_bit(bsr, &bit) ) { + bsr->bits += len - i; + bsr->bytes = 0; + return acc; + } + + acc ^= (uint64_t)bit << i; + } + + return acc; +} + +static inline void +ur_bsr_bytes(ur_bsr_t *bsr, uint64_t len, uint8_t *out) +{ + uint8_t left = bsr->left; + uint8_t off = bsr->off; + ur_bool_t end = len >= left; + + if ( !left ) { + return; + } + + if ( !off ) { + if ( end ) { + memcpy(out, bsr->bytes, left); + bsr->bytes = 0; + left = 0; + } + else { + memcpy(out, bsr->bytes, len); + bsr->bytes += len; + left -= len; + } + } + // the most-significant bits from a byte in the stream + // become the least-significant bits of an output byte, and vice-versa + // + else { + uint8_t rest = 8 - off; + const uint8_t *bytes = bsr->bytes; + uint8_t byt = bytes[0]; + uint8_t l, m; + uint64_t max = end ? 
(left - 1) : len; + uint64_t i; + + for ( i = 0; i < max; i++ ) { + m = byt >> off; + byt = bytes[1 + i]; + l = byt & ((1 << off) - 1); + out[i] = m ^ (l << rest); + } + + if ( end ) { + out[max] = bytes[max] >> off; + + bsr->bytes = 0; + left = 0; + } + else { + bsr->bytes += max; + left -= max; + } + } + + bsr->left = left; + bsr->bits += len << 3; +} + +static inline ur_cue_res_e +ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) +{ + uint8_t len; + + if ( ur_cue_gone == ur_bsr_zeros(bsr, &len) ) { + return ur_cue_gone; + } + + // XX + assert( 64 > len ); + + if ( !len ) { + *out = 0; + } + else { + len--; + *out = ur_bsr64(bsr, len) ^ (1ULL << len); + } + + return ur_cue_good; +} + static inline void _jam_mat(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len_bit) { @@ -1217,3 +1403,209 @@ ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) return j.bsw.bits; } + +typedef struct _cue_s { + ur_dict64_t dict; + ur_bsr_t bsr; +} _cue_t; + +static inline ur_cue_res_e +_cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) +{ + ur_bsr_t *bsr = &c->bsr; + ur_cue_res_e res; + uint64_t len; + + if ( ur_cue_good != (res = ur_bsr_mat(bsr, &len)) ) { + return res; + } + + if ( 62 >= len ) { + *out = (ur_nref)ur_bsr64(bsr, len); + } + else { + uint8_t *byt = calloc(len, 1); + ur_bsr_bytes(bsr, len, byt); + + // strip trailing zeroes + // + while ( len && !byt[len - 1] ) { + len--; + } + + *out = _coin_bytes_unsafe(r, byt, len); + } + + return ur_cue_good; +} + +static inline ur_cue_res_e +_cue_back(ur_bsr_t *bsr, uint64_t *out) +{ + ur_cue_res_e res; + uint64_t len; + + if ( ur_cue_good != (res = ur_bsr_mat(bsr, &len)) ) { + return res; + } + + // XX + assert( 62 >= len ); + + *out = ur_bsr64(bsr, len); + return ur_cue_good; +} + +#define STACK_ROOT 0 +#define STACK_HEAD 1 +#define STACK_TAIL 2 + +// stack frame for recording head vs tail iteration +// +// In Hoon, this structure would be as follows: +// +// $% [%root ~] +// [%head cursor=@] +// [%tail cursor=@ hed-ref=*] 
+// == +// +typedef struct _cue_frame_s { + uint8_t tag; + uint64_t bits; + ur_nref ref; +} _cue_frame_t; + +typedef struct _cue_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue_frame_t* f; +} _cue_stack_t; + +static inline void +_cue_stack_push(_cue_stack_t *s, uint8_t tag, uint64_t bits, ur_nref ref) +{ + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; + } + + _cue_frame_t* f = &(s->f[s->fill++]); + f->tag = tag; + f->bits = bits; + f->ref = ref; +} + +ur_cue_res_e +ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out) +{ + ur_cue_res_e res; + ur_nref ref; + _cue_t c = {0}; + _cue_stack_t s = { .prev = 89, .size = 144, .fill = 0, .f = 0 }; + + // init bitstream-reader + // + c.bsr.left = len; + c.bsr.bytes = byt; + + // init dictionary + // + { + uint64_t fib11 = 89, fib12 = 144; + ur_dict64_grow(r, &c.dict, fib11, fib12); + } + + // setup stack + // + s.f = malloc(s.size * sizeof(*s.f)); + _cue_stack_push(&s, STACK_ROOT, 0, 0); + + // advance into buffer + // + advance: { + uint64_t bits = c.bsr.bits; + ur_cue_tag_e tag; + + if ( ur_cue_good != (res = ur_bsr_tag(&c.bsr, &tag)) ) { + goto perfect; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_atom: { + if ( ur_cue_good != (res = _cue_atom(r, &c, &ref)) ) { + goto perfect; + } + else { + ur_dict64_put(r, &c.dict, bits, (uint64_t)ref); + goto retreat; + } + } + + case ur_jam_back: { + uint64_t bak, val; + + if ( ur_cue_good != (res = _cue_back(&c.bsr, &bak)) ) { + goto perfect; + } + else if ( !ur_dict64_get(r, &c.dict, bak, &val) ) { + // XX distinguish bad backref? 
+ // + res = ur_cue_gone; + goto perfect; + } + + ref = (ur_nref)val; + goto retreat; + } + + case ur_jam_cell: { + _cue_stack_push(&s, STACK_HEAD, bits, 0); + goto advance; + } + } + } + + // retreat down the stack + // + retreat: { + _cue_frame_t f = s.f[--s.fill]; + + switch ( f.tag ) { + default: assert(0); + + case STACK_ROOT: { + res = ur_cue_good; + goto perfect; + } + + case STACK_HEAD: { + _cue_stack_push(&s, STACK_TAIL, f.bits, ref); + goto advance; + } + + case STACK_TAIL: { + ref = ur_cons(r, f.ref, ref); + ur_dict64_put(r, &c.dict, f.bits, (uint64_t)ref); + goto retreat; + } + } + } + + // we done + // + perfect: { + ur_dict_free((ur_dict_t*)&c.dict); + free(s.f); + + if ( ur_cue_good == res ) { + *out = ref; + } + + return res; + } +} From e160620256f27cc391e56cd2fe3cc77090b4b936 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 17 Aug 2020 15:24:52 -0700 Subject: [PATCH 024/123] ur: refactors jam/cue tests --- pkg/urbit/tests/ur_tests.c | 201 ++++++++++++++++++------------------- 1 file changed, 97 insertions(+), 104 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index c563bf303f..98f83e5873 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1,181 +1,174 @@ -#include "all.h" +#include +#include +#include +#include +#include +#include + #include "ur/hashcons.h" -static c3_i -_test_jam(const c3_c* cap_c, - ur_root_t* rot_u, - ur_nref ref, - c3_w len_w, - const c3_y* res_y) +static int +_test_jam_spec(const char *cap, + ur_root_t *r, + ur_nref ref, + size_t len, + const uint8_t *res) { - c3_d i_d, len_d; - c3_y* out_y; - c3_i ret_i; + uint64_t i, out_len; + uint8_t *out; + int ret; - ur_jam(rot_u, ref, &len_d, &out_y); + ur_jam(r, ref, &out_len, &out); - if ( 0 != memcmp(out_y, res_y, len_w) ) { - fprintf(stderr, "\033[31mjam %s fail\033[0m\r\n", cap_c); + if ( 0 != memcmp(out, res, len) ) { + fprintf(stderr, "\033[31mjam %s fail\033[0m\r\n", cap); fprintf(stderr, " actual: { "); - for ( i_d 
= 0; i_d < len_d; i_d++ ) { - fprintf(stderr, "0x%x, ", out_y[i_d]); + for ( i = 0; i < out_len; i++ ) { + fprintf(stderr, "0x%x, ", out[i]); } fprintf(stderr, "}\r\n"); fprintf(stderr, " expect: { "); - for ( i_d = 0; i_d < len_w; i_d++ ) { - fprintf(stderr, "0x%x, ", res_y[i_d]); + for ( i = 0; i < len; i++ ) { + fprintf(stderr, "0x%x, ", res[i]); } fprintf(stderr, "}\r\n"); - ret_i = 0; + ret = 0; } else { - ret_i = 1; + ret = 1; } - c3_free(out_y); + free(out); - return ret_i; + return ret; } -static c3_i -_test_cue(const c3_c* cap_c, - ur_root_t* rot_u, - ur_nref ref, - c3_w len_w, - const c3_y* res_y) +static int +_test_cue_spec(const char *cap, + ur_root_t* r, + ur_nref ref, + size_t len, + const uint8_t *res) { ur_nref out; - if ( ur_cue_good != ur_cue(rot_u, len_w, res_y, &out) ) { - fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap_c); + if ( ur_cue_good != ur_cue(r, len, res, &out) ) { + fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap); return 0; } else if ( ref != out ) { - fprintf(stderr, "\033[31mcue %s fail 2 ref=%" PRIu64 " out=%" PRIu64 " \033[0m\r\n", cap_c, ref, out); + fprintf(stderr, "\033[31mcue %s fail 2 ref=%" PRIu64 " out=%" PRIu64 " \033[0m\r\n", cap, ref, out); return 0; } return 1; } -static c3_i -_test_ur(void) +static int +_test_jam_cue(void) { - ur_root_t* rot_u = ur_hcon_init(); - c3_d i_d, len_d; - c3_y* byt_y; - c3_i res_i = 1; + ur_root_t *r = ur_hcon_init(); + int ret = 1; -# define nc(a, b) ur_cons(rot_u, a, b) -# define nt(a, b, c) nc(a, nc(b, c)) +# define NC(a, b) ur_cons(r, a, b) +# define NT(a, b, c) NC(a, NC(b, c)) + +# define FAST 0x74736166 +# define FULL 0x6c6c7566 + +# define TEST_CASE(a, b) \ + const char* cap = a; \ + ur_nref ref = b; \ + ret &= _test_jam_spec(cap, r, ref, sizeof(res), res); \ + ret &= _test_cue_spec(cap, r, ref, sizeof(res), res); \ { - c3_c* cap_c = "0"; - c3_y res_y[1] = { 0x2 }; - ur_nref ref = 0; - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= 
_test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[1] = { 0x2 }; + TEST_CASE("0", 0); } { - c3_c* cap_c = "1"; - c3_y res_y[1] = { 0xc }; - ur_nref ref = 1; - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[1] = { 0xc }; + TEST_CASE("1", 1); } { - c3_c* cap_c = "2"; - c3_y res_y[1] = { 0x48 }; - ur_nref ref = 2; - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[1] = { 0x48 }; + TEST_CASE("2", 2); } { - c3_c* cap_c = "%fast"; - c3_y res_y[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 }; - ur_nref ref = c3__fast; - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 }; + TEST_CASE("%fast", FAST); } { - c3_c* cap_c = "%full"; - c3_y res_y[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 }; - ur_nref ref = c3__full; - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 }; + TEST_CASE("%full", FULL); } { - c3_c* cap_c = "[0 0]"; - c3_y res_y[1] = { 0x29 }; - ur_nref ref = nc(0, 0); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[1] = { 0x29 }; + TEST_CASE("[0 0]", NC(0, 0)); } { - c3_c* cap_c = "[1 1]"; - c3_y res_y[2] = { 0x31, 0x3 }; - ur_nref ref = nc(1, 1); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[2] = { 0x31, 0x3 }; + TEST_CASE("[1 1]", NC(1, 1)); } { - c3_c* cap_c = "[2 3]"; - c3_y res_y[2] = { 0x21, 0xd1 }; - ur_nref ref = nc(2, 3); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), 
res_y); + uint8_t res[2] = { 0x21, 0xd1 }; + TEST_CASE("[2 3]", NC(2, 3)); } { - c3_c* cap_c = "[%fast %full]"; - c3_y res_y[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; - ur_nref ref = nc(c3__fast, c3__full); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; + TEST_CASE("[%fast %full]", NC(FAST, FULL)); } { - c3_c* cap_c = "[1 1 1]"; - c3_y res_y[2] = { 0x71, 0xcc }; - ur_nref ref = nc(1, nc(1, 1)); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[2] = { 0x71, 0xcc }; + TEST_CASE("[1 1 1]", NC(1, NC(1, 1))); } { - c3_c* cap_c = "[%fast %full %fast]"; - c3_y res_y[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 }; - ur_nref ref = nc(c3__fast, nc(c3__full, c3__fast)); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 }; + TEST_CASE("[%fast %full %fast]", NC(FAST, NC(FULL, FAST))); } { - c3_c* cap_c = "[[0 0] [[0 0] 1 1] 1 1]"; - c3_y res_y[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 }; - ur_nref ref = nc(nc(0, 0), nc(nc(nc(0, 0), nc(1, 1)), nc(1, 1))); - res_i &= _test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 }; + TEST_CASE("[[0 0] [[0 0] 1 1] 1 1]", NC(NC(0, 0), NC(NC(NC(0, 0), NC(1, 1)), NC(1, 1)))); } { - c3_c* cap_c = "big"; - c3_y res_y[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 }; - ur_nref ref = nc(nc(nc(1, nc(nc(2, nc(nc(3, nc(nc(4, nc(nt(5, 6, nc(7, nc(nc(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0); - res_i &= 
_test_jam(cap_c, rot_u, ref, sizeof(res_y), res_y); - res_i &= _test_cue(cap_c, rot_u, ref, sizeof(res_y), res_y); + uint8_t res[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 }; + TEST_CASE("deep", NC(NC(NC(1, NC(NC(2, NC(NC(3, NC(NC(4, NC(NT(5, 6, NC(7, NC(NC(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0)); } - return res_i; + { + uint8_t inp[33] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + uint8_t res[35] = { 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; + TEST_CASE("wide", ur_coin_bytes(r, inp, sizeof(inp))); + } + + return ret; +} + +static int +_test_ur(void) +{ + int ret = 1; + + if ( !_test_jam_cue() ) { + fprintf(stderr, "ur test jam/cue failed\r\n"); + ret = 0; + } + + return ret; } int From 9282247ba5716c1ae9584034b1e69fc39a694c86 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 17 Aug 2020 17:45:00 -0700 Subject: [PATCH 025/123] ur: adds tests for bitstream write functions (ur_bsw*) --- pkg/urbit/include/ur/hashcons.h | 32 +++ pkg/urbit/tests/ur_tests.c | 333 ++++++++++++++++++++++++++++++++ pkg/urbit/ur/hashcons.c | 315 +++++++++++++++++------------- 3 files changed, 545 insertions(+), 135 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 17ca35a330..1f492168d2 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -171,6 +171,38 @@ ur_walk_fore(ur_root_t *r, void (*atom)(ur_root_t*, ur_nref, void*), ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); + +typedef struct ur_bsw_s { + uint64_t prev; + uint64_t size; + uint64_t fill; + uint64_t bits; + uint8_t off; + uint8_t *bytes; +} ur_bsw_t; + +void +ur_bsw_grow(ur_bsw_t *bsw); + +ur_bool_t +ur_bsw_sane(ur_bsw_t *bsw); + +void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); + +void +ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); +void +ur_bsw8_slow(ur_bsw_t *bsw, uint8_t 
len, uint8_t byt); + +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val); +void +ur_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 98f83e5873..c32f616d6f 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -7,6 +7,334 @@ #include "ur/hashcons.h" +static void +_bsw_init(ur_bsw_t *bsw, uint64_t prev, uint64_t size) +{ + bsw->prev = prev; + bsw->size = size; + bsw->bits = 0; + bsw->fill = 0; + bsw->off = 0; + + free(bsw->bytes); + bsw->bytes = calloc(size, 1); +} + +static int +_bsw_bit_check(const char* cap, ur_bsw_t *bsw, uint8_t byt, uint8_t off) +{ + int ret = 1; + + if ( !ur_bsw_sane(bsw) ) { + fprintf(stderr, "%s: insane off=%u fill=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, bsw->off, bsw->fill, bsw->bits); + ret = 0; + } + + if ( byt != bsw->bytes[0] ) { + fprintf(stderr, "%s: bytes fail (%u, %u)\r\n", cap, byt, bsw->bytes[0]); + ret = 0; + } + + if ( off != bsw->off ) { + fprintf(stderr, "%s: offset fail (%u, %u)\r\n", cap, off, bsw->off); + ret = 0; + } + + return ret; +} + +static int +_test_bsw_bit_ones(void) +{ + int ret = 1; + ur_bsw_t bsw = {0}; + _bsw_init(&bsw, 1, 1); + + ret &= _bsw_bit_check("bsw ones init", &bsw, 0x0, 0); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones a", &bsw, 0x1, 1); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones b", &bsw, 0x3, 2); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones c", &bsw, 0x7, 3); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones d", &bsw, 0xf, 4); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones e", &bsw, 0x1f, 5); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones f", &bsw, 0x3f, 6); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw ones g", &bsw, 0x7f, 7); + + ur_bsw_bit(&bsw, 1); + ret &= 
_bsw_bit_check("bsw ones h", &bsw, 0xff, 0); + + if ( bsw.size != 2 ) { + fprintf(stderr, "bsw ones grow: fail\r\n"); + ret = 0; + } + + free(bsw.bytes); + + return ret; +} + +static int +_test_bsw_bit_zeros(void) +{ + int ret = 1; + ur_bsw_t bsw = {0}; + _bsw_init(&bsw, 1, 1); + + ret &= _bsw_bit_check("bsw zeros init", &bsw, 0x0, 0); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros a", &bsw, 0x0, 1); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros b", &bsw, 0x0, 2); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros c", &bsw, 0x0, 3); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros d", &bsw, 0x0, 4); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros e", &bsw, 0x0, 5); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros f", &bsw, 0x0, 6); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros g", &bsw, 0x0, 7); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw zeros h", &bsw, 0x0, 0); + + if ( bsw.size != 2 ) { + fprintf(stderr, "bsw zeros grow: fail\r\n"); + ret = 0; + } + + free(bsw.bytes); + + return ret; +} + +static int +_test_bsw_bit_alt(void) +{ + int ret = 1; + ur_bsw_t bsw = {0}; + _bsw_init(&bsw, 1, 1); + + ret &= _bsw_bit_check("bsw alt init", &bsw, 0x0, 0); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt a", &bsw, 0x0, 1); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt b", &bsw, 0x2, 2); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt c", &bsw, 0x2, 3); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt d", &bsw, 0xa, 4); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt e", &bsw, 0xa, 5); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt f", &bsw, 0x2a, 6); + + ur_bsw_bit(&bsw, 0); + ret &= _bsw_bit_check("bsw alt g", &bsw, 0x2a, 7); + + ur_bsw_bit(&bsw, 1); + ret &= _bsw_bit_check("bsw alt h", &bsw, 0xaa, 0); + + if ( bsw.size != 2 ) { + fprintf(stderr, "bsw alt grow: fail\r\n"); + ret = 0; + } + + free(bsw.bytes); + + return 
ret; +} + +static int +_test_bsw_bit(void) +{ + return _test_bsw_bit_ones() + & _test_bsw_bit_zeros() + & _test_bsw_bit_alt(); +} + +static int +_bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t *a, ur_bsw_t *b) +{ + int ret = 1; + + if ( !ur_bsw_sane(a) ) { + fprintf(stderr, "%s: val 0x%02x off %u, len %u: a insane off=%u fill=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, val, off, len, a->off, a->fill, a->bits); + ret = 0; + } + if ( !ur_bsw_sane(b) ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: b insane off=%u fill=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, val, off, len, b->off, b->fill, b->bits); + ret = 0; + } + + if ( a->bytes[0] != b->bytes[0] ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: bytes fail (0x%02x, 0x%02x)\r\n", + cap, val, off, len, a->bytes[0], b->bytes[0]); + ret = 0; + } + + if ( a->off != b->off ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: offset fail (%u, %u)\r\n", + cap, val, off, len, a->off, b->off); + ret = 0; + } + + if ( a->fill != b->fill ) { + fprintf(stderr, "%s: val 0x%02x off %u len %u: fill fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, val, off, len, a->fill, b->fill); + ret = 0; + } + + return ret; +} + +static int +_test_bsw8_loop(const char* cap, uint8_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 8; j++ ) { + _bsw_init(&a, 1, 1); + _bsw_init(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + ur_bsw8_slow(&a, j, val); + ur_bsw8(&b, j, val); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw8(void) +{ + return _test_bsw8_loop("bsw bits ones", 0xff) + & _test_bsw8_loop("bsw bits zeros", 0x0) + & _test_bsw8_loop("bsw bits alt 1", 0xaa) + & _test_bsw8_loop("bsw bits alt 2", 0x55); +} + +static int +_test_bsw64_loop(const char* cap, uint64_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j; + + for ( i = 0; i < 8; i++) { 
+ for ( j = 0; j <= 64; j++ ) { + _bsw_init(&a, 1, 1); + _bsw_init(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + ur_bsw64_slow(&a, j, val); + ur_bsw64(&b, j, val); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw64(void) +{ + return _test_bsw64_loop("bsw 64 ones", 0xffffffffffffffffULL) + & _test_bsw64_loop("bsw 64 zeros", 0x0ULL) + & _test_bsw64_loop("bsw 64 alt 1", 0xaaaaaaaaaaaaaaaaULL) + & _test_bsw64_loop("bsw 64 alt 2", 0x5555555555555555ULL); +} + +static int +_test_bsw_bytes_loop(const char* cap, uint64_t len, uint8_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j, *byt; + + for ( i = 0; i < 8; i++) { + _bsw_init(&a, 1, 1); + _bsw_init(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + byt = malloc(len); + + for ( j = 0; j < len; j++ ) { + ur_bsw8_slow(&a, 8, val); + byt[j] = val; + } + + ur_bsw_bytes(&b, len, byt); + free(byt); + + ret &= _bsw_cmp_check(cap, val, 8, i, &a, &b); + } + + return ret; +} + +static int +_test_bsw_bytes(void) +{ + return _test_bsw_bytes_loop("bsw bytes nought", 0, 0x0) + & _test_bsw_bytes_loop("bsw bytes ones odd", 3, 0xff) + & _test_bsw_bytes_loop("bsw bytes ones even", 4, 0xff) + & _test_bsw_bytes_loop("bsw bytes zeros odd", 5, 0x0) + & _test_bsw_bytes_loop("bsw bytes zeros even", 6, 0x0) + & _test_bsw_bytes_loop("bsw bytes alt 1 odd", 7, 0xaa) + & _test_bsw_bytes_loop("bsw bytes alt 1 even", 8, 0xaa) + & _test_bsw_bytes_loop("bsw bytes alt 2 odd", 9, 0x55) + & _test_bsw_bytes_loop("bsw bytes alt 2 odd", 10, 0x55); +} + +static int +_test_bsw(void) +{ + return _test_bsw_bit() + & _test_bsw8() + & _test_bsw64() + & _test_bsw_bytes(); +} + static int _test_jam_spec(const char *cap, ur_root_t *r, @@ -163,6 +491,11 @@ _test_ur(void) { int ret = 1; + if ( !_test_bsw() ) { + fprintf(stderr, "ur test bsw failed\r\n"); + ret = 0; + } + if ( !_test_jam_cue() ) { fprintf(stderr, "ur test jam/cue failed\r\n"); ret = 0; diff --git 
a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 2b694bc821..62739903cc 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -884,24 +884,13 @@ ur_walk_fore(ur_root_t *r, free(don); } -typedef struct ur_bsw_s { - uint64_t prev; - uint64_t size; - uint64_t fill; - uint64_t bits; - uint8_t off; - uint8_t *bytes; -} ur_bsw_t; - -static inline void +void ur_bsw_grow(ur_bsw_t *bsw) { uint64_t prev = bsw->prev; uint64_t size = bsw->size; uint64_t next = prev + size; - // fprintf(stderr, "bsw: grow: %" PRIu64 "-%" PRIu64" fill: %" PRIu64 "\r\n", size, next, bsw->fill); - bsw->bytes = realloc(bsw->bytes, next); assert(bsw->bytes); memset(bsw->bytes + size, 0, prev); @@ -910,21 +899,23 @@ ur_bsw_grow(ur_bsw_t *bsw) bsw->size = next; } -static void -ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) +ur_bool_t +ur_bsw_sane(ur_bsw_t *bsw) +{ + return ( (8 > bsw->off) + && ((bsw->fill << 3) + bsw->off == bsw->bits) ); +} + +static inline void +_bsw_bit_unsafe(ur_bsw_t *bsw, uint8_t bit) { uint64_t fill = bsw->fill; uint8_t off = bsw->off; - uint8_t old = bsw->bytes[fill]; - bsw->bytes[fill] = old ^ ((bit & 1) << off); + bsw->bytes[fill] ^= (bit & 1) << off; if ( 7 == off ) { - if ( ++fill == bsw->size ) { - ur_bsw_grow(bsw); - } - - bsw->fill = fill; + bsw->fill = 1 + fill; bsw->off = 0; } else { @@ -932,92 +923,81 @@ ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) } bsw->bits++; - - // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) { - // fprintf(stderr, "bit fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n", - // bsw->fill, - // bsw->off, - // bsw->bits, - // (bsw->fill << 3) + bsw->off); - // assert(0); - // } } -static void -ur_bsw_bits(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) { - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - - // XX - assert( 8 > len ); - + if ( (7 == bsw->off) + && ((1 + bsw->fill) == bsw->size) ) { - uint8_t rest = 8 - off; - uint8_t old = bsw->bytes[fill]; - - if ( len < 
rest ) { - uint8_t left = (byt & ((1 << len) - 1)) << off; - - bsw->bytes[fill] = old ^ left; - bsw->off = off + len; - } - else { - uint8_t left, right; - - left = (byt & ((1 << rest) - 1)) << off; - off = len - rest; - right = (byt >> rest) & ((1 << off) - 1); - - if ( (fill + 1 + !!off) >= bsw->size ) { - ur_bsw_grow(bsw); - } - - bsw->bytes[fill] = old ^ left; - fill++; - bsw->bytes[fill] = right; - - bsw->fill = fill; - bsw->off = off; - } - } - - bsw->bits += len; - - // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) { - // fprintf(stderr, "bits fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n", - // bsw->fill, - // bsw->off, - // bsw->bits, - // (bsw->fill << 3) + bsw->off); - // assert(0); - // } -} - -static void -ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - uint64_t fill = bsw->fill; - uint64_t full = fill + len; - uint8_t off = bsw->off; - - // XX - assert(len); - - if ( (full + !!off) >= bsw->size ) { - uint64_t prev = bsw->prev; - - // be sure to grow sufficiently - // - if ( len > prev ) { - bsw->prev = len; - } - ur_bsw_grow(bsw); } + _bsw_bit_unsafe(bsw, bit); +} + +static inline void +_bsw8_unsafe(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t rest = 8 - off; + uint8_t old = bsw->bytes[fill]; + + if ( len < rest ) { + uint8_t left = (byt & ((1 << len) - 1)) << off; + + bsw->bytes[fill] = old ^ left; + bsw->off = off + len; + } + else { + uint8_t left, right; + + left = (byt & ((1 << rest) - 1)) << off; + off = len - rest; + right = (byt >> rest) & ((1 << off) - 1); + + bsw->bytes[fill] = old ^ left; + fill++; + bsw->bytes[fill] = right; + + bsw->fill = fill; + bsw->off = off; + } + + bsw->bits += len; +} + +void +ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + if ( bsw->fill + !!((bsw->off + len) >> 3) >= bsw->size ) { + ur_bsw_grow(bsw); + } + + _bsw8_unsafe(bsw, (len > 8) ? 
8 : len, byt); +} + +void +ur_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + while ( len ) { + ur_bsw_bit(bsw, byt); + byt >>= 1; + len--; + } +} + +static inline void +_bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + if ( !off ) { memcpy(bsw->bytes + fill, byt, len); + fill += len; } else { uint8_t rest = 8 - off; @@ -1033,25 +1013,32 @@ ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) } bsw->bytes[fill] = old; - - assert( full == fill ); } - bsw->fill = full; + bsw->fill = fill; bsw->bits += len << 3; - - // if ( (bsw->fill << 3) + bsw->off != bsw->bits ) { - // fprintf(stderr, "bytes fill: %" PRIu64 " off: %u bits: %" PRIu64 ", calc: %" PRIu64 "\r\n", - // bsw->fill, - // bsw->off, - // bsw->bits, - // (bsw->fill << 3) + bsw->off); - // assert(0); - // } } -static void -ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) +void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + if ( (bsw->fill + len + !!bsw->off) >= bsw->size ) { + uint64_t prev = bsw->prev; + + // be sure to grow sufficiently + // + if ( len > prev ) { + bsw->prev = len; + } + + ur_bsw_grow(bsw); + } + + _bsw_bytes_unsafe(bsw, len, byt); +} + +static inline void +_bsw64_unsafe(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) { // assumes little-endian // @@ -1060,44 +1047,74 @@ ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) uint8_t low = ur_mask_3(len_bit); if ( len_byt ) { - ur_bsw_bytes(bsw, len_byt, byt); + _bsw_bytes_unsafe(bsw, len_byt, byt); } if ( low ) { - ur_bsw_bits(bsw, low, byt[len_byt]); + _bsw8_unsafe(bsw, low, byt[len_byt]); + } +} + +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t bits = bsw->off + len; + + if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { + ur_bsw_grow(bsw); + } + + _bsw64_unsafe(bsw, (len > 64) ? 
64 : len, val); +} + +void +ur_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; } } static inline void -ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) +_bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len_len, uint8_t len, uint64_t val) { if ( 0 == val ) { - ur_bsw_bit(bsw, 1); + _bsw_bit_unsafe(bsw, 1); } else { - uint8_t len_len = ur_met0_64(len_bit); - - ur_bsw64(bsw, len_len + 1, 1ULL << len_len); - ur_bsw64(bsw, len_len - 1, len_bit); - ur_bsw64(bsw, len_bit, val); + _bsw64_unsafe(bsw, len_len + 1, 1ULL << len_len); + _bsw64_unsafe(bsw, len_len - 1, len); + _bsw64_unsafe(bsw, len, val); } } +void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + len = ( len > 64 ) ? 64 : len; + + { + uint8_t len_len = ur_met0_64(len); + uint8_t next = ( 0 == val ) ? 1 : len + (2 * len_len); + uint8_t bits = bsw->off + next; + + if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { + ur_bsw_grow(bsw); + } + + _bsw_mat64_unsafe(bsw, len_len, len, val); + } +} static inline void -ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) +_bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint8_t len_len, uint64_t len_bit, uint64_t len, uint8_t *byt) { // write run-length // - { - uint8_t len_len = ur_met0_64(len_bit); - - // XX - assert( 64 > len_len ); - - ur_bsw64(bsw, len_len + 1, 1ULL << len_len); - ur_bsw64(bsw, len_len - 1, len_bit); - } + _bsw64_unsafe(bsw, len_len + 1, 1ULL << len_len); + _bsw64_unsafe(bsw, len_len - 1, len_bit); // write bytes // @@ -1105,16 +1122,44 @@ ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) uint8_t low = ur_mask_3(len_bit); if ( !low ) { - ur_bsw_bytes(bsw, len, byt); + _bsw_bytes_unsafe(bsw, len, byt); } else { - uint64_t last = len - 1; - ur_bsw_bytes(bsw, last, byt); - ur_bsw_bits(bsw, low, byt[last]); + len--; + _bsw_bytes_unsafe(bsw, len, byt); + _bsw8_unsafe(bsw, low, byt[len]); } } } +void 
+ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) +{ + uint8_t len_len = ur_met0_64(len_bit); + + // XX + assert( 64 > len_len ); + + { + uint8_t bits = bsw->off + (2 * len_len); + uint64_t need = len + (bits >> 3) + !!ur_mask_3(bits); + + if ( (bsw->fill + need) >= bsw->size ) { + uint64_t prev = bsw->prev; + + // be sure to grow sufficiently + // + if ( need > prev ) { + bsw->prev = need; + } + + ur_bsw_grow(bsw); + } + + _bsw_mat_bytes_unsafe(bsw, len_len, len_bit, len, byt); + } +} + typedef struct ur_bsr_s { uint64_t left; uint64_t bits; From a9d2a29bf2625e323b5615a0844e0eca62ad10fb Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 17 Aug 2020 18:35:00 -0700 Subject: [PATCH 026/123] ur: rewrites _bsw64_unsafe() for efficiency --- pkg/urbit/ur/hashcons.c | 172 ++++++++++++++++++++++++++++++---------- 1 file changed, 132 insertions(+), 40 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 62739903cc..1893a6ee60 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -989,6 +989,138 @@ ur_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) } } +static inline void +_bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *bytes = bsw->bytes; + + bsw->bits += len; + + if ( off ) { + uint8_t rest = 8 - off; + + if ( len < rest ) { + bytes[fill] ^= (val & ((1 << len) - 1)) << off; + bsw->off = off + len; + return; + } + + bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; + val >>= rest; + len -= rest; + } + + switch ( len >> 3 ) { + case 8: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + bytes[fill++] = ur_mask_8(val >> 48); + bytes[fill++] = ur_mask_8(val >> 56); + + // no offset is possible here + // + bsw->fill = fill; + bsw->off = 0; + 
return; + } + + case 7: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + bytes[fill++] = ur_mask_8(val >> 48); + val >>= 56; + } break; + + case 6: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + val >>= 48; + } break; + + case 5: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + val >>= 40; + } break; + + case 4: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + val >>= 32; + } break; + + case 3: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + val >>= 24; + } break; + + case 2: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + val >>= 16; + } break; + + case 1: { + bytes[fill++] = ur_mask_8(val); + val >>= 8; + } break; + } + + off = ur_mask_3(len); + + if ( off ) { + bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); + } + + bsw->fill = fill; + bsw->off = off; +} + +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t bits = bsw->off + len; + + if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { + ur_bsw_grow(bsw); + } + + _bsw64_unsafe(bsw, (len > 64) ? 64 : len, val); +} + +void +ur_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + len = (len > 64) ? 
64 : len; + + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; + } +} + static inline void _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { @@ -1037,46 +1169,6 @@ ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) _bsw_bytes_unsafe(bsw, len, byt); } -static inline void -_bsw64_unsafe(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val) -{ - // assumes little-endian - // - uint8_t *byt = (uint8_t*)&val; - uint8_t len_byt = len_bit >> 3; - uint8_t low = ur_mask_3(len_bit); - - if ( len_byt ) { - _bsw_bytes_unsafe(bsw, len_byt, byt); - } - - if ( low ) { - _bsw8_unsafe(bsw, low, byt[len_byt]); - } -} - -void -ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - uint8_t bits = bsw->off + len; - - if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { - ur_bsw_grow(bsw); - } - - _bsw64_unsafe(bsw, (len > 64) ? 64 : len, val); -} - -void -ur_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - while ( len ) { - ur_bsw_bit(bsw, val & 0xff); - val >>= 1; - len--; - } -} - static inline void _bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len_len, uint8_t len, uint64_t val) { From 5b0c1320fbf850ca3d4f2750a3bf0584c6ca897d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 17 Aug 2020 20:29:16 -0700 Subject: [PATCH 027/123] ur: refactors ur_bsw8 and ur_bsw_bytes internals --- pkg/urbit/ur/hashcons.c | 42 ++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 1893a6ee60..3786dde19c 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -943,24 +943,26 @@ _bsw8_unsafe(ur_bsw_t *bsw, uint8_t len, uint8_t byt) uint64_t fill = bsw->fill; uint8_t off = bsw->off; uint8_t rest = 8 - off; - uint8_t old = bsw->bytes[fill]; + uint8_t l, m; + // the least-significant bits of the input become the + // most-significant bits of a byte in the output stream + // if ( len < rest ) { - uint8_t left = (byt & ((1 << len) - 1)) << 
off; + l = byt & ((1 << len) - 1); - bsw->bytes[fill] = old ^ left; + bsw->bytes[fill] ^= l << off; bsw->off = off + len; } + // and vice-versa + // else { - uint8_t left, right; + l = byt & ((1 << rest) - 1); + m = byt >> rest; - left = (byt & ((1 << rest) - 1)) << off; - off = len - rest; - right = (byt >> rest) & ((1 << off) - 1); - - bsw->bytes[fill] = old ^ left; - fill++; - bsw->bytes[fill] = right; + bsw->bytes[fill++] ^= l << off; + off = len - rest; + bsw->bytes[fill] = m & ((1 << off) - 1); bsw->fill = fill; bsw->off = off; @@ -1131,20 +1133,22 @@ _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) memcpy(bsw->bytes + fill, byt, len); fill += len; } + // the least-significant bits of the input become the + // most-significant bits of a byte in the output stream, and vice-versa + // else { uint8_t rest = 8 - off; - uint8_t left, right, old = bsw->bytes[fill]; - uint64_t i; + uint8_t mask = (1 << rest) - 1; + uint8_t l, m = bsw->bytes[fill]; + uint64_t i; for ( i = 0; i < len; i++ ) { - left = (byt[i] & ((1 << rest) - 1)) << off; - right = (byt[i] >> rest) & ((1 << off) - 1); - - bsw->bytes[fill++] = old ^ left; - old = right; + l = byt[i] & mask; + bsw->bytes[fill++] = m ^ (l << off); + m = byt[i] >> rest; } - bsw->bytes[fill] = old; + bsw->bytes[fill] = m; } bsw->fill = fill; From 572a0d2c91c30788c67071e56eb011e3374fff5f Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 17 Aug 2020 20:35:22 -0700 Subject: [PATCH 028/123] ur: adds ur_bsw32() --- pkg/urbit/include/ur/hashcons.h | 5 ++ pkg/urbit/tests/ur_tests.c | 34 +++++++++++++ pkg/urbit/ur/hashcons.c | 90 +++++++++++++++++++++++++++++++++ 3 files changed, 129 insertions(+) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 1f492168d2..9ca5e6479c 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -195,6 +195,11 @@ ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); void ur_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt); 
+void +ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); +void +ur_bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val); + void ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val); void diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index c32f616d6f..c2182bfcb6 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -251,6 +251,39 @@ _test_bsw8(void) & _test_bsw8_loop("bsw bits alt 2", 0x55); } +static int +_test_bsw32_loop(const char* cap, uint32_t val) +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t b = {0}; + uint8_t i, j; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 32; j++ ) { + _bsw_init(&a, 1, 1); + _bsw_init(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + ur_bsw32_slow(&a, j, val); + ur_bsw32(&b, j, val); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); + } + } + + return ret; +} + +static int +_test_bsw32(void) +{ + return _test_bsw32_loop("bsw 32 ones", 0xffffffff) + & _test_bsw32_loop("bsw 32 zeros", 0x0) + & _test_bsw32_loop("bsw 32 alt 1", 0xaaaaaaaa) + & _test_bsw32_loop("bsw 32 alt 2", 0x55555555); +} + static int _test_bsw64_loop(const char* cap, uint64_t val) { @@ -331,6 +364,7 @@ _test_bsw(void) { return _test_bsw_bit() & _test_bsw8() + & _test_bsw32() & _test_bsw64() & _test_bsw_bytes(); } diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 3786dde19c..10681d14d0 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -991,6 +991,96 @@ ur_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) } } +static inline void +_bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *bytes = bsw->bytes; + + bsw->bits += len; + + if ( off ) { + uint8_t rest = 8 - off; + + if ( len < rest ) { + bytes[fill] ^= (val & ((1 << len) - 1)) << off; + bsw->off = off + len; + return; + } + + bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; + val >>= rest; + len -= rest; + } + + switch ( len >> 3 ) { + case 4: { 
+ bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + + // no offset is possible here + // + bsw->fill = fill; + bsw->off = 0; + return; + } + + case 3: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + val >>= 24; + } break; + + case 2: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + val >>= 16; + } break; + + case 1: { + bytes[fill++] = ur_mask_8(val); + val >>= 8; + } break; + } + + off = ur_mask_3(len); + + if ( off ) { + bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); + } + + bsw->fill = fill; + bsw->off = off; +} + +void +ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + uint8_t bits = bsw->off + len; + + if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { + ur_bsw_grow(bsw); + } + + _bsw32_unsafe(bsw, (len > 32) ? 32 : len, val); +} + +void +ur_bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + len = (len > 32) ? 
32 : len; + + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; + } +} + static inline void _bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) { From d4522adb0c33248b484933fe4aa6a1979583e601 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 19 Aug 2020 10:18:58 -0700 Subject: [PATCH 029/123] ur: moves slow8/32/64 write functions into tests --- pkg/urbit/include/ur/hashcons.h | 6 ----- pkg/urbit/tests/ur_tests.c | 44 ++++++++++++++++++++++++++++++--- pkg/urbit/ur/hashcons.c | 34 ------------------------- 3 files changed, 40 insertions(+), 44 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 9ca5e6479c..10c243e5ab 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -192,18 +192,12 @@ ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); void ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); -void -ur_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt); void ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); -void -ur_bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val); void ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val); -void -ur_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val); void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index c2182bfcb6..70e55af026 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -218,6 +218,18 @@ _bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t return ret; } +static void +_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + len = (len > 8) ? 
8 : len; + + while ( len ) { + ur_bsw_bit(bsw, byt); + byt >>= 1; + len--; + } +} + static int _test_bsw8_loop(const char* cap, uint8_t val) { @@ -232,7 +244,7 @@ _test_bsw8_loop(const char* cap, uint8_t val) _bsw_init(&b, 1, 1); a.off = a.bits = b.off = b.bits = i; - ur_bsw8_slow(&a, j, val); + _bsw8_slow(&a, j, val); ur_bsw8(&b, j, val); ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); @@ -251,6 +263,18 @@ _test_bsw8(void) & _test_bsw8_loop("bsw bits alt 2", 0x55); } +static void +_bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + len = (len > 32) ? 32 : len; + + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; + } +} + static int _test_bsw32_loop(const char* cap, uint32_t val) { @@ -265,7 +289,7 @@ _test_bsw32_loop(const char* cap, uint32_t val) _bsw_init(&b, 1, 1); a.off = a.bits = b.off = b.bits = i; - ur_bsw32_slow(&a, j, val); + _bsw32_slow(&a, j, val); ur_bsw32(&b, j, val); ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); @@ -284,6 +308,18 @@ _test_bsw32(void) & _test_bsw32_loop("bsw 32 alt 2", 0x55555555); } +static void +_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + len = (len > 64) ? 
64 : len; + + while ( len ) { + ur_bsw_bit(bsw, val & 0xff); + val >>= 1; + len--; + } +} + static int _test_bsw64_loop(const char* cap, uint64_t val) { @@ -298,7 +334,7 @@ _test_bsw64_loop(const char* cap, uint64_t val) _bsw_init(&b, 1, 1); a.off = a.bits = b.off = b.bits = i; - ur_bsw64_slow(&a, j, val); + _bsw64_slow(&a, j, val); ur_bsw64(&b, j, val); ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); @@ -332,7 +368,7 @@ _test_bsw_bytes_loop(const char* cap, uint64_t len, uint8_t val) byt = malloc(len); for ( j = 0; j < len; j++ ) { - ur_bsw8_slow(&a, 8, val); + _bsw8_slow(&a, 8, val); byt[j] = val; } diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 10681d14d0..d9ff000f90 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -981,16 +981,6 @@ ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt) _bsw8_unsafe(bsw, (len > 8) ? 8 : len, byt); } -void -ur_bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) -{ - while ( len ) { - ur_bsw_bit(bsw, byt); - byt >>= 1; - len--; - } -} - static inline void _bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) { @@ -1069,18 +1059,6 @@ ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val) _bsw32_unsafe(bsw, (len > 32) ? 32 : len, val); } -void -ur_bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) -{ - len = (len > 32) ? 32 : len; - - while ( len ) { - ur_bsw_bit(bsw, val & 0xff); - val >>= 1; - len--; - } -} - static inline void _bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) { @@ -1201,18 +1179,6 @@ ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) _bsw64_unsafe(bsw, (len > 64) ? 64 : len, val); } -void -ur_bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - len = (len > 64) ? 
64 : len; - - while ( len ) { - ur_bsw_bit(bsw, val & 0xff); - val >>= 1; - len--; - } -} - static inline void _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { From 0f3731a40c0b6b8aea32ad3e82fbc9a145eb9149 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 19 Aug 2020 00:44:21 -0700 Subject: [PATCH 030/123] ur: adds initial tests for bitstream-read functions (ur_bsr*) --- pkg/urbit/include/ur/hashcons.h | 30 +- pkg/urbit/tests/ur_tests.c | 684 ++++++++++++++++++++++++++++++++ pkg/urbit/ur/hashcons.c | 308 +++++++++++++- 3 files changed, 1010 insertions(+), 12 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 10c243e5ab..51bfab0759 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -202,14 +202,40 @@ ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val); void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); -uint64_t -ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); + +typedef struct ur_bsr_s { + uint64_t left; + uint64_t bits; + uint8_t off; + const uint8_t *bytes; +} ur_bsr_t; typedef enum { ur_cue_good = 0, ur_cue_gone = 1 } ur_cue_res_e; +ur_bool_t +ur_bsr_sane(ur_bsr_t *bsr); + +ur_cue_res_e +ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); + +uint8_t +ur_bsr_bit_any(ur_bsr_t *bsr); + +uint8_t +ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); + +uint32_t +ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); + +uint64_t +ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); + +uint64_t +ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); + typedef enum { ur_jam_atom = 0, ur_jam_cell = 1, diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 70e55af026..3c6fd6265f 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -405,6 +405,685 @@ _test_bsw(void) & _test_bsw_bytes(); } +static int +_bsr_bit_check(const char *cap, + ur_bsr_t *bsr, + uint8_t off, + uint64_t bits, + uint8_t exp, + uint8_t val, + ur_cue_res_e ser, + 
ur_cue_res_e res) +{ + int ret = 1; + + if ( !ur_bsr_sane(bsr) ) { + fprintf(stderr, "%s: insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, bsr->off, bsr->left, bsr->bits); + ret = 0; + } + + if ( ser != res ) { + fprintf(stderr, "%s: val not equal (%s, %s) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, (ur_cue_good == ser) ? "good" : "gone", + (ur_cue_good == res) ? "good" : "gone", + bsr->off, bsr->left, bsr->left ? bsr->bytes[0] : 0, bsr->bits); + ret = 0; + } + + if ( (ur_cue_good == res) && (exp != val) ) { + fprintf(stderr, "%s: res not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, exp, val, bsr->off, bsr->left, bsr->left ? bsr->bytes[0] : 0, bsr->bits); + ret = 0; + } + + if ( off != bsr->off ) { + fprintf(stderr, "%s: offset fail (%u, %u)\r\n", cap, off, bsr->off); + ret = 0; + } + + if ( bits != bsr->bits ) { + fprintf(stderr, "%s: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", cap, bits, bsr->bits); + ret = 0; + } + + return ret; +} + +static int +_test_bsr_bit_ones(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xff }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + ur_cue_res_e res; + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 1", &bsr, 1, 1, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 2", &bsr, 2, 2, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 3", &bsr, 3, 3, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 4", &bsr, 4, 4, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 5", &bsr, 5, 5, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 6", &bsr, 6, 6, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 7", &bsr, 7, 7, ur_cue_good, res, 1, 
out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 8", &bsr, 0, 8, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 9", &bsr, 0, 8, ur_cue_gone, res, 0, 0); + + return ret; +} + +static int +_test_bsr_bit_zeros(void) +{ + int ret = 1; + uint8_t ones[1] = { 0x0 }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + ur_cue_res_e res; + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 1", &bsr, 1, 1, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 2", &bsr, 2, 2, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 3", &bsr, 3, 3, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 4", &bsr, 4, 4, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 5", &bsr, 5, 5, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 6", &bsr, 6, 6, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 7", &bsr, 7, 7, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 8", &bsr, 0, 8, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 9", &bsr, 0, 8, ur_cue_gone, res, 0, 0); + + return ret; +} + +static int +_test_bsr_bit_alt(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xaa }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + ur_cue_res_e res; + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 1", &bsr, 1, 1, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 2", &bsr, 2, 2, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 3", &bsr, 3, 3, ur_cue_good, 
res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 4", &bsr, 4, 4, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 5", &bsr, 5, 5, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 6", &bsr, 6, 6, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 7", &bsr, 7, 7, ur_cue_good, res, 0, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 8", &bsr, 0, 8, ur_cue_good, res, 1, out); + + res = ur_bsr_bit(&bsr, &out); + ret &= _bsr_bit_check("bsr bit ones 9", &bsr, 0, 8, ur_cue_gone, res, 0, 0); + + return ret; +} + +static int +_test_bsr_bit(void) +{ + return _test_bsr_bit_ones() + & _test_bsr_bit_zeros() + & _test_bsr_bit_alt(); +} + +static int +_bsr_bit_any_check(const char* cap, ur_bsr_t *bsr, uint8_t off, uint64_t bits, uint8_t exp, uint8_t val) +{ + int ret = 1; + + if ( !ur_bsr_sane(bsr) ) { + fprintf(stderr, "%s: insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, bsr->off, bsr->left, bsr->bits); + ret = 0; + } + + if ( exp != val ) { + fprintf(stderr, "%s: not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, exp, val, bsr->off, bsr->left, bsr->left ? 
bsr->bytes[0] : 0, bsr->bits); + ret = 0; + } + + if ( off != bsr->off ) { + fprintf(stderr, "%s: offset fail (%u, %u)\r\n", cap, off, bsr->off); + ret = 0; + } + + if ( bits != bsr->bits ) { + fprintf(stderr, "%s: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", cap, bits, bsr->bits); + ret = 0; + } + + return ret; +} + +static int +_test_bsr_bit_any_ones(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xff }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + + ret &= _bsr_bit_any_check("bsr bit-any ones init", &bsr, 0, 0, 0, 0); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 1", &bsr, 1, 1, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 2", &bsr, 2, 2, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 3", &bsr, 3, 3, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 4", &bsr, 4, 4, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 5", &bsr, 5, 5, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 6", &bsr, 6, 6, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 7", &bsr, 7, 7, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 8", &bsr, 0, 8, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 9", &bsr, 0, 9, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 10", &bsr, 0, 10, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 11", &bsr, 0, 11, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 12", &bsr, 0, 12, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 13", &bsr, 0, 13, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones 
off 14", &bsr, 0, 14, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 15", &bsr, 0, 15, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 16", &bsr, 0, 16, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any ones off 17", &bsr, 0, 17, 0, out); + + return ret; +} + +static int +_test_bsr_bit_any_zeros(void) +{ + int ret = 1; + uint8_t ones[1] = { 0x0 }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + + ret &= _bsr_bit_any_check("bsr bit-any zeros init", &bsr, 0, 0, 0, 0); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 1", &bsr, 1, 1, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 2", &bsr, 2, 2, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 3", &bsr, 3, 3, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 4", &bsr, 4, 4, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 5", &bsr, 5, 5, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 6", &bsr, 6, 6, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 7", &bsr, 7, 7, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros 8", &bsr, 0, 8, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 9", &bsr, 0, 9, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 10", &bsr, 0, 10, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 11", &bsr, 0, 11, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 12", &bsr, 0, 12, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 13", &bsr, 0, 13, 0, out); + 
+ out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 14", &bsr, 0, 14, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 15", &bsr, 0, 15, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 16", &bsr, 0, 16, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any zeros off 17", &bsr, 0, 17, 0, out); + + return ret; +} + +static int +_test_bsr_bit_any_alt(void) +{ + int ret = 1; + uint8_t ones[1] = { 0xaa }; + ur_bsr_t bsr = { .left = sizeof(ones), .bytes = ones }; + uint8_t out; + + ret &= _bsr_bit_any_check("bsr bit-any alt init", &bsr, 0, 0, 0, 0); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 1", &bsr, 1, 1, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 2", &bsr, 2, 2, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 3", &bsr, 3, 3, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 4", &bsr, 4, 4, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 5", &bsr, 5, 5, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 6", &bsr, 6, 6, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 7", &bsr, 7, 7, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt 8", &bsr, 0, 8, 1, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 9", &bsr, 0, 9, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 10", &bsr, 0, 10, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 11", &bsr, 0, 11, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 12", &bsr, 0, 12, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= 
_bsr_bit_any_check("bsr bit-any alt off 13", &bsr, 0, 13, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 14", &bsr, 0, 14, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 15", &bsr, 0, 15, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 16", &bsr, 0, 16, 0, out); + + out = ur_bsr_bit_any(&bsr); + ret &= _bsr_bit_any_check("bsr bit-any alt off 17", &bsr, 0, 17, 0, out); + + return ret; +} + +static int +_test_bsr_bit_any(void) +{ + return _test_bsr_bit_any_ones() + & _test_bsr_bit_any_zeros() + & _test_bsr_bit_any_alt(); +} + +static int +_bsr_cmp_any_check(const char* cap, uint8_t off, uint8_t len, ur_bsr_t *a, ur_bsr_t *b) +{ + int ret = 1; + + if ( !ur_bsr_sane(a) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, a->off, a->left, a->bits); + ret = 0; + } + + if ( !ur_bsr_sane(b) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, b->off, b->left, b->bits); + ret = 0; + } + + if ( a->off != b->off ) { + fprintf(stderr, "%s: off %u len %u: offset fail (%u, %u)\r\n", + cap, off, len, a->off, b->off); + ret = 0; + } + + if ( a->left != b->left ) { + fprintf(stderr, "%s: off %u len %u: left fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->left, b->left); + ret = 0; + } + + if ( a->bits != b->bits ) { + fprintf(stderr, "%s: off %u len %u: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->bits, b->bits); + ret = 0; + } + + return ret; +} + +static uint8_t +_bsr8_any_slow(ur_bsr_t *bsr, uint8_t len) +{ + uint8_t i, out = 0; + + len = (len > 8) ? 
8 : len; + + for ( i = 0; i < len; i++ ) { + out ^= ur_bsr_bit_any(bsr) << i; + } + + return out; +} + +static int +_test_bsr8_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint8_t *bytes; + ur_bsr_t a, b; + uint8_t c, d, i, j, k; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 8; j++ ) { + bytes = malloc(len); + + for ( k = 0; k < len; k++ ) { + bytes[k] = val; + } + + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + c = _bsr8_any_slow(&a, j); + d = ur_bsr8_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( c != d ) { + fprintf(stderr, "%s: off %u, len %u not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); + ret = 0; + } + + free(bytes); + } + } + + return ret; +} + +static int +_test_bsr8(void) +{ + return _test_bsr8_loop("bsr8 ones 1", 1, 0xff) + & _test_bsr8_loop("bsr8 ones 2", 2, 0xff) + & _test_bsr8_loop("bsr8 zeros 1", 1, 0x0) + & _test_bsr8_loop("bsr8 zeros 2", 2, 0x0) + & _test_bsr8_loop("bsr8 alt-1 1", 1, 0xaa) + & _test_bsr8_loop("bsr8 alt-1 2", 2, 0xaa) + & _test_bsr8_loop("bsr8 alt-2 1", 1, 0x55) + & _test_bsr8_loop("bsr8 alt-2 2", 2, 0x55); +} + +static uint32_t +_bsr32_any_slow(ur_bsr_t *bsr, uint8_t len) +{ + uint32_t out = 0; + uint8_t i; + + len = (len > 32) ? 
32 : len; + + for ( i = 0; i < len; i++ ) { + out ^= (uint32_t)ur_bsr_bit_any(bsr) << i; + } + + return out; +} + +static int +_test_bsr32_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint8_t *bytes; + ur_bsr_t a, b; + uint32_t c, d; + uint8_t i, j, k; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 32; j++ ) { + bytes = malloc(len); + + for ( k = 0; k < len; k++ ) { + bytes[k] = val; + } + + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + c = _bsr32_any_slow(&a, j); + d = ur_bsr32_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( c != d ) { + fprintf(stderr, "%s: off %u, len %u not equal (%08x, %08x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); + ret = 0; + } + + free(bytes); + } + } + + return ret; +} + +static int +_test_bsr32(void) +{ + return _test_bsr32_loop("bsr32 ones 1", 1, 0xff) + & _test_bsr32_loop("bsr32 ones 2", 2, 0xff) + & _test_bsr32_loop("bsr32 ones 3", 3, 0xff) + & _test_bsr32_loop("bsr32 ones 4", 4, 0xff) + & _test_bsr32_loop("bsr32 zeros 1", 1, 0x0) + & _test_bsr32_loop("bsr32 zeros 2", 2, 0x0) + & _test_bsr32_loop("bsr32 zeros 3", 3, 0x0) + & _test_bsr32_loop("bsr32 zeros 4", 4, 0x0) + & _test_bsr32_loop("bsr32 alt-1 1", 1, 0xaa) + & _test_bsr32_loop("bsr32 alt-1 2", 2, 0xaa) + & _test_bsr32_loop("bsr32 alt-1 3", 3, 0xaa) + & _test_bsr32_loop("bsr32 alt-1 4", 4, 0xaa) + & _test_bsr32_loop("bsr32 alt-2 1", 1, 0x55) + & _test_bsr32_loop("bsr32 alt-2 2", 2, 0x55) + & _test_bsr32_loop("bsr32 alt-2 3", 3, 0x55) + & _test_bsr32_loop("bsr32 alt-2 4", 4, 0x55); +} + +static uint64_t +_bsr64_any_slow(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t out = 0; + uint8_t i; + + len = (len > 64) ? 
64 : len; + + for ( i = 0; i < len; i++ ) { + out ^= (uint64_t)ur_bsr_bit_any(bsr) << i; + } + + return out; +} + +static int +_test_bsr64_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint8_t *bytes; + ur_bsr_t a, b; + uint64_t c, d; + uint8_t i, j, k; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j <= 64; j++ ) { + bytes = malloc(len); + + for ( k = 0; k < len; k++ ) { + bytes[k] = val; + } + + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + c = _bsr64_any_slow(&a, j); + d = ur_bsr64_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( c != d ) { + fprintf(stderr, "%s: off %u, len %u not equal (%016" PRIx64", %016" PRIx64") off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); + ret = 0; + } + + free(bytes); + } + } + + return ret; +} + +static int +_test_bsr64(void) +{ + return _test_bsr64_loop("bsr64 ones 1", 1, 0xff) + & _test_bsr64_loop("bsr64 ones 2", 2, 0xff) + & _test_bsr64_loop("bsr64 ones 3", 3, 0xff) + & _test_bsr64_loop("bsr64 ones 4", 4, 0xff) + & _test_bsr64_loop("bsr64 ones 5", 5, 0xff) + & _test_bsr64_loop("bsr64 ones 6", 6, 0xff) + & _test_bsr64_loop("bsr64 ones 7", 7, 0xff) + & _test_bsr64_loop("bsr64 ones 8", 8, 0xff) + & _test_bsr64_loop("bsr64 zeros 1", 1, 0x0) + & _test_bsr64_loop("bsr64 zeros 2", 2, 0x0) + & _test_bsr64_loop("bsr64 zeros 3", 3, 0x0) + & _test_bsr64_loop("bsr64 zeros 4", 4, 0x0) + & _test_bsr64_loop("bsr64 zeros 5", 5, 0x0) + & _test_bsr64_loop("bsr64 zeros 6", 6, 0x0) + & _test_bsr64_loop("bsr64 zeros 7", 7, 0x0) + & _test_bsr64_loop("bsr64 zeros 8", 8, 0x0) + & _test_bsr64_loop("bsr64 alt-1 1", 1, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 2", 2, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 3", 3, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 4", 4, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 5", 5, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 6", 6, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 
7", 7, 0xaa) + & _test_bsr64_loop("bsr64 alt-1 8", 8, 0xaa) + & _test_bsr64_loop("bsr64 alt-2 1", 1, 0x55) + & _test_bsr64_loop("bsr64 alt-2 2", 2, 0x55) + & _test_bsr64_loop("bsr64 alt-2 3", 3, 0x55) + & _test_bsr64_loop("bsr64 alt-2 4", 4, 0x55) + & _test_bsr64_loop("bsr64 alt-2 5", 5, 0x55) + & _test_bsr64_loop("bsr64 alt-2 6", 6, 0x55) + & _test_bsr64_loop("bsr64 alt-2 7", 7, 0x55) + & _test_bsr64_loop("bsr64 alt-2 8", 8, 0x55); +} + +static int +_test_bsr(void) +{ + return _test_bsr_bit() + & _test_bsr_bit_any() + & _test_bsr8() + & _test_bsr32() + & _test_bsr64(); +} + static int _test_jam_spec(const char *cap, ur_root_t *r, @@ -566,6 +1245,11 @@ _test_ur(void) ret = 0; } + if ( !_test_bsr() ) { + fprintf(stderr, "ur test bsr failed\r\n"); + ret = 0; + } + if ( !_test_jam_cue() ) { fprintf(stderr, "ur test jam/cue failed\r\n"); ret = 0; diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index d9ff000f90..f618dd4e95 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1312,17 +1312,20 @@ ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) } } -typedef struct ur_bsr_s { - uint64_t left; - uint64_t bits; - uint8_t off; - const uint8_t *bytes; -} ur_bsr_t; +ur_bool_t +ur_bsr_sane(ur_bsr_t *bsr) +{ + if ( !bsr->left ) { + return !bsr->off && !bsr->bytes; + } -static inline ur_cue_res_e + return 1; +} + +ur_cue_res_e ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) { - uint8_t left = bsr->left; + uint64_t left = bsr->left; if ( !left ) { return ur_cue_gone; @@ -1358,6 +1361,292 @@ ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) } } +uint8_t +ur_bsr_bit_any(ur_bsr_t *bsr) +{ + uint64_t left = bsr->left; + + bsr->bits++; + + if ( !left ) { + return 0; + } + else { + uint8_t byt = bsr->bytes[0]; + uint8_t off = bsr->off; + uint8_t bit = (byt >> off) & 1; + + if ( 7 == off ) { + left--; + + if ( left ) { + bsr->bytes++; + bsr->left = left; + } + else { + bsr->bytes = 0; + bsr->left = 0; + } + + bsr->off = 0; + } + else { + 
bsr->off = 1 + off; + } + + return bit; + } +} + +uint8_t +ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ( len > 8 ) ? 8 : len; + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *bytes = bsr->bytes; + uint8_t m = bytes[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else if ( 1 == left ) { + bsr->off = 0; + bsr->left = 0; + bsr->bytes = 0; + return m; + } + else { + off = len - rest; + + bsr->off = off; + bsr->left--; + bsr->bytes++; + + { + uint8_t l = bytes[1] & ((1 << off) - 1); + return m ^ (l << rest); + } + } + } +} + +uint32_t +ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ( len > 32 ) ? 32 : len; + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint32_t m = b[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else { + uint8_t mask, len_byt; + uint32_t l; + + len -= rest; + left--; + bsr->bytes++; + + len_byt = len >> 3; + + if ( len_byt >= left ) { + len_byt = left; + bsr->off = off = 0; + bsr->left = 0; + bsr->bytes = 0; + } + else { + bsr->off = off = ur_mask_3(len); + bsr->left = left - len_byt; + bsr->bytes += len_byt; + } + + mask = (1 << off) - 1; + + switch ( len_byt ) { + case 4: { + l = (uint32_t)b[1] + ^ (uint32_t)b[2] << 8 + ^ (uint32_t)b[3] << 16 + ^ (uint32_t)b[4] << 24; + } break; + + case 3: { + l = (uint32_t)b[1] + ^ (uint32_t)b[2] << 8 + ^ (uint32_t)b[3] << 16 + ^ (uint32_t)(b[4] & mask) << 24; + } break; + + case 2: { + l = (uint32_t)b[1] + ^ (uint32_t)b[2] << 8 + ^ (uint32_t)(b[3] & mask) << 16; + } break; + + case 1: { + l = (uint32_t)b[1] + ^ (uint32_t)(b[2] & mask) << 8; + } break; + + case 0: { + l = (uint32_t)(b[1] & mask); + } break; + } + + return m ^ (l << rest); + } + } +} + 
+uint64_t +ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ( len > 64 ) ? 64 : len; + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint64_t m = b[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else { + uint8_t mask, len_byt; + uint64_t l; + + len -= rest; + left--; + bsr->bytes++; + + len_byt = len >> 3; + + if ( len_byt >= left ) { + len_byt = left; + bsr->off = off = 0; + bsr->left = 0; + bsr->bytes = 0; + } + else { + bsr->off = off = ur_mask_3(len); + bsr->left = left - len_byt; + bsr->bytes += len_byt; + } + + mask = (1 << off) - 1; + + switch ( len_byt ) { + case 8: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)b[6] << 40 + ^ (uint64_t)b[7] << 48 + ^ (uint64_t)b[8] << 56; + } break; + + case 7: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)b[6] << 40 + ^ (uint64_t)b[7] << 48 + ^ (uint64_t)(b[8] & mask) << 56; + } break; + + case 6: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)b[6] << 40 + ^ (uint64_t)(b[7] & mask) << 48; + } break; + + case 5: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)(b[6] & mask) << 40; + } break; + + case 4: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)(b[5] & mask) << 32; + } break; + + case 3: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)(b[4] & mask) << 24; + } break; + + case 2: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)(b[3] & mask) << 16; + } break; + + case 1: { + 
l = (uint64_t)b[1] + ^ (uint64_t)(b[2] & mask) << 8; + } break; + + case 0: { + l = (uint64_t)(b[1] & mask); + } break; + } + + return m ^ (l << rest); + } + } +} + static inline ur_cue_res_e ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) { @@ -1408,7 +1697,6 @@ ur_bsr64(ur_bsr_t *bsr, uint8_t len) for ( i = 0; i < len; i++ ) { if ( ur_cue_good != ur_bsr_bit(bsr, &bit) ) { bsr->bits += len - i; - bsr->bytes = 0; return acc; } @@ -1421,7 +1709,7 @@ ur_bsr64(ur_bsr_t *bsr, uint8_t len) static inline void ur_bsr_bytes(ur_bsr_t *bsr, uint64_t len, uint8_t *out) { - uint8_t left = bsr->left; + uint64_t left = bsr->left; uint8_t off = bsr->off; ur_bool_t end = len >= left; From 4aa1dbe4a2cecedac699253b00b2142dd1fa1832 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 19 Aug 2020 11:51:47 -0700 Subject: [PATCH 031/123] ur: adds ur_bsw_bex() and tests --- pkg/urbit/include/ur/hashcons.h | 3 ++ pkg/urbit/tests/ur_tests.c | 39 +++++++++++++++++++- pkg/urbit/ur/hashcons.c | 63 +++++++++++++++++++++++++++------ 3 files changed, 94 insertions(+), 11 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 51bfab0759..b8127314c7 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -202,6 +202,9 @@ ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val); void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +void +ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); + typedef struct ur_bsr_s { uint64_t left; diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 3c6fd6265f..0526633fb4 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -395,6 +395,42 @@ _test_bsw_bytes(void) & _test_bsw_bytes_loop("bsw bytes alt 2 odd", 10, 0x55); } +static void +_bsw_bex_slow(ur_bsw_t *bsw, uint8_t n) +{ + while ( n >= 64 ) { + _bsw64_slow(bsw, 64, 0); + n -= 64; + } + + _bsw64_slow(bsw, n + 1, 1ULL << n); +} + +static int +_test_bsw_bex() +{ + int ret = 1; + ur_bsw_t a = {0}; + ur_bsw_t 
b = {0}; + uint8_t i, l; + uint32_t j, k; + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < 256; j++ ) { + _bsw_init(&a, 1, 1); + _bsw_init(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; + + _bsw_bex_slow(&a, j); + ur_bsw_bex(&b, j); + + ret &= _bsw_cmp_check("bsw bex", j, i, j + 1, &a, &b); + } + } + + return ret; +} + static int _test_bsw(void) { @@ -402,7 +438,8 @@ _test_bsw(void) & _test_bsw8() & _test_bsw32() & _test_bsw64() - & _test_bsw_bytes(); + & _test_bsw_bytes() + & _test_bsw_bex(); } static int diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index f618dd4e95..321b02190e 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1230,14 +1230,58 @@ ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) } static inline void -_bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len_len, uint8_t len, uint64_t val) +_bsw_bex_unsafe(ur_bsw_t *bsw, uint8_t n) { - if ( 0 == val ) { + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint32_t bits = n + off; + + fill += bits >> 3; + off = ur_mask_3(bits); + + bsw->bytes[fill] ^= 1 << off; + + if ( 7 == off ) { + bsw->off = 0; + bsw->fill = 1 + fill; + } + else { + bsw->off = 1 + off; + bsw->fill = fill; + } + + bsw->bits += 1 + n; +} + +void +ur_bsw_bex(ur_bsw_t *bsw, uint8_t n) +{ + uint32_t bits = 1 + n + bsw->off; + uint8_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + if ( need > bsw->prev ) { + bsw->prev = need; + } + ur_bsw_grow(bsw); + } + + _bsw_bex_unsafe(bsw, n); +} + +static inline void +_bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + if ( 0 == len ) { _bsw_bit_unsafe(bsw, 1); } else { - _bsw64_unsafe(bsw, len_len + 1, 1ULL << len_len); - _bsw64_unsafe(bsw, len_len - 1, len); + { + uint8_t nel = ur_met0_64(len); + _bsw_bex_unsafe(bsw, nel); + _bsw64_unsafe(bsw, nel - 1, len); + } + _bsw64_unsafe(bsw, len, val); } } @@ -1248,16 +1292,15 @@ ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) len = ( len > 64 ) ? 
64 : len; { - uint8_t len_len = ur_met0_64(len); - uint8_t next = ( 0 == val ) ? 1 : len + (2 * len_len); - uint8_t bits = bsw->off + next; + uint8_t next = ( 0 == val ) ? 1 : len + (2 * ur_met0_64(len)); + uint8_t bits = bsw->off + next; if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { ur_bsw_grow(bsw); } - - _bsw_mat64_unsafe(bsw, len_len, len, val); } + + _bsw_mat64_unsafe(bsw, len, val); } static inline void @@ -1265,7 +1308,7 @@ _bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint8_t len_len, uint64_t len_bit, uint64_t { // write run-length // - _bsw64_unsafe(bsw, len_len + 1, 1ULL << len_len); + _bsw_bex_unsafe(bsw, len_len); _bsw64_unsafe(bsw, len_len - 1, len_bit); // write bytes From 79363d66a8a994680a2bbb1dc0968f4dc2648e5c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 19 Aug 2020 12:04:12 -0700 Subject: [PATCH 032/123] ur: refactors bit-stream bounds-enforcement and growth --- pkg/urbit/include/ur/hashcons.h | 5 +- pkg/urbit/tests/ur_tests.c | 12 ++--- pkg/urbit/ur/hashcons.c | 88 +++++++++++++++------------------ 3 files changed, 50 insertions(+), 55 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index b8127314c7..e5f578cca3 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -4,6 +4,9 @@ typedef uint8_t ur_bool_t; +#define ur_min(a, b) ( ((a) < (b)) ? (a) : (b) ) +#define ur_max(a, b) ( ((a) > (b)) ? 
(a) : (b) ) + #if (32 == (CHAR_BIT * __SIZEOF_INT__)) # define ur_lz32 __builtin_clz #elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) @@ -182,7 +185,7 @@ typedef struct ur_bsw_s { } ur_bsw_t; void -ur_bsw_grow(ur_bsw_t *bsw); +ur_bsw_grow(ur_bsw_t *bsw, uint64_t need); ur_bool_t ur_bsw_sane(ur_bsw_t *bsw); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 0526633fb4..02ade55f72 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -221,7 +221,7 @@ _bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t static void _bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) { - len = (len > 8) ? 8 : len; + len = ur_min(8, len); while ( len ) { ur_bsw_bit(bsw, byt); @@ -266,7 +266,7 @@ _test_bsw8(void) static void _bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) { - len = (len > 32) ? 32 : len; + len = ur_min(32, len); while ( len ) { ur_bsw_bit(bsw, val & 0xff); @@ -311,7 +311,7 @@ _test_bsw32(void) static void _bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - len = (len > 64) ? 64 : len; + len = ur_min(64, len); while ( len ) { ur_bsw_bit(bsw, val & 0xff); @@ -885,7 +885,7 @@ _bsr8_any_slow(ur_bsr_t *bsr, uint8_t len) { uint8_t i, out = 0; - len = (len > 8) ? 8 : len; + len = ur_min(8, len); for ( i = 0; i < len; i++ ) { out ^= ur_bsr_bit_any(bsr) << i; @@ -951,7 +951,7 @@ _bsr32_any_slow(ur_bsr_t *bsr, uint8_t len) uint32_t out = 0; uint8_t i; - len = (len > 32) ? 32 : len; + len = ur_min(32, len); for ( i = 0; i < len; i++ ) { out ^= (uint32_t)ur_bsr_bit_any(bsr) << i; @@ -1026,7 +1026,7 @@ _bsr64_any_slow(ur_bsr_t *bsr, uint8_t len) uint64_t out = 0; uint8_t i; - len = (len > 64) ? 
64 : len; + len = ur_min(64, len); for ( i = 0; i < len; i++ ) { out ^= (uint64_t)ur_bsr_bit_any(bsr) << i; diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 321b02190e..3d55ea1b75 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -885,15 +885,14 @@ ur_walk_fore(ur_root_t *r, } void -ur_bsw_grow(ur_bsw_t *bsw) +ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) { - uint64_t prev = bsw->prev; uint64_t size = bsw->size; - uint64_t next = prev + size; + uint64_t next = size + step; bsw->bytes = realloc(bsw->bytes, next); assert(bsw->bytes); - memset(bsw->bytes + size, 0, prev); + memset(bsw->bytes + size, 0, step); bsw->prev = size; bsw->size = next; @@ -931,7 +930,7 @@ ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) if ( (7 == bsw->off) && ((1 + bsw->fill) == bsw->size) ) { - ur_bsw_grow(bsw); + ur_bsw_grow(bsw, bsw->prev); } _bsw_bit_unsafe(bsw, bit); @@ -974,11 +973,13 @@ _bsw8_unsafe(ur_bsw_t *bsw, uint8_t len, uint8_t byt) void ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt) { + len = ur_min(8, len); + if ( bsw->fill + !!((bsw->off + len) >> 3) >= bsw->size ) { - ur_bsw_grow(bsw); + ur_bsw_grow(bsw, bsw->prev); } - _bsw8_unsafe(bsw, (len > 8) ? 8 : len, byt); + _bsw8_unsafe(bsw, len, byt); } static inline void @@ -1050,13 +1051,17 @@ _bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) void ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val) { - uint8_t bits = bsw->off + len; + uint8_t bits, need; - if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { - ur_bsw_grow(bsw); + len = ur_min(32, len); + bits = bsw->off + len; + need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } - _bsw32_unsafe(bsw, (len > 32) ? 
32 : len, val); + _bsw32_unsafe(bsw, len, val); } static inline void @@ -1170,13 +1175,17 @@ _bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) void ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - uint8_t bits = bsw->off + len; + uint8_t bits, need; - if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { - ur_bsw_grow(bsw); + len = ur_min(64, len); + bits = bsw->off + len; + need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } - _bsw64_unsafe(bsw, (len > 64) ? 64 : len, val); + _bsw64_unsafe(bsw, len, val); } static inline void @@ -1214,16 +1223,10 @@ _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - if ( (bsw->fill + len + !!bsw->off) >= bsw->size ) { - uint64_t prev = bsw->prev; + uint64_t need = len + !!bsw->off; - // be sure to grow sufficiently - // - if ( len > prev ) { - bsw->prev = len; - } - - ur_bsw_grow(bsw); + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } _bsw_bytes_unsafe(bsw, len, byt); @@ -1260,10 +1263,7 @@ ur_bsw_bex(ur_bsw_t *bsw, uint8_t n) uint8_t need = (bits >> 3) + !!ur_mask_3(bits); if ( bsw->fill + need >= bsw->size ) { - if ( need > bsw->prev ) { - bsw->prev = need; - } - ur_bsw_grow(bsw); + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } _bsw_bex_unsafe(bsw, n); @@ -1289,15 +1289,15 @@ _bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) void ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - len = ( len > 64 ) ? 64 : len; + uint8_t next, bits, need; - { - uint8_t next = ( 0 == val ) ? 1 : len + (2 * ur_met0_64(len)); - uint8_t bits = bsw->off + next; + len = ur_min(64, len); + next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); + bits = bsw->off + next; + need = (bits >> 3) + !!ur_mask_3(bits); - if ( bsw->fill + (bits >> 3) + !!ur_mask_3(bits) >= bsw->size ) { - ur_bsw_grow(bsw); - } + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } _bsw_mat64_unsafe(bsw, len, val); @@ -1339,16 +1339,8 @@ ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) uint8_t bits = bsw->off + (2 * len_len); uint64_t need = len + (bits >> 3) + !!ur_mask_3(bits); - if ( (bsw->fill + need) >= bsw->size ) { - uint64_t prev = bsw->prev; - - // be sure to grow sufficiently - // - if ( need > prev ) { - bsw->prev = need; - } - - ur_bsw_grow(bsw); + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } _bsw_mat_bytes_unsafe(bsw, len_len, len_bit, len, byt); @@ -1446,7 +1438,7 @@ ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) { uint64_t left = bsr->left; - len = ( len > 8 ) ? 8 : len; + len = ur_min(8, len); bsr->bits += len; @@ -1489,7 +1481,7 @@ ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) { uint64_t left = bsr->left; - len = ( len > 32 ) ? 32 : len; + len = ur_min(32, len); bsr->bits += len; @@ -1571,7 +1563,7 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) { uint64_t left = bsr->left; - len = ( len > 64 ) ? 
64 : len; + len = ur_min(64, len); bsr->bits += len; From c5bbe9e441ca1360e87c0a8ed54b479fdc342d50 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 19 Aug 2020 17:17:15 -0700 Subject: [PATCH 033/123] ur: rewrites ur_bsw_bytes to be bit-indexed --- pkg/urbit/tests/ur_tests.c | 39 +++++++++----- pkg/urbit/ur/hashcons.c | 103 ++++++++++++++++++++----------------- 2 files changed, 84 insertions(+), 58 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 02ade55f72..8856d45bed 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -353,6 +353,19 @@ _test_bsw64(void) & _test_bsw64_loop("bsw 64 alt 2", 0x5555555555555555ULL); } +static void +_bsw_bytes_slow(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t i, len_byt = len >> 3; + + for ( i = 0; i < len_byt; i++ ) { + _bsw8_slow(bsw, 8, byt[i]); + len -= 8; + } + + _bsw8_slow(bsw, len, byt[len_byt]); +} + static int _test_bsw_bytes_loop(const char* cap, uint64_t len, uint8_t val) { @@ -360,24 +373,26 @@ _test_bsw_bytes_loop(const char* cap, uint64_t len, uint8_t val) ur_bsw_t a = {0}; ur_bsw_t b = {0}; uint8_t i, j, *byt; + uint64_t len_bit = len << 3; + + byt = malloc(len); + memset(byt, val, len); for ( i = 0; i < 8; i++) { - _bsw_init(&a, 1, 1); - _bsw_init(&b, 1, 1); - a.off = a.bits = b.off = b.bits = i; - byt = malloc(len); + for ( j = 0; j < len_bit; j++ ) { + _bsw_init(&a, 1, 1); + _bsw_init(&b, 1, 1); + a.off = a.bits = b.off = b.bits = i; - for ( j = 0; j < len; j++ ) { - _bsw8_slow(&a, 8, val); - byt[j] = val; + _bsw_bytes_slow(&a, j, byt); + ur_bsw_bytes(&b, j, byt); + + ret &= _bsw_cmp_check(cap, val, i, j, &a, &b); } - - ur_bsw_bytes(&b, len, byt); - free(byt); - - ret &= _bsw_cmp_check(cap, val, 8, i, &a, &b); } + free(byt); + return ret; } diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 3d55ea1b75..b514fb173d 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1191,12 +1191,19 @@ ur_bsw64(ur_bsw_t 
*bsw, uint8_t len, uint64_t val) static inline void _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; if ( !off ) { - memcpy(bsw->bytes + fill, byt, len); - fill += len; + memcpy(bsw->bytes + fill, byt, len_byt); + fill += len_byt; + off = len_bit; + + if ( off ) { + bsw->bytes[fill] = byt[len_byt] & ((1 << off) - 1); + } } // the least-significant bits of the input become the // most-significant bits of a byte in the output stream, and vice-versa @@ -1207,23 +1214,38 @@ _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) uint8_t l, m = bsw->bytes[fill]; uint64_t i; - for ( i = 0; i < len; i++ ) { + for ( i = 0; i < len_byt; i++ ) { l = byt[i] & mask; bsw->bytes[fill++] = m ^ (l << off); m = byt[i] >> rest; } - bsw->bytes[fill] = m; + if ( len_bit < rest ) { + l = byt[len_byt] & ((1 << len_bit) - 1); + bsw->bytes[fill] = m ^ (l << off); + off += len_bit; + } + else { + l = byt[len_byt] & mask; + bsw->bytes[fill++] = m ^ (l << off); + + m = byt[len_byt] >> rest; + + off = len_bit - rest; + bsw->bytes[fill] = m & ((1 << off) - 1); + } } + bsw->off = off; bsw->fill = fill; - bsw->bits += len << 3; + bsw->bits += len; } void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - uint64_t need = len + !!bsw->off; + uint8_t bits = len + bsw->off; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1304,47 +1326,36 @@ ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) } static inline void -_bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint8_t len_len, uint64_t len_bit, uint64_t len, uint8_t *byt) +_bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - // write run-length - // - _bsw_bex_unsafe(bsw, len_len); - _bsw64_unsafe(bsw, len_len - 1, len_bit); - - // write bytes - // - { 
- uint8_t low = ur_mask_3(len_bit); - - if ( !low ) { - _bsw_bytes_unsafe(bsw, len, byt); - } - else { - len--; - _bsw_bytes_unsafe(bsw, len, byt); - _bsw8_unsafe(bsw, low, byt[len]); + if ( 0 == len ) { + _bsw_bit_unsafe(bsw, 1); + } + else { + // write run-length + // + { + uint8_t nel = ur_met0_64(len); + _bsw_bex_unsafe(bsw, nel); + _bsw64_unsafe(bsw, nel - 1, len); } + + _bsw_bytes_unsafe(bsw, len, byt); } } void -ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len_bit, uint64_t len, uint8_t *byt) +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - uint8_t len_len = ur_met0_64(len_bit); + uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); + uint64_t bits = bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); - // XX - assert( 64 > len_len ); - - { - uint8_t bits = bsw->off + (2 * len_len); - uint64_t need = len + (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_mat_bytes_unsafe(bsw, len_len, len_bit, len, byt); + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); } + + _bsw_mat_bytes_unsafe(bsw, len, byt); } ur_bool_t @@ -1822,20 +1833,20 @@ ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) } static inline void -_jam_mat(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len_bit) +_jam_mat(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) { switch ( ur_nref_tag(ref) ) { default: assert(0); case ur_direct: { - ur_bsw_mat64(bsw, len_bit, ref); + ur_bsw_mat64(bsw, len, ref); } break; case ur_iatom: { - uint64_t len; - uint8_t *byt; - ur_bytes(r, ref, &byt, &len); - ur_bsw_mat_bytes(bsw, len_bit, len, byt); + uint64_t len_byt; + uint8_t *byt; + ur_bytes(r, ref, &byt, &len_byt); + ur_bsw_mat_bytes(bsw, len, byt); } break; } } From b117ca25a6f1f9a17d916320090a815bc845006b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 19 Aug 2020 18:35:10 -0700 Subject: [PATCH 034/123] ur: fills out bistream-write (ur_bsw*) 
interface --- pkg/urbit/include/ur/hashcons.h | 22 +++++- pkg/urbit/ur/hashcons.c | 121 ++++++++++++++++++++++++-------- 2 files changed, 111 insertions(+), 32 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index e5f578cca3..42c3b078ff 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -185,7 +185,7 @@ typedef struct ur_bsw_s { } ur_bsw_t; void -ur_bsw_grow(ur_bsw_t *bsw, uint64_t need); +ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); ur_bool_t ur_bsw_sane(ur_bsw_t *bsw); @@ -200,7 +200,7 @@ void ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); void -ur_bsw64(ur_bsw_t *bsw, uint8_t len_bit, uint64_t val); +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); @@ -208,6 +208,24 @@ ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); void ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); +void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +void +ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +void +ur_bsw_cell(ur_bsw_t *bsw); + typedef struct ur_bsr_s { uint64_t left; diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index b514fb173d..182f7a16cb 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1358,6 +1358,82 @@ ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) _bsw_mat_bytes_unsafe(bsw, len, byt); } +static inline void +_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + _bsw8_unsafe(bsw, 2, 3); + _bsw_mat64_unsafe(bsw, len, val); +} + +void +ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); + uint64_t bits = 2 + bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_back64(bsw, len, val); +} + +static inline void +_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + _bsw_bit_unsafe(bsw, 0); + _bsw_mat64_unsafe(bsw, len, val); +} + +void +ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); + uint64_t bits = 1 + bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_atom64(bsw, len, val); +} + +static inline void +_bsw_atom_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + _bsw_bit_unsafe(bsw, 0); + _bsw_mat_bytes_unsafe(bsw, len, byt); +} + +void +ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); + uint64_t bits = 1 + bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_atom_bytes_unsafe(bsw, len, byt); +} + +void +ur_bsw_cell(ur_bsw_t *bsw) +{ + uint8_t bits = 2 + bsw->off; + uint8_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw8_unsafe(bsw, 2, 1); +} + ur_bool_t ur_bsr_sane(ur_bsr_t *bsr) { @@ -1833,21 +1909,17 @@ ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) } static inline void -_jam_mat(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) +_bsw_atom(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) { switch ( ur_nref_tag(ref) ) { default: assert(0); - case ur_direct: { - ur_bsw_mat64(bsw, len, ref); - } break; + case ur_direct: return ur_bsw_atom64(bsw, len, ref); case ur_iatom: { - uint64_t len_byt; - uint8_t *byt; - ur_bytes(r, ref, &byt, &len_byt); - ur_bsw_mat_bytes(bsw, len, byt); - } break; + uint8_t *byt = r->atoms.bytes[ur_nref_idx(ref)]; + return ur_bsw_atom_bytes(bsw, len, byt); + } } } @@ -1862,27 +1934,21 @@ _jam_atom(ur_root_t *r, ur_nref ref, void *ptr) _jam_t *j = ptr; ur_dict64_t *dict = &(j->dict); ur_bsw_t *bsw = &j->bsw; - uint64_t bak, len_bit; - - len_bit = ur_met(r, 0, ref); + uint64_t bak, len = ur_met(r, 0, ref); if ( !ur_dict64_get(r, dict, ref, &bak) ) { ur_dict64_put(r, dict, ref, bsw->bits); - ur_bsw_bit(bsw, 0); - _jam_mat(r, ref, bsw, len_bit); + _bsw_atom(r, ref, bsw, len); } else { - uint64_t bak_bit = ur_met0_64(bak); + uint64_t len_bak = ur_met0_64(bak); - if ( len_bit <= bak_bit ) { - ur_bsw_bit(bsw, 0); - _jam_mat(r, ref, bsw, len_bit); + if ( len <= len_bak ) { + _bsw_atom(r, ref, bsw, len); } else { - ur_bsw_bit(bsw, 1); - ur_bsw_bit(bsw, 1); - ur_bsw_mat64(bsw, bak_bit, bak); + ur_bsw_back64(bsw, len_bak, bak); } } } @@ -1898,17 +1964,12 @@ _jam_cell(ur_root_t *r, 
ur_nref ref, void *ptr) if ( !ur_dict64_get(r, dict, ref, &bak) ) { ur_dict64_put(r, dict, ref, bsw->bits); - ur_bsw_bit(bsw, 1); - ur_bsw_bit(bsw, 0); - - return 1; // true + ur_bsw_cell(bsw); + return 1; } else { - ur_bsw_bit(bsw, 1); - ur_bsw_bit(bsw, 1); - ur_bsw_mat64(bsw, ur_met0_64(bak), bak); - - return 0; // false + ur_bsw_back64(bsw, ur_met0_64(bak), bak); + return 0; } } From 4d1bb97f3608a374508d22deffe9240776ad015e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 12:38:58 -0700 Subject: [PATCH 035/123] ur: adds ur_bsr_bytes_any(), for bit-indexed byte reads --- pkg/urbit/include/ur/hashcons.h | 3 + pkg/urbit/tests/ur_tests.c | 77 +++++++++++++++++++++++++ pkg/urbit/ur/hashcons.c | 99 +++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 42c3b078ff..fcd1776dd9 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -257,6 +257,9 @@ ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); uint64_t ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); +void +ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); + uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 8856d45bed..f0d2b961ad 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1126,11 +1126,88 @@ _test_bsr64(void) & _test_bsr64_loop("bsr64 alt-2 8", 8, 0x55); } +static void +_bsr_bytes_any_slow(ur_bsr_t *bsr, uint64_t len, uint8_t *out) +{ + uint64_t i, len_byt = len >> 3; + + for ( i = 0; i < len_byt; i++ ) { + out[i] = _bsr8_any_slow(bsr, 8); + } + + out[len_byt] = _bsr8_any_slow(bsr, ur_mask_3(len)); +} + +static int +_test_bsr_bytes_any_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint64_t len_bit = len << 3; + ur_bsr_t a, b; + uint8_t *bytes, *c, *d; + uint8_t i, j, k; + + c = malloc(len); + d = malloc(len); + bytes = 
malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 1; j <= len_bit; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + memset(c, 0x0, len); + memset(d, 0x0, len); + + _bsr_bytes_any_slow(&a, j, c); + ur_bsr_bytes_any(&b, j, d); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + + if ( memcmp(c, d, len) ) { + fprintf(stderr, "%s: off %u, len %u not equal off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, i, j, b.off, b.left, b.bits); + fprintf(stderr, " a: { "); + for ( k = 0; k < len; k++ ) { + fprintf(stderr, "%02x, ", c[k]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " b: { "); + for ( k = 0; k < len; k++ ) { + fprintf(stderr, "%02x, ", d[k]); + } + fprintf(stderr, "}\r\n"); + ret = 0; + } + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr_bytes_any(void) +{ + return _test_bsr_bytes_any_loop("bsr bytes nought", 0, 0x0) + & _test_bsr_bytes_any_loop("bsr bytes ones odd", 3, 0xff) + & _test_bsr_bytes_any_loop("bsr bytes ones even", 4, 0xff) + & _test_bsr_bytes_any_loop("bsr bytes zeros odd", 5, 0x0) + & _test_bsr_bytes_any_loop("bsr bytes zeros even", 6, 0x0) + & _test_bsr_bytes_any_loop("bsr bytes alt 1 odd", 7, 0xaa) + & _test_bsr_bytes_any_loop("bsr bytes alt 1 even", 8, 0xaa) + & _test_bsr_bytes_any_loop("bsr bytes alt 2 odd", 9, 0x55) + & _test_bsr_bytes_any_loop("bsr bytes alt 2 odd", 10, 0x55); +} + static int _test_bsr(void) { return _test_bsr_bit() & _test_bsr_bit_any() + & _test_bsr_bytes_any() & _test_bsr8() & _test_bsr32() & _test_bsr64(); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 182f7a16cb..6d3aad0ef2 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1769,6 +1769,105 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) } } +void +ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t 
off = bsr->off; + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + + if ( !off ) { + uint8_t bits = off + len_bit; + uint64_t need = len_byt + (bits >> 3) + !!ur_mask_3(bits); + + if ( need > left ) { + memcpy(out, b, left); + bsr->bytes = 0; + bsr->left = 0; + } + else { + memcpy(out, b, len_byt); + off = len_bit; + left -= len_byt; + + if ( !left ) { + bsr->bytes = 0; + } + else { + bsr->bytes += len_byt; + } + + bsr->left = left; + + if ( off ) { + out[len_byt] = b[len_byt] & ((1 << off) - 1); + } + } + } + // the most-significant bits from a byte in the stream + // become the least-significant bits of an output byte, and vice-versa + // + else { + uint64_t need = len_byt + (len_bit >> 3) + !!ur_mask_3(len_bit); + ur_bool_t end = need >= left; + uint64_t max = end ? (left - 1) : len_byt; + uint8_t rest = 8 - off; + uint8_t mask = (1 << off) - 1; + uint8_t byt = b[0]; + uint8_t l, m = byt >> off; + uint64_t i; + + for ( i = 0; i < max; i++ ) { + byt = b[1ULL + i]; + l = byt & mask; + out[i] = m ^ (l << rest); + m = byt >> off; + } + + if ( end ) { + if ( len_bit && len_bit < rest ) { + out[max] = m & ((1 << len_bit) - 1); + bsr->bytes += max; + left -= max; + off += len_bit; + } + else { + out[max] = m; + bsr->bytes = 0; + left = 0; + off = 0; + } + } + else { + uint8_t bits = off + len_bit; + + bsr->bytes += max; + left -= max + !!(bits >> 3); + off = ur_mask_3(bits); + + if ( len_bit <= rest ) { + out[max] = m & ((1 << len_bit) - 1); + } + else { + l = b[1ULL + max] & ((1 << off) - 1);; + out[max] = m ^ (l << rest); + } + } + } + + bsr->off = off; + bsr->left = left; + bsr->bits += len; + } +} + static inline ur_cue_res_e ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) { From a7ac76d5e7cc4a0d62f501aea45fc2e59a3d6a0b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 12:39:22 -0700 Subject: [PATCH 036/123] ur: cleans up ur_bsrN_any tests --- pkg/urbit/tests/ur_tests.c | 45 +++++++++++++++----------------------- 1 file changed, 18 
insertions(+), 27 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index f0d2b961ad..9f1ea4f01a 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -915,16 +915,13 @@ _test_bsr8_loop(const char *cap, uint8_t len, uint8_t val) int ret = 1; uint8_t *bytes; ur_bsr_t a, b; - uint8_t c, d, i, j, k; + uint8_t c, d, i, j; + + bytes = malloc(len); + memset(bytes, val, len); for ( i = 0; i < 8; i++) { for ( j = 0; j <= 8; j++ ) { - bytes = malloc(len); - - for ( k = 0; k < len; k++ ) { - bytes[k] = val; - } - a.left = b.left = len; a.bytes = b.bytes = bytes; a.off = a.bits = b.off = b.bits = i; @@ -939,11 +936,11 @@ _test_bsr8_loop(const char *cap, uint8_t len, uint8_t val) cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); ret = 0; } - - free(bytes); } } + free(bytes); + return ret; } @@ -982,16 +979,13 @@ _test_bsr32_loop(const char *cap, uint8_t len, uint8_t val) uint8_t *bytes; ur_bsr_t a, b; uint32_t c, d; - uint8_t i, j, k; + uint8_t i, j; + + bytes = malloc(len); + memset(bytes, val, len); for ( i = 0; i < 8; i++) { for ( j = 0; j <= 32; j++ ) { - bytes = malloc(len); - - for ( k = 0; k < len; k++ ) { - bytes[k] = val; - } - a.left = b.left = len; a.bytes = b.bytes = bytes; a.off = a.bits = b.off = b.bits = i; @@ -1006,11 +1000,11 @@ _test_bsr32_loop(const char *cap, uint8_t len, uint8_t val) cap, i, j, c, d, b.off, b.left, b.left ? 
b.bytes[0] : 0, b.bits); ret = 0; } - - free(bytes); } } + free(bytes); + return ret; } @@ -1057,16 +1051,13 @@ _test_bsr64_loop(const char *cap, uint8_t len, uint8_t val) uint8_t *bytes; ur_bsr_t a, b; uint64_t c, d; - uint8_t i, j, k; + uint8_t i, j; + + bytes = malloc(len); + memset(bytes, val, len); for ( i = 0; i < 8; i++) { for ( j = 0; j <= 64; j++ ) { - bytes = malloc(len); - - for ( k = 0; k < len; k++ ) { - bytes[k] = val; - } - a.left = b.left = len; a.bytes = b.bytes = bytes; a.off = a.bits = b.off = b.bits = i; @@ -1081,11 +1072,11 @@ _test_bsr64_loop(const char *cap, uint8_t len, uint8_t val) cap, i, j, c, d, b.off, b.left, b.left ? b.bytes[0] : 0, b.bits); ret = 0; } - - free(bytes); } } + free(bytes); + return ret; } From 5a8043f62036cc63c2f524668668596db1ef0807 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 15:49:02 -0700 Subject: [PATCH 037/123] ur: clean up ur_bsr8_any() --- pkg/urbit/ur/hashcons.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 6d3aad0ef2..5637c23a77 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1533,10 +1533,10 @@ ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) return 0; } else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *bytes = bsr->bytes; - uint8_t m = bytes[0] >> off; + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint8_t m = b[0] >> off; if ( len < rest ) { bsr->off = off + len; @@ -1556,7 +1556,7 @@ ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) bsr->bytes++; { - uint8_t l = bytes[1] & ((1 << off) - 1); + uint8_t l = b[1] & ((1 << off) - 1); return m ^ (l << rest); } } From b3c644930a486899a1d224670dce5a508d1eca0f Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 15:58:35 -0700 Subject: [PATCH 038/123] ur: use new ur_bsr*_any functions in ur_cue() --- pkg/urbit/ur/hashcons.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 
deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 5637c23a77..bf4597d9d2 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -2001,7 +2001,7 @@ ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) } else { len--; - *out = ur_bsr64(bsr, len) ^ (1ULL << len); + *out = ur_bsr64_any(bsr, len) ^ (1ULL << len); } return ur_cue_good; @@ -2112,19 +2112,20 @@ _cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) } if ( 62 >= len ) { - *out = (ur_nref)ur_bsr64(bsr, len); + *out = (ur_nref)ur_bsr64_any(bsr, len); } else { - uint8_t *byt = calloc(len, 1); - ur_bsr_bytes(bsr, len, byt); + uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); + uint8_t *byt = calloc(len_byt, 1); + ur_bsr_bytes_any(bsr, len, byt); // strip trailing zeroes // - while ( len && !byt[len - 1] ) { - len--; + while ( len_byt && !byt[len_byt - 1] ) { + len_byt--; } - *out = _coin_bytes_unsafe(r, byt, len); + *out = _coin_bytes_unsafe(r, byt, len_byt); } return ur_cue_good; @@ -2143,7 +2144,7 @@ _cue_back(ur_bsr_t *bsr, uint64_t *out) // XX assert( 62 >= len ); - *out = ur_bsr64(bsr, len); + *out = ur_bsr64_any(bsr, len); return ur_cue_good; } From e09ba7e8ee773006a58671a3781dcea6ee75a3dd Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 23:38:29 -0700 Subject: [PATCH 039/123] ur: fixes typos in tests --- pkg/urbit/tests/ur_tests.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 9f1ea4f01a..8cc667ecab 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -407,7 +407,7 @@ _test_bsw_bytes(void) & _test_bsw_bytes_loop("bsw bytes alt 1 odd", 7, 0xaa) & _test_bsw_bytes_loop("bsw bytes alt 1 even", 8, 0xaa) & _test_bsw_bytes_loop("bsw bytes alt 2 odd", 9, 0x55) - & _test_bsw_bytes_loop("bsw bytes alt 2 odd", 10, 0x55); + & _test_bsw_bytes_loop("bsw bytes alt 2 even", 10, 0x55); } static void @@ -476,7 +476,7 @@ _bsr_bit_check(const char *cap, } if ( ser != res 
) { - fprintf(stderr, "%s: val not equal (%s, %s) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + fprintf(stderr, "%s: res not equal (%s, %s) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", cap, (ur_cue_good == ser) ? "good" : "gone", (ur_cue_good == res) ? "good" : "gone", bsr->off, bsr->left, bsr->left ? bsr->bytes[0] : 0, bsr->bits); @@ -484,7 +484,7 @@ _bsr_bit_check(const char *cap, } if ( (ur_cue_good == res) && (exp != val) ) { - fprintf(stderr, "%s: res not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", + fprintf(stderr, "%s: val not equal (%02x, %02x) off=%u left=%" PRIu64 " byte=%02x bits=%" PRIu64 "\r\n", cap, exp, val, bsr->off, bsr->left, bsr->left ? bsr->bytes[0] : 0, bsr->bits); ret = 0; } @@ -1190,7 +1190,7 @@ _test_bsr_bytes_any(void) & _test_bsr_bytes_any_loop("bsr bytes alt 1 odd", 7, 0xaa) & _test_bsr_bytes_any_loop("bsr bytes alt 1 even", 8, 0xaa) & _test_bsr_bytes_any_loop("bsr bytes alt 2 odd", 9, 0x55) - & _test_bsr_bytes_any_loop("bsr bytes alt 2 odd", 10, 0x55); + & _test_bsr_bytes_any_loop("bsr bytes alt 2 even", 10, 0x55); } static int From 1a89c71e96fbdc80e67ec879ee4a97b02cf51bde Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 15:48:40 -0700 Subject: [PATCH 040/123] ur: adds ur_bsr_rub_log and tests, fixes ur_bsr_zeros() --- pkg/urbit/include/ur/hashcons.h | 11 +++ pkg/urbit/tests/ur_tests.c | 127 +++++++++++++++++++++++++++++++- pkg/urbit/ur/hashcons.c | 94 ++++++++++++++++++++--- 3 files changed, 220 insertions(+), 12 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index fcd1776dd9..976a8cc2b8 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -9,21 +9,26 @@ typedef uint8_t ur_bool_t; #if (32 == (CHAR_BIT * __SIZEOF_INT__)) # define ur_lz32 __builtin_clz +# define ur_tz32 __builtin_ctz #elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) # define ur_lz32 __builtin_clzl +# define ur_tz32 __builtin_ctzl 
#else # error "port me" #endif #if (64 == (CHAR_BIT * __SIZEOF_LONG__)) # define ur_lz64 __builtin_clzl +# define ur_tz64 __builtin_ctzl #elif (64 == (CHAR_BIT * __SIZEOF_LONG_LONG__)) # define ur_lz64 __builtin_clzll +# define ur_tz64 __builtin_ctzll #else # error "port me" #endif #define ur_lz8(a) ( ur_lz32(a) - 24 ) +#define ur_tz8 ur_tz32 #define ur_mask_3(a) (a & 0x7) #define ur_mask_8(a) (a & 0xff) @@ -260,6 +265,12 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); void ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); +ur_cue_res_e +ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); + +ur_cue_res_e +ur_bsr_zeros(ur_bsr_t *bsr, uint8_t *out); + uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 8cc667ecab..4a19c04395 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1193,6 +1193,130 @@ _test_bsr_bytes_any(void) & _test_bsr_bytes_any_loop("bsr bytes alt 2 even", 10, 0x55); } +static int +_bsr_cmp_check(const char* cap, + uint8_t off, + uint8_t len, + ur_bsr_t *a, + ur_bsr_t *b, + uint8_t c, + uint8_t d, + ur_cue_res_e e, + ur_cue_res_e f) +{ + int ret = 1; + + if ( !ur_bsr_sane(a) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, a->off, a->left, a->bits); + ret = 0; + } + + if ( !ur_bsr_sane(b) ) { + fprintf(stderr, "%s: off %u, len %u a insane off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, b->off, b->left, b->bits); + ret = 0; + } + + if ( e != f ) { + fprintf(stderr, "%s: off %u, len %u ret not equal (%s, %s) off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, + (ur_cue_good == e) ? "good" : "gone", + (ur_cue_good == f) ? 
"good" : "gone", + b->off, b->left, b->bits); + ret = 0; + } + + if ( (ur_cue_good == e) && (c != d) ) { + fprintf(stderr, "%s: off %u, len %u val not equal (%02x, %02x) off=%u left=%" PRIu64 " bits=%" PRIu64 "\r\n", + cap, off, len, c, d, b->off, b->left, b->bits); + ret = 0; + } + + if ( a->off != b->off ) { + fprintf(stderr, "%s: off %u len %u: offset fail (%u, %u)\r\n", + cap, off, len, a->off, b->off); + ret = 0; + } + + if ( a->left != b->left ) { + fprintf(stderr, "%s: off %u len %u: left fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->left, b->left); + ret = 0; + } + + if ( a->bits != b->bits ) { + fprintf(stderr, "%s: off %u len %u: bits fail (%" PRIu64 ", %" PRIu64 ")\r\n", + cap, off, len, a->bits, b->bits); + ret = 0; + } + + + return ret; +} + +static int +_test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + ur_bsr_t a, b; + uint8_t *bytes, c, d; + uint8_t i, j, k; + ur_cue_res_e e, f; + + bytes = malloc(len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < len; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + memset(bytes, 0x0, j); + memset(bytes + j, val, len - j); + + e = ur_bsr_zeros(&a, &c); + f = ur_bsr_rub_log(&b, &d); + + ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr_rub_log(void) +{ + int ret = _test_bsr_rub_log_loop("bsr rub_log nought", 0, 0x0) + & _test_bsr_rub_log_loop("bsr rub_log ones odd", 3, 0xff) + & _test_bsr_rub_log_loop("bsr rub_log ones even", 4, 0xff) + & _test_bsr_rub_log_loop("bsr rub_log ones big", 50, 0xff) + & _test_bsr_rub_log_loop("bsr rub_log zeros odd", 5, 0x0) + & _test_bsr_rub_log_loop("bsr rub_log zeros even", 6, 0x0) + & _test_bsr_rub_log_loop("bsr rub_log zeros big", 50, 0x0); + + { + uint8_t i, j = 5; + char cap[1024]; + + for ( i = 0; i < 8; i++ ) { + snprintf(cap, 1000, "bsr rub_log 1<<%u odd", i); + ret &= _test_bsr_rub_log_loop((const 
char*)cap, j++, 0x1 << i); + + snprintf(cap, 1000, "bsr rub_log 1<<%u even", i); + ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); + + snprintf(cap, 1000, "bsr rub_log 1<<%u big", i); + ret &= _test_bsr_rub_log_loop((const char*)cap, 50, 0x1 << i); + } + } + + return ret; +} + static int _test_bsr(void) { @@ -1201,7 +1325,8 @@ _test_bsr(void) & _test_bsr_bytes_any() & _test_bsr8() & _test_bsr32() - & _test_bsr64(); + & _test_bsr64() + & _test_bsr_rub_log(); } static int diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index bf4597d9d2..bb9e4326e5 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1868,6 +1868,75 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) } } +static inline ur_cue_res_e +_bsr_rub_log_meme(ur_bsr_t *bsr) +{ + bsr->bits += 256; + bsr->bytes += 32; + bsr->left -= 32; + + // XX distinguish meme status + // + return ur_cue_gone; +} + +ur_cue_res_e +ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint8_t byt = b[0] >> off; + uint8_t skip = 0; + + while ( !byt ) { + if ( 32 == skip ) { + return _bsr_rub_log_meme(bsr); + } + + skip++; + + if ( skip == left ) { + bsr->bits += (skip << 3) - off; + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + return ur_cue_gone; + } + + byt = b[skip]; + } + + { + uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); + + if ( 255 < zeros ) { + return _bsr_rub_log_meme(bsr); + } + else { + uint32_t bits = off + 1 + zeros; + uint8_t bytes = bits >> 3; + + left -= bytes; + + bsr->bytes = left ? 
(b + bytes) : 0; + bsr->bits += 1 + zeros; + bsr->left = left; + bsr->off = ur_mask_3(bits); + + *out = zeros; + return ur_cue_good; + } + } + } +} + static inline ur_cue_res_e ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) { @@ -1889,23 +1958,26 @@ ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) return ur_cue_good; } -static inline ur_cue_res_e +ur_cue_res_e ur_bsr_zeros(ur_bsr_t *bsr, uint8_t *out) { ur_cue_res_e res; - uint8_t bit, len = 0; + uint8_t bit, i = 0; - while ( (ur_cue_good == (res = ur_bsr_bit(bsr, &bit))) && (0 == bit) ) { - len++; + do { + if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + else if ( bit ) { + *out = i; + return ur_cue_good; + } } + while ( ++i ); - if ( ur_cue_good != res ) { - return res; - } - else { - *out = len; - return ur_cue_good; - } + // XX distinguish meme + // + return ur_cue_gone; } static inline uint64_t From d130633732035905536da89b7fdaf9b2b3da5ad3 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 20 Aug 2020 15:45:57 -0700 Subject: [PATCH 041/123] ur: uses ur_bsr_rub_log() in ur_bsr_mat, moves ur_bsr_zeros() into tests --- pkg/urbit/include/ur/hashcons.h | 3 --- pkg/urbit/tests/ur_tests.c | 24 +++++++++++++++++++++++- pkg/urbit/ur/hashcons.c | 29 ++++------------------------- 3 files changed, 27 insertions(+), 29 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 976a8cc2b8..8ccb183a39 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -268,9 +268,6 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); ur_cue_res_e ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); -ur_cue_res_e -ur_bsr_zeros(ur_bsr_t *bsr, uint8_t *out); - uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 4a19c04395..4de14f9861 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1255,6 +1255,28 @@ _bsr_cmp_check(const char* 
cap, return ret; } +static ur_cue_res_e +_bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) +{ + ur_cue_res_e res; + uint8_t bit, i = 0; + + do { + if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + else if ( bit ) { + *out = i; + return ur_cue_good; + } + } + while ( ++i ); + + // XX distinguish meme + // + return ur_cue_gone; +} + static int _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1275,7 +1297,7 @@ _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) memset(bytes, 0x0, j); memset(bytes + j, val, len - j); - e = ur_bsr_zeros(&a, &c); + e = _bsr_rub_log_slow(&a, &c); f = ur_bsr_rub_log(&b, &d); ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index bb9e4326e5..e02a29b845 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1958,28 +1958,6 @@ ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) return ur_cue_good; } -ur_cue_res_e -ur_bsr_zeros(ur_bsr_t *bsr, uint8_t *out) -{ - ur_cue_res_e res; - uint8_t bit, i = 0; - - do { - if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { - return res; - } - else if ( bit ) { - *out = i; - return ur_cue_good; - } - } - while ( ++i ); - - // XX distinguish meme - // - return ur_cue_gone; -} - static inline uint64_t ur_bsr64(ur_bsr_t *bsr, uint8_t len) { @@ -2059,10 +2037,11 @@ ur_bsr_bytes(ur_bsr_t *bsr, uint64_t len, uint8_t *out) static inline ur_cue_res_e ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) { - uint8_t len; + ur_cue_res_e res; + uint8_t len; - if ( ur_cue_gone == ur_bsr_zeros(bsr, &len) ) { - return ur_cue_gone; + if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { + return res; } // XX From 4a7ad95e14e2f252bb7dc70bae52c1bc294e9cf9 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 21 Aug 2020 14:26:10 -0700 Subject: [PATCH 042/123] ur: rewrites ur_bsr_tag(), adds tests --- pkg/urbit/include/ur/hashcons.h | 15 +++--- pkg/urbit/tests/ur_tests.c | 71 
++++++++++++++++++++++++- pkg/urbit/ur/hashcons.c | 94 ++++++++++++++++++++++++--------- 3 files changed, 147 insertions(+), 33 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 8ccb183a39..ef3f2af74e 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -244,6 +244,12 @@ typedef enum { ur_cue_gone = 1 } ur_cue_res_e; +typedef enum { + ur_jam_atom = 0, + ur_jam_cell = 1, + ur_jam_back = 2 +} ur_cue_tag_e; + ur_bool_t ur_bsr_sane(ur_bsr_t *bsr); @@ -265,17 +271,14 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); void ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); +ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); + ur_cue_res_e ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); -typedef enum { - ur_jam_atom = 0, - ur_jam_cell = 1, - ur_jam_back = 2 -} ur_cue_tag_e; - ur_cue_res_e ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 4de14f9861..f14b8258c2 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1339,6 +1339,74 @@ _test_bsr_rub_log(void) return ret; } +static ur_cue_res_e +_bsr_tag_slow(ur_bsr_t *bsr, ur_cue_tag_e *out) +{ + ur_cue_res_e res; + uint8_t bit; + + if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + else if ( 0 == bit ) { + *out = ur_jam_atom; + return ur_cue_good; + } + else if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { + return res; + } + + *out = ( 0 == bit ) ? 
ur_jam_cell : ur_jam_back; + return ur_cue_good; +} + +static int +_test_bsr_tag_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + ur_bsr_t a, b; + uint8_t *bytes; + ur_cue_tag_e c, d; + uint8_t i, j, k; + ur_cue_res_e e, f; + + bytes = malloc(len); + + for ( i = 0; i < 8; i++) { + for ( j = 0; j < len; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = bytes; + a.off = a.bits = b.off = b.bits = i; + + memset(bytes, 0x0, j); + memset(bytes + j, val, len - j); + + e = _bsr_tag_slow(&a, &c); + f = ur_bsr_tag(&b, &d); + + ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); + } + } + + free(bytes); + + return ret; +} + +static int +_test_bsr_tag(void) +{ + return _test_bsr_tag_loop("bsr tag nought", 0, 0x0) + & _test_bsr_tag_loop("bsr tag ones 1", 1, 0xff) + & _test_bsr_tag_loop("bsr tag ones 2", 2, 0xff) + & _test_bsr_tag_loop("bsr tag zeros 1", 1, 0x0) + & _test_bsr_tag_loop("bsr tag zeros 2", 2, 0x0) + & _test_bsr_tag_loop("bsr tag alt-1 1", 1, 0xaa) + & _test_bsr_tag_loop("bsr tag alt-1 2", 2, 0xaa) + & _test_bsr_tag_loop("bsr tag alt-2 1", 1, 0x55) + & _test_bsr_tag_loop("bsr tag alt-2 2", 2, 0x55); +} + static int _test_bsr(void) { @@ -1348,7 +1416,8 @@ _test_bsr(void) & _test_bsr8() & _test_bsr32() & _test_bsr64() - & _test_bsr_rub_log(); + & _test_bsr_rub_log() + & _test_bsr_tag(); } static int diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index e02a29b845..e583477530 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1868,6 +1868,73 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) } } +static inline ur_cue_res_e +_bsr_set_gone(ur_bsr_t *bsr, uint8_t bits) +{ + bsr->bits += bits; + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + return ur_cue_gone; +} + +ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) 
& 1; + uint8_t len = 1; + + if ( 0 == bit ) { + *out = ur_jam_atom; + } + else { + if ( 7 == off ) { + if ( 1 == left ) { + return _bsr_set_gone(bsr, 1); + } + + bit = b[1] & 1; + } + else { + bit = (b[0] >> (off + 1)) & 1; + } + + len++; + *out = ( 0 == bit ) ? ur_jam_cell : ur_jam_back; + } + + { + uint8_t bits = off + len; + uint8_t bytes = bits >> 3; + + left -= bytes; + + if ( !left ) { + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + } + else { + bsr->bytes += bytes; + bsr->left = left; + bsr->off = ur_mask_3(bits); + } + + bsr->bits += len; + + return ur_cue_good; + } + } +} + static inline ur_cue_res_e _bsr_rub_log_meme(ur_bsr_t *bsr) { @@ -1903,11 +1970,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) skip++; if ( skip == left ) { - bsr->bits += (skip << 3) - off; - bsr->bytes = 0; - bsr->left = 0; - bsr->off = 0; - return ur_cue_gone; + return _bsr_set_gone(bsr, (skip << 3) - off); } byt = b[skip]; @@ -1937,27 +2000,6 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) } } -static inline ur_cue_res_e -ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) -{ - ur_cue_res_e res; - uint8_t bit; - - if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { - return res; - } - else if ( 0 == bit ) { - *out = ur_jam_atom; - return ur_cue_good; - } - else if ( ur_cue_good != (res = ur_bsr_bit(bsr, &bit)) ) { - return res; - } - - *out = ( 0 == bit ) ? 
ur_jam_cell : ur_jam_back; - return ur_cue_good; -} - static inline uint64_t ur_bsr64(ur_bsr_t *bsr, uint8_t len) { From 6e3dade9a6db39ac2a2bd1d5e9785fad4b11f98b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 21 Aug 2020 14:26:26 -0700 Subject: [PATCH 043/123] ur: improves ur_bsr_sane() --- pkg/urbit/ur/hashcons.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index e583477530..cd0cfd5d1c 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1437,11 +1437,13 @@ ur_bsw_cell(ur_bsw_t *bsw) ur_bool_t ur_bsr_sane(ur_bsr_t *bsr) { + ur_bool_t ret = 8 > bsr->off; + if ( !bsr->left ) { - return !bsr->off && !bsr->bytes; + ret = ret && (!bsr->off && !bsr->bytes); } - return 1; + return ret; } ur_cue_res_e From 66970a01e129638d9e20e1c89e6add8e7858265c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 21 Aug 2020 14:26:38 -0700 Subject: [PATCH 044/123] ur: refactors ur_bsr_bit* functions --- pkg/urbit/ur/hashcons.c | 47 ++++++++++++----------------------------- 1 file changed, 14 insertions(+), 33 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index cd0cfd5d1c..2a4e0fa29f 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1455,32 +1455,22 @@ ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) return ur_cue_gone; } else { - uint8_t byt = bsr->bytes[0]; - uint8_t off = bsr->off; - uint8_t bit = (byt >> off) & 1; + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; if ( 7 == off ) { - left--; - - if ( left ) { - bsr->bytes++; - bsr->left = left; - } - else { - bsr->bytes = 0; - bsr->left = 0; - } - - bsr->off = 0; + bsr->bytes = ( --left ) ? 
(b + 1) : 0; + bsr->left = left; + bsr->off = 0; } else { - bsr->off = 1 + off; + bsr->off = 1 + off; } bsr->bits++; *out = bit; - return ur_cue_good; } } @@ -1496,26 +1486,17 @@ ur_bsr_bit_any(ur_bsr_t *bsr) return 0; } else { - uint8_t byt = bsr->bytes[0]; - uint8_t off = bsr->off; - uint8_t bit = (byt >> off) & 1; + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; if ( 7 == off ) { - left--; - - if ( left ) { - bsr->bytes++; - bsr->left = left; - } - else { - bsr->bytes = 0; - bsr->left = 0; - } - - bsr->off = 0; + bsr->bytes = ( --left ) ? (b + 1) : 0; + bsr->left = left; + bsr->off = 0; } else { - bsr->off = 1 + off; + bsr->off = 1 + off; } return bit; From 753a3ea9a57c86b662326b142b5395b357e1662f Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 21 Aug 2020 14:28:09 -0700 Subject: [PATCH 045/123] ur: removes superseded functions: ur_bsr64 and ur_bsr_bytes --- pkg/urbit/ur/hashcons.c | 76 ----------------------------------------- 1 file changed, 76 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 2a4e0fa29f..681d7611eb 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1983,82 +1983,6 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) } } -static inline uint64_t -ur_bsr64(ur_bsr_t *bsr, uint8_t len) -{ - uint64_t acc = 0; - uint64_t i; - uint8_t bit; - - for ( i = 0; i < len; i++ ) { - if ( ur_cue_good != ur_bsr_bit(bsr, &bit) ) { - bsr->bits += len - i; - return acc; - } - - acc ^= (uint64_t)bit << i; - } - - return acc; -} - -static inline void -ur_bsr_bytes(ur_bsr_t *bsr, uint64_t len, uint8_t *out) -{ - uint64_t left = bsr->left; - uint8_t off = bsr->off; - ur_bool_t end = len >= left; - - if ( !left ) { - return; - } - - if ( !off ) { - if ( end ) { - memcpy(out, bsr->bytes, left); - bsr->bytes = 0; - left = 0; - } - else { - memcpy(out, bsr->bytes, len); - bsr->bytes += len; - left -= len; - } - } - // the most-significant bits from a byte in the stream - //
become the least-significant bits of an output byte, and vice-versa - // - else { - uint8_t rest = 8 - off; - const uint8_t *bytes = bsr->bytes; - uint8_t byt = bytes[0]; - uint8_t l, m; - uint64_t max = end ? (left - 1) : len; - uint64_t i; - - for ( i = 0; i < max; i++ ) { - m = byt >> off; - byt = bytes[1 + i]; - l = byt & ((1 << off) - 1); - out[i] = m ^ (l << rest); - } - - if ( end ) { - out[max] = bytes[max] >> off; - - bsr->bytes = 0; - left = 0; - } - else { - bsr->bytes += max; - left -= max; - } - } - - bsr->left = left; - bsr->bits += len << 3; -} - static inline ur_cue_res_e ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) { From 096677a3c3b43d90100eab96e721912590769c79 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 21 Aug 2020 15:27:04 -0700 Subject: [PATCH 046/123] ur: distinguish "meme" (exceeds memory repr) errors in ur_bsr* and ur_cue* functions --- pkg/urbit/include/ur/hashcons.h | 6 ++++- pkg/urbit/tests/ur_tests.c | 4 +-- pkg/urbit/ur/hashcons.c | 44 ++++++++++++++++++--------------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index ef3f2af74e..d842e4f09b 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -241,7 +241,8 @@ typedef struct ur_bsr_s { typedef enum { ur_cue_good = 0, - ur_cue_gone = 1 + ur_cue_gone = 1, + ur_cue_meme = 2 } ur_cue_res_e; typedef enum { @@ -277,6 +278,9 @@ ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); ur_cue_res_e ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); +ur_cue_res_e +ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); + uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index f14b8258c2..fc8b663860 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1272,9 +1272,7 @@ _bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) } while ( ++i ); - // XX distinguish meme - // - return ur_cue_gone; + 
return ur_cue_meme; } static int diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 681d7611eb..8888919ea7 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -1924,10 +1924,7 @@ _bsr_rub_log_meme(ur_bsr_t *bsr) bsr->bits += 256; bsr->bytes += 32; bsr->left -= 32; - - // XX distinguish meme status - // - return ur_cue_gone; + return ur_cue_meme; } ur_cue_res_e @@ -1983,8 +1980,8 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) } } -static inline ur_cue_res_e -ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) +ur_cue_res_e +ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) { ur_cue_res_e res; uint8_t len; @@ -1992,16 +1989,23 @@ ur_bsr_mat(ur_bsr_t *bsr, uint64_t *out) if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { return res; } - - // XX - assert( 64 > len ); - - if ( !len ) { - *out = 0; + else if ( 64 <= len ) { + return ur_cue_meme; } - else { - len--; - *out = ur_bsr64_any(bsr, len) ^ (1ULL << len); + + switch ( len ) { + case 0: { + *out = 0; + } break; + + case 1: { + *out = 1; + } break; + + default: { + len--; + *out = ur_bsr64_any(bsr, len) ^ (1ULL << len); + } break; } return ur_cue_good; @@ -2107,7 +2111,7 @@ _cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) ur_cue_res_e res; uint64_t len; - if ( ur_cue_good != (res = ur_bsr_mat(bsr, &len)) ) { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { return res; } @@ -2137,12 +2141,12 @@ _cue_back(ur_bsr_t *bsr, uint64_t *out) ur_cue_res_e res; uint64_t len; - if ( ur_cue_good != (res = ur_bsr_mat(bsr, &len)) ) { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { return res; } - - // XX - assert( 62 >= len ); + else if ( 62 < len ) { + return ur_cue_meme; + } *out = ur_bsr64_any(bsr, len); return ur_cue_good; From 8a3b2fc0617a316ced6e4cae2dd95014ecc898ef Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 21 Aug 2020 15:54:56 -0700 Subject: [PATCH 047/123] ur: refactors ur "library" into separate modules --- pkg/urbit/include/ur/bitstream.h | 106 +++ 
pkg/urbit/include/ur/defs.h | 66 ++ pkg/urbit/include/ur/hashcons.h | 167 +--- pkg/urbit/include/ur/serial.h | 13 + pkg/urbit/include/ur/ur.h | 9 + pkg/urbit/tests/ur_tests.c | 2 +- pkg/urbit/ur/bitstream.c | 1133 +++++++++++++++++++++++ pkg/urbit/ur/hashcons.c | 1449 +----------------------------- pkg/urbit/ur/serial.c | 299 ++++++ 9 files changed, 1651 insertions(+), 1593 deletions(-) create mode 100644 pkg/urbit/include/ur/bitstream.h create mode 100644 pkg/urbit/include/ur/defs.h create mode 100644 pkg/urbit/include/ur/serial.h create mode 100644 pkg/urbit/include/ur/ur.h create mode 100644 pkg/urbit/ur/bitstream.c create mode 100644 pkg/urbit/ur/serial.c diff --git a/pkg/urbit/include/ur/bitstream.h b/pkg/urbit/include/ur/bitstream.h new file mode 100644 index 0000000000..785866857f --- /dev/null +++ b/pkg/urbit/include/ur/bitstream.h @@ -0,0 +1,106 @@ +#ifndef UR_BITSTREAM_H +#define UR_BITSTREAM_H + +#include + +typedef enum { + ur_cue_good = 0, + ur_cue_gone = 1, + ur_cue_meme = 2 +} ur_cue_res_e; + +typedef enum { + ur_jam_atom = 0, + ur_jam_cell = 1, + ur_jam_back = 2 +} ur_cue_tag_e; + +typedef struct ur_bsr_s { + uint64_t left; + uint64_t bits; + uint8_t off; + const uint8_t *bytes; +} ur_bsr_t; + +typedef struct ur_bsw_s { + uint64_t prev; + uint64_t size; + uint64_t fill; + uint64_t bits; + uint8_t off; + uint8_t *bytes; +} ur_bsw_t; + +ur_bool_t +ur_bsr_sane(ur_bsr_t *bsr); + +ur_cue_res_e +ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); + +uint8_t +ur_bsr_bit_any(ur_bsr_t *bsr); + +uint8_t +ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); + +uint32_t +ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); + +uint64_t +ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); + +void +ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); + +ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); + +ur_cue_res_e +ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); + +ur_cue_res_e +ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); + +void +ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); + +ur_bool_t 
+ur_bsw_sane(ur_bsw_t *bsw); + +void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); + +void +ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); + +void +ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); + +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +void +ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); + +void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +void +ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); + +void +ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); + +void +ur_bsw_cell(ur_bsw_t *bsw); + +#endif diff --git a/pkg/urbit/include/ur/defs.h b/pkg/urbit/include/ur/defs.h new file mode 100644 index 0000000000..51f9064cc3 --- /dev/null +++ b/pkg/urbit/include/ur/defs.h @@ -0,0 +1,66 @@ +#ifndef UR_DEFS_H +#define UR_DEFS_H + +#include +#include +#include + +typedef uint8_t ur_bool_t; + +#define ur_min(a, b) ( ((a) < (b)) ? (a) : (b) ) +#define ur_max(a, b) ( ((a) > (b)) ? (a) : (b) ) + +#if (32 == (CHAR_BIT * __SIZEOF_INT__)) +# define ur_lz32 __builtin_clz +# define ur_tz32 __builtin_ctz +#elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) +# define ur_lz32 __builtin_clzl +# define ur_tz32 __builtin_ctzl +#else +# error "port me" +#endif + +#if (64 == (CHAR_BIT * __SIZEOF_LONG__)) +# define ur_lz64 __builtin_clzl +# define ur_tz64 __builtin_ctzl +#elif (64 == (CHAR_BIT * __SIZEOF_LONG_LONG__)) +# define ur_lz64 __builtin_clzll +# define ur_tz64 __builtin_ctzll +#else +# error "port me" +#endif + +#define ur_lz8(a) ( ur_lz32(a) - 24 ) +#define ur_tz8 ur_tz32 + +#define ur_mask_3(a) (a & 0x7) +#define ur_mask_8(a) (a & 0xff) +#define ur_mask_31(a) (a & 0x7fffffff) +#define ur_mask_62(a) (a & 0x3fffffffffffffffULL) + +#define ur_met0_8(a) ( (a) ? 8 - ur_lz8(a) : 0 ) +#define ur_met0_32(a) ( (a) ? 
32 - ur_lz32(a) : 0 ) +#define ur_met0_64(a) ( (a) ? 64 - ur_lz64(a) : 0 ) + +inline uint64_t +ur_met0_bytes(uint8_t *byt, uint64_t len) +{ + // XX requires no trailing null bytes + // + uint64_t last = len - 1; + return (last << 3) + ur_met0_8(byt[last]); +} + +#define ur_met3_8(a) \ + ({ uint8_t _a = ur_met0_8(a); \ + ( (_a >> 3) + !!ur_mask_3(_a) ); }) + +#define ur_met3_32(a) \ + ({ uint8_t _a = ur_met0_32(a); \ + ( (_a >> 3) + !!ur_mask_3(_a) ); }) + +#define ur_met3_64(a) \ + ({ uint8_t _a = ur_met0_64(a); \ + ( (_a >> 3) + !!ur_mask_3(_a) ); }) + +#endif diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index d842e4f09b..73601c174f 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -1,55 +1,12 @@ +#ifndef UR_HASHCONS_H +#define UR_HASHCONS_H + #include #include #include +#include -typedef uint8_t ur_bool_t; - -#define ur_min(a, b) ( ((a) < (b)) ? (a) : (b) ) -#define ur_max(a, b) ( ((a) > (b)) ? (a) : (b) ) - -#if (32 == (CHAR_BIT * __SIZEOF_INT__)) -# define ur_lz32 __builtin_clz -# define ur_tz32 __builtin_ctz -#elif (32 == (CHAR_BIT * __SIZEOF_LONG__)) -# define ur_lz32 __builtin_clzl -# define ur_tz32 __builtin_ctzl -#else -# error "port me" -#endif - -#if (64 == (CHAR_BIT * __SIZEOF_LONG__)) -# define ur_lz64 __builtin_clzl -# define ur_tz64 __builtin_ctzl -#elif (64 == (CHAR_BIT * __SIZEOF_LONG_LONG__)) -# define ur_lz64 __builtin_clzll -# define ur_tz64 __builtin_ctzll -#else -# error "port me" -#endif - -#define ur_lz8(a) ( ur_lz32(a) - 24 ) -#define ur_tz8 ur_tz32 - -#define ur_mask_3(a) (a & 0x7) -#define ur_mask_8(a) (a & 0xff) -#define ur_mask_31(a) (a & 0x7fffffff) -#define ur_mask_62(a) (a & 0x3fffffffffffffffULL) - -#define ur_met0_8(a) ( (a) ? 8 - ur_lz8(a) : 0 ) -#define ur_met0_32(a) ( (a) ? 32 - ur_lz32(a) : 0 ) -#define ur_met0_64(a) ( (a) ? 
64 - ur_lz64(a) : 0 ) - -#define ur_met3_8(a) \ - ({ uint8_t _a = ur_met0_8(a); \ - ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ - -#define ur_met3_32(a) \ - ({ uint8_t _a = ur_met0_32(a); \ - ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ - -#define ur_met3_64(a) \ - ({ uint8_t _a = ur_met0_64(a); \ - ( (_a >> 3) + !!ur_mask_3(_a) ); }) \ +#include "ur/defs.h" #define ur_nref_tag(ref) ( ref >> 62 ) #define ur_nref_idx(ref) ur_mask_62(ref) @@ -127,6 +84,9 @@ typedef struct ur_root_s { ur_atoms_t atoms; } ur_root_t; +uint64_t +ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref); + void ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size); @@ -148,6 +108,9 @@ ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out); void ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val); +ur_nref +ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len); + ur_nref ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len); @@ -179,110 +142,4 @@ ur_walk_fore(ur_root_t *r, void (*atom)(ur_root_t*, ur_nref, void*), ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); - -typedef struct ur_bsw_s { - uint64_t prev; - uint64_t size; - uint64_t fill; - uint64_t bits; - uint8_t off; - uint8_t *bytes; -} ur_bsw_t; - -void -ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); - -ur_bool_t -ur_bsw_sane(ur_bsw_t *bsw); - -void -ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); - -void -ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); - -void -ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); - -void -ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); - -void -ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); - -void -ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); - -void -ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); - -void -ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); - -void -ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); - -void -ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); - -void -ur_bsw_atom_bytes(ur_bsw_t *bsw, 
uint64_t len, uint8_t *byt); - -void -ur_bsw_cell(ur_bsw_t *bsw); - - -typedef struct ur_bsr_s { - uint64_t left; - uint64_t bits; - uint8_t off; - const uint8_t *bytes; -} ur_bsr_t; - -typedef enum { - ur_cue_good = 0, - ur_cue_gone = 1, - ur_cue_meme = 2 -} ur_cue_res_e; - -typedef enum { - ur_jam_atom = 0, - ur_jam_cell = 1, - ur_jam_back = 2 -} ur_cue_tag_e; - -ur_bool_t -ur_bsr_sane(ur_bsr_t *bsr); - -ur_cue_res_e -ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); - -uint8_t -ur_bsr_bit_any(ur_bsr_t *bsr); - -uint8_t -ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); - -uint32_t -ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); - -uint64_t -ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); - -void -ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); - -ur_cue_res_e -ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); - -ur_cue_res_e -ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); - -ur_cue_res_e -ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); - -uint64_t -ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); - -ur_cue_res_e -ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); +#endif \ No newline at end of file diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h new file mode 100644 index 0000000000..2a8d98b86e --- /dev/null +++ b/pkg/urbit/include/ur/serial.h @@ -0,0 +1,13 @@ +#ifndef UR_SERIAL_H +#define UR_SERIAL_H + +#include +#include + +uint64_t +ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); + +ur_cue_res_e +ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); + +#endif diff --git a/pkg/urbit/include/ur/ur.h b/pkg/urbit/include/ur/ur.h new file mode 100644 index 0000000000..5b1bb222eb --- /dev/null +++ b/pkg/urbit/include/ur/ur.h @@ -0,0 +1,9 @@ +#ifndef UR_UR_H +#define UR_UR_H + +#include "ur/defs.h" +#include "ur/bitstream.h" +#include "ur/hashcons.h" +#include "ur/serial.h" + +#endif diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index fc8b663860..66a145559f 100644 --- 
a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -5,7 +5,7 @@ #include #include -#include "ur/hashcons.h" +#include "ur/ur.h" static void _bsw_init(ur_bsw_t *bsw, uint64_t prev, uint64_t size) diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c new file mode 100644 index 0000000000..476cf40d89 --- /dev/null +++ b/pkg/urbit/ur/bitstream.c @@ -0,0 +1,1133 @@ +#include +#include +#include + +#include "ur/defs.h" +#include "ur/bitstream.h" + +ur_bool_t +ur_bsr_sane(ur_bsr_t *bsr) +{ + ur_bool_t ret = 8 > bsr->off; + + if ( !bsr->left ) { + ret = ret && (!bsr->off && !bsr->bytes); + } + + return ret; +} + +ur_cue_res_e +ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; + + if ( 7 == off ) { + bsr->bytes = ( --left ) ? (b + 1) : 0; + bsr->left = left; + bsr->off = 0; + } + else { + bsr->off = 1 + off; + } + + bsr->bits++; + + *out = bit; + return ur_cue_good; + } +} + +uint8_t +ur_bsr_bit_any(ur_bsr_t *bsr) +{ + uint64_t left = bsr->left; + + bsr->bits++; + + if ( !left ) { + return 0; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; + + if ( 7 == off ) { + bsr->bytes = ( --left ) ? 
(b + 1) : 0; + bsr->left = left; + bsr->off = 0; + } + else { + bsr->off = 1 + off; + } + + return bit; + } +} + +uint8_t +ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ur_min(8, len); + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint8_t m = b[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else if ( 1 == left ) { + bsr->off = 0; + bsr->left = 0; + bsr->bytes = 0; + return m; + } + else { + off = len - rest; + + bsr->off = off; + bsr->left--; + bsr->bytes++; + + { + uint8_t l = b[1] & ((1 << off) - 1); + return m ^ (l << rest); + } + } + } +} + +uint32_t +ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ur_min(32, len); + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint32_t m = b[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else { + uint8_t mask, len_byt; + uint32_t l; + + len -= rest; + left--; + bsr->bytes++; + + len_byt = len >> 3; + + if ( len_byt >= left ) { + len_byt = left; + bsr->off = off = 0; + bsr->left = 0; + bsr->bytes = 0; + } + else { + bsr->off = off = ur_mask_3(len); + bsr->left = left - len_byt; + bsr->bytes += len_byt; + } + + mask = (1 << off) - 1; + + switch ( len_byt ) { + case 4: { + l = (uint32_t)b[1] + ^ (uint32_t)b[2] << 8 + ^ (uint32_t)b[3] << 16 + ^ (uint32_t)b[4] << 24; + } break; + + case 3: { + l = (uint32_t)b[1] + ^ (uint32_t)b[2] << 8 + ^ (uint32_t)b[3] << 16 + ^ (uint32_t)(b[4] & mask) << 24; + } break; + + case 2: { + l = (uint32_t)b[1] + ^ (uint32_t)b[2] << 8 + ^ (uint32_t)(b[3] & mask) << 16; + } break; + + case 1: { + l = (uint32_t)b[1] + ^ (uint32_t)(b[2] & mask) << 8; + } break; + + case 0: { + l = (uint32_t)(b[1] & mask); + } break; + } + + return 
m ^ (l << rest); + } + } +} + +uint64_t +ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) +{ + uint64_t left = bsr->left; + + len = ur_min(64, len); + + bsr->bits += len; + + if ( !left ) { + return 0; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint64_t m = b[0] >> off; + + if ( len < rest ) { + bsr->off = off + len; + return m & ((1 << len) - 1); + } + else { + uint8_t mask, len_byt; + uint64_t l; + + len -= rest; + left--; + bsr->bytes++; + + len_byt = len >> 3; + + if ( len_byt >= left ) { + len_byt = left; + bsr->off = off = 0; + bsr->left = 0; + bsr->bytes = 0; + } + else { + bsr->off = off = ur_mask_3(len); + bsr->left = left - len_byt; + bsr->bytes += len_byt; + } + + mask = (1 << off) - 1; + + switch ( len_byt ) { + case 8: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)b[6] << 40 + ^ (uint64_t)b[7] << 48 + ^ (uint64_t)b[8] << 56; + } break; + + case 7: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)b[6] << 40 + ^ (uint64_t)b[7] << 48 + ^ (uint64_t)(b[8] & mask) << 56; + } break; + + case 6: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)b[6] << 40 + ^ (uint64_t)(b[7] & mask) << 48; + } break; + + case 5: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)b[5] << 32 + ^ (uint64_t)(b[6] & mask) << 40; + } break; + + case 4: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)b[4] << 24 + ^ (uint64_t)(b[5] & mask) << 32; + } break; + + case 3: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)b[3] << 16 + ^ (uint64_t)(b[4] & mask) << 24; + } break; + + case 2: { + l = (uint64_t)b[1] + ^ (uint64_t)b[2] << 8 + ^ (uint64_t)(b[3] & mask) << 16; + } 
break; + + case 1: { + l = (uint64_t)b[1] + ^ (uint64_t)(b[2] & mask) << 8; + } break; + + case 0: { + l = (uint64_t)(b[1] & mask); + } break; + } + + return m ^ (l << rest); + } + } +} + +void +ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + + if ( !off ) { + uint8_t bits = off + len_bit; + uint64_t need = len_byt + (bits >> 3) + !!ur_mask_3(bits); + + if ( need > left ) { + memcpy(out, b, left); + bsr->bytes = 0; + bsr->left = 0; + } + else { + memcpy(out, b, len_byt); + off = len_bit; + left -= len_byt; + + if ( !left ) { + bsr->bytes = 0; + } + else { + bsr->bytes += len_byt; + } + + bsr->left = left; + + if ( off ) { + out[len_byt] = b[len_byt] & ((1 << off) - 1); + } + } + } + // the most-significant bits from a byte in the stream + // become the least-significant bits of an output byte, and vice-versa + // + else { + uint64_t need = len_byt + (len_bit >> 3) + !!ur_mask_3(len_bit); + ur_bool_t end = need >= left; + uint64_t max = end ? 
(left - 1) : len_byt; + uint8_t rest = 8 - off; + uint8_t mask = (1 << off) - 1; + uint8_t byt = b[0]; + uint8_t l, m = byt >> off; + uint64_t i; + + for ( i = 0; i < max; i++ ) { + byt = b[1ULL + i]; + l = byt & mask; + out[i] = m ^ (l << rest); + m = byt >> off; + } + + if ( end ) { + if ( len_bit && len_bit < rest ) { + out[max] = m & ((1 << len_bit) - 1); + bsr->bytes += max; + left -= max; + off += len_bit; + } + else { + out[max] = m; + bsr->bytes = 0; + left = 0; + off = 0; + } + } + else { + uint8_t bits = off + len_bit; + + bsr->bytes += max; + left -= max + !!(bits >> 3); + off = ur_mask_3(bits); + + if ( len_bit <= rest ) { + out[max] = m & ((1 << len_bit) - 1); + } + else { + l = b[1ULL + max] & ((1 << off) - 1);; + out[max] = m ^ (l << rest); + } + } + } + + bsr->off = off; + bsr->left = left; + bsr->bits += len; + } +} + +static inline ur_cue_res_e +_bsr_set_gone(ur_bsr_t *bsr, uint8_t bits) +{ + bsr->bits += bits; + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + return ur_cue_gone; +} + +ur_cue_res_e +ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint8_t bit = (b[0] >> off) & 1; + uint8_t len = 1; + + if ( 0 == bit ) { + *out = ur_jam_atom; + } + else { + if ( 7 == off ) { + if ( 1 == left ) { + return _bsr_set_gone(bsr, 1); + } + + bit = b[1] & 1; + } + else { + bit = (b[0] >> (off + 1)) & 1; + } + + len++; + *out = ( 0 == bit ) ? 
ur_jam_cell : ur_jam_back; + } + + { + uint8_t bits = off + len; + uint8_t bytes = bits >> 3; + + left -= bytes; + + if ( !left ) { + bsr->bytes = 0; + bsr->left = 0; + bsr->off = 0; + } + else { + bsr->bytes += bytes; + bsr->left = left; + bsr->off = ur_mask_3(bits); + } + + bsr->bits += len; + + return ur_cue_good; + } + } +} + +static inline ur_cue_res_e +_bsr_rub_log_meme(ur_bsr_t *bsr) +{ + bsr->bits += 256; + bsr->bytes += 32; + bsr->left -= 32; + return ur_cue_meme; +} + +ur_cue_res_e +ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) +{ + uint64_t left = bsr->left; + + if ( !left ) { + return ur_cue_gone; + } + else { + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + const uint8_t *b = bsr->bytes; + uint8_t byt = b[0] >> off; + uint8_t skip = 0; + + while ( !byt ) { + if ( 32 == skip ) { + return _bsr_rub_log_meme(bsr); + } + + skip++; + + if ( skip == left ) { + return _bsr_set_gone(bsr, (skip << 3) - off); + } + + byt = b[skip]; + } + + { + uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); + + if ( 255 < zeros ) { + return _bsr_rub_log_meme(bsr); + } + else { + uint32_t bits = off + 1 + zeros; + uint8_t bytes = bits >> 3; + + left -= bytes; + + bsr->bytes = left ? 
(b + bytes) : 0; + bsr->bits += 1 + zeros; + bsr->left = left; + bsr->off = ur_mask_3(bits); + + *out = zeros; + return ur_cue_good; + } + } + } +} + +ur_cue_res_e +ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) +{ + ur_cue_res_e res; + uint8_t len; + + if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { + return res; + } + else if ( 64 <= len ) { + return ur_cue_meme; + } + + switch ( len ) { + case 0: { + *out = 0; + } break; + + case 1: { + *out = 1; + } break; + + default: { + len--; + *out = ur_bsr64_any(bsr, len) ^ (1ULL << len); + } break; + } + + return ur_cue_good; +} + +void +ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) +{ + uint64_t size = bsw->size; + uint64_t next = size + step; + + bsw->bytes = realloc(bsw->bytes, next); + assert(bsw->bytes); + memset(bsw->bytes + size, 0, step); + + bsw->prev = size; + bsw->size = next; +} + +ur_bool_t +ur_bsw_sane(ur_bsw_t *bsw) +{ + return ( (8 > bsw->off) + && ((bsw->fill << 3) + bsw->off == bsw->bits) ); +} + +static inline void +_bsw_bit_unsafe(ur_bsw_t *bsw, uint8_t bit) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + + bsw->bytes[fill] ^= (bit & 1) << off; + + if ( 7 == off ) { + bsw->fill = 1 + fill; + bsw->off = 0; + } + else { + bsw->off = 1 + off; + } + + bsw->bits++; +} + +void +ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) +{ + if ( (7 == bsw->off) + && ((1 + bsw->fill) == bsw->size) ) + { + ur_bsw_grow(bsw, bsw->prev); + } + + _bsw_bit_unsafe(bsw, bit); +} + +static inline void +_bsw8_unsafe(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t rest = 8 - off; + uint8_t l, m; + + // the least-significant bits of the input become the + // most-significant bits of a byte in the output stream + // + if ( len < rest ) { + l = byt & ((1 << len) - 1); + + bsw->bytes[fill] ^= l << off; + bsw->off = off + len; + } + // and vice-versa + // + else { + l = byt & ((1 << rest) - 1); + m = byt >> rest; + + bsw->bytes[fill++] ^= l << off; + off = len - rest; 
+ bsw->bytes[fill] = m & ((1 << off) - 1); + + bsw->fill = fill; + bsw->off = off; + } + + bsw->bits += len; +} + +void +ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt) +{ + len = ur_min(8, len); + + if ( bsw->fill + !!((bsw->off + len) >> 3) >= bsw->size ) { + ur_bsw_grow(bsw, bsw->prev); + } + + _bsw8_unsafe(bsw, len, byt); +} + +static inline void +_bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *bytes = bsw->bytes; + + bsw->bits += len; + + if ( off ) { + uint8_t rest = 8 - off; + + if ( len < rest ) { + bytes[fill] ^= (val & ((1 << len) - 1)) << off; + bsw->off = off + len; + return; + } + + bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; + val >>= rest; + len -= rest; + } + + switch ( len >> 3 ) { + case 4: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + + // no offset is possible here + // + bsw->fill = fill; + bsw->off = 0; + return; + } + + case 3: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + val >>= 24; + } break; + + case 2: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + val >>= 16; + } break; + + case 1: { + bytes[fill++] = ur_mask_8(val); + val >>= 8; + } break; + } + + off = ur_mask_3(len); + + if ( off ) { + bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); + } + + bsw->fill = fill; + bsw->off = off; +} + +void +ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val) +{ + uint8_t bits, need; + + len = ur_min(32, len); + bits = bsw->off + len; + need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw32_unsafe(bsw, len, val); +} + +static inline void +_bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint8_t *bytes = 
bsw->bytes; + + bsw->bits += len; + + if ( off ) { + uint8_t rest = 8 - off; + + if ( len < rest ) { + bytes[fill] ^= (val & ((1 << len) - 1)) << off; + bsw->off = off + len; + return; + } + + bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; + val >>= rest; + len -= rest; + } + + switch ( len >> 3 ) { + case 8: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + bytes[fill++] = ur_mask_8(val >> 48); + bytes[fill++] = ur_mask_8(val >> 56); + + // no offset is possible here + // + bsw->fill = fill; + bsw->off = 0; + return; + } + + case 7: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + bytes[fill++] = ur_mask_8(val >> 48); + val >>= 56; + } break; + + case 6: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + bytes[fill++] = ur_mask_8(val >> 40); + val >>= 48; + } break; + + case 5: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + bytes[fill++] = ur_mask_8(val >> 32); + val >>= 40; + } break; + + case 4: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + bytes[fill++] = ur_mask_8(val >> 24); + val >>= 32; + } break; + + case 3: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + bytes[fill++] = ur_mask_8(val >> 16); + val >>= 24; + } break; + + case 2: { + bytes[fill++] = ur_mask_8(val); + bytes[fill++] = ur_mask_8(val >> 8); + val >>= 16; + } break; + + case 
1: { + bytes[fill++] = ur_mask_8(val); + val >>= 8; + } break; + } + + off = ur_mask_3(len); + + if ( off ) { + bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); + } + + bsw->fill = fill; + bsw->off = off; +} + +void +ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t bits, need; + + len = ur_min(64, len); + bits = bsw->off + len; + need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw64_unsafe(bsw, len, val); +} + +static inline void +_bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + + if ( !off ) { + memcpy(bsw->bytes + fill, byt, len_byt); + fill += len_byt; + off = len_bit; + + if ( off ) { + bsw->bytes[fill] = byt[len_byt] & ((1 << off) - 1); + } + } + // the least-significant bits of the input become the + // most-significant bits of a byte in the output stream, and vice-versa + // + else { + uint8_t rest = 8 - off; + uint8_t mask = (1 << rest) - 1; + uint8_t l, m = bsw->bytes[fill]; + uint64_t i; + + for ( i = 0; i < len_byt; i++ ) { + l = byt[i] & mask; + bsw->bytes[fill++] = m ^ (l << off); + m = byt[i] >> rest; + } + + if ( len_bit < rest ) { + l = byt[len_byt] & ((1 << len_bit) - 1); + bsw->bytes[fill] = m ^ (l << off); + off += len_bit; + } + else { + l = byt[len_byt] & mask; + bsw->bytes[fill++] = m ^ (l << off); + + m = byt[len_byt] >> rest; + + off = len_bit - rest; + bsw->bytes[fill] = m & ((1 << off) - 1); + } + } + + bsw->off = off; + bsw->fill = fill; + bsw->bits += len; +} + +void +ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint8_t bits = len + bsw->off; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_bytes_unsafe(bsw, len, byt); +} + +static inline void +_bsw_bex_unsafe(ur_bsw_t *bsw, uint8_t 
n) +{ + uint64_t fill = bsw->fill; + uint8_t off = bsw->off; + uint32_t bits = n + off; + + fill += bits >> 3; + off = ur_mask_3(bits); + + bsw->bytes[fill] ^= 1 << off; + + if ( 7 == off ) { + bsw->off = 0; + bsw->fill = 1 + fill; + } + else { + bsw->off = 1 + off; + bsw->fill = fill; + } + + bsw->bits += 1 + n; +} + +void +ur_bsw_bex(ur_bsw_t *bsw, uint8_t n) +{ + uint32_t bits = 1 + n + bsw->off; + uint8_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_bex_unsafe(bsw, n); +} + +static inline void +_bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + if ( 0 == len ) { + _bsw_bit_unsafe(bsw, 1); + } + else { + { + uint8_t nel = ur_met0_64(len); + _bsw_bex_unsafe(bsw, nel); + _bsw64_unsafe(bsw, nel - 1, len); + } + + _bsw64_unsafe(bsw, len, val); + } +} + +void +ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint8_t next, bits, need; + + len = ur_min(64, len); + next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); + bits = bsw->off + next; + need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_mat64_unsafe(bsw, len, val); +} + +static inline void +_bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + if ( 0 == len ) { + _bsw_bit_unsafe(bsw, 1); + } + else { + // write run-length + // + { + uint8_t nel = ur_met0_64(len); + _bsw_bex_unsafe(bsw, nel); + _bsw64_unsafe(bsw, nel - 1, len); + } + + _bsw_bytes_unsafe(bsw, len, byt); + } +} + +void +ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); + uint64_t bits = bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_mat_bytes_unsafe(bsw, len, byt); +} + +static inline void +_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + _bsw8_unsafe(bsw, 2, 3); + _bsw_mat64_unsafe(bsw, len, val); +} + +void +ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); + uint64_t bits = 2 + bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_back64(bsw, len, val); +} + +static inline void +_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + _bsw_bit_unsafe(bsw, 0); + _bsw_mat64_unsafe(bsw, len, val); +} + +void +ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) +{ + uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); + uint64_t bits = 1 + bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_atom64(bsw, len, val); +} + +static inline void +_bsw_atom_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + _bsw_bit_unsafe(bsw, 0); + _bsw_mat_bytes_unsafe(bsw, len, byt); +} + +void +ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) +{ + uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); + uint64_t bits = 1 + bsw->off + next; + uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw_atom_bytes_unsafe(bsw, len, byt); +} + +void +ur_bsw_cell(ur_bsw_t *bsw) +{ + uint8_t bits = 2 + bsw->off; + uint8_t need = (bits >> 3) + !!ur_mask_3(bits); + + if ( bsw->fill + need >= bsw->size ) { + ur_bsw_grow(bsw, ur_max(need, bsw->prev)); + } + + _bsw8_unsafe(bsw, 2, 1); +} diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 8888919ea7..a10579c7cb 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -9,8 +9,14 @@ #include +#include "ur/defs.h" #include "ur/hashcons.h" +// declarations of inline functions +// +uint64_t +ur_met0_bytes(uint8_t *byt, uint64_t len); + ur_mug ur_mug_bytes(const uint8_t *byt, uint64_t len) { @@ -402,15 +408,6 @@ ur_bytes(ur_root_t *r, ur_nref ref, uint8_t **byt, uint64_t *len) } } -static inline uint64_t -_met0_bytes(uint8_t *byt, uint64_t len) -{ - // XX requires no trailing null bytes - // - uint64_t last = len - 1; - return (last << 3) + ur_met0_8(byt[last]); -} - uint64_t ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) { @@ -452,7 +449,7 @@ ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) uint64_t len = r->atoms.lens[idx]; uint8_t *byt = r->atoms.bytes[idx]; - m_bit = _met0_bytes(byt, len); + m_bit = ur_met0_bytes(byt, len); } switch ( bloq ) { @@ -514,8 +511,8 @@ _cons_unsafe(ur_cells_t *cells, ur_mug mug, ur_nref hed, ur_nref tal) return cel; } -static ur_nref -_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) +ur_nref +ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) { ur_atoms_t *atoms = &(r->atoms); ur_dict_t *dict = &(atoms->dict); @@ -570,7 +567,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) // produce a direct atom if possible // - if ( 62 >= _met0_bytes(byt, len) ) { + if ( 62 >= ur_met0_bytes(byt, len) ) { uint64_t i, direct = 
0; for ( i = 0; i < len; i++ ) { @@ -584,7 +581,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) assert( copy ); memcpy(copy, byt, len); - return _coin_bytes_unsafe(r, copy, len); + return ur_coin_bytes_unsafe(r, copy, len); } } @@ -608,7 +605,7 @@ ur_coin64(ur_root_t *r, uint64_t n) byt[6] = ur_mask_8(n >> 48); byt[7] = ur_mask_8(n >> 56); - return _coin_bytes_unsafe(r, byt, 8); + return ur_coin_bytes_unsafe(r, byt, 8); } } @@ -883,1425 +880,3 @@ ur_walk_fore(ur_root_t *r, free(don); } - -void -ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) -{ - uint64_t size = bsw->size; - uint64_t next = size + step; - - bsw->bytes = realloc(bsw->bytes, next); - assert(bsw->bytes); - memset(bsw->bytes + size, 0, step); - - bsw->prev = size; - bsw->size = next; -} - -ur_bool_t -ur_bsw_sane(ur_bsw_t *bsw) -{ - return ( (8 > bsw->off) - && ((bsw->fill << 3) + bsw->off == bsw->bits) ); -} - -static inline void -_bsw_bit_unsafe(ur_bsw_t *bsw, uint8_t bit) -{ - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - - bsw->bytes[fill] ^= (bit & 1) << off; - - if ( 7 == off ) { - bsw->fill = 1 + fill; - bsw->off = 0; - } - else { - bsw->off = 1 + off; - } - - bsw->bits++; -} - -void -ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit) -{ - if ( (7 == bsw->off) - && ((1 + bsw->fill) == bsw->size) ) - { - ur_bsw_grow(bsw, bsw->prev); - } - - _bsw_bit_unsafe(bsw, bit); -} - -static inline void -_bsw8_unsafe(ur_bsw_t *bsw, uint8_t len, uint8_t byt) -{ - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - uint8_t rest = 8 - off; - uint8_t l, m; - - // the least-significant bits of the input become the - // most-significant bits of a byte in the output stream - // - if ( len < rest ) { - l = byt & ((1 << len) - 1); - - bsw->bytes[fill] ^= l << off; - bsw->off = off + len; - } - // and vice-versa - // - else { - l = byt & ((1 << rest) - 1); - m = byt >> rest; - - bsw->bytes[fill++] ^= l << off; - off = len - rest; - bsw->bytes[fill] = m & ((1 << off) - 1); - - bsw->fill = fill; - bsw->off = off; 
- } - - bsw->bits += len; -} - -void -ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt) -{ - len = ur_min(8, len); - - if ( bsw->fill + !!((bsw->off + len) >> 3) >= bsw->size ) { - ur_bsw_grow(bsw, bsw->prev); - } - - _bsw8_unsafe(bsw, len, byt); -} - -static inline void -_bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) -{ - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - uint8_t *bytes = bsw->bytes; - - bsw->bits += len; - - if ( off ) { - uint8_t rest = 8 - off; - - if ( len < rest ) { - bytes[fill] ^= (val & ((1 << len) - 1)) << off; - bsw->off = off + len; - return; - } - - bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; - val >>= rest; - len -= rest; - } - - switch ( len >> 3 ) { - case 4: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - bytes[fill++] = ur_mask_8(val >> 24); - - // no offset is possible here - // - bsw->fill = fill; - bsw->off = 0; - return; - } - - case 3: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - val >>= 24; - } break; - - case 2: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - val >>= 16; - } break; - - case 1: { - bytes[fill++] = ur_mask_8(val); - val >>= 8; - } break; - } - - off = ur_mask_3(len); - - if ( off ) { - bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); - } - - bsw->fill = fill; - bsw->off = off; -} - -void -ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val) -{ - uint8_t bits, need; - - len = ur_min(32, len); - bits = bsw->off + len; - need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw32_unsafe(bsw, len, val); -} - -static inline void -_bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - uint8_t *bytes = bsw->bytes; - - bsw->bits += len; - - if ( off ) { - uint8_t rest = 8 - off; - - if ( len 
< rest ) { - bytes[fill] ^= (val & ((1 << len) - 1)) << off; - bsw->off = off + len; - return; - } - - bytes[fill++] ^= (val & ((1 << rest) - 1)) << off; - val >>= rest; - len -= rest; - } - - switch ( len >> 3 ) { - case 8: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - bytes[fill++] = ur_mask_8(val >> 24); - bytes[fill++] = ur_mask_8(val >> 32); - bytes[fill++] = ur_mask_8(val >> 40); - bytes[fill++] = ur_mask_8(val >> 48); - bytes[fill++] = ur_mask_8(val >> 56); - - // no offset is possible here - // - bsw->fill = fill; - bsw->off = 0; - return; - } - - case 7: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - bytes[fill++] = ur_mask_8(val >> 24); - bytes[fill++] = ur_mask_8(val >> 32); - bytes[fill++] = ur_mask_8(val >> 40); - bytes[fill++] = ur_mask_8(val >> 48); - val >>= 56; - } break; - - case 6: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - bytes[fill++] = ur_mask_8(val >> 24); - bytes[fill++] = ur_mask_8(val >> 32); - bytes[fill++] = ur_mask_8(val >> 40); - val >>= 48; - } break; - - case 5: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - bytes[fill++] = ur_mask_8(val >> 24); - bytes[fill++] = ur_mask_8(val >> 32); - val >>= 40; - } break; - - case 4: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - bytes[fill++] = ur_mask_8(val >> 24); - val >>= 32; - } break; - - case 3: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - bytes[fill++] = ur_mask_8(val >> 16); - val >>= 24; - } break; - - case 2: { - bytes[fill++] = ur_mask_8(val); - bytes[fill++] = ur_mask_8(val >> 8); - val >>= 16; - } break; - - case 1: { - bytes[fill++] = ur_mask_8(val); - val >>= 8; - } break; - } - - off = 
ur_mask_3(len); - - if ( off ) { - bytes[fill] = (uint8_t)(val & ((1 << off) - 1)); - } - - bsw->fill = fill; - bsw->off = off; -} - -void -ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - uint8_t bits, need; - - len = ur_min(64, len); - bits = bsw->off + len; - need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw64_unsafe(bsw, len, val); -} - -static inline void -_bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - uint64_t len_byt = len >> 3; - uint8_t len_bit = ur_mask_3(len); - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - - if ( !off ) { - memcpy(bsw->bytes + fill, byt, len_byt); - fill += len_byt; - off = len_bit; - - if ( off ) { - bsw->bytes[fill] = byt[len_byt] & ((1 << off) - 1); - } - } - // the least-significant bits of the input become the - // most-significant bits of a byte in the output stream, and vice-versa - // - else { - uint8_t rest = 8 - off; - uint8_t mask = (1 << rest) - 1; - uint8_t l, m = bsw->bytes[fill]; - uint64_t i; - - for ( i = 0; i < len_byt; i++ ) { - l = byt[i] & mask; - bsw->bytes[fill++] = m ^ (l << off); - m = byt[i] >> rest; - } - - if ( len_bit < rest ) { - l = byt[len_byt] & ((1 << len_bit) - 1); - bsw->bytes[fill] = m ^ (l << off); - off += len_bit; - } - else { - l = byt[len_byt] & mask; - bsw->bytes[fill++] = m ^ (l << off); - - m = byt[len_byt] >> rest; - - off = len_bit - rest; - bsw->bytes[fill] = m & ((1 << off) - 1); - } - } - - bsw->off = off; - bsw->fill = fill; - bsw->bits += len; -} - -void -ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - uint8_t bits = len + bsw->off; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_bytes_unsafe(bsw, len, byt); -} - -static inline void -_bsw_bex_unsafe(ur_bsw_t *bsw, uint8_t n) -{ - uint64_t fill = bsw->fill; - uint8_t off = bsw->off; - uint32_t bits 
= n + off; - - fill += bits >> 3; - off = ur_mask_3(bits); - - bsw->bytes[fill] ^= 1 << off; - - if ( 7 == off ) { - bsw->off = 0; - bsw->fill = 1 + fill; - } - else { - bsw->off = 1 + off; - bsw->fill = fill; - } - - bsw->bits += 1 + n; -} - -void -ur_bsw_bex(ur_bsw_t *bsw, uint8_t n) -{ - uint32_t bits = 1 + n + bsw->off; - uint8_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_bex_unsafe(bsw, n); -} - -static inline void -_bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - if ( 0 == len ) { - _bsw_bit_unsafe(bsw, 1); - } - else { - { - uint8_t nel = ur_met0_64(len); - _bsw_bex_unsafe(bsw, nel); - _bsw64_unsafe(bsw, nel - 1, len); - } - - _bsw64_unsafe(bsw, len, val); - } -} - -void -ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - uint8_t next, bits, need; - - len = ur_min(64, len); - next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - bits = bsw->off + next; - need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_mat64_unsafe(bsw, len, val); -} - -static inline void -_bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - if ( 0 == len ) { - _bsw_bit_unsafe(bsw, 1); - } - else { - // write run-length - // - { - uint8_t nel = ur_met0_64(len); - _bsw_bex_unsafe(bsw, nel); - _bsw64_unsafe(bsw, nel - 1, len); - } - - _bsw_bytes_unsafe(bsw, len, byt); - } -} - -void -ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); - uint64_t bits = bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_mat_bytes_unsafe(bsw, len, byt); -} - -static inline void -_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - _bsw8_unsafe(bsw, 2, 3); - _bsw_mat64_unsafe(bsw, len, val); -} - -void -ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - uint64_t bits = 2 + bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_back64(bsw, len, val); -} - -static inline void -_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - _bsw_bit_unsafe(bsw, 0); - _bsw_mat64_unsafe(bsw, len, val); -} - -void -ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) -{ - uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - uint64_t bits = 1 + bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_atom64(bsw, len, val); -} - -static inline void -_bsw_atom_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - _bsw_bit_unsafe(bsw, 0); - _bsw_mat_bytes_unsafe(bsw, len, byt); -} - -void -ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) -{ - uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); - uint64_t bits = 1 + bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw_atom_bytes_unsafe(bsw, len, byt); -} - -void -ur_bsw_cell(ur_bsw_t *bsw) -{ - uint8_t bits = 2 + bsw->off; - uint8_t need = (bits >> 3) + !!ur_mask_3(bits); - - if ( bsw->fill + need >= bsw->size ) { - ur_bsw_grow(bsw, ur_max(need, bsw->prev)); - } - - _bsw8_unsafe(bsw, 2, 1); -} - -ur_bool_t -ur_bsr_sane(ur_bsr_t *bsr) -{ - ur_bool_t ret = 8 > bsr->off; - - if ( !bsr->left ) { - ret = ret && (!bsr->off && !bsr->bytes); - } - - return ret; -} - -ur_cue_res_e -ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out) -{ - uint64_t left = bsr->left; - - if ( !left ) { - return ur_cue_gone; - } - else { - const uint8_t *b = bsr->bytes; - uint8_t off = bsr->off; - uint8_t bit = (b[0] >> off) & 1; - - if ( 7 == off ) { - bsr->bytes = ( --left ) ? (b + 1) : 0; - bsr->left = left; - bsr->off = 0; - } - else { - bsr->off = 1 + off; - } - - bsr->bits++; - - *out = bit; - return ur_cue_good; - } -} - -uint8_t -ur_bsr_bit_any(ur_bsr_t *bsr) -{ - uint64_t left = bsr->left; - - bsr->bits++; - - if ( !left ) { - return 0; - } - else { - const uint8_t *b = bsr->bytes; - uint8_t off = bsr->off; - uint8_t bit = (b[0] >> off) & 1; - - if ( 7 == off ) { - bsr->bytes = ( --left ) ? 
(b + 1) : 0; - bsr->left = left; - bsr->off = 0; - } - else { - bsr->off = 1 + off; - } - - return bit; - } -} - -uint8_t -ur_bsr8_any(ur_bsr_t *bsr, uint8_t len) -{ - uint64_t left = bsr->left; - - len = ur_min(8, len); - - bsr->bits += len; - - if ( !left ) { - return 0; - } - else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *b = bsr->bytes; - uint8_t m = b[0] >> off; - - if ( len < rest ) { - bsr->off = off + len; - return m & ((1 << len) - 1); - } - else if ( 1 == left ) { - bsr->off = 0; - bsr->left = 0; - bsr->bytes = 0; - return m; - } - else { - off = len - rest; - - bsr->off = off; - bsr->left--; - bsr->bytes++; - - { - uint8_t l = b[1] & ((1 << off) - 1); - return m ^ (l << rest); - } - } - } -} - -uint32_t -ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) -{ - uint64_t left = bsr->left; - - len = ur_min(32, len); - - bsr->bits += len; - - if ( !left ) { - return 0; - } - else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *b = bsr->bytes; - uint32_t m = b[0] >> off; - - if ( len < rest ) { - bsr->off = off + len; - return m & ((1 << len) - 1); - } - else { - uint8_t mask, len_byt; - uint32_t l; - - len -= rest; - left--; - bsr->bytes++; - - len_byt = len >> 3; - - if ( len_byt >= left ) { - len_byt = left; - bsr->off = off = 0; - bsr->left = 0; - bsr->bytes = 0; - } - else { - bsr->off = off = ur_mask_3(len); - bsr->left = left - len_byt; - bsr->bytes += len_byt; - } - - mask = (1 << off) - 1; - - switch ( len_byt ) { - case 4: { - l = (uint32_t)b[1] - ^ (uint32_t)b[2] << 8 - ^ (uint32_t)b[3] << 16 - ^ (uint32_t)b[4] << 24; - } break; - - case 3: { - l = (uint32_t)b[1] - ^ (uint32_t)b[2] << 8 - ^ (uint32_t)b[3] << 16 - ^ (uint32_t)(b[4] & mask) << 24; - } break; - - case 2: { - l = (uint32_t)b[1] - ^ (uint32_t)b[2] << 8 - ^ (uint32_t)(b[3] & mask) << 16; - } break; - - case 1: { - l = (uint32_t)b[1] - ^ (uint32_t)(b[2] & mask) << 8; - } break; - - case 0: { - l = (uint32_t)(b[1] & mask); - } break; - } - - return 
m ^ (l << rest); - } - } -} - -uint64_t -ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) -{ - uint64_t left = bsr->left; - - len = ur_min(64, len); - - bsr->bits += len; - - if ( !left ) { - return 0; - } - else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *b = bsr->bytes; - uint64_t m = b[0] >> off; - - if ( len < rest ) { - bsr->off = off + len; - return m & ((1 << len) - 1); - } - else { - uint8_t mask, len_byt; - uint64_t l; - - len -= rest; - left--; - bsr->bytes++; - - len_byt = len >> 3; - - if ( len_byt >= left ) { - len_byt = left; - bsr->off = off = 0; - bsr->left = 0; - bsr->bytes = 0; - } - else { - bsr->off = off = ur_mask_3(len); - bsr->left = left - len_byt; - bsr->bytes += len_byt; - } - - mask = (1 << off) - 1; - - switch ( len_byt ) { - case 8: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)b[6] << 40 - ^ (uint64_t)b[7] << 48 - ^ (uint64_t)b[8] << 56; - } break; - - case 7: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)b[6] << 40 - ^ (uint64_t)b[7] << 48 - ^ (uint64_t)(b[8] & mask) << 56; - } break; - - case 6: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)b[6] << 40 - ^ (uint64_t)(b[7] & mask) << 48; - } break; - - case 5: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)(b[6] & mask) << 40; - } break; - - case 4: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)(b[5] & mask) << 32; - } break; - - case 3: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)(b[4] & mask) << 24; - } break; - - case 2: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)(b[3] & mask) << 16; - } 
break; - - case 1: { - l = (uint64_t)b[1] - ^ (uint64_t)(b[2] & mask) << 8; - } break; - - case 0: { - l = (uint64_t)(b[1] & mask); - } break; - } - - return m ^ (l << rest); - } - } -} - -void -ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) -{ - uint64_t left = bsr->left; - - if ( !left ) { - return; - } - else { - const uint8_t *b = bsr->bytes; - uint8_t off = bsr->off; - uint64_t len_byt = len >> 3; - uint8_t len_bit = ur_mask_3(len); - - if ( !off ) { - uint8_t bits = off + len_bit; - uint64_t need = len_byt + (bits >> 3) + !!ur_mask_3(bits); - - if ( need > left ) { - memcpy(out, b, left); - bsr->bytes = 0; - bsr->left = 0; - } - else { - memcpy(out, b, len_byt); - off = len_bit; - left -= len_byt; - - if ( !left ) { - bsr->bytes = 0; - } - else { - bsr->bytes += len_byt; - } - - bsr->left = left; - - if ( off ) { - out[len_byt] = b[len_byt] & ((1 << off) - 1); - } - } - } - // the most-significant bits from a byte in the stream - // become the least-significant bits of an output byte, and vice-versa - // - else { - uint64_t need = len_byt + (len_bit >> 3) + !!ur_mask_3(len_bit); - ur_bool_t end = need >= left; - uint64_t max = end ? 
(left - 1) : len_byt; - uint8_t rest = 8 - off; - uint8_t mask = (1 << off) - 1; - uint8_t byt = b[0]; - uint8_t l, m = byt >> off; - uint64_t i; - - for ( i = 0; i < max; i++ ) { - byt = b[1ULL + i]; - l = byt & mask; - out[i] = m ^ (l << rest); - m = byt >> off; - } - - if ( end ) { - if ( len_bit && len_bit < rest ) { - out[max] = m & ((1 << len_bit) - 1); - bsr->bytes += max; - left -= max; - off += len_bit; - } - else { - out[max] = m; - bsr->bytes = 0; - left = 0; - off = 0; - } - } - else { - uint8_t bits = off + len_bit; - - bsr->bytes += max; - left -= max + !!(bits >> 3); - off = ur_mask_3(bits); - - if ( len_bit <= rest ) { - out[max] = m & ((1 << len_bit) - 1); - } - else { - l = b[1ULL + max] & ((1 << off) - 1);; - out[max] = m ^ (l << rest); - } - } - } - - bsr->off = off; - bsr->left = left; - bsr->bits += len; - } -} - -static inline ur_cue_res_e -_bsr_set_gone(ur_bsr_t *bsr, uint8_t bits) -{ - bsr->bits += bits; - bsr->bytes = 0; - bsr->left = 0; - bsr->off = 0; - return ur_cue_gone; -} - -ur_cue_res_e -ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) -{ - uint64_t left = bsr->left; - - if ( !left ) { - return ur_cue_gone; - } - else { - const uint8_t *b = bsr->bytes; - uint8_t off = bsr->off; - uint8_t bit = (b[0] >> off) & 1; - uint8_t len = 1; - - if ( 0 == bit ) { - *out = ur_jam_atom; - } - else { - if ( 7 == off ) { - if ( 1 == left ) { - return _bsr_set_gone(bsr, 1); - } - - bit = b[1] & 1; - } - else { - bit = (b[0] >> (off + 1)) & 1; - } - - len++; - *out = ( 0 == bit ) ? 
ur_jam_cell : ur_jam_back; - } - - { - uint8_t bits = off + len; - uint8_t bytes = bits >> 3; - - left -= bytes; - - if ( !left ) { - bsr->bytes = 0; - bsr->left = 0; - bsr->off = 0; - } - else { - bsr->bytes += bytes; - bsr->left = left; - bsr->off = ur_mask_3(bits); - } - - bsr->bits += len; - - return ur_cue_good; - } - } -} - -static inline ur_cue_res_e -_bsr_rub_log_meme(ur_bsr_t *bsr) -{ - bsr->bits += 256; - bsr->bytes += 32; - bsr->left -= 32; - return ur_cue_meme; -} - -ur_cue_res_e -ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) -{ - uint64_t left = bsr->left; - - if ( !left ) { - return ur_cue_gone; - } - else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *b = bsr->bytes; - uint8_t byt = b[0] >> off; - uint8_t skip = 0; - - while ( !byt ) { - if ( 32 == skip ) { - return _bsr_rub_log_meme(bsr); - } - - skip++; - - if ( skip == left ) { - return _bsr_set_gone(bsr, (skip << 3) - off); - } - - byt = b[skip]; - } - - { - uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); - - if ( 255 < zeros ) { - return _bsr_rub_log_meme(bsr); - } - else { - uint32_t bits = off + 1 + zeros; - uint8_t bytes = bits >> 3; - - left -= bytes; - - bsr->bytes = left ? 
(b + bytes) : 0; - bsr->bits += 1 + zeros; - bsr->left = left; - bsr->off = ur_mask_3(bits); - - *out = zeros; - return ur_cue_good; - } - } - } -} - -ur_cue_res_e -ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) -{ - ur_cue_res_e res; - uint8_t len; - - if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { - return res; - } - else if ( 64 <= len ) { - return ur_cue_meme; - } - - switch ( len ) { - case 0: { - *out = 0; - } break; - - case 1: { - *out = 1; - } break; - - default: { - len--; - *out = ur_bsr64_any(bsr, len) ^ (1ULL << len); - } break; - } - - return ur_cue_good; -} - -static inline void -_bsw_atom(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) -{ - switch ( ur_nref_tag(ref) ) { - default: assert(0); - - case ur_direct: return ur_bsw_atom64(bsw, len, ref); - - case ur_iatom: { - uint8_t *byt = r->atoms.bytes[ur_nref_idx(ref)]; - return ur_bsw_atom_bytes(bsw, len, byt); - } - } -} - -typedef struct _jam_s { - ur_dict64_t dict; - ur_bsw_t bsw; -} _jam_t; - -static void -_jam_atom(ur_root_t *r, ur_nref ref, void *ptr) -{ - _jam_t *j = ptr; - ur_dict64_t *dict = &(j->dict); - ur_bsw_t *bsw = &j->bsw; - uint64_t bak, len = ur_met(r, 0, ref); - - if ( !ur_dict64_get(r, dict, ref, &bak) ) { - ur_dict64_put(r, dict, ref, bsw->bits); - - _bsw_atom(r, ref, bsw, len); - } - else { - uint64_t len_bak = ur_met0_64(bak); - - if ( len <= len_bak ) { - _bsw_atom(r, ref, bsw, len); - } - else { - ur_bsw_back64(bsw, len_bak, bak); - } - } -} - -static ur_bool_t -_jam_cell(ur_root_t *r, ur_nref ref, void *ptr) -{ - _jam_t *j = ptr; - ur_dict64_t *dict = &(j->dict); - ur_bsw_t *bsw = &j->bsw; - uint64_t bak; - - if ( !ur_dict64_get(r, dict, ref, &bak) ) { - ur_dict64_put(r, dict, ref, bsw->bits); - - ur_bsw_cell(bsw); - return 1; - } - else { - ur_bsw_back64(bsw, ur_met0_64(bak), bak); - return 0; - } -} - -uint64_t -ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) -{ - _jam_t j = {0}; - { - uint64_t fib11 = 89, fib12 = 144; - - j.bsw.prev = 
fib11; - j.bsw.size = fib12; - j.bsw.bytes = calloc(j.bsw.size, 1); - - ur_dict64_grow(r, &j.dict, fib11, fib12); - } - - ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); - ur_dict_free((ur_dict_t*)&j.dict); - - *len = j.bsw.fill + !!j.bsw.off; - *byt = j.bsw.bytes; - - return j.bsw.bits; -} - -typedef struct _cue_s { - ur_dict64_t dict; - ur_bsr_t bsr; -} _cue_t; - -static inline ur_cue_res_e -_cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) -{ - ur_bsr_t *bsr = &c->bsr; - ur_cue_res_e res; - uint64_t len; - - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; - } - - if ( 62 >= len ) { - *out = (ur_nref)ur_bsr64_any(bsr, len); - } - else { - uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); - uint8_t *byt = calloc(len_byt, 1); - ur_bsr_bytes_any(bsr, len, byt); - - // strip trailing zeroes - // - while ( len_byt && !byt[len_byt - 1] ) { - len_byt--; - } - - *out = _coin_bytes_unsafe(r, byt, len_byt); - } - - return ur_cue_good; -} - -static inline ur_cue_res_e -_cue_back(ur_bsr_t *bsr, uint64_t *out) -{ - ur_cue_res_e res; - uint64_t len; - - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; - } - else if ( 62 < len ) { - return ur_cue_meme; - } - - *out = ur_bsr64_any(bsr, len); - return ur_cue_good; -} - -#define STACK_ROOT 0 -#define STACK_HEAD 1 -#define STACK_TAIL 2 - -// stack frame for recording head vs tail iteration -// -// In Hoon, this structure would be as follows: -// -// $% [%root ~] -// [%head cursor=@] -// [%tail cursor=@ hed-ref=*] -// == -// -typedef struct _cue_frame_s { - uint8_t tag; - uint64_t bits; - ur_nref ref; -} _cue_frame_t; - -typedef struct _cue_stack_s { - uint32_t prev; - uint32_t size; - uint32_t fill; - _cue_frame_t* f; -} _cue_stack_t; - -static inline void -_cue_stack_push(_cue_stack_t *s, uint8_t tag, uint64_t bits, ur_nref ref) -{ - if ( s->fill == s->size ) { - uint32_t next = s->prev + s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - s->prev = s->size; - s->size = next; - } 
- - _cue_frame_t* f = &(s->f[s->fill++]); - f->tag = tag; - f->bits = bits; - f->ref = ref; -} - -ur_cue_res_e -ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out) -{ - ur_cue_res_e res; - ur_nref ref; - _cue_t c = {0}; - _cue_stack_t s = { .prev = 89, .size = 144, .fill = 0, .f = 0 }; - - // init bitstream-reader - // - c.bsr.left = len; - c.bsr.bytes = byt; - - // init dictionary - // - { - uint64_t fib11 = 89, fib12 = 144; - ur_dict64_grow(r, &c.dict, fib11, fib12); - } - - // setup stack - // - s.f = malloc(s.size * sizeof(*s.f)); - _cue_stack_push(&s, STACK_ROOT, 0, 0); - - // advance into buffer - // - advance: { - uint64_t bits = c.bsr.bits; - ur_cue_tag_e tag; - - if ( ur_cue_good != (res = ur_bsr_tag(&c.bsr, &tag)) ) { - goto perfect; - } - - switch ( tag ) { - default: assert(0); - - case ur_jam_atom: { - if ( ur_cue_good != (res = _cue_atom(r, &c, &ref)) ) { - goto perfect; - } - else { - ur_dict64_put(r, &c.dict, bits, (uint64_t)ref); - goto retreat; - } - } - - case ur_jam_back: { - uint64_t bak, val; - - if ( ur_cue_good != (res = _cue_back(&c.bsr, &bak)) ) { - goto perfect; - } - else if ( !ur_dict64_get(r, &c.dict, bak, &val) ) { - // XX distinguish bad backref? 
- // - res = ur_cue_gone; - goto perfect; - } - - ref = (ur_nref)val; - goto retreat; - } - - case ur_jam_cell: { - _cue_stack_push(&s, STACK_HEAD, bits, 0); - goto advance; - } - } - } - - // retreat down the stack - // - retreat: { - _cue_frame_t f = s.f[--s.fill]; - - switch ( f.tag ) { - default: assert(0); - - case STACK_ROOT: { - res = ur_cue_good; - goto perfect; - } - - case STACK_HEAD: { - _cue_stack_push(&s, STACK_TAIL, f.bits, ref); - goto advance; - } - - case STACK_TAIL: { - ref = ur_cons(r, f.ref, ref); - ur_dict64_put(r, &c.dict, f.bits, (uint64_t)ref); - goto retreat; - } - } - } - - // we done - // - perfect: { - ur_dict_free((ur_dict_t*)&c.dict); - free(s.f); - - if ( ur_cue_good == res ) { - *out = ref; - } - - return res; - } -} diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c new file mode 100644 index 0000000000..bfcc859914 --- /dev/null +++ b/pkg/urbit/ur/serial.c @@ -0,0 +1,299 @@ +#include +#include + +#include "ur/ur.h" + +static inline void +_bsw_atom(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: return ur_bsw_atom64(bsw, len, ref); + + case ur_iatom: { + uint8_t *byt = r->atoms.bytes[ur_nref_idx(ref)]; + return ur_bsw_atom_bytes(bsw, len, byt); + } + } +} + +typedef struct _jam_s { + ur_dict64_t dict; + ur_bsw_t bsw; +} _jam_t; + +static void +_jam_atom(ur_root_t *r, ur_nref ref, void *ptr) +{ + _jam_t *j = ptr; + ur_dict64_t *dict = &(j->dict); + ur_bsw_t *bsw = &j->bsw; + uint64_t bak, len = ur_met(r, 0, ref); + + if ( !ur_dict64_get(r, dict, ref, &bak) ) { + ur_dict64_put(r, dict, ref, bsw->bits); + + _bsw_atom(r, ref, bsw, len); + } + else { + uint64_t len_bak = ur_met0_64(bak); + + if ( len <= len_bak ) { + _bsw_atom(r, ref, bsw, len); + } + else { + ur_bsw_back64(bsw, len_bak, bak); + } + } +} + +static ur_bool_t +_jam_cell(ur_root_t *r, ur_nref ref, void *ptr) +{ + _jam_t *j = ptr; + ur_dict64_t *dict = &(j->dict); + ur_bsw_t *bsw = 
&j->bsw; + uint64_t bak; + + if ( !ur_dict64_get(r, dict, ref, &bak) ) { + ur_dict64_put(r, dict, ref, bsw->bits); + + ur_bsw_cell(bsw); + return 1; + } + else { + ur_bsw_back64(bsw, ur_met0_64(bak), bak); + return 0; + } +} + +uint64_t +ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) +{ + _jam_t j = {0}; + { + uint64_t fib11 = 89, fib12 = 144; + + j.bsw.prev = fib11; + j.bsw.size = fib12; + j.bsw.bytes = calloc(j.bsw.size, 1); + + ur_dict64_grow(r, &j.dict, fib11, fib12); + } + + ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); + ur_dict_free((ur_dict_t*)&j.dict); + + *len = j.bsw.fill + !!j.bsw.off; + *byt = j.bsw.bytes; + + return j.bsw.bits; +} + +typedef struct _cue_s { + ur_dict64_t dict; + ur_bsr_t bsr; +} _cue_t; + +static inline ur_cue_res_e +_cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) +{ + ur_bsr_t *bsr = &c->bsr; + ur_cue_res_e res; + uint64_t len; + + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + + if ( 62 >= len ) { + *out = (ur_nref)ur_bsr64_any(bsr, len); + } + else { + uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); + uint8_t *byt = calloc(len_byt, 1); + ur_bsr_bytes_any(bsr, len, byt); + + // strip trailing zeroes + // + while ( len_byt && !byt[len_byt - 1] ) { + len_byt--; + } + + *out = ur_coin_bytes_unsafe(r, byt, len_byt); + } + + return ur_cue_good; +} + +static inline ur_cue_res_e +_cue_back(ur_bsr_t *bsr, uint64_t *out) +{ + ur_cue_res_e res; + uint64_t len; + + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 < len ) { + return ur_cue_meme; + } + + *out = ur_bsr64_any(bsr, len); + return ur_cue_good; +} + +#define STACK_ROOT 0 +#define STACK_HEAD 1 +#define STACK_TAIL 2 + +// stack frame for recording head vs tail iteration +// +// In Hoon, this structure would be as follows: +// +// $% [%root ~] +// [%head cursor=@] +// [%tail cursor=@ hed-ref=*] +// == +// +typedef struct _cue_frame_s { + uint8_t tag; + uint64_t bits; + ur_nref ref; +} 
_cue_frame_t; + +typedef struct _cue_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue_frame_t* f; +} _cue_stack_t; + +static inline void +_cue_stack_push(_cue_stack_t *s, uint8_t tag, uint64_t bits, ur_nref ref) +{ + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; + } + + _cue_frame_t* f = &(s->f[s->fill++]); + f->tag = tag; + f->bits = bits; + f->ref = ref; +} + +ur_cue_res_e +ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out) +{ + ur_cue_res_e res; + ur_nref ref; + _cue_t c = {0}; + _cue_stack_t s = { .prev = 89, .size = 144, .fill = 0, .f = 0 }; + + // init bitstream-reader + // + c.bsr.left = len; + c.bsr.bytes = byt; + + // init dictionary + // + { + uint64_t fib11 = 89, fib12 = 144; + ur_dict64_grow(r, &c.dict, fib11, fib12); + } + + // setup stack + // + s.f = malloc(s.size * sizeof(*s.f)); + _cue_stack_push(&s, STACK_ROOT, 0, 0); + + // advance into buffer + // + advance: { + uint64_t bits = c.bsr.bits; + ur_cue_tag_e tag; + + if ( ur_cue_good != (res = ur_bsr_tag(&c.bsr, &tag)) ) { + goto perfect; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_atom: { + if ( ur_cue_good != (res = _cue_atom(r, &c, &ref)) ) { + goto perfect; + } + else { + ur_dict64_put(r, &c.dict, bits, (uint64_t)ref); + goto retreat; + } + } + + case ur_jam_back: { + uint64_t bak, val; + + if ( ur_cue_good != (res = _cue_back(&c.bsr, &bak)) ) { + goto perfect; + } + else if ( !ur_dict64_get(r, &c.dict, bak, &val) ) { + // XX distinguish bad backref? 
+ // + res = ur_cue_gone; + goto perfect; + } + + ref = (ur_nref)val; + goto retreat; + } + + case ur_jam_cell: { + _cue_stack_push(&s, STACK_HEAD, bits, 0); + goto advance; + } + } + } + + // retreat down the stack + // + retreat: { + _cue_frame_t f = s.f[--s.fill]; + + switch ( f.tag ) { + default: assert(0); + + case STACK_ROOT: { + res = ur_cue_good; + goto perfect; + } + + case STACK_HEAD: { + _cue_stack_push(&s, STACK_TAIL, f.bits, ref); + goto advance; + } + + case STACK_TAIL: { + ref = ur_cons(r, f.ref, ref); + ur_dict64_put(r, &c.dict, f.bits, (uint64_t)ref); + goto retreat; + } + } + } + + // we done + // + perfect: { + ur_dict_free((ur_dict_t*)&c.dict); + free(s.f); + + if ( ur_cue_good == res ) { + *out = ref; + } + + return res; + } +} From 9c008c269b6e57ee6e8c08dbee98131c62a88883 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 24 Aug 2020 13:28:22 -0700 Subject: [PATCH 048/123] u3: refactors u3u_uniq() --- pkg/urbit/include/noun/events.h | 11 +++ pkg/urbit/noun/events.c | 22 +++++ pkg/urbit/noun/urth.c | 150 ++++++++++++++++++++------------ 3 files changed, 126 insertions(+), 57 deletions(-) diff --git a/pkg/urbit/include/noun/events.h b/pkg/urbit/include/noun/events.h index 965548a1fb..67729118f8 100644 --- a/pkg/urbit/include/noun/events.h +++ b/pkg/urbit/include/noun/events.h @@ -97,3 +97,14 @@ */ c3_o u3e_wipe(void); + + + /* u3e_yolo(): disable dirty page tracking, read/write whole loom. + */ + c3_o + u3e_yolo(void); + + /* u3e_foul(): dirty all the pages of the loom. + */ + void + u3e_foul(void); diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index 289c30d399..1e1fa6b815 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -995,3 +995,25 @@ u3e_wipe(void) return c3y; } + +/* u3e_yolo(): disable dirty page tracking, read/write whole loom. 
+*/ +c3_o +u3e_yolo(void) +{ + // NB: u3e_save() will reinstate protection flags + // + if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, (PROT_READ | PROT_WRITE)) ) { + return c3n; + } + + return c3y; +} + +/* u3e_foul(): dirty all the pages of the loom. +*/ +void +u3e_foul(void) +{ + memset((void*)u3P.dit_w, 0xff, u3a_pages >> 3); +} diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index aa3557bd12..142d5a0467 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -221,6 +221,23 @@ _cu_hamt_walk(u3_noun kev, void* ptr) vec_u->refs[vec_u->fill++] = _cu_from_loom(rot_u, kev); } +/* _cu_all_from_loom(): reallocate essential persistent state off-loom. +** +** NB: destroys the loom. +*/ +static ur_nref +_cu_all_from_loom(ur_root_t* rot_u, ur_nvec_t* cod_u) +{ + ur_nref ken = _cu_from_loom(rot_u, u3A->roc); + c3_w cod_w = u3h_wyt(u3R->jed.cod_p); + _cu_vec dat_u = { .vec_u = cod_u, .rot_u = rot_u }; + + ur_nvec_init(cod_u, cod_w); + u3h_walk_with(u3R->jed.cod_p, _cu_hamt_walk, &dat_u); + + return ken; +} + typedef struct _cu_loom_s { ur_dict32_t map_u; // direct->indirect mapping u3_atom *vat; // indirect atoms @@ -312,43 +329,72 @@ _cu_cells_to_loom(ur_root_t* rot_u, _cu_loom* lom_u) } } -/* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. +/* _cu_all_to_loom(): reallocate all of [rot_u] on the loom, restore roots. 
*/ -void -u3u_uniq(void) +static void +_cu_all_to_loom(ur_root_t* rot_u, ur_nref ken, ur_nvec_t* cod_u) { - c3_assert( &(u3H->rod_u) == u3R ); + _cu_loom lom_u = {0}; + ur_dict32_grow(0, &lom_u.map_u, 89, 144); - // allow read/write on the whole loom, bypassing page tracking + _cu_atoms_to_loom(rot_u, &lom_u); + _cu_cells_to_loom(rot_u, &lom_u); + + // restore kernel reference (always a cell) // - // NB: u3e_save() will reinstate protection flags + u3A->roc = lom_u.cel[ur_nref_idx(ken)]; + + // restore cold jet state (always cells) // - if ( 0 != mprotect((void *)u3_Loom, u3a_bytes, (PROT_READ | PROT_WRITE)) ) { - c3_assert(0); + { + c3_d max_d = cod_u->fill; + c3_d i_d; + ur_nref ref; + u3_noun kev; + + for ( i_d = 0; i_d < max_d; i_d++) { + ref = cod_u->refs[i_d]; + kev = lom_u.cel[ur_nref_idx(ref)]; + u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); + u3z(kev); + } + } + + _cu_loom_free(&lom_u); +} + +/* _cu_realloc(): hash-cons roots off-loom, reallocate on loom. +*/ +static ur_nref +_cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) +{ + // bypassing page tracking as an optimization + // + // NB: u3e_yolo() will mark all as dirty, and + // u3e_save() will reinstate protection flags + // + if ( c3n == u3e_yolo() ) { + if ( fil_u ) { + fprintf(fil_u, "uniq: unable to bypass page tracking, continuing\r\n"); + } } // stash event number // c3_d eve_d = u3A->ent_d; - // reallocate kernel and cold jet state // ur_root_t* rot_u = ur_hcon_init(); - ur_nref ken = _cu_from_loom(rot_u, u3A->roc); - - ur_nvec_t cod_u; - { - c3_w cod_w = u3h_wyt(u3R->jed.cod_p); - _cu_vec dat_u = { .vec_u = &cod_u, .rot_u = rot_u }; - ur_nvec_init(&cod_u, cod_w); - u3h_walk_with(u3R->jed.cod_p, _cu_hamt_walk, &dat_u); - } + ur_nvec_t cod_u; + ur_nref ken = _cu_all_from_loom(rot_u, &cod_u); // print [rot_u] measurements // - ur_hcon_info(stderr, rot_u); - fprintf(stderr, "\r\n"); + if ( fil_u ) { + ur_hcon_info(fil_u, rot_u); + fprintf(stderr, "\r\n"); + } // reinitialize loom // 
@@ -356,43 +402,9 @@ u3u_uniq(void) // u3m_pave(c3y, c3n); - { - // reallocate all nouns on the loom - // - _cu_loom lom_u = {0}; - - ur_dict32_grow(0, &lom_u.map_u, 89, 144); - - _cu_atoms_to_loom(rot_u, &lom_u); - _cu_cells_to_loom(rot_u, &lom_u); - - // restore kernel reference (always a cell) - // - u3A->roc = lom_u.cel[ur_nref_idx(ken)]; - - // restore cold jet state (always cells) - // - { - c3_d max_d = cod_u.fill; - c3_d i_d; - ur_nref ref; - u3_noun kev; - - for ( i_d = 0; i_d < max_d; i_d++) { - ref = cod_u.refs[i_d]; - kev = lom_u.cel[ur_nref_idx(ref)]; - u3h_put(u3R->jed.cod_p, u3h(kev), u3k(u3t(kev))); - u3z(kev); - } - } - - _cu_loom_free(&lom_u); - } - - // dispose off-loom structures + // reallocate all nouns on the loom // - ur_nvec_free(&cod_u); - ur_hcon_free(rot_u); + _cu_all_to_loom(rot_u, ken, &cod_u); // allocate new hot jet state; re-establish warm // @@ -405,5 +417,29 @@ u3u_uniq(void) // mark all pages dirty // - memset((void*)u3P.dit_w, 0xff, u3a_pages >> 3); + u3e_foul(); + + *tor_u = rot_u; + *doc_u = cod_u; + + return ken; +} + +/* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. 
+*/ +void +u3u_uniq(void) +{ + ur_root_t* rot_u; + ur_nvec_t cod_u; + ur_nref ken; + + c3_assert( &(u3H->rod_u) == u3R ); + + ken = _cu_realloc(stderr, &rot_u, &cod_u); + + // dispose off-loom structures + // + ur_nvec_free(&cod_u); + ur_hcon_free(rot_u); } From 5dca7bd62070bf4d113766b789b8d821fad2c6b9 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 24 Aug 2020 16:59:12 -0700 Subject: [PATCH 049/123] u3: implements u3u_cram() --- pkg/urbit/include/noun/urth.h | 5 + pkg/urbit/noun/urth.c | 172 +++++++++++++++++++++++++++++++++- pkg/urbit/worker/main.c | 15 ++- 3 files changed, 188 insertions(+), 4 deletions(-) diff --git a/pkg/urbit/include/noun/urth.h b/pkg/urbit/include/noun/urth.h index d777debc5e..988920e602 100644 --- a/pkg/urbit/include/noun/urth.h +++ b/pkg/urbit/include/noun/urth.h @@ -6,3 +6,8 @@ */ void u3u_uniq(void); + + /* u3u_cram(): globably deduplicate memory, and write a rock to disk. + */ + c3_o + u3u_cram(c3_c* dir_c, c3_d eve_d); diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 142d5a0467..adfd8c7dc1 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -7,7 +7,7 @@ #include #include "all.h" -#include "ur/hashcons.h" +#include "ur/ur.h" /* _cu_met_3(): atom bytewidth a la u3r_met(3, ...) 
*/ @@ -443,3 +443,173 @@ u3u_uniq(void) ur_nvec_free(&cod_u); ur_hcon_free(rot_u); } + +static c3_o +_cu_rock_save(c3_c* dir_c, c3_d eve_d, c3_d len_d, c3_y* byt_y) +{ + c3_i fid_i; + + // open rock file, creating the containing directory if necessary + // + { + c3_w nam_w = 1 + snprintf(0, 0, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + c3_c* nam_c = c3_malloc(nam_w); + c3_i ret_i; + + // create $pier/.urb/roc, if it doesn't exist + // + // NB, $pier/.urb is guaranteed to already exist + // + { + ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc", dir_c); + + if ( ret_i < 0 ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + else if ( ret_i >= nam_w ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): truncated\r\n", + dir_c, eve_d); + c3_free(nam_c); + return c3n; + } + + if ( mkdir(nam_c, 0700) + && (EEXIST != errno) ) + { + fprintf(stderr, "rock: directory create failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + } + + ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + + if ( ret_i < 0 ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + else if ( ret_i >= nam_w ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): truncated\r\n", + dir_c, eve_d); + c3_free(nam_c); + return c3n; + } + + if ( -1 == (fid_i = open(nam_c, O_RDWR | O_CREAT | O_TRUNC, 0644)) ) { + fprintf(stderr, "rock: open failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + + c3_free(nam_c); + } + + // write jam-buffer into [fid_i] + // + // XX deduplicate with _write() wrapper in term.c + // + { + ssize_t ret_i; + + while ( len_d > 0 ) { + c3_w lop_w = 0; + // retry interrupt/async errors + // + do { + // abort pathological retry loop + // + if ( 100 
== ++lop_w ) { + fprintf(stderr, "rock: write loop: %s\r\n", strerror(errno)); + close(fid_i); + // XX unlink file? + // + return c3n; + } + + ret_i = write(fid_i, byt_y, len_d); + } + while ( (ret_i < 0) + && ( (errno == EINTR) + || (errno == EAGAIN) + || (errno == EWOULDBLOCK) )); + + // assert on true errors + // + // NB: can't call u3l_log here or we would re-enter _write() + // + if ( ret_i < 0 ) { + fprintf(stderr, "rock: write failed %s\r\n", strerror(errno)); + close(fid_i); + // XX unlink file? + // + return c3n; + } + // continue partial writes + // + else { + len_d -= ret_i; + byt_y += ret_i; + } + } + } + + close(fid_i); + + return c3y; +} + +/* u3u_cram(): globably deduplicate memory, and write a rock to disk. +*/ +c3_o +u3u_cram(c3_c* dir_c, c3_d eve_d) +{ + c3_o ret_o = c3y; + c3_d len_d; + c3_y* byt_y; + + c3_assert( &(u3H->rod_u) == u3R ); + + { + ur_root_t* rot_u; + ur_nvec_t cod_u; + ur_nref ken = _cu_realloc(stderr, &rot_u, &cod_u); + + { + ur_nref roc = u3_nul; + c3_d max_d = cod_u.fill; + c3_d i_d; + + // cons vector of cold jet-state entries onto a list + // + for ( i_d = 0; i_d < max_d; i_d++) { + roc = ur_cons(rot_u, cod_u.refs[i_d], roc); + } + + roc = ur_cons(rot_u, ur_coin64(rot_u, c3__fast), ur_cons(rot_u, ken, roc)); + + ur_jam(rot_u, roc, &len_d, &byt_y); + } + + // dispose off-loom structures + // + ur_nvec_free(&cod_u); + ur_hcon_free(rot_u); + } + + // write jam-buffer into pier + // + if ( c3n == _cu_rock_save(dir_c, eve_d, len_d, byt_y) ) { + ret_o = c3n; + } + + c3_free(byt_y); + + return ret_o; +} diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 76eba3004e..8cd196a0ac 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -251,15 +251,24 @@ _cw_cram(c3_i argc, c3_c* argv[]) c3_c* dir_c = argv[2]; c3_d eve_d = u3m_boot(dir_c); + c3_o ret_o; fprintf(stderr, "urbit-worker: cram: preparing\r\n"); - if ( c3n == u3m_rock_stay(dir_c, eve_d) ) { + if ( c3n == (ret_o = u3u_cram(dir_c, eve_d)) ) { 
fprintf(stderr, "urbit-worker: cram: unable to jam state\r\n"); - exit(1); + } + else { + fprintf(stderr, "urbit-worker: cram: rock saved at event %" PRIu64 "\r\n", eve_d); } - fprintf(stderr, "urbit-worker: cram: rock saved at event %" PRIu64 "\r\n", eve_d); + // save even on failure, as we just did all the work of deduplication + // + u3e_save(); + + if ( c3n == ret_o ) { + exit(1); + } } /* _cw_queu(); cue rock, save, and exit. From 8d5adc8bc1cc373b780118cafbc0ca19388c102b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 25 Aug 2020 13:04:52 -0700 Subject: [PATCH 050/123] ur: refactors ur_cue() into higher-order traversal --- pkg/urbit/include/ur/defs.h | 4 + pkg/urbit/ur/serial.c | 353 +++++++++++++++++------------------- 2 files changed, 167 insertions(+), 190 deletions(-) diff --git a/pkg/urbit/include/ur/defs.h b/pkg/urbit/include/ur/defs.h index 51f9064cc3..c20f379502 100644 --- a/pkg/urbit/include/ur/defs.h +++ b/pkg/urbit/include/ur/defs.h @@ -5,6 +5,10 @@ #include #include +#define ur_fib10 55 +#define ur_fib11 89 +#define ur_fib12 144 + typedef uint8_t ur_bool_t; #define ur_min(a, b) ( ((a) < (b)) ? 
(a) : (b) ) diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index bfcc859914..245eac50ca 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -72,15 +72,11 @@ uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) { _jam_t j = {0}; - { - uint64_t fib11 = 89, fib12 = 144; - j.bsw.prev = fib11; - j.bsw.size = fib12; - j.bsw.bytes = calloc(j.bsw.size, 1); - - ur_dict64_grow(r, &j.dict, fib11, fib12); - } + j.bsw.prev = ur_fib11; + j.bsw.size = ur_fib12; + j.bsw.bytes = calloc(j.bsw.size, 1); + ur_dict64_grow(r, &j.dict, ur_fib11, ur_fib12); ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); ur_dict_free((ur_dict_t*)&j.dict); @@ -91,24 +87,168 @@ ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) return j.bsw.bits; } -typedef struct _cue_s { - ur_dict64_t dict; - ur_bsr_t bsr; -} _cue_t; +#define CUE_HEAD64 0xffffffffffffffffULL -static inline ur_cue_res_e -_cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) +/* +** stack frame for recording head vs tail iteration +** +** $? 
[CUE_HEAD64 bits=@] +** [hed=* bits=@] +*/ +typedef struct _cue64_frame_s { + uint64_t ref; + uint64_t bits; +} _cue64_frame_t; + +typedef struct _cue64_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue64_frame_t* f; +} _cue64_stack_t; + +static inline void +_cue64_stack_push(_cue64_stack_t *s, uint64_t ref, uint64_t bits) { - ur_bsr_t *bsr = &c->bsr; - ur_cue_res_e res; - uint64_t len; - - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; } + _cue64_frame_t* f = &(s->f[s->fill++]); + f->ref = ref; + f->bits = bits; +} + +static inline ur_cue_res_e +_cue_walk64_advance(ur_root_t *r, + _cue64_stack_t *s, + ur_bsr_t *bsr, + ur_dict64_t *dict, + uint64_t (*coin)(ur_root_t*, ur_bsr_t*, uint64_t), + uint64_t *out) +{ + while ( 1 ) { + uint64_t len, bits = bsr->bits; + ur_cue_tag_e tag; + ur_cue_res_e res; + + if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { + return res; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + + *out = coin(r, bsr, len); + ur_dict64_put(r, dict, bits, *out); + return ur_cue_good; + } + + case ur_jam_back: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 < len ) { + return ur_cue_meme; + } + else { + uint64_t bak = ur_bsr64_any(bsr, len); + + if ( !ur_dict64_get(r, dict, bak, out) ) { + // XX distinguish bad backref? 
+ // + return ur_cue_gone; + } + + return ur_cue_good; + } + } + + case ur_jam_cell: { + _cue64_stack_push(s, CUE_HEAD64, bits); + continue; + } + } + } +} + +ur_cue_res_e +ur_cue_walk64(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + uint64_t *out, + uint64_t (*coin)(ur_root_t*, ur_bsr_t*, uint64_t), + uint64_t (*cons)(ur_root_t*, uint64_t, uint64_t)) +{ + ur_cue_res_e res; + uint64_t ref; + ur_dict64_t dict = {0}; + ur_bsr_t bsr = {0}; + _cue64_stack_t s = {0}; + + // init dictionary + // + ur_dict64_grow(r, &dict, ur_fib11, ur_fib12); + + // init bitstream-reader + // + bsr.left = len; + bsr.bytes = byt; + + // setup stack + // + s.prev = ur_fib10; + s.size = ur_fib11; + s.f = malloc(s.size * sizeof(*s.f)); + + // advance into stream + // + res = _cue_walk64_advance(r, &s, &bsr, &dict, coin, &ref); + + // retreat down the stack + // + while ( s.fill && (ur_cue_good == res) ) { + _cue64_frame_t f = s.f[--s.fill]; + + // f is a head-frame + // + if ( CUE_HEAD64 == f.ref ) { + _cue64_stack_push(&s, ref, f.bits); + res = _cue_walk64_advance(r, &s, &bsr, &dict, coin, &ref); + } + // f is a tail-frame + // + else { + ref = cons(r, f.ref, ref); + ur_dict64_put(r, &dict, f.bits, ref); + } + } + + // finalize result + // + ur_dict_free((ur_dict_t*)&dict); + free(s.f); + + if ( ur_cue_good == res ) { + *out = ref; + } + + return res; +} + +static inline uint64_t +_cue_coin(ur_root_t *r, ur_bsr_t *bsr, uint64_t len) +{ if ( 62 >= len ) { - *out = (ur_nref)ur_bsr64_any(bsr, len); + return ur_bsr64_any(bsr, len); } else { uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); @@ -121,179 +261,12 @@ _cue_atom(ur_root_t *r, _cue_t *c, ur_nref *out) len_byt--; } - *out = ur_coin_bytes_unsafe(r, byt, len_byt); + return (uint64_t)ur_coin_bytes_unsafe(r, byt, len_byt); } - - return ur_cue_good; -} - -static inline ur_cue_res_e -_cue_back(ur_bsr_t *bsr, uint64_t *out) -{ - ur_cue_res_e res; - uint64_t len; - - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; 
- } - else if ( 62 < len ) { - return ur_cue_meme; - } - - *out = ur_bsr64_any(bsr, len); - return ur_cue_good; -} - -#define STACK_ROOT 0 -#define STACK_HEAD 1 -#define STACK_TAIL 2 - -// stack frame for recording head vs tail iteration -// -// In Hoon, this structure would be as follows: -// -// $% [%root ~] -// [%head cursor=@] -// [%tail cursor=@ hed-ref=*] -// == -// -typedef struct _cue_frame_s { - uint8_t tag; - uint64_t bits; - ur_nref ref; -} _cue_frame_t; - -typedef struct _cue_stack_s { - uint32_t prev; - uint32_t size; - uint32_t fill; - _cue_frame_t* f; -} _cue_stack_t; - -static inline void -_cue_stack_push(_cue_stack_t *s, uint8_t tag, uint64_t bits, ur_nref ref) -{ - if ( s->fill == s->size ) { - uint32_t next = s->prev + s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - s->prev = s->size; - s->size = next; - } - - _cue_frame_t* f = &(s->f[s->fill++]); - f->tag = tag; - f->bits = bits; - f->ref = ref; } ur_cue_res_e ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out) { - ur_cue_res_e res; - ur_nref ref; - _cue_t c = {0}; - _cue_stack_t s = { .prev = 89, .size = 144, .fill = 0, .f = 0 }; - - // init bitstream-reader - // - c.bsr.left = len; - c.bsr.bytes = byt; - - // init dictionary - // - { - uint64_t fib11 = 89, fib12 = 144; - ur_dict64_grow(r, &c.dict, fib11, fib12); - } - - // setup stack - // - s.f = malloc(s.size * sizeof(*s.f)); - _cue_stack_push(&s, STACK_ROOT, 0, 0); - - // advance into buffer - // - advance: { - uint64_t bits = c.bsr.bits; - ur_cue_tag_e tag; - - if ( ur_cue_good != (res = ur_bsr_tag(&c.bsr, &tag)) ) { - goto perfect; - } - - switch ( tag ) { - default: assert(0); - - case ur_jam_atom: { - if ( ur_cue_good != (res = _cue_atom(r, &c, &ref)) ) { - goto perfect; - } - else { - ur_dict64_put(r, &c.dict, bits, (uint64_t)ref); - goto retreat; - } - } - - case ur_jam_back: { - uint64_t bak, val; - - if ( ur_cue_good != (res = _cue_back(&c.bsr, &bak)) ) { - goto perfect; - } - else if ( !ur_dict64_get(r, 
&c.dict, bak, &val) ) { - // XX distinguish bad backref? - // - res = ur_cue_gone; - goto perfect; - } - - ref = (ur_nref)val; - goto retreat; - } - - case ur_jam_cell: { - _cue_stack_push(&s, STACK_HEAD, bits, 0); - goto advance; - } - } - } - - // retreat down the stack - // - retreat: { - _cue_frame_t f = s.f[--s.fill]; - - switch ( f.tag ) { - default: assert(0); - - case STACK_ROOT: { - res = ur_cue_good; - goto perfect; - } - - case STACK_HEAD: { - _cue_stack_push(&s, STACK_TAIL, f.bits, ref); - goto advance; - } - - case STACK_TAIL: { - ref = ur_cons(r, f.ref, ref); - ur_dict64_put(r, &c.dict, f.bits, (uint64_t)ref); - goto retreat; - } - } - } - - // we done - // - perfect: { - ur_dict_free((ur_dict_t*)&c.dict); - free(s.f); - - if ( ur_cue_good == res ) { - *out = ref; - } - - return res; - } + return ur_cue_walk64(r, len, byt, (uint64_t*)out, _cue_coin, ur_cons); } From 63d36b4e754d5952dd1e8d015d0c07620d6482a6 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 25 Aug 2020 15:50:26 -0700 Subject: [PATCH 051/123] ur: fixes bug in ur_bsr_bytes_any pointer math, adds tests --- pkg/urbit/tests/ur_tests.c | 12 ++++++++++++ pkg/urbit/ur/bitstream.c | 7 ++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 66a145559f..4d9670a6cb 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -892,6 +892,12 @@ _bsr_cmp_any_check(const char* cap, uint8_t off, uint8_t len, ur_bsr_t *a, ur_bs ret = 0; } + if ( a->bytes != b->bytes ) { + fprintf(stderr, "%s: off %u len %u: bytes fail (%p, %p)\r\n", + cap, off, len, a->bytes, b->bytes); + ret = 0; + } + return ret; } @@ -1251,6 +1257,12 @@ _bsr_cmp_check(const char* cap, ret = 0; } + if ( a->bytes != b->bytes ) { + fprintf(stderr, "%s: off %u len %u: bytes fail (%p, %p)\r\n", + cap, off, len, a->bytes, b->bytes); + ret = 0; + } + return ret; } diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index 
476cf40d89..eea2202543 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -400,10 +400,11 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) } } else { - uint8_t bits = off + len_bit; + uint8_t bits = off + len_bit; + uint64_t step = max + !!(bits >> 3); - bsr->bytes += max; - left -= max + !!(bits >> 3); + bsr->bytes += step; + left -= step; off = ur_mask_3(bits); if ( len_bit <= rest ) { From 177432f914bb89522c6dceadf12af2bf5800299f Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 25 Aug 2020 16:56:05 -0700 Subject: [PATCH 052/123] ur: adds 32-bit ur_cue traversal, ur_cue_test validator --- pkg/urbit/include/ur/serial.h | 55 +++++ pkg/urbit/tests/ur_tests.c | 20 +- pkg/urbit/ur/serial.c | 372 ++++++++++++++++++++++++++++------ 3 files changed, 373 insertions(+), 74 deletions(-) diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index 2a8d98b86e..0fe634108b 100644 --- a/pkg/urbit/include/ur/serial.h +++ b/pkg/urbit/include/ur/serial.h @@ -7,7 +7,62 @@ uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); +typedef uint32_t (*ur_coin32_f)(ur_root_t*, ur_bsr_t*, uint64_t); +typedef uint32_t (*ur_cons32_f)(ur_root_t*, uint32_t, uint32_t); + +typedef uint64_t (*ur_coin64_f)(ur_root_t*, ur_bsr_t*, uint64_t); +typedef uint64_t (*ur_cons64_f)(ur_root_t*, uint64_t, uint64_t); + +ur_cue_res_e +ur_cue_walk32_unsafe(ur_root_t *r, + ur_dict32_t *dict, + uint64_t len, + const uint8_t *byt, + uint32_t *out, + ur_coin32_f coin, + ur_cons32_f cons); + +ur_cue_res_e +ur_cue_walk32(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + uint32_t *out, + ur_coin32_f coin, + ur_cons32_f cons); + +ur_cue_res_e +ur_cue_walk64_unsafe(ur_root_t *r, + ur_dict64_t *dict, + uint64_t len, + const uint8_t *byt, + uint64_t *out, + ur_coin64_f coin, + ur_cons64_f cons); + +ur_cue_res_e +ur_cue_walk64(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + uint64_t *out, + ur_coin64_f coin, + ur_cons64_f cons); + 
+ur_cue_res_e +ur_cue_unsafe(ur_root_t *r, + ur_dict64_t *dict, + uint64_t len, + const uint8_t *byt, + ur_nref *out); + ur_cue_res_e ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); +ur_bool_t +ur_cue_test_unsafe(ur_dict32_t *dict, + uint64_t len, + const uint8_t *byt); + +ur_bool_t +ur_cue_test(uint64_t len, const uint8_t *byt); + #endif diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 4d9670a6cb..b2d5b9b36e 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1475,18 +1475,24 @@ _test_cue_spec(const char *cap, size_t len, const uint8_t *res) { + int ret = 1; ur_nref out; - if ( ur_cue_good != ur_cue(r, len, res, &out) ) { + if ( !ur_cue_test(len, res) ) { fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap); - return 0; - } - else if ( ref != out ) { - fprintf(stderr, "\033[31mcue %s fail 2 ref=%" PRIu64 " out=%" PRIu64 " \033[0m\r\n", cap, ref, out); - return 0; + ret = 0; } - return 1; + if ( ur_cue_good != ur_cue(r, len, res, &out) ) { + fprintf(stderr, "\033[31mcue %s fail 2\033[0m\r\n", cap); + ret = 0; + } + else if ( ref != out ) { + fprintf(stderr, "\033[31mcue %s fail 3 ref=%" PRIu64 " out=%" PRIu64 " \033[0m\r\n", cap, ref, out); + ret = 0; + } + + return ret; } static int diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 245eac50ca..c6cb02efc3 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -87,19 +87,32 @@ ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) return j.bsw.bits; } +#define CUE_HEAD32 0xffffffff #define CUE_HEAD64 0xffffffffffffffffULL /* ** stack frame for recording head vs tail iteration ** -** $? [CUE_HEAD64 bits=@] +** $? 
[CUE_HEAD_NN bits=@] ** [hed=* bits=@] */ +typedef struct _cue32_frame_s { + uint32_t ref; + uint64_t bits; +} _cue32_frame_t; + typedef struct _cue64_frame_s { uint64_t ref; uint64_t bits; } _cue64_frame_t; +typedef struct _cue32_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue32_frame_t* f; +} _cue32_stack_t; + typedef struct _cue64_stack_s { uint32_t prev; uint32_t size; @@ -107,28 +120,13 @@ typedef struct _cue64_stack_s { _cue64_frame_t* f; } _cue64_stack_t; -static inline void -_cue64_stack_push(_cue64_stack_t *s, uint64_t ref, uint64_t bits) -{ - if ( s->fill == s->size ) { - uint32_t next = s->prev + s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - s->prev = s->size; - s->size = next; - } - - _cue64_frame_t* f = &(s->f[s->fill++]); - f->ref = ref; - f->bits = bits; -} - static inline ur_cue_res_e -_cue_walk64_advance(ur_root_t *r, - _cue64_stack_t *s, - ur_bsr_t *bsr, - ur_dict64_t *dict, - uint64_t (*coin)(ur_root_t*, ur_bsr_t*, uint64_t), - uint64_t *out) +_cue_walk32_next(ur_root_t *r, + _cue32_stack_t *s, + ur_bsr_t *bsr, + ur_dict32_t *dict, + ur_coin32_f coin, + uint32_t *out) { while ( 1 ) { uint64_t len, bits = bsr->bits; @@ -142,14 +140,24 @@ _cue_walk64_advance(ur_root_t *r, switch ( tag ) { default: assert(0); - case ur_jam_atom: { - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; + case ur_jam_cell: { + // reallocate the stack if full + // + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; } - *out = coin(r, bsr, len); - ur_dict64_put(r, dict, bits, *out); - return ur_cue_good; + // save a head-frame and read the head from the stream + // + { + _cue32_frame_t* f = &(s->f[s->fill++]); + f->ref = CUE_HEAD32; + f->bits = bits; + } + continue; } case ur_jam_back: { @@ -162,41 +170,40 @@ _cue_walk64_advance(ur_root_t *r, else { uint64_t bak = ur_bsr64_any(bsr, len); - if ( !ur_dict64_get(r, dict, bak, 
out) ) { - // XX distinguish bad backref? - // - return ur_cue_gone; - } - - return ur_cue_good; + // XX distinguish bad backref? + // + return ur_dict32_get(r, dict, bak, out) + ? ur_cue_good + : ur_cue_gone; } } - case ur_jam_cell: { - _cue64_stack_push(s, CUE_HEAD64, bits); - continue; + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + + *out = coin(r, bsr, len); + ur_dict32_put(r, dict, bits, *out); + return ur_cue_good; } } } } ur_cue_res_e -ur_cue_walk64(ur_root_t *r, - uint64_t len, - const uint8_t *byt, - uint64_t *out, - uint64_t (*coin)(ur_root_t*, ur_bsr_t*, uint64_t), - uint64_t (*cons)(ur_root_t*, uint64_t, uint64_t)) +ur_cue_walk32_unsafe(ur_root_t *r, + ur_dict32_t *dict, + uint64_t len, + const uint8_t *byt, + uint32_t *out, + ur_coin32_f coin, + ur_cons32_f cons) { ur_cue_res_e res; - uint64_t ref; - ur_dict64_t dict = {0}; + uint32_t ref; ur_bsr_t bsr = {0}; - _cue64_stack_t s = {0}; - - // init dictionary - // - ur_dict64_grow(r, &dict, ur_fib11, ur_fib12); + _cue32_stack_t s = {0}; // init bitstream-reader // @@ -211,36 +218,200 @@ ur_cue_walk64(ur_root_t *r, // advance into stream // - res = _cue_walk64_advance(r, &s, &bsr, &dict, coin, &ref); + res = _cue_walk32_next(r, &s, &bsr, dict, coin, &ref); - // retreat down the stack + // process result // while ( s.fill && (ur_cue_good == res) ) { - _cue64_frame_t f = s.f[--s.fill]; - - // f is a head-frame + // peek at the top of the stack // - if ( CUE_HEAD64 == f.ref ) { - _cue64_stack_push(&s, ref, f.bits); - res = _cue_walk64_advance(r, &s, &bsr, &dict, coin, &ref); + _cue32_frame_t *f = &(s.f[s.fill - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( CUE_HEAD32 == f->ref ) { + f->ref = ref; + res = _cue_walk32_next(r, &s, &bsr, dict, coin, &ref); } - // f is a tail-frame + // f is a tail-frame; pop the stack and continue // else { - ref = cons(r, f.ref, ref); - ur_dict64_put(r, &dict, f.bits, ref); + ref 
= cons(r, f->ref, ref); + ur_dict32_put(r, dict, f->bits, ref); + s.fill--; } } - // finalize result - // - ur_dict_free((ur_dict_t*)&dict); free(s.f); if ( ur_cue_good == res ) { *out = ref; } + return res; +} +ur_cue_res_e +ur_cue_walk32(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + uint32_t *out, + ur_coin32_f coin, + ur_cons32_f cons) +{ + ur_dict32_t dict = {0}; + ur_dict32_grow(r, &dict, ur_fib11, ur_fib12); + ur_cue_res_e res = ur_cue_walk32_unsafe(r, &dict, len, + byt, out, coin, cons); + + ur_dict_free((ur_dict_t*)&dict); + return res; +} + +static inline ur_cue_res_e +_cue_walk64_next(ur_root_t *r, + _cue64_stack_t *s, + ur_bsr_t *bsr, + ur_dict64_t *dict, + ur_coin64_f coin, + uint64_t *out) +{ + while ( 1 ) { + uint64_t len, bits = bsr->bits; + ur_cue_tag_e tag; + ur_cue_res_e res; + + if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { + return res; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_cell: { + // reallocate the stack if full + // + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; + } + + // save a head-frame and read the head from the stream + // + { + _cue64_frame_t* f = &(s->f[s->fill++]); + f->ref = CUE_HEAD64; + f->bits = bits; + } + continue; + } + + case ur_jam_back: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 < len ) { + return ur_cue_meme; + } + else { + uint64_t bak = ur_bsr64_any(bsr, len); + + // XX distinguish bad backref? + // + return ur_dict64_get(r, dict, bak, out) + ? 
ur_cue_good + : ur_cue_gone; + } + } + + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + + *out = coin(r, bsr, len); + ur_dict64_put(r, dict, bits, *out); + return ur_cue_good; + } + } + } +} + +ur_cue_res_e +ur_cue_walk64_unsafe(ur_root_t *r, + ur_dict64_t *dict, + uint64_t len, + const uint8_t *byt, + uint64_t *out, + ur_coin64_f coin, + ur_cons64_f cons) +{ + ur_cue_res_e res; + uint64_t ref; + ur_bsr_t bsr = {0}; + _cue64_stack_t s = {0}; + + // init bitstream-reader + // + bsr.left = len; + bsr.bytes = byt; + + // setup stack + // + s.prev = ur_fib10; + s.size = ur_fib11; + s.f = malloc(s.size * sizeof(*s.f)); + + // advance into stream + // + res = _cue_walk64_next(r, &s, &bsr, dict, coin, &ref); + + // process result + // + while ( s.fill && (ur_cue_good == res) ) { + // peek at the top of the stack + // + _cue64_frame_t *f = &(s.f[s.fill - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( CUE_HEAD64 == f->ref ) { + f->ref = ref; + res = _cue_walk64_next(r, &s, &bsr, dict, coin, &ref); + } + // f is a tail-frame; pop the stack and continue + // + else { + ref = cons(r, f->ref, ref); + ur_dict64_put(r, dict, f->bits, ref); + s.fill--; + } + } + + free(s.f); + + if ( ur_cue_good == res ) { + *out = ref; + } + return res; +} + +ur_cue_res_e +ur_cue_walk64(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + uint64_t *out, + ur_coin64_f coin, + ur_cons64_f cons) +{ + ur_dict64_t dict = {0}; + ur_dict64_grow(r, &dict, ur_fib11, ur_fib12); + + ur_cue_res_e res = ur_cue_walk64_unsafe(r, &dict, len, + byt, out, coin, cons); + + ur_dict_free((ur_dict_t*)&dict); return res; } @@ -266,7 +437,74 @@ _cue_coin(ur_root_t *r, ur_bsr_t *bsr, uint64_t len) } ur_cue_res_e -ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out) +ur_cue_unsafe(ur_root_t *r, + ur_dict64_t *dict, + uint64_t len, + const uint8_t *byt, + ur_nref *out) { - return ur_cue_walk64(r, len, byt, 
(uint64_t*)out, _cue_coin, ur_cons); + return ur_cue_walk64_unsafe(r, dict, len, byt, + (uint64_t*)out, _cue_coin, ur_cons); +} + +ur_cue_res_e +ur_cue(ur_root_t *r, + uint64_t len, + const uint8_t *byt, + ur_nref *out) +{ + return ur_cue_walk64(r, len, byt, + (uint64_t*)out, _cue_coin, ur_cons); +} + +static inline uint32_t +_cue_test_coin(ur_root_t *r, ur_bsr_t *bsr, uint64_t len) +{ + // XX need a ur_bsr_skip() + // + { + uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); + uint8_t *byt = calloc(len_byt, 1); + ur_bsr_bytes_any(bsr, len, byt); + + free(byt); + } + + return 0; +} + +static inline uint32_t +_cue_test_cons(ur_root_t *r, uint32_t hed, uint32_t tal) +{ + return 0; +} + +ur_bool_t +ur_cue_test_unsafe(ur_dict32_t *dict, + uint64_t len, + const uint8_t *byt) +{ + // NB, the root argument is unused, as all dict keys are + // backreferences, limited to 62-bit direct atoms + // + ur_root_t *r = 0; + uint32_t out; + ur_cue_res_e res = ur_cue_walk32_unsafe(r, dict, len, + byt, &out, + _cue_test_coin, + _cue_test_cons); + return ur_cue_good == res; +} + +ur_bool_t +ur_cue_test(uint64_t len, const uint8_t *byt) +{ + ur_root_t *r = 0; + ur_dict32_t dict = {0}; + ur_dict32_grow(r, &dict, ur_fib11, ur_fib12); + + ur_bool_t ret = ur_cue_test_unsafe(&dict, len, byt); + + ur_dict_free((ur_dict_t*)&dict); + return ret; } From 88f58d3f017ae9fda59448a49a759cc1645f0b4e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 26 Aug 2020 13:17:34 -0700 Subject: [PATCH 053/123] ur: fixes serial.h includes --- pkg/urbit/include/ur/serial.h | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index 0fe634108b..904bcc3f38 100644 --- a/pkg/urbit/include/ur/serial.h +++ b/pkg/urbit/include/ur/serial.h @@ -2,6 +2,7 @@ #define UR_SERIAL_H #include +#include #include uint64_t From 0420bf9aa52d08847749d3709b5706e8746f2ad5 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 26 Aug 2020 13:20:16 -0700 Subject: [PATCH 054/123] ur: 
adds additional fibonacci constants --- pkg/urbit/include/ur/defs.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/urbit/include/ur/defs.h b/pkg/urbit/include/ur/defs.h index c20f379502..605193b1d3 100644 --- a/pkg/urbit/include/ur/defs.h +++ b/pkg/urbit/include/ur/defs.h @@ -8,6 +8,8 @@ #define ur_fib10 55 #define ur_fib11 89 #define ur_fib12 144 +#define ur_fib33 3524578 +#define ur_fib34 5702887 typedef uint8_t ur_bool_t; From 6f93ec3aa5027ecb096e552e2c834cfccb61b536 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 25 Aug 2020 17:55:49 -0700 Subject: [PATCH 055/123] ur: implements ur_dict_get/put, cleans up ur_dict*_t handling --- pkg/urbit/include/ur/hashcons.h | 24 ++++++++--- pkg/urbit/ur/hashcons.c | 70 +++++++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 12 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 73601c174f..9cfd9d2da2 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -11,6 +11,8 @@ #define ur_nref_tag(ref) ( ref >> 62 ) #define ur_nref_idx(ref) ur_mask_62(ref) +#define ur_pail_max 10 + typedef uint32_t ur_mug; typedef uint64_t ur_nref; typedef enum { @@ -26,8 +28,8 @@ typedef struct ur_nvec_s { typedef struct ur_pail32_s { uint8_t fill; - ur_nref refs[10]; - uint32_t vals[10]; + ur_nref refs[ur_pail_max]; + uint32_t vals[ur_pail_max]; } ur_pail32_t; typedef struct ur_dict32_s { @@ -38,8 +40,8 @@ typedef struct ur_dict32_s { typedef struct ur_pail64_s { uint8_t fill; - ur_nref refs[10]; - uint64_t vals[10]; + ur_nref refs[ur_pail_max]; + uint64_t vals[ur_pail_max]; } ur_pail64_t; typedef struct ur_dict64_s { @@ -50,7 +52,7 @@ typedef struct ur_dict64_s { typedef struct ur_pail_s { uint8_t fill; - ur_nref refs[10]; + ur_nref refs[ur_pail_max]; } ur_pail_t; typedef struct ur_dict_s { @@ -99,9 +101,21 @@ ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val); void ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t 
prev, uint64_t size); +void +ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size); + +ur_bool_t +ur_dict_get(ur_root_t *r, ur_dict_t *dict, ur_nref ref); + +void +ur_dict_put(ur_root_t *r, ur_dict_t *dict, ur_nref ref); + void ur_dict_free(ur_dict_t *dict); +void +ur_dict_wipe(ur_dict_t *dict); + ur_bool_t ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index a10579c7cb..7ad97c3a9a 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -128,7 +128,7 @@ ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) ur_pail32_t *bucket = &(buckets[idx]); uint8_t new_fill = bucket->fill; - if ( 10 == new_fill ) { + if ( ur_pail_max == new_fill ) { free(buckets); return ur_dict32_grow(r, dict, size, next); } @@ -182,7 +182,7 @@ ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val) } } - if ( 10 == fill ) { + if ( ur_pail_max == fill ) { ur_dict32_grow(r, dict, dict->prev, dict->size); continue; } @@ -216,7 +216,7 @@ ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) ur_pail64_t *bucket = &(buckets[idx]); uint8_t new_fill = bucket->fill; - if ( 10 == new_fill ) { + if ( ur_pail_max == new_fill ) { free(buckets); return ur_dict64_grow(r, dict, size, next); } @@ -270,7 +270,7 @@ ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val) } } - if ( 10 == fill ) { + if ( ur_pail_max == fill ) { ur_dict64_grow(r, dict, dict->prev, dict->size); continue; } @@ -303,7 +303,7 @@ ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) ur_pail_t *bucket = &(buckets[idx]); uint8_t new_fill = bucket->fill; - if ( 10 == new_fill ) { + if ( ur_pail_max == new_fill ) { free(buckets); return ur_dict_grow(r, dict, size, next); } @@ -320,12 +320,68 @@ ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) dict->buckets = buckets; } +ur_bool_t 
+ur_dict_get(ur_root_t *r, ur_dict_t *dict, ur_nref ref) +{ + ur_mug mug = ur_nref_mug(r, ref); + uint64_t idx = ( mug % dict->size ); + + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = bucket->fill; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + return 1; + } + } + + return 0; +} + +void +ur_dict_put(ur_root_t *r, ur_dict_t *dict, ur_nref ref) +{ + ur_mug mug = ur_nref_mug(r, ref); + + while ( 1 ) { + uint64_t idx = ( mug % dict->size ); + ur_pail_t *bucket = &(dict->buckets[idx]); + uint8_t i, fill = bucket->fill; + + for ( i = 0; i < fill; i++ ) { + if ( ref == bucket->refs[i] ) { + return; + } + } + + if ( ur_pail_max == fill ) { + ur_dict_grow(r, dict, dict->prev, dict->size); + continue; + } + + bucket->refs[fill] = ref; + bucket->fill = 1 + fill; + break; + } +} + void ur_dict_free(ur_dict_t *dict) { free(dict->buckets); } +void +ur_dict_wipe(ur_dict_t *dict) +{ + ur_pail_t *buckets = dict->buckets; + uint64_t i, size = dict->size; + + for ( i = 0; i < size; i++ ) { + buckets[i].fill = 0; + } +} + void ur_atoms_grow(ur_atoms_t *atoms) { @@ -538,7 +594,7 @@ ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) } } - if ( 10 == b_fill ) { + if ( ur_pail_max == b_fill ) { ur_dict_grow(r, dict, dict->prev, dict->size); continue; } @@ -633,7 +689,7 @@ ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal) } } - if ( 10 == b_fill ) { + if ( ur_pail_max == b_fill ) { ur_dict_grow(r, dict, dict->prev, dict->size); continue; } From 43bd092476c7cc4ea47c5ab8fba51bc7b1e6daad Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 26 Aug 2020 15:11:41 -0700 Subject: [PATCH 056/123] ur: reimplements ur_cue_test() directly --- pkg/urbit/include/ur/serial.h | 4 +- pkg/urbit/ur/serial.c | 167 +++++++++++++++++++++++++++------- 2 files changed, 135 insertions(+), 36 deletions(-) diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index 904bcc3f38..68ad17750d 100644 --- a/pkg/urbit/include/ur/serial.h +++ 
b/pkg/urbit/include/ur/serial.h @@ -58,8 +58,8 @@ ur_cue_unsafe(ur_root_t *r, ur_cue_res_e ur_cue(ur_root_t *r, uint64_t len, const uint8_t *byt, ur_nref *out); -ur_bool_t -ur_cue_test_unsafe(ur_dict32_t *dict, +ur_cue_res_e +ur_cue_test_unsafe(ur_dict_t *dict, uint64_t len, const uint8_t *byt); diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index c6cb02efc3..3453449e19 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -457,54 +457,153 @@ ur_cue(ur_root_t *r, (uint64_t*)out, _cue_coin, ur_cons); } -static inline uint32_t -_cue_test_coin(ur_root_t *r, ur_bsr_t *bsr, uint64_t len) -{ - // XX need a ur_bsr_skip() - // - { - uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); - uint8_t *byt = calloc(len_byt, 1); - ur_bsr_bytes_any(bsr, len, byt); +typedef struct _cue_test_frame_s { + ur_bool_t tal; + uint64_t bits; +} _cue_test_frame_t; - free(byt); +typedef struct _cue_test_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue_test_frame_t* f; +} _cue_test_stack_t; + +static inline ur_cue_res_e +_cue_test_next(_cue_test_stack_t *s, + ur_bsr_t *bsr, + ur_dict_t *dict) +{ + while ( 1 ) { + uint64_t len, bits = bsr->bits; + ur_cue_tag_e tag; + ur_cue_res_e res; + + if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { + return res; + } + + switch ( tag ) { + default: assert(0); + + case ur_jam_cell: { + // reallocate the stack if full + // + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; + } + + // save a head-frame and read the head from the stream + // + { + _cue_test_frame_t* f = &(s->f[s->fill++]); + f->tal = 0; + f->bits = bits; + } + continue; + } + + case ur_jam_back: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 < len ) { + return ur_cue_meme; + } + else { + uint64_t bak = ur_bsr64_any(bsr, len); + + // XX distinguish bad backref? 
+ // + return ur_dict_get((ur_root_t*)0, dict, bak) + ? ur_cue_good + : ur_cue_gone; + } + } + + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + + // XX need a ur_bsr_skip() + // + { + uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); + uint8_t *byt = calloc(len_byt, 1); + ur_bsr_bytes_any(bsr, len, byt); + + free(byt); + } + + ur_dict_put((ur_root_t*)0, dict, bits); + return ur_cue_good; + } + } } - - return 0; } -static inline uint32_t -_cue_test_cons(ur_root_t *r, uint32_t hed, uint32_t tal) -{ - return 0; -} - -ur_bool_t -ur_cue_test_unsafe(ur_dict32_t *dict, +ur_cue_res_e +ur_cue_test_unsafe(ur_dict_t *dict, uint64_t len, const uint8_t *byt) { - // NB, the root argument is unused, as all dict keys are - // backreferences, limited to 62-bit direct atoms + ur_bsr_t bsr = {0}; + _cue_test_stack_t s = {0}; + ur_cue_res_e res; + + // init bitstream-reader // - ur_root_t *r = 0; - uint32_t out; - ur_cue_res_e res = ur_cue_walk32_unsafe(r, dict, len, - byt, &out, - _cue_test_coin, - _cue_test_cons); - return ur_cue_good == res; + bsr.left = len; + bsr.bytes = byt; + + // setup stack + // + s.prev = ur_fib10; + s.size = ur_fib11; + s.f = malloc(s.size * sizeof(*s.f)); + + // advance into stream + // + res = _cue_test_next(&s, &bsr, dict); + + // process result + // + while ( s.fill && (ur_cue_good == res) ) { + // peek at the top of the stack + // + _cue_test_frame_t *f = &(s.f[s.fill - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( !f->tal ) { + f->tal = 1; + res = _cue_test_next(&s, &bsr, dict); + } + // f is a tail-frame; pop the stack and continue + // + else { + ur_dict_put((ur_root_t*)0, dict, f->bits); + s.fill--; + } + } + + free(s.f); + + return res; } ur_bool_t ur_cue_test(uint64_t len, const uint8_t *byt) { - ur_root_t *r = 0; - ur_dict32_t dict = {0}; - ur_dict32_grow(r, &dict, ur_fib11, ur_fib12); + ur_dict_t dict = {0}; + ur_dict_grow((ur_root_t*)0, &dict, 
ur_fib11, ur_fib12); - ur_bool_t ret = ur_cue_test_unsafe(&dict, len, byt); + ur_bool_t ret = ur_cue_good == ur_cue_test_unsafe(&dict, len, byt); - ur_dict_free((ur_dict_t*)&dict); + ur_dict_free(&dict); return ret; } From e964e4a1fb0c2c8239bc5ddc9202466065ce0e1a Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 26 Aug 2020 16:12:03 -0700 Subject: [PATCH 057/123] ur: reimplements ur_cue/_unsafe directly --- pkg/urbit/ur/serial.c | 154 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 136 insertions(+), 18 deletions(-) diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 3453449e19..398f0082a7 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -415,24 +415,91 @@ ur_cue_walk64(ur_root_t *r, return res; } -static inline uint64_t -_cue_coin(ur_root_t *r, ur_bsr_t *bsr, uint64_t len) +static inline ur_cue_res_e +_cue_next(ur_root_t *r, + _cue64_stack_t *s, + ur_bsr_t *bsr, + ur_dict64_t *dict, + ur_nref *out) { - if ( 62 >= len ) { - return ur_bsr64_any(bsr, len); - } - else { - uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); - uint8_t *byt = calloc(len_byt, 1); - ur_bsr_bytes_any(bsr, len, byt); + while ( 1 ) { + uint64_t len, bits = bsr->bits; + ur_cue_tag_e tag; + ur_cue_res_e res; - // strip trailing zeroes - // - while ( len_byt && !byt[len_byt - 1] ) { - len_byt--; + if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { + return res; } - return (uint64_t)ur_coin_bytes_unsafe(r, byt, len_byt); + switch ( tag ) { + default: assert(0); + + case ur_jam_cell: { + // reallocate the stack if full + // + if ( s->fill == s->size ) { + uint32_t next = s->prev + s->size; + s->f = realloc(s->f, next * sizeof(*s->f)); + s->prev = s->size; + s->size = next; + } + + // save a head-frame and read the head from the stream + // + { + _cue64_frame_t* f = &(s->f[s->fill++]); + f->ref = CUE_HEAD64; + f->bits = bits; + } + continue; + } + + case ur_jam_back: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if 
( 62 < len ) { + return ur_cue_meme; + } + else { + uint64_t val, bak = ur_bsr64_any(bsr, len); + + // XX distinguish bad backref? + // + if ( !ur_dict64_get(r, dict, bak, &val) ) { + return ur_cue_gone; + } + + *out = (ur_nref)val; + return ur_cue_good; + } + } + + case ur_jam_atom: { + if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { + return res; + } + else if ( 62 >= len ) { + *out = (ur_nref)ur_bsr64_any(bsr, len); + } + else { + uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); + uint8_t *byt = calloc(len_byt, 1); + ur_bsr_bytes_any(bsr, len, byt); + + // strip trailing zeroes + // + while ( len_byt && !byt[len_byt - 1] ) { + len_byt--; + } + + *out = ur_coin_bytes_unsafe(r, byt, len_byt); + } + + ur_dict64_put(r, dict, bits, (uint64_t)*out); + return ur_cue_good; + } + } } } @@ -443,8 +510,54 @@ ur_cue_unsafe(ur_root_t *r, const uint8_t *byt, ur_nref *out) { - return ur_cue_walk64_unsafe(r, dict, len, byt, - (uint64_t*)out, _cue_coin, ur_cons); + ur_bsr_t bsr = {0}; + _cue64_stack_t s = {0}; + ur_cue_res_e res; + ur_nref ref; + + // init bitstream-reader + // + bsr.left = len; + bsr.bytes = byt; + + // setup stack + // + s.prev = ur_fib10; + s.size = ur_fib11; + s.f = malloc(s.size * sizeof(*s.f)); + + // advance into stream + // + res = _cue_next(r, &s, &bsr, dict, &ref); + + // process result + // + while ( s.fill && (ur_cue_good == res) ) { + // peek at the top of the stack + // + _cue64_frame_t *f = &(s.f[s.fill - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( CUE_HEAD64 == f->ref ) { + f->ref = ref; + res = _cue_next(r, &s, &bsr, dict, &ref); + } + // f is a tail-frame; pop the stack and continue + // + else { + ref = ur_cons(r, f->ref, ref); + ur_dict64_put(r, dict, f->bits, (uint64_t)ref); + s.fill--; + } + } + + free(s.f); + + if ( ur_cue_good == res ) { + *out = ref; + } + return res; } ur_cue_res_e @@ -453,8 +566,13 @@ ur_cue(ur_root_t *r, const uint8_t *byt, ur_nref *out) { - return 
ur_cue_walk64(r, len, byt, - (uint64_t*)out, _cue_coin, ur_cons); + ur_dict64_t dict = {0}; + ur_dict64_grow(r, &dict, ur_fib11, ur_fib12); + + ur_cue_res_e res = ur_cue_unsafe(r, &dict, len, byt, out); + + ur_dict_free((ur_dict_t*)&dict); + return res; } typedef struct _cue_test_frame_s { From 1da0c6d3531fd7cb2f3251723f9708775f41254d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 26 Aug 2020 16:18:18 -0700 Subject: [PATCH 058/123] ur: removes higher-ordered cue traversals --- pkg/urbit/include/ur/serial.h | 40 ---- pkg/urbit/ur/serial.c | 348 +++------------------------------- 2 files changed, 24 insertions(+), 364 deletions(-) diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index 68ad17750d..afac9e6a0d 100644 --- a/pkg/urbit/include/ur/serial.h +++ b/pkg/urbit/include/ur/serial.h @@ -8,46 +8,6 @@ uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); -typedef uint32_t (*ur_coin32_f)(ur_root_t*, ur_bsr_t*, uint64_t); -typedef uint32_t (*ur_cons32_f)(ur_root_t*, uint32_t, uint32_t); - -typedef uint64_t (*ur_coin64_f)(ur_root_t*, ur_bsr_t*, uint64_t); -typedef uint64_t (*ur_cons64_f)(ur_root_t*, uint64_t, uint64_t); - -ur_cue_res_e -ur_cue_walk32_unsafe(ur_root_t *r, - ur_dict32_t *dict, - uint64_t len, - const uint8_t *byt, - uint32_t *out, - ur_coin32_f coin, - ur_cons32_f cons); - -ur_cue_res_e -ur_cue_walk32(ur_root_t *r, - uint64_t len, - const uint8_t *byt, - uint32_t *out, - ur_coin32_f coin, - ur_cons32_f cons); - -ur_cue_res_e -ur_cue_walk64_unsafe(ur_root_t *r, - ur_dict64_t *dict, - uint64_t len, - const uint8_t *byt, - uint64_t *out, - ur_coin64_f coin, - ur_cons64_f cons); - -ur_cue_res_e -ur_cue_walk64(ur_root_t *r, - uint64_t len, - const uint8_t *byt, - uint64_t *out, - ur_coin64_f coin, - ur_cons64_f cons); - ur_cue_res_e ur_cue_unsafe(ur_root_t *r, ur_dict64_t *dict, diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 398f0082a7..b32973a6ff 100644 --- a/pkg/urbit/ur/serial.c +++ 
b/pkg/urbit/ur/serial.c @@ -87,337 +87,30 @@ ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) return j.bsw.bits; } -#define CUE_HEAD32 0xffffffff -#define CUE_HEAD64 0xffffffffffffffffULL - /* ** stack frame for recording head vs tail iteration ** -** $? [CUE_HEAD_NN bits=@] +** $? [CUE_HEAD bits=@] ** [hed=* bits=@] */ -typedef struct _cue32_frame_s { - uint32_t ref; - uint64_t bits; -} _cue32_frame_t; -typedef struct _cue64_frame_s { +#define CUE_HEAD 0xffffffffffffffffULL + +typedef struct _cue_frame_s { uint64_t ref; uint64_t bits; -} _cue64_frame_t; +} _cue_frame_t; -typedef struct _cue32_stack_s { - uint32_t prev; - uint32_t size; - uint32_t fill; - _cue32_frame_t* f; -} _cue32_stack_t; - -typedef struct _cue64_stack_s { - uint32_t prev; - uint32_t size; - uint32_t fill; - _cue64_frame_t* f; -} _cue64_stack_t; - -static inline ur_cue_res_e -_cue_walk32_next(ur_root_t *r, - _cue32_stack_t *s, - ur_bsr_t *bsr, - ur_dict32_t *dict, - ur_coin32_f coin, - uint32_t *out) -{ - while ( 1 ) { - uint64_t len, bits = bsr->bits; - ur_cue_tag_e tag; - ur_cue_res_e res; - - if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) ) { - return res; - } - - switch ( tag ) { - default: assert(0); - - case ur_jam_cell: { - // reallocate the stack if full - // - if ( s->fill == s->size ) { - uint32_t next = s->prev + s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - s->prev = s->size; - s->size = next; - } - - // save a head-frame and read the head from the stream - // - { - _cue32_frame_t* f = &(s->f[s->fill++]); - f->ref = CUE_HEAD32; - f->bits = bits; - } - continue; - } - - case ur_jam_back: { - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; - } - else if ( 62 < len ) { - return ur_cue_meme; - } - else { - uint64_t bak = ur_bsr64_any(bsr, len); - - // XX distinguish bad backref? - // - return ur_dict32_get(r, dict, bak, out) - ? 
ur_cue_good - : ur_cue_gone; - } - } - - case ur_jam_atom: { - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; - } - - *out = coin(r, bsr, len); - ur_dict32_put(r, dict, bits, *out); - return ur_cue_good; - } - } - } -} - -ur_cue_res_e -ur_cue_walk32_unsafe(ur_root_t *r, - ur_dict32_t *dict, - uint64_t len, - const uint8_t *byt, - uint32_t *out, - ur_coin32_f coin, - ur_cons32_f cons) -{ - ur_cue_res_e res; - uint32_t ref; - ur_bsr_t bsr = {0}; - _cue32_stack_t s = {0}; - - // init bitstream-reader - // - bsr.left = len; - bsr.bytes = byt; - - // setup stack - // - s.prev = ur_fib10; - s.size = ur_fib11; - s.f = malloc(s.size * sizeof(*s.f)); - - // advance into stream - // - res = _cue_walk32_next(r, &s, &bsr, dict, coin, &ref); - - // process result - // - while ( s.fill && (ur_cue_good == res) ) { - // peek at the top of the stack - // - _cue32_frame_t *f = &(s.f[s.fill - 1]); - - // f is a head-frame; stash result and read the tail from the stream - // - if ( CUE_HEAD32 == f->ref ) { - f->ref = ref; - res = _cue_walk32_next(r, &s, &bsr, dict, coin, &ref); - } - // f is a tail-frame; pop the stack and continue - // - else { - ref = cons(r, f->ref, ref); - ur_dict32_put(r, dict, f->bits, ref); - s.fill--; - } - } - - free(s.f); - - if ( ur_cue_good == res ) { - *out = ref; - } - return res; -} - -ur_cue_res_e -ur_cue_walk32(ur_root_t *r, - uint64_t len, - const uint8_t *byt, - uint32_t *out, - ur_coin32_f coin, - ur_cons32_f cons) -{ - ur_dict32_t dict = {0}; - ur_dict32_grow(r, &dict, ur_fib11, ur_fib12); - ur_cue_res_e res = ur_cue_walk32_unsafe(r, &dict, len, - byt, out, coin, cons); - - ur_dict_free((ur_dict_t*)&dict); - return res; -} - -static inline ur_cue_res_e -_cue_walk64_next(ur_root_t *r, - _cue64_stack_t *s, - ur_bsr_t *bsr, - ur_dict64_t *dict, - ur_coin64_f coin, - uint64_t *out) -{ - while ( 1 ) { - uint64_t len, bits = bsr->bits; - ur_cue_tag_e tag; - ur_cue_res_e res; - - if ( ur_cue_good != (res = ur_bsr_tag(bsr, &tag)) 
) { - return res; - } - - switch ( tag ) { - default: assert(0); - - case ur_jam_cell: { - // reallocate the stack if full - // - if ( s->fill == s->size ) { - uint32_t next = s->prev + s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - s->prev = s->size; - s->size = next; - } - - // save a head-frame and read the head from the stream - // - { - _cue64_frame_t* f = &(s->f[s->fill++]); - f->ref = CUE_HEAD64; - f->bits = bits; - } - continue; - } - - case ur_jam_back: { - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; - } - else if ( 62 < len ) { - return ur_cue_meme; - } - else { - uint64_t bak = ur_bsr64_any(bsr, len); - - // XX distinguish bad backref? - // - return ur_dict64_get(r, dict, bak, out) - ? ur_cue_good - : ur_cue_gone; - } - } - - case ur_jam_atom: { - if ( ur_cue_good != (res = ur_bsr_rub_len(bsr, &len)) ) { - return res; - } - - *out = coin(r, bsr, len); - ur_dict64_put(r, dict, bits, *out); - return ur_cue_good; - } - } - } -} - -ur_cue_res_e -ur_cue_walk64_unsafe(ur_root_t *r, - ur_dict64_t *dict, - uint64_t len, - const uint8_t *byt, - uint64_t *out, - ur_coin64_f coin, - ur_cons64_f cons) -{ - ur_cue_res_e res; - uint64_t ref; - ur_bsr_t bsr = {0}; - _cue64_stack_t s = {0}; - - // init bitstream-reader - // - bsr.left = len; - bsr.bytes = byt; - - // setup stack - // - s.prev = ur_fib10; - s.size = ur_fib11; - s.f = malloc(s.size * sizeof(*s.f)); - - // advance into stream - // - res = _cue_walk64_next(r, &s, &bsr, dict, coin, &ref); - - // process result - // - while ( s.fill && (ur_cue_good == res) ) { - // peek at the top of the stack - // - _cue64_frame_t *f = &(s.f[s.fill - 1]); - - // f is a head-frame; stash result and read the tail from the stream - // - if ( CUE_HEAD64 == f->ref ) { - f->ref = ref; - res = _cue_walk64_next(r, &s, &bsr, dict, coin, &ref); - } - // f is a tail-frame; pop the stack and continue - // - else { - ref = cons(r, f->ref, ref); - ur_dict64_put(r, dict, f->bits, ref); - s.fill--; - } 
- } - - free(s.f); - - if ( ur_cue_good == res ) { - *out = ref; - } - return res; -} - -ur_cue_res_e -ur_cue_walk64(ur_root_t *r, - uint64_t len, - const uint8_t *byt, - uint64_t *out, - ur_coin64_f coin, - ur_cons64_f cons) -{ - ur_dict64_t dict = {0}; - ur_dict64_grow(r, &dict, ur_fib11, ur_fib12); - - ur_cue_res_e res = ur_cue_walk64_unsafe(r, &dict, len, - byt, out, coin, cons); - - ur_dict_free((ur_dict_t*)&dict); - return res; -} +typedef struct _cue_stack_s { + uint32_t prev; + uint32_t size; + uint32_t fill; + _cue_frame_t* f; +} _cue_stack_t; static inline ur_cue_res_e _cue_next(ur_root_t *r, - _cue64_stack_t *s, + _cue_stack_t *s, ur_bsr_t *bsr, ur_dict64_t *dict, ur_nref *out) @@ -447,8 +140,8 @@ _cue_next(ur_root_t *r, // save a head-frame and read the head from the stream // { - _cue64_frame_t* f = &(s->f[s->fill++]); - f->ref = CUE_HEAD64; + _cue_frame_t* f = &(s->f[s->fill++]); + f->ref = CUE_HEAD; f->bits = bits; } continue; @@ -511,7 +204,7 @@ ur_cue_unsafe(ur_root_t *r, ur_nref *out) { ur_bsr_t bsr = {0}; - _cue64_stack_t s = {0}; + _cue_stack_t s = {0}; ur_cue_res_e res; ur_nref ref; @@ -535,11 +228,11 @@ ur_cue_unsafe(ur_root_t *r, while ( s.fill && (ur_cue_good == res) ) { // peek at the top of the stack // - _cue64_frame_t *f = &(s.f[s.fill - 1]); + _cue_frame_t *f = &(s.f[s.fill - 1]); // f is a head-frame; stash result and read the tail from the stream // - if ( CUE_HEAD64 == f->ref ) { + if ( CUE_HEAD == f->ref ) { f->ref = ref; res = _cue_next(r, &s, &bsr, dict, &ref); } @@ -575,6 +268,13 @@ ur_cue(ur_root_t *r, return res; } +/* +** stack frame for recording head vs tail iteration +** +** [hed=? 
bits=@] +** +*/ + typedef struct _cue_test_frame_s { ur_bool_t tal; uint64_t bits; From 11e7afdefcd05301a5b296c177c6278c8803b613 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 26 Aug 2020 13:17:18 -0700 Subject: [PATCH 059/123] u3: adds u3s_cue_xeno() --- pkg/urbit/include/noun/serial.h | 5 + pkg/urbit/noun/serial.c | 206 ++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) diff --git a/pkg/urbit/include/noun/serial.h b/pkg/urbit/include/noun/serial.h index c81e95d6db..2e07378a4e 100644 --- a/pkg/urbit/include/noun/serial.h +++ b/pkg/urbit/include/noun/serial.h @@ -34,3 +34,8 @@ */ u3_noun u3s_cue(u3_atom a); + + /* u3s_cue_xeno(): cue onto the loom, bookkeeping off the loom. + */ + u3_noun + u3s_cue_xeno(c3_d len_d, const c3_y* byt_y); diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 9accdf733d..589fd0bfc5 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -6,6 +6,7 @@ #include #include "all.h" +#include "ur/ur.h" /* _cs_met0_w(): safe bitwidth for any c3_w */ @@ -726,3 +727,208 @@ u3s_cue(u3_atom a) return pro; } + +/* +** stack frame for recording head vs tail iteration +** +** $? [u3_none bits=@] +** [hed=* bits=@] +*/ +typedef struct _cue_frame_s { + u3_weak ref; + c3_d bit_d; +} _cue_frame_t; + +typedef struct _cue_stack_s { + c3_w pre_w; + c3_w siz_w; + c3_w fil_w; + _cue_frame_t* fam_u; +} _cue_stack_t; + +/* _cs_cue_xeno_next(): read next value from bitstream, bookkeeping off-loom. 
+*/ +static inline ur_cue_res_e +_cs_cue_xeno_next(_cue_stack_t* tac_u, + ur_bsr_t* red_u, + ur_dict32_t* dic_u, + u3_noun* out) +{ + ur_root_t* rot_u = 0; + + while ( 1 ) { + c3_d len_d, bit_d = red_u->bits; + ur_cue_tag_e tag_e; + ur_cue_res_e res_e; + + if ( ur_cue_good != (res_e = ur_bsr_tag(red_u, &tag_e)) ) { + return res_e; + } + + switch ( tag_e ) { + default: c3_assert(0); + + case ur_jam_cell: { + // reallocate the stack if full + // + if ( tac_u->fil_w == tac_u->siz_w ) { + c3_w nex_w = tac_u->pre_w + tac_u->siz_w; + tac_u->fam_u = c3_realloc(tac_u->fam_u, nex_w * sizeof(*tac_u->fam_u)); + tac_u->pre_w = tac_u->siz_w; + tac_u->siz_w = nex_w; + } + + // save a head-frame and read the head from the stream + // + { + _cue_frame_t* fam_u = &(tac_u->fam_u[tac_u->fil_w++]); + fam_u->ref = u3_none; + fam_u->bit_d = bit_d; + } + continue; + } + + case ur_jam_back: { + if ( ur_cue_good != (res_e = ur_bsr_rub_len(red_u, &len_d)) ) { + return res_e; + } + else if ( 62 < len_d ) { + return ur_cue_meme; + } + else { + c3_d bak_d = ur_bsr64_any(red_u, len_d); + c3_w bak_w; + + if ( !ur_dict32_get(rot_u, dic_u, bak_d, &bak_w) ) { + // XX distinguish bad backref? + // + return ur_cue_gone; + } + else { + *out = u3k((u3_noun)bak_w); + return ur_cue_good; + } + } + } + + case ur_jam_atom: { + if ( ur_cue_good != (res_e = ur_bsr_rub_len(red_u, &len_d)) ) { + return res_e; + } + + if ( 31 >= len_d ) { + *out = (u3_noun)ur_bsr32_any(red_u, len_d); + } + // XX need a ur_bsr_words_any() + // + else { + c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); + c3_y* byt_y; + + // XX check that byt_d fits in a c3_w; + + byt_y = c3_calloc(byt_d); + ur_bsr_bytes_any(red_u, len_d, byt_y); + + *out = u3i_bytes(byt_d, byt_y); + + c3_free(byt_y); + } + + ur_dict32_put(rot_u, dic_u, bit_d, *out); + return ur_cue_good; + } + } + } +} + +/* _cs_cue_xeno_unsafe(): cue onto the loom, all bookkeeping off-loom. 
+*/ +static ur_cue_res_e +_cs_cue_xeno_unsafe(ur_dict32_t* dic_u, + c3_d len_d, + const c3_y* byt_y, + u3_noun* out) +{ + ur_bsr_t red_u = {0}; + _cue_stack_t tac_u = {0}; + ur_cue_res_e res_e; + u3_noun ref; + + // init bitstream-reader + // + red_u.left = len_d; + red_u.bytes = byt_y; + + // setup stack + // + tac_u.pre_w = ur_fib10; + tac_u.siz_w = ur_fib11; + tac_u.fam_u = c3_malloc(tac_u.siz_w * sizeof(*tac_u.fam_u)); + + // advance into stream + // + res_e = _cs_cue_xeno_next(&tac_u, &red_u, dic_u, &ref); + + // process result + // + while ( tac_u.fil_w && (ur_cue_good == res_e) ) { + // peek at the top of the stack + // + _cue_frame_t* fam_u = &(tac_u.fam_u[tac_u.fil_w - 1]); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( u3_none == fam_u->ref ) { + fam_u->ref = ref; + res_e = _cs_cue_xeno_next(&tac_u, &red_u, dic_u, &ref); + } + // f is a tail-frame; pop the stack and continue + // + else { + ur_root_t* rot_u = 0; + + ref = u3nc(fam_u->ref, ref); + ur_dict32_put(rot_u, dic_u, fam_u->bit_d, ref); + tac_u.fil_w--; + } + } + + c3_free(tac_u.fam_u); + + if ( ur_cue_good == res_e ) { + *out = ref; + } + return res_e; +} + +/* u3s_cue_xeno(): cue onto the loom, bookkeeping off the loom. 
+*/ +u3_noun +u3s_cue_xeno(c3_d len_d, const c3_y* byt_y) +{ + ur_dict32_t dic_u = {0}; + ur_cue_res_e res_e; + u3_noun pro; + + c3_assert( &(u3H->rod_u) == u3R ); + + // XX tune the initial dictionary size for less reallocation + // + { + ur_root_t* rot_u = 0; + ur_dict32_grow(rot_u, &dic_u, ur_fib33, ur_fib34); + } + + // errors are fatal + // + if ( ur_cue_good != + (res_e = _cs_cue_xeno_unsafe(&dic_u, len_d, byt_y, &pro)) ) + { + fprintf(stderr, "cue xeno: failed\r\n"); + exit(1); + } + + ur_dict_free((ur_dict_t*)&dic_u); + return pro; +} From 24b91af8885159cbd53db481392b043fcd4ad393 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 25 Aug 2020 17:43:36 -0700 Subject: [PATCH 060/123] u3: refactors _cu_rock_save(), splitting out path/directory format/creation --- pkg/urbit/noun/urth.c | 91 +++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index adfd8c7dc1..14c1885f69 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -444,49 +444,21 @@ u3u_uniq(void) ur_hcon_free(rot_u); } +/* _cu_rock_path_make(): format rock path, creating directory if necessary.. 
+*/ static c3_o -_cu_rock_save(c3_c* dir_c, c3_d eve_d, c3_d len_d, c3_y* byt_y) +_cu_rock_path_make(c3_c* dir_c, c3_d eve_d, c3_c** out_c) { - c3_i fid_i; + c3_w nam_w = 1 + snprintf(0, 0, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + c3_c* nam_c = c3_malloc(nam_w); + c3_i ret_i; - // open rock file, creating the containing directory if necessary + // create $pier/.urb/roc, if it doesn't exist + // + // NB, $pier/.urb is guaranteed to already exist // { - c3_w nam_w = 1 + snprintf(0, 0, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); - c3_c* nam_c = c3_malloc(nam_w); - c3_i ret_i; - - // create $pier/.urb/roc, if it doesn't exist - // - // NB, $pier/.urb is guaranteed to already exist - // - { - ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc", dir_c); - - if ( ret_i < 0 ) { - fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): %s\r\n", - dir_c, eve_d, strerror(errno)); - c3_free(nam_c); - return c3n; - } - else if ( ret_i >= nam_w ) { - fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): truncated\r\n", - dir_c, eve_d); - c3_free(nam_c); - return c3n; - } - - if ( mkdir(nam_c, 0700) - && (EEXIST != errno) ) - { - fprintf(stderr, "rock: directory create failed (%s, %" PRIu64 "): %s\r\n", - dir_c, eve_d, strerror(errno)); - c3_free(nam_c); - return c3n; - } - } - - ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc", dir_c); if ( ret_i < 0 ) { fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): %s\r\n", @@ -501,6 +473,49 @@ _cu_rock_save(c3_c* dir_c, c3_d eve_d, c3_d len_d, c3_y* byt_y) return c3n; } + if ( mkdir(nam_c, 0700) + && (EEXIST != errno) ) + { + fprintf(stderr, "rock: directory create failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + } + + ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + + if ( ret_i < 0 ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): 
%s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + else if ( ret_i >= nam_w ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): truncated\r\n", + dir_c, eve_d); + c3_free(nam_c); + return c3n; + } + + *out_c = nam_c; + return c3y; +} + +static c3_o +_cu_rock_save(c3_c* dir_c, c3_d eve_d, c3_d len_d, c3_y* byt_y) +{ + c3_i fid_i; + + // open rock file, creating the containing directory if necessary + // + { + c3_c* nam_c; + + if ( c3n == _cu_rock_path_make(dir_c, eve_d, &nam_c) ) { + return c3n; + } + if ( -1 == (fid_i = open(nam_c, O_RDWR | O_CREAT | O_TRUNC, 0644)) ) { fprintf(stderr, "rock: open failed (%s, %" PRIu64 "): %s\r\n", dir_c, eve_d, strerror(errno)); From dd8a74cea255ecd00b1c6155427ec097e225eeb8 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 25 Aug 2020 17:45:17 -0700 Subject: [PATCH 061/123] u3: replaces u3_serf_uncram with u3u_uncram() --- pkg/urbit/include/noun/urth.h | 4 + pkg/urbit/include/vere/serf.h | 5 -- pkg/urbit/noun/urth.c | 161 ++++++++++++++++++++++++++++++++++ pkg/urbit/worker/main.c | 14 ++- pkg/urbit/worker/serf.c | 91 ++----------------- 5 files changed, 185 insertions(+), 90 deletions(-) diff --git a/pkg/urbit/include/noun/urth.h b/pkg/urbit/include/noun/urth.h index 988920e602..7320d3b329 100644 --- a/pkg/urbit/include/noun/urth.h +++ b/pkg/urbit/include/noun/urth.h @@ -11,3 +11,7 @@ */ c3_o u3u_cram(c3_c* dir_c, c3_d eve_d); + /* u3u_uncram(): restore persistent state from a rock. + */ + c3_o + u3u_uncram(c3_c* dir_c, c3_d eve_d); diff --git a/pkg/urbit/include/vere/serf.h b/pkg/urbit/include/vere/serf.h index 12647dbb6a..004e297511 100644 --- a/pkg/urbit/include/vere/serf.h +++ b/pkg/urbit/include/vere/serf.h @@ -24,11 +24,6 @@ u3_noun u3_serf_init(u3_serf* sef_u); - /* u3_serf_uncram(): initialize from rock at [eve_d]. - */ - void - u3_serf_uncram(u3_serf* sef_u, c3_d eve_d); - /* u3_serf_writ(): apply writ [wit], producing plea [*pel] on c3y. 
*/ c3_o diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 14c1885f69..442d15587a 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -444,6 +444,34 @@ u3u_uniq(void) ur_hcon_free(rot_u); } +/* _cu_rock_path(): format rock path. +*/ +static c3_o +_cu_rock_path(c3_c* dir_c, c3_d eve_d, c3_c** out_c) +{ + c3_w nam_w = 1 + snprintf(0, 0, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + c3_c* nam_c = c3_malloc(nam_w); + c3_i ret_i; + + ret_i = snprintf(nam_c, nam_w, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, eve_d); + + if ( ret_i < 0 ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + else if ( ret_i >= nam_w ) { + fprintf(stderr, "rock: path format failed (%s, %" PRIu64 "): truncated\r\n", + dir_c, eve_d); + c3_free(nam_c); + return c3n; + } + + *out_c = nam_c; + return c3y; +} + /* _cu_rock_path_make(): format rock path, creating directory if necessary.. */ static c3_o @@ -628,3 +656,136 @@ u3u_cram(c3_c* dir_c, c3_d eve_d) return ret_o; } + +/* _cu_rock_load(): load a rock into a byte buffer. 
+*/ +static c3_o +_cu_rock_load(c3_c* dir_c, c3_d eve_d, c3_d* out_d, c3_y** out_y) +{ + c3_i fid_i; + c3_d len_d; + + // open rock file + // + { + c3_c* nam_c; + + if ( c3n == _cu_rock_path(dir_c, eve_d, &nam_c) ) { + return c3n; + } + + if ( -1 == (fid_i = open(nam_c, O_RDONLY, 0644)) ) { + fprintf(stderr, "rock: open failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + c3_free(nam_c); + return c3n; + } + + c3_free(nam_c); + } + + // measure rock file + // + { + struct stat buf_b; + + if ( -1 == fstat(fid_i, &buf_b) ) { + fprintf(stderr, "rock: stat failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + close(fid_i); + return c3n; + } + + len_d = buf_b.st_size; + } + + // mmap rock file + // + { + void* ptr_v; + + if ( MAP_FAILED == (ptr_v = mmap(0, len_d, PROT_READ, MAP_SHARED, fid_i, 0)) ) { + fprintf(stderr, "rock: mmap failed (%s, %" PRIu64 "): %s\r\n", + dir_c, eve_d, strerror(errno)); + close(fid_i); + return c3n; + } + + *out_d = len_d; + *out_y = (c3_y*)ptr_v; + } + + close(fid_i); + + return c3y; +} + +/* u3u_uncram(): restore persistent state from a rock. +*/ +c3_o +u3u_uncram(c3_c* dir_c, c3_d eve_d) +{ + c3_d len_d; + c3_y* byt_y; + + // load rock file into buffer + // + if ( c3n == _cu_rock_load(dir_c, eve_d, &len_d, &byt_y) ) { + return c3n; + } + + // bypassing page tracking as an optimization + // + // NB: u3e_yolo() will mark all as dirty, and + // u3e_save() will reinstate protection flags + // + if ( c3n == u3e_yolo() ) { + fprintf(stderr, "uncram: unable to bypass page tracking, continuing\r\n"); + } + + // reinitialize loom + // + // NB: hot jet state is not yet re-established + // + u3m_pave(c3y, c3n); + + // cue rock, restore persistent state + // + // XX errors are fatal, barring a full "u3m_reboot"-type operation. 
+ // + { + u3_noun roc, cod, ref = u3s_cue_xeno(len_d, byt_y); + + if ( u3r_pq(ref, c3__fast, &roc, &cod) ) { + u3z(ref); + fprintf(stderr, "uncram: failed: invalid rock format\r\n"); + exit(1); + } + + u3A->roc = u3k(roc); + u3j_load(u3k(cod)); + + u3z(ref); + } + + // allocate new hot jet state; re-establish warm + // + u3j_boot(c3y); + u3j_ream(); + + // restore event number + // + u3A->ent_d = eve_d; + + // mark all pages dirty + // + u3e_foul(); + + // leave rocks on disk + // + // if ( c3n == u3m_rock_drop(dir_c, eve_d) ) { + // u3l_log("serf: warning: orphaned state file\r\n"); + // } + + return c3y; +} diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 8cd196a0ac..d7060d6b44 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -189,7 +189,12 @@ _cw_serf_commence(c3_i argc, c3_c* argv[]) u3V.sen_d = u3V.dun_d = u3m_boot(dir_c); if ( eve_d ) { - u3_serf_uncram(&u3V, eve_d); + // XX need not be fatal, need a u3m_reboot equivalent + // + if ( c3n == u3u_uncram(u3V.dir_c, eve_d) ) { + fprintf(stderr, "serf (%" PRIu64 "): rock load failed\r\n", eve_d); + exit(1); + } } } @@ -292,7 +297,12 @@ _cw_queu(c3_i argc, c3_c* argv[]) memset(&u3V, 0, sizeof(u3V)); u3V.dir_c = strdup(dir_c); u3V.sen_d = u3V.dun_d = u3m_boot(dir_c); - u3_serf_uncram(&u3V, eve_d); + + if ( c3n == u3u_uncram(dir_c, eve_d) ) { + fprintf(stderr, "urbit-worker: queu: failed\r\n"); + exit(1); + } + u3e_save(); fprintf(stderr, "urbit-worker: queu: rock loaded at event %" PRIu64 "\r\n", eve_d); diff --git a/pkg/urbit/worker/serf.c b/pkg/urbit/worker/serf.c index 8ddc30e48e..98215b979c 100644 --- a/pkg/urbit/worker/serf.c +++ b/pkg/urbit/worker/serf.c @@ -277,27 +277,6 @@ u3_serf_grab(void) fflush(stderr); } -/* _serf_cram(): deduplicate and compact memory. 
ORPHANED -*/ -static void -_serf_cram(u3_serf* sef_u) -{ - u3_serf_grab(); - - u3l_log("serf (%" PRIu64 "): compacting loom\r\n", sef_u->dun_d); - - if ( c3n == u3m_rock_stay(sef_u->dir_c, sef_u->dun_d) ) { - u3l_log("serf: unable to jam state\r\n"); - return; - } - - u3_serf_uncram(sef_u, sef_u->dun_d); - - u3l_log("serf (%" PRIu64 "): compacted loom\r\n", sef_u->dun_d); - - u3_serf_grab(); -} - /* u3_serf_post(): update serf state post-writ. */ void @@ -971,11 +950,18 @@ u3_serf_live(u3_serf* sef_u, u3_noun com, u3_noun* ret) u3l_log("serf (%" PRIu64 "): saving rock\r\n", sef_u->dun_d); - if ( c3n == u3m_rock_stay(sef_u->dir_c, eve_d) ) { + if ( c3n == u3u_cram(sef_u->dir_c, eve_d) ) { fprintf(stderr, "serf (%" PRIu64 "): unable to jam state\r\n", eve_d); return c3n; } + if ( u3r_mug(u3A->roc) != sef_u->mug_l ) { + fprintf(stderr, "serf (%" PRIu64 "): mug mismatch 0x%08x 0x%08x\r\n", + eve_d, sef_u->mug_l, u3r_mug(u3A->roc)); + return c3n; + } + + u3e_save(); u3_serf_grab(); *ret = u3nc(c3__live, u3_nul); @@ -1120,67 +1106,6 @@ _serf_ripe(u3_serf* sef_u) return u3nc(u3i_chubs(1, &sef_u->dun_d), sef_u->mug_l); } -/* u3_serf_uncram(): initialize from rock at [eve_d]. 
-*/ -void -u3_serf_uncram(u3_serf* sef_u, c3_d eve_d) -{ - c3_o roc_o; - c3_c nam_c[8193]; - snprintf(nam_c, 8192, "%s/.urb/roc/%" PRIu64 ".jam", sef_u->dir_c, eve_d); - - struct stat buf_b; - c3_i fid_i = open(nam_c, O_RDONLY, 0644); - - if ( (fid_i < 0) || (fstat(fid_i, &buf_b) < 0) ) { - fprintf(stderr, "serf: rock: %s not found\r\n", nam_c); - roc_o = c3n; - } - else { - fprintf(stderr, "serf: rock: %s found\r\n", nam_c); - roc_o = c3y; - } - - close(fid_i); - - - if ( c3y == roc_o ) { - if ( c3n == u3e_hold() ) { - fprintf(stderr, "serf: unable to backup checkpoint\r\n"); - } - else { - u3m_wipe(); - - if ( c3n == u3m_rock_load(sef_u->dir_c, eve_d) ) { - fprintf(stderr, "serf: compaction failed, restoring checkpoint\r\n"); - - if ( c3n == u3e_fall() ) { - fprintf(stderr, "serf: unable to restore checkpoint\r\n"); - c3_assert(0); - } - } - - if ( c3n == u3e_drop() ) { - fprintf(stderr, "serf: warning: orphaned backup checkpoint file\r\n"); - } - - // leave rocks on disk - // - // if ( c3n == u3m_rock_drop(sef_u->dir_c, sef_u->dun_d) ) { - // u3l_log("serf: warning: orphaned state file\r\n"); - // } - - fprintf(stderr, "serf (%" PRIu64 "): compacted loom\r\n", eve_d); - - sef_u->sen_d = sef_u->dun_d = eve_d; - - // save now for flexibility - // - u3e_save(); - } - } -} - /* u3_serf_init(): init or restore, producing status. 
*/ u3_noun From ac7d0731f66499e40e657491fa354bcbb31775d0 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 12:46:55 -0700 Subject: [PATCH 062/123] u3: adds u3s_cue_smol/full/bytes/atom --- pkg/urbit/include/noun/serial.h | 20 ++ pkg/urbit/noun/serial.c | 368 +++++++++++++++++++++++++++++++- 2 files changed, 387 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/include/noun/serial.h b/pkg/urbit/include/noun/serial.h index 2e07378a4e..53883e95a6 100644 --- a/pkg/urbit/include/noun/serial.h +++ b/pkg/urbit/include/noun/serial.h @@ -39,3 +39,23 @@ */ u3_noun u3s_cue_xeno(c3_d len_d, const c3_y* byt_y); + + /* u3s_cue_smol(): cue onto the loom, bitwidth fits in a direct atom. + */ + u3_noun + u3s_cue_smol(c3_w len_w, const c3_y* byt_y); + + /* u3s_cue_full(): cue onto the loom. + */ + u3_noun + u3s_cue_full(c3_d len_d, const c3_y* byt_y); + + /* u3s_cue_bytes(): cue bytes. + */ + u3_noun + u3s_cue_bytes(c3_d len_d, const c3_y* byt_y); + + /* u3s_cue_atom(): cue atom. + */ + u3_noun + u3s_cue_atom(u3_atom a); diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 589fd0bfc5..be51e314e9 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -826,7 +826,7 @@ _cs_cue_xeno_next(_cue_stack_t* tac_u, c3_y* byt_y; // XX check that byt_d fits in a c3_w; - + // byt_y = c3_calloc(byt_d); ur_bsr_bytes_any(red_u, len_d, byt_y); @@ -932,3 +932,369 @@ u3s_cue_xeno(c3_d len_d, const c3_y* byt_y) ur_dict_free((ur_dict_t*)&dic_u); return pro; } + +/* _cs_cue_need(): bail on ur_cue_* read failures. +*/ +static inline void +_cs_cue_need(ur_cue_res_e res_e) +{ + if ( ur_cue_good == res_e ) { + return; + } + else { + c3_m mot_m = (ur_cue_meme == res_e) ? c3__meme : c3__exit; + u3m_bail(mot_m); + } +} + +/* _cs_cue_smol_next(): read next value from bitstream, direct-atom cursors. 
+*/ +static inline u3_noun +_cs_cue_smol_next(c3_ys mov, + c3_ys off, + u3p(u3h_root) har_p, + ur_bsr_t* red_u) +{ + while ( 1 ) { + c3_d len_d, bit_d = red_u->bits; + ur_cue_tag_e tag_e; + + _cs_cue_need(ur_bsr_tag(red_u, &tag_e)); + + switch ( tag_e ) { + default: c3_assert(0); + + case ur_jam_cell: { + // wind the stack + // + u3R->cap_p += mov; + + // ensure we haven't overflowed (ie, run into the heap) + // (off==0 means we're on a north road) + // + if ( 0 == off ) { + if( !(u3R->cap_p > u3R->hat_p) ) { + u3m_bail(c3__meme); + } + } + else { + if( !(u3R->cap_p < u3R->hat_p) ) { + u3m_bail(c3__meme); + } + } + + // save a head-frame and read the head from the stream + // + { + _cue_frame_t* fam_u = u3to(_cue_frame_t, u3R->cap_p + off); + fam_u->ref = u3_none; + fam_u->bit_d = bit_d; + } + continue; + } + + case ur_jam_back: { + _cs_cue_need(ur_bsr_rub_len(red_u, &len_d)); + + // XX review, s/b deterministic error + // + c3_assert( 32 > len_d ); + + { + c3_w bak_w = ur_bsr32_any(red_u, len_d); + u3_weak bak = u3h_get(har_p, (u3_noun)bak_w); + return u3x_good(bak); + } + } + + case ur_jam_atom: { + u3_atom vat; + + _cs_cue_need(ur_bsr_rub_len(red_u, &len_d)); + + if ( 31 >= len_d ) { + vat = (u3_noun)ur_bsr32_any(red_u, len_d); + } + else { + c3_w byt_w = (len_d >> 3) + !!ur_mask_3(len_d); + c3_w* wor_w = u3a_slaq(3, byt_w); + // XX assumes little-endian + // XX need a ur_bsr_words_any() + // + c3_y* byt_y = (c3_y*)wor_w; + + ur_bsr_bytes_any(red_u, len_d, byt_y); + vat = u3a_malt(wor_w); + } + + u3h_put(har_p, (u3_noun)(c3_w)bit_d, u3k(vat)); + + return vat; + } + } + } +} + +/* u3s_cue_smol(): cue onto the loom, bitwidth fits in a direct atom. 
+*/ +u3_noun +u3s_cue_smol(c3_w len_w, const c3_y* byt_y) +{ + ur_bsr_t red_u = {0}; + u3_noun ref; + + c3_assert( 0x10000000 >= len_w ); + + // initialize a hash table for dereferencing backrefs + // + u3p(u3h_root) har_p = u3h_new(); + const u3_post top_p = u3R->cap_p; + + // initialize signed stack offsets (relative to north/south road) + // + c3_ys mov, off; + { + c3_o nor_o = u3a_is_north(u3R); + c3_y wis_y = c3_wiseof(_cue_frame_t); + mov = ( c3y == nor_o ? -wis_y : wis_y ); + off = ( c3y == nor_o ? 0 : -wis_y ); + } + + // init bitstream-reader + // + red_u.left = len_w; + red_u.bytes = byt_y; + + // advance into stream + // + ref = _cs_cue_smol_next(mov, off, har_p, &red_u); + + // process result + // + while ( top_p != u3R->cap_p ) { + // peek at the top of the stack + // + _cue_frame_t* fam_u = u3to(_cue_frame_t, u3R->cap_p + off); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( u3_none == fam_u->ref ) { + fam_u->ref = ref; + ref = _cs_cue_smol_next(mov, off, har_p, &red_u); + } + // f is a tail-frame; pop the stack and continue + // + else { + ref = u3nc(fam_u->ref, ref); + u3h_put(har_p, (u3_noun)(c3_w)fam_u->bit_d, u3k(ref)); + u3R->cap_p -= mov; + } + } + + u3h_free(har_p); + + return ref; +} + +/* _cs_cue_get(): u3h_get wrapper handling allocation and refcounts. +*/ +static inline u3_weak +_cs_cue_get(u3p(u3h_root) har_p, c3_d key_d) +{ + u3_atom key = u3i_chubs(1, &key_d); + u3_weak pro = u3h_get(har_p, key); + u3z(key); + return pro; +} + +/* _cs_cue_put(): u3h_put wrapper handling allocation and refcounts. +*/ +static inline u3_noun +_cs_cue_put(u3p(u3h_root) har_p, c3_d key_d, u3_noun val) +{ + u3_atom key = u3i_chubs(1, &key_d); + u3h_put(har_p, key, u3k(val)); + u3z(key); + return val; +} + +/* _cs_cue_full_next(): read next value from bitstream. 
+*/ +static inline u3_noun +_cs_cue_full_next(c3_ys mov, + c3_ys off, + u3p(u3h_root) har_p, + ur_bsr_t* red_u) +{ + while ( 1 ) { + c3_d len_d, bit_d = red_u->bits; + ur_cue_tag_e tag_e; + + _cs_cue_need(ur_bsr_tag(red_u, &tag_e)); + + switch ( tag_e ) { + default: c3_assert(0); + + case ur_jam_cell: { + // wind the stack + // + u3R->cap_p += mov; + + // ensure we haven't overflowed (ie, run into the heap) + // (off==0 means we're on a north road) + // + if ( 0 == off ) { + if( !(u3R->cap_p > u3R->hat_p) ) { + u3m_bail(c3__meme); + } + } + else { + if( !(u3R->cap_p < u3R->hat_p) ) { + u3m_bail(c3__meme); + } + } + + // save a head-frame and read the head from the stream + // + { + _cue_frame_t* fam_u = u3to(_cue_frame_t, u3R->cap_p + off); + fam_u->ref = u3_none; + fam_u->bit_d = bit_d; + } + continue; + } + + case ur_jam_back: { + _cs_cue_need(ur_bsr_rub_len(red_u, &len_d)); + + if ( 62 < len_d ) { + return u3m_bail(c3__meme); + } + else { + c3_d bak_d = ur_bsr64_any(red_u, len_d); + u3_weak bak = _cs_cue_get(har_p, bak_d); + return u3x_good(bak); + } + } + + case ur_jam_atom: { + u3_atom vat; + + _cs_cue_need(ur_bsr_rub_len(red_u, &len_d)); + + if ( 31 >= len_d ) { + vat = (u3_noun)ur_bsr32_any(red_u, len_d); + } + // XX need a ur_bsr_words_any() + // + else { + // XX check that byt_d fits in a c3_w; + // + c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); + c3_w* wor_w = u3a_slaq(3, byt_d); + // XX assumes little-endian + // + c3_y* byt_y = (c3_y*)wor_w; + + ur_bsr_bytes_any(red_u, len_d, byt_y); + + vat = u3a_malt(wor_w); + } + + return _cs_cue_put(har_p, bit_d, vat); + } + } + } +} + +/* u3s_cue_full(): cue onto the loom. 
+*/ +u3_noun +u3s_cue_full(c3_d len_d, const c3_y* byt_y) +{ + ur_bsr_t red_u = {0}; + u3_noun ref; + + // initialize a hash table for dereferencing backrefs + // + u3p(u3h_root) har_p = u3h_new(); + const u3_post top_p = u3R->cap_p; + + // initialize signed stack offsets (relative to north/south road) + // + c3_ys mov, off; + { + c3_o nor_o = u3a_is_north(u3R); + c3_y wis_y = c3_wiseof(_cue_frame_t); + mov = ( c3y == nor_o ? -wis_y : wis_y ); + off = ( c3y == nor_o ? 0 : -wis_y ); + } + + // init bitstream-reader + // + red_u.left = len_d; + red_u.bytes = byt_y; + + // advance into stream + // + ref = _cs_cue_full_next(mov, off, har_p, &red_u); + + // process result + // + while ( top_p != u3R->cap_p ) { + // peek at the top of the stack + // + _cue_frame_t* fam_u = u3to(_cue_frame_t, u3R->cap_p + off); + + // f is a head-frame; stash result and read the tail from the stream + // + if ( u3_none == fam_u->ref ) { + fam_u->ref = ref; + ref = _cs_cue_full_next(mov, off, har_p, &red_u); + } + // f is a tail-frame; pop the stack and continue + // + else { + ref = u3nc(fam_u->ref, ref); + _cs_cue_put(har_p, fam_u->bit_d, ref); + u3R->cap_p -= mov; + } + } + + u3h_free(har_p); + + return ref; +} + +/* u3s_cue_bytes(): cue bytes. +*/ +u3_noun +u3s_cue_bytes(c3_d len_d, const c3_y* byt_y) +{ + // check if we can bitwise-index [len_d] bytes in a direct atom + // + return ( 0x10000000ULL >= len_d ) + ? u3s_cue_smol(len_d, byt_y) + : u3s_cue_full(len_d, byt_y); +} + +/* u3s_cue_atom(): cue atom. 
+*/ +u3_noun +u3s_cue_atom(u3_atom a) +{ + c3_w len_w = u3r_met(3, a); + c3_y* byt_y; + + // XX assumes little-endian + // + if ( c3y == u3a_is_cat(a) ) { + byt_y = (c3_y*)&a; + } + else { + u3a_atom* vat_u = u3a_to_ptr(a); + byt_y = (c3_y*)vat_u->buf_w; + } + + return u3s_cue_bytes((c3_d)len_w, byt_y); +} From baf8b884faa4303e696691ee23a8e0723b98bf8c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 13:51:30 -0700 Subject: [PATCH 063/123] u3: adds u3s_jam_xeno() --- pkg/urbit/include/noun/serial.h | 5 ++ pkg/urbit/noun/serial.c | 127 +++++++++++++++++++++++++++++++- 2 files changed, 130 insertions(+), 2 deletions(-) diff --git a/pkg/urbit/include/noun/serial.h b/pkg/urbit/include/noun/serial.h index 53883e95a6..96a39bcd84 100644 --- a/pkg/urbit/include/noun/serial.h +++ b/pkg/urbit/include/noun/serial.h @@ -30,6 +30,11 @@ c3_o u3s_jam_file(u3_noun a, c3_c* pas_c); + /* u3s_jam_xeno(): jam with off-loom buffer (re-)allocation. + */ + c3_d + u3s_jam_xeno(u3_noun a, c3_d* len_d, c3_y** byt_y); + /* u3s_cue(): cue [a] */ u3_noun diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index be51e314e9..26cd330af9 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -502,6 +502,129 @@ u3s_jam_file(u3_noun a, c3_c* pas_c) } } +typedef struct _jam_xeno_s { + u3p(u3h_root) har_p; + ur_bsw_t rit_u; +} _jam_xeno_t; + +/* _cs_coin_chub(): shortcircuit u3i_chubs(). +*/ +static inline u3_atom +_cs_coin_chub(c3_d a_d) +{ + return ( 0x7fffffffULL >= a_d ) ? a_d : u3i_chubs(1, &a_d); +} + +/* _cs_jam_xeno_atom(): encode in/direct atom in bitstream. 
+*/ +static inline void +_cs_jam_bsw_atom(ur_bsw_t* rit_u, c3_w met_w, u3_atom a) +{ + if ( c3y == u3a_is_cat(a) ) { + // XX need a ur_bsw_atom32() + // + ur_bsw_atom64(rit_u, (c3_y)met_w, (c3_d)a); + } + else { + u3a_atom* vat_u = u3a_to_ptr(a); + // XX assumes little-endian + // XX need a ur_bsw_atom_words() + // + c3_y* byt_y = (c3_y*)vat_u->buf_w; + ur_bsw_atom_bytes(rit_u, (c3_d)met_w, byt_y); + } +} + +/* _cs_jam_bsw_back(): encode in/direct backref in bitstream. +*/ +static inline void +_cs_jam_bsw_back(ur_bsw_t* rit_u, c3_w met_w, u3_atom a) +{ + c3_d bak_d = ( c3y == u3a_is_cat(a) ) + ? (c3_d)a + : u3r_chub(0, a); + + // XX need a ur_bsw_back32() + // + ur_bsw_back64(rit_u, (c3_y)met_w, bak_d); +} + +/* _cs_jam_xeno_atom(): encode atom or backref in bitstream. +*/ +static void +_cs_jam_xeno_atom(u3_atom a, void* ptr_v) +{ + _jam_xeno_t* jam_u = ptr_v; + ur_bsw_t* rit_u = &(jam_u->rit_u); + u3_weak bak = u3h_git(jam_u->har_p, a); + c3_w met_w = u3r_met(0, a); + + if ( u3_none == bak ) { + u3h_put(jam_u->har_p, a, _cs_coin_chub(rit_u->bits)); + _cs_jam_bsw_atom(rit_u, met_w, a); + } + else { + c3_w bak_w = u3r_met(0, bak); + + if ( met_w <= bak_w ) { + _cs_jam_bsw_atom(rit_u, met_w, a); + } + else { + _cs_jam_bsw_back(rit_u, bak_w, bak); + } + } +} + +/* _cs_jam_xeno_cell(): encode cell or backref in bitstream. +*/ +static c3_o +_cs_jam_xeno_cell(u3_noun a, void* ptr_v) +{ + _jam_xeno_t* jam_u = ptr_v; + ur_bsw_t* rit_u = &(jam_u->rit_u); + u3_weak bak = u3h_git(jam_u->har_p, a); + + if ( u3_none == bak ) { + u3h_put(jam_u->har_p, a, _cs_coin_chub(rit_u->bits)); + ur_bsw_cell(rit_u); + return c3y; + } + else { + _cs_jam_bsw_back(rit_u, u3r_met(0, bak), bak); + return c3n; + } +} + +/* u3s_jam_xeno(): jam with off-loom buffer (re-)allocation. 
+*/ +c3_d +u3s_jam_xeno(u3_noun a, c3_d* len_d, c3_y** byt_y) +{ + _jam_xeno_t jam_u = {0}; + + c3_assert( &(u3H->rod_u) == u3R ); + + jam_u.har_p = u3h_new(); + + jam_u.rit_u.prev = ur_fib11; + jam_u.rit_u.size = ur_fib12; + jam_u.rit_u.bytes = c3_calloc(jam_u.rit_u.size); + + // as this is a hot path, we unsafely elide overflow checks + // + // a page-fault overflow detection system is urgently needed ... + // + u3a_walk_fore_unsafe(a, &jam_u, _cs_jam_xeno_atom, + _cs_jam_xeno_cell); + + *len_d = jam_u.rit_u.fill + !!jam_u.rit_u.off; + *byt_y = jam_u.rit_u.bytes; + + u3h_free(jam_u.har_p); + + return jam_u.rit_u.bits; +} + #define CUE_ROOT 0 #define CUE_HEAD 1 #define CUE_TAIL 2 @@ -1101,7 +1224,7 @@ u3s_cue_smol(c3_w len_w, const c3_y* byt_y) static inline u3_weak _cs_cue_get(u3p(u3h_root) har_p, c3_d key_d) { - u3_atom key = u3i_chubs(1, &key_d); + u3_atom key = _cs_coin_chub(key_d); u3_weak pro = u3h_get(har_p, key); u3z(key); return pro; @@ -1112,7 +1235,7 @@ _cs_cue_get(u3p(u3h_root) har_p, c3_d key_d) static inline u3_noun _cs_cue_put(u3p(u3h_root) har_p, c3_d key_d, u3_noun val) { - u3_atom key = u3i_chubs(1, &key_d); + u3_atom key = _cs_coin_chub(key_d); u3h_put(har_p, key, u3k(val)); u3z(key); return val; From 9addb545c0f83f3511d8fc821b9a470e89aea27c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 15:01:43 -0700 Subject: [PATCH 064/123] u3: refactors jam_tests to run all and capture results --- pkg/urbit/tests/jam_tests.c | 124 ++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 42 deletions(-) diff --git a/pkg/urbit/tests/jam_tests.c b/pkg/urbit/tests/jam_tests.c index bc557c8572..b49f17827c 100644 --- a/pkg/urbit/tests/jam_tests.c +++ b/pkg/urbit/tests/jam_tests.c @@ -9,19 +9,21 @@ _setup(void) u3m_pave(c3y, c3n); } -/* _test_jam(): spot check jam/cue +/* _test_jam_spot_a(): spot check jam/cue */ -static void -_test_jam(void) +static c3_i +_test_jam_spot_a(void) { + c3_i ret_i = 1; + if ( 0xc != u3qe_jam(1) ) { 
fprintf(stderr, "jam: fail (a)\r\n"); - exit(1); + ret_i = 0; } if ( 1 != u3ke_cue(u3qe_jam(1)) ) { fprintf(stderr, "jam: fail (b)\r\n"); - exit(1); + ret_i = 0; } { @@ -29,12 +31,12 @@ _test_jam(void) if ( 0x1231 != u3qe_jam(a) ) { fprintf(stderr, "jam: fail (c)\r\n"); - exit(1); + ret_i = 0; } if ( c3y != u3r_sing(a, u3ke_cue(u3qe_jam(a))) ) { fprintf(stderr, "jam: fail (d)\r\n"); - exit(1); + ret_i = 0; } } @@ -43,28 +45,26 @@ _test_jam(void) if ( 0x344871 != u3qe_jam(a) ) { fprintf(stderr, "jam: fail (e)\r\n"); - exit(1); + ret_i = 0; } if ( c3y != u3r_sing(a, u3ke_cue(u3qe_jam(a))) ) { fprintf(stderr, "jam: fail (f)\r\n"); - exit(1); + ret_i = 0; } } { u3_noun a = u3nc(u3nc(1, 2), 3); - // fprintf(stderr, "%x\n", u3qe_jam(a)); - if ( 0x3448c5 != u3qe_jam(a) ) { fprintf(stderr, "jam: fail (g)\r\n"); - exit(1); + ret_i = 0; } if ( c3y != u3r_sing(a, u3ke_cue(u3qe_jam(a))) ) { fprintf(stderr, "jam: fail (h)\r\n"); - exit(1); + ret_i = 0; } } @@ -74,7 +74,7 @@ _test_jam(void) if ( c3y != u3r_sing(a, u3ke_cue(u3qe_jam(a))) ) { fprintf(stderr, "jam: fail (j)\r\n"); - exit(1); + ret_i = 0; } } @@ -84,7 +84,7 @@ _test_jam(void) if ( c3y != u3r_sing(a, u3ke_cue(u3qe_jam(a))) ) { fprintf(stderr, "jam: fail (k)\r\n"); - exit(1); + ret_i = 0; } } @@ -93,16 +93,20 @@ _test_jam(void) if ( c3y != u3r_sing(a, u3ke_cue(u3qe_jam(a))) ) { fprintf(stderr, "jam: fail (l)\r\n"); - exit(1); + ret_i = 0; } } + + return ret_i; } -/* _test_cue_jam(): more jam/cue spot-checking, ported from the 64-bit effort +/* _test_jam_spot_b(): more jam/cue spot-checking, ported from the 64-bit effort */ -static void -_test_cue_jam() +static c3_i +_test_jam_spot_b() { + c3_i ret_i = 1; + // the boot msg from the worker { u3_noun dat = u3_nul; @@ -114,13 +118,14 @@ _test_cue_jam() u3_noun tail_out = u3t(out_1); if (c3__play != head_out){ - printf("*** cue_jam 0 out head \n"); + fprintf(stderr, "*** cue_jam 0 out head\r\n"); + ret_i = 0; } if (u3_nul != tail_out){ - printf("*** cue_jam 0 out tail 
\n"); + fprintf(stderr, "*** cue_jam 0 out tail\r\n"); + ret_i = 0; } - } // the boot msg from the worker, again, @@ -144,7 +149,8 @@ _test_cue_jam() u3_noun jam_2 = u3i_bytes(len_w, buf_y); if ( c3n == u3r_sing(jam_1, jam_2) ) { - printf("*** error in 6 byte message\n"); + fprintf(stderr, "*** error in 6 byte message\r\n"); + ret_i = 0; } u3_noun out_1 = u3ke_cue(jam_2); @@ -153,13 +159,14 @@ _test_cue_jam() u3_noun tail_out = u3t(out_1); if (c3__play != head_out){ - printf("*** cue_jam 0 out head \n"); + fprintf(stderr, "*** cue_jam 0 out head\r\n"); + ret_i = 0; } if (u3_nul != tail_out){ - printf("*** cue_jam 0 out tail \n"); + fprintf(stderr, "*** cue_jam 0 out tail\r\n"); + ret_i = 0; } - } // 1 @@ -169,13 +176,15 @@ _test_cue_jam() u3_atom jam_1 = u3ke_jam(in_1); if (12 != jam_1){ - printf("*** cue_jam 1a \n"); + fprintf(stderr, "*** cue_jam 1a\r\n"); + ret_i = 0; } u3_noun out_1 = u3ke_cue(jam_1); if (1 != out_1){ - printf("*** cue_jam 1b \n"); + fprintf(stderr, "*** cue_jam 1b\r\n"); + ret_i = 0; } } @@ -186,7 +195,8 @@ _test_cue_jam() u3_atom jam_1 = u3ke_jam(in_1); if (817 != jam_1){ - printf("*** cue_jam 2 in \n"); + fprintf(stderr, "*** cue_jam 2 in\r\n"); + ret_i = 0; } u3_noun out_1 = u3ke_cue(jam_1); @@ -196,11 +206,13 @@ _test_cue_jam() u3_noun tail_out = u3t(out_1); if (1 != head_out){ - printf("*** cue_jam 2 out head \n"); + fprintf(stderr, "*** cue_jam 2 out head\r\n"); + ret_i = 0; } if (1 != tail_out){ - printf("*** cue_jam 2 out tail \n"); + fprintf(stderr, "*** cue_jam 2 out tail\r\n"); + ret_i = 0; } } @@ -211,7 +223,8 @@ _test_cue_jam() u3_atom jam_1 = u3ke_jam(in_1); if (4657 != jam_1){ - printf("*** cue_jam 2 in \n"); + fprintf(stderr, "*** cue_jam 2 in\r\n"); + ret_i = 0; } u3_noun out_1 = u3ke_cue(jam_1); @@ -220,11 +233,13 @@ _test_cue_jam() u3_noun tail_out = u3t(out_1); if (1 != head_out){ - printf("*** cue_jam 2 out head \n"); + fprintf(stderr, "*** cue_jam 2 out head\r\n"); + ret_i = 0; } if (2 != tail_out){ - printf("*** cue_jam 
2 out tail \n"); + fprintf(stderr, "*** cue_jam 2 out tail\r\n"); + ret_i = 0; } } @@ -252,25 +267,49 @@ _test_cue_jam() u3_noun a2 = u3h(out_1); u3_noun r2 = u3t(out_1); if (a2 != a){ - printf("*** _cue_jam: complicated a\n"); + fprintf(stderr, "*** _cue_jam: complicated a\r\n"); + ret_i = 0; } u3_noun b2 = u3h(r2); u3_noun s2 = u3t(r2); if (b2 != b){ - printf("*** _cue_jam: complicated b\n"); + fprintf(stderr, "*** _cue_jam: complicated b\r\n"); + ret_i = 0; } u3_noun c2 = u3h(s2); u3_noun d2 = u3t(s2); if (c2 != c){ - printf("*** _cue_jam: complicated c\n"); + fprintf(stderr, "*** _cue_jam: complicated c\r\n"); + ret_i = 0; } if (d2 != d){ - printf("*** _cue_jam: complicated d\n"); + fprintf(stderr, "*** _cue_jam: complicated d\r\n"); + ret_i = 0; } } + + return 1; +} + +static c3_i +_test_jam(void) +{ + c3_i ret_i = 1; + + if ( !_test_jam_spot_a() ) { + fprintf(stderr, "test jam: spot a: failed\r\n"); + ret_i = 0; + } + + if ( !_test_jam_spot_b() ) { + fprintf(stderr, "test jam: spot b: failed\r\n"); + ret_i = 0; + } + + return ret_i; } /* main(): run all test cases. 
@@ -280,10 +319,11 @@ main(int argc, char* argv[]) { _setup(); - _test_jam(); - _test_cue_jam(); - - fprintf(stderr, "test_jam: ok\n"); + if ( !_test_jam() ) { + fprintf(stderr, "test jam: failed\r\n"); + exit(1); + } + fprintf(stderr, "test jam: ok\r\n"); return 0; } From 5b9c48c3fd629895ca486feae0179a1340b581dd Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 15:12:57 -0700 Subject: [PATCH 065/123] u3: adds jam/cue roundtrip tests of new implementations --- pkg/urbit/noun/serial.c | 8 +- pkg/urbit/tests/jam_tests.c | 230 ++++++++++++++++++++++++++++++++++++ 2 files changed, 234 insertions(+), 4 deletions(-) diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 26cd330af9..db04067a02 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -965,10 +965,10 @@ _cs_cue_xeno_next(_cue_stack_t* tac_u, } } -/* _cs_cue_xeno_unsafe(): cue onto the loom, all bookkeeping off-loom. +/* u3s_cue_xeno_unsafe(): cue onto the loom, all bookkeeping off-loom. */ -static ur_cue_res_e -_cs_cue_xeno_unsafe(ur_dict32_t* dic_u, +ur_cue_res_e +u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, c3_d len_d, const c3_y* byt_y, u3_noun* out) @@ -1046,7 +1046,7 @@ u3s_cue_xeno(c3_d len_d, const c3_y* byt_y) // errors are fatal // if ( ur_cue_good != - (res_e = _cs_cue_xeno_unsafe(&dic_u, len_d, byt_y, &pro)) ) + (res_e = u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, &pro)) ) { fprintf(stderr, "cue xeno: failed\r\n"); exit(1); diff --git a/pkg/urbit/tests/jam_tests.c b/pkg/urbit/tests/jam_tests.c index b49f17827c..420ee3da0b 100644 --- a/pkg/urbit/tests/jam_tests.c +++ b/pkg/urbit/tests/jam_tests.c @@ -1,4 +1,13 @@ #include "all.h" +#include "ur/ur.h" + +// XX not declared in serial.h due to presence of ur_* types +// +ur_cue_res_e +u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, + c3_d len_d, + const c3_y* byt_y, + u3_noun* out); /* _setup(): prepare for tests. 
*/ @@ -294,6 +303,222 @@ _test_jam_spot_b() return 1; } +static void +_byte_print(c3_d out_d, + c3_y* out_y, + c3_w len_w, + const c3_y* byt_y) +{ + c3_d i_d; + + fprintf(stderr, " actual: { "); + for ( i_d = 0; i_d < out_d; i_d++ ) { + fprintf(stderr, "0x%x, ", out_y[i_d]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " expect: { "); + for ( i_d = 0; i_d < len_w; i_d++ ) { + fprintf(stderr, "0x%x, ", byt_y[i_d]); + } + fprintf(stderr, "}\r\n"); +} + +static c3_i +_test_jam_spec(const c3_c* cap_c, + u3_noun ref, + c3_w len_w, + const c3_y* byt_y) +{ + c3_i ret_i = 1; + c3_d out_d; + c3_y* out_y; + + { + u3s_jam_xeno(ref, &out_d, &out_y); + + if ( 0 != memcmp(out_y, byt_y, len_w) ) { + fprintf(stderr, "\033[31mjam xeno %s fail\033[0m\r\n", cap_c); + _byte_print(out_d, out_y, len_w, byt_y); + ret_i = 0; + } + + free(out_y); + } + + { + c3_w bit_w; + c3_w* wor_w = u3s_jam_fib(ref, &bit_w); + + out_d = (bit_w >> 3) + !!ur_mask_3(bit_w); + // XX assumes little-endian + // + out_y = (c3_y*)wor_w; + + if ( 0 != memcmp(out_y, byt_y, len_w) ) { + fprintf(stderr, "\033[31mjam fib %s fail\033[0m\r\n", cap_c); + _byte_print(out_d, out_y, len_w, byt_y); + ret_i = 0; + } + + u3a_wfree(wor_w); + } + + return ret_i; +} + +static c3_i +_test_cue_spec(const c3_c* cap_c, + u3_noun ref, + c3_w len_w, + const c3_y* byt_y) +{ + c3_i ret_i = 1; + + { + ur_dict32_t dic_u = {0}; + u3_noun out; + + ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); + + if ( ur_cue_good != u3s_cue_xeno_unsafe(&dic_u, len_w, byt_y, &out) ) { + fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap_c); + ret_i = 0; + } + else if ( c3n == u3r_sing(ref, out) ) { + fprintf(stderr, "\033[31mcue %s fail 2\033[0m\r\n", cap_c); + u3m_p("ref", ref); + u3m_p("out", out); + ret_i = 0; + } + + u3z(out); + ur_dict_free((ur_dict_t*)&dic_u); + } + + { + u3_noun pro = u3m_soft(0, u3s_cue_atom, u3i_bytes(len_w, byt_y)); + u3_noun tag, out; + + u3x_cell(pro, &tag, &out); + + if ( u3_blip != tag ) { + 
fprintf(stderr, "\033[31mcue %s fail 3\033[0m\r\n", cap_c); + ret_i = 0; + } + else if ( c3n == u3r_sing(ref, out) ) { + fprintf(stderr, "\033[31mcue %s fail 4\033[0m\r\n", cap_c); + u3m_p("ref", ref); + u3m_p("out", out); + ret_i = 0; + } + + u3z(pro); + } + + // if we haven't failed yet, run u3s_cue_full w/out virtualization + // + if ( ret_i ) { + u3_noun out = u3s_cue_full(len_w, byt_y); + + if ( c3n == u3r_sing(ref, out) ) { + fprintf(stderr, "\033[31mcue %s fail 5\033[0m\r\n", cap_c); + u3m_p("ref", ref); + u3m_p("out", out); + ret_i = 0; + } + + u3z(out); + } + + return ret_i; +} + +static c3_i +_test_jam_roundtrip(void) +{ + c3_i ret_i = 1; + +# define TEST_CASE(a, b) \ + const c3_c* cap_c = a; \ + u3_noun ref = b; \ + ret_i &= _test_jam_spec(cap_c, ref, sizeof(res_y), res_y); \ + ret_i &= _test_cue_spec(cap_c, ref, sizeof(res_y), res_y); \ + u3z(ref); + + { + c3_y res_y[1] = { 0x2 }; + TEST_CASE("0", 0); + } + + { + c3_y res_y[1] = { 0xc }; + TEST_CASE("1", 1); + } + + { + c3_y res_y[1] = { 0x48 }; + TEST_CASE("2", 2); + } + + { + c3_y res_y[6] = { 0xc0, 0x37, 0xb, 0x9b, 0xa3, 0x3 }; + TEST_CASE("%fast", c3__fast); + } + + { + c3_y res_y[6] = { 0xc0, 0x37, 0xab, 0x63, 0x63, 0x3 }; + TEST_CASE("%full", c3__full); + } + + { + c3_y res_y[1] = { 0x29 }; + TEST_CASE("[0 0]", u3nc(0, 0)); + } + + { + c3_y res_y[2] = { 0x31, 0x3 }; + TEST_CASE("[1 1]", u3nc(1, 1)); + } + + { + c3_y res_y[2] = { 0x21, 0xd1 }; + TEST_CASE("[2 3]", u3nc(2, 3)); + } + + { + c3_y res_y[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; + TEST_CASE("[%fast %full]", u3nc(c3__fast, c3__full)); + } + + { + c3_y res_y[2] = { 0x71, 0xcc }; + TEST_CASE("[1 1 1]", u3nc(1, u3nc(1, 1))); + } + + { + c3_y res_y[12] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0x1e, 0xf0, 0xcd, 0xea, 0xd8, 0xd8, 0x93 }; + TEST_CASE("[%fast %full %fast]", u3nc(c3__fast, u3nc(c3__full, c3__fast))); + } + + { + c3_y res_y[6] = { 0xa5, 0x35, 0x19, 0xf3, 0x18, 0x5 }; + TEST_CASE("[[0 0] [[0 0] 1 1] 1 1]", 
u3nc(u3nc(0, 0), u3nc(u3nc(u3nc(0, 0), u3nc(1, 1)), u3nc(1, 1)))); + } + + { + c3_y res_y[14] = { 0x15, 0x17, 0xb2, 0xd0, 0x85, 0x59, 0xb8, 0x61, 0x87, 0x5f, 0x10, 0x54, 0x55, 0x5 }; + TEST_CASE("deep", u3nc(u3nc(u3nc(1, u3nc(u3nc(2, u3nc(u3nc(3, u3nc(u3nc(4, u3nc(u3nt(5, 6, u3nc(7, u3nc(u3nc(8, 0), 0))), 0)), 0)), 0)), 0)), 0), 0)); + } + + { + c3_y inp_y[33] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + c3_y res_y[35] = { 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; + TEST_CASE("wide", u3i_bytes(sizeof(inp_y), inp_y)); + } + + return ret_i; +} + static c3_i _test_jam(void) { @@ -309,6 +534,11 @@ _test_jam(void) ret_i = 0; } + if ( !_test_jam_roundtrip() ) { + fprintf(stderr, "test jam: roundtrip: failed\r\n"); + ret_i = 0; + } + return ret_i; } From aee89f6a6434ac6f51beed050d39d99ebc105430 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 15:13:54 -0700 Subject: [PATCH 066/123] ur: removes trailing whitespace in tests --- pkg/urbit/tests/ur_tests.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index b2d5b9b36e..95e84e89f4 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1522,7 +1522,7 @@ _test_jam_cue(void) uint8_t res[1] = { 0xc }; TEST_CASE("1", 1); } - + { uint8_t res[1] = { 0x48 }; TEST_CASE("2", 2); @@ -1554,7 +1554,7 @@ _test_jam_cue(void) } { - uint8_t res[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; + uint8_t res[11] = { 0x1, 0xdf, 0x2c, 0x6c, 0x8e, 0xe, 0x7c, 0xb3, 0x3a, 0x36, 0x36 }; TEST_CASE("[%fast %full]", NC(FAST, FULL)); } From 899f4312842e8b16e3fa4ef4ed441c6e1926b427 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 22:22:18 -0700 Subject: [PATCH 067/123] ur: cleans up hashtable grow, adds consistent wipe operations --- pkg/urbit/include/ur/hashcons.h | 18 ++-- 
pkg/urbit/ur/hashcons.c | 143 +++++++++++++++++++------------- 2 files changed, 98 insertions(+), 63 deletions(-) diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 9cfd9d2da2..797ab74cfc 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -98,9 +98,21 @@ ur_dict32_get(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t *out); void ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val); +void +ur_dict32_wipe(ur_dict32_t *dict); + void ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size); +ur_bool_t +ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out); + +void +ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val); + +void +ur_dict64_wipe(ur_dict64_t *dict); + void ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size); @@ -116,12 +128,6 @@ ur_dict_free(ur_dict_t *dict); void ur_dict_wipe(ur_dict_t *dict); -ur_bool_t -ur_dict64_get(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t *out); - -void -ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val); - ur_nref ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 7ad97c3a9a..a4aca7d8b5 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -115,31 +115,33 @@ ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) buckets = calloc(next, sizeof(*buckets)); - for ( i = 0; i < old_size; i++ ) { - ur_pail32_t *old_bucket = &(old_buckets[i]); - uint8_t j, old_fill = old_bucket->fill; + if ( old_buckets ) { + for ( i = 0; i < old_size; i++ ) { + ur_pail32_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_bucket->fill; - for ( j = 0; j < old_fill; j++ ) { - uint32_t val = old_bucket->vals[j]; - ur_nref ref = old_bucket->refs[j]; - ur_mug mug = ur_nref_mug(r, ref); + for ( j = 0; j < old_fill; j++ ) 
{ + uint32_t val = old_bucket->vals[j]; + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); - uint64_t idx = ( mug % next ); - ur_pail32_t *bucket = &(buckets[idx]); - uint8_t new_fill = bucket->fill; + uint64_t idx = ( mug % next ); + ur_pail32_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; - if ( ur_pail_max == new_fill ) { - free(buckets); - return ur_dict32_grow(r, dict, size, next); + if ( ur_pail_max == new_fill ) { + free(buckets); + return ur_dict32_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + bucket->vals[new_fill] = val; + bucket->fill = 1 + new_fill; } - - bucket->refs[new_fill] = ref; - bucket->vals[new_fill] = val; - bucket->fill = 1 + new_fill; } - } - free(old_buckets); + free(old_buckets); + } dict->prev = size; dict->size = next; @@ -194,6 +196,17 @@ ur_dict32_put(ur_root_t *r, ur_dict32_t *dict, ur_nref ref, uint32_t val) } } +void +ur_dict32_wipe(ur_dict32_t *dict) +{ + ur_pail32_t *buckets = dict->buckets; + uint64_t i, size = dict->size; + + for ( i = 0; i < size; i++ ) { + buckets[i].fill = 0; + } +} + void ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) { @@ -203,31 +216,33 @@ ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) buckets = calloc(next, sizeof(*buckets)); - for ( i = 0; i < old_size; i++ ) { - ur_pail64_t *old_bucket = &(old_buckets[i]); - uint8_t j, old_fill = old_bucket->fill; + if ( old_buckets ) { + for ( i = 0; i < old_size; i++ ) { + ur_pail64_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_bucket->fill; - for ( j = 0; j < old_fill; j++ ) { - uint64_t val = old_bucket->vals[j]; - ur_nref ref = old_bucket->refs[j]; - ur_mug mug = ur_nref_mug(r, ref); + for ( j = 0; j < old_fill; j++ ) { + uint64_t val = old_bucket->vals[j]; + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); - uint64_t idx = ( mug % next ); - ur_pail64_t *bucket = &(buckets[idx]); - uint8_t new_fill = 
bucket->fill; + uint64_t idx = ( mug % next ); + ur_pail64_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; - if ( ur_pail_max == new_fill ) { - free(buckets); - return ur_dict64_grow(r, dict, size, next); + if ( ur_pail_max == new_fill ) { + free(buckets); + return ur_dict64_grow(r, dict, size, next); + } + + bucket->refs[new_fill] = ref; + bucket->vals[new_fill] = val; + bucket->fill = 1 + new_fill; } - - bucket->refs[new_fill] = ref; - bucket->vals[new_fill] = val; - bucket->fill = 1 + new_fill; } - } - free(old_buckets); + free(old_buckets); + } dict->prev = size; dict->size = next; @@ -282,6 +297,17 @@ ur_dict64_put(ur_root_t *r, ur_dict64_t *dict, ur_nref ref, uint64_t val) } } +void +ur_dict64_wipe(ur_dict64_t *dict) +{ + ur_pail64_t *buckets = dict->buckets; + uint64_t i, size = dict->size; + + for ( i = 0; i < size; i++ ) { + buckets[i].fill = 0; + } +} + void ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) { @@ -291,29 +317,31 @@ ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) buckets = calloc(next, sizeof(*buckets)); - for ( i = 0; i < old_size; i++ ) { - ur_pail_t *old_bucket = &(old_buckets[i]); - uint8_t j, old_fill = old_bucket->fill; + if ( old_buckets ) { + for ( i = 0; i < old_size; i++ ) { + ur_pail_t *old_bucket = &(old_buckets[i]); + uint8_t j, old_fill = old_bucket->fill; - for ( j = 0; j < old_fill; j++ ) { - ur_nref ref = old_bucket->refs[j]; - ur_mug mug = ur_nref_mug(r, ref); - - uint64_t idx = ( mug % next ); - ur_pail_t *bucket = &(buckets[idx]); - uint8_t new_fill = bucket->fill; + for ( j = 0; j < old_fill; j++ ) { + ur_nref ref = old_bucket->refs[j]; + ur_mug mug = ur_nref_mug(r, ref); - if ( ur_pail_max == new_fill ) { - free(buckets); - return ur_dict_grow(r, dict, size, next); + uint64_t idx = ( mug % next ); + ur_pail_t *bucket = &(buckets[idx]); + uint8_t new_fill = bucket->fill; + + if ( ur_pail_max == new_fill ) { + free(buckets); + return ur_dict_grow(r, dict, 
size, next); + } + + bucket->refs[new_fill] = ref; + bucket->fill = 1 + new_fill; } - - bucket->refs[new_fill] = ref; - bucket->fill = 1 + new_fill; } - } - free(old_buckets); + free(old_buckets); + } dict->prev = size; dict->size = next; @@ -369,6 +397,7 @@ void ur_dict_free(ur_dict_t *dict) { free(dict->buckets); + dict->buckets = 0; } void @@ -604,7 +633,7 @@ ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) } tom = _coin_unsafe(atoms, mug, byt, len); - + bucket->refs[b_fill] = tom; bucket->fill = 1 + b_fill; From 851f03294d6ab96c462ba49605bc60f6f869ee4e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 22:26:14 -0700 Subject: [PATCH 068/123] build: adds jam/cue microbenchmarks (make bench) --- pkg/urbit/Makefile | 14 +- pkg/urbit/bench/ur_bench.c | 316 +++++++++++++++++++++++++++++++++++++ 2 files changed, 328 insertions(+), 2 deletions(-) create mode 100644 pkg/urbit/bench/ur_bench.c diff --git a/pkg/urbit/Makefile b/pkg/urbit/Makefile index 76e65e89e3..872c5eb1d3 100644 --- a/pkg/urbit/Makefile +++ b/pkg/urbit/Makefile @@ -7,6 +7,7 @@ vere = $(wildcard vere/*.c) $(wildcard vere/*/*.c) daemon = $(wildcard daemon/*.c) worker = $(wildcard worker/*.c) tests = $(wildcard tests/*.c) +bench = $(wildcard bench/*.c) common = $(jets) $(noun) $(ur) $(vere) headers = $(shell find include -type f) @@ -18,8 +19,9 @@ worker_objs = $(shell echo $(worker) | sed 's/\.c/.o/g') all_objs = $(common_objs) $(daemon_objs) $(worker_objs) all_srcs = $(common) $(daemon) $(worker) -test_exes = $(shell echo $(tests) | sed 's/tests\//.\/build\//g' | sed 's/\.c//g') -all_exes = $(test_exes) ./build/urbit ./build/urbit-worker +test_exes = $(shell echo $(tests) | sed 's/tests\//.\/build\//g' | sed 's/\.c//g') +bench_exes = $(shell echo $(bench) | sed 's/bench\//.\/build\//g' | sed 's/\.c//g') +all_exes = $(test_exes) $(bench_exes) ./build/urbit ./build/urbit-worker # -Werror promotes all warnings that are enabled into errors (this is on) # -Wall issues all types of 
errors. This is off (for now) @@ -41,6 +43,9 @@ test: $(test_exes) done; \ if [ $$FAIL != 0 ]; then echo "\n" && exit 1; fi; +bench: $(bench_exes) + build/ur_bench + clean: rm -f ./tags $(all_objs) $(all_exes) @@ -49,6 +54,11 @@ mrproper: clean ################################################################################ +build/ur_bench: $(common_objs) bench/ur_bench.o + @echo CC -o $@ + @mkdir -p ./build + @$(CC) $^ $(LDFLAGS) -o $@ + build/%_tests: $(common_objs) tests/%_tests.o @echo CC -o $@ @mkdir -p ./build diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c new file mode 100644 index 0000000000..16a08d15e0 --- /dev/null +++ b/pkg/urbit/bench/ur_bench.c @@ -0,0 +1,316 @@ +#include "all.h" +#include "vere/vere.h" +#include "ur/ur.h" + +// XX not declared in serial.h due to presence of ur_* types +// +ur_cue_res_e +u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, + c3_d len_d, + const c3_y* byt_y, + u3_noun* out); + +/* _setup(): prepare for tests. +*/ +static void +_setup(void) +{ + u3m_init(); + u3m_pave(c3y, c3n); +} + +/* _ames_writ_ex(): |hi packet from fake ~zod to fake ~nec +*/ +static u3_noun +_ames_writ_ex(void) +{ + c3_y bod_y[63] = { + 0x30, 0x90, 0x2d, 0x0, 0x0, 0x0, 0x1, 0x0, 0x9, 0xc0, 0xd0, + 0x0, 0x4, 0x40, 0x30, 0xf4, 0xa, 0x3d, 0x45, 0x86, 0x66, 0x2c, + 0x2, 0x38, 0xf8, 0x72, 0xa3, 0x9, 0xf6, 0x6, 0xf3, 0x0, 0xbe, + 0x67, 0x61, 0x49, 0x50, 0x4, 0x3c, 0x13, 0xb2, 0x96, 0x42, 0x1b, + 0x62, 0xac, 0x97, 0xff, 0x24, 0xeb, 0x69, 0x1b, 0xb2, 0x60, 0x72, + 0xa, 0x53, 0xdf, 0xe8, 0x8a, 0x9c, 0x6f, 0xb3 + }; + u3_noun lan = u3nc(0, 1); + u3_noun cad = u3nt(c3__send, lan, u3i_bytes(sizeof(bod_y), bod_y)); + u3_noun wir = u3nt(c3__newt, 0x1234, u3_nul); + u3_noun ovo = u3nc(u3nc(u3_blip, wir), cad); + u3_noun wen; + + { + struct timeval tim_u; + gettimeofday(&tim_u, 0); + wen = u3_time_in_tv(&tim_u); + } + + return u3nt(c3__work, 0, u3nc(wen, ovo)); +} + +static void +_jam_bench(void) +{ + struct timeval b4, f2, d0; + c3_w mil_w, i_w, max_w = 
10000; + u3_noun wit = _ames_writ_ex(); + + fprintf(stderr, "\r\njam microbenchmark:\r\n"); + + { + gettimeofday(&b4, 0); + + { + c3_w* wor_w, bit_w; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + wor_w = u3s_jam_fib(wit, &bit_w); + u3a_wfree(wor_w); + } + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " jam og: %u ms\r\n", mil_w); + } + + { + gettimeofday(&b4, 0); + + { + c3_d len_d; + c3_y* byt_y; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3s_jam_xeno(wit, &len_d, &byt_y); + c3_free(byt_y); + } + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " jam xeno: %u ms\r\n", mil_w); + } + + while ( 1 ) { + ur_root_t* rot_u = ur_hcon_init(); + c3_d len_d; + c3_y* byt_y; + ur_nref ref; + + u3s_jam_xeno(wit, &len_d, &byt_y); + if ( ur_cue_good != ur_cue(rot_u, len_d, byt_y, &ref) ) { + fprintf(stderr, " jam bench: cue failed wtf\r\n"); + break; + } + + c3_free(byt_y); + + { + gettimeofday(&b4, 0); + + for ( i_w = 0; i_w < max_w; i_w++ ) { + ur_jam(rot_u, ref, &len_d, &byt_y); + c3_free(byt_y); + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " jam cons: %u ms\r\n", mil_w); + } + + ur_hcon_free(rot_u); + break; + } + + u3z(wit); +} + +static void +_cue_bench(void) +{ + struct timeval b4, f2, d0; + c3_w mil_w, i_w, max_w = 20000; + u3_atom vat = u3ke_jam(_ames_writ_ex()); + + fprintf(stderr, "\r\ncue microbenchmark:\r\n"); + + { + gettimeofday(&b4, 0); + + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3z(u3s_cue(vat)); + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue og: %u ms\r\n", mil_w); + } + + // XX "loom: corrupt" assertion failure + // + // { + // gettimeofday(&b4, 0); + + // // for ( i_w = 0; i_w < max_w; i_w++ ) { + // u3z(u3s_cue_atom(vat)); + // // } + + // 
gettimeofday(&f2, 0); + // timersub(&f2, &b4, &d0); + // mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + // fprintf(stderr, " cue_atom: %u ms\r\n", mil_w); + // } + + // NB: runs 1/8th the number of times + // + { + gettimeofday(&b4, 0); + + { + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? (c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + for ( i_w = 0; i_w < max_w / 8; i_w++ ) { + u3z(u3s_cue_xeno(len_w, byt_y)); + } + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue xeno: %u ms (estimated)\r\n", mil_w * 8); + } + + // NB: runs 1/8th the number of times + // + { + gettimeofday(&b4, 0); + + { + ur_dict32_t dic_u = {0}; + u3_noun out; + + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? (c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); + + for ( i_w = 0; i_w < max_w / 8; i_w++ ) { + u3z(u3s_cue_xeno(len_w, byt_y)); + ur_dict32_wipe(&dic_u); + } + + ur_dict_free((ur_dict_t*)&dic_u); + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue xeno unsafe: %u ms (estimated)\r\n", mil_w * 8); + } + + { + gettimeofday(&b4, 0); + + { + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? 
(c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + ur_cue_test(len_w, byt_y); + } + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue test: %u ms\r\n", mil_w); + } + + { + gettimeofday(&b4, 0); + + { + ur_root_t* rot_u = ur_hcon_init(); + ur_nref ref; + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? (c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + ur_cue(rot_u, len_w, byt_y, &ref); + } + + ur_hcon_free(rot_u); + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue cons: %u ms\r\n", mil_w); + } + + { + gettimeofday(&b4, 0); + + { + ur_root_t* rot_u; + ur_nref ref; + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? 
(c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + rot_u = ur_hcon_init(); + ur_cue(rot_u, len_w, byt_y, &ref); + ur_hcon_free(rot_u); + } + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue re-cons: %u ms\r\n", mil_w); + } + + u3z(vat); +} + +/* main(): run all benchmarks +*/ +int +main(int argc, char* argv[]) +{ + _setup(); + + _jam_bench(); + _cue_bench(); + + // GC + // + u3m_grab(u3_none); + + return 0; +} From 4eb3e1217ea9f39a83f4bf6171c69f4ed04b1aea Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 23:37:59 -0700 Subject: [PATCH 069/123] u3: fixes indirect-atom allocation bug in u3s_cue_smol/full --- pkg/urbit/bench/ur_bench.c | 44 +++++++++++++++++++++++++++----------- pkg/urbit/noun/serial.c | 23 +++++++++++--------- 2 files changed, 45 insertions(+), 22 deletions(-) diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c index 16a08d15e0..3f8ce37d88 100644 --- a/pkg/urbit/bench/ur_bench.c +++ b/pkg/urbit/bench/ur_bench.c @@ -150,20 +150,40 @@ _cue_bench(void) fprintf(stderr, " cue og: %u ms\r\n", mil_w); } - // XX "loom: corrupt" assertion failure - // - // { - // gettimeofday(&b4, 0); + { + gettimeofday(&b4, 0); - // // for ( i_w = 0; i_w < max_w; i_w++ ) { - // u3z(u3s_cue_atom(vat)); - // // } + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3z(u3s_cue_atom(vat)); + } - // gettimeofday(&f2, 0); - // timersub(&f2, &b4, &d0); - // mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); - // fprintf(stderr, " cue_atom: %u ms\r\n", mil_w); - // } + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue atom: %u ms\r\n", mil_w); + } + + { + gettimeofday(&b4, 0); + + { + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? 
(c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3z(u3s_cue_full(len_w, byt_y)); + } + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue full: %u ms\r\n", mil_w); + } // NB: runs 1/8th the number of times // diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index db04067a02..6a335fe024 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -1140,14 +1140,12 @@ _cs_cue_smol_next(c3_ys mov, } else { c3_w byt_w = (len_d >> 3) + !!ur_mask_3(len_d); - c3_w* wor_w = u3a_slaq(3, byt_w); - // XX assumes little-endian - // XX need a ur_bsr_words_any() - // - c3_y* byt_y = (c3_y*)wor_w; + c3_y* byt_y = u3a_calloc(1, byt_w); ur_bsr_bytes_any(red_u, len_d, byt_y); - vat = u3a_malt(wor_w); + + vat = u3i_bytes(byt_w, byt_y); + u3a_free(byt_y); } u3h_put(har_p, (u3_noun)(c3_w)bit_d, u3k(vat)); @@ -1314,14 +1312,19 @@ _cs_cue_full_next(c3_ys mov, // XX check that byt_d fits in a c3_w; // c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); - c3_w* wor_w = u3a_slaq(3, byt_d); - // XX assumes little-endian + // XX the following (little-endian cheat) corrupts the loom // - c3_y* byt_y = (c3_y*)wor_w; + // c3_w* wor_w = u3a_slaq(3, byt_d); + // c3_y* byt_y = (c3_y*)wor_w; + // // copy bytes + // vat = u3a_malt(wor_w); + // + c3_y* byt_y = u3a_calloc(1, byt_d); ur_bsr_bytes_any(red_u, len_d, byt_y); - vat = u3a_malt(wor_w); + vat = u3i_bytes(byt_d, byt_y); + u3a_free(byt_y); } return _cs_cue_put(har_p, bit_d, vat); From b9fd36e47bc3a9a569e2ee839487d17137365a98 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 27 Aug 2020 23:40:43 -0700 Subject: [PATCH 070/123] u3: removes u3s_cue_smol, hoped-for performance never materialized --- pkg/urbit/bench/ur_bench.c | 22 ----- pkg/urbit/include/noun/serial.h | 12 +-- pkg/urbit/noun/serial.c | 163 +------------------------------- pkg/urbit/tests/jam_tests.c | 15 --- 4 files changed, 3 
insertions(+), 209 deletions(-) diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c index 3f8ce37d88..35b101ad7d 100644 --- a/pkg/urbit/bench/ur_bench.c +++ b/pkg/urbit/bench/ur_bench.c @@ -163,28 +163,6 @@ _cue_bench(void) fprintf(stderr, " cue atom: %u ms\r\n", mil_w); } - { - gettimeofday(&b4, 0); - - { - c3_w len_w = u3r_met(3, vat); - // XX assumes little-endian - // - c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) - ? (c3_y*)&vat - : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; - - for ( i_w = 0; i_w < max_w; i_w++ ) { - u3z(u3s_cue_full(len_w, byt_y)); - } - } - - gettimeofday(&f2, 0); - timersub(&f2, &b4, &d0); - mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); - fprintf(stderr, " cue full: %u ms\r\n", mil_w); - } - // NB: runs 1/8th the number of times // { diff --git a/pkg/urbit/include/noun/serial.h b/pkg/urbit/include/noun/serial.h index 96a39bcd84..1ecfdd5112 100644 --- a/pkg/urbit/include/noun/serial.h +++ b/pkg/urbit/include/noun/serial.h @@ -45,17 +45,7 @@ u3_noun u3s_cue_xeno(c3_d len_d, const c3_y* byt_y); - /* u3s_cue_smol(): cue onto the loom, bitwidth fits in a direct atom. - */ - u3_noun - u3s_cue_smol(c3_w len_w, const c3_y* byt_y); - - /* u3s_cue_full(): cue onto the loom. - */ - u3_noun - u3s_cue_full(c3_d len_d, const c3_y* byt_y); - - /* u3s_cue_bytes(): cue bytes. + /* u3s_cue_bytes(): cue bytes onto the loom. */ u3_noun u3s_cue_bytes(c3_d len_d, const c3_y* byt_y); diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 6a335fe024..165ecd2e29 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -1070,153 +1070,6 @@ _cs_cue_need(ur_cue_res_e res_e) } } -/* _cs_cue_smol_next(): read next value from bitstream, direct-atom cursors. 
-*/ -static inline u3_noun -_cs_cue_smol_next(c3_ys mov, - c3_ys off, - u3p(u3h_root) har_p, - ur_bsr_t* red_u) -{ - while ( 1 ) { - c3_d len_d, bit_d = red_u->bits; - ur_cue_tag_e tag_e; - - _cs_cue_need(ur_bsr_tag(red_u, &tag_e)); - - switch ( tag_e ) { - default: c3_assert(0); - - case ur_jam_cell: { - // wind the stack - // - u3R->cap_p += mov; - - // ensure we haven't overflowed (ie, run into the heap) - // (off==0 means we're on a north road) - // - if ( 0 == off ) { - if( !(u3R->cap_p > u3R->hat_p) ) { - u3m_bail(c3__meme); - } - } - else { - if( !(u3R->cap_p < u3R->hat_p) ) { - u3m_bail(c3__meme); - } - } - - // save a head-frame and read the head from the stream - // - { - _cue_frame_t* fam_u = u3to(_cue_frame_t, u3R->cap_p + off); - fam_u->ref = u3_none; - fam_u->bit_d = bit_d; - } - continue; - } - - case ur_jam_back: { - _cs_cue_need(ur_bsr_rub_len(red_u, &len_d)); - - // XX review, s/b deterministic error - // - c3_assert( 32 > len_d ); - - { - c3_w bak_w = ur_bsr32_any(red_u, len_d); - u3_weak bak = u3h_get(har_p, (u3_noun)bak_w); - return u3x_good(bak); - } - } - - case ur_jam_atom: { - u3_atom vat; - - _cs_cue_need(ur_bsr_rub_len(red_u, &len_d)); - - if ( 31 >= len_d ) { - vat = (u3_noun)ur_bsr32_any(red_u, len_d); - } - else { - c3_w byt_w = (len_d >> 3) + !!ur_mask_3(len_d); - c3_y* byt_y = u3a_calloc(1, byt_w); - - ur_bsr_bytes_any(red_u, len_d, byt_y); - - vat = u3i_bytes(byt_w, byt_y); - u3a_free(byt_y); - } - - u3h_put(har_p, (u3_noun)(c3_w)bit_d, u3k(vat)); - - return vat; - } - } - } -} - -/* u3s_cue_smol(): cue onto the loom, bitwidth fits in a direct atom. 
-*/ -u3_noun -u3s_cue_smol(c3_w len_w, const c3_y* byt_y) -{ - ur_bsr_t red_u = {0}; - u3_noun ref; - - c3_assert( 0x10000000 >= len_w ); - - // initialize a hash table for dereferencing backrefs - // - u3p(u3h_root) har_p = u3h_new(); - const u3_post top_p = u3R->cap_p; - - // initialize signed stack offsets (relative to north/south road) - // - c3_ys mov, off; - { - c3_o nor_o = u3a_is_north(u3R); - c3_y wis_y = c3_wiseof(_cue_frame_t); - mov = ( c3y == nor_o ? -wis_y : wis_y ); - off = ( c3y == nor_o ? 0 : -wis_y ); - } - - // init bitstream-reader - // - red_u.left = len_w; - red_u.bytes = byt_y; - - // advance into stream - // - ref = _cs_cue_smol_next(mov, off, har_p, &red_u); - - // process result - // - while ( top_p != u3R->cap_p ) { - // peek at the top of the stack - // - _cue_frame_t* fam_u = u3to(_cue_frame_t, u3R->cap_p + off); - - // f is a head-frame; stash result and read the tail from the stream - // - if ( u3_none == fam_u->ref ) { - fam_u->ref = ref; - ref = _cs_cue_smol_next(mov, off, har_p, &red_u); - } - // f is a tail-frame; pop the stack and continue - // - else { - ref = u3nc(fam_u->ref, ref); - u3h_put(har_p, (u3_noun)(c3_w)fam_u->bit_d, u3k(ref)); - u3R->cap_p -= mov; - } - } - - u3h_free(har_p); - - return ref; -} - /* _cs_cue_get(): u3h_get wrapper handling allocation and refcounts. */ static inline u3_weak @@ -1333,10 +1186,10 @@ _cs_cue_full_next(c3_ys mov, } } -/* u3s_cue_full(): cue onto the loom. +/* u3s_cue_bytes(): cue bytes onto the loom. */ u3_noun -u3s_cue_full(c3_d len_d, const c3_y* byt_y) +u3s_cue_bytes(c3_d len_d, const c3_y* byt_y) { ur_bsr_t red_u = {0}; u3_noun ref; @@ -1392,18 +1245,6 @@ u3s_cue_full(c3_d len_d, const c3_y* byt_y) return ref; } -/* u3s_cue_bytes(): cue bytes. -*/ -u3_noun -u3s_cue_bytes(c3_d len_d, const c3_y* byt_y) -{ - // check if we can bitwise-index [len_d] bytes in a direct atom - // - return ( 0x10000000ULL >= len_d ) - ? 
u3s_cue_smol(len_d, byt_y) - : u3s_cue_full(len_d, byt_y); -} - /* u3s_cue_atom(): cue atom. */ u3_noun diff --git a/pkg/urbit/tests/jam_tests.c b/pkg/urbit/tests/jam_tests.c index 420ee3da0b..f759d89345 100644 --- a/pkg/urbit/tests/jam_tests.c +++ b/pkg/urbit/tests/jam_tests.c @@ -415,21 +415,6 @@ _test_cue_spec(const c3_c* cap_c, u3z(pro); } - // if we haven't failed yet, run u3s_cue_full w/out virtualization - // - if ( ret_i ) { - u3_noun out = u3s_cue_full(len_w, byt_y); - - if ( c3n == u3r_sing(ref, out) ) { - fprintf(stderr, "\033[31mcue %s fail 5\033[0m\r\n", cap_c); - u3m_p("ref", ref); - u3m_p("out", out); - ret_i = 0; - } - - u3z(out); - } - return ret_i; } From daf463c4be6a97153d33286e4fcd4e5b738124da Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 28 Aug 2020 11:17:22 -0700 Subject: [PATCH 071/123] ur: fixes a buffer over-write in ur_bsr_bytes_any() --- pkg/urbit/ur/bitstream.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index eea2202543..fabc15adbc 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -369,14 +369,18 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) // become the least-significant bits of an output byte, and vice-versa // else { - uint64_t need = len_byt + (len_bit >> 3) + !!ur_mask_3(len_bit); - ur_bool_t end = need >= left; - uint64_t max = end ? (left - 1) : len_byt; - uint8_t rest = 8 - off; - uint8_t mask = (1 << off) - 1; - uint8_t byt = b[0]; - uint8_t l, m = byt >> off; - uint64_t i; + uint8_t rest = 8 - off; + uint8_t mask = (1 << off) - 1; + uint8_t byt = b[0]; + uint8_t l, m = byt >> off; + ur_bool_t end; + uint64_t i, max; + + { + uint64_t need = len_byt + !!len_bit; + end = need >= left; + max = end ? 
(left - 1) : len_byt; + } for ( i = 0; i < max; i++ ) { byt = b[1ULL + i]; @@ -407,12 +411,14 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) left -= step; off = ur_mask_3(bits); - if ( len_bit <= rest ) { - out[max] = m & ((1 << len_bit) - 1); - } - else { - l = b[1ULL + max] & ((1 << off) - 1);; - out[max] = m ^ (l << rest); + if ( len_bit ) { + if ( len_bit <= rest ) { + out[max] = m & ((1 << len_bit) - 1); + } + else { + l = b[1ULL + max] & ((1 << off) - 1);; + out[max] = m ^ (l << rest); + } } } } From f6042e440fbb721c07751b16a90bdfc89fc4eb7c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 28 Aug 2020 11:18:01 -0700 Subject: [PATCH 072/123] u3: removes double atom allocation in u3s_cue_bytes() --- pkg/urbit/noun/serial.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 165ecd2e29..8924162bb5 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -1162,22 +1162,24 @@ _cs_cue_full_next(c3_ys mov, // XX need a ur_bsr_words_any() // else { - // XX check that byt_d fits in a c3_w; - // - c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); - // XX the following (little-endian cheat) corrupts the loom - // - // c3_w* wor_w = u3a_slaq(3, byt_d); - // c3_y* byt_y = (c3_y*)wor_w; - // // copy bytes - // vat = u3a_malt(wor_w); - // - c3_y* byt_y = u3a_calloc(1, byt_d); + c3_w* wor_w; + c3_y* byt_y; + + { + c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); + + if ( 0xffffffffULL < byt_d) { + return u3m_bail(c3__meme); + } + + // XX assumes little-endian + // + wor_w = u3a_slaq(3, byt_d); + byt_y = (c3_y*)wor_w; + } ur_bsr_bytes_any(red_u, len_d, byt_y); - - vat = u3i_bytes(byt_d, byt_y); - u3a_free(byt_y); + vat = u3a_malt(wor_w); } return _cs_cue_put(har_p, bit_d, vat); From 1ea8e7310453ed9fdc5a3a06555fff579d71c15d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 28 Aug 2020 11:18:35 -0700 Subject: [PATCH 073/123] build: adds 
microbenchmark for virtualized cue --- pkg/urbit/bench/ur_bench.c | 59 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c index 35b101ad7d..9b257966f6 100644 --- a/pkg/urbit/bench/ur_bench.c +++ b/pkg/urbit/bench/ur_bench.c @@ -296,6 +296,64 @@ _cue_bench(void) u3z(vat); } +static u3_noun +_cue_loop(u3_atom a) +{ + c3_w i_w, max_w = 20000; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3z(u3s_cue(a)); + } + + return u3_blip; +} + +static u3_noun +_cue_atom_loop(u3_atom a) +{ + c3_w i_w, max_w = 20000; + + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3z(u3s_cue_atom(a)); + } + + return u3_blip; +} + +static void +_cue_soft_bench(void) +{ + struct timeval b4, f2, d0; + u3_atom vat = u3ke_jam(_ames_writ_ex()); + c3_w mil_w; + + fprintf(stderr, "\r\ncue virtual microbenchmark:\r\n"); + + { + gettimeofday(&b4, 0); + + u3z(u3m_soft(0, _cue_loop, u3k(vat))); + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue virtual og: %u ms\r\n", mil_w); + } + + { + gettimeofday(&b4, 0); + + u3z(u3m_soft(0, _cue_atom_loop, u3k(vat))); + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue virtual atom: %u ms\r\n", mil_w); + } + + u3z(vat); +} + /* main(): run all benchmarks */ int @@ -305,6 +363,7 @@ main(int argc, char* argv[]) _jam_bench(); _cue_bench(); + _cue_soft_bench(); // GC // From 7b42f540b40706647676c76734277499cbacd79d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 28 Aug 2020 14:45:51 -0700 Subject: [PATCH 074/123] ur: fixes buffer over-read bugs in bitstream in tests --- pkg/urbit/tests/ur_tests.c | 6 +- pkg/urbit/ur/bitstream.c | 200 ++++++++++++++++++++++--------------- 2 files changed, 121 insertions(+), 85 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 95e84e89f4..9ec4fecb52 100644 --- 
a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1126,13 +1126,15 @@ _test_bsr64(void) static void _bsr_bytes_any_slow(ur_bsr_t *bsr, uint64_t len, uint8_t *out) { - uint64_t i, len_byt = len >> 3; + uint64_t i, len_byt = len >> 3, len_bit = ur_mask_3(len); for ( i = 0; i < len_byt; i++ ) { out[i] = _bsr8_any_slow(bsr, 8); } - out[len_byt] = _bsr8_any_slow(bsr, ur_mask_3(len)); + if ( len_bit ) { + out[len_byt] = _bsr8_any_slow(bsr, len_bit); + } } static int diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index fabc15adbc..d5ff9642df 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -130,22 +130,22 @@ ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) return 0; } else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *b = bsr->bytes; - uint32_t m = b[0] >> off; + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + uint32_t m = bsr->bytes[0] >> off; if ( len < rest ) { bsr->off = off + len; return m & ((1 << len) - 1); } else { - uint8_t mask, len_byt; - uint32_t l; + const uint8_t *b; + uint8_t mask, len_byt; + uint32_t l; len -= rest; left--; - bsr->bytes++; + b = ++bsr->bytes; len_byt = len >> 3; @@ -164,33 +164,44 @@ ur_bsr32_any(ur_bsr_t *bsr, uint8_t len) mask = (1 << off) - 1; switch ( len_byt ) { + default: assert(0); + case 4: { - l = (uint32_t)b[1] - ^ (uint32_t)b[2] << 8 - ^ (uint32_t)b[3] << 16 - ^ (uint32_t)b[4] << 24; + l = (uint32_t)b[0] + ^ (uint32_t)b[1] << 8 + ^ (uint32_t)b[2] << 16 + ^ (uint32_t)b[3] << 24; } break; case 3: { - l = (uint32_t)b[1] - ^ (uint32_t)b[2] << 8 - ^ (uint32_t)b[3] << 16 - ^ (uint32_t)(b[4] & mask) << 24; + l = (uint32_t)b[0] + ^ (uint32_t)b[1] << 8 + ^ (uint32_t)b[2] << 16; + + if ( mask ) { + l ^= (uint32_t)(b[3] & mask) << 24; + } } break; case 2: { - l = (uint32_t)b[1] - ^ (uint32_t)b[2] << 8 - ^ (uint32_t)(b[3] & mask) << 16; + l = (uint32_t)b[0] + ^ (uint32_t)b[1] << 8; + + if ( mask ) { + l ^= (uint32_t)(b[2] & mask) << 16; + } } break; case 1: { - l 
= (uint32_t)b[1] - ^ (uint32_t)(b[2] & mask) << 8; + l = (uint32_t)b[0]; + + if ( mask ) { + l ^= (uint32_t)(b[1] & mask) << 8; + } } break; case 0: { - l = (uint32_t)(b[1] & mask); + l = ( mask ) ? (uint32_t)(b[0] & mask) : 0; } break; } @@ -212,22 +223,22 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) return 0; } else { - uint8_t off = bsr->off; - uint8_t rest = 8 - off; - const uint8_t *b = bsr->bytes; - uint64_t m = b[0] >> off; + uint8_t off = bsr->off; + uint8_t rest = 8 - off; + uint64_t m = bsr->bytes[0] >> off; if ( len < rest ) { bsr->off = off + len; return m & ((1 << len) - 1); } else { - uint8_t mask, len_byt; - uint64_t l; + const uint8_t *b; + uint8_t mask, len_byt; + uint64_t l; len -= rest; left--; - bsr->bytes++; + b = ++bsr->bytes; len_byt = len >> 3; @@ -247,74 +258,95 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len) switch ( len_byt ) { case 8: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)b[6] << 40 - ^ (uint64_t)b[7] << 48 - ^ (uint64_t)b[8] << 56; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32 + ^ (uint64_t)b[5] << 40 + ^ (uint64_t)b[6] << 48 + ^ (uint64_t)b[7] << 56; } break; case 7: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)b[6] << 40 - ^ (uint64_t)b[7] << 48 - ^ (uint64_t)(b[8] & mask) << 56; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32 + ^ (uint64_t)b[5] << 40 + ^ (uint64_t)b[6] << 48; + + if ( mask ) { + l ^= (uint64_t)(b[7] & mask) << 56; + } } break; case 6: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)b[6] << 40 - ^ (uint64_t)(b[7] & mask) << 48; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ 
(uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32 + ^ (uint64_t)b[5] << 40; + + if ( mask ) { + l ^= (uint64_t)(b[6] & mask) << 48; + } } break; case 5: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)b[5] << 32 - ^ (uint64_t)(b[6] & mask) << 40; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24 + ^ (uint64_t)b[4] << 32; + + if ( mask ) { + l ^= (uint64_t)(b[5] & mask) << 40; + } } break; case 4: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)b[4] << 24 - ^ (uint64_t)(b[5] & mask) << 32; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16 + ^ (uint64_t)b[3] << 24; + + if ( mask ) { + l ^= (uint64_t)(b[4] & mask) << 32; + } } break; case 3: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)b[3] << 16 - ^ (uint64_t)(b[4] & mask) << 24; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8 + ^ (uint64_t)b[2] << 16; + + if ( mask ) { + l ^= (uint64_t)(b[3] & mask) << 24; + } } break; case 2: { - l = (uint64_t)b[1] - ^ (uint64_t)b[2] << 8 - ^ (uint64_t)(b[3] & mask) << 16; + l = (uint64_t)b[0] + ^ (uint64_t)b[1] << 8; + + if ( mask ) { + l ^= (uint64_t)(b[2] & mask) << 16; + } } break; case 1: { - l = (uint64_t)b[1] - ^ (uint64_t)(b[2] & mask) << 8; + l = (uint64_t)b[0]; + + if ( mask ) { + l ^= (uint64_t)(b[1] & mask) << 8; + } } break; case 0: { - l = (uint64_t)(b[1] & mask); + l = ( mask ) ? 
(uint64_t)(b[0] & mask) : 0; } break; } @@ -925,19 +957,21 @@ _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) m = byt[i] >> rest; } - if ( len_bit < rest ) { - l = byt[len_byt] & ((1 << len_bit) - 1); - bsw->bytes[fill] = m ^ (l << off); - off += len_bit; - } - else { - l = byt[len_byt] & mask; - bsw->bytes[fill++] = m ^ (l << off); + if ( len_bit ) { + if ( len_bit < rest ) { + l = byt[len_byt] & ((1 << len_bit) - 1); + bsw->bytes[fill] = m ^ (l << off); + off += len_bit; + } + else { + l = byt[len_byt] & mask; + bsw->bytes[fill++] = m ^ (l << off); - m = byt[len_byt] >> rest; + m = byt[len_byt] >> rest; - off = len_bit - rest; - bsw->bytes[fill] = m & ((1 << off) - 1); + off = len_bit - rest; + bsw->bytes[fill] = m & ((1 << off) - 1); + } } } From 6f3d83f4dca0a36e8569b2615ee7b07964a35500 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 28 Aug 2020 14:52:51 -0700 Subject: [PATCH 075/123] ur: adds ur_jam_unsafe() --- pkg/urbit/include/ur/serial.h | 7 +++++++ pkg/urbit/ur/serial.c | 31 ++++++++++++++++++++++++------- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index afac9e6a0d..13d8fae511 100644 --- a/pkg/urbit/include/ur/serial.h +++ b/pkg/urbit/include/ur/serial.h @@ -5,6 +5,13 @@ #include #include +uint64_t +ur_jam_unsafe(ur_root_t *r, + ur_nref ref, + ur_dict64_t *dict, + uint64_t *len, + uint8_t **byt); + uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index b32973a6ff..b7964ee9af 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -19,15 +19,15 @@ _bsw_atom(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) } typedef struct _jam_s { - ur_dict64_t dict; - ur_bsw_t bsw; + ur_dict64_t *dict; + ur_bsw_t bsw; } _jam_t; static void _jam_atom(ur_root_t *r, ur_nref ref, void *ptr) { _jam_t *j = ptr; - ur_dict64_t *dict = &(j->dict); + ur_dict64_t *dict = j->dict; 
ur_bsw_t *bsw = &j->bsw; uint64_t bak, len = ur_met(r, 0, ref); @@ -52,7 +52,7 @@ static ur_bool_t _jam_cell(ur_root_t *r, ur_nref ref, void *ptr) { _jam_t *j = ptr; - ur_dict64_t *dict = &(j->dict); + ur_dict64_t *dict = j->dict; ur_bsw_t *bsw = &j->bsw; uint64_t bak; @@ -69,17 +69,21 @@ _jam_cell(ur_root_t *r, ur_nref ref, void *ptr) } uint64_t -ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) +ur_jam_unsafe(ur_root_t *r, + ur_nref ref, + ur_dict64_t *dict, + uint64_t *len, + uint8_t **byt) { _jam_t j = {0}; + j.dict = dict; + j.bsw.prev = ur_fib11; j.bsw.size = ur_fib12; j.bsw.bytes = calloc(j.bsw.size, 1); - ur_dict64_grow(r, &j.dict, ur_fib11, ur_fib12); ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); - ur_dict_free((ur_dict_t*)&j.dict); *len = j.bsw.fill + !!j.bsw.off; *byt = j.bsw.bytes; @@ -87,6 +91,19 @@ ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) return j.bsw.bits; } +uint64_t +ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt) +{ + ur_dict64_t dict = {0}; + ur_dict64_grow(r, &dict, ur_fib11, ur_fib12); + + { + uint64_t bits = ur_jam_unsafe(r, ref, &dict, len, byt); + ur_dict_free((ur_dict_t*)&dict); + return bits; + } +} + /* ** stack frame for recording head vs tail iteration ** From 7fa6499e3725387468d143ee477eb7aca3d5dfb4 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Fri, 28 Aug 2020 14:59:30 -0700 Subject: [PATCH 076/123] build: updates benchmarks --- pkg/urbit/bench/ur_bench.c | 64 +++++++++++++++++++++++++++++++++++--- 1 file changed, 59 insertions(+), 5 deletions(-) diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c index 9b257966f6..7b6283fefd 100644 --- a/pkg/urbit/bench/ur_bench.c +++ b/pkg/urbit/bench/ur_bench.c @@ -121,6 +121,31 @@ _jam_bench(void) fprintf(stderr, " jam cons: %u ms\r\n", mil_w); } + { + gettimeofday(&b4, 0); + + { + ur_dict64_t dic_u = {0}; + c3_d len_d; + c3_y* byt_y; + + ur_dict64_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); + + for ( i_w = 0; i_w < 
max_w; i_w++ ) { + ur_jam_unsafe(rot_u, ref, &dic_u, &len_d, &byt_y); + c3_free(byt_y); + ur_dict64_wipe(&dic_u); + } + + ur_dict_free((ur_dict_t*)&dic_u); + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " jam cons unsafe: %u ms\r\n", mil_w); + } + ur_hcon_free(rot_u); break; } @@ -187,8 +212,6 @@ _cue_bench(void) fprintf(stderr, " cue xeno: %u ms (estimated)\r\n", mil_w * 8); } - // NB: runs 1/8th the number of times - // { gettimeofday(&b4, 0); @@ -205,8 +228,9 @@ _cue_bench(void) ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); - for ( i_w = 0; i_w < max_w / 8; i_w++ ) { - u3z(u3s_cue_xeno(len_w, byt_y)); + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3s_cue_xeno_unsafe(&dic_u, len_w, byt_y, &out); + u3z(out); ur_dict32_wipe(&dic_u); } @@ -216,7 +240,7 @@ _cue_bench(void) gettimeofday(&f2, 0); timersub(&f2, &b4, &d0); mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); - fprintf(stderr, " cue xeno unsafe: %u ms (estimated)\r\n", mil_w * 8); + fprintf(stderr, " cue xeno unsafe: %u ms\r\n", mil_w); } { @@ -241,6 +265,36 @@ _cue_bench(void) fprintf(stderr, " cue test: %u ms\r\n", mil_w); } + { + gettimeofday(&b4, 0); + + { + ur_dict_t dic_u = {0}; + u3_noun out; + + c3_w len_w = u3r_met(3, vat); + // XX assumes little-endian + // + c3_y* byt_y = ( c3y == u3a_is_cat(vat) ) + ? 
(c3_y*)&vat + : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; + + ur_dict_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); + + for ( i_w = 0; i_w < max_w; i_w++ ) { + ur_cue_test_unsafe(&dic_u, len_w, byt_y); + ur_dict_wipe(&dic_u); + } + + ur_dict_free(&dic_u); + } + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); + fprintf(stderr, " cue test unsafe: %u ms\r\n", mil_w); + } + { gettimeofday(&b4, 0); From bb136b7981c6529619c17104fc2af9846ea4a647 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 15:55:45 -0700 Subject: [PATCH 077/123] ur: updates ur_bsr_bytes_any() test (failing) to cover off-the-end edge-cases --- pkg/urbit/tests/ur_tests.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 9ec4fecb52..511ebe0ff1 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1140,22 +1140,23 @@ _bsr_bytes_any_slow(ur_bsr_t *bsr, uint64_t len, uint8_t *out) static int _test_bsr_bytes_any_loop(const char *cap, uint8_t len, uint8_t val) { - int ret = 1; - uint64_t len_bit = len << 3; - ur_bsr_t a, b; - uint8_t *bytes, *c, *d; - uint8_t i, j, k; + int ret = 1; + uint64_t max = (len << 3) + 7; + ur_bsr_t a, b; + uint8_t *bytes, *c, *d; + uint8_t i, j, k; - c = malloc(len); - d = malloc(len); + c = malloc(1 + len); + d = malloc(1 + len); bytes = malloc(len); memset(bytes, val, len); for ( i = 0; i < 8; i++) { - for ( j = 1; j <= len_bit; j++ ) { + for ( j = 1; j <= max; j++ ) { a.left = b.left = len; - a.bytes = b.bytes = bytes; - a.off = a.bits = b.off = b.bits = i; + a.bytes = b.bytes = len ? bytes : 0; + a.off = b.off = len ? 
i : 0; + a.bits = b.bits = i; memset(c, 0x0, len); memset(d, 0x0, len); From c123e9413a5c8e9ea070907c01f0df1c9407e400 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 15:56:55 -0700 Subject: [PATCH 078/123] ur: refactors ur_bsr_bytes_any(), fixes edge-case bugs --- pkg/urbit/tests/ur_tests.c | 2 + pkg/urbit/ur/bitstream.c | 91 ++++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 44 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 511ebe0ff1..4eae9ac614 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1184,6 +1184,8 @@ _test_bsr_bytes_any_loop(const char *cap, uint8_t len, uint8_t val) } free(bytes); + free(d); + free(c); return ret; } diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index d5ff9642df..04910f36a0 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -360,6 +360,8 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) { uint64_t left = bsr->left; + bsr->bits += len; + if ( !left ) { return; } @@ -368,33 +370,24 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) uint8_t off = bsr->off; uint64_t len_byt = len >> 3; uint8_t len_bit = ur_mask_3(len); + uint64_t need = len_byt + !!len_bit; if ( !off ) { - uint8_t bits = off + len_bit; - uint64_t need = len_byt + (bits >> 3) + !!ur_mask_3(bits); - if ( need > left ) { memcpy(out, b, left); + left = 0; bsr->bytes = 0; - bsr->left = 0; } else { memcpy(out, b, len_byt); - off = len_bit; - left -= len_byt; - - if ( !left ) { - bsr->bytes = 0; - } - else { - bsr->bytes += len_byt; - } - - bsr->left = left; + off = len_bit; if ( off ) { out[len_byt] = b[len_byt] & ((1 << off) - 1); } + + left -= len_byt; + bsr->bytes = ( left ) ? 
b + len_byt : 0; } } // the most-significant bits from a byte in the stream @@ -403,61 +396,71 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) else { uint8_t rest = 8 - off; uint8_t mask = (1 << off) - 1; - uint8_t byt = b[0]; - uint8_t l, m = byt >> off; - ur_bool_t end; - uint64_t i, max; + uint8_t byt, l, m = *b >> off; + uint64_t last = left - 1; + // loop over all the bytes we need (or all that remain) + // + // [l] holds [off] bits + // [m] holds [rest] bits + // { - uint64_t need = len_byt + !!len_bit; - end = need >= left; - max = end ? (left - 1) : len_byt; + uint64_t max = ur_min(last, len_byt); + uint64_t i; + + for ( i = 0; i < max; i++ ) { + byt = *++b; + l = byt & mask; + out[i] = m ^ (l << rest); + m = byt >> off; + } } - for ( i = 0; i < max; i++ ) { - byt = b[1ULL + i]; - l = byt & mask; - out[i] = m ^ (l << rest); - m = byt >> off; - } + // we're reading into or beyond the last byte [bsr] + // + // [m] holds all the remaining bits in [bsr], + // but we might not need all of it + // + if ( need >= left ) { + uint8_t bits = len - (last << 3); - if ( end ) { - if ( len_bit && len_bit < rest ) { - out[max] = m & ((1 << len_bit) - 1); - bsr->bytes += max; - left -= max; + if ( bits < rest ) { + out[last] = m & ((1 << bits) - 1); + bsr->bytes = b; + left = 1; off += len_bit; } else { - out[max] = m; + out[last] = m; bsr->bytes = 0; left = 0; off = 0; } } + // we need less than a byte, but it might span multiple bytes + // else { - uint8_t bits = off + len_bit; - uint64_t step = max + !!(bits >> 3); + uint8_t bits = off + len_bit; + uint8_t step = !!(bits >> 3); - bsr->bytes += step; - left -= step; + bsr->bytes = b + step; + left -= len_byt + step; off = ur_mask_3(bits); if ( len_bit ) { if ( len_bit <= rest ) { - out[max] = m & ((1 << len_bit) - 1); + out[len_byt] = m & ((1 << len_bit) - 1); } else { - l = b[1ULL + max] & ((1 << off) - 1);; - out[max] = m ^ (l << rest); + l = *++b & ((1 << off) - 1); + out[len_byt] = m ^ (l << rest); } } 
} } - bsr->off = off; - bsr->left = left; - bsr->bits += len; + bsr->off = off; + bsr->left = left; } } From 4387c3e2d826d51b4897ee49288567ce9e607ee6 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 16:51:39 -0700 Subject: [PATCH 079/123] ur: adds ur_bsr_skip_any() and tests, uses in ur_cue_test() --- pkg/urbit/include/ur/bitstream.h | 3 ++ pkg/urbit/tests/ur_tests.c | 49 ++++++++++++++++++++++++++++++++ pkg/urbit/ur/bitstream.c | 49 ++++++++++++++++++++++++++++++++ pkg/urbit/ur/serial.c | 11 +------ 4 files changed, 102 insertions(+), 10 deletions(-) diff --git a/pkg/urbit/include/ur/bitstream.h b/pkg/urbit/include/ur/bitstream.h index 785866857f..0207853806 100644 --- a/pkg/urbit/include/ur/bitstream.h +++ b/pkg/urbit/include/ur/bitstream.h @@ -52,6 +52,9 @@ ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); void ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); +void +ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len); + ur_cue_res_e ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 4eae9ac614..2470ef5a7e 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1204,6 +1204,54 @@ _test_bsr_bytes_any(void) & _test_bsr_bytes_any_loop("bsr bytes alt 2 even", 10, 0x55); } +static int +_test_bsr_skip_any_loop(const char *cap, uint8_t len, uint8_t val) +{ + int ret = 1; + uint64_t max = (len << 3) + 7; + ur_bsr_t a, b; + uint8_t *bytes, *c; + uint8_t i, j, k; + + c = malloc(1 + len); + bytes = malloc(len); + memset(bytes, val, len); + + for ( i = 0; i < 8; i++) { + for ( j = 1; j <= max; j++ ) { + a.left = b.left = len; + a.bytes = b.bytes = len ? bytes : 0; + a.off = b.off = len ? 
i : 0; + a.bits = b.bits = i; + memset(c, 0x0, len); + + _bsr_bytes_any_slow(&a, j, c); + ur_bsr_skip_any(&b, j); + + ret &= _bsr_cmp_any_check(cap, i, j, &a, &b); + } + } + + free(bytes); + free(c); + + return ret; +} + +static int +_test_bsr_skip_any(void) +{ + return _test_bsr_skip_any_loop("bsr skip nought", 0, 0x0) + & _test_bsr_skip_any_loop("bsr skip ones odd", 3, 0xff) + & _test_bsr_skip_any_loop("bsr skip ones even", 4, 0xff) + & _test_bsr_skip_any_loop("bsr skip zeros odd", 5, 0x0) + & _test_bsr_skip_any_loop("bsr skip zeros even", 6, 0x0) + & _test_bsr_skip_any_loop("bsr skip alt 1 odd", 7, 0xaa) + & _test_bsr_skip_any_loop("bsr skip alt 1 even", 8, 0xaa) + & _test_bsr_skip_any_loop("bsr skip alt 2 odd", 9, 0x55) + & _test_bsr_skip_any_loop("bsr skip alt 2 even", 10, 0x55); +} + static int _bsr_cmp_check(const char* cap, uint8_t off, @@ -1428,6 +1476,7 @@ _test_bsr(void) return _test_bsr_bit() & _test_bsr_bit_any() & _test_bsr_bytes_any() + & _test_bsr_skip_any() & _test_bsr8() & _test_bsr32() & _test_bsr64() diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index 04910f36a0..b9dbc34dd8 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -464,6 +464,55 @@ ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out) } } +void +ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len) +{ + uint64_t left = bsr->left; + + bsr->bits += len; + + if ( !left ) { + return; + } + else { + const uint8_t *b = bsr->bytes; + uint8_t off = bsr->off; + uint64_t len_byt = len >> 3; + uint8_t len_bit = ur_mask_3(len); + uint64_t need = len_byt + !!len_bit; + uint8_t rest = 8 - off; + uint64_t last = left - 1; + + b += ur_min(last, len_byt) + 1; + + if ( need >= left ) { + uint8_t bits = len - (last << 3); + + if ( bits < rest ) { + bsr->bytes = b - 1; + left = 1; + off += len_bit; + } + else { + bsr->bytes = 0; + left = 0; + off = 0; + } + } + else { + uint8_t bits = off + len_bit; + uint8_t step = !!(bits >> 3); + + bsr->bytes = b - (1 - step); + 
left -= len_byt + step; + off = ur_mask_3(bits); + } + + bsr->off = off; + bsr->left = left; + } +} + static inline ur_cue_res_e _bsr_set_gone(ur_bsr_t *bsr, uint8_t bits) { diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index b7964ee9af..8f48b3d8ba 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -364,16 +364,7 @@ _cue_test_next(_cue_test_stack_t *s, return res; } - // XX need a ur_bsr_skip() - // - { - uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); - uint8_t *byt = calloc(len_byt, 1); - ur_bsr_bytes_any(bsr, len, byt); - - free(byt); - } - + ur_bsr_skip_any(bsr, len); ur_dict_put((ur_root_t*)0, dict, bits); return ur_cue_good; } From 4128b19d91a08c8fd69d9f8ee02859c833322904 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 17:05:10 -0700 Subject: [PATCH 080/123] ur: adds ur_bsr_init() to check 64-bit bitwise-addressing overflow --- pkg/urbit/include/ur/bitstream.h | 14 ++++++++++++++ pkg/urbit/ur/bitstream.c | 15 +++++++++++++++ pkg/urbit/ur/serial.c | 10 ++++++---- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pkg/urbit/include/ur/bitstream.h b/pkg/urbit/include/ur/bitstream.h index 0207853806..7ca513352c 100644 --- a/pkg/urbit/include/ur/bitstream.h +++ b/pkg/urbit/include/ur/bitstream.h @@ -15,6 +15,14 @@ typedef enum { ur_jam_back = 2 } ur_cue_tag_e; +/* +** stateful bitstream reader, backed by a byte-buffer, +** supporting a variety of read sizes/patterns. +** +** NB: ur_bsr*_any() functions behave as if the stream were infinite, +** subject to overall limit of a 64-bit bit-cursor. +** +*/ typedef struct ur_bsr_s { uint64_t left; uint64_t bits; @@ -31,6 +39,12 @@ typedef struct ur_bsw_s { uint8_t *bytes; } ur_bsw_t; +/* +** initialize bitstream and check for 64-bit bit-cursor overflow. 
+*/ +ur_cue_res_e +ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes); + ur_bool_t ur_bsr_sane(ur_bsr_t *bsr); diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index b9dbc34dd8..b91a107eef 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -5,6 +5,21 @@ #include "ur/defs.h" #include "ur/bitstream.h" +ur_cue_res_e +ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes) +{ + // check for overflow + // + if ( (len << 3) < len ) { + return ur_cue_meme; + } + + bsr->left = len; + bsr->bytes = bytes; + + return ur_cue_good; +} + ur_bool_t ur_bsr_sane(ur_bsr_t *bsr) { diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 8f48b3d8ba..a4aa5e74a4 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -227,8 +227,9 @@ ur_cue_unsafe(ur_root_t *r, // init bitstream-reader // - bsr.left = len; - bsr.bytes = byt; + if ( ur_cue_good != (res = ur_bsr_init(&bsr, len, byt)) ) { + return res; + } // setup stack // @@ -383,8 +384,9 @@ ur_cue_test_unsafe(ur_dict_t *dict, // init bitstream-reader // - bsr.left = len; - bsr.bytes = byt; + if ( ur_cue_good != (res = ur_bsr_init(&bsr, len, byt)) ) { + return res; + } // setup stack // From 612df9541fd23767bc56a7244b79226ae1d17a17 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 17:48:32 -0700 Subject: [PATCH 081/123] ur: documents jam/cue invariants, enforces cue --- pkg/urbit/include/ur/serial.h | 20 ++++++++++++++++++++ pkg/urbit/ur/serial.c | 10 ++++++++++ 2 files changed, 30 insertions(+) diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index 13d8fae511..9a05115a04 100644 --- a/pkg/urbit/include/ur/serial.h +++ b/pkg/urbit/include/ur/serial.h @@ -5,6 +5,15 @@ #include #include +/* +** bit-wise serialize a noun of arbitrary into a byte-buffer. +** supports up to 64-bits of bit-addressed output (nearly 2 EiB). +** as this is an impractical volume data, cursor overflow is not checked. 
+** +** unsafe variant is unsafe wrt its [dict] parameter, which must be empty, +** but can be passed in order to skip reallocation inside hot loops. +** +*/ uint64_t ur_jam_unsafe(ur_root_t *r, ur_nref ref, @@ -15,6 +24,17 @@ ur_jam_unsafe(ur_root_t *r, uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); +/* +** bitwise deserialization of an arbitrary byte-buffer into a noun. +** supports up to 62-bits of bit-addressed input (511 PiB). +** return will be [ur_cue_good] upon success. +** +** unsafe variant is unsafe wrt its [dict] parameter, which must be empty, +** but can be passed in order to skip reallocation inside hot loops. +** +** test variant does not allocate nouns, but merely parses the input. +** +*/ ur_cue_res_e ur_cue_unsafe(ur_root_t *r, ur_dict64_t *dict, diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index a4aa5e74a4..71b14222ed 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -230,6 +230,11 @@ ur_cue_unsafe(ur_root_t *r, if ( ur_cue_good != (res = ur_bsr_init(&bsr, len, byt)) ) { return res; } + // bit-cursor (and backreferences) must fit in 62-bit direct atoms + // + else if ( 0x7ffffffffffffffULL < len ) { + return ur_cue_meme; + } // setup stack // @@ -387,6 +392,11 @@ ur_cue_test_unsafe(ur_dict_t *dict, if ( ur_cue_good != (res = ur_bsr_init(&bsr, len, byt)) ) { return res; } + // bit-cursor (and backreferences) must fit in 62-bit direct atoms + // + else if ( 0x7ffffffffffffffULL < len ) { + return ur_cue_meme; + } // setup stack // From fd5edcb6b64f8fcba60d0f9c40cf9dc5f06e8212 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 17:51:39 -0700 Subject: [PATCH 082/123] ur: distinguish bad backreferences from other failures in cue --- pkg/urbit/include/ur/bitstream.h | 5 +++-- pkg/urbit/ur/serial.c | 9 ++------- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/pkg/urbit/include/ur/bitstream.h b/pkg/urbit/include/ur/bitstream.h index 7ca513352c..5a6d5239cd 100644 --- 
a/pkg/urbit/include/ur/bitstream.h +++ b/pkg/urbit/include/ur/bitstream.h @@ -5,8 +5,9 @@ typedef enum { ur_cue_good = 0, - ur_cue_gone = 1, - ur_cue_meme = 2 + ur_cue_back = 1, + ur_cue_gone = 2, + ur_cue_meme = 3 } ur_cue_res_e; typedef enum { diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 71b14222ed..5a945072df 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -174,10 +174,8 @@ _cue_next(ur_root_t *r, else { uint64_t val, bak = ur_bsr64_any(bsr, len); - // XX distinguish bad backref? - // if ( !ur_dict64_get(r, dict, bak, &val) ) { - return ur_cue_gone; + return ur_cue_back; } *out = (ur_nref)val; @@ -356,12 +354,9 @@ _cue_test_next(_cue_test_stack_t *s, } else { uint64_t bak = ur_bsr64_any(bsr, len); - - // XX distinguish bad backref? - // return ur_dict_get((ur_root_t*)0, dict, bak) ? ur_cue_good - : ur_cue_gone; + : ur_cue_back; } } From b01d57eb3037b1d4a607a72d0474e1aa9ff00f43 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 17:53:32 -0700 Subject: [PATCH 083/123] u3: distinguish bad backreferences, enforce 62-bit cursors in cue --- pkg/urbit/noun/serial.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 8924162bb5..688026a698 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -923,14 +923,11 @@ _cs_cue_xeno_next(_cue_stack_t* tac_u, c3_w bak_w; if ( !ur_dict32_get(rot_u, dic_u, bak_d, &bak_w) ) { - // XX distinguish bad backref? 
- // - return ur_cue_gone; - } - else { - *out = u3k((u3_noun)bak_w); - return ur_cue_good; + return ur_cue_back; } + + *out = u3k((u3_noun)bak_w); + return ur_cue_good; } } @@ -980,8 +977,14 @@ u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, // init bitstream-reader // - red_u.left = len_d; - red_u.bytes = byt_y; + if ( ur_cue_good != (res_e = ur_bsr_init(&red_u, len_d, byt_y)) ) { + return res_e; + } + // bit-cursor (and backreferences) must fit in 62-bit direct atoms + // + else if ( 0x7ffffffffffffffULL < len_d ) { + return ur_cue_meme; + } // setup stack // @@ -1213,8 +1216,13 @@ u3s_cue_bytes(c3_d len_d, const c3_y* byt_y) // init bitstream-reader // - red_u.left = len_d; - red_u.bytes = byt_y; + _cs_cue_need(ur_bsr_init(&red_u, len_d, byt_y)); + + // bit-cursor (and backreferences) must fit in 62-bit direct atoms + // + if ( 0x7ffffffffffffffULL < len_d ) { + return u3m_bail(c3__meme); + } // advance into stream // From f0dd6dc7a6ca5ba5274b53e08cb850b3e6347047 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Mon, 31 Aug 2020 19:40:32 -0700 Subject: [PATCH 084/123] u3: refactors u3s_cue_xeno, declares u3s_cue_xeno_unsafe() --- pkg/urbit/bench/ur_bench.c | 18 ++++-------- pkg/urbit/include/noun/serial.h | 18 ++++++++++-- pkg/urbit/noun/serial.c | 51 ++++++++++++++++++--------------- pkg/urbit/noun/urth.c | 21 +++++++++++--- pkg/urbit/tests/jam_tests.c | 10 +------ 5 files changed, 67 insertions(+), 51 deletions(-) diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c index 7b6283fefd..af99346ffc 100644 --- a/pkg/urbit/bench/ur_bench.c +++ b/pkg/urbit/bench/ur_bench.c @@ -2,14 +2,6 @@ #include "vere/vere.h" #include "ur/ur.h" -// XX not declared in serial.h due to presence of ur_* types -// -ur_cue_res_e -u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, - c3_d len_d, - const c3_y* byt_y, - u3_noun* out); - /* _setup(): prepare for tests. 
*/ static void @@ -188,12 +180,11 @@ _cue_bench(void) fprintf(stderr, " cue atom: %u ms\r\n", mil_w); } - // NB: runs 1/8th the number of times - // { gettimeofday(&b4, 0); { + u3_noun out; c3_w len_w = u3r_met(3, vat); // XX assumes little-endian // @@ -201,15 +192,16 @@ _cue_bench(void) ? (c3_y*)&vat : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; - for ( i_w = 0; i_w < max_w / 8; i_w++ ) { - u3z(u3s_cue_xeno(len_w, byt_y)); + for ( i_w = 0; i_w < max_w; i_w++ ) { + u3s_cue_xeno(len_w, byt_y, &out); + u3z(out); } } gettimeofday(&f2, 0); timersub(&f2, &b4, &d0); mil_w = (d0.tv_sec * 1000) + (d0.tv_usec / 1000); - fprintf(stderr, " cue xeno: %u ms (estimated)\r\n", mil_w * 8); + fprintf(stderr, " cue xeno: %u ms\r\n", mil_w); } { diff --git a/pkg/urbit/include/noun/serial.h b/pkg/urbit/include/noun/serial.h index 1ecfdd5112..8d82e3de80 100644 --- a/pkg/urbit/include/noun/serial.h +++ b/pkg/urbit/include/noun/serial.h @@ -1,6 +1,12 @@ /* i/n/serial.h ** */ + /* forward declarations + */ + /* ur_dict32_s: off-loom 32-bit dictionary. + */ + struct ur_dict32_s; + /* Noun serialization. All noun arguments RETAINED. */ @@ -40,10 +46,18 @@ u3_noun u3s_cue(u3_atom a); + /* u3s_cue_xeno_unsafe(): cue onto the loom, all bookkeeping off-loom. + */ + c3_o + u3s_cue_xeno_unsafe(struct ur_dict32_s* dic_u, + c3_d len_d, + const c3_y* byt_y, + u3_noun* out); + /* u3s_cue_xeno(): cue onto the loom, bookkeeping off the loom. */ - u3_noun - u3s_cue_xeno(c3_d len_d, const c3_y* byt_y); + c3_o + u3s_cue_xeno(c3_d len_d, const c3_y* byt_y, u3_noun* out); /* u3s_cue_bytes(): cue bytes onto the loom. */ diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 688026a698..7c48cb58a5 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -963,8 +963,10 @@ _cs_cue_xeno_next(_cue_stack_t* tac_u, } /* u3s_cue_xeno_unsafe(): cue onto the loom, all bookkeeping off-loom. +** +** NB: unsafe wrt [dic_u], which must be empty.
*/ -ur_cue_res_e +c3_o u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, c3_d len_d, const c3_y* byt_y, @@ -978,12 +980,12 @@ u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, // init bitstream-reader // if ( ur_cue_good != (res_e = ur_bsr_init(&red_u, len_d, byt_y)) ) { - return res_e; + return c3n; } // bit-cursor (and backreferences) must fit in 62-bit direct atoms // else if ( 0x7ffffffffffffffULL < len_d ) { - return ur_cue_meme; + return c3n; } // setup stack @@ -1020,43 +1022,46 @@ u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, } } - c3_free(tac_u.fam_u); - if ( ur_cue_good == res_e ) { *out = ref; + c3_free(tac_u.fam_u); + return c3y; + } + else { + // unwind the stack, disposing intermediate nouns + // + while ( tac_u.fil_w ) { + _cue_frame_t* fam_u = &(tac_u.fam_u[--tac_u.fil_w]); + + if ( u3_none != fam_u->ref ) { + u3z(fam_u->ref); + } + } + + c3_free(tac_u.fam_u); + return c3n; } - return res_e; } /* u3s_cue_xeno(): cue onto the loom, bookkeeping off the loom. */ -u3_noun -u3s_cue_xeno(c3_d len_d, const c3_y* byt_y) +c3_o +u3s_cue_xeno(c3_d len_d, const c3_y* byt_y, u3_noun* out) { ur_dict32_t dic_u = {0}; - ur_cue_res_e res_e; - u3_noun pro; + c3_o ret_o; c3_assert( &(u3H->rod_u) == u3R ); // XX tune the initial dictionary size for less reallocation // - { - ur_root_t* rot_u = 0; - ur_dict32_grow(rot_u, &dic_u, ur_fib33, ur_fib34); - } + ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); - // errors are fatal - // - if ( ur_cue_good != - (res_e = u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, &pro)) ) - { - fprintf(stderr, "cue xeno: failed\r\n"); - exit(1); - } + ret_o = u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, out); ur_dict_free((ur_dict_t*)&dic_u); - return pro; + + return ret_o; } /* _cs_cue_need(): bail on ur_cue_* read failures. 
diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 442d15587a..16dcd66a06 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -754,12 +754,25 @@ u3u_uncram(c3_c* dir_c, c3_d eve_d) // XX errors are fatal, barring a full "u3m_reboot"-type operation. // { - u3_noun roc, cod, ref = u3s_cue_xeno(len_d, byt_y); + ur_dict32_t dic_u = {0}; + u3_noun roc, cod, ref; - if ( u3r_pq(ref, c3__fast, &roc, &cod) ) { - u3z(ref); + // XX tune the initial dictionary size for less reallocation + // + ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib33, ur_fib34); + + if ( c3n == u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, &ref) ) { + fprintf(stderr, "uncram: failed to cue rock\r\n"); + ur_dict_free((ur_dict_t*)&dic_u); + return c3n; + } + + ur_dict_free((ur_dict_t*)&dic_u); + + if ( c3n == u3r_pq(ref, c3__fast, &roc, &cod) ) { fprintf(stderr, "uncram: failed: invalid rock format\r\n"); - exit(1); + u3z(ref); + return c3n; } u3A->roc = u3k(roc); diff --git a/pkg/urbit/tests/jam_tests.c b/pkg/urbit/tests/jam_tests.c index f759d89345..da8fc914ae 100644 --- a/pkg/urbit/tests/jam_tests.c +++ b/pkg/urbit/tests/jam_tests.c @@ -1,14 +1,6 @@ #include "all.h" #include "ur/ur.h" -// XX not declared in serial.h due to presence of ur_* types -// -ur_cue_res_e -u3s_cue_xeno_unsafe(ur_dict32_t* dic_u, - c3_d len_d, - const c3_y* byt_y, - u3_noun* out); - /* _setup(): prepare for tests. 
*/ static void @@ -380,7 +372,7 @@ _test_cue_spec(const c3_c* cap_c, ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); - if ( ur_cue_good != u3s_cue_xeno_unsafe(&dic_u, len_w, byt_y, &out) ) { + if ( c3n == u3s_cue_xeno_unsafe(&dic_u, len_w, byt_y, &out) ) { fprintf(stderr, "\033[31mcue %s fail 1\033[0m\r\n", cap_c); ret_i = 0; } From 7d541b5a1f984fb2f11ac333e853da014d715330 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 12:07:21 -0700 Subject: [PATCH 085/123] u3: refactors mmap read/write patterns in urth.c --- pkg/urbit/include/noun/urth.h | 20 +++++ pkg/urbit/noun/urth.c | 136 +++++++++++++++++++++++++++------- 2 files changed, 128 insertions(+), 28 deletions(-) diff --git a/pkg/urbit/include/noun/urth.h b/pkg/urbit/include/noun/urth.h index 7320d3b329..c8dd98a96b 100644 --- a/pkg/urbit/include/noun/urth.h +++ b/pkg/urbit/include/noun/urth.h @@ -15,3 +15,23 @@ */ c3_o u3u_uncram(c3_c* dir_c, c3_d eve_d); + + /* u3u_mmap_read(): open and mmap the file at [pat_c] for reading. + */ + c3_o + u3u_mmap_read(c3_c* cap_c, c3_c* pat_c, c3_d* out_d, c3_y** out_y); + + /* u3u_mmap(): open/create file-backed mmap at [pat_c] for read/write. + */ + c3_o + u3u_mmap(c3_c* cap_c, c3_c* pat_c, c3_d len_d, c3_y** out_y); + + /* u3u_mmap_save(): sync file-backed mmap. + */ + c3_o + u3u_mmap_save(c3_c* cap_c, c3_c* pat_c, c3_d len_d, c3_y* byt_y); + + /* u3u_munmap(): unmap the region at [byt_y]. + */ + c3_o + u3u_munmap(c3_d len_d, c3_y* byt_y); diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 16dcd66a06..a791a3f65c 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -657,41 +657,30 @@ u3u_cram(c3_c* dir_c, c3_d eve_d) return ret_o; } -/* _cu_rock_load(): load a rock into a byte buffer. +/* u3u_mmap_read(): open and mmap the file at [pat_c] for reading. 
*/ -static c3_o -_cu_rock_load(c3_c* dir_c, c3_d eve_d, c3_d* out_d, c3_y** out_y) +c3_o +u3u_mmap_read(c3_c* cap_c, c3_c* pat_c, c3_d* out_d, c3_y** out_y) { c3_i fid_i; c3_d len_d; - // open rock file + // open file // - { - c3_c* nam_c; - - if ( c3n == _cu_rock_path(dir_c, eve_d, &nam_c) ) { - return c3n; - } - - if ( -1 == (fid_i = open(nam_c, O_RDONLY, 0644)) ) { - fprintf(stderr, "rock: open failed (%s, %" PRIu64 "): %s\r\n", - dir_c, eve_d, strerror(errno)); - c3_free(nam_c); - return c3n; - } - - c3_free(nam_c); + if ( -1 == (fid_i = open(pat_c, O_RDONLY, 0644)) ) { + fprintf(stderr, "%s: open failed (%s): %s\r\n", + cap_c, pat_c, strerror(errno)); + return c3n; } - // measure rock file + // measure file // { struct stat buf_b; if ( -1 == fstat(fid_i, &buf_b) ) { - fprintf(stderr, "rock: stat failed (%s, %" PRIu64 "): %s\r\n", - dir_c, eve_d, strerror(errno)); + fprintf(stderr, "%s: stat failed (%s): %s\r\n", + cap_c, pat_c, strerror(errno)); close(fid_i); return c3n; } @@ -699,14 +688,14 @@ _cu_rock_load(c3_c* dir_c, c3_d eve_d, c3_d* out_d, c3_y** out_y) len_d = buf_b.st_size; } - // mmap rock file + // mmap file // { void* ptr_v; if ( MAP_FAILED == (ptr_v = mmap(0, len_d, PROT_READ, MAP_SHARED, fid_i, 0)) ) { - fprintf(stderr, "rock: mmap failed (%s, %" PRIu64 "): %s\r\n", - dir_c, eve_d, strerror(errno)); + fprintf(stderr, "%s: mmap failed (%s): %s\r\n", + cap_c, pat_c, strerror(errno)); close(fid_i); return c3n; } @@ -715,22 +704,104 @@ _cu_rock_load(c3_c* dir_c, c3_d eve_d, c3_d* out_d, c3_y** out_y) *out_y = (c3_y*)ptr_v; } + // close file + // close(fid_i); return c3y; } +/* u3u_mmap(): open/create file-backed mmap at [pat_c] for read/write. 
+*/ +c3_o +u3u_mmap(c3_c* cap_c, c3_c* pat_c, c3_d len_d, c3_y** out_y) +{ + c3_i fid_i; + + // open file + // + if ( -1 == (fid_i = open(pat_c, O_RDWR | O_CREAT | O_TRUNC, 0644)) ) { + fprintf(stderr, "%s: open failed (%s): %s\r\n", + cap_c, pat_c, strerror(errno)); + return c3n; + } + + // grow [fid_i] to [len_d] + // + // XX build with _FILE_OFFSET_BITS == 64 ? + // + if ( 0 != ftruncate(fid_i, len_d) ) { + fprintf(stderr, "%s: ftruncate grow %s: %s\r\n", + cap_c, pat_c, strerror(errno)); + close(fid_i); + return c3n; + } + + // mmap file + // + { + void* ptr_v; + + if ( MAP_FAILED == (ptr_v = mmap(0, len_d, PROT_READ|PROT_WRITE, MAP_SHARED, fid_i, 0)) ) { + fprintf(stderr, "%s: mmap failed (%s): %s\r\n", + cap_c, pat_c, strerror(errno)); + close(fid_i); + return c3n; + } + + *out_y = (c3_y*)ptr_v; + } + + // close file + // + close(fid_i); + + return c3y; +} + +/* u3u_mmap_save(): sync file-backed mmap. +*/ +c3_o +u3u_mmap_save(c3_c* cap_c, c3_c* pat_c, c3_d len_d, c3_y* byt_y) +{ + if ( 0 != msync(byt_y, len_d, MS_SYNC) ) { + fprintf(stderr, "%s: msync %s: %s\r\n", cap_c, pat_c, strerror(errno)); + return c3n; + } + + return c3y; +} + +/* u3u_munmap(): unmap the region at [byt_y]. +*/ +c3_o +u3u_munmap(c3_d len_d, c3_y* byt_y) +{ + if ( 0 != munmap(byt_y, len_d) ) { + return c3n; + } + + return c3y; +} + /* u3u_uncram(): restore persistent state from a rock.
*/ c3_o u3u_uncram(c3_c* dir_c, c3_d eve_d) { + c3_c* nam_c; c3_d len_d; c3_y* byt_y; // load rock file into buffer // - if ( c3n == _cu_rock_load(dir_c, eve_d, &len_d, &byt_y) ) { + if ( c3n == _cu_rock_path(dir_c, eve_d, &nam_c) ) { + fprintf(stderr, "uncram: failed to make rock path (%s, %" PRIu64 ")\r\n", + dir_c, eve_d); + return c3n; + } + else if ( c3n == u3u_mmap_read("rock", nam_c, &len_d, &byt_y) ) { + c3_free(nam_c); return c3n; } @@ -764,6 +835,7 @@ u3u_uncram(c3_c* dir_c, c3_d eve_d) if ( c3n == u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, &ref) ) { fprintf(stderr, "uncram: failed to cue rock\r\n"); ur_dict_free((ur_dict_t*)&dic_u); + c3_free(nam_c); return c3n; } @@ -772,6 +844,7 @@ u3u_uncram(c3_c* dir_c, c3_d eve_d) if ( c3n == u3r_pq(ref, c3__fast, &roc, &cod) ) { fprintf(stderr, "uncram: failed: invalid rock format\r\n"); u3z(ref); + c3_free(nam_c); return c3n; } @@ -781,6 +854,8 @@ u3u_uncram(c3_c* dir_c, c3_d eve_d) u3z(ref); } + u3u_munmap(len_d, byt_y); + // allocate new hot jet state; re-establish warm // u3j_boot(c3y); @@ -796,9 +871,14 @@ u3u_uncram(c3_c* dir_c, c3_d eve_d) // leave rocks on disk // - // if ( c3n == u3m_rock_drop(dir_c, eve_d) ) { - // u3l_log("serf: warning: orphaned state file\r\n"); + // if ( 0 != unlink(nam_c) ) { + // fprintf(stderr, "uncram: failed to delete rock (%s, %" PRIu64 "): %s\r\n", + // dir_c, eve_d, strerror(errno)); + // c3_free(nam_c); + // return c3n; // } + c3_free(nam_c); + return c3y; } From 2897feeea39ada1945dbde567dbf8411c625086b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 13:59:20 -0700 Subject: [PATCH 086/123] u3: removes obsolete portable snapshot implementation --- pkg/urbit/include/noun/events.h | 21 --- pkg/urbit/include/noun/manage.h | 20 -- pkg/urbit/include/noun/serial.h | 18 -- pkg/urbit/noun/events.c | 114 ------------ pkg/urbit/noun/manage.c | 115 ------------ pkg/urbit/noun/serial.c | 311 -------------------------------- 6 files changed, 599 deletions(-) diff --git 
a/pkg/urbit/include/noun/events.h b/pkg/urbit/include/noun/events.h index 67729118f8..16b706f535 100644 --- a/pkg/urbit/include/noun/events.h +++ b/pkg/urbit/include/noun/events.h @@ -78,27 +78,6 @@ c3_w u3e_dirty(void); - /* u3e_hold(): backup memory images - */ - c3_o - u3e_hold(void); - - /* u3e_drop(): remove backed-up memory images - */ - c3_o - u3e_drop(void); - - /* u3e_fall(): restore memory images - */ - c3_o - u3e_fall(void); - - /* u3e_wipe(): discard memory images - */ - c3_o - u3e_wipe(void); - - /* u3e_yolo(): disable dirty page tracking, read/write whole loom. */ c3_o diff --git a/pkg/urbit/include/noun/manage.h b/pkg/urbit/include/noun/manage.h index a05ce78714..fec47392ec 100644 --- a/pkg/urbit/include/noun/manage.h +++ b/pkg/urbit/include/noun/manage.h @@ -145,23 +145,3 @@ */ c3_w u3m_pack(void); - - /* u3m_rock_stay(): jam state into [dir_c] at [evt_d] - */ - c3_o - u3m_rock_stay(c3_c* dir_c, c3_d evt_d); - - /* u3m_rock_load(): load state from [dir_c] at [evt_d] - */ - c3_o - u3m_rock_load(c3_c* dir_c, c3_d evt_d); - - /* u3m_rock_drop(): delete saved state from [dir_c] at [evt_d] - */ - c3_o - u3m_rock_drop(c3_c* dir_c, c3_d evt_d); - - /* u3m_wipe(): purge and reinitialize loom, with checkpointing - */ - void - u3m_wipe(void); diff --git a/pkg/urbit/include/noun/serial.h b/pkg/urbit/include/noun/serial.h index 8d82e3de80..f7a06aa4aa 100644 --- a/pkg/urbit/include/noun/serial.h +++ b/pkg/urbit/include/noun/serial.h @@ -18,24 +18,6 @@ c3_w* u3s_jam_fib(u3_noun a, c3_w* bit_w); - /* u3s_jam_met(): measure a noun for jam, calculating backrefs - */ - c3_d - u3s_jam_met(u3_noun a, u3p(u3h_root)* bak_p); - - /* u3s_jam_buf(): jam [a] into [buf_w], without allocation - ** - ** using backrefs in [bak_p], as computed by u3s_jam_met - ** can only encode up to c3_w bits - */ - void - u3s_jam_buf(u3_noun a, u3p(u3h_root) bak_p, c3_w* buf_w); - - /* u3s_jam_file(): jam [a] into a file, overwriting - */ - c3_o - u3s_jam_file(u3_noun a, c3_c* pas_c); - /* 
u3s_jam_xeno(): jam with off-loom buffer (re-)allocation. */ c3_d diff --git a/pkg/urbit/noun/events.c b/pkg/urbit/noun/events.c index 1e1fa6b815..da34120c72 100644 --- a/pkg/urbit/noun/events.c +++ b/pkg/urbit/noun/events.c @@ -882,120 +882,6 @@ u3e_live(c3_o nuu_o, c3_c* dir_c) return nuu_o; } -static c3_o -_ce_image_move(u3e_image* img_u, c3_o bak_o) -{ - c3_c old_c[8193]; - c3_c new_c[8197]; - snprintf(old_c, 8193, "%s/.urb/chk/%s.bin", u3P.dir_c, img_u->nam_c); - snprintf(new_c, 8197, "%s.bak", old_c); - - c3_i ret_i; - - if ( c3y == bak_o ) { - ret_i = rename(old_c, new_c); - } - else { - ret_i = rename(new_c, old_c); - } - - if ( 0 != ret_i ) { - u3l_log("loom: %s %s failed: %s\r\n", ( c3y == bak_o ) ? "hold" : "fall", - img_u->nam_c, strerror(errno)); - return c3n; - } - - return c3y; -} - -/* u3e_hold(): backup memory images -*/ -c3_o -u3e_hold(void) -{ - if ( (c3n == _ce_image_move(&u3P.nor_u, c3y)) || - (c3n == _ce_image_move(&u3P.sou_u, c3y)) ) - { - return c3n; - } - - // XX sync directory - - return c3y; -} - -static c3_o -_ce_image_drop(u3e_image* img_u) -{ - c3_c pat_c[8193]; - snprintf(pat_c, 8192, "%s/.urb/chk/%s.bin.bak", u3P.dir_c, img_u->nam_c); - - if ( 0 != unlink(pat_c) ) { - u3l_log("loom: drop %s failed: %s\r\n", img_u->nam_c, strerror(errno)); - return c3n; - } - - return c3y; -} - -/* u3e_drop(): remove backed-up memory images -*/ -c3_o -u3e_drop(void) -{ - if ( (c3n == _ce_image_drop(&u3P.nor_u)) || - (c3n == _ce_image_drop(&u3P.sou_u)) ) - { - return c3n; - } - - return c3y; -} - -/* u3e_fall(): restore memory images -*/ -c3_o -u3e_fall(void) -{ - if ( (c3n == _ce_image_move(&u3P.nor_u, c3n)) || - (c3n == _ce_image_move(&u3P.sou_u, c3n)) ) - { - return c3n; - } - - // XX sync directory - - return c3y; -} - -/* u3e_wipe(): discard memory images -*/ -c3_o -u3e_wipe(void) -{ - // XX ensure no patch files are present - - if ( 0 != ftruncate(u3P.nor_u.fid_i, 0) ) { - u3l_log("loom: wipe %s failed: %s\r\n", u3P.nor_u.nam_c, strerror(errno)); 
- return c3n; - } - - if ( 0 != ftruncate(u3P.sou_u.fid_i, 0) ) { - u3l_log("loom: wipe %s failed: %s\r\n", u3P.sou_u.nam_c, strerror(errno)); - return c3n; - } - - c3_sync(u3P.nor_u.fid_i); - c3_sync(u3P.sou_u.fid_i); - - close(u3P.nor_u.fid_i); - close(u3P.sou_u.fid_i); - - // XX sync directory - - return c3y; -} - /* u3e_yolo(): disable dirty page tracking, read/write whole loom. */ c3_o diff --git a/pkg/urbit/noun/manage.c b/pkg/urbit/noun/manage.c index c83d006055..2a1c36c7d1 100644 --- a/pkg/urbit/noun/manage.c +++ b/pkg/urbit/noun/manage.c @@ -1729,121 +1729,6 @@ u3m_boot_lite(void) return 0; } -/* u3m_rock_stay(): jam state into [dir_c] at [evt_d] -*/ -c3_o -u3m_rock_stay(c3_c* dir_c, c3_d evt_d) -{ - c3_c nam_c[8193]; - - snprintf(nam_c, 8192, "%s", dir_c); - mkdir(nam_c, 0700); - - snprintf(nam_c, 8192, "%s/.urb", dir_c); - mkdir(nam_c, 0700); - - snprintf(nam_c, 8192, "%s/.urb/roc", dir_c); - mkdir(nam_c, 0700); - - snprintf(nam_c, 8192, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, evt_d); - - { - u3_noun dat = u3nt(c3__fast, u3k(u3A->roc), u3j_stay()); - c3_o ret_o = u3s_jam_file(dat, nam_c); - u3z(dat); - return ret_o; - } -} - -/* u3m_rock_load(): load state from [dir_c] at [evt_d] -*/ -c3_o -u3m_rock_load(c3_c* dir_c, c3_d evt_d) -{ - c3_c nam_c[8193]; - snprintf(nam_c, 8192, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, evt_d); - - { - u3_noun dat; - - { - // XX u3m_file bails, but we'd prefer to return errors - // - u3_noun fil = u3m_file(nam_c); - u3a_print_memory(stderr, "rock: load", u3r_met(5, fil)); - - u3_noun pro = u3m_soft(0, u3ke_cue, fil); - - if ( u3_blip != u3h(pro) ) { - fprintf(stderr, "rock: unable to cue %s\r\n", nam_c); - u3z(pro); - return c3n; - } - else { - dat = u3k(u3t(pro)); - u3z(pro); - } - } - - { - u3_noun roc, rel; - - if ( u3r_pq(dat, c3__fast, &roc, &rel) ) { - u3z(dat); - return c3n; - } - - u3A->roc = u3k(roc); - u3j_load(u3k(rel)); - } - - u3z(dat); - } - - u3A->ent_d = evt_d; - u3j_ream(); - u3n_ream(); - - return c3y; -} - -/* 
u3m_rock_drop(): delete saved state from [dir_c] at [evt_d] -*/ -c3_o -u3m_rock_drop(c3_c* dir_c, c3_d evt_d) -{ - c3_c nam_c[8193]; - snprintf(nam_c, 8192, "%s/.urb/roc/%" PRIu64 ".jam", dir_c, evt_d); - - if ( 0 != unlink(nam_c) ) { - u3l_log("rock: drop %s failed: %s\r\n", nam_c, strerror(errno)); - return c3n; - } - - return c3y; -} - -/* u3m_wipe(): purge and reinitialize loom, with checkpointing -*/ -void -u3m_wipe(void) -{ - // clear page flags - // - memset((void*)u3P.dit_w, 0, u3a_pages >> 3); - // reinitialize checkpoint system - // - // NB: callers must first u3e_hold() or u3e_wipe() - // - u3e_live(c3n, u3P.dir_c); - // reinitialize loom - // - u3m_pave(c3y, c3n); - // reinitialize jets - // - u3j_boot(c3y); -} - /* u3m_reclaim: clear persistent caches to reclaim memory */ void diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 7c48cb58a5..7e6ecb2cec 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -191,317 +191,6 @@ u3s_jam_fib(u3_noun a, c3_w* bit_w) return fib_u.buf_w; } -/* _cs_jam_met_mat(): the jam bitwidth of an atom of bitwidth [wid_w] -** -** equivalent to (head (rub a)) -*/ -static c3_d -_cs_jam_met_mat(c3_w wid_w) -{ - return ( 0 == wid_w ) ? 
1ULL : - (c3_d)wid_w + (2ULL * (c3_d)_cs_met0_w(wid_w)); -} - -/* _cs_jam_met: struct for tracking the jam bitwidth of a noun -*/ -struct _cs_jam_met { - u3p(u3h_root) har_p; - u3p(u3h_root) bak_p; - c3_d len_d; -}; - -/* _cs_jam_met_atom_cb(): bitwidth of atom or backref encoding for [a] -*/ -static void -_cs_jam_met_atom_cb(u3_atom a, void* ptr_v) -{ - struct _cs_jam_met* met_u = ptr_v; - c3_w a_w = u3r_met(0, a); - u3_weak b = u3h_git(met_u->har_p, a); - - // if we haven't haven't seen [a], put cursor into [har_p] - // - if ( u3_none == b ) { - u3h_put(met_u->har_p, a, u3i_chubs(1, &(met_u->len_d))); - met_u->len_d += 1ULL + _cs_jam_met_mat(a_w); - } - else { - c3_w b_w = u3r_met(0, b); - - // if [a] is smaller than a backref, use directly - // - if ( a_w <= b_w ) { - met_u->len_d += 1ULL + _cs_jam_met_mat(a_w); - } - // otherwise, save backref - // - else { - u3h_put(met_u->bak_p, a, u3k(b)); - met_u->len_d += 2ULL + _cs_jam_met_mat(b_w); - } - } -} - -/* _cs_jam_met_cell_cb(): bitwidth of cell or backref encoding for [a] -*/ -static c3_o -_cs_jam_met_cell_cb(u3_noun a, void* ptr_v) -{ - struct _cs_jam_met* met_u = ptr_v; - u3_weak b = u3h_git(met_u->har_p, a); - - // if we haven't haven't seen [a], put cursor into [har_p] - // - if ( u3_none == b ) { - u3h_put(met_u->har_p, a, u3i_chubs(1, &(met_u->len_d))); - met_u->len_d += 2ULL; - return c3y; - } - // otherwise, save backref and shortcircuit traversal - // - else { - c3_w b_w = u3r_met(0, b); - u3h_put(met_u->bak_p, a, u3k(b)); - met_u->len_d += 2ULL + _cs_jam_met_mat(b_w); - return c3n; - } -} - -/* u3s_jam_met(): measure a noun for jam, calculating backrefs -*/ -c3_d -u3s_jam_met(u3_noun a, u3p(u3h_root)* bak_p) -{ - struct _cs_jam_met met_u; - met_u.har_p = u3h_new(); - met_u.bak_p = u3h_new(); - met_u.len_d = 0ULL; - - u3a_walk_fore(a, &met_u, _cs_jam_met_atom_cb, - _cs_jam_met_cell_cb); - u3h_free(met_u.har_p); - *bak_p = met_u.bak_p; - - return met_u.len_d; -} - -/* _cs_jam_buf: struct for tracking 
the pre-measured jam of a noun -*/ -struct _cs_jam_buf { - u3p(u3h_root) bak_p; - c3_w bit_w; - c3_w* buf_w; -}; - -/* _cs_jam_buf_chop(): chop [met_w] bits of [a] into [buf_u] -*/ -static void -_cs_jam_buf_chop(struct _cs_jam_buf* buf_u, c3_w met_w, u3_noun a) -{ - u3r_chop(0, 0, met_w, buf_u->bit_w, buf_u->buf_w, a); - buf_u->bit_w += met_w; -} - -/* _cs_jam_buf_mat(): length-prefixed encode (mat) [a] into [buf_u] -*/ -static void -_cs_jam_buf_mat(struct _cs_jam_buf* buf_u, u3_atom a) -{ - if ( 0 == a ) { - _cs_jam_buf_chop(buf_u, 1, 1); - } - else { - c3_w a_w = u3r_met(0, a); - c3_w b_w = _cs_met0_w(a_w); - - _cs_jam_buf_chop(buf_u, b_w+1, 1 << b_w); - _cs_jam_buf_chop(buf_u, b_w-1, a_w & ((1 << (b_w-1)) - 1)); - _cs_jam_buf_chop(buf_u, a_w, a); - } -} - -/* _cs_jam_buf_atom_cb(): encode atom or backref -*/ -static void -_cs_jam_buf_atom_cb(u3_atom a, void* ptr_v) -{ - struct _cs_jam_buf* buf_u = ptr_v; - u3_weak b = u3h_git(buf_u->bak_p, a); - - // if [a] has no backref (or this is the referent), encode atom - // - if ( (u3_none == b) || - (u3r_word(0, b) == buf_u->bit_w) ) - { - _cs_jam_buf_chop(buf_u, 1, 0); - _cs_jam_buf_mat(buf_u, a); - } - else { - c3_w a_w = u3r_met(0, a); - c3_w b_w = u3r_met(0, b); - - // if [a] is smaller than the backref, encode atom - // - if ( a_w <= b_w ) { - _cs_jam_buf_chop(buf_u, 1, 0); - _cs_jam_buf_mat(buf_u, a); - } - // otherwise, encode backref - // - else { - _cs_jam_buf_chop(buf_u, 2, 3); - _cs_jam_buf_mat(buf_u, b); - } - } -} - -/* _cs_jam_buf_cell_cb(): encode cell or backref -*/ -static c3_o -_cs_jam_buf_cell_cb(u3_noun a, void* ptr_v) -{ - struct _cs_jam_buf* buf_u = ptr_v; - u3_weak b = u3h_git(buf_u->bak_p, a); - - // if [a] has no backref (or this is the referent), encode cell - // - if ( (u3_none == b) || - (u3r_word(0, b) == buf_u->bit_w) ) - { - _cs_jam_buf_chop(buf_u, 2, 1); - return c3y; - } - // otherwise, encode backref and shortcircuit traversal - // - else { - _cs_jam_buf_chop(buf_u, 2, 3); - 
_cs_jam_buf_mat(buf_u, b); - return c3n; - } -} - -/* u3s_jam_buf(): jam [a] into pre-allocated [buf_w], without allocation -** -** using backrefs in [bak_p], as computed by u3s_jam_met() -** NB [buf_w] must be pre-allocated with sufficient space -** -** XX can only encode up to c3_w bits, due to use of chop -*/ -void -u3s_jam_buf(u3_noun a, u3p(u3h_root) bak_p, c3_w* buf_w) -{ - struct _cs_jam_buf buf_u; - buf_u.bak_p = bak_p; - buf_u.buf_w = buf_w; - buf_u.bit_w = 0; - - // this is in fact safe under normal usage, as - // the stack will have been checked in u3s_jam_met() - // - u3a_walk_fore_unsafe(a, &buf_u, _cs_jam_buf_atom_cb, - _cs_jam_buf_cell_cb); -} - -/* u3s_jam_file(): jam [a] into a file, overwriting -*/ -c3_o -u3s_jam_file(u3_noun a, c3_c* pas_c) -{ - u3p(u3h_root) bak_p; - c3_i fid_i = open(pas_c, O_RDWR | O_CREAT | O_TRUNC, 0644); - c3_w byt_w, wor_w, len_w; - - if ( fid_i < 0 ) { - fprintf(stderr, "jam: open %s: %s\r\n", pas_c, strerror(errno)); - return c3n; - } - - { - c3_d len_d = u3s_jam_met(a, &bak_p); - - if ( len_d > 0xffffffffULL ) { - fprintf(stderr, "jam: overflow c3_w: %" PRIu64 "\r\n", len_d); - u3h_free(bak_p); - return c3n; - } - - // length in bytes a la u3i_bytes - // - byt_w = (c3_w)(len_d >> 3ULL); - if ( len_d > (c3_d)(byt_w << 3) ) { - byt_w++; - } - - // length in words - // - wor_w = (c3_w)(len_d >> 5ULL); - if ( len_d > (c3_d)(wor_w << 5) ) { - wor_w++; - } - - // byte-length of word-length - // - len_w = 4 * wor_w; - } - - // grow [fid_i] to [len_w] - // - if ( 0 != ftruncate(fid_i, len_w) ) { - fprintf(stderr, "jam: ftruncate grow %s: %s\r\n", pas_c, strerror(errno)); - goto error; - } - - // mmap [fid_i], jam into it, sync, and unmap - // - { - c3_w* buf_w; - void* ptr_v = mmap(0, len_w, PROT_READ|PROT_WRITE, MAP_SHARED, fid_i, 0); - - if ( MAP_FAILED == ptr_v ) { - fprintf(stderr, "jam: mmap %s: %s\r\n", pas_c, strerror(errno)); - goto error; - } - - buf_w = ptr_v; - u3s_jam_buf(a, bak_p, buf_w); - - if ( 0 != msync(ptr_v, 
len_w, MS_SYNC) ) { - fprintf(stderr, "jam: msync %s: %s\r\n", pas_c, strerror(errno)); - // XX ignore return? - // - munmap(ptr_v, len_w); - goto error; - } - - if ( 0 != munmap(ptr_v, len_w) ) { - fprintf(stderr, "jam: munmap %s: %s\r\n", pas_c, strerror(errno)); - // XX fatal error? - // - goto error; - } - } - - // shrink [fid_i] to [byt_w] - // - if ( 0 != ftruncate(fid_i, byt_w) ) { - fprintf(stderr, "jam: ftruncate shrink %s: %s\r\n", pas_c, strerror(errno)); - goto error; - } - - { - close(fid_i); - u3h_free(bak_p); - return c3y; - } - - error: { - close(fid_i); - unlink(pas_c); - u3h_free(bak_p); - return c3n; - } -} - typedef struct _jam_xeno_s { u3p(u3h_root) har_p; ur_bsw_t rit_u; From 1839384d52a65f52bdb59700081786f42f4f16ff Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 14:17:26 -0700 Subject: [PATCH 087/123] u3: refactors ivory-pill boot --- pkg/urbit/noun/vortex.c | 11 ++++----- pkg/urbit/vere/king.c | 52 +++++++++++++++++++++++++++++------------ 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/pkg/urbit/noun/vortex.c b/pkg/urbit/noun/vortex.c index 75351185db..8288baf03d 100644 --- a/pkg/urbit/noun/vortex.c +++ b/pkg/urbit/noun/vortex.c @@ -50,30 +50,29 @@ u3v_boot(u3_noun eve) static u3_noun _cv_lite(u3_noun pil) { - u3_noun arv = u3ke_cue(pil); u3_noun eve, pro; - u3x_trel(arv, &eve, 0, 0); + u3x_trel(pil, &eve, 0, 0); - u3l_log("lite: arvo formula %x\r\n", u3r_mug(arv)); + u3l_log("lite: arvo formula %x\r\n", u3r_mug(pil)); pro = u3v_life(u3k(eve)); u3l_log("lite: core %x\r\n", u3r_mug(pro)); - u3z(arv); + u3z(pil); return pro; } /* u3v_boot_lite(): light bootstrap sequence, just making a kernel. 
*/ c3_o -u3v_boot_lite(u3_atom lit) +u3v_boot_lite(u3_noun pil) { // ensure zero-initialized kernel // u3A->roc = 0; { - u3_noun pro = u3m_soft(0, _cv_lite, lit); + u3_noun pro = u3m_soft(0, _cv_lite, pil); if ( u3_blip != u3h(pro) ) { u3z(pro); diff --git a/pkg/urbit/vere/king.c b/pkg/urbit/vere/king.c index c0df14592a..60f8a2c76f 100644 --- a/pkg/urbit/vere/king.c +++ b/pkg/urbit/vere/king.c @@ -692,6 +692,42 @@ _king_loop_exit() unlink(u3K.certs_c); } +static void +_king_boot_ivory(void) +{ + c3_d len_d; + c3_y* byt_y; + + if ( u3_Host.ops_u.lit_c ) { + if ( c3n == u3u_mmap_read("lite", u3_Host.ops_u.lit_c, &len_d, &byt_y) ) { + u3l_log("lite: unable to load ivory pill at %s\n", + u3_Host.ops_u.lit_c); + exit(1); + } + } + else { + len_d = u3_Ivory_pill_len; + byt_y = u3_Ivory_pill; + } + + { + u3_noun pil = u3ke_cue(u3i_bytes(len_d, byt_y)); + + if ( c3n == u3v_boot_lite(pil)) { + u3l_log("lite: boot failed\r\n"); + exit(1); + } + } + + if ( u3_Host.ops_u.lit_c ) { + if ( c3n == u3u_munmap(len_d, byt_y) ) { + u3l_log("lite: unable to unmap ivory pill at %s\n", + u3_Host.ops_u.lit_c); + exit(1); + } + } +} + /* u3_king_commence(): start the daemon */ void @@ -726,21 +762,7 @@ u3_king_commence() // boot the ivory pill // - { - u3_noun lit; - - if ( 0 != u3_Host.ops_u.lit_c ) { - lit = u3m_file(u3_Host.ops_u.lit_c); - } - else { - lit = u3i_bytes(u3_Ivory_pill_len, u3_Ivory_pill); - } - - if ( c3n == u3v_boot_lite(lit)) { - u3l_log("lite: boot failed\r\n"); - exit(1); - } - } + _king_boot_ivory(); // disable core dumps (due to lmdb size) // From d676d034cc0ee31512eb5ff6460f44930d72ab97 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 14:38:49 -0700 Subject: [PATCH 088/123] vere: refactors ipc-framing api (newt.c) --- pkg/urbit/include/vere/vere.h | 7 +-- pkg/urbit/vere/lord.c | 106 ++++++++++++++++++---------------- pkg/urbit/vere/newt.c | 43 +++++++------- pkg/urbit/worker/main.c | 15 +++-- 4 files changed, 92 insertions(+), 79 deletions(-) diff 
--git a/pkg/urbit/include/vere/vere.h b/pkg/urbit/include/vere/vere.h index de8379d3ba..b47dd50d09 100644 --- a/pkg/urbit/include/vere/vere.h +++ b/pkg/urbit/include/vere/vere.h @@ -40,7 +40,7 @@ /* u3_moor_poke: poke callback function. */ - typedef void (*u3_moor_poke)(void*, u3_atom); + typedef void (*u3_moor_poke)(void*, c3_d, c3_y*); /* u3_moor_bail: bailout callback function. */ @@ -407,7 +407,6 @@ */ typedef struct _u3_writ { struct timeval tim_u; // time enqueued - u3_atom mat; // serialized struct _u3_writ* nex_u; // next in queue u3_writ_type typ_e; // type-tagged union { // @@ -1169,10 +1168,10 @@ void u3_newt_decode(u3_moat* mot_u, c3_y* buf_y, c3_d len_d); - /* u3_newt_write(): write atom to stream; free atom. + /* u3_newt_send(): write buffer to stream. */ void - u3_newt_write(u3_mojo* moj_u, u3_atom mat); + u3_newt_send(u3_mojo* moj_u, c3_d len_d, c3_y* byt_y); /* u3_newt_read_sync(): start reading; multiple msgs synchronous. */ diff --git a/pkg/urbit/vere/lord.c b/pkg/urbit/vere/lord.c index fef5584c5d..2fc2762ba1 100644 --- a/pkg/urbit/vere/lord.c +++ b/pkg/urbit/vere/lord.c @@ -652,14 +652,12 @@ _lord_plea_work(u3_lord* god_u, u3_noun dat) /* _lord_on_plea(): handle plea from serf. */ static void -_lord_on_plea(void* ptr_v, u3_noun mat) +_lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) { u3_lord* god_u = ptr_v; - u3_noun jar = u3ke_cue(mat); - u3_noun tag, dat; + u3_noun tag, dat, jar = u3ke_cue(u3i_bytes(len_d, byt_y)); if ( c3n == u3r_cell(jar, &tag, &dat) ) { - u3m_p("jar", jar); return _lord_plea_foul(god_u, 0, jar); } @@ -706,64 +704,62 @@ _lord_writ_new(u3_lord* god_u) return wit_u; } -/* _lord_writ_jam(): serialize writ. +/* _lord_writ_make(): cons writ. 
*/ -static void -_lord_writ_jam(u3_lord* god_u, u3_writ* wit_u) +static u3_noun +_lord_writ_make(u3_lord* god_u, u3_writ* wit_u) { - if ( 0 == wit_u->mat ) { - u3_noun msg; + u3_noun msg; - switch ( wit_u->typ_e ) { - default: c3_assert(0); + switch ( wit_u->typ_e ) { + default: c3_assert(0); - case u3_writ_work: { - u3_noun mil = u3i_words(1, &wit_u->wok_u.egg_u->mil_w); - msg = u3nt(c3__work, mil, u3k(wit_u->wok_u.job)); - } break; + case u3_writ_work: { + u3_noun mil = u3i_words(1, &wit_u->wok_u.egg_u->mil_w); + msg = u3nt(c3__work, mil, u3k(wit_u->wok_u.job)); + } break; - case u3_writ_peek: { - msg = u3nc(c3__peek, u3nq(0, // XX support timeouts - u3k(wit_u->pek_u->now), - u3k(wit_u->pek_u->gan), - u3k(wit_u->pek_u->ful))); - } break; + case u3_writ_peek: { + msg = u3nc(c3__peek, u3nq(0, // XX support timeouts + u3k(wit_u->pek_u->now), + u3k(wit_u->pek_u->gan), + u3k(wit_u->pek_u->ful))); + } break; - case u3_writ_play: { - u3_fact* tac_u = wit_u->fon_u.ext_u; - c3_d eve_d = tac_u->eve_d; - u3_noun lit = u3_nul; + case u3_writ_play: { + u3_fact* tac_u = wit_u->fon_u.ext_u; + c3_d eve_d = tac_u->eve_d; + u3_noun lit = u3_nul; - while ( tac_u ) { - lit = u3nc(u3k(tac_u->job), lit); - tac_u = tac_u->nex_u; - } + while ( tac_u ) { + lit = u3nc(u3k(tac_u->job), lit); + tac_u = tac_u->nex_u; + } - msg = u3nt(c3__play, u3i_chubs(1, &eve_d), u3kb_flop(lit)); + msg = u3nt(c3__play, u3i_chubs(1, &eve_d), u3kb_flop(lit)); - } break; + } break; - case u3_writ_save: { - msg = u3nt(c3__live, c3__save, u3i_chubs(1, &god_u->eve_d)); - } break; + case u3_writ_save: { + msg = u3nt(c3__live, c3__save, u3i_chubs(1, &god_u->eve_d)); + } break; - case u3_writ_cram: { - msg = u3nt(c3__live, c3__cram, u3i_chubs(1, &god_u->eve_d)); - } break; + case u3_writ_cram: { + msg = u3nt(c3__live, c3__cram, u3i_chubs(1, &god_u->eve_d)); + } break; - case u3_writ_pack: { - msg = u3nt(c3__live, c3__pack, u3_nul); - } break; + case u3_writ_pack: { + msg = u3nt(c3__live, c3__pack, u3_nul); + } 
break; - case u3_writ_exit: { - // requested exit code is always 0 - // - msg = u3nt(c3__live, c3__exit, 0); - } break; - } - - wit_u->mat = u3ke_jam(msg); + case u3_writ_exit: { + // requested exit code is always 0 + // + msg = u3nt(c3__live, c3__exit, 0); + } break; } + + return msg; } /* _lord_writ_send(): send writ to serf. @@ -778,9 +774,17 @@ _lord_writ_send(u3_lord* god_u, u3_writ* wit_u) god_u->inn_u.bal_f = _lord_bail_noop; } - _lord_writ_jam(god_u, wit_u); - u3_newt_write(&god_u->inn_u, wit_u->mat); - wit_u->mat = 0; + { + u3_noun jar = _lord_writ_make(god_u, wit_u); + u3_noun mat = u3ke_jam(jar); + c3_w len_w = u3r_met(3, mat); + c3_y* byt_y = c3_malloc(len_w); + + u3r_bytes(0, len_w, byt_y, mat); + u3_newt_send(&god_u->inn_u, len_w, byt_y); + + u3z(mat); + } } /* _lord_writ_plan(): enqueue a writ and send. diff --git a/pkg/urbit/vere/newt.c b/pkg/urbit/vere/newt.c index e7cfe2be50..3b349a46cd 100644 --- a/pkg/urbit/vere/newt.c +++ b/pkg/urbit/vere/newt.c @@ -70,8 +70,7 @@ _newt_meat_plan(u3_moat* mot_u, u3_meat* met_u) static void _newt_meat_poke(u3_moat* mot_u, u3_meat* met_u) { - u3_noun mat = u3i_bytes((c3_w)met_u->len_d, met_u->hun_y); - mot_u->pok_f(mot_u->ptr_v, mat); + mot_u->pok_f(mot_u->ptr_v, met_u->len_d, met_u->hun_y); c3_free(met_u); } @@ -388,7 +387,8 @@ u3_newt_moat_info(u3_moat* mot_u) typedef struct _n_req { uv_write_t wri_u; u3_mojo* moj_u; - c3_y buf_y[0]; + c3_y* buf_y; + c3_y len_y[8]; } n_req; /* _newt_write_cb(): generic write callback. @@ -399,6 +399,7 @@ _newt_write_cb(uv_write_t* wri_u, c3_i sas_i) n_req* req_u = (n_req*)wri_u; u3_mojo* moj_u = req_u->moj_u; + c3_free(req_u->buf_y); c3_free(req_u); if ( 0 != sas_i ) { @@ -435,35 +436,37 @@ u3_newt_mojo_stop(u3_mojo* moj_u, u3_moor_bail bal_f) uv_close((uv_handle_t*)&moj_u->pyp_u, _mojo_stop_cb); } -/* u3_newt_write(): write atom to stream; free atom. +/* u3_newt_send(): write buffer to stream. 
*/ void -u3_newt_write(u3_mojo* moj_u, u3_atom mat) +u3_newt_send(u3_mojo* moj_u, c3_d len_d, c3_y* byt_y) { - c3_w len_w = u3r_met(3, mat); - n_req* req_u = c3_malloc(8 + len_w + sizeof(*req_u)); + n_req* req_u = c3_malloc(sizeof(*req_u)); req_u->moj_u = moj_u; + req_u->buf_y = byt_y; - // write header; c3_d is futureproofing + // write header // - req_u->buf_y[0] = ((len_w >> 0) & 0xff); - req_u->buf_y[1] = ((len_w >> 8) & 0xff); - req_u->buf_y[2] = ((len_w >> 16) & 0xff); - req_u->buf_y[3] = ((len_w >> 24) & 0xff); - req_u->buf_y[4] = req_u->buf_y[5] = req_u->buf_y[6] = req_u->buf_y[7] = 0; - - // write payload - // - u3r_bytes(0, len_w, req_u->buf_y + 8, mat); - u3z(mat); + req_u->len_y[0] = ( len_d & 0xff); + req_u->len_y[1] = ((len_d >> 8) & 0xff); + req_u->len_y[2] = ((len_d >> 16) & 0xff); + req_u->len_y[3] = ((len_d >> 24) & 0xff); + req_u->len_y[4] = ((len_d >> 32) & 0xff); + req_u->len_y[5] = ((len_d >> 40) & 0xff); + req_u->len_y[6] = ((len_d >> 48) & 0xff); + req_u->len_y[7] = ((len_d >> 56) & 0xff); { - uv_buf_t buf_u = uv_buf_init((c3_c*)req_u->buf_y, 8 + len_w); + uv_buf_t buf_u[2] = { + uv_buf_init((c3_c*)req_u->len_y, 8), + uv_buf_init((c3_c*)req_u->buf_y, len_d) + }; + c3_i sas_i; if ( 0 != (sas_i = uv_write(&req_u->wri_u, (uv_stream_t*)&moj_u->pyp_u, - &buf_u, 1, + buf_u, 2, _newt_write_cb)) ) { c3_free(req_u); diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index d7060d6b44..84baa65e47 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -42,7 +42,14 @@ _cw_serf_fail(void* vod_p, const c3_c* wut_c) static void _cw_serf_send(u3_noun pel) { - u3_newt_write(&out_u, u3ke_jam(pel)); + u3_noun mat = u3ke_jam(pel); + c3_d len_w = u3r_met(3, mat); + c3_y* byt_y = c3_malloc(len_w); + + u3r_bytes(0, len_w, byt_y, mat); + u3_newt_send(&out_u, len_w, byt_y); + + u3z(mat); } /* _cw_serf_send_slog(): send hint output (hod is [priority tank]). 
@@ -64,11 +71,11 @@ _cw_serf_send_stdr(c3_c* str_c) /* _cw_serf_writ(): */ static void -_cw_serf_writ(void* vod_p, u3_noun mat) +_cw_serf_writ(void* vod_p, c3_d len_d, c3_y* byt_y) { - u3_noun ret; + u3_noun ret, jar = u3ke_cue(u3i_bytes(len_d, byt_y)); - if ( c3n == u3_serf_writ(&u3V, u3ke_cue(mat), &ret) ) { + if ( c3n == u3_serf_writ(&u3V, jar, &ret) ) { _cw_serf_fail(0, "bad jar"); } else { From f6bd14c7c6248a348c7835a20321fa4f997611a0 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 14:46:33 -0700 Subject: [PATCH 089/123] ur: cleans up and comments defs.h --- pkg/urbit/include/ur/defs.h | 41 ++++++++++++++++++++++++------------- pkg/urbit/ur/hashcons.c | 6 +++--- 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/pkg/urbit/include/ur/defs.h b/pkg/urbit/include/ur/defs.h index 605193b1d3..d3b1c6ebfa 100644 --- a/pkg/urbit/include/ur/defs.h +++ b/pkg/urbit/include/ur/defs.h @@ -5,17 +5,34 @@ #include #include -#define ur_fib10 55 -#define ur_fib11 89 -#define ur_fib12 144 -#define ur_fib33 3524578 -#define ur_fib34 5702887 - typedef uint8_t ur_bool_t; #define ur_min(a, b) ( ((a) < (b)) ? (a) : (b) ) #define ur_max(a, b) ( ((a) > (b)) ? 
(a) : (b) ) +/* +** fibonacci constants, for convenient initialization of +** objects intended to be reallocated with fibonacci growth +*/ +#define ur_fib10 55 +#define ur_fib11 89 +#define ur_fib12 144 +#define ur_fib27 196418 +#define ur_fib28 317811 +#define ur_fib33 3524578 +#define ur_fib34 5702887 + +/* +** bit-masking helpers +*/ +#define ur_mask_3(a) (a & 0x7) +#define ur_mask_8(a) (a & 0xff) +#define ur_mask_31(a) (a & 0x7fffffff) +#define ur_mask_62(a) (a & 0x3fffffffffffffffULL) + +/* +** atom measurement +*/ #if (32 == (CHAR_BIT * __SIZEOF_INT__)) # define ur_lz32 __builtin_clz # define ur_tz32 __builtin_ctz @@ -39,20 +56,16 @@ typedef uint8_t ur_bool_t; #define ur_lz8(a) ( ur_lz32(a) - 24 ) #define ur_tz8 ur_tz32 -#define ur_mask_3(a) (a & 0x7) -#define ur_mask_8(a) (a & 0xff) -#define ur_mask_31(a) (a & 0x7fffffff) -#define ur_mask_62(a) (a & 0x3fffffffffffffffULL) - #define ur_met0_8(a) ( (a) ? 8 - ur_lz8(a) : 0 ) #define ur_met0_32(a) ( (a) ? 32 - ur_lz32(a) : 0 ) #define ur_met0_64(a) ( (a) ? 
64 - ur_lz64(a) : 0 ) +/* +** unsafe wrt trailing null bytes, which are invalid +*/ inline uint64_t -ur_met0_bytes(uint8_t *byt, uint64_t len) +ur_met0_bytes_unsafe(uint8_t *byt, uint64_t len) { - // XX requires no trailing null bytes - // uint64_t last = len - 1; return (last << 3) + ur_met0_8(byt[last]); } diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index a4aca7d8b5..514fed6675 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -15,7 +15,7 @@ // declarations of inline functions // uint64_t -ur_met0_bytes(uint8_t *byt, uint64_t len); +ur_met0_bytes_unsafe(uint8_t *byt, uint64_t len); ur_mug ur_mug_bytes(const uint8_t *byt, uint64_t len) @@ -534,7 +534,7 @@ ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) uint64_t len = r->atoms.lens[idx]; uint8_t *byt = r->atoms.bytes[idx]; - m_bit = ur_met0_bytes(byt, len); + m_bit = ur_met0_bytes_unsafe(byt, len); } switch ( bloq ) { @@ -652,7 +652,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) // produce a direct atom if possible // - if ( 62 >= ur_met0_bytes(byt, len) ) { + if ( 62 >= ur_met0_bytes_unsafe(byt, len) ) { uint64_t i, direct = 0; for ( i = 0; i < len; i++ ) { From f49963ffcfbb7370aea56d52f42a34dfbe58615e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 16:42:05 -0700 Subject: [PATCH 090/123] ur: refactors and adds comments to bitstream.h --- pkg/urbit/include/ur/bitstream.h | 133 +++++++++++++++++++++++++++---- pkg/urbit/tests/ur_tests.c | 38 ++++----- pkg/urbit/ur/bitstream.c | 23 ++++-- 3 files changed, 153 insertions(+), 41 deletions(-) diff --git a/pkg/urbit/include/ur/bitstream.h b/pkg/urbit/include/ur/bitstream.h index 5a6d5239cd..66781f569e 100644 --- a/pkg/urbit/include/ur/bitstream.h +++ b/pkg/urbit/include/ur/bitstream.h @@ -3,25 +3,13 @@ #include -typedef enum { - ur_cue_good = 0, - ur_cue_back = 1, - ur_cue_gone = 2, - ur_cue_meme = 3 -} ur_cue_res_e; - -typedef enum { - ur_jam_atom = 0, - ur_jam_cell = 1, - ur_jam_back = 2 -} 
ur_cue_tag_e; - /* ** stateful bitstream reader, backed by a byte-buffer, -** supporting a variety of read sizes/patterns. +** maintaining a 64-bit bit-cursor, and supporting a variety +** of read sizes and patterns. ** ** NB: ur_bsr*_any() functions behave as if the stream were infinite, -** subject to overall limit of a 64-bit bit-cursor. +** subject to the overall limit of the bit-cursor. ** */ typedef struct ur_bsr_s { @@ -31,6 +19,31 @@ typedef struct ur_bsr_s { const uint8_t *bytes; } ur_bsr_t; +/* +** generalized bitstream-reader/cue response enum +*/ +typedef enum { + ur_cue_good = 0, // successful read + ur_cue_back = 1, // missing backreference + ur_cue_gone = 2, // read off the end of the stream + ur_cue_meme = 3 // exceeded memory representation +} ur_cue_res_e; + +/* +** jam/cue type tag enumeration +*/ +typedef enum { + ur_jam_atom = 0, + ur_jam_cell = 1, + ur_jam_back = 2 +} ur_cue_tag_e; + +/* +** stateful bitstream writer, backed by a byte-buffer automatically +** reallocated with fibonacci growth, maintaining a 64-bit bit-cursor, +** and supporting a variety of write sizes and patterns. +** +*/ typedef struct ur_bsw_s { uint64_t prev; uint64_t size; @@ -41,83 +54,169 @@ typedef struct ur_bsw_s { } ur_bsw_t; /* -** initialize bitstream and check for 64-bit bit-cursor overflow. +** initialize bitstream-reader and check for 64-bit bit-cursor overflow. */ ur_cue_res_e ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes); +/* +** validate bitstream-reader invariants. +*/ ur_bool_t ur_bsr_sane(ur_bsr_t *bsr); +/* +** read a bit, failing at EOS +*/ ur_cue_res_e ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); +/* +** read a bit +*/ uint8_t ur_bsr_bit_any(ur_bsr_t *bsr); +/* +** read N (up to 8) bits into a uint8. +*/ uint8_t ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); +/* +** read N (up to 32) bits into a uint32. +*/ uint32_t ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); +/* +** read N (up to 64) bits into a uint64.
+*/ uint64_t ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); +/* +** read N bits into a zero-initialized byte array. +*/ void ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); +/* +** advance the bitstream cursor as if we had read N bits. +*/ void ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len); +/* +** read a jam/cue type tag. +*/ ur_cue_res_e ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); +/* +** read a binary exponent, producing the binary log. +** +** read N (up to 255) zero bits followed by a 1, produce N. +*/ ur_cue_res_e -ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); +ur_bsr_log(ur_bsr_t *bsr, uint8_t *out); +/* +** read an atomic run-length (a la +rub). +** +** read a binary log N, then read N (up to 64) bits, +** produce (N-bits ^ (1 << N)) +*/ ur_cue_res_e ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); +/* +** reallocate bitstream write buffer with max(fibonacci, step) growth. +*/ void ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); +/* +** validate bitstream-writer invariants. +*/ ur_bool_t ur_bsw_sane(ur_bsw_t *bsw); +/* +** write a bit +*/ void ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); +/* +** write N (up to 8) bits of a uint8. +*/ void ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); +/* +** write N (up to 32) bits of a uint32. +*/ void ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); +/* +** write N (up to 64) bits of a uint64. +*/ void ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write N bits of a byte array. +** +** NB: [byt] must contain at least N bits. +*/ void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +/* +** write a binary exponent (N zero bits, followed by a 1). +*/ void ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); +/* +** write N (up to 64) run-length prefixed bits (a la +mat). +*/ void ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write N run-length prefixed bits (a la +mat). +** +** NB: [byt] must contain at least N bits. 
+*/ void ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +/* +** write a backref tag (1, 1) and N (up to 64) run-length prefixed bits. +*/ void ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write an atom tag (0) and N (up to 64) run-length prefixed bits. +*/ void ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write an atom tag (0) and N run-length prefixed bits. +** +** NB: [byt] must contain at least N bits. +*/ void ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +/* +** write a cell tag (1, 0) +*/ void ur_bsw_cell(ur_bsw_t *bsw); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 2470ef5a7e..f65a1fd4a2 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1321,7 +1321,7 @@ _bsr_cmp_check(const char* cap, } static ur_cue_res_e -_bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) +_bsr_log_slow(ur_bsr_t *bsr, uint8_t *out) { ur_cue_res_e res; uint8_t bit, i = 0; @@ -1341,7 +1341,7 @@ _bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) } static int -_test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) +_test_bsr_log_loop(const char *cap, uint8_t len, uint8_t val) { int ret = 1; ur_bsr_t a, b; @@ -1360,8 +1360,8 @@ _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) memset(bytes, 0x0, j); memset(bytes + j, val, len - j); - e = _bsr_rub_log_slow(&a, &c); - f = ur_bsr_rub_log(&b, &d); + e = _bsr_log_slow(&a, &c); + f = ur_bsr_log(&b, &d); ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); } @@ -1373,29 +1373,29 @@ _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) } static int -_test_bsr_rub_log(void) +_test_bsr_log(void) { - int ret = _test_bsr_rub_log_loop("bsr rub_log nought", 0, 0x0) - & _test_bsr_rub_log_loop("bsr rub_log ones odd", 3, 0xff) - & _test_bsr_rub_log_loop("bsr rub_log ones even", 4, 0xff) - & _test_bsr_rub_log_loop("bsr rub_log ones big", 50, 0xff) - & _test_bsr_rub_log_loop("bsr rub_log zeros odd", 5, 0x0) 
- & _test_bsr_rub_log_loop("bsr rub_log zeros even", 6, 0x0) - & _test_bsr_rub_log_loop("bsr rub_log zeros big", 50, 0x0); + int ret = _test_bsr_log_loop("bsr log nought", 0, 0x0) + & _test_bsr_log_loop("bsr log ones odd", 3, 0xff) + & _test_bsr_log_loop("bsr log ones even", 4, 0xff) + & _test_bsr_log_loop("bsr log ones big", 50, 0xff) + & _test_bsr_log_loop("bsr log zeros odd", 5, 0x0) + & _test_bsr_log_loop("bsr log zeros even", 6, 0x0) + & _test_bsr_log_loop("bsr log zeros big", 50, 0x0); { uint8_t i, j = 5; char cap[1024]; for ( i = 0; i < 8; i++ ) { - snprintf(cap, 1000, "bsr rub_log 1<<%u odd", i); - ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); + snprintf(cap, 1000, "bsr log 1<<%u odd", i); + ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i); - snprintf(cap, 1000, "bsr rub_log 1<<%u even", i); - ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); + snprintf(cap, 1000, "bsr log 1<<%u even", i); + ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i); - snprintf(cap, 1000, "bsr rub_log 1<<%u big", i); - ret &= _test_bsr_rub_log_loop((const char*)cap, 50, 0x1 << i); + snprintf(cap, 1000, "bsr log 1<<%u big", i); + ret &= _test_bsr_log_loop((const char*)cap, 50, 0x1 << i); } } @@ -1480,7 +1480,7 @@ _test_bsr(void) & _test_bsr8() & _test_bsr32() & _test_bsr64() - & _test_bsr_rub_log() + & _test_bsr_log() & _test_bsr_tag(); } diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index b91a107eef..d72548257d 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -596,7 +596,7 @@ ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) } static inline ur_cue_res_e -_bsr_rub_log_meme(ur_bsr_t *bsr) +_bsr_log_meme(ur_bsr_t *bsr) { bsr->bits += 256; bsr->bytes += 32; @@ -605,7 +605,7 @@ _bsr_rub_log_meme(ur_bsr_t *bsr) } ur_cue_res_e -ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) +ur_bsr_log(ur_bsr_t *bsr, uint8_t *out) { uint64_t left = bsr->left; @@ -621,7 +621,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) 
while ( !byt ) { if ( 32 == skip ) { - return _bsr_rub_log_meme(bsr); + return _bsr_log_meme(bsr); } skip++; @@ -637,7 +637,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); if ( 255 < zeros ) { - return _bsr_rub_log_meme(bsr); + return _bsr_log_meme(bsr); } else { uint32_t bits = off + 1 + zeros; @@ -663,7 +663,7 @@ ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) ur_cue_res_e res; uint8_t len; - if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { + if ( ur_cue_good != (res = ur_bsr_log(bsr, &len)) ) { return res; } else if ( 64 <= len ) { @@ -688,6 +688,19 @@ ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) return ur_cue_good; } +/* +** bitstream-writer operations follow a pattern of an unsafe (inline) +** implementation, unsafe wrt to buffer size and reallocation, +** wrapped in a public function with buffer size checks. +** +** higher-level operations made up of multiple discrete writes check +** the buffer size once for all involved writes. +** +** this pattern should be easily adaptable to an alternate bitstream-writer +** implementation that flushes accumulated output periodically instead +** of reallocating the output buffer. 
+*/ + void ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) { From f45dfaed367348ea907fbda6db0157430ddae2d6 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 17:24:22 -0700 Subject: [PATCH 091/123] ur: refactors and adds comments to hashcons.h --- pkg/urbit/bench/ur_bench.c | 12 +- pkg/urbit/include/ur/hashcons.h | 119 +++++++++++++++---- pkg/urbit/noun/urth.c | 8 +- pkg/urbit/tests/ur_tests.c | 4 +- pkg/urbit/ur/hashcons.c | 199 ++++++++++++++++---------------- 5 files changed, 208 insertions(+), 134 deletions(-) diff --git a/pkg/urbit/bench/ur_bench.c b/pkg/urbit/bench/ur_bench.c index af99346ffc..8a29de65a2 100644 --- a/pkg/urbit/bench/ur_bench.c +++ b/pkg/urbit/bench/ur_bench.c @@ -86,7 +86,7 @@ _jam_bench(void) } while ( 1 ) { - ur_root_t* rot_u = ur_hcon_init(); + ur_root_t* rot_u = ur_root_init(); c3_d len_d; c3_y* byt_y; ur_nref ref; @@ -138,7 +138,7 @@ _jam_bench(void) fprintf(stderr, " jam cons unsafe: %u ms\r\n", mil_w); } - ur_hcon_free(rot_u); + ur_root_free(rot_u); break; } @@ -291,7 +291,7 @@ _cue_bench(void) gettimeofday(&b4, 0); { - ur_root_t* rot_u = ur_hcon_init(); + ur_root_t* rot_u = ur_root_init(); ur_nref ref; c3_w len_w = u3r_met(3, vat); // XX assumes little-endian @@ -304,7 +304,7 @@ _cue_bench(void) ur_cue(rot_u, len_w, byt_y, &ref); } - ur_hcon_free(rot_u); + ur_root_free(rot_u); } gettimeofday(&f2, 0); @@ -327,9 +327,9 @@ _cue_bench(void) : (c3_y*)((u3a_atom*)u3a_to_ptr(vat))->buf_w; for ( i_w = 0; i_w < max_w; i_w++ ) { - rot_u = ur_hcon_init(); + rot_u = ur_root_init(); ur_cue(rot_u, len_w, byt_y, &ref); - ur_hcon_free(rot_u); + ur_root_free(rot_u); } } diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 797ab74cfc..3851d47ac9 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -8,23 +8,37 @@ #include "ur/defs.h" -#define ur_nref_tag(ref) ( ref >> 62 ) -#define ur_nref_idx(ref) ur_mask_62(ref) - -#define ur_pail_max 10 - -typedef uint32_t ur_mug; +/* +** noun 
references are represented by 64 bits, with the top 2 bits +** reserved for discriminable tags. +*/ typedef uint64_t ur_nref; + typedef enum { ur_direct = 0, ur_iatom = 1, ur_icell = 2, } ur_tag; -typedef struct ur_nvec_s { - uint64_t fill; - ur_nref* refs; -} ur_nvec_t; +#define ur_nref_tag(ref) ( ref >> 62 ) +#define ur_nref_idx(ref) ur_mask_62(ref) + +/* +** 31-bit, non-zero, murmur3-based noun hash. +*/ +typedef uint32_t ur_mug; + +/* +** associative structures (dictionaries) of noun references, +** distributed by mug across fixed-size buckets (pails), +** reallocated with fibonacci growth once a bucket is full. +** +** - ur_dict_t: set of noun references +** - ur_dict32_t: map from noun reference to uint32 +** - ur_dict64_t: map from noun reference to uint64 +*/ + +#define ur_pail_max 10 typedef struct ur_pail32_s { uint8_t fill; @@ -61,6 +75,10 @@ typedef struct ur_dict_s { ur_pail_t *buckets; } ur_dict_t; +/* +** cells are hash-consed, atoms are deduplicated (byte-array comparison), +** mug hashes are stored, and noun references are unique within a root. +*/ typedef struct ur_cells_s { ur_dict_t dict; uint64_t prev; @@ -86,9 +104,22 @@ typedef struct ur_root_s { ur_atoms_t atoms; } ur_root_t; -uint64_t -ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref); +/* +** a vector of noun references. +*/ +typedef struct ur_nvec_s { + uint64_t fill; + ur_nref* refs; +} ur_nvec_t; +/* +** type-specific dictionary operations. +** +** NB: [r] is only used to retrieve the stored mug of cells and +** indirect atoms. If all references are direct atoms (62-bits or less), +** [r] can be null. This option is used extensively in cue (de-serialization) +** implementations, where the dictionary keys are bit-cursors.
+*/ void ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size); @@ -122,12 +153,27 @@ ur_dict_get(ur_root_t *r, ur_dict_t *dict, ur_nref ref); void ur_dict_put(ur_root_t *r, ur_dict_t *dict, ur_nref ref); -void -ur_dict_free(ur_dict_t *dict); - void ur_dict_wipe(ur_dict_t *dict); +/* +** free the buckets of any dictionary (cast to ur_dict_t*). +*/ +void +ur_dict_free(ur_dict_t *dict); + +/* +** measure the bloq (binary-exponent) length of an atom in [r] +*/ +uint64_t +ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref); + +/* +** find or allocate an atom in [r] +** +** unsafe variant is unsafe wrt allocation (byte arrays must be +** allocated with system malloc) and trailing null bytes (not allowed). +*/ ur_nref ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len); @@ -137,24 +183,49 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len); ur_nref ur_coin64(ur_root_t *r, uint64_t n); +/* +** find or construct a cell in [r] +*/ ur_nref ur_cons(ur_root_t *r, ur_nref hed, ur_nref tal); -void -ur_hcon_info(FILE *f, ur_root_t *r); - -void -ur_hcon_free(ur_root_t *r); +/* +** calculate the mug of [ref], or produce the stored value in [r]. +*/ +ur_mug +ur_nref_mug(ur_root_t *r, ur_nref ref); +/* +** initialize a noun arena (root). +*/ ur_root_t* -ur_hcon_init(void); +ur_root_init(void); + +/* +** print root details to [f] +*/ +void +ur_root_info(FILE *f, ur_root_t *r); + +/* +** dispose all allocations in [r] +*/ +void +ur_root_free(ur_root_t *r); + +/* +** initialize or dispose a vector of noun references +*/ +void +ur_nvec_init(ur_nvec_t *v, uint64_t size); void ur_nvec_free(ur_nvec_t *v); -void -ur_nvec_init(ur_nvec_t *v, uint64_t size); - +/* +** arbitrary depth-first, pre-order noun traversal, where +** cells can optionally short-circuit.
+*/ void ur_walk_fore(ur_root_t *r, ur_nref ref, diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index a791a3f65c..bf42d75823 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -385,14 +385,14 @@ _cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) // reallocate kernel and cold jet state // - ur_root_t* rot_u = ur_hcon_init(); + ur_root_t* rot_u = ur_root_init(); ur_nvec_t cod_u; ur_nref ken = _cu_all_from_loom(rot_u, &cod_u); // print [rot_u] measurements // if ( fil_u ) { - ur_hcon_info(fil_u, rot_u); + ur_root_info(fil_u, rot_u); fprintf(stderr, "\r\n"); } @@ -441,7 +441,7 @@ u3u_uniq(void) // dispose off-loom structures // ur_nvec_free(&cod_u); - ur_hcon_free(rot_u); + ur_root_free(rot_u); } /* _cu_rock_path(): format rock path. @@ -643,7 +643,7 @@ u3u_cram(c3_c* dir_c, c3_d eve_d) // dispose off-loom structures // ur_nvec_free(&cod_u); - ur_hcon_free(rot_u); + ur_root_free(rot_u); } // write jam-buffer into pier diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index f65a1fd4a2..aa4a83c69d 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1552,7 +1552,7 @@ _test_cue_spec(const char *cap, static int _test_jam_cue(void) { - ur_root_t *r = ur_hcon_init(); + ur_root_t *r = ur_root_init(); int ret = 1; # define NC(a, b) ur_cons(r, a, b) @@ -1638,6 +1638,8 @@ _test_jam_cue(void) TEST_CASE("wide", ur_coin_bytes(r, inp, sizeof(inp))); } + ur_root_free(r); + return ret; } diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 514fed6675..b4b4c65209 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -17,95 +17,6 @@ uint64_t ur_met0_bytes_unsafe(uint8_t *byt, uint64_t len); -ur_mug -ur_mug_bytes(const uint8_t *byt, uint64_t len) -{ - uint32_t seed = 0xcafebabe; - ur_mug mug; - - while ( 1 ) { - uint32_t raw; - MurmurHash3_x86_32(byt, len, seed, &raw); - mug = (raw >> 31) ^ ( ur_mask_31(raw) ); - - if ( 0 == mug ) { - seed++; - } - else { - return mug; - } - } 
-} - -ur_mug -ur_mug32(uint32_t x) -{ - uint8_t byt[4] = { - ur_mask_8(x >> 0), - ur_mask_8(x >> 8), - ur_mask_8(x >> 16), - ur_mask_8(x >> 24) - }; - - return ur_mug_bytes(byt, ur_met3_32(x)); -} - -ur_mug -ur_mug64(uint64_t x) -{ - uint8_t byt[8] = { - ur_mask_8(x >> 0), - ur_mask_8(x >> 8), - ur_mask_8(x >> 16), - ur_mask_8(x >> 24), - ur_mask_8(x >> 32), - ur_mask_8(x >> 40), - ur_mask_8(x >> 48), - ur_mask_8(x >> 56) - }; - - return ur_mug_bytes(byt, ur_met3_64(x)); -} - -ur_mug -ur_mug_both(ur_mug hed, ur_mug tal) -{ - // XX not correct per u3r_mug, but necessary to avoid collisions - // - return ur_mug32(hed ^ (0x7fffffff ^ ur_mug32(tal))); -} - -ur_mug -ur_nref_mug(ur_root_t *r, ur_nref ref) -{ - switch ( ur_nref_tag(ref) ) { - default: assert(0); - - case ur_direct: return ur_mug64(ref); - case ur_iatom: return r->atoms.mugs[ur_nref_idx(ref)]; - case ur_icell: return r->cells.mugs[ur_nref_idx(ref)]; - } -} - -ur_bool_t -ur_deep(ur_nref ref) -{ - return ur_icell == ur_nref_tag(ref); -} - -ur_nref -ur_head(ur_root_t *r, ur_nref ref) -{ - assert( ur_deep(ref) ); - return r->cells.heads[ur_nref_idx(ref)]; -} - -ur_nref -ur_tail(ur_root_t *r, ur_nref ref) -{ - assert( ur_deep(ref) ); - return r->cells.tails[ur_nref_idx(ref)]; -} void ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) { @@ -393,13 +304,6 @@ ur_dict_put(ur_root_t *r, ur_dict_t *dict, ur_nref ref) } } -void -ur_dict_free(ur_dict_t *dict) -{ - free(dict->buckets); - dict->buckets = 0; -} - void ur_dict_wipe(ur_dict_t *dict) { @@ -411,6 +315,103 @@ ur_dict_wipe(ur_dict_t *dict) } } +void +ur_dict_free(ur_dict_t *dict) +{ + free(dict->buckets); + dict->buckets = 0; +} + +ur_mug +ur_mug_bytes(const uint8_t *byt, uint64_t len) +{ + uint32_t seed = 0xcafebabe; + ur_mug mug; + + while ( 1 ) { + uint32_t raw; + MurmurHash3_x86_32(byt, len, seed, &raw); + mug = (raw >> 31) ^ ( ur_mask_31(raw) ); + + if ( 0 == mug ) { + seed++; + } + else { + return mug; + } + } +} + +ur_mug 
+ur_mug32(uint32_t x) +{ + uint8_t byt[4] = { + ur_mask_8(x >> 0), + ur_mask_8(x >> 8), + ur_mask_8(x >> 16), + ur_mask_8(x >> 24) + }; + + return ur_mug_bytes(byt, ur_met3_32(x)); +} + +ur_mug +ur_mug64(uint64_t x) +{ + uint8_t byt[8] = { + ur_mask_8(x >> 0), + ur_mask_8(x >> 8), + ur_mask_8(x >> 16), + ur_mask_8(x >> 24), + ur_mask_8(x >> 32), + ur_mask_8(x >> 40), + ur_mask_8(x >> 48), + ur_mask_8(x >> 56) + }; + + return ur_mug_bytes(byt, ur_met3_64(x)); +} + +ur_mug +ur_mug_both(ur_mug hed, ur_mug tal) +{ + // XX not correct per u3r_mug, but necessary to avoid collisions + // + return ur_mug32(hed ^ (0x7fffffff ^ ur_mug32(tal))); +} + +ur_mug +ur_nref_mug(ur_root_t *r, ur_nref ref) +{ + switch ( ur_nref_tag(ref) ) { + default: assert(0); + + case ur_direct: return ur_mug64(ref); + case ur_iatom: return r->atoms.mugs[ur_nref_idx(ref)]; + case ur_icell: return r->cells.mugs[ur_nref_idx(ref)]; + } +} + +ur_bool_t +ur_deep(ur_nref ref) +{ + return ur_icell == ur_nref_tag(ref); +} + +ur_nref +ur_head(ur_root_t *r, ur_nref ref) +{ + assert( ur_deep(ref) ); + return r->cells.heads[ur_nref_idx(ref)]; +} + +ur_nref +ur_tail(ur_root_t *r, ur_nref ref) +{ + assert( ur_deep(ref) ); + return r->cells.tails[ur_nref_idx(ref)]; +} + void ur_atoms_grow(ur_atoms_t *atoms) { @@ -819,7 +820,7 @@ _cells_info(FILE *f, ur_cells_t *cells) } void -ur_hcon_info(FILE *f, ur_root_t *r) +ur_root_info(FILE *f, ur_root_t *r) { uint64_t total = 0; @@ -859,7 +860,7 @@ _cells_free(ur_cells_t *cells) } void -ur_hcon_free(ur_root_t *r) +ur_root_free(ur_root_t *r) { _atoms_free(&(r->atoms)); _cells_free(&(r->cells)); @@ -867,7 +868,7 @@ ur_hcon_free(ur_root_t *r) } ur_root_t* -ur_hcon_init(void) +ur_root_init(void) { ur_root_t *r = calloc(1, sizeof(*r)); assert( r ); From 8eb8a142a6a3e689f7d0051a5f283dc36ed366b9 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 18:45:34 -0700 Subject: [PATCH 092/123] ur: adds explanatory comments to ur_tests.c --- pkg/urbit/tests/ur_tests.c | 161 
+++++++++++++++++++++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index aa4a83c69d..1aeece955b 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -7,6 +7,9 @@ #include "ur/ur.h" +/* +** initialize helper for bitstream-writer tests. +*/ static void _bsw_init(ur_bsw_t *bsw, uint64_t prev, uint64_t size) { @@ -20,6 +23,9 @@ _bsw_init(ur_bsw_t *bsw, uint64_t prev, uint64_t size) bsw->bytes = calloc(size, 1); } +/* +** check bitstream-writer test invariants. +*/ static int _bsw_bit_check(const char* cap, ur_bsw_t *bsw, uint8_t byt, uint8_t off) { @@ -44,6 +50,9 @@ _bsw_bit_check(const char* cap, ur_bsw_t *bsw, uint8_t byt, uint8_t off) return ret; } +/* +** test 8 sequential writes of a set bit. +*/ static int _test_bsw_bit_ones(void) { @@ -87,6 +96,9 @@ _test_bsw_bit_ones(void) return ret; } +/* +** test 8 sequential writes of 1 null bit. +*/ static int _test_bsw_bit_zeros(void) { @@ -130,6 +142,9 @@ _test_bsw_bit_zeros(void) return ret; } +/* +** test 8 sequential writes of alternating bits. +*/ static int _test_bsw_bit_alt(void) { @@ -181,6 +196,16 @@ _test_bsw_bit(void) & _test_bsw_bit_alt(); } +/* +** subsequent bitstream-writer tests assume the correctness of +** ur_bsw_bit(), and compare the output of a bit-at-a-time +** "golden master" with that of the relevant, higher-level operation. +** +** XX the "golden" master implementations shouldn't be in bitstream module, +** as we don't intend to run them, but it's kind of weird to implement them +** in the test itself. 
+** +*/ static int _bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t *a, ur_bsw_t *b) { @@ -218,6 +243,9 @@ _bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t return ret; } +/* +** ur_bsw8 golden master +*/ static void _bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) { @@ -230,6 +258,10 @@ _bsw8_slow(ur_bsw_t *bsw, uint8_t len, uint8_t byt) } } +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw8 and master, comparing the result each time. +*/ static int _test_bsw8_loop(const char* cap, uint8_t val) { @@ -263,6 +295,9 @@ _test_bsw8(void) & _test_bsw8_loop("bsw bits alt 2", 0x55); } +/* +** ur_bsw32 golden master +*/ static void _bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) { @@ -275,6 +310,10 @@ _bsw32_slow(ur_bsw_t *bsw, uint8_t len, uint32_t val) } } +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw32 and master, comparing the result each time. +*/ static int _test_bsw32_loop(const char* cap, uint32_t val) { @@ -308,6 +347,9 @@ _test_bsw32(void) & _test_bsw32_loop("bsw 32 alt 2", 0x55555555); } +/* +** ur_bsw64 golden master +*/ static void _bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) { @@ -320,6 +362,10 @@ _bsw64_slow(ur_bsw_t *bsw, uint8_t len, uint64_t val) } } +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw64 and master, comparing the result each time. +*/ static int _test_bsw64_loop(const char* cap, uint64_t val) { @@ -353,6 +399,9 @@ _test_bsw64(void) & _test_bsw64_loop("bsw 64 alt 2", 0x5555555555555555ULL); } +/* +** ur_bsw_bytes() golden master +*/ static void _bsw_bytes_slow(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { @@ -366,6 +415,10 @@ _bsw_bytes_slow(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) _bsw8_slow(bsw, len, byt[len_byt]); } +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw_bytes and master, comparing the result each time. 
+*/ static int _test_bsw_bytes_loop(const char* cap, uint64_t len, uint8_t val) { @@ -410,6 +463,9 @@ _test_bsw_bytes(void) & _test_bsw_bytes_loop("bsw bytes alt 2 even", 10, 0x55); } +/* +** ur_bsw_bex golden master +*/ static void _bsw_bex_slow(ur_bsw_t *bsw, uint8_t n) { @@ -421,6 +477,10 @@ _bsw_bex_slow(ur_bsw_t *bsw, uint8_t n) _bsw64_slow(bsw, n + 1, 1ULL << n); } +/* +** at varying offsets, write varying numbers of bits via +** ur_bsw_bex and master, comparing the result each time. +*/ static int _test_bsw_bex() { @@ -457,6 +517,9 @@ _test_bsw(void) & _test_bsw_bex(); } +/* +** check bitstream-reader test invariants. +*/ static int _bsr_bit_check(const char *cap, ur_bsr_t *bsr, @@ -502,6 +565,10 @@ _bsr_bit_check(const char *cap, return ret; } +/* +** read a bit 8 times from a bitstream initialized to all ones, +** checking invariants and result after each read. +*/ static int _test_bsr_bit_ones(void) { @@ -541,6 +608,10 @@ _test_bsr_bit_ones(void) return ret; } +/* +** read a bit 8 times from a bitstream initialized to all zeros, +** checking invariants and result after each read. +*/ static int _test_bsr_bit_zeros(void) { @@ -580,6 +651,10 @@ _test_bsr_bit_zeros(void) return ret; } +/* +** read a bit 8 times from a bitstream initialized to alternating zeros and ones, +** checking invariants and result after each read. +*/ static int _test_bsr_bit_alt(void) { @@ -627,6 +702,10 @@ _test_bsr_bit(void) & _test_bsr_bit_alt(); } +/* +** check bitstream-reader test invariants, after (maybe) reading +** off the end of the stream. +*/ static int _bsr_bit_any_check(const char* cap, ur_bsr_t *bsr, uint8_t off, uint64_t bits, uint8_t exp, uint8_t val) { @@ -657,6 +736,10 @@ _bsr_bit_any_check(const char* cap, ur_bsr_t *bsr, uint8_t off, uint64_t bits, u return ret; } +/* +** read a bit 17 times from a bitstream initialized to 8 ones, +** checking invariants and result after each read. 
+*/ static int _test_bsr_bit_any_ones(void) { @@ -721,6 +804,10 @@ _test_bsr_bit_any_ones(void) return ret; } +/* +** read a bit 17 times from a bitstream initialized to 8 zeros, +** checking invariants and result after each read. +*/ static int _test_bsr_bit_any_zeros(void) { @@ -785,6 +872,10 @@ _test_bsr_bit_any_zeros(void) return ret; } +/* +** read a bit 17 times from a bitstream initialized to 8 bits of alternating +** ones and zeros, checking invariants and result after each read. +*/ static int _test_bsr_bit_any_alt(void) { @@ -857,6 +948,16 @@ _test_bsr_bit_any(void) & _test_bsr_bit_any_alt(); } +/* +** subsequent bitstream-reader tests assume the correctness of +** ur_bsr_bit_any(), and compare the output of a bit-at-a-time +** "golden master" with that of the relevant, higher-level operation. +** +** XX the "golden" master implementations shouldn't be in bitstream module, +** as we don't intend to run them, but it's kind of weird to implement them +** in the test itself. +** +*/ static int _bsr_cmp_any_check(const char* cap, uint8_t off, uint8_t len, ur_bsr_t *a, ur_bsr_t *b) { @@ -901,6 +1002,9 @@ _bsr_cmp_any_check(const char* cap, uint8_t off, uint8_t len, ur_bsr_t *a, ur_bs return ret; } +/* +** ur_bsr8_any golden master +*/ static uint8_t _bsr8_any_slow(ur_bsr_t *bsr, uint8_t len) { @@ -915,6 +1019,11 @@ _bsr8_any_slow(ur_bsr_t *bsr, uint8_t len) return out; } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying number of bits via ur_bsr8_any and master, comparing +** the results and respective states each time. 
+*/ static int _test_bsr8_loop(const char *cap, uint8_t len, uint8_t val) { @@ -963,6 +1072,9 @@ _test_bsr8(void) & _test_bsr8_loop("bsr8 alt-2 2", 2, 0x55); } +/* +** ur_bsr32_any golden master +*/ static uint32_t _bsr32_any_slow(ur_bsr_t *bsr, uint8_t len) { @@ -978,6 +1090,11 @@ _bsr32_any_slow(ur_bsr_t *bsr, uint8_t len) return out; } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr32_any and master, comparing +** the results and respective states each time. +*/ static int _test_bsr32_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1035,6 +1152,9 @@ _test_bsr32(void) & _test_bsr32_loop("bsr32 alt-2 4", 4, 0x55); } +/* +** ur_bsr64_any golden master +*/ static uint64_t _bsr64_any_slow(ur_bsr_t *bsr, uint8_t len) { @@ -1050,6 +1170,11 @@ _bsr64_any_slow(ur_bsr_t *bsr, uint8_t len) return out; } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr64_any and master, comparing +** the results and respective states each time. +*/ static int _test_bsr64_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1123,6 +1248,9 @@ _test_bsr64(void) & _test_bsr64_loop("bsr64 alt-2 8", 8, 0x55); } +/* +** ur_bsr_bytes_any golden master +*/ static void _bsr_bytes_any_slow(ur_bsr_t *bsr, uint64_t len, uint8_t *out) { @@ -1137,6 +1265,11 @@ _bsr_bytes_any_slow(ur_bsr_t *bsr, uint64_t len, uint8_t *out) } } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr_bytes_any and master, comparing +** the results and respective states each time. 
+*/ static int _test_bsr_bytes_any_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1204,6 +1337,11 @@ _test_bsr_bytes_any(void) & _test_bsr_bytes_any_loop("bsr bytes alt 2 even", 10, 0x55); } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** skip a varying numbers of bits via ur_bsr_skip_any and read the same via +** ur_bsr_bytes_any master, comparing the respective states each time. +*/ static int _test_bsr_skip_any_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1252,6 +1390,10 @@ _test_bsr_skip_any(void) & _test_bsr_skip_any_loop("bsr skip alt 2 even", 10, 0x55); } +/* +** compare the result and state of two reads (that were not permitted +** to read past the end of the stream). +*/ static int _bsr_cmp_check(const char* cap, uint8_t off, @@ -1320,6 +1462,9 @@ _bsr_cmp_check(const char* cap, return ret; } +/* +** ur_bsr_log golden master +*/ static ur_cue_res_e _bsr_log_slow(ur_bsr_t *bsr, uint8_t *out) { @@ -1340,6 +1485,11 @@ _bsr_log_slow(ur_bsr_t *bsr, uint8_t *out) return ur_cue_meme; } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a varying numbers of bits via ur_bsr_log and master, comparing +** the results and respective states each time. +*/ static int _test_bsr_log_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1402,6 +1552,9 @@ _test_bsr_log(void) return ret; } +/* +** ur_bsr_tag golden master +*/ static ur_cue_res_e _bsr_tag_slow(ur_bsr_t *bsr, ur_cue_tag_e *out) { @@ -1423,6 +1576,11 @@ _bsr_tag_slow(ur_bsr_t *bsr, ur_cue_tag_e *out) return ur_cue_good; } +/* +** from a bitstream-reader initialized with varying values/lengths/offsets, +** read a jam type tag via ur_bsr_tag and master, comparing the results and +** respective states each time. 
+*/ static int _test_bsr_tag_loop(const char *cap, uint8_t len, uint8_t val) { @@ -1549,6 +1707,9 @@ _test_cue_spec(const char *cap, return ret; } +/* +** test jam/cue correctness and roundtrips across a variety of inputs +*/ static int _test_jam_cue(void) { From 9ef5e096a767cd70dc964f4dcd41b0a1e056e70d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 14:45:21 -0700 Subject: [PATCH 093/123] TMP: adds ivory-pill boot-timing printfs --- pkg/urbit/vere/king.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/vere/king.c b/pkg/urbit/vere/king.c index 60f8a2c76f..aa2ae96016 100644 --- a/pkg/urbit/vere/king.c +++ b/pkg/urbit/vere/king.c @@ -762,7 +762,16 @@ u3_king_commence() // boot the ivory pill // - _king_boot_ivory(); + { + struct timeval b4, f2, d0; + gettimeofday(&b4, 0); + + _king_boot_ivory(); + + gettimeofday(&f2, 0); + timersub(&f2, &b4, &d0); + fprintf(stderr, "lite: boot %lu ms\r\n", (d0.tv_sec * 1000) + (d0.tv_usec / 1000)); + } // disable core dumps (due to lmdb size) // From 0709edb84a400e3ece9298b2e3ff547c50a8d147 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 14:46:54 -0700 Subject: [PATCH 094/123] vere: swaps out cue implementation in ivory pill boot --- pkg/urbit/vere/king.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pkg/urbit/vere/king.c b/pkg/urbit/vere/king.c index aa2ae96016..dd0d39b9a1 100644 --- a/pkg/urbit/vere/king.c +++ b/pkg/urbit/vere/king.c @@ -5,8 +5,10 @@ #include #include #include + #include "all.h" #include "vere/vere.h" +#include "ur/ur.h" #include "ivory.h" @@ -711,7 +713,17 @@ _king_boot_ivory(void) } { - u3_noun pil = u3ke_cue(u3i_bytes(len_d, byt_y)); + ur_dict32_t dic_u = {0}; + u3_noun pil; + + ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib27, ur_fib28); + + if ( c3n == u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, &pil) ) { + u3l_log("lite: unable to cue ivory pill\r\n"); + exit(1); + } + + ur_dict_free((ur_dict_t*)&dic_u); if ( 
c3n == u3v_boot_lite(pil)) { u3l_log("lite: boot failed\r\n"); From 2bd460214357f7138a0a2883a8435d8c57655a6e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 1 Sep 2020 14:47:41 -0700 Subject: [PATCH 095/123] Revert "TMP: adds ivory-pill boot-timing printfs" This reverts commit e7df5cedc1ff24b8cf9f07484278a9fc9d647ff4. --- pkg/urbit/vere/king.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pkg/urbit/vere/king.c b/pkg/urbit/vere/king.c index dd0d39b9a1..5528041360 100644 --- a/pkg/urbit/vere/king.c +++ b/pkg/urbit/vere/king.c @@ -774,16 +774,7 @@ u3_king_commence() // boot the ivory pill // - { - struct timeval b4, f2, d0; - gettimeofday(&b4, 0); - - _king_boot_ivory(); - - gettimeofday(&f2, 0); - timersub(&f2, &b4, &d0); - fprintf(stderr, "lite: boot %lu ms\r\n", (d0.tv_sec * 1000) + (d0.tv_usec / 1000)); - } + _king_boot_ivory(); // disable core dumps (due to lmdb size) // From b9409693dcf4aae4a5e3c001908991a85663dc9a Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 16:07:07 -0700 Subject: [PATCH 096/123] serf: refactors event tracing, adds jam/cue tracing --- pkg/urbit/include/vere/serf.h | 19 +++++----- pkg/urbit/worker/main.c | 69 ++++++++++++++++++++++++++++++++--- pkg/urbit/worker/serf.c | 26 +++---------- 3 files changed, 78 insertions(+), 36 deletions(-) diff --git a/pkg/urbit/include/vere/serf.h b/pkg/urbit/include/vere/serf.h index 004e297511..4ed63f5ea0 100644 --- a/pkg/urbit/include/vere/serf.h +++ b/pkg/urbit/include/vere/serf.h @@ -6,15 +6,16 @@ /* u3_serf: worker-process state */ typedef struct _u3_serf { - c3_d key_d[4]; // disk key - c3_c* dir_c; // execution directory (pier) - c3_d sen_d; // last event requested - c3_d dun_d; // last event processed - c3_l mug_l; // hash of state - c3_o pac_o; // pack kernel - c3_o rec_o; // reclaim cache - c3_o mut_o; // mutated kerne - u3_noun sac; // space measurementl + c3_d key_d[4]; // disk key + c3_c* dir_c; // execution directory (pier) + c3_d sen_d; // last 
event requested + c3_d dun_d; // last event processed + c3_l mug_l; // hash of state + c3_o pac_o; // pack kernel + c3_o rec_o; // reclaim cache + c3_o mut_o; // mutated kernel + u3_noun sac; // space measurement + void (*xit_f)(void); // exit callback } u3_serf; /** Functions. diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 84baa65e47..e429636d7f 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -28,6 +28,9 @@ static u3_serf u3V; // one serf per process static u3_moat inn_u; // input stream static u3_mojo out_u; // output stream +#undef SERF_TRACE_JAM +#undef SERF_TRACE_CUE + /* _cw_serf_fail(): failure stub. */ static void @@ -42,13 +45,24 @@ _cw_serf_fail(void* vod_p, const c3_c* wut_c) static void _cw_serf_send(u3_noun pel) { - u3_noun mat = u3ke_jam(pel); - c3_d len_w = u3r_met(3, mat); - c3_y* byt_y = c3_malloc(len_w); + u3_noun mat; + c3_w len_w; + c3_y* byt_y; +#ifdef SERF_TRACE_JAM + u3t_event_trace("serf ipc jam", 'B'); +#endif + + mat = u3ke_jam(pel); + len_w = u3r_met(3, mat); + byt_y = c3_malloc(len_w); u3r_bytes(0, len_w, byt_y, mat); - u3_newt_send(&out_u, len_w, byt_y); +#ifdef SERF_TRACE_JAM + u3t_event_trace("serf ipc jam", 'E'); +#endif + + u3_newt_send(&out_u, len_w, byt_y); u3z(mat); } @@ -68,12 +82,41 @@ _cw_serf_send_stdr(c3_c* str_c) _cw_serf_send_slog(u3nc(0, u3i_string(str_c))); } -/* _cw_serf_writ(): + +/* _cw_serf_step_trace(): initialize or rotate trace file. +*/ +static void +_cw_serf_step_trace(void) +{ + if ( u3C.wag_w & u3o_trace ) { + if ( u3_Host.tra_u.con_w == 0 && u3_Host.tra_u.fun_w == 0 ) { + u3t_trace_open(u3V.dir_c); + } + else if ( u3_Host.tra_u.con_w >= 100000 ) { + u3t_trace_close(); + u3t_trace_open(u3V.dir_c); + } + } +} + +/* _cw_serf_writ(): process a command from the king. 
*/ static void _cw_serf_writ(void* vod_p, c3_d len_d, c3_y* byt_y) { - u3_noun ret, jar = u3ke_cue(u3i_bytes(len_d, byt_y)); + u3_noun ret, jar; + + _cw_serf_step_trace(); + +#ifdef SERF_TRACE_CUE + u3t_event_trace("serf ipc cue", 'B'); +#endif + + jar = u3ke_cue(u3i_bytes(len_d, byt_y)); + +#ifdef SERF_TRACE_CUE + u3t_event_trace("serf ipc cue", 'E'); +#endif if ( c3n == u3_serf_writ(&u3V, jar, &ret) ) { _cw_serf_fail(0, "bad jar"); @@ -108,6 +151,14 @@ _cw_serf_stdio(c3_i* inn_i, c3_i* out_i) close(nul_i); } +/* _cw_serf_exit(): cleanup on serf exit. +*/ +static void +_cw_serf_exit(void) +{ + u3t_trace_close(); +} + /* _cw_serf_commence(); initialize and run serf */ static void @@ -214,6 +265,12 @@ _cw_serf_commence(c3_i argc, c3_c* argv[]) u3C.slog_f = _cw_serf_send_slog; } + u3V.xit_f = _cw_serf_exit; + +#if defined(SERF_TRACE_JAM) || defined(SERF_TRACE_CUE) + u3t_trace_open(u3V.dir_c); +#endif + // start serf // { diff --git a/pkg/urbit/worker/serf.c b/pkg/urbit/worker/serf.c index 98215b979c..79023fe252 100644 --- a/pkg/urbit/worker/serf.c +++ b/pkg/urbit/worker/serf.c @@ -587,7 +587,7 @@ _serf_work(u3_serf* sef_u, c3_w mil_w, u3_noun job) u3_noun u3_serf_work(u3_serf* sef_u, c3_w mil_w, u3_noun job) { - c3_t tac_t = ( 0 != u3_Host.tra_u.fil_u ); + c3_t tac_t = ( u3C.wag_w & u3o_trace ); c3_c lab_c[2056]; u3_noun pro; @@ -838,7 +838,7 @@ u3_serf_peek(u3_serf* sef_u, c3_w mil_w, u3_noun sam) /* _serf_writ_live_exit(): exit on command. 
*/ static void -_serf_writ_live_exit(c3_w cod_w) +_serf_writ_live_exit(u3_serf* sef_u, c3_w cod_w) { if ( u3C.wag_w & u3o_debug_cpu ) { FILE* fil_u; @@ -875,6 +875,8 @@ _serf_writ_live_exit(c3_w cod_w) // c3_free(u3D.ray_u); + sef_u->xit_f(); + exit(cod_w); } @@ -924,7 +926,7 @@ u3_serf_live(u3_serf* sef_u, u3_noun com, u3_noun* ret) u3z(com); // NB, doesn't return // - _serf_writ_live_exit(cod_y); + _serf_writ_live_exit(sef_u, cod_y); *ret = u3nc(c3__live, u3_nul); return c3y; } @@ -997,22 +999,6 @@ u3_serf_live(u3_serf* sef_u, u3_noun com, u3_noun* ret) } } -/* _serf_step_trace(): initialize or rotate trace file. -*/ -static void -_serf_step_trace(u3_serf* sef_u) -{ - if ( u3C.wag_w & u3o_trace ) { - if ( u3_Host.tra_u.con_w == 0 && u3_Host.tra_u.fun_w == 0 ) { - u3t_trace_open(sef_u->dir_c); - } - else if ( u3_Host.tra_u.con_w >= 100000 ) { - u3t_trace_close(); - u3t_trace_open(sef_u->dir_c); - } - } -} - /* u3_serf_writ(): apply writ [wit], producing plea [*pel] on c3y. */ c3_o @@ -1025,8 +1011,6 @@ u3_serf_writ(u3_serf* sef_u, u3_noun wit, u3_noun* pel) ret_o = c3n; } else { - _serf_step_trace(sef_u); - switch ( tag ) { default: { ret_o = c3n; From 313c5f6050ab3828af4de395659eee5bb895a010 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 22:56:59 -0700 Subject: [PATCH 097/123] serf: use off-loom jam for ipc $plea --- pkg/urbit/noun/serial.c | 2 -- pkg/urbit/worker/main.c | 12 ++++-------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 7e6ecb2cec..46ba7866bb 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -291,8 +291,6 @@ u3s_jam_xeno(u3_noun a, c3_d* len_d, c3_y** byt_y) { _jam_xeno_t jam_u = {0}; - c3_assert( &(u3H->rod_u) == u3R ); - jam_u.har_p = u3h_new(); jam_u.rit_u.prev = ur_fib11; diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index e429636d7f..8801a93e82 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -45,25 
+45,21 @@ _cw_serf_fail(void* vod_p, const c3_c* wut_c) static void _cw_serf_send(u3_noun pel) { - u3_noun mat; - c3_w len_w; + c3_d len_d; c3_y* byt_y; #ifdef SERF_TRACE_JAM u3t_event_trace("serf ipc jam", 'B'); #endif - mat = u3ke_jam(pel); - len_w = u3r_met(3, mat); - byt_y = c3_malloc(len_w); - u3r_bytes(0, len_w, byt_y, mat); + u3s_jam_xeno(pel, &len_d, &byt_y); #ifdef SERF_TRACE_JAM u3t_event_trace("serf ipc jam", 'E'); #endif - u3_newt_send(&out_u, len_w, byt_y); - u3z(mat); + u3_newt_send(&out_u, len_d, byt_y); + u3z(pel); } /* _cw_serf_send_slog(): send hint output (hod is [priority tank]). From 9159a7a318afb405deb97580123bfa36c0542a89 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 23:02:55 -0700 Subject: [PATCH 098/123] serf: use off-loom cue for ipc $writ --- pkg/urbit/worker/main.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 8801a93e82..9e8bcadf41 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -24,9 +24,10 @@ #include "ur/hashcons.h" -static u3_serf u3V; // one serf per process -static u3_moat inn_u; // input stream -static u3_mojo out_u; // output stream +static u3_serf u3V; // one serf per process +static u3_moat inn_u; // input stream +static u3_mojo out_u; // output stream +static ur_dict32_t dic_u; // cue dictionary #undef SERF_TRACE_JAM #undef SERF_TRACE_CUE @@ -101,6 +102,7 @@ static void _cw_serf_writ(void* vod_p, c3_d len_d, c3_y* byt_y) { u3_noun ret, jar; + c3_o ret_o; _cw_serf_step_trace(); @@ -108,13 +110,18 @@ _cw_serf_writ(void* vod_p, c3_d len_d, c3_y* byt_y) u3t_event_trace("serf ipc cue", 'B'); #endif - jar = u3ke_cue(u3i_bytes(len_d, byt_y)); + ret_o = u3s_cue_xeno_unsafe(&dic_u, len_d, byt_y, &jar); + // XX check if the dictionary grew too much and shrink? 
+ // + ur_dict32_wipe(&dic_u); #ifdef SERF_TRACE_CUE u3t_event_trace("serf ipc cue", 'E'); #endif - if ( c3n == u3_serf_writ(&u3V, jar, &ret) ) { + if ( (c3n == ret_o) + || (c3n == u3_serf_writ(&u3V, jar, &ret)) ) + { _cw_serf_fail(0, "bad jar"); } else { @@ -152,6 +159,7 @@ _cw_serf_stdio(c3_i* inn_i, c3_i* out_i) static void _cw_serf_exit(void) { + ur_dict_free((ur_dict_t*)&dic_u); u3t_trace_close(); } @@ -225,6 +233,8 @@ _cw_serf_commence(c3_i argc, c3_c* argv[]) uv_stream_set_blocking((uv_stream_t*)&out_u.pyp_u, 1); } + ur_dict32_grow((ur_root_t*)0, &dic_u, ur_fib10, ur_fib11); + // set up writing // out_u.ptr_v = &u3V; From 91f5c50befc19cd23a367f4d266a54dac94261a1 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 2 Sep 2020 15:43:16 -0700 Subject: [PATCH 099/123] vere: adds jam/cue tracing to ipc and disk layers --- pkg/urbit/vere/disk.c | 48 +++++++++++++++++++++++++++++++++++-------- pkg/urbit/vere/lord.c | 42 ++++++++++++++++++++++++++++++++----- 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/pkg/urbit/vere/disk.c b/pkg/urbit/vere/disk.c index feea9697f1..7bfdd338b9 100644 --- a/pkg/urbit/vere/disk.c +++ b/pkg/urbit/vere/disk.c @@ -39,6 +39,8 @@ struct _cd_save { }; #undef VERBOSE_DISK +#undef DISK_TRACE_JAM +#undef DISK_TRACE_CUE static void _disk_commit(u3_disk* log_u); @@ -168,11 +170,23 @@ _disk_commit_start(struct _cd_save* req_u) static c3_w _disk_serialize_v0(u3_fact* tac_u, c3_y** dat_y) { - u3_atom mat = u3ke_jam(u3nc(tac_u->bug_l, u3k(tac_u->job))); - c3_w len_w = u3r_met(3, mat); + u3_noun val = u3nc(tac_u->bug_l, u3k(tac_u->job)); + u3_atom mat; + c3_w len_w; + +#ifdef DISK_TRACE_JAM + u3t_event_trace("king disk jam", 'B'); +#endif + + mat = u3ke_jam(val); + len_w = u3r_met(3, mat); *dat_y = c3_malloc(len_w); u3r_bytes(0, len_w, *dat_y, mat); +#ifdef DISK_TRACE_JAM + u3t_event_trace("king disk jam", 'E'); +#endif + u3z(mat); return len_w; @@ -368,19 +382,27 @@ _disk_read_one_cb(void* ptr_v, c3_d eve_d, size_t val_i, void* 
val_p) u3_fact* tac_u; { - // XX u3m_soft? - // - u3_noun dat = u3ke_cue(u3i_bytes(val_i, val_p)); - u3_noun mug, job; + u3_noun val, mug, job; c3_l bug_l; +#ifdef DISK_TRACE_CUE + u3t_event_trace("king disk cue", 'B'); +#endif - if ( (c3n == u3r_cell(dat, &mug, &job)) + // XX u3m_soft? + // + val = u3ke_cue(u3i_bytes(val_i, val_p)); + +#ifdef DISK_TRACE_CUE + u3t_event_trace("king disk cue", 'E'); +#endif + + if ( (c3n == u3r_cell(val, &mug, &job)) || (c3n == u3r_safe_word(mug, &bug_l)) ) // XX { // failure here triggers cleanup in _disk_read_start_cb() // - u3z(dat); + u3z(val); return c3n; } @@ -389,7 +411,7 @@ _disk_read_one_cb(void* ptr_v, c3_d eve_d, size_t val_i, void* val_p) tac_u = u3_fact_init(eve_d, 0, u3k(job)); tac_u->bug_l = bug_l; - u3z(dat); + u3z(val); } if ( !red_u->ent_u ) { @@ -643,6 +665,10 @@ u3_disk_exit(u3_disk* log_u) u3_dire_free(log_u->com_u); c3_free(log_u); + +#if defined(DISK_TRACE_JAM) || defined(DISK_TRACE_CUE) + u3t_trace_close(); +#endif } /* u3_disk_info(): print status info. @@ -782,5 +808,9 @@ u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u) log_u->liv_o = c3y; +#if defined(DISK_TRACE_JAM) || defined(DISK_TRACE_CUE) + u3t_trace_open(pax_c); +#endif + return log_u; } diff --git a/pkg/urbit/vere/lord.c b/pkg/urbit/vere/lord.c index 2fc2762ba1..e435261b48 100644 --- a/pkg/urbit/vere/lord.c +++ b/pkg/urbit/vere/lord.c @@ -19,6 +19,9 @@ #include "all.h" #include "vere/vere.h" +#undef LORD_TRACE_JAM +#undef LORD_TRACE_CUE + /* |% :: +writ: from king to serf @@ -150,6 +153,10 @@ _lord_stop(u3_lord* god_u) u3_newt_mojo_stop(&god_u->inn_u, _lord_bail_noop); uv_close((uv_handle_t*)&god_u->cub_u, 0); + +#if defined(LORD_TRACE_JAM) || defined(LORD_TRACE_CUE) + u3t_trace_close(); +#endif } /* _lord_bail(): serf/lord error. 
@@ -655,7 +662,17 @@ static void _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) { u3_lord* god_u = ptr_v; - u3_noun tag, dat, jar = u3ke_cue(u3i_bytes(len_d, byt_y)); + u3_noun tag, dat, jar; + +#ifdef LORD_TRACE_CUE + u3t_event_trace("king ipc cue", 'B'); +#endif + + jar = u3ke_cue(u3i_bytes(len_d, byt_y)); + +#ifdef LORD_TRACE_CUE + u3t_event_trace("king ipc cue", 'E'); +#endif if ( c3n == u3r_cell(jar, &tag, &dat) ) { return _lord_plea_foul(god_u, 0, jar); @@ -776,13 +793,24 @@ _lord_writ_send(u3_lord* god_u, u3_writ* wit_u) { u3_noun jar = _lord_writ_make(god_u, wit_u); - u3_noun mat = u3ke_jam(jar); - c3_w len_w = u3r_met(3, mat); - c3_y* byt_y = c3_malloc(len_w); + u3_noun mat; + c3_w len_w; + c3_y* byt_y; +#ifdef LORD_TRACE_JAM + u3t_event_trace("king ipc jam", 'B'); +#endif + + mat = u3ke_jam(jar); + len_w = u3r_met(3, mat); + byt_y = c3_malloc(len_w); u3r_bytes(0, len_w, byt_y, mat); - u3_newt_send(&god_u->inn_u, len_w, byt_y); +#ifdef LORD_TRACE_JAM + u3t_event_trace("king ipc jam", 'E'); +#endif + + u3_newt_send(&god_u->inn_u, len_w, byt_y); u3z(mat); } } @@ -1131,6 +1159,10 @@ u3_lord_init(c3_c* pax_c, c3_w wag_w, c3_d key_d[4], u3_lord_cb cb_u) } } +#if defined(LORD_TRACE_JAM) || defined(LORD_TRACE_CUE) + u3t_trace_open(god_u->pax_c); +#endif + // start reading from proc // { From 0569bd9e3d4db4910d55c845a1aece4f05d9943b Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 14:26:29 -0700 Subject: [PATCH 100/123] ur: adds failing jam test case --- pkg/urbit/tests/ur_tests.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 1aeece955b..dee9ca5e6c 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1799,6 +1799,12 @@ _test_jam_cue(void) TEST_CASE("wide", ur_coin_bytes(r, inp, sizeof(inp))); } + { + uint8_t inp[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0xa8, 0xab, 0x60, 0xef, 0x2d, 0xd, 0x0, 0x0, 0x80 }; + uint8_t res[19] = { 0x0, 0x2, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x18, 0x50, 0x57, 0xc1, 0xde, 0x5b, 0x1a, 0x0, 0x0, 0x0, 0x1 }; + TEST_CASE("date", ur_coin_bytes(r, inp, sizeof(inp))); + } + ur_root_free(r); return ret; From 9c593636800fbf38ceb9c16e7e89ffb80770cb8d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 14:19:35 -0700 Subject: [PATCH 101/123] ur: updates ur_bsw_t comparison tests to memcmp() buffers (failing) --- pkg/urbit/tests/ur_tests.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index dee9ca5e6c..8859966ee2 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -222,12 +222,6 @@ _bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t ret = 0; } - if ( a->bytes[0] != b->bytes[0] ) { - fprintf(stderr, "%s: val 0x%02x off %u len %u: bytes fail (0x%02x, 0x%02x)\r\n", - cap, val, off, len, a->bytes[0], b->bytes[0]); - ret = 0; - } - if ( a->off != b->off ) { fprintf(stderr, "%s: val 0x%02x off %u len %u: offset fail (%u, %u)\r\n", cap, val, off, len, a->off, b->off); @@ -240,6 +234,26 @@ _bsw_cmp_check(const char* cap, uint8_t val, uint8_t off, uint8_t len, ur_bsw_t ret = 0; } + { + uint64_t k, len_byt = a->fill + !!a->off; + + if ( memcmp(a->bytes, b->bytes, len_byt) ) { + fprintf(stderr, "%s: val 0x%02x off %u, len %u not equal off=%u fill=%" PRIu64 "\r\n", + cap, val, off, len, b->off, b->fill); + fprintf(stderr, " a: { "); + for ( k = 0; k < len_byt; k++ ) { + fprintf(stderr, "%02x, ", a->bytes[k]); + } + fprintf(stderr, "}\r\n"); + fprintf(stderr, " b: { "); + for ( k = 0; k < len_byt; k++ ) { + fprintf(stderr, "%02x, ", b->bytes[k]); + } + fprintf(stderr, "}\r\n"); + ret = 0; + } + } + return ret; } From c469ccb04350bdc76f92d76a4fdd299ea35e764e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 14:24:29 -0700 Subject: [PATCH 102/123] ur: fixes a bug in ur_bsw_bytes() which was introduced by an earlier fix for a buffer 
over-read --- pkg/urbit/ur/bitstream.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index d72548257d..14b841ba40 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -1037,21 +1037,31 @@ _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) m = byt[i] >> rest; } - if ( len_bit ) { - if ( len_bit < rest ) { - l = byt[len_byt] & ((1 << len_bit) - 1); - bsw->bytes[fill] = m ^ (l << off); - off += len_bit; - } - else { - l = byt[len_byt] & mask; - bsw->bytes[fill++] = m ^ (l << off); + // no trailing bits; we need only write the rest of the last byte. + // + // NB: while semantically equivalent to the subsequent block, + // this case must be separate to avoid reading off the end of [byt] + // + if ( !len_bit ) { + bsw->bytes[fill] = m; + } + // trailing bits fit into the current output byte. + // + else if ( len_bit < rest ) { + l = byt[len_byt] & ((1 << len_bit) - 1); + bsw->bytes[fill] = m ^ (l << off); + off += len_bit; + } + // trailing bits extend into the next output byte. 
+ // + else { + l = byt[len_byt] & mask; + bsw->bytes[fill++] = m ^ (l << off); - m = byt[len_byt] >> rest; + m = byt[len_byt] >> rest; - off = len_bit - rest; - bsw->bytes[fill] = m & ((1 << off) - 1); - } + off = len_bit - rest; + bsw->bytes[fill] = m & ((1 << off) - 1); } } From 33b6a8b924c8ce70f2dd934989a8ace20b8b7180 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 14:26:50 -0700 Subject: [PATCH 103/123] u3: ports (formerly) failing jam test case from ur_tests --- pkg/urbit/tests/jam_tests.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pkg/urbit/tests/jam_tests.c b/pkg/urbit/tests/jam_tests.c index da8fc914ae..41e5d767b2 100644 --- a/pkg/urbit/tests/jam_tests.c +++ b/pkg/urbit/tests/jam_tests.c @@ -493,6 +493,12 @@ _test_jam_roundtrip(void) TEST_CASE("wide", u3i_bytes(sizeof(inp_y), inp_y)); } + { + c3_y inp_y[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0xa8, 0xab, 0x60, 0xef, 0x2d, 0xd, 0x0, 0x0, 0x80 }; + c3_y res_y[19] = { 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x18, 0x50, 0x57, 0xc1, 0xde, 0x5b, 0x1a, 0x0, 0x0, 0x0, 0x1 }; + TEST_CASE("date", u3i_bytes(sizeof(inp_y), inp_y)); + } + return ret_i; } From 466f342e7b95ddc72c0dae2f75c181d435965493 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 14:48:58 -0700 Subject: [PATCH 104/123] vere: use off-loom jam for ipc $writ --- pkg/urbit/vere/lord.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/pkg/urbit/vere/lord.c b/pkg/urbit/vere/lord.c index e435261b48..9d94f66e4c 100644 --- a/pkg/urbit/vere/lord.c +++ b/pkg/urbit/vere/lord.c @@ -793,25 +793,21 @@ _lord_writ_send(u3_lord* god_u, u3_writ* wit_u) { u3_noun jar = _lord_writ_make(god_u, wit_u); - u3_noun mat; - c3_w len_w; + c3_d len_d; c3_y* byt_y; #ifdef LORD_TRACE_JAM u3t_event_trace("king ipc jam", 'B'); #endif - mat = u3ke_jam(jar); - len_w = u3r_met(3, mat); - byt_y = c3_malloc(len_w); - u3r_bytes(0, len_w, byt_y, mat); + u3s_jam_xeno(jar, &len_d, &byt_y); #ifdef LORD_TRACE_JAM 
u3t_event_trace("king ipc jam", 'E'); #endif - u3_newt_send(&god_u->inn_u, len_w, byt_y); - u3z(mat); + u3_newt_send(&god_u->inn_u, len_d, byt_y); + u3z(jar); } } From f42e4360552c764c7ed2106952afeb25b990344d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 15:00:17 -0700 Subject: [PATCH 105/123] vere: use off-loom cue for ipc $plea --- pkg/urbit/include/vere/vere.h | 1 + pkg/urbit/vere/lord.c | 23 +++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pkg/urbit/include/vere/vere.h b/pkg/urbit/include/vere/vere.h index b47dd50d09..cf84b868a2 100644 --- a/pkg/urbit/include/vere/vere.h +++ b/pkg/urbit/include/vere/vere.h @@ -444,6 +444,7 @@ uv_process_t cub_u; // process handle uv_process_options_t ops_u; // process configuration uv_stdio_container_t cod_u[3]; // process options + void* dic_u; // cue dictionary time_t wen_t; // process creation time u3_mojo inn_u; // client's stdin u3_moat out_u; // client's stdout diff --git a/pkg/urbit/vere/lord.c b/pkg/urbit/vere/lord.c index 9d94f66e4c..52fa4ee7bf 100644 --- a/pkg/urbit/vere/lord.c +++ b/pkg/urbit/vere/lord.c @@ -18,6 +18,7 @@ #include "all.h" #include "vere/vere.h" +#include "ur/hashcons.h" #undef LORD_TRACE_JAM #undef LORD_TRACE_CUE @@ -71,6 +72,7 @@ _lord_stop_cb(void* ptr_v, void (*exit_f)(void*) = god_u->cb_u.exit_f; void* exit_v = god_u->cb_u.ptr_v; + ur_dict_free((ur_dict_t*)god_u->dic_u); c3_free(god_u); if ( exit_f ) { @@ -661,20 +663,27 @@ _lord_plea_work(u3_lord* god_u, u3_noun dat) static void _lord_on_plea(void* ptr_v, c3_d len_d, c3_y* byt_y) { - u3_lord* god_u = ptr_v; - u3_noun tag, dat, jar; + u3_lord* god_u = ptr_v; + ur_dict32_t* dic_u = god_u->dic_u; + u3_noun tag, dat, jar = u3_blip; + c3_o ret_o; #ifdef LORD_TRACE_CUE u3t_event_trace("king ipc cue", 'B'); #endif - jar = u3ke_cue(u3i_bytes(len_d, byt_y)); + ret_o = u3s_cue_xeno_unsafe(dic_u, len_d, byt_y, &jar); + // XX check if the dictionary grew too much and shrink? 
+ // + ur_dict32_wipe(dic_u); #ifdef LORD_TRACE_CUE u3t_event_trace("king ipc cue", 'E'); #endif - if ( c3n == u3r_cell(jar, &tag, &dat) ) { + if ( (c3n == ret_o) + || (c3n == u3r_cell(jar, &tag, &dat)) ) + { return _lord_plea_foul(god_u, 0, jar); } @@ -1159,6 +1168,12 @@ u3_lord_init(c3_c* pax_c, c3_w wag_w, c3_d key_d[4], u3_lord_cb cb_u) u3t_trace_open(god_u->pax_c); #endif + { + ur_dict32_t* dic_u = c3_calloc(sizeof(*dic_u)); + ur_dict32_grow((ur_root_t*)0, dic_u, ur_fib10, ur_fib11); + god_u->dic_u = dic_u; + } + // start reading from proc // { From 51b85dc5a8a5b04f5199be66ed87e4edc7ebc876 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 15:38:27 -0700 Subject: [PATCH 106/123] u3: removes redundant byte-array allocation in u3s_cue_xeno --- pkg/urbit/noun/serial.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/pkg/urbit/noun/serial.c b/pkg/urbit/noun/serial.c index 46ba7866bb..79208801a0 100644 --- a/pkg/urbit/noun/serial.c +++ b/pkg/urbit/noun/serial.c @@ -629,17 +629,24 @@ _cs_cue_xeno_next(_cue_stack_t* tac_u, // XX need a ur_bsr_words_any() // else { - c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); + c3_w* wor_w; c3_y* byt_y; - // XX check that byt_d fits in a c3_w; - // - byt_y = c3_calloc(byt_d); + { + c3_d byt_d = (len_d >> 3) + !!ur_mask_3(len_d); + + if ( 0xffffffffULL < byt_d) { + return u3m_bail(c3__meme); + } + + // XX assumes little-endian + // + wor_w = u3a_slaq(3, byt_d); + byt_y = (c3_y*)wor_w; + } + ur_bsr_bytes_any(red_u, len_d, byt_y); - - *out = u3i_bytes(byt_d, byt_y); - - c3_free(byt_y); + *out = u3a_malt(wor_w); } ur_dict32_put(rot_u, dic_u, bit_d, *out); From e9d4b7b7624dfe0c80911b6fc2a2d41008767a3d Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 15:50:58 -0700 Subject: [PATCH 107/123] ur: cleans up header comments --- pkg/urbit/include/ur/hashcons.h | 8 +++----- pkg/urbit/include/ur/serial.h | 10 +++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff 
--git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 3851d47ac9..045d5055af 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -9,8 +9,7 @@ #include "ur/defs.h" /* -** noun references are representated by 64-bits, with the top 2 bits -** reserved for discriminable tags. +** 64-bit noun references, with the top 2 bits reserved for type tags. */ typedef uint64_t ur_nref; @@ -223,8 +222,7 @@ void ur_nvec_free(ur_nvec_t *v); /* -** abitrary depth-first, pre-order noun traversal, where -** cells can optionally short-circuit. +** depth-first, pre-order noun traversal, cells can short-circuit. */ void ur_walk_fore(ur_root_t *r, @@ -233,4 +231,4 @@ ur_walk_fore(ur_root_t *r, void (*atom)(ur_root_t*, ur_nref, void*), ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)); -#endif \ No newline at end of file +#endif diff --git a/pkg/urbit/include/ur/serial.h b/pkg/urbit/include/ur/serial.h index 9a05115a04..dde11ee67b 100644 --- a/pkg/urbit/include/ur/serial.h +++ b/pkg/urbit/include/ur/serial.h @@ -6,9 +6,9 @@ #include /* -** bit-wise serialize a noun of arbitrary into a byte-buffer. +** bit-wise serialization of a noun into a byte-buffer. ** supports up to 64-bits of bit-addressed output (nearly 2 EiB). -** as this is an impractical volume data, cursor overflow is not checked. +** (as this is an impractical volume data, cursor overflow is not checked.) ** ** unsafe variant is unsafe wrt its [dict] parameter, which must be empty, ** but can be passed in order to skip reallocation inside hot loops. @@ -25,12 +25,12 @@ uint64_t ur_jam(ur_root_t *r, ur_nref ref, uint64_t *len, uint8_t **byt); /* -** bitwise deserialization of an arbitrary byte-buffer into a noun. +** bitwise deserialization of a byte-buffer into a noun. ** supports up to 62-bits of bit-addressed input (511 PiB). -** return will be [ur_cue_good] upon success. +** returns [ur_cue_good] on success. 
** ** unsafe variant is unsafe wrt its [dict] parameter, which must be empty, -** but can be passed in order to skip reallocation inside hot loops. +** (present in order to skip reallocation inside hot loops). ** ** test variant does not allocate nouns, but merely parses the input. ** From 4d86e3595cc43247a897182446b4c7481170d5ad Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 16:00:19 -0700 Subject: [PATCH 108/123] u3: switches +cue jet to use new implementation --- pkg/urbit/jets/e/cue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/urbit/jets/e/cue.c b/pkg/urbit/jets/e/cue.c index a26cecfac4..eab7ac0310 100644 --- a/pkg/urbit/jets/e/cue.c +++ b/pkg/urbit/jets/e/cue.c @@ -6,7 +6,7 @@ u3_noun u3qe_cue(u3_atom a) { - return u3s_cue(a); + return u3s_cue_atom(a); } u3_noun From b47047b8692ea558245648f1eb448861ffe7e7a7 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 22:07:55 -0700 Subject: [PATCH 109/123] u3: renames global deduplication to %meld, adds to $writ --- pkg/urbit/include/noun/urth.h | 4 ++-- pkg/urbit/noun/urth.c | 4 ++-- pkg/urbit/worker/main.c | 10 +++++----- pkg/urbit/worker/serf.c | 14 ++++++++++++++ 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/pkg/urbit/include/noun/urth.h b/pkg/urbit/include/noun/urth.h index c8dd98a96b..d837a72291 100644 --- a/pkg/urbit/include/noun/urth.h +++ b/pkg/urbit/include/noun/urth.h @@ -2,10 +2,10 @@ */ /** Functions. **/ - /* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. + /* u3u_meld(): globally deduplicate memory. */ void - u3u_uniq(void); + u3u_meld(void); /* u3u_cram(): globably deduplicate memory, and write a rock to disk. */ diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index bf42d75823..33c9bf2c95 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -425,10 +425,10 @@ _cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) return ken; } -/* u3u_uniq(): hash-cons roots off-loom, reallocate on loom. 
+/* u3u_meld(): globally deduplicate memory. */ void -u3u_uniq(void) +u3u_meld(void) { ur_root_t* rot_u; ur_nvec_t cod_u; diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index 9e8bcadf41..a9d48e840c 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -382,7 +382,7 @@ _cw_queu(c3_i argc, c3_c* argv[]) /* _cw_uniq(); deduplicate persistent nouns */ static void -_cw_uniq(c3_i argc, c3_c* argv[]) +_cw_meld(c3_i argc, c3_c* argv[]) { c3_assert( 3 <= argc ); @@ -392,7 +392,7 @@ _cw_uniq(c3_i argc, c3_c* argv[]) u3_serf_grab(); - u3u_uniq(); + u3u_meld(); u3_serf_grab(); @@ -428,7 +428,7 @@ _cw_usage(c3_i argc, c3_c* argv[]) " compact persistent state:\n" " %s pack \n\n" " deduplicate persistent state:\n" - " %s uniq \n\n" + " %s meld \n\n" " jam persistent state:\n" " %s cram \n\n" " cue persistent state:\n" @@ -475,8 +475,8 @@ main(c3_i argc, c3_c* argv[]) else if ( 0 == strcmp("queu", argv[1]) ) { _cw_queu(argc, argv); } - else if ( 0 == strcmp("uniq", argv[1]) ) { - _cw_uniq(argc, argv); + else if ( 0 == strcmp("meld", argv[1]) ) { + _cw_meld(argc, argv); } else if ( 0 == strcmp("pack", argv[1]) ) { _cw_pack(argc, argv); diff --git a/pkg/urbit/worker/serf.c b/pkg/urbit/worker/serf.c index 79023fe252..cea2e8dcad 100644 --- a/pkg/urbit/worker/serf.c +++ b/pkg/urbit/worker/serf.c @@ -34,6 +34,7 @@ $% [%cram eve=@] [%exit cod=@] [%save eve=@] + [%meld ~] [%pack ~] == == [%peek mil=@ now=@da lyc=gang pat=path] @@ -983,6 +984,19 @@ u3_serf_live(u3_serf* sef_u, u3_noun com, u3_noun* ret) } } + case c3__meld: { + if ( u3_nul != dat ) { + u3z(com); + return c3n; + } + else { + u3z(com); + u3u_meld(); + *ret = u3nc(c3__live, u3_nul); + return c3y; + } + } + case c3__save: { c3_d eve_d; From 81aa0e30037219eb5b124e2de818c8c3ef65b616 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 22:14:07 -0700 Subject: [PATCH 110/123] vere: adds missing function comments in pier.c --- pkg/urbit/vere/pier.c | 8 ++++++++ 1 file changed, 8 
insertions(+) diff --git a/pkg/urbit/vere/pier.c b/pkg/urbit/vere/pier.c index 9c66b5315a..333a22e213 100644 --- a/pkg/urbit/vere/pier.c +++ b/pkg/urbit/vere/pier.c @@ -1435,6 +1435,8 @@ u3_pier_boot(c3_w wag_w, // config flags return pir_u; } +/* _pier_save_cb(): save snapshot upon serf/disk synchronization. +*/ static void _pier_save_cb(void* ptr_v, c3_d eve_d) { @@ -1468,6 +1470,8 @@ u3_pier_save(u3_pier* pir_u) return c3n; } +/* _pier_cram_cb(): save snapshot upon serf/disk synchronization. +*/ static void _pier_cram_cb(void* ptr_v, c3_d eve_d) { @@ -1502,6 +1506,8 @@ u3_pier_cram(u3_pier* pir_u) return c3n; } +/* _pier_work_close_cb(): dispose u3_work after closing handles. +*/ static void _pier_work_close_cb(uv_handle_t* idl_u) { @@ -1509,6 +1515,8 @@ _pier_work_close_cb(uv_handle_t* idl_u) c3_free(wok_u); } +/* _pier_work_close(): close drivers/handles in the u3_psat_work state. +*/ static void _pier_work_close(u3_work* wok_u) { From 844ed22a2e10fb251fb10fc95b12075513c3124e Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Thu, 3 Sep 2020 22:22:38 -0700 Subject: [PATCH 111/123] vere: moves |pack initiation to king, handles future |meld --- pkg/urbit/include/vere/vere.h | 25 +++++++++++++++++++++++-- pkg/urbit/vere/io/term.c | 11 ++++++----- pkg/urbit/vere/lord.c | 31 +++++++++++++++++++++++++++++++ pkg/urbit/vere/pier.c | 24 ++++++++++++++++++++++++ pkg/urbit/worker/serf.c | 9 +++------ 5 files changed, 87 insertions(+), 13 deletions(-) diff --git a/pkg/urbit/include/vere/vere.h b/pkg/urbit/include/vere/vere.h index cf84b868a2..5ee6b79708 100644 --- a/pkg/urbit/include/vere/vere.h +++ b/pkg/urbit/include/vere/vere.h @@ -399,8 +399,9 @@ u3_writ_play = 2, u3_writ_save = 3, u3_writ_cram = 4, - u3_writ_pack = 5, - u3_writ_exit = 6 + u3_writ_meld = 5, + u3_writ_pack = 6, + u3_writ_exit = 7 } u3_writ_type; /* u3_writ: ipc message from king to serf @@ -948,6 +949,16 @@ c3_o u3_lord_cram(u3_lord* god_u); + /* u3_lord_meld(): globally deduplicate persistent state. 
+ */ + void + u3_lord_meld(u3_lord* god_u); + + /* u3_lord_pack(): defragment persistent state. + */ + void + u3_lord_pack(u3_lord* god_u); + /* u3_lord_work(): attempt work. */ void @@ -1221,6 +1232,16 @@ c3_o u3_pier_cram(u3_pier* pir_u); + /* u3_pier_meld(): globally deduplicate persistent state. + */ + void + u3_pier_meld(u3_pier* pir_u); + + /* u3_pier_pack(): defragment persistent state. + */ + void + u3_pier_pack(u3_pier* pir_u); + /* u3_pier_info(): print status info. */ void diff --git a/pkg/urbit/vere/io/term.c b/pkg/urbit/vere/io/term.c index 3eb24050cd..2da13a87b1 100644 --- a/pkg/urbit/vere/io/term.c +++ b/pkg/urbit/vere/io/term.c @@ -1413,13 +1413,14 @@ _term_io_kick(u3_auto* car_u, u3_noun wir, u3_noun cad) // uv_timer_start(&u3K.tim_u, (uv_timer_cb)u3_king_grab, 0, 0); } break; - // ignore pack (processed in worker) - // + case c3__meld: { + ret_o = c3y; + u3_pier_meld(car_u->pir_u); + } break; + case c3__pack: { ret_o = c3y; - // XX would be - // - // u3_assure(u3_pier_pack(car_u->pir_u)); + u3_pier_pack(car_u->pir_u); } break; } } diff --git a/pkg/urbit/vere/lord.c b/pkg/urbit/vere/lord.c index 52fa4ee7bf..a216ce1389 100644 --- a/pkg/urbit/vere/lord.c +++ b/pkg/urbit/vere/lord.c @@ -32,6 +32,7 @@ $% [%cram eve=@] [%exit cod=@] [%save eve=@] + [%meld ~] [%pack ~] == == [%peek mil=@ now=@da lyc=gang pat=path] @@ -274,6 +275,12 @@ _lord_plea_live(u3_lord* god_u, u3_noun dat) god_u->cb_u.cram_f(god_u->cb_u.ptr_v); } break; + case u3_writ_meld: { + // XX wire into cb + // + u3l_log("pier: meld complete\n"); + } break; + case u3_writ_pack: { // XX wire into cb // @@ -774,6 +781,10 @@ _lord_writ_make(u3_lord* god_u, u3_writ* wit_u) msg = u3nt(c3__live, c3__cram, u3i_chubs(1, &god_u->eve_d)); } break; + case u3_writ_meld: { + msg = u3nt(c3__live, c3__meld, u3_nul); + } break; + case u3_writ_pack: { msg = u3nt(c3__live, c3__pack, u3_nul); } break; @@ -1004,6 +1015,26 @@ u3_lord_cram(u3_lord* god_u) } } +/* u3_lord_meld(): globally deduplicate persistent 
state. +*/ +void +u3_lord_meld(u3_lord* god_u) +{ + u3_writ* wit_u = _lord_writ_new(god_u); + wit_u->typ_e = u3_writ_meld; + _lord_writ_plan(god_u, wit_u); +} + +/* u3_lord_pack(): defragment persistent state. +*/ +void +u3_lord_pack(u3_lord* god_u) +{ + u3_writ* wit_u = _lord_writ_new(god_u); + wit_u->typ_e = u3_writ_pack; + _lord_writ_plan(god_u, wit_u); +} + /* u3_lord_exit(): shutdown gracefully. */ void diff --git a/pkg/urbit/vere/pier.c b/pkg/urbit/vere/pier.c index 333a22e213..6ea699eac8 100644 --- a/pkg/urbit/vere/pier.c +++ b/pkg/urbit/vere/pier.c @@ -1506,6 +1506,30 @@ u3_pier_cram(u3_pier* pir_u) return c3n; } +/* u3_pier_meld(): globally deduplicate persistent state. +*/ +void +u3_pier_meld(u3_pier* pir_u) +{ +#ifdef VERBOSE_PIER + fprintf(stderr, "pier: (%" PRIu64 "): meld: plan\r\n", pir_u->god_u->eve_d); +#endif + + u3_lord_meld(pir_u->god_u); +} + +/* u3_pier_pack(): defragment persistent state. +*/ +void +u3_pier_pack(u3_pier* pir_u) +{ +#ifdef VERBOSE_PIER + fprintf(stderr, "pier: (%" PRIu64 "): meld: plan\r\n", pir_u->god_u->eve_d); +#endif + + u3_lord_pack(pir_u->god_u); +} + /* _pier_work_close_cb(): dispose u3_work after closing handles. */ static void diff --git a/pkg/urbit/worker/serf.c b/pkg/urbit/worker/serf.c index cea2e8dcad..71460db93a 100644 --- a/pkg/urbit/worker/serf.c +++ b/pkg/urbit/worker/serf.c @@ -343,12 +343,6 @@ _serf_sure_feck(u3_serf* sef_u, c3_w pre_w, u3_noun vir) rec_o = c3y; } - // pack memory on |pack - // - if ( c3__pack == u3h(fec) ) { - pac_o = c3y; - } - riv = u3t(riv); i_w++; } @@ -367,6 +361,9 @@ _serf_sure_feck(u3_serf* sef_u, c3_w pre_w, u3_noun vir) // low-priority: 2^27 contiguous words remaining (~536 MB) // XX maybe use 2^23 (~16 MB) and 2^26 (~268 MB? // + // XX these thresholds should trigger notifications sent to the king + // instead of directly triggering these remedial actions. 
+ // { u3_noun pri = u3_none; c3_w pos_w = u3a_open(u3R); From 562548440b2c57138b72e5936e38da89f0726143 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 8 Sep 2020 13:20:43 -0700 Subject: [PATCH 112/123] ur: assert not-null after all allocations --- pkg/urbit/ur/hashcons.c | 14 ++++++++++---- pkg/urbit/ur/serial.c | 7 +++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index b4b4c65209..04c2b735a4 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -25,6 +25,7 @@ ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) uint64_t i, next = prev + size; buckets = calloc(next, sizeof(*buckets)); + assert( buckets ); if ( old_buckets ) { for ( i = 0; i < old_size; i++ ) { @@ -126,6 +127,7 @@ ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, uint64_t size) uint64_t i, next = prev + size; buckets = calloc(next, sizeof(*buckets)); + assert( buckets ); if ( old_buckets ) { for ( i = 0; i < old_size; i++ ) { @@ -227,6 +229,7 @@ ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) uint64_t i, next = prev + size; buckets = calloc(next, sizeof(*buckets)); + assert( buckets ); if ( old_buckets ) { for ( i = 0; i < old_size; i++ ) { @@ -425,11 +428,11 @@ ur_atoms_grow(ur_atoms_t *atoms) atoms->bytes = malloc(next * ( sizeof(*atoms->bytes) + sizeof(*atoms->lens) + sizeof(*atoms->mugs) )); + assert( atoms->bytes ); + atoms->lens = (void*)((char*)atoms->bytes + (next * sizeof(*atoms->bytes))); atoms->mugs = (void*)((char*)atoms->lens + (next * sizeof(*atoms->lens))); - assert( atoms->bytes ); - if ( bytes ) { memcpy(atoms->bytes, bytes, size * (sizeof(*bytes))); memcpy(atoms->lens, lens, size * (sizeof(*lens))); @@ -455,11 +458,11 @@ ur_cells_grow(ur_cells_t *cells) cells->heads = malloc(next * ( sizeof(*cells->heads) + sizeof(*cells->heads) + sizeof(*cells->mugs) )); + assert( cells->heads ); + cells->tails = (void*)((char*)cells->heads + 
(next * sizeof(*cells->heads))); cells->mugs = (void*)((char*)cells->tails + (next * sizeof(*cells->tails))); - assert( cells->heads ); - if ( heads ) { memcpy(cells->heads, heads, size * (sizeof(*heads))); memcpy(cells->tails, tails, size * (sizeof(*tails))); @@ -914,6 +917,7 @@ ur_nvec_init(ur_nvec_t *v, uint64_t size) { v->fill = 0; v->refs = calloc(size, sizeof(ur_nref)); + assert( v->refs ); } void @@ -927,6 +931,7 @@ ur_walk_fore(ur_root_t *r, ur_nref *top, *don; don = malloc(size * sizeof(*don)); + assert( don ); top = don + ++fill; *top = ref; @@ -952,6 +957,7 @@ ur_walk_fore(ur_root_t *r, if ( size == fill ) { uint64_t next = prev + size; don = realloc(don, next * sizeof(*don)); + assert( don ); top = don + fill; prev = size; size = next; diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 5a945072df..e30c8d9123 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -82,6 +82,7 @@ ur_jam_unsafe(ur_root_t *r, j.bsw.prev = ur_fib11; j.bsw.size = ur_fib12; j.bsw.bytes = calloc(j.bsw.size, 1); + assert( j.bsw.bytes ); ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); @@ -150,6 +151,7 @@ _cue_next(ur_root_t *r, if ( s->fill == s->size ) { uint32_t next = s->prev + s->size; s->f = realloc(s->f, next * sizeof(*s->f)); + assert( s->f ); s->prev = s->size; s->size = next; } @@ -193,6 +195,8 @@ _cue_next(ur_root_t *r, else { uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); uint8_t *byt = calloc(len_byt, 1); + assert( byt ); + ur_bsr_bytes_any(bsr, len, byt); // strip trailing zeroes @@ -239,6 +243,7 @@ ur_cue_unsafe(ur_root_t *r, s.prev = ur_fib10; s.size = ur_fib11; s.f = malloc(s.size * sizeof(*s.f)); + assert( s.f ); // advance into stream // @@ -331,6 +336,7 @@ _cue_test_next(_cue_test_stack_t *s, if ( s->fill == s->size ) { uint32_t next = s->prev + s->size; s->f = realloc(s->f, next * sizeof(*s->f)); + assert( s->f ); s->prev = s->size; s->size = next; } @@ -398,6 +404,7 @@ ur_cue_test_unsafe(ur_dict_t *dict, s.prev = ur_fib10; s.size = 
ur_fib11; s.f = malloc(s.size * sizeof(*s.f)); + assert( s.f ); // advance into stream // From 82e91811d4bee5612c89f5f401d6afd798c8da51 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 8 Sep 2020 22:25:24 -0700 Subject: [PATCH 113/123] ur: replaces allocation-failure assertions with error printfs/abort() --- pkg/urbit/ur/bitstream.c | 9 ++++++- pkg/urbit/ur/hashcons.c | 57 ++++++++++++++++++++-------------------- pkg/urbit/ur/serial.c | 30 ++++++++++++--------- 3 files changed, 55 insertions(+), 41 deletions(-) diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index 14b841ba40..f525f31b56 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -1,4 +1,5 @@ #include +#include #include #include @@ -708,7 +709,13 @@ ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) uint64_t next = size + step; bsw->bytes = realloc(bsw->bytes, next); - assert(bsw->bytes); + + if ( !bsw->bytes ) { + fprintf(stderr, + "ur: bitstream-write allocation failed, out of memory\r\n"); + abort(); + } + memset(bsw->bytes + size, 0, step); bsw->prev = size; diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 04c2b735a4..38a8abbf5f 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -17,6 +17,18 @@ uint64_t ur_met0_bytes_unsafe(uint8_t *byt, uint64_t len); +static void* +_oom(const char* cap, void* v) +{ + if ( !v ) { + fprintf(stderr, + "ur: hashcons: %s: allocation failed, out of memory\r\n", cap); + abort(); + } + + return v; +} + void ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) { @@ -24,8 +36,7 @@ ur_dict32_grow(ur_root_t *r, ur_dict32_t *dict, uint64_t prev, uint64_t size) uint64_t old_size = dict->size; uint64_t i, next = prev + size; - buckets = calloc(next, sizeof(*buckets)); - assert( buckets ); + buckets = _oom("dict32_grow", calloc(next, sizeof(*buckets))); if ( old_buckets ) { for ( i = 0; i < old_size; i++ ) { @@ -126,8 +137,7 @@ ur_dict64_grow(ur_root_t *r, ur_dict64_t *dict, uint64_t prev, 
uint64_t size) uint64_t old_size = dict->size; uint64_t i, next = prev + size; - buckets = calloc(next, sizeof(*buckets)); - assert( buckets ); + buckets = _oom("dict64_grow", calloc(next, sizeof(*buckets))); if ( old_buckets ) { for ( i = 0; i < old_size; i++ ) { @@ -228,8 +238,7 @@ ur_dict_grow(ur_root_t *r, ur_dict_t *dict, uint64_t prev, uint64_t size) uint64_t old_size = dict->size; uint64_t i, next = prev + size; - buckets = calloc(next, sizeof(*buckets)); - assert( buckets ); + buckets = _oom("dict_grow", calloc(next, sizeof(*buckets))); if ( old_buckets ) { for ( i = 0; i < old_size; i++ ) { @@ -425,11 +434,9 @@ ur_atoms_grow(ur_atoms_t *atoms) uint64_t *lens = atoms->lens; ur_mug *mugs = atoms->mugs; - atoms->bytes = malloc(next * ( sizeof(*atoms->bytes) - + sizeof(*atoms->lens) - + sizeof(*atoms->mugs) )); - assert( atoms->bytes ); - + atoms->bytes = _oom("atoms_grow", malloc(next * ( sizeof(*atoms->bytes) + + sizeof(*atoms->lens) + + sizeof(*atoms->mugs) ))); atoms->lens = (void*)((char*)atoms->bytes + (next * sizeof(*atoms->bytes))); atoms->mugs = (void*)((char*)atoms->lens + (next * sizeof(*atoms->lens))); @@ -455,11 +462,9 @@ ur_cells_grow(ur_cells_t *cells) ur_nref *tails = cells->tails; ur_mug *mugs = cells->mugs; - cells->heads = malloc(next * ( sizeof(*cells->heads) - + sizeof(*cells->heads) - + sizeof(*cells->mugs) )); - assert( cells->heads ); - + cells->heads = _oom("cells_grow", malloc(next * ( sizeof(*cells->heads) + + sizeof(*cells->heads) + + sizeof(*cells->mugs) ))); cells->tails = (void*)((char*)cells->heads + (next * sizeof(*cells->heads))); cells->mugs = (void*)((char*)cells->tails + (next * sizeof(*cells->tails))); @@ -666,8 +671,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) return (ur_nref)direct; } else { - uint8_t *copy = malloc(len); - assert( copy ); + uint8_t *copy = _oom("coin_bytes", malloc(len)); memcpy(copy, byt, len); return ur_coin_bytes_unsafe(r, copy, len); @@ -681,10 +685,11 @@ ur_coin64(ur_root_t *r, 
uint64_t n) return n; } else { - uint8_t *byt = malloc(8); - assert( byt ); + uint8_t *byt; assert( 8 == ur_met3_64(n) ); + byt = _oom("coin64", malloc(8)); + byt[0] = ur_mask_8(n); byt[1] = ur_mask_8(n >> 8); byt[2] = ur_mask_8(n >> 16); @@ -873,8 +878,7 @@ ur_root_free(ur_root_t *r) ur_root_t* ur_root_init(void) { - ur_root_t *r = calloc(1, sizeof(*r)); - assert( r ); + ur_root_t *r = _oom("root_init", calloc(1, sizeof(*r))); { ur_dict_t *dict; @@ -916,8 +920,7 @@ void ur_nvec_init(ur_nvec_t *v, uint64_t size) { v->fill = 0; - v->refs = calloc(size, sizeof(ur_nref)); - assert( v->refs ); + v->refs = _oom("nvec_init", calloc(size, sizeof(ur_nref))); } void @@ -930,8 +933,7 @@ ur_walk_fore(ur_root_t *r, uint64_t prev = 89, size = 144, fill = 0; ur_nref *top, *don; - don = malloc(size * sizeof(*don)); - assert( don ); + don = _oom("walk_fore", malloc(size * sizeof(*don))); top = don + ++fill; *top = ref; @@ -956,8 +958,7 @@ ur_walk_fore(ur_root_t *r, // if ( size == fill ) { uint64_t next = prev + size; - don = realloc(don, next * sizeof(*don)); - assert( don ); + don = _oom("walk_fore", realloc(don, next * sizeof(*don))); top = don + fill; prev = size; size = next; diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index e30c8d9123..83610c12bf 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -3,6 +3,18 @@ #include "ur/ur.h" +static void* +_oom(const char* cap, void* v) +{ + if ( !v ) { + fprintf(stderr, + "ur: hashcons: %s: allocation failed, out of memory\r\n", cap); + abort(); + } + + return v; +} + static inline void _bsw_atom(ur_root_t *r, ur_nref ref, ur_bsw_t *bsw, uint64_t len) { @@ -81,8 +93,7 @@ ur_jam_unsafe(ur_root_t *r, j.bsw.prev = ur_fib11; j.bsw.size = ur_fib12; - j.bsw.bytes = calloc(j.bsw.size, 1); - assert( j.bsw.bytes ); + j.bsw.bytes = _oom("jam", calloc(j.bsw.size, 1)); ur_walk_fore(r, ref, &j, _jam_atom, _jam_cell); @@ -150,8 +161,7 @@ _cue_next(ur_root_t *r, // if ( s->fill == s->size ) { uint32_t next = s->prev + 
s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - assert( s->f ); + s->f = _oom("cue_next stack", realloc(s->f, next * sizeof(*s->f))); s->prev = s->size; s->size = next; } @@ -194,8 +204,7 @@ _cue_next(ur_root_t *r, } else { uint64_t len_byt = (len >> 3) + !!ur_mask_3(len); - uint8_t *byt = calloc(len_byt, 1); - assert( byt ); + uint8_t *byt = _oom("cue_next bytes", calloc(len_byt, 1)); ur_bsr_bytes_any(bsr, len, byt); @@ -242,8 +251,7 @@ ur_cue_unsafe(ur_root_t *r, // s.prev = ur_fib10; s.size = ur_fib11; - s.f = malloc(s.size * sizeof(*s.f)); - assert( s.f ); + s.f = _oom("cue stack", malloc(s.size * sizeof(*s.f))); // advance into stream // @@ -335,8 +343,7 @@ _cue_test_next(_cue_test_stack_t *s, // if ( s->fill == s->size ) { uint32_t next = s->prev + s->size; - s->f = realloc(s->f, next * sizeof(*s->f)); - assert( s->f ); + s->f = _oom("cue_test", realloc(s->f, next * sizeof(*s->f))); s->prev = s->size; s->size = next; } @@ -403,8 +410,7 @@ ur_cue_test_unsafe(ur_dict_t *dict, // s.prev = ur_fib10; s.size = ur_fib11; - s.f = malloc(s.size * sizeof(*s.f)); - assert( s.f ); + s.f = _oom("cue_test", malloc(s.size * sizeof(*s.f))); // advance into stream // From 6cb5df513eb6bf8f7a23691d10950525ce58464c Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 8 Sep 2020 22:30:34 -0700 Subject: [PATCH 114/123] u3: refactors off-loom re-allocation in _cu_from_loom() --- pkg/urbit/noun/urth.c | 194 ++++++++++++++++++++++-------------------- 1 file changed, 104 insertions(+), 90 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 33c9bf2c95..6d802ed579 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -29,29 +29,38 @@ _cu_met_3(u3a_atom* vat_u) } } +/* _cu_atom_to_ref(): allocate indirect atom off-loom. 
+*/ static inline ur_nref -_cu_atom_to_ref(u3a_atom* vat_u, ur_root_t *r) +_cu_atom_to_ref(ur_root_t* rot_u, u3a_atom* vat_u) { ur_nref ref; + c3_d val_d; switch ( vat_u->len_w ) { case 2: { - ref = ur_coin64(r, ( ((c3_d)vat_u->buf_w[1]) << 32 - | ((c3_d)vat_u->buf_w[0]) )); + val_d = ((c3_d)vat_u->buf_w[1]) << 32 + | ((c3_d)vat_u->buf_w[0]); + ref = ur_coin64(rot_u, val_d); } break; case 1: { - ref = ur_coin64(r, (c3_d)vat_u->buf_w[0]); + val_d = (c3_d)vat_u->buf_w[0]; + ref = ur_coin64(rot_u, val_d); } break; - default: { - c3_assert( vat_u->len_w ); - + // XX assumes little-endian + // c3_y* byt_y = (c3_y*)vat_u->buf_w; - c3_w len_w = _cu_met_3(vat_u); + c3_d len_d = ((c3_d)vat_u->len_w) << 2; - ref = ur_coin_bytes(r, byt_y, (c3_d)len_w); + c3_assert( len_d ); + + // NB: this call will accounts for any trailing null bytes + // caused by an overestimate in [len_d] + // + ref = ur_coin_bytes(rot_u, byt_y, len_d); } break; } @@ -90,25 +99,19 @@ _cu_box_stash(u3a_noun* som_u, ur_nref ref) box_w[2] = ref >> 32; } -// stack frame for recording head vs tail iteration -// -// In Hoon, this structure would be as follows: -// -// $% [%root ~] -// [%head loom-cell=^] -// [%tail loom-cell=^ off-loom-head=*] -// == -// +/* +** stack frame for recording head vs tail iteration +** +** $? [LOM_HEAD cell=*] +** [ref=* cell=*] +*/ -#define STACK_ROOT 0 -#define STACK_HEAD 1 -#define STACK_TAIL 2 +#define LOM_HEAD 0xffffffffffffffffULL typedef struct _cu_frame_s { - c3_y tag_y; - u3a_cell* cel_u; ur_nref ref; + u3a_cell* cel_u; } _cu_frame; typedef struct _cu_stack_s @@ -119,91 +122,103 @@ typedef struct _cu_stack_s _cu_frame* fam_u; } _cu_stack; -/* _cu_stack_push(): push a "stack" frame. +/* _cu_from_loom_next(): advance off-loom reallocation traversal. 
*/ -static inline void -_cu_stack_push(_cu_stack *s, c3_y tag_y, u3a_cell* cel_u, ur_nref ref) +static inline ur_nref +_cu_from_loom_next(_cu_stack* tac_u, ur_root_t* rot_u, u3_noun a) { - if ( s->fil_w == s->siz_w ) { - c3_w nex_w = s->pre_w + s->siz_w; - s->fam_u = c3_realloc(s->fam_u, nex_w * sizeof(*s->fam_u)); - s->pre_w = s->siz_w; - s->siz_w = nex_w; - } + while ( 1 ) { + // u3 direct == ur direct + // + if ( c3y == u3a_is_cat(a) ) { + return (ur_nref)a; + } + else { + u3a_noun* som_u = u3a_to_ptr(a); + ur_nref ref; - _cu_frame* fam_u = &(s->fam_u[s->fil_w++]); - fam_u->tag_y = tag_y; - fam_u->cel_u = cel_u; - fam_u->ref = ref; + // check for relocation pointers + // + if ( c3y == _cu_box_check(som_u, &ref) ) { + return ref; + } + // reallocate indirect atoms, stashing relocation pointers + // + else if ( c3y == u3a_is_atom(a) ) { + ref = _cu_atom_to_ref(rot_u, (u3a_atom*)som_u); + _cu_box_stash(som_u, ref); + return ref; + } + else { + u3a_cell* cel_u = (u3a_cell*)som_u; + + // reallocate the stack if full + // + if ( tac_u->fil_w == tac_u->siz_w ) { + c3_w nex_w = tac_u->pre_w + tac_u->siz_w; + tac_u->fam_u = c3_realloc(tac_u->fam_u, nex_w * sizeof(*tac_u->fam_u)); + tac_u->pre_w = tac_u->siz_w; + tac_u->siz_w = nex_w; + } + + // push a head-frame and continue into the head + // + { + _cu_frame* fam_u = &(tac_u->fam_u[tac_u->fil_w++]); + fam_u->ref = LOM_HEAD; + fam_u->cel_u = cel_u; + } + + a = cel_u->hed; + continue; + } + } + } } /* _cu_from_loom(): reallocate [a] off loom, in [r]. 
*/ static ur_nref -_cu_from_loom(ur_root_t *r, u3_noun a) +_cu_from_loom(ur_root_t* rot_u, u3_noun a) { - ur_nref ref; + _cu_stack tac_u = {0}; + ur_nref ref; - _cu_stack s = { .pre_w = 89, .siz_w = 144, .fil_w = 0, .fam_u = 0 }; - s.fam_u = c3_malloc((s.pre_w + s.siz_w) * sizeof(*s.fam_u)); - _cu_stack_push(&s, STACK_ROOT, 0, 0); + tac_u.pre_w = ur_fib10; + tac_u.siz_w = ur_fib11; + tac_u.fam_u = c3_malloc(tac_u.siz_w * sizeof(*tac_u.fam_u)); - advance: { - // u3 direct == ur direct + ref = _cu_from_loom_next(&tac_u, rot_u, a); + + // incorporate reallocated ref, accounting for cells + // + while ( tac_u.fil_w ) { + // peek at the top of the stack // - if ( c3y == u3a_is_cat(a) ) { - ref = (ur_nref)a; - goto retreat; + _cu_frame* fam_u = &(tac_u.fam_u[tac_u.fil_w - 1]); + + // [fam_u] is a head-frame; stash ref and continue into the tail + // + if ( LOM_HEAD == fam_u->ref ) { + fam_u->ref = ref; + ref = _cu_from_loom_next(&tac_u, rot_u, fam_u->cel_u->tel); } + // [fam_u] is a tail-frame; cons refs and pop the stack + // else { - u3a_noun* som_u = u3a_to_ptr(a); - - // all bits set == already reallocated - // - if ( c3y == _cu_box_check(som_u, &ref) ) { - goto retreat; - } - else if ( c3y == u3a_is_atom(a) ) { - ref = _cu_atom_to_ref((u3a_atom*)som_u, r); - _cu_box_stash(som_u, ref); - goto retreat; - } - else { - u3a_cell* cel_u = (u3a_cell*)som_u; - _cu_stack_push(&s, STACK_HEAD, cel_u, 0); - a = cel_u->hed; - goto advance; - } + ref = ur_cons(rot_u, fam_u->ref, ref); + _cu_box_stash((u3a_noun*)fam_u->cel_u, ref); + tac_u.fil_w--; } } - retreat: { - _cu_frame fam_u = s.fam_u[--s.fil_w]; - - switch ( fam_u.tag_y ) { - default: c3_assert(0); - case STACK_ROOT: break; - - case STACK_HEAD: { - _cu_stack_push(&s, STACK_TAIL, fam_u.cel_u, ref); - a = fam_u.cel_u->tel; - goto advance; - } - - case STACK_TAIL: { - u3a_cell* cel_u = fam_u.cel_u; - ref = ur_cons(r, fam_u.ref, ref); - _cu_box_stash((u3a_noun*)cel_u, ref); - goto retreat; - } - } - } - - free(s.fam_u); + 
c3_free(tac_u.fam_u); return ref; } +/* _cu_vec: parameters for cold-state hamt walk. +*/ typedef struct _cu_vec_s { ur_nvec_t* vec_u; ur_root_t* rot_u; @@ -393,7 +408,7 @@ _cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) // if ( fil_u ) { ur_root_info(fil_u, rot_u); - fprintf(stderr, "\r\n"); + fprintf(fil_u, "\r\n"); } // reinitialize loom @@ -432,11 +447,10 @@ u3u_meld(void) { ur_root_t* rot_u; ur_nvec_t cod_u; - ur_nref ken; c3_assert( &(u3H->rod_u) == u3R ); - ken = _cu_realloc(stderr, &rot_u, &cod_u); + _cu_realloc(stderr, &rot_u, &cod_u); // dispose off-loom structures // From c20385a212041dacd834c9bacf4581c5701ced58 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 9 Sep 2020 13:12:27 -0700 Subject: [PATCH 115/123] u3: refactors on-loom re-allocation in _cu_all_to_loom() --- pkg/urbit/noun/urth.c | 144 +++++++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 6d802ed579..8a58000ecd 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -259,101 +259,93 @@ typedef struct _cu_loom_s { u3_noun *cel; // cells } _cu_loom; -/* _cu_loom_free(): dispose loom relocation pointers -*/ -static void -_cu_loom_free(_cu_loom* lom_u) -{ - ur_dict_free((ur_dict_t*)&(lom_u->map_u)); - free(lom_u->vat); - free(lom_u->cel); -} - -/* _cu_atoms_to_loom(): allocate all indirect atoms on the loom. -*/ -static void -_cu_atoms_to_loom(ur_root_t* rot_u, _cu_loom* lom_u) -{ - c3_d* len_d = rot_u->atoms.lens; - c3_y** byt_y = rot_u->atoms.bytes; - c3_d fil_d = rot_u->atoms.fill; - u3_atom* vat = lom_u->vat = calloc(fil_d, sizeof(u3_atom)); - c3_d i_d; - - for ( i_d = 0; i_d < fil_d; i_d++ ) { - vat[i_d] = u3i_bytes(len_d[i_d], byt_y[i_d]); - } -} - /* _cu_ref_to_noun(): lookup/allocate [ref] on the loom. 
*/ static u3_noun -_cu_ref_to_noun(ur_nref ref, _cu_loom* lom_u) +_cu_ref_to_noun(ur_root_t* rot_u, ur_nref ref, _cu_loom* lom_u) { switch ( ur_nref_tag(ref) ) { - default: assert(0); + default: c3_assert(0); + // all ur indirect atoms have been pre-reallocated on the loom. + // + case ur_iatom: return lom_u->vat[ur_nref_idx(ref)]; + + + // cells were allocated off-loom in cons-order, and are traversed + // in the same order: we've already relocated any one we could need here. + // + case ur_icell: return lom_u->cel[ur_nref_idx(ref)]; + + // u3 direct atoms are 31-bit, while ur direct atoms are 62-bit; + // we use a hashtable to deduplicate the non-overlapping space + // case ur_direct: { + u3_atom vat; + if ( 0x7fffffffULL >= ref ) { return (u3_atom)ref; } + else if ( ur_dict32_get(rot_u, &lom_u->map_u, ref, (c3_w*)&vat) ) { + return vat; + } else { - c3_w val_w; - - // XX the ur_root_t argument here is only used to dereference a mug, - // but these atoms are all direct, so it'll never be used - // - if ( ur_dict32_get(0, &lom_u->map_u, ref, &val_w) ) { - return (u3_atom)val_w; + { + c3_w wor_w[2] = { ref & 0xffffffff, ref >> 32 }; + vat = (c3_w)u3i_words(2, wor_w); } - else { - u3_atom vat; - { - c3_w wor_w[2] = { ref & 0xffffffff, ref >> 32 }; - vat = val_w = u3i_words(2, wor_w); - } - ur_dict32_put(0, &lom_u->map_u, ref, val_w); - - return vat; - } + ur_dict32_put(0, &lom_u->map_u, ref, (c3_w)vat); + return vat; } } break; - - case ur_iatom: return lom_u->vat[ur_nref_idx(ref)]; - - case ur_icell: return lom_u->cel[ur_nref_idx(ref)]; - } -} - -/* _cu_cells_to_loom(): allocate all cells on the loom. 
-*/ -static void -_cu_cells_to_loom(ur_root_t* rot_u, _cu_loom* lom_u) -{ - ur_nref* hed = rot_u->cells.heads; - ur_nref* tal = rot_u->cells.tails; - c3_d fil_d = rot_u->cells.fill; - u3_noun* cel = lom_u->cel = calloc(fil_d, sizeof(u3_noun)); - c3_d i_d; - - for ( i_d = 0; i_d < fil_d; i_d++ ) { - cel[i_d] = u3nc(_cu_ref_to_noun(hed[i_d], lom_u), - _cu_ref_to_noun(tal[i_d], lom_u)); - // XX mug? } } /* _cu_all_to_loom(): reallocate all of [rot_u] on the loom, restore roots. +** NB: requires all roots to be cells +** does *not* track refcounts, which must be +** subsequently reconstructed via tracing. */ static void _cu_all_to_loom(ur_root_t* rot_u, ur_nref ken, ur_nvec_t* cod_u) { - _cu_loom lom_u = {0}; - ur_dict32_grow(0, &lom_u.map_u, 89, 144); + _cu_loom lom_u = {0}; + c3_d i_d, fil_d; - _cu_atoms_to_loom(rot_u, &lom_u); - _cu_cells_to_loom(rot_u, &lom_u); + ur_dict32_grow(0, &lom_u.map_u, ur_fib11, ur_fib12); + + // allocate all atoms on the loom. + // + { + c3_d* len_d = rot_u->atoms.lens; + c3_y** byt_y = rot_u->atoms.bytes; + + fil_d = rot_u->atoms.fill; + lom_u.vat = calloc(fil_d, sizeof(u3_atom)); + + for ( i_d = 0; i_d < fil_d; i_d++ ) { + lom_u.vat[i_d] = u3i_bytes(len_d[i_d], byt_y[i_d]); + } + } + + // allocate all cells on the loom. + // + { + ur_nref* hed = rot_u->cells.heads; + ur_nref* tal = rot_u->cells.tails; + u3_noun cel; + + fil_d = rot_u->cells.fill; + lom_u.cel = c3_calloc(fil_d * sizeof(u3_noun)); + + for ( i_d = 0; i_d < fil_d; i_d++ ) { + cel = u3nc(_cu_ref_to_noun(rot_u, hed[i_d], &lom_u), + _cu_ref_to_noun(rot_u, tal[i_d], &lom_u)); + lom_u.cel[i_d] = cel; + u3r_mug(cel); + } + } // restore kernel reference (always a cell) // @@ -375,7 +367,11 @@ _cu_all_to_loom(ur_root_t* rot_u, ur_nref ken, ur_nvec_t* cod_u) } } - _cu_loom_free(&lom_u); + // dispose of relocation pointers + // + c3_free(lom_u.cel); + c3_free(lom_u.vat); + ur_dict_free((ur_dict_t*)&lom_u.map_u); } /* _cu_realloc(): hash-cons roots off-loom, reallocate on loom. 
@@ -421,6 +417,10 @@ _cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) // _cu_all_to_loom(rot_u, ken, &cod_u); + // establish correct refcounts via tracing + // + u3m_grab(u3_none); + // allocate new hot jet state; re-establish warm // u3j_boot(c3y); From efe1f7ce3944a9a482a92c200b4e670f21b3aeeb Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 9 Sep 2020 22:30:20 -0700 Subject: [PATCH 116/123] u3: disables meld and cram under U3_MEMORY_DEEBUG --- pkg/urbit/noun/urth.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 8a58000ecd..82f052d1d7 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -379,6 +379,10 @@ _cu_all_to_loom(ur_root_t* rot_u, ur_nref ken, ur_nvec_t* cod_u) static ur_nref _cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) { +#ifdef U3_MEMORY_DEBUG + c3_assert(0); +#endif + // bypassing page tracking as an optimization // // NB: u3e_yolo() will mark all as dirty, and @@ -442,6 +446,13 @@ _cu_realloc(FILE* fil_u, ur_root_t** tor_u, ur_nvec_t* doc_u) /* u3u_meld(): globally deduplicate memory. */ +#ifdef U3_MEMORY_DEBUG +void +u3u_meld(void) +{ + fprintf(stderr, "u3: unable to meld under U3_MEMORY_DEBUG\r\n"); +} +#else void u3u_meld(void) { @@ -457,6 +468,7 @@ u3u_meld(void) ur_nvec_free(&cod_u); ur_root_free(rot_u); } +#endif /* _cu_rock_path(): format rock path. */ @@ -624,6 +636,14 @@ _cu_rock_save(c3_c* dir_c, c3_d eve_d, c3_d len_d, c3_y* byt_y) /* u3u_cram(): globably deduplicate memory, and write a rock to disk. */ +#ifdef U3_MEMORY_DEBUG +c3_o +u3u_cram(c3_c* dir_c, c3_d eve_d) +{ + fprintf(stderr, "u3: unable to cram under U3_MEMORY_DEBUG\r\n"); + return c3n; +} +#else c3_o u3u_cram(c3_c* dir_c, c3_d eve_d) { @@ -670,6 +690,7 @@ u3u_cram(c3_c* dir_c, c3_d eve_d) return ret_o; } +#endif /* u3u_mmap_read(): open and mmap the file at [pat_c] for reading. 
*/ From cda7a7edb0f9d4c8117bad6462d4b284f8f51785 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 9 Sep 2020 22:33:44 -0700 Subject: [PATCH 117/123] ur: consistently use fibonacci constants --- pkg/urbit/ur/hashcons.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 38a8abbf5f..a08de03110 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -882,29 +882,28 @@ ur_root_init(void) { ur_dict_t *dict; - uint64_t fib11 = 89, fib12 = 144; // allocate atom storage // - r->atoms.prev = fib11; - r->atoms.size = fib12; + r->atoms.prev = ur_fib11; + r->atoms.size = ur_fib12; ur_atoms_grow(&(r->atoms)); // allocate atom hashtable // dict = &(r->atoms.dict); - ur_dict_grow(r, dict, fib11, fib12); + ur_dict_grow(r, dict, ur_fib11, ur_fib12); // allocate cell storage // - r->cells.prev = fib11; - r->cells.size = fib12; + r->cells.prev = ur_fib11; + r->cells.size = ur_fib12; ur_cells_grow(&(r->cells)); // allocate cell hashtable // dict = &(r->cells.dict); - ur_dict_grow(r, dict, fib11, fib12); + ur_dict_grow(r, dict, ur_fib11, ur_fib12); } return r; @@ -930,7 +929,7 @@ ur_walk_fore(ur_root_t *r, void (*atom)(ur_root_t*, ur_nref, void*), ur_bool_t (*cell)(ur_root_t*, ur_nref, void*)) { - uint64_t prev = 89, size = 144, fill = 0; + uint64_t prev = ur_fib11, size = ur_fib12, fill = 0; ur_nref *top, *don; don = _oom("walk_fore", malloc(size * sizeof(*don))); From a3531799086b2aefcde819b368c2ef8f01d963d4 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Wed, 9 Sep 2020 22:35:52 -0700 Subject: [PATCH 118/123] vere: consistently handle %meld $writ in king --- pkg/urbit/vere/lord.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/urbit/vere/lord.c b/pkg/urbit/vere/lord.c index a216ce1389..05e55e27d0 100644 --- a/pkg/urbit/vere/lord.c +++ b/pkg/urbit/vere/lord.c @@ -116,6 +116,7 @@ _lord_writ_free(u3_writ* wit_u) case u3_writ_save: case u3_writ_cram: + case u3_writ_meld: case 
u3_writ_pack: case u3_writ_exit: { } break; @@ -209,6 +210,7 @@ _lord_writ_str(u3_writ_type typ_e) case u3_writ_play: return "play"; case u3_writ_save: return "save"; case u3_writ_cram: return "cram"; + case u3_writ_meld: return "meld"; case u3_writ_pack: return "pack"; case u3_writ_exit: return "exit"; } From 39a82fe66d8e97e3517077387499515924ebdc83 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 15 Sep 2020 13:50:22 -0700 Subject: [PATCH 119/123] ur: updates api to consistently use (len, bytes) for vectors --- pkg/urbit/include/ur/defs.h | 2 +- pkg/urbit/include/ur/hashcons.h | 4 ++-- pkg/urbit/noun/urth.c | 4 ++-- pkg/urbit/tests/ur_tests.c | 4 ++-- pkg/urbit/ur/hashcons.c | 18 +++++++++--------- pkg/urbit/ur/serial.c | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pkg/urbit/include/ur/defs.h b/pkg/urbit/include/ur/defs.h index d3b1c6ebfa..c5cf411c1f 100644 --- a/pkg/urbit/include/ur/defs.h +++ b/pkg/urbit/include/ur/defs.h @@ -64,7 +64,7 @@ typedef uint8_t ur_bool_t; ** unsafe wrt trailing null bytes, which are invalid */ inline uint64_t -ur_met0_bytes_unsafe(uint8_t *byt, uint64_t len) +ur_met0_bytes_unsafe(uint64_t len, uint8_t *byt) { uint64_t last = len - 1; return (last << 3) + ur_met0_8(byt[last]); diff --git a/pkg/urbit/include/ur/hashcons.h b/pkg/urbit/include/ur/hashcons.h index 045d5055af..c8a8e4ba50 100644 --- a/pkg/urbit/include/ur/hashcons.h +++ b/pkg/urbit/include/ur/hashcons.h @@ -174,10 +174,10 @@ ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref); ** allocated with system malloc) and trailing null bytes (not allowed). 
*/ ur_nref -ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len); +ur_coin_bytes_unsafe(ur_root_t *r, uint64_t len, uint8_t *byt); ur_nref -ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len); +ur_coin_bytes(ur_root_t *r, uint64_t len, uint8_t *byt); ur_nref ur_coin64(ur_root_t *r, uint64_t n); diff --git a/pkg/urbit/noun/urth.c b/pkg/urbit/noun/urth.c index 82f052d1d7..876e2d5dc1 100644 --- a/pkg/urbit/noun/urth.c +++ b/pkg/urbit/noun/urth.c @@ -57,10 +57,10 @@ _cu_atom_to_ref(ur_root_t* rot_u, u3a_atom* vat_u) c3_assert( len_d ); - // NB: this call will accounts for any trailing null bytes + // NB: this call will account for any trailing null bytes // caused by an overestimate in [len_d] // - ref = ur_coin_bytes(rot_u, byt_y, len_d); + ref = ur_coin_bytes(rot_u, len_d, byt_y); } break; } diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 8859966ee2..56e8f2dc17 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1810,13 +1810,13 @@ _test_jam_cue(void) { uint8_t inp[33] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; uint8_t res[35] = { 0, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; - TEST_CASE("wide", ur_coin_bytes(r, inp, sizeof(inp))); + TEST_CASE("wide", ur_coin_bytes(r, sizeof(inp), inp)); } { uint8_t inp[16] = { 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc, 0xa8, 0xab, 0x60, 0xef, 0x2d, 0xd, 0x0, 0x0, 0x80 }; uint8_t res[19] = { 0x0, 0x2, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x18, 0x50, 0x57, 0xc1, 0xde, 0x5b, 0x1a, 0x0, 0x0, 0x0, 0x1 }; - TEST_CASE("date", ur_coin_bytes(r, inp, sizeof(inp))); + TEST_CASE("date", ur_coin_bytes(r, sizeof(inp), inp)); } ur_root_free(r); diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index a08de03110..756c92f6df 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -15,7 +15,7 @@ // declarations of inline functions // uint64_t 
-ur_met0_bytes_unsafe(uint8_t *byt, uint64_t len); +ur_met0_bytes_unsafe(uint64_t len, uint8_t *byt); static void* _oom(const char* cap, void* v) @@ -543,7 +543,7 @@ ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) uint64_t len = r->atoms.lens[idx]; uint8_t *byt = r->atoms.bytes[idx]; - m_bit = ur_met0_bytes_unsafe(byt, len); + m_bit = ur_met0_bytes_unsafe(len, byt); } switch ( bloq ) { @@ -568,7 +568,7 @@ ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) } static ur_nref -_coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint8_t *byt, uint64_t len) +_coin_unsafe(ur_atoms_t *atoms, ur_mug mug, uint64_t len, uint8_t *byt) { uint64_t fill = atoms->fill; ur_tag tag = ur_iatom; @@ -606,7 +606,7 @@ _cons_unsafe(ur_cells_t *cells, ur_mug mug, ur_nref hed, ur_nref tal) } ur_nref -ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) +ur_coin_bytes_unsafe(ur_root_t *r, uint64_t len, uint8_t *byt) { ur_atoms_t *atoms = &(r->atoms); ur_dict_t *dict = &(atoms->dict); @@ -641,7 +641,7 @@ ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) ur_atoms_grow(atoms); } - tom = _coin_unsafe(atoms, mug, byt, len); + tom = _coin_unsafe(atoms, mug, len, byt); bucket->refs[b_fill] = tom; bucket->fill = 1 + b_fill; @@ -651,7 +651,7 @@ ur_coin_bytes_unsafe(ur_root_t *r, uint8_t *byt, uint64_t len) } ur_nref -ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) +ur_coin_bytes(ur_root_t *r, uint64_t len, uint8_t *byt) { // strip trailing zeroes // @@ -661,7 +661,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) // produce a direct atom if possible // - if ( 62 >= ur_met0_bytes_unsafe(byt, len) ) { + if ( 62 >= ur_met0_bytes_unsafe(len, byt) ) { uint64_t i, direct = 0; for ( i = 0; i < len; i++ ) { @@ -674,7 +674,7 @@ ur_coin_bytes(ur_root_t *r, uint8_t *byt, uint64_t len) uint8_t *copy = _oom("coin_bytes", malloc(len)); memcpy(copy, byt, len); - return ur_coin_bytes_unsafe(r, copy, len); + return ur_coin_bytes_unsafe(r, len, copy); } } @@ -699,7 +699,7 @@ 
ur_coin64(ur_root_t *r, uint64_t n) byt[6] = ur_mask_8(n >> 48); byt[7] = ur_mask_8(n >> 56); - return ur_coin_bytes_unsafe(r, byt, 8); + return ur_coin_bytes_unsafe(r, 8, byt); } } diff --git a/pkg/urbit/ur/serial.c b/pkg/urbit/ur/serial.c index 83610c12bf..a2d111e91e 100644 --- a/pkg/urbit/ur/serial.c +++ b/pkg/urbit/ur/serial.c @@ -214,7 +214,7 @@ _cue_next(ur_root_t *r, len_byt--; } - *out = ur_coin_bytes_unsafe(r, byt, len_byt); + *out = ur_coin_bytes_unsafe(r, len_byt, byt); } ur_dict64_put(r, dict, bits, (uint64_t)*out); From 76f1b782187fa50e8b6008b8f9b167589aa15fd2 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 15 Sep 2020 14:01:34 -0700 Subject: [PATCH 120/123] ur: refactors scalar/direct-atom measurement macros --- pkg/urbit/include/ur/defs.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/pkg/urbit/include/ur/defs.h b/pkg/urbit/include/ur/defs.h index c5cf411c1f..c16a7e153e 100644 --- a/pkg/urbit/include/ur/defs.h +++ b/pkg/urbit/include/ur/defs.h @@ -30,6 +30,13 @@ typedef uint8_t ur_bool_t; #define ur_mask_31(a) (a & 0x7fffffff) #define ur_mask_62(a) (a & 0x3fffffffffffffffULL) +/* +** bloq (binary exponent) conversions +*/ +#define ur_bloq_up1(a) ( (a + 0x1) >> 1 ) +#define ur_bloq_up2(a) ( (a + 0x3) >> 2 ) +#define ur_bloq_up3(a) ( (a + 0x7) >> 3 ) + /* ** atom measurement */ @@ -70,16 +77,8 @@ ur_met0_bytes_unsafe(uint64_t len, uint8_t *byt) return (last << 3) + ur_met0_8(byt[last]); } -#define ur_met3_8(a) \ - ({ uint8_t _a = ur_met0_8(a); \ - ( (_a >> 3) + !!ur_mask_3(_a) ); }) - -#define ur_met3_32(a) \ - ({ uint8_t _a = ur_met0_32(a); \ - ( (_a >> 3) + !!ur_mask_3(_a) ); }) - -#define ur_met3_64(a) \ - ({ uint8_t _a = ur_met0_64(a); \ - ( (_a >> 3) + !!ur_mask_3(_a) ); }) +#define ur_met3_8(a) ur_bloq_up3(ur_met0_8(a)) +#define ur_met3_32(a) ur_bloq_up3(ur_met0_32(a)) +#define ur_met3_64(a) ur_bloq_up3(ur_met0_64(a)) #endif From 6477f6304532002613e8c684972de0e50b4d1aec Mon Sep 17 00:00:00 2001 From: 
Joe Bryan Date: Tue, 15 Sep 2020 14:06:46 -0700 Subject: [PATCH 121/123] ur: refactors ur_met, deduplicating and using bloq-conversion macros --- pkg/urbit/ur/hashcons.c | 76 +++++++++++++---------------------------- 1 file changed, 24 insertions(+), 52 deletions(-) diff --git a/pkg/urbit/ur/hashcons.c b/pkg/urbit/ur/hashcons.c index 756c92f6df..f00a8be5a7 100644 --- a/pkg/urbit/ur/hashcons.c +++ b/pkg/urbit/ur/hashcons.c @@ -505,65 +505,37 @@ ur_bytes(ur_root_t *r, ur_nref ref, uint8_t **byt, uint64_t *len) uint64_t ur_met(ur_root_t *r, uint8_t bloq, ur_nref ref) { + uint64_t m_bit; + + // XX return bool for cells, length in out parameter + // assert( !ur_deep(ref) ); - // these cases are the same, except for the - // bit-width calculation and the width of their operands - // - switch ( ur_nref_tag(ref) ) { - default: assert(0); + if ( ur_direct == ur_nref_tag(ref) ) { + m_bit = ur_met0_64(ref); + } + else { + uint64_t idx = ur_nref_idx(ref); + uint64_t len = r->atoms.lens[idx]; + uint8_t *byt = r->atoms.bytes[idx]; - case ur_direct: { - uint8_t m_bit = ur_met0_64(ref); + m_bit = ur_met0_bytes_unsafe(len, byt); + } - switch ( bloq ) { - case 0: return m_bit; - case 1: return (m_bit + 1) >> 1; - case 2: return (m_bit + 3) >> 2; + switch ( bloq ) { + case 0: return m_bit; + case 1: return ur_bloq_up1(m_bit); + case 2: return ur_bloq_up2(m_bit); - { - // hand-inline of ur_met3_64 - // - uint8_t m_byt = (m_bit >> 3) + !!ur_mask_3(m_bit); + { + uint64_t m_byt = ur_bloq_up3(m_bit); - case 3: return m_byt; - default: { - uint8_t off = (bloq - 3); - return (m_byt + ((1 << off) - 1)) >> off; - } - } + case 3: return m_byt; + default: { + uint8_t off = (bloq - 3); + return (m_byt + ((1ULL << off) - 1)) >> off; } - } break; - - case ur_iatom: { - uint64_t m_bit; - - { - uint64_t idx = ur_nref_idx(ref); - uint64_t len = r->atoms.lens[idx]; - uint8_t *byt = r->atoms.bytes[idx]; - - m_bit = ur_met0_bytes_unsafe(len, byt); - } - - switch ( bloq ) { - case 0: return m_bit; - case 
1: return (m_bit + 1) >> 1; - case 2: return (m_bit + 3) >> 2; - - { - // hand-inline of ur_met3_64 - // - uint64_t m_byt = (m_bit >> 3) + !!ur_mask_3(m_bit); - - case 3: return m_byt; - default: { - uint8_t off = (bloq - 3); - return (m_byt + ((1ULL << off) - 1)) >> off; - } - } - } - } break; + } } } From 0064271e7d85f679f1cbabea7b6d7f9b2197758f Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 15 Sep 2020 15:09:17 -0700 Subject: [PATCH 122/123] ur: refactors bitstream length calculatings, using bloq-conversion macros --- pkg/urbit/ur/bitstream.c | 52 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index f525f31b56..31f485b853 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -875,11 +875,10 @@ _bsw32_unsafe(ur_bsw_t *bsw, uint8_t len, uint32_t val) void ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val) { - uint8_t bits, need; + uint8_t need; len = ur_min(32, len); - bits = bsw->off + len; - need = (bits >> 3) + !!ur_mask_3(bits); + need = ur_bloq_up3( bsw->off + len ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -999,11 +998,10 @@ _bsw64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) void ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - uint8_t bits, need; + uint8_t need; len = ur_min(64, len); - bits = bsw->off + len; - need = (bits >> 3) + !!ur_mask_3(bits); + need = ur_bloq_up3( bsw->off + len ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1080,8 +1078,7 @@ _bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - uint8_t bits = len + bsw->off; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + uint64_t need = ur_bloq_up3(len + bsw->off); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1117,8 +1114,7 @@ _bsw_bex_unsafe(ur_bsw_t *bsw, 
uint8_t n) void ur_bsw_bex(ur_bsw_t *bsw, uint8_t n) { - uint32_t bits = 1 + n + bsw->off; - uint8_t need = (bits >> 3) + !!ur_mask_3(bits); + uint64_t need = ur_bloq_up3(1 + n + bsw->off); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1144,15 +1140,18 @@ _bsw_mat64_unsafe(ur_bsw_t *bsw, uint8_t len, uint64_t val) } } +/* +* the length of a "mat" run-length encoded atom of [len] bits +*/ +#define MAT_LEN(len) ( ( 0 == len ) ? 1 : len + (2 * ur_met0_64((uint64_t)len)) ) + void ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - uint8_t next, bits, need; + uint8_t need; len = ur_min(64, len); - next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - bits = bsw->off + next; - need = (bits >> 3) + !!ur_mask_3(bits); + need = ur_bloq_up3( bsw->off + MAT_LEN(len) ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1183,9 +1182,7 @@ _bsw_mat_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) void ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - uint64_t bits = bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + uint64_t need = ur_bloq_up3( bsw->off + MAT_LEN(len) ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1204,9 +1201,10 @@ _bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) void ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - uint64_t bits = 2 + bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + uint8_t need; + + len = ur_min(64, len); + need = ur_bloq_up3( 2 + bsw->off + MAT_LEN(len) ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1225,9 +1223,10 @@ _bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) void ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val) { - uint64_t next = ( 0 == len ) ? 
1 : len + (2 * ur_met0_64(len)); - uint64_t bits = 1 + bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + uint8_t need; + + len = ur_min(64, len); + need = ur_bloq_up3( 1 + bsw->off + MAT_LEN(len) ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1246,9 +1245,7 @@ _bsw_atom_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) void ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) { - uint64_t next = ( 0 == len ) ? 1 : len + (2 * ur_met0_64(len)); - uint64_t bits = 1 + bsw->off + next; - uint64_t need = (bits >> 3) + !!ur_mask_3(bits); + uint64_t need = ur_bloq_up3( 1 + bsw->off + MAT_LEN(len) ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); @@ -1260,8 +1257,7 @@ ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) void ur_bsw_cell(ur_bsw_t *bsw) { - uint8_t bits = 2 + bsw->off; - uint8_t need = (bits >> 3) + !!ur_mask_3(bits); + uint8_t need = ur_bloq_up3( 2 + bsw->off ); if ( bsw->fill + need >= bsw->size ) { ur_bsw_grow(bsw, ur_max(need, bsw->prev)); From a6a7b557a95cd246430ce68809d8188d40ae28f4 Mon Sep 17 00:00:00 2001 From: Joe Bryan Date: Tue, 15 Sep 2020 15:12:14 -0700 Subject: [PATCH 123/123] serf: adds comments re: unnecessary failure-mode of %uncram --- pkg/urbit/worker/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pkg/urbit/worker/main.c b/pkg/urbit/worker/main.c index a9d48e840c..dabd2ae182 100644 --- a/pkg/urbit/worker/main.c +++ b/pkg/urbit/worker/main.c @@ -254,6 +254,9 @@ _cw_serf_commence(c3_i argc, c3_c* argv[]) if ( eve_d ) { // XX need not be fatal, need a u3m_reboot equivalent + // XX can spuriously fail due to corrupt memory-image checkpoint, + // need a u3m_half_boot equivalent + // workaround is to delete/move the checkpoint in case of corruption // if ( c3n == u3u_uncram(u3V.dir_c, eve_d) ) { fprintf(stderr, "serf (%" PRIu64 "): rock load failed\r\n", eve_d); @@ -368,6 +371,10 @@ _cw_queu(c3_i argc, c3_c*
argv[]) u3V.dir_c = strdup(dir_c); u3V.sen_d = u3V.dun_d = u3m_boot(dir_c); + // XX can spuriously fail due to corrupt memory-image checkpoint, + // need a u3m_half_boot equivalent + // workaround is to delete/move the checkpoint in case of corruption + // if ( c3n == u3u_uncram(dir_c, eve_d) ) { fprintf(stderr, "urbit-worker: queu: failed\r\n"); exit(1);