diff --git a/pkg/urbit/include/ur/bitstream.h b/pkg/urbit/include/ur/bitstream.h index 5a6d5239c..66781f569 100644 --- a/pkg/urbit/include/ur/bitstream.h +++ b/pkg/urbit/include/ur/bitstream.h @@ -3,25 +3,13 @@ #include -typedef enum { - ur_cue_good = 0, - ur_cue_back = 1, - ur_cue_gone = 2, - ur_cue_meme = 3 -} ur_cue_res_e; - -typedef enum { - ur_jam_atom = 0, - ur_jam_cell = 1, - ur_jam_back = 2 -} ur_cue_tag_e; - /* ** stateful bitstream reader, backed by a byte-buffer, -** supporting a variety of read sizes/patterns. +** maintaining a 64-bit bit-cursor, and supporting a variety +** of read sizes and patterns. ** ** NB: ur_bsr*_any() functions behave as if the stream were infinite, -** subject to overall limit of a 64-bit bit-cursor. +** subject to the overall limit of the bit-cursor. ** */ typedef struct ur_bsr_s { @@ -31,6 +19,31 @@ typedef struct ur_bsr_s { const uint8_t *bytes; } ur_bsr_t; +/* +** generalized bitstream-reader/cue response enum +*/ +typedef enum { + ur_cue_good = 0, // successful read + ur_cue_back = 1, // missing backreference + ur_cue_gone = 2, // read off the end of the stream + ur_cue_meme = 3 // exceeded memory representation +} ur_cue_res_e; + +/* +** jam/cue type tag enumeration +*/ +typedef enum { + ur_jam_atom = 0, + ur_jam_cell = 1, + ur_jam_back = 2 +} ur_cue_tag_e; + +/* +** stateful bitstream writer, backed by a byte-buffer automatically +** reallocated with fibonacci growth, maintaining a 64-bit bit-cursor, +** and supporting a variety of write sizes and patterns. +** +*/ typedef struct ur_bsw_s { uint64_t prev; uint64_t size; @@ -41,83 +54,169 @@ typedef struct ur_bsw_s { } ur_bsw_t; /* -** initialize bitstream and check for 64-bit bit-cursor overflow. +** initialize bitstream-reader and check for 64-bit bit-cursor overflow. */ ur_cue_res_e ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes); +/* +** validate bitstream-reader invariants. 
+*/ ur_bool_t ur_bsr_sane(ur_bsr_t *bsr); +/* +** read a bit, failing at EOS +*/ ur_cue_res_e ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); +/* +** read a bit +*/ uint8_t ur_bsr_bit_any(ur_bsr_t *bsr); +/* +** read N (up to 8) bits into a uint8. +*/ uint8_t ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); +/* +** read N (up to 32) bits into a uint32. +*/ uint32_t ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); +/* +** read N (up to 64) bits into a uint64. +*/ uint64_t ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); +/* +** read N bits into a zero-initialized byte array. +*/ void ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); +/* +** advance the bitstream cursor as if we had read N bits. +*/ void ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len); +/* +** read a jam/cue type tag. +*/ ur_cue_res_e ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); +/* +** read a binary exponent, producing the binary log. +** +** read N (up to 255) zero bits followed by a 1, produce N. +*/ ur_cue_res_e -ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); +ur_bsr_log(ur_bsr_t *bsr, uint8_t *out); +/* +** read an atomic run-length (a la +rub). +** +** read a binary log N, then read N (up to 64) bits, +** produce (N-bits ^ (1 << N)) +*/ ur_cue_res_e ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); +/* +** reallocate bitstream write buffer with max(fibonacci, step) growth. +*/ void ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); +/* +** validate bitstream-writer invariants. +*/ ur_bool_t ur_bsw_sane(ur_bsw_t *bsw); +/* +** write a bit +*/ void ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); +/* +** write N (up to 8) bits of a uint8. +*/ void ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); +/* +** write N (up to 32) bits of a uint32. +*/ void ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); +/* +** write N (up to 64) bits of a uint64. +*/ void ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write N bits of a byte array. +** +** NB: [byt] must contain at least N bits. 
+*/ void ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +/* +** write a binary exponent (N zero bits, followed by a 1). +*/ void ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); +/* +** write N (up to 64) run-length prefixed bits (a la +mat). +*/ void ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write N run-length prefixed bits (a la +mat). +** +** NB: [byt] must contain at least N bits. +*/ void ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +/* +** write a backref tag (1, 1) and N (up to 64) run-length prefixed bits. +*/ void ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write an atom tag (0) and N (up to 64) run-length prefixed bits. +*/ void ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); +/* +** write an atom tag (0) and N run-length prefixed bits. +** +** NB: [byt] must contain at least N bits. +*/ void ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); +/* +** write a cell tag (1, 0) +*/ void ur_bsw_cell(ur_bsw_t *bsw); diff --git a/pkg/urbit/tests/ur_tests.c b/pkg/urbit/tests/ur_tests.c index 2470ef5a7..f65a1fd4a 100644 --- a/pkg/urbit/tests/ur_tests.c +++ b/pkg/urbit/tests/ur_tests.c @@ -1321,7 +1321,7 @@ _bsr_cmp_check(const char* cap, } static ur_cue_res_e -_bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) +_bsr_log_slow(ur_bsr_t *bsr, uint8_t *out) { ur_cue_res_e res; uint8_t bit, i = 0; @@ -1341,7 +1341,7 @@ _bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) } static int -_test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) +_test_bsr_log_loop(const char *cap, uint8_t len, uint8_t val) { int ret = 1; ur_bsr_t a, b; @@ -1360,8 +1360,8 @@ _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) memset(bytes, 0x0, j); memset(bytes + j, val, len - j); - e = _bsr_rub_log_slow(&a, &c); - f = ur_bsr_rub_log(&b, &d); + e = _bsr_log_slow(&a, &c); + f = ur_bsr_log(&b, &d); ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); } @@ -1373,29 +1373,29 @@ 
_test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) } static int -_test_bsr_rub_log(void) +_test_bsr_log(void) { - int ret = _test_bsr_rub_log_loop("bsr rub_log nought", 0, 0x0) - & _test_bsr_rub_log_loop("bsr rub_log ones odd", 3, 0xff) - & _test_bsr_rub_log_loop("bsr rub_log ones even", 4, 0xff) - & _test_bsr_rub_log_loop("bsr rub_log ones big", 50, 0xff) - & _test_bsr_rub_log_loop("bsr rub_log zeros odd", 5, 0x0) - & _test_bsr_rub_log_loop("bsr rub_log zeros even", 6, 0x0) - & _test_bsr_rub_log_loop("bsr rub_log zeros big", 50, 0x0); + int ret = _test_bsr_log_loop("bsr log nought", 0, 0x0) + & _test_bsr_log_loop("bsr log ones odd", 3, 0xff) + & _test_bsr_log_loop("bsr log ones even", 4, 0xff) + & _test_bsr_log_loop("bsr log ones big", 50, 0xff) + & _test_bsr_log_loop("bsr log zeros odd", 5, 0x0) + & _test_bsr_log_loop("bsr log zeros even", 6, 0x0) + & _test_bsr_log_loop("bsr log zeros big", 50, 0x0); { uint8_t i, j = 5; char cap[1024]; for ( i = 0; i < 8; i++ ) { - snprintf(cap, 1000, "bsr rub_log 1<<%u odd", i); - ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); + snprintf(cap, 1000, "bsr log 1<<%u odd", i); + ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i); - snprintf(cap, 1000, "bsr rub_log 1<<%u even", i); - ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); + snprintf(cap, 1000, "bsr log 1<<%u even", i); + ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i); - snprintf(cap, 1000, "bsr rub_log 1<<%u big", i); - ret &= _test_bsr_rub_log_loop((const char*)cap, 50, 0x1 << i); + snprintf(cap, 1000, "bsr log 1<<%u big", i); + ret &= _test_bsr_log_loop((const char*)cap, 50, 0x1 << i); } } @@ -1480,7 +1480,7 @@ _test_bsr(void) & _test_bsr8() & _test_bsr32() & _test_bsr64() - & _test_bsr_rub_log() + & _test_bsr_log() & _test_bsr_tag(); } diff --git a/pkg/urbit/ur/bitstream.c b/pkg/urbit/ur/bitstream.c index b91a107ee..d72548257 100644 --- a/pkg/urbit/ur/bitstream.c +++ b/pkg/urbit/ur/bitstream.c @@ -596,7 
+596,7 @@ ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out) } static inline ur_cue_res_e -_bsr_rub_log_meme(ur_bsr_t *bsr) +_bsr_log_meme(ur_bsr_t *bsr) { bsr->bits += 256; bsr->bytes += 32; @@ -605,7 +605,7 @@ _bsr_rub_log_meme(ur_bsr_t *bsr) } ur_cue_res_e -ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) +ur_bsr_log(ur_bsr_t *bsr, uint8_t *out) { uint64_t left = bsr->left; @@ -621,7 +621,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) while ( !byt ) { if ( 32 == skip ) { - return _bsr_rub_log_meme(bsr); + return _bsr_log_meme(bsr); } skip++; @@ -637,7 +637,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); if ( 255 < zeros ) { - return _bsr_rub_log_meme(bsr); + return _bsr_log_meme(bsr); } else { uint32_t bits = off + 1 + zeros; @@ -663,7 +663,7 @@ ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) ur_cue_res_e res; uint8_t len; - if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { + if ( ur_cue_good != (res = ur_bsr_log(bsr, &len)) ) { return res; } else if ( 64 <= len ) { @@ -688,6 +688,19 @@ ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out) return ur_cue_good; } +/* +** bitstream-writer operations follow a pattern of an unsafe (inline) +** implementation, unsafe with respect to buffer size and reallocation, +** wrapped in a public function with buffer size checks. +** +** higher-level operations made up of multiple discrete writes check +** the buffer size once for all involved writes. +** +** this pattern should be easily adaptable to an alternate bitstream-writer +** implementation that flushes accumulated output periodically instead +** of reallocating the output buffer. +*/ + void ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) {