ur: refactors and adds comments to bitstream.h

This commit is contained in:
Joe Bryan 2020-09-02 16:42:05 -07:00
parent f6bd14c7c6
commit f49963ffcf
3 changed files with 153 additions and 41 deletions

View File

@ -3,25 +3,13 @@
#include <inttypes.h> #include <inttypes.h>
typedef enum {
ur_cue_good = 0,
ur_cue_back = 1,
ur_cue_gone = 2,
ur_cue_meme = 3
} ur_cue_res_e;
typedef enum {
ur_jam_atom = 0,
ur_jam_cell = 1,
ur_jam_back = 2
} ur_cue_tag_e;
/* /*
** stateful bitstream reader, backed by a byte-buffer, ** stateful bitstream reader, backed by a byte-buffer,
** supporting a variety of read sizes/patterns. ** maintaining a 64-bit bit-cursor, and supporting a variety
** of read sizes and patterns.
** **
** NB: ur_bsr*_any() functions behave as if the stream were infinite, ** NB: ur_bsr*_any() functions behave as if the stream were infinite,
** subject to overall limit of a 64-bit bit-cursor. ** subject to the overall limit of the bit-cursor.
** **
*/ */
typedef struct ur_bsr_s { typedef struct ur_bsr_s {
@ -31,6 +19,31 @@ typedef struct ur_bsr_s {
const uint8_t *bytes; const uint8_t *bytes;
} ur_bsr_t; } ur_bsr_t;
/*
** generalized bitstream-reader/cue response enum
*/
typedef enum {
ur_cue_good = 0, // successful read
ur_cue_back = 1, // missing backreference
ur_cue_gone = 2, // read off the end of the stream
ur_cue_meme = 3 // exceeded memory representation
} ur_cue_res_e;
/*
** jam/cue type tag enumeration
*/
typedef enum {
ur_jam_atom = 0,
ur_jam_cell = 1,
ur_jam_back = 2
} ur_cue_tag_e;
/*
** stateful bitstream writer, backed by a byte-buffer automatically
** reallocated with fibonacci growth, maintaining a 64-bit bit-cursor,
** and supporting a variety of write sizes and patterns.
**
*/
typedef struct ur_bsw_s { typedef struct ur_bsw_s {
uint64_t prev; uint64_t prev;
uint64_t size; uint64_t size;
@ -41,83 +54,169 @@ typedef struct ur_bsw_s {
} ur_bsw_t; } ur_bsw_t;
/* /*
** initialize bitstream and check for 64-bit bit-cursor overflow. ** initialize bitstream-reader and check for 64-bit bit-cursor overflow.
*/ */
ur_cue_res_e ur_cue_res_e
ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes); ur_bsr_init(ur_bsr_t *bsr, uint64_t len, const uint8_t *bytes);
/*
** validate bitstream-reader invariants.
*/
ur_bool_t ur_bool_t
ur_bsr_sane(ur_bsr_t *bsr); ur_bsr_sane(ur_bsr_t *bsr);
/*
** read a bit, failing at EOS
*/
ur_cue_res_e ur_cue_res_e
ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out); ur_bsr_bit(ur_bsr_t *bsr, uint8_t *out);
/*
** read a bit
*/
uint8_t uint8_t
ur_bsr_bit_any(ur_bsr_t *bsr); ur_bsr_bit_any(ur_bsr_t *bsr);
/*
** read N (up to 8) bits into a uint8.
*/
uint8_t uint8_t
ur_bsr8_any(ur_bsr_t *bsr, uint8_t len); ur_bsr8_any(ur_bsr_t *bsr, uint8_t len);
/*
** read N (up to 32) bits into a uint32.
*/
uint32_t uint32_t
ur_bsr32_any(ur_bsr_t *bsr, uint8_t len); ur_bsr32_any(ur_bsr_t *bsr, uint8_t len);
/*
** read N (up to 64) bits into a uint64.
*/
uint64_t uint64_t
ur_bsr64_any(ur_bsr_t *bsr, uint8_t len); ur_bsr64_any(ur_bsr_t *bsr, uint8_t len);
/*
** read N bits into a zero-initialized byte array.
*/
void void
ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out); ur_bsr_bytes_any(ur_bsr_t *bsr, uint64_t len, uint8_t *out);
/*
** advance the bitstream cursor as if we had read N bits.
*/
void void
ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len); ur_bsr_skip_any(ur_bsr_t *bsr, uint64_t len);
/*
** read a jam/cue type tag.
*/
ur_cue_res_e ur_cue_res_e
ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out); ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out);
/*
** read a binary exponent, producing the binary log.
**
** read N (up to 255) zero bits followed by a 1, produce N.
*/
ur_cue_res_e ur_cue_res_e
ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out); ur_bsr_log(ur_bsr_t *bsr, uint8_t *out);
/*
** read an atomic run-length (a la +rub).
**
** read a binary log N, then read N (up to 64) bits,
** produce (N-bits ^ (1 << N))
*/
ur_cue_res_e ur_cue_res_e
ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out); ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out);
/*
** reallocate bitstream write buffer with max(fibonacci, step) growth.
*/
void void
ur_bsw_grow(ur_bsw_t *bsw, uint64_t step); ur_bsw_grow(ur_bsw_t *bsw, uint64_t step);
/*
** validate bitstream-writer invariants.
*/
ur_bool_t ur_bool_t
ur_bsw_sane(ur_bsw_t *bsw); ur_bsw_sane(ur_bsw_t *bsw);
/*
** write a bit
*/
void void
ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit); ur_bsw_bit(ur_bsw_t *bsw, uint8_t bit);
/*
** write N (up to 8) bits of a uint8.
*/
void void
ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt); ur_bsw8(ur_bsw_t *bsw, uint8_t len, uint8_t byt);
/*
** write N (up to 32) bits of a uint32.
*/
void void
ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val); ur_bsw32(ur_bsw_t *bsw, uint8_t len, uint32_t val);
/*
** write N (up to 64) bits of a uint64.
*/
void void
ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val); ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val);
/*
** write N bits of a byte array.
**
** NB: [byt] must contain at least N bits.
*/
void void
ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); ur_bsw_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt);
/*
** write a binary exponent (N zero bits, followed by a 1).
*/
void void
ur_bsw_bex(ur_bsw_t *bsw, uint8_t n); ur_bsw_bex(ur_bsw_t *bsw, uint8_t n);
/*
** write N (up to 64) run-length prefixed bits (a la +mat).
*/
void void
ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val); ur_bsw_mat64(ur_bsw_t *bsw, uint8_t len, uint64_t val);
/*
** write N run-length prefixed bits (a la +mat).
**
** NB: [byt] must contain at least N bits.
*/
void void
ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); ur_bsw_mat_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt);
/*
** write a backref tag (1, 1) and N (up to 64) run-length prefixed bits.
*/
void void
ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val); ur_bsw_back64(ur_bsw_t *bsw, uint8_t len, uint64_t val);
/*
** write an atom tag (0) and N (up to 64) run-length prefixed bits.
*/
void void
ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val); ur_bsw_atom64(ur_bsw_t *bsw, uint8_t len, uint64_t val);
/*
** write an atom tag (0) and N run-length prefixed bits.
**
** NB: [byt] must contain at least N bits.
*/
void void
ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt); ur_bsw_atom_bytes(ur_bsw_t *bsw, uint64_t len, uint8_t *byt);
/*
** write a cell tag (1, 0)
*/
void void
ur_bsw_cell(ur_bsw_t *bsw); ur_bsw_cell(ur_bsw_t *bsw);

View File

@ -1321,7 +1321,7 @@ _bsr_cmp_check(const char* cap,
} }
static ur_cue_res_e static ur_cue_res_e
_bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out) _bsr_log_slow(ur_bsr_t *bsr, uint8_t *out)
{ {
ur_cue_res_e res; ur_cue_res_e res;
uint8_t bit, i = 0; uint8_t bit, i = 0;
@ -1341,7 +1341,7 @@ _bsr_rub_log_slow(ur_bsr_t *bsr, uint8_t *out)
} }
static int static int
_test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val) _test_bsr_log_loop(const char *cap, uint8_t len, uint8_t val)
{ {
int ret = 1; int ret = 1;
ur_bsr_t a, b; ur_bsr_t a, b;
@ -1360,8 +1360,8 @@ _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val)
memset(bytes, 0x0, j); memset(bytes, 0x0, j);
memset(bytes + j, val, len - j); memset(bytes + j, val, len - j);
e = _bsr_rub_log_slow(&a, &c); e = _bsr_log_slow(&a, &c);
f = ur_bsr_rub_log(&b, &d); f = ur_bsr_log(&b, &d);
ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f); ret &= _bsr_cmp_check(cap, i, j, &a, &b, c, d, e, f);
} }
@ -1373,29 +1373,29 @@ _test_bsr_rub_log_loop(const char *cap, uint8_t len, uint8_t val)
} }
static int static int
_test_bsr_rub_log(void) _test_bsr_log(void)
{ {
int ret = _test_bsr_rub_log_loop("bsr rub_log nought", 0, 0x0) int ret = _test_bsr_log_loop("bsr log nought", 0, 0x0)
& _test_bsr_rub_log_loop("bsr rub_log ones odd", 3, 0xff) & _test_bsr_log_loop("bsr log ones odd", 3, 0xff)
& _test_bsr_rub_log_loop("bsr rub_log ones even", 4, 0xff) & _test_bsr_log_loop("bsr log ones even", 4, 0xff)
& _test_bsr_rub_log_loop("bsr rub_log ones big", 50, 0xff) & _test_bsr_log_loop("bsr log ones big", 50, 0xff)
& _test_bsr_rub_log_loop("bsr rub_log zeros odd", 5, 0x0) & _test_bsr_log_loop("bsr log zeros odd", 5, 0x0)
& _test_bsr_rub_log_loop("bsr rub_log zeros even", 6, 0x0) & _test_bsr_log_loop("bsr log zeros even", 6, 0x0)
& _test_bsr_rub_log_loop("bsr rub_log zeros big", 50, 0x0); & _test_bsr_log_loop("bsr log zeros big", 50, 0x0);
{ {
uint8_t i, j = 5; uint8_t i, j = 5;
char cap[1024]; char cap[1024];
for ( i = 0; i < 8; i++ ) { for ( i = 0; i < 8; i++ ) {
snprintf(cap, 1000, "bsr rub_log 1<<%u odd", i); snprintf(cap, 1000, "bsr log 1<<%u odd", i);
ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i);
snprintf(cap, 1000, "bsr rub_log 1<<%u even", i); snprintf(cap, 1000, "bsr log 1<<%u even", i);
ret &= _test_bsr_rub_log_loop((const char*)cap, j++, 0x1 << i); ret &= _test_bsr_log_loop((const char*)cap, j++, 0x1 << i);
snprintf(cap, 1000, "bsr rub_log 1<<%u big", i); snprintf(cap, 1000, "bsr log 1<<%u big", i);
ret &= _test_bsr_rub_log_loop((const char*)cap, 50, 0x1 << i); ret &= _test_bsr_log_loop((const char*)cap, 50, 0x1 << i);
} }
} }
@ -1480,7 +1480,7 @@ _test_bsr(void)
& _test_bsr8() & _test_bsr8()
& _test_bsr32() & _test_bsr32()
& _test_bsr64() & _test_bsr64()
& _test_bsr_rub_log() & _test_bsr_log()
& _test_bsr_tag(); & _test_bsr_tag();
} }

View File

@ -596,7 +596,7 @@ ur_bsr_tag(ur_bsr_t *bsr, ur_cue_tag_e *out)
} }
static inline ur_cue_res_e static inline ur_cue_res_e
_bsr_rub_log_meme(ur_bsr_t *bsr) _bsr_log_meme(ur_bsr_t *bsr)
{ {
bsr->bits += 256; bsr->bits += 256;
bsr->bytes += 32; bsr->bytes += 32;
@ -605,7 +605,7 @@ _bsr_rub_log_meme(ur_bsr_t *bsr)
} }
ur_cue_res_e ur_cue_res_e
ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out) ur_bsr_log(ur_bsr_t *bsr, uint8_t *out)
{ {
uint64_t left = bsr->left; uint64_t left = bsr->left;
@ -621,7 +621,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out)
while ( !byt ) { while ( !byt ) {
if ( 32 == skip ) { if ( 32 == skip ) {
return _bsr_rub_log_meme(bsr); return _bsr_log_meme(bsr);
} }
skip++; skip++;
@ -637,7 +637,7 @@ ur_bsr_rub_log(ur_bsr_t *bsr, uint8_t *out)
uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0); uint32_t zeros = ur_tz8(byt) + (skip ? ((skip << 3) - off) : 0);
if ( 255 < zeros ) { if ( 255 < zeros ) {
return _bsr_rub_log_meme(bsr); return _bsr_log_meme(bsr);
} }
else { else {
uint32_t bits = off + 1 + zeros; uint32_t bits = off + 1 + zeros;
@ -663,7 +663,7 @@ ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out)
ur_cue_res_e res; ur_cue_res_e res;
uint8_t len; uint8_t len;
if ( ur_cue_good != (res = ur_bsr_rub_log(bsr, &len)) ) { if ( ur_cue_good != (res = ur_bsr_log(bsr, &len)) ) {
return res; return res;
} }
else if ( 64 <= len ) { else if ( 64 <= len ) {
@ -688,6 +688,19 @@ ur_bsr_rub_len(ur_bsr_t *bsr, uint64_t *out)
return ur_cue_good; return ur_cue_good;
} }
/*
** bitstream-writer operations follow a pattern of an unsafe (inline)
** implementation, unsafe with respect to buffer size and reallocation,
** wrapped in a public function with buffer size checks.
**
** higher-level operations made up of multiple discrete writes check
** the buffer size once for all involved writes.
**
** this pattern should be easily adaptable to an alternate bitstream-writer
** implementation that flushes accumulated output periodically instead
** of reallocating the output buffer.
*/
void void
ur_bsw_grow(ur_bsw_t *bsw, uint64_t step) ur_bsw_grow(ur_bsw_t *bsw, uint64_t step)
{ {