ur: optimizes bitstream bytes-writer implementation

This commit is contained in:
Joe Bryan 2022-09-07 10:21:22 -04:00
parent 0ab44a44b7
commit 29bce64669

View File

@ -1035,67 +1035,60 @@ ur_bsw64(ur_bsw_t *bsw, uint8_t len, uint64_t val)
} }
static inline void static inline void
_bsw_bytes_unsafe(ur_bsw_t *bsw, uint64_t len, uint8_t *byt) _bsw_bytes_unsafe(ur_bsw_t *bsw, const uint64_t len, const uint8_t* src)
{ {
uint64_t len_byt = len >> 3; uint64_t fill = bsw->fill;
uint8_t len_bit = ur_mask_3(len); uint8_t off = bsw->off;
uint64_t fill = bsw->fill; uint8_t *dst = bsw->bytes + fill;
uint8_t off = bsw->off;
if ( !off ) { if ( !off ) {
memcpy(bsw->bytes + fill, byt, len_byt); const uint64_t len_byt = len >> 3;
fill += len_byt; const uint8_t len_bit = ur_mask_3(len);
off = len_bit;
if ( off ) { memcpy(dst, src, len_byt);
bsw->bytes[fill] = byt[len_byt] & ((1 << off) - 1); bsw->fill = fill + len_byt;
if ( len_bit ) {
dst[len_byt] = src[len_byt] & ((1 << len_bit) - 1);
bsw->off = len_bit;
} }
} }
// the least-significant bits of the input become the else {
// most-significant bits of a byte in the output stream, and vice-versa const uint8_t rest = 8 - off;
//
else {
uint8_t rest = 8 - off;
uint8_t mask = (1 << rest) - 1;
uint8_t l, m = bsw->bytes[fill];
uint64_t i;
for ( i = 0; i < len_byt; i++ ) { if ( rest >= len ) {
l = byt[i] & mask; uint16_t ful = off + len;
bsw->bytes[fill++] = m ^ (l << off);
m = byt[i] >> rest;
}
// no trailing bits; we need only write the rest of the last byte. *dst ^= (*src & ((1 << len) - 1)) << off;
//
// NB: while semantically equivalent to the subsequent block, if ( ful >> 3 ) {
// this case must be separate to avoid reading off the end of [byt] bsw->fill = fill + 1;
// }
if ( !len_bit ) {
bsw->bytes[fill] = m; bsw->off = ur_mask_3(ful);
} }
// trailing bits fit into the current output byte.
//
else if ( len_bit < rest ) {
l = byt[len_byt] & ((1 << len_bit) - 1);
bsw->bytes[fill] = m ^ (l << off);
off += len_bit;
}
// trailing bits extend into the next output byte.
//
else { else {
l = byt[len_byt] & mask; const uint64_t nel = len - rest;
bsw->bytes[fill++] = m ^ (l << off); const uint64_t len_byt = nel >> 3;
const uint8_t len_bit = ur_mask_3(nel);
m = byt[len_byt] >> rest; *dst++ ^= *src << off;
off = len_bit - rest; for ( uint64_t i = 0; i < len_byt; i++ ) {
bsw->bytes[fill] = m & ((1 << off) - 1); dst[i] = (src[i] >> rest) ^ (src[i + 1] << off);
}
{
uint8_t tal = (src[len_byt] >> rest)
^ (( off > len_bit ) ? 0 : (src[len_byt + 1] << off));
dst[len_byt] = tal & ((1 << len_bit) - 1);
}
bsw->fill = fill + len_byt + 1;
bsw->off = len_bit;
} }
} }
bsw->off = off;
bsw->fill = fill;
bsw->bits += len; bsw->bits += len;
} }