[ares] rewrite HAMT entirely, more readable/less raw representation

Edward Amsden 2023-02-16 19:19:16 -06:00
parent 646576dd56
commit 4e422efecc
4 changed files with 282 additions and 170 deletions

rust/ares/Cargo.lock (generated)

@@ -22,6 +22,7 @@ dependencies = [
  "murmur3",
  "num-derive",
  "num-traits",
+ "static_assertions",
 ]
 
 [[package]]
@@ -528,6 +529,12 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
 [[package]]
 name = "syn"
 version = "1.0.98"

rust/ares/Cargo.toml

@@ -17,6 +17,7 @@ intmap = "1.1.0"
 num-traits = "0.2"
 num-derive = "0.3"
 criterion = "0.4"
+static_assertions = "1.1.0"
 
 [[bin]]
 name = "cue_pill"

rust/ares/src/hamt.rs

@@ -1,239 +1,340 @@
-use std::marker::PhantomData;
 use either::Either::{self, *};
 use crate::noun::Noun;
 use crate::mem::{NockStack, unifying_equality, Preserve};
 use crate::mug::mug_u32;
-use std::ptr::{write_bytes, copy_nonoverlapping};
+use std::ptr::{copy_nonoverlapping, null};
+use std::slice;
 
-/// A HamtNode is a pointer to a buffer, structured as follows:
-///
-/// word 0: occupancy bitmap for mug chunk values
-/// word 1: type bitmap for occupied mug chunk values (clear - node, set - leaf)
-/// following words: an entry for each set bit in the occupancy bitmap, pointer to HamtNode or HamtLeaf as
-/// given by type bitmap
-#[derive(Copy,Clone)]
-struct HamtNode<T> {
-    ptr: *mut u64,
-    phantom: PhantomData<T>,
-}
-
 #[inline]
-fn chunk_to_bit(chunk: u32) -> u64 {
-    1u64 << chunk
+fn chunk_to_bit(chunk: u32) -> u32 {
+    1u32 << chunk
 }
 
 #[inline]
-fn chunk_to_mask(chunk: u32) -> u64 {
+fn chunk_to_mask(chunk: u32) -> u32 { // mask out the bit for the chunk and all more significant
     chunk_to_bit(chunk) - 1
 }
 
-#[inline]
-fn ptr_as_node<T>(ptr: *mut u64) -> HamtNode<T> {
-    HamtNode {
-        ptr: ptr,
-        phantom: PhantomData::<T>,
-    }
-}
+#[repr(packed)]
+#[derive(Copy,Clone)]
+struct Stem<T: Copy> {
+    bitmap: u32,
+    typemap: u32,
+    buffer: *const Entry<T>,
+}
 
-impl<T: Copy> HamtNode<T> {
-    unsafe fn new_raw(stack: &mut NockStack, entries: usize) -> Self {
-        let buf = stack.struct_alloc(entries + 2);
-        write_bytes(buf, 0, entries + 2);
-        ptr_as_node(buf)
-    }
-
+impl<T: Copy> Stem<T> {
     #[inline]
     fn size(self) -> usize {
-        unsafe { (*self.ptr).count_ones() as usize }
+        self.bitmap.count_ones() as usize
     }
 
-    fn bitmap(self) -> u64 {
-        unsafe { *self.ptr }
+    #[inline]
+    fn has_index(self, chunk: u32) -> bool {
+        self.bitmap & chunk_to_bit(chunk) != 0
     }
 
-    fn typemap(self) -> u64 {
-        unsafe { *self.ptr.add(1) }
+    #[inline]
+    fn hypothetical_index(self, chunk: u32) -> usize {
+        (self.bitmap & chunk_to_mask(chunk)).count_ones() as usize
     }
 
     #[inline]
     fn index(self, chunk: u32) -> Option<usize> {
-        if self.bitmap() & chunk_to_bit(chunk) != 0 {
-            Some((self.bitmap() & chunk_to_mask(chunk)).count_ones() as usize)
+        if self.has_index(chunk) {
+            Some(self.hypothetical_index(chunk))
         } else {
             None
         }
     }
 
-    fn entry(self, chunk: u32) -> Option<(Either<HamtNode<T>, *const HamtLeaf<T>>, usize)> {
+    #[inline]
+    fn entry(self, chunk: u32) -> Option<(Either<Stem<T>, Leaf<T>>, usize)> {
         self.index(chunk).map(|idx| {
             (unsafe {
-                if (*self.ptr.add(1)) & chunk_to_bit(chunk) == 0 {
-                    Left(ptr_as_node(*(self.ptr.add(2 + idx)) as *mut u64))
+                // typemap bit set means stem, clear means leaf
+                if self.typemap & chunk_to_bit(chunk) != 0 {
+                    Left((*self.buffer.add(idx)).stem)
                 } else {
-                    Right((*self.ptr.add(2 + idx)) as *const HamtLeaf<T>)
+                    Right((*self.buffer.add(idx)).leaf)
                 }
             },
             idx)
         })
     }
 }
 
-/// A HamtLeaf is a size and pointer to a memory buffer of map entries
-struct HamtLeaf<T> {
-    claimants: usize,
-    entries: *mut (Noun, T),
-}
+#[repr(packed)]
+#[derive(Copy,Clone)]
+struct Leaf<T: Copy> {
+    len: usize,
+    buffer: *mut (Noun, T), // mutable for unifying equality
+}
 
-pub struct Hamt<T>(HamtNode<T>);
+impl<T: Copy> Leaf<T> {
+    unsafe fn to_mut_slice<'a>(self) -> &'a mut [(Noun, T)] {
+        slice::from_raw_parts_mut(self.buffer, self.len)
+    }
+}
+
+#[derive(Copy,Clone)]
+union Entry<T: Copy> {
+    stem: Stem<T>,
+    leaf: Leaf<T>,
+}
+
+// Entries in our union are the same size and alignment
+assert_eq_size!(Entry<()>, Leaf<()>);
+assert_eq_align!(Entry<()>, Leaf<()>);
+assert_eq_size!(Entry<()>, Stem<()>);
+assert_eq_align!(Entry<()>, Stem<()>);
+// Our custom leaf type is the same size as a fat pointer to key-value pairs
+assert_eq_size!(&[(Noun, ())], Leaf<()>);
+// Our custom stem type is the same size as a fat pointer to `Entry`s
+assert_eq_size!(&[Entry<()>], Stem<()>);
+
+pub struct Hamt<T: Copy>(Stem<T>);
 
 impl<T: Copy> Hamt<T> {
-    pub fn new(stack: &mut NockStack) -> Self {
-        unsafe { Hamt(HamtNode::new_raw(stack, 0)) }
+    // Make a new, empty HAMT
+    pub fn new() -> Hamt<T> {
+        Hamt(Stem {
+            bitmap: 0,
+            typemap: 0,
+            buffer: null(),
+        })
     }
 
-    /// Look up a noun in an immutable HAMT and return the associated value
-    pub fn lookup(self, stack: &mut NockStack, n: &mut Noun) -> Option<T> {
-        let mut node = self.0;
+    /**
+     * Look up a pair keyed by a noun in the HAMT
+     *
+     * A mutable reference is required so that unifying equality can unify the key with a key
+     * entry in the HAMT
+     */
+    pub fn lookup(&self, stack: &mut NockStack, n: &mut Noun) -> Option<T> {
+        let mut stem = self.0;
         let mut mug = mug_u32(stack, *n);
         'lookup: loop {
-            unsafe {
-                let mug_chunk = mug & 0x3f;
-                mug = mug >> 6;
-                match node.entry(mug_chunk) {
-                    None => { break None; },
-                    Some((Left(next_node), _idx)) => {
-                        node = next_node;
-                        continue;
-                    },
-                    Some((Right(leaf), _idx)) => {
-                        for i in 0..(*leaf).claimants {
-                            if unifying_equality(stack, &mut (*(*leaf).entries.add(i)).0, n) {
-                                break 'lookup Some((*(*leaf).entries.add(i)).1);
-                            }
-                        };
-                        break None;
-                    },
-                }
-            }
+            let chunk = mug & 0x1F; // 5 bits
+            mug = mug >> 5;
+            match stem.entry(chunk) {
+                None => {
+                    break None;
+                },
+                Some((Left(next_stem), _idx)) => {
+                    stem = next_stem;
+                    continue;
+                },
+                Some((Right(leaf), _idx)) => {
+                    for pair in unsafe { leaf.to_mut_slice().iter_mut() } {
+                        if unsafe { unifying_equality(stack, n, &mut pair.0) } {
+                            break 'lookup Some(pair.1);
+                        }
+                    }
+                    break None;
+                },
+            }
         }
     }
 
-    /// Insert a pair into an immutable HAMT, creating a new HAMT
-    ///
-    /// The noun key must be mutable to support unifying equality
-    pub fn insert(self, stack: &mut NockStack, n: &mut Noun, t: T) -> Self {
-        let mut node = self.0;
-        let mut new_node = unsafe { HamtNode::<T>::new_raw(stack, node.size() + 1) };
-        let ret = Hamt(node);
+    // XX a delete function requires a stack, do we need one?
+
+    /// Make a new HAMT with the value inserted or replaced at the key.
+    pub fn insert(&self, stack: &mut NockStack, n: &mut Noun, t: T) -> Hamt<T> {
         let mut mug = mug_u32(stack, *n);
         let mut depth = 0u8;
-        'insert: loop {
-            unsafe {
-                depth += 1;
-                let mug_chunk = mug & 0x3f; // least-significant 6 bits
-                mug = mug >> 6;
-                match node.entry(mug_chunk) {
+        let mut stem = self.0;
+        let mut stem_ret = self.0;
+        let mut dest = &mut stem_ret as *mut Stem<T>;
+        unsafe {
+            'insert: loop {
+                let chunk = mug & 0x1F; // 5 bits
+                mug = mug >> 5;
+                match stem.entry(chunk) {
-                    None => { // no entry in the bitmap, write a leaf
-                        let new_bitmap = node.bitmap() | chunk_to_bit(mug_chunk);
-                        let new_typemap = node.typemap() | chunk_to_bit(mug_chunk);
-                        *new_node.ptr = new_bitmap;
-                        *new_node.ptr.add(1) = new_typemap;
-                        let new_leaf_buf = stack.struct_alloc(1);
-                        *new_leaf_buf = (*n, t);
-                        let new_leaf = stack.struct_alloc(1);
-                        *new_leaf = HamtLeaf {
-                            claimants: 1,
-                            entries: new_leaf_buf
-                        };
-                        let split = (node.bitmap() & chunk_to_mask(mug_chunk)).count_ones() as usize;
-                        copy_nonoverlapping(node.ptr.add(2), new_node.ptr.add(2), split);
-                        *new_node.ptr.add(2+split) = new_leaf as u64;
-                        copy_nonoverlapping(node.ptr.add(2+split), new_node.ptr.add(3+split), node.size() - split);
-                        break;
-                    },
+                    // no entry at this chunk: copy the stem with one more slot
+                    // and write a fresh one-pair leaf into it
+                    None => {
+                        let new_leaf_buffer = stack.struct_alloc(1);
+                        *new_leaf_buffer = (*n, t);
+                        let split = stem.hypothetical_index(chunk);
+                        let new_buffer = stack.struct_alloc(stem.size() + 1);
+                        copy_nonoverlapping(stem.buffer, new_buffer, split);
+                        *new_buffer.add(split) = Entry {
+                            leaf: Leaf {
+                                len: 1,
+                                buffer: new_leaf_buffer,
+                            }
+                        };
+                        copy_nonoverlapping(stem.buffer.add(split), new_buffer.add(split + 1), stem.size() - split);
+                        *dest = Stem {
+                            bitmap: stem.bitmap | chunk_to_bit(chunk),
+                            typemap: stem.typemap & !chunk_to_bit(chunk),
+                            buffer: new_buffer,
+                        };
+                        break Hamt(stem_ret);
+                    },
-                    // there's already a node at this entry, insert into it
-                    Some((Left(next_node), idx)) => {
-                        let next_new_node = HamtNode::new_raw(stack, next_node.size() + 1);
-                        copy_nonoverlapping(node.ptr, new_node.ptr, node.size() + 2);
-                        *new_node.ptr.add(2 + idx) = next_new_node.ptr as u64;
-                        node = next_node;
-                        new_node = next_new_node;
-                        continue;
-                    },
+                    // there's already a stem at this chunk: copy it and descend
+                    Some((Left(next_stem), idx)) => {
+                        let new_buffer = stack.struct_alloc(stem.size());
+                        copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
+                        *dest = Stem {
+                            bitmap: stem.bitmap,
+                            typemap: stem.typemap,
+                            buffer: new_buffer,
+                        };
+                        dest = &mut (*new_buffer.add(idx)).stem;
+                        stem = next_stem;
+                        depth += 1;
+                        continue;
+                    },
                     Some((Right(leaf), idx)) => {
-                        // check whether we should overwrite a key
-                        for i in 0..(*leaf).claimants {
-                            if unifying_equality(stack, &mut (*(*leaf).entries.add(i)).0, n) {
-                                let new_leaf_buf = stack.struct_alloc((*leaf).claimants);
-                                copy_nonoverlapping((*leaf).entries, new_leaf_buf, (*leaf).claimants);
-                                (*new_leaf_buf.add(i)).1 = t;
-                                let new_leaf = stack.struct_alloc(1);
-                                *new_leaf = HamtLeaf {
-                                    claimants: (*leaf).claimants,
-                                    entries: new_leaf_buf,
-                                };
-                                copy_nonoverlapping(node.ptr, new_node.ptr, node.size() + 2);
-                                *new_node.ptr.add(2+idx) = new_leaf as u64;
-                                break 'insert;
-                            }
-                        }
+                        // if the key is already present, replace its value
+                        for (ldx, pair) in leaf.to_mut_slice().iter_mut().enumerate() {
+                            if unifying_equality(stack, n, &mut pair.0) {
+                                let new_leaf_buffer = stack.struct_alloc(leaf.len);
+                                copy_nonoverlapping(leaf.buffer, new_leaf_buffer, leaf.len);
+                                (*new_leaf_buffer.add(ldx)).1 = t;
+                                let new_buffer = stack.struct_alloc(stem.size());
+                                copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
+                                *new_buffer.add(idx) = Entry {
+                                    leaf: Leaf {
+                                        len: leaf.len,
+                                        buffer: new_leaf_buffer,
+                                    }
+                                };
+                                *dest = Stem {
+                                    bitmap: stem.bitmap,
+                                    typemap: stem.typemap,
+                                    buffer: new_buffer,
+                                };
+                                break 'insert Hamt(stem_ret);
+                            }
+                        }
-                        // We have gone as far as we can by distinguishing mugs, chain by nouns now
-                        if depth >= 6 {
-                            // append to this leaf
-                            let new_leaf_buf = stack.struct_alloc((*leaf).claimants + 1);
-                            copy_nonoverlapping((*leaf).entries, new_leaf_buf, (*leaf).claimants);
-                            *new_leaf_buf.add((*leaf).claimants) = (*n, t);
-                            let new_leaf = stack.struct_alloc(1);
-                            *new_leaf = HamtLeaf {
-                                claimants: (*leaf).claimants + 1,
-                                entries: new_leaf_buf,
-                            };
-                            copy_nonoverlapping(node.ptr, new_node.ptr, node.size() + 2);
-                            *new_node.ptr.add(2+idx) = new_leaf as u64;
-                            break;
+                        // we have gone as far as we can by distinguishing mugs, chain by nouns now
+                        if depth >= 5 {
+                            // append the new pair to this leaf
+                            let new_leaf_buffer = stack.struct_alloc(leaf.len + 1);
+                            copy_nonoverlapping(leaf.buffer, new_leaf_buffer, leaf.len);
+                            *new_leaf_buffer.add(leaf.len) = (*n, t);
+                            let new_buffer = stack.struct_alloc(stem.size());
+                            copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
+                            *new_buffer.add(idx) = Entry {
+                                leaf: Leaf {
+                                    len: leaf.len + 1,
+                                    buffer: new_leaf_buffer,
+                                }
+                            };
+                            *dest = Stem {
+                                bitmap: stem.bitmap,
+                                typemap: stem.typemap,
+                                buffer: new_buffer,
+                            };
+                            break 'insert Hamt(stem_ret);
                         } else {
-                            // We make a node which won't go in our new tree, but contains the existing
-                            // leaf in the proper spot in the bitmap for the next level. We use this as
-                            // the value of `node` in the next iteration.
-                            // We then allocate our next new node as usual, set up the references in the
-                            // current new_node, update the iterators, and go around again
-                            //
-                            // assertion: we haven't gone deep enough to chain at leaves, so there is
-                            // only one key-value pair at this leaf
-                            assert!((*leaf).claimants == 1);
-                            let rival = (*(*leaf).entries).0;
-                            let rival_mug = mug_u32(stack, rival);
-                            let rival_mug_chunk = rival_mug >> (depth * 6) & 0x3f;
-                            let rival_mug_bit = chunk_to_bit(rival_mug_chunk);
-                            let fake_next_leaf_buf = stack.struct_alloc(1);
-                            copy_nonoverlapping((*leaf).entries, fake_next_leaf_buf, 1);
-                            let fake_next_leaf = stack.struct_alloc(1);
-                            *fake_next_leaf = HamtLeaf {
-                                claimants: 1,
-                                entries: fake_next_leaf_buf,
-                            };
-                            let fake_next_node = HamtNode::new_raw(stack, 1);
-                            *fake_next_node.ptr = rival_mug_bit;
-                            *fake_next_node.ptr.add(1) = rival_mug_bit;
-                            *fake_next_node.ptr.add(2) = fake_next_leaf as u64;
-                            copy_nonoverlapping(node.ptr, new_node.ptr, node.size() + 2);
-                            let next_new_node = HamtNode::new_raw(stack, 2);
-                            *new_node.ptr.add(2 + idx) = next_new_node.ptr as u64;
-                            node = fake_next_node;
-                            new_node = next_new_node;
-                            continue;
+                            // if we haven't hit the depth limit yet we shouldn't be chaining:
+                            // we'll make a fake stem pointing to the old leaf and "insert into"
+                            // that next time around
+                            assert!(leaf.len == 1);
+                            let fake_buffer = stack.struct_alloc(1);
+                            *fake_buffer = Entry {
+                                leaf: leaf
+                            };
+                            // the fake stem sits one level down, so the old leaf must be
+                            // indexed by its own mug chunk at that level, not by `chunk`
+                            let leaf_mug = mug_u32(stack, (*leaf.buffer).0);
+                            let leaf_chunk = (leaf_mug >> ((depth + 1) * 5)) & 0x1F;
+                            let next_stem = Stem {
+                                bitmap: chunk_to_bit(leaf_chunk),
+                                typemap: 0,
+                                buffer: fake_buffer,
+                            };
+                            let new_buffer = stack.struct_alloc(stem.size());
+                            copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
+                            *dest = Stem {
+                                bitmap: stem.bitmap,
+                                typemap: stem.typemap | chunk_to_bit(chunk), // node now
+                                buffer: new_buffer,
+                            };
+                            dest = &mut (*new_buffer.add(idx)).stem;
+                            stem = next_stem;
+                            depth += 1;
+                            continue;
                         }
                     },
                 }
             }
-        };
-        return ret;
+        }
     }
 }
 
+impl<T: Copy + Preserve> Preserve for Hamt<T> {
+    unsafe fn preserve(&mut self, stack: &mut NockStack) {
+        // XX make in_frame not care about pointer type
+        if stack.in_frame((*self).0.buffer as *const u64) {
+            let dest_buffer = stack.struct_alloc_in_previous_frame((*self).0.size());
+            copy_nonoverlapping((*self).0.buffer, dest_buffer, (*self).0.size());
+            (*self).0.buffer = dest_buffer;
+            let traversal_stack = stack.struct_alloc::<(Stem<T>, u32)>(6);
+            let mut traversal_depth = 1;
+            *traversal_stack = ((*self).0, 0);
+            'preserve: loop {
+                if traversal_depth == 0 {
+                    break;
+                }
+                let (stem, mut position) = *traversal_stack.add(traversal_depth - 1);
+                // can we loop over the size and count leading 0s remaining in the bitmap?
+                'preserve_stem: loop {
+                    if position >= 32 {
+                        traversal_depth -= 1;
+                        continue 'preserve;
+                    }
+                    match stem.entry(position) {
+                        None => {
+                            position += 1;
+                            continue 'preserve_stem;
+                        },
+                        Some((Left(next_stem), idx)) => {
+                            if stack.in_frame(next_stem.buffer as *const u64) {
+                                let dest_buffer = stack.struct_alloc_in_previous_frame(next_stem.size());
+                                copy_nonoverlapping(next_stem.buffer, dest_buffer, next_stem.size());
+                                let new_stem = Stem {
+                                    bitmap: next_stem.bitmap,
+                                    typemap: next_stem.typemap,
+                                    buffer: dest_buffer,
+                                };
+                                *(stem.buffer.add(idx) as *mut Entry<T>) = Entry { stem: new_stem };
+                                assert!(traversal_depth <= 5); // will increment
+                                (*traversal_stack.add(traversal_depth - 1)).1 = position + 1;
+                                *traversal_stack.add(traversal_depth) = (new_stem, 0);
+                                traversal_depth += 1;
+                                continue 'preserve;
+                            } else {
+                                position += 1;
+                                continue 'preserve_stem;
+                            }
+                        },
+                        Some((Right(leaf), idx)) => {
+                            if stack.in_frame(leaf.buffer as *const u64) {
+                                let dest_buffer = stack.struct_alloc_in_previous_frame(leaf.len);
+                                copy_nonoverlapping(leaf.buffer, dest_buffer, leaf.len);
+                                let new_leaf = Leaf {
+                                    len: leaf.len,
+                                    buffer: dest_buffer,
+                                };
+                                for pair in new_leaf.to_mut_slice().iter_mut() {
+                                    (*pair).0.preserve(stack);
+                                    (*pair).1.preserve(stack);
+                                }
+                                *(stem.buffer.add(idx) as *mut Entry<T>) = Entry {
+                                    leaf: new_leaf,
+                                };
+                            }
+                            position += 1;
+                            continue 'preserve_stem;
+                        },
+                    }
+                }
+            }
+        }
+    }
+}
+
+/*
 impl <T: Copy + Preserve> Preserve for Hamt<T> {
     unsafe fn preserve(&mut self, stack: &mut NockStack) {
         // we special case the outer copy because it's destination is just a pointer and not a
@@ -303,3 +404,4 @@ impl <T: Copy + Preserve> Preserve for Hamt<T> {
         }
     }
 }
+*/
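
The new representation relies on the standard HAMT indexing trick: a stem's bitmap records which of the 32 possible 5-bit mug chunks are occupied, and an entry's position in the packed buffer is the popcount of the bitmap bits below that chunk's bit (hypothetical_index above). The following standalone sketch mirrors chunk_to_bit and chunk_to_mask with made-up values; it is illustrative only and not part of the commit.

fn chunk_to_bit(chunk: u32) -> u32 {
    1u32 << chunk
}

fn chunk_to_mask(chunk: u32) -> u32 {
    chunk_to_bit(chunk) - 1
}

fn main() {
    // a made-up 15-bit "mug" split into three 5-bit chunks: 2, 7, 11
    let mug: u32 = 0b01011_00111_00010;
    assert_eq!(mug & 0x1F, 2);
    assert_eq!((mug >> 5) & 0x1F, 7);
    assert_eq!((mug >> 10) & 0x1F, 11);

    // a stem occupied at chunks 2, 7, and 11 stores exactly three entries
    let bitmap = chunk_to_bit(2) | chunk_to_bit(7) | chunk_to_bit(11);
    assert_eq!(bitmap.count_ones(), 3);

    // the entry for chunk 7 lives at compressed index 1:
    // one occupied slot (chunk 2) sits below bit 7
    assert_eq!((bitmap & chunk_to_mask(7)).count_ones(), 1);

    // an unoccupied chunk is detected by its bit being clear
    assert_eq!(bitmap & chunk_to_bit(3), 0);
}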

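Because insert copies only the stems along one mug path and returns a fresh root, older roots stay valid: the map is persistent. A hypothetical usage sketch, assuming this crate's NockStack::new(size, top_slots) constructor and the D direct-atom helper from crate::noun; the stack size here is arbitrary and the example is not from the commit.

use ares::hamt::Hamt;
use ares::mem::NockStack;
use ares::noun::D;

fn main() {
    let mut stack = NockStack::new(8 << 10 << 10, 0); // arbitrary size
    let mut key = D(42);

    let empty: Hamt<u64> = Hamt::new();
    let one = empty.insert(&mut stack, &mut key, 1);
    let two = one.insert(&mut stack, &mut key, 2); // replaces under the same key

    assert_eq!(empty.lookup(&mut stack, &mut key), None);
    assert_eq!(one.lookup(&mut stack, &mut key), Some(1)); // old root unchanged
    assert_eq!(two.lookup(&mut stack, &mut key), Some(2));
}
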
rust/ares/src/lib.rs

@@ -1,5 +1,7 @@
 #[macro_use]
 extern crate num_derive;
+#[macro_use]
+extern crate static_assertions;
 pub mod interpreter;
 pub mod jets;
 pub mod mem;
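
static_assertions is pulled in for the compile-time size and alignment checks on the Entry union in hamt.rs. A minimal standalone illustration of the two macros, with an invented Pair type:

#[macro_use]
extern crate static_assertions;

#[derive(Copy, Clone)]
struct Pair {
    a: u32,
    b: u32,
}

// checked at compile time: a mismatch is a build error, not a runtime panic
assert_eq_size!(Pair, u64);
assert_eq_align!(Pair, u32);

fn main() {
    println!("size and alignment assertions held at compile time");
}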