[ares] rewrite HAMT entirely, more readable/less raw representation

Edward Amsden 2023-02-16 19:19:16 -06:00
parent 646576dd56
commit 4e422efecc
4 changed files with 282 additions and 170 deletions

rust/ares/Cargo.lock (generated)
View File

@@ -22,6 +22,7 @@ dependencies = [
  "murmur3",
  "num-derive",
  "num-traits",
+ "static_assertions",
 ]

 [[package]]
@@ -528,6 +529,12 @@ dependencies = [
  "serde",
 ]

+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
 [[package]]
 name = "syn"
 version = "1.0.98"

View File

@@ -17,6 +17,7 @@ intmap = "1.1.0"
 num-traits = "0.2"
 num-derive = "0.3"
 criterion = "0.4"
+static_assertions = "1.1.0"

 [[bin]]
 name = "cue_pill"

View File

@@ -1,239 +1,340 @@
The HAMT is rebuilt around explicit Stem, Leaf, and Entry types in place of the old raw *mut u64 node buffers and HamtLeaf chains. The new contents of the file:

use either::Either::{self, *};
use crate::noun::Noun;
use crate::mem::{NockStack, unifying_equality, Preserve};
use crate::mug::mug_u32;
use std::ptr::{copy_nonoverlapping, null};
use std::slice;

#[inline]
fn chunk_to_bit(chunk: u32) -> u32 {
    1u32 << chunk
}

#[inline]
fn chunk_to_mask(chunk: u32) -> u32 { // mask out the bit for the chunk and all more significant
    chunk_to_bit(chunk) - 1
}

#[repr(packed)]
#[derive(Copy, Clone)]
struct Stem<T: Copy> {
    bitmap: u32,
    typemap: u32,
    buffer: *const Entry<T>,
}

impl<T: Copy> Stem<T> {
    #[inline]
    fn size(self) -> usize {
        self.bitmap.count_ones() as usize
    }

    #[inline]
    fn has_index(self, chunk: u32) -> bool {
        self.bitmap & chunk_to_bit(chunk) != 0
    }

    #[inline]
    fn hypothetical_index(self, chunk: u32) -> usize {
        (self.bitmap & chunk_to_mask(chunk)).count_ones() as usize
    }

    #[inline]
    fn index(self, chunk: u32) -> Option<usize> {
        if self.has_index(chunk) {
            Some(self.hypothetical_index(chunk))
        } else {
            None
        }
    }

    #[inline]
    fn entry(self, chunk: u32) -> Option<(Either<Stem<T>, Leaf<T>>, usize)> {
        self.index(chunk).map(|idx| {
            (
                unsafe {
                    if self.typemap & chunk_to_bit(chunk) != 0 {
                        Left((*self.buffer.add(idx)).stem)
                    } else {
                        Right((*self.buffer.add(idx)).leaf)
                    }
                },
                idx,
            )
        })
    }
}

#[repr(packed)]
#[derive(Copy, Clone)]
struct Leaf<T: Copy> {
    len: usize,
    buffer: *mut (Noun, T), // mutable for unifying equality
}

impl<T: Copy> Leaf<T> {
    unsafe fn to_mut_slice<'a>(self) -> &'a mut [(Noun, T)] {
        slice::from_raw_parts_mut(self.buffer, self.len)
    }
}

#[derive(Copy, Clone)]
union Entry<T: Copy> {
    stem: Stem<T>,
    leaf: Leaf<T>,
}

// Entries in our union are the same size and alignment
assert_eq_size!(Entry<()>, Leaf<()>);
assert_eq_align!(Entry<()>, Leaf<()>);
assert_eq_size!(Entry<()>, Stem<()>);
assert_eq_align!(Entry<()>, Stem<()>);
// Our custom leaf type is the same size as a fat pointer to key-value pairs
assert_eq_size!(&[(Noun, ())], Leaf<()>);
// Our custom stem type is the same size as a fat pointer to `Entry`s
assert_eq_size!(&[Entry<()>], Stem<()>);

pub struct Hamt<T: Copy>(Stem<T>);

impl<T: Copy> Hamt<T> {
    // Make a new, empty HAMT
    pub fn new() -> Hamt<T> {
        Hamt(Stem {
            bitmap: 0,
            typemap: 0,
            buffer: null(),
        })
    }

    /**
     * Look up a pair keyed by a noun in the HAMT
     *
     * A mutable reference is required so that unifying equality can unify the key with a key
     * entry in the HAMT
     */
    pub fn lookup(&self, stack: &mut NockStack, n: &mut Noun) -> Option<T> {
        let mut stem = self.0;
        let mut mug = mug_u32(stack, *n);
        'lookup: loop {
            let chunk = mug & 0x1F; // 5 bits
            mug = mug >> 5;
            match stem.entry(chunk) {
                None => {
                    break None;
                },
                Some((Left(next_stem), _idx)) => {
                    stem = next_stem;
                    continue;
                },
                Some((Right(leaf), _idx)) => {
                    for pair in unsafe { leaf.to_mut_slice().iter_mut() } {
                        if unsafe { unifying_equality(stack, n, &mut pair.0) } {
                            break 'lookup Some(pair.1);
                        }
                    }
                    break None;
                },
            }
        }
    }

    // XX a delete function requires a stack, do we need one?

    /// Make a new HAMT with the value inserted or replaced at the key.
    pub fn insert(&self, stack: &mut NockStack, n: &mut Noun, t: T) -> Hamt<T> {
        let mut mug = mug_u32(stack, *n);
        let mut depth = 0u8;
        let mut stem = self.0;
        let mut stem_ret = self.0;
        let mut dest = &mut stem_ret as *mut Stem<T>;
        unsafe {
            'insert: loop {
                let chunk = mug & 0x1F; // 5 bits
                mug = mug >> 5;
                match stem.entry(chunk) {
                    // No entry yet at this chunk: copy the stem's buffer with a new
                    // single-pair leaf spliced in at the chunk's index.
                    None => {
                        let new_leaf_buffer = stack.struct_alloc(1);
                        *new_leaf_buffer = (*n, t);
                        let split = stem.hypothetical_index(chunk);
                        let new_buffer = stack.struct_alloc(stem.size() + 1);
                        copy_nonoverlapping(stem.buffer, new_buffer, split);
                        *new_buffer.add(split) = Entry {
                            leaf: Leaf {
                                len: 1,
                                buffer: new_leaf_buffer,
                            },
                        };
                        copy_nonoverlapping(stem.buffer.add(split), new_buffer.add(split + 1), stem.size() - split);
                        *dest = Stem {
                            bitmap: stem.bitmap | chunk_to_bit(chunk),
                            typemap: stem.typemap & !chunk_to_bit(chunk),
                            buffer: new_buffer,
                        };
                        break Hamt(stem_ret);
                    },
                    // A child stem at this chunk: copy this stem's buffer and descend.
                    Some((Left(next_stem), idx)) => {
                        let new_buffer = stack.struct_alloc(stem.size());
                        copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
                        *dest = Stem {
                            bitmap: stem.bitmap,
                            typemap: stem.typemap,
                            buffer: new_buffer,
                        };
                        dest = &mut (*new_buffer.add(idx)).stem;
                        stem = next_stem;
                        depth += 1;
                        continue;
                    },
                    Some((Right(leaf), idx)) => {
                        // If the key is already present, replace its value in a copy of the leaf.
                        for (ldx, pair) in leaf.to_mut_slice().iter_mut().enumerate() {
                            if unifying_equality(stack, n, &mut pair.0) {
                                let new_leaf_buffer = stack.struct_alloc(leaf.len);
                                copy_nonoverlapping(leaf.buffer, new_leaf_buffer, leaf.len);
                                (*new_leaf_buffer.add(ldx)).1 = t;
                                let new_buffer = stack.struct_alloc(stem.size());
                                copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
                                *new_buffer.add(idx) = Entry {
                                    leaf: Leaf {
                                        len: leaf.len,
                                        buffer: new_leaf_buffer,
                                    },
                                };
                                *dest = Stem {
                                    bitmap: stem.bitmap,
                                    typemap: stem.typemap,
                                    buffer: new_buffer,
                                };
                                break 'insert Hamt(stem_ret);
                            }
                        }
                        if depth >= 5 {
                            // Out of distinguishing mug chunks: chain the new pair onto this leaf.
                            let new_leaf_buffer = stack.struct_alloc(leaf.len + 1);
                            copy_nonoverlapping(leaf.buffer, new_leaf_buffer, leaf.len);
                            *new_leaf_buffer.add(leaf.len) = (*n, t);
                            let new_buffer = stack.struct_alloc(stem.size());
                            copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
                            *new_buffer.add(idx) = Entry {
                                leaf: Leaf {
                                    len: leaf.len + 1,
                                    buffer: new_leaf_buffer,
                                },
                            };
                            *dest = Stem {
                                bitmap: stem.bitmap,
                                typemap: stem.typemap,
                                buffer: new_buffer,
                            };
                            break 'insert Hamt(stem_ret);
                        } else {
                            // if we haven't hit the depth limit yet we shouldn't be chaining;
                            // we'll make a fake node pointing to the old leaf and "insert into"
                            // that next time around
                            assert!(leaf.len == 1);
                            let fake_buffer = stack.struct_alloc(1);
                            *fake_buffer = Entry {
                                leaf: leaf
                            };
                            // The fake stem must index the existing leaf by *its* key's mug
                            // chunk at the next level down.
                            let rival_mug = mug_u32(stack, (*leaf.buffer).0);
                            let rival_chunk = (rival_mug >> ((depth + 1) * 5)) & 0x1F;
                            let next_stem = Stem {
                                bitmap: chunk_to_bit(rival_chunk),
                                typemap: 0,
                                buffer: fake_buffer,
                            };
                            let new_buffer = stack.struct_alloc(stem.size());
                            copy_nonoverlapping(stem.buffer, new_buffer, stem.size());
                            *dest = Stem {
                                bitmap: stem.bitmap,
                                typemap: stem.typemap | chunk_to_bit(chunk), // node now
                                buffer: new_buffer,
                            };
                            dest = &mut (*new_buffer.add(idx)).stem;
                            stem = next_stem;
                            depth += 1;
                            continue;
                        }
                    },
                }
            }
        }
    }
}
impl<T: Copy + Preserve> Preserve for Hamt<T> {
    unsafe fn preserve(&mut self, stack: &mut NockStack) {
        // XX make in_frame not care about pointer type
        if stack.in_frame((*self).0.buffer as *const u64) {
            let dest_buffer = stack.struct_alloc_in_previous_frame((*self).0.size());
            copy_nonoverlapping((*self).0.buffer, dest_buffer, (*self).0.size());
            (*self).0.buffer = dest_buffer;
            let traversal_stack = stack.struct_alloc::<(Stem<T>, u32)>(6);
            let mut traversal_depth = 1;
            *traversal_stack = ((*self).0, 0);
            'preserve: loop {
                if traversal_depth == 0 { break; }
                let (stem, mut position) = *traversal_stack.add(traversal_depth - 1);
                // can we loop over the size and count leading 0s remaining in the bitmap?
                'preserve_stem: loop {
                    if position >= 32 {
                        traversal_depth -= 1;
                        continue 'preserve;
                    }
                    match stem.entry(position) {
                        None => {
                            position += 1;
                            continue 'preserve_stem;
                        },
                        Some((Left(next_stem), idx)) => {
                            if stack.in_frame(next_stem.buffer as *const u64) {
                                let dest_buffer = stack.struct_alloc_in_previous_frame(next_stem.size());
                                copy_nonoverlapping(next_stem.buffer, dest_buffer, next_stem.size());
                                let new_stem = Stem {
                                    bitmap: next_stem.bitmap,
                                    typemap: next_stem.typemap,
                                    buffer: dest_buffer,
                                };
                                *(stem.buffer.add(idx) as *mut Entry<T>) = Entry { stem: new_stem };
                                assert!(traversal_depth <= 5); // will increment
                                (*traversal_stack.add(traversal_depth - 1)).1 = position + 1;
                                *traversal_stack.add(traversal_depth) = (new_stem, 0);
                                traversal_depth += 1;
                                continue 'preserve;
                            } else {
                                position += 1;
                                continue 'preserve_stem;
                            }
                        },
                        Some((Right(leaf), idx)) => {
                            if stack.in_frame(leaf.buffer as *const u64) {
                                let dest_buffer = stack.struct_alloc_in_previous_frame(leaf.len);
                                copy_nonoverlapping(leaf.buffer, dest_buffer, leaf.len);
                                let new_leaf = Leaf {
                                    len: leaf.len,
                                    buffer: dest_buffer,
                                };
                                for pair in new_leaf.to_mut_slice().iter_mut() {
                                    (*pair).0.preserve(stack);
                                    (*pair).1.preserve(stack);
                                }
                                *(stem.buffer.add(idx) as *mut Entry<T>) = Entry {
                                    leaf: new_leaf,
                                };
                            }
                            position += 1;
                            continue 'preserve_stem;
                        },
                    }
                }
            }
        }
    }
}
/*
impl <T: Copy + Preserve> Preserve for Hamt<T> {
    unsafe fn preserve(&mut self, stack: &mut NockStack) {
        // we special case the outer copy because its destination is just a pointer and not a

@@ -303,3 +404,4 @@ impl <T: Copy + Preserve> Preserve for Hamt<T> {
         }
     }
 }
+*/
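
The core bookkeeping in the rewritten Stem is the usual HAMT trick: a 32-bit occupancy bitmap plus a buffer that stores entries only for occupied slots, so an entry's position is the popcount of the occupied bits below its 5-bit mug chunk. A self-contained sketch of that arithmetic (function names mirror the ones above, but nothing here depends on the crate):

    // Standalone sketch of the bitmap/popcount indexing used by Stem: entries for
    // occupied chunks are stored contiguously, and a chunk's position in the
    // buffer is the number of occupied chunks below it.
    fn chunk_to_bit(chunk: u32) -> u32 {
        1u32 << chunk
    }

    fn chunk_to_mask(chunk: u32) -> u32 {
        chunk_to_bit(chunk) - 1
    }

    fn index(bitmap: u32, chunk: u32) -> Option<usize> {
        if bitmap & chunk_to_bit(chunk) != 0 {
            Some((bitmap & chunk_to_mask(chunk)).count_ones() as usize)
        } else {
            None
        }
    }

    fn main() {
        // Occupied chunks 1, 4, and 9: the buffer holds exactly three entries.
        let bitmap = chunk_to_bit(1) | chunk_to_bit(4) | chunk_to_bit(9);
        assert_eq!(index(bitmap, 1), Some(0));
        assert_eq!(index(bitmap, 4), Some(1));
        assert_eq!(index(bitmap, 9), Some(2));
        assert_eq!(index(bitmap, 5), None);

        // A 32-bit mug is consumed five bits per level; insert chains colliding
        // keys into a single leaf once it runs out of distinguishing chunks.
        let mug: u32 = 0b00100_00001;
        assert_eq!(mug & 0x1F, 0b00001);        // chunk at depth 0
        assert_eq!((mug >> 5) & 0x1F, 0b00100); // chunk at depth 1
    }

Usage follows the usual persistent-map shape: each insert returns a new Hamt that shares structure with the old one. A hypothetical sketch against the API above (stack, key_a, and key_b are assumed to be a NockStack and mutable Nouns obtained elsewhere in the crate; their construction is not shown in this diff):

    // Hypothetical usage sketch, not part of the commit.
    let map: Hamt<u64> = Hamt::new();
    let map = map.insert(stack, &mut key_a, 1);
    let map = map.insert(stack, &mut key_b, 2);
    assert_eq!(map.lookup(stack, &mut key_a), Some(1));
    assert_eq!(map.lookup(stack, &mut key_b), Some(2));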

View File

@@ -1,5 +1,7 @@
 #[macro_use]
 extern crate num_derive;
+#[macro_use]
+extern crate static_assertions;
 pub mod interpreter;
 pub mod jets;
 pub mod mem;