diff --git a/.github/workflows/ares-shared.yml b/.github/workflows/ares-shared.yml index fe7aba4..244017e 100644 --- a/.github/workflows/ares-shared.yml +++ b/.github/workflows/ares-shared.yml @@ -75,7 +75,7 @@ jobs: # Build Ares - name: Build run: | - nix develop --command bash -c "cargo build --release --verbose --features check_all" + nix develop --command bash -c "cargo build --release --verbose" # Run tests - name: Test diff --git a/.gitignore b/.gitignore index 917314b..e90d778 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ ships/ *.backup urbit *.jam.out +*.o +*.a diff --git a/rust/ares/Cargo.lock b/rust/ares/Cargo.lock index 77636ad..23a871e 100644 --- a/rust/ares/Cargo.lock +++ b/rust/ares/Cargo.lock @@ -60,6 +60,7 @@ version = "0.1.0" dependencies = [ "ares_crypto", "ares_macros", + "ares_pma", "assert_no_alloc", "autotools", "bitvec", @@ -103,6 +104,14 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "ares_pma" +version = "0.1.0" +dependencies = [ + "bindgen 0.69.1", + "cc", +] + [[package]] name = "assert_no_alloc" version = "1.1.2" @@ -158,6 +167,29 @@ dependencies = [ "which", ] +[[package]] +name = "bindgen" +version = "0.69.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2" +dependencies = [ + "bitflags 2.4.1", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn 2.0.39", + "which", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -1105,7 +1137,7 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced751f95a527a3458eb67c75e4ae7093d41585edaa7565f5769101502473019" dependencies = [ - "bindgen", + "bindgen 0.68.1", "pkg-config", ] diff --git a/rust/ares/Cargo.toml b/rust/ares/Cargo.toml index ef58fed..3d7ec25 100644 --- a/rust/ares/Cargo.toml +++ b/rust/ares/Cargo.toml @@ -13,9 +13,12 @@ edition = "2018" [dependencies] ares_crypto = { path = "../ares_crypto" } ares_macros = { path = "../ares_macros" } -# assert_no_alloc = "1.1.2" +# Use this when debugging requires the debug printfs in the PMA +# ares_pma = { path = "../ares_pma", features=["debug_prints"] } +ares_pma = { path = "../ares_pma" } +assert_no_alloc = "1.1.2" # use this when debugging requires allocation (e.g. eprintln) -assert_no_alloc = {version="1.1.2", features=["warn_debug"]} +# assert_no_alloc = {version="1.1.2", features=["warn_debug"]} bitvec = "1.0.0" criterion = "0.4" either = "1.9.0" @@ -47,6 +50,7 @@ opt-level = 3 # run with e.g. 
'cargo build --features check_forwarding,check_acyclic' [features] +# FOR DEBUGGING MEMORY ISSUES ONLY check_all = [ "check_acyclic", "check_forwarding", "check_junior" ] check_acyclic = [] check_forwarding = [] diff --git a/rust/ares/src/hamt.rs b/rust/ares/src/hamt.rs index 001f51a..d7f08a3 100644 --- a/rust/ares/src/hamt.rs +++ b/rust/ares/src/hamt.rs @@ -1,8 +1,11 @@ -use crate::mem::{unifying_equality, NockStack, Preserve}; +use crate::mem::{NockStack, Preserve}; use crate::mug::mug_u32; use crate::noun::Noun; +use crate::persist::{pma_contains, Persist}; +use crate::unifying_equality::unifying_equality; use either::Either::{self, *}; -use std::ptr::{copy_nonoverlapping, null}; +use std::mem::size_of; +use std::ptr::{copy_nonoverlapping, null_mut}; use std::slice; type MutStemEntry = Either<*mut MutStem, Leaf>; @@ -160,11 +163,23 @@ impl MutHamt { } } +/** + * This is the core memory structure of an immutable HAMT. + * + * The root Stem lives in its own memory allocation, addressed by the pointer wrapped by [Hamt]. + * All other Stems and Leaves live in memory blocks pointed to by [buffer]. The memory pointed to + * by this field may be zero to 32 entries, depending on the *number of bits set* in bitmap. + * + * Addressing a chunk of the key's hash is done by counting the number of set bits in the bitmap + * before the chunk'th bit. The typemap is a parallel bitmap in which bits are set if the + * corresponding entry is a stem, and cleared if it is a leaf. + */ #[repr(packed)] +#[repr(C)] struct Stem { bitmap: u32, typemap: u32, - buffer: *const Entry, + buffer: *mut Entry, } impl Copy for Stem {} @@ -218,6 +233,7 @@ impl Stem { } #[repr(packed)] +#[repr(C)] struct Leaf { len: usize, buffer: *mut (Noun, T), // mutable for unifying equality @@ -238,6 +254,8 @@ impl Leaf { } #[derive(Copy, Clone)] +#[repr(packed)] +#[repr(C)] union Entry { stem: Stem, leaf: Leaf, @@ -256,19 +274,23 @@ assert_eq_size!(&[(Noun, ())], Leaf<()>); assert_eq_size!(&[Entry<()>], Stem<()>); #[derive(Copy, Clone)] -pub struct Hamt(Stem); +pub struct Hamt(*mut Stem); impl Hamt { pub fn is_null(&self) -> bool { - self.0.bitmap == 0 + unsafe { (*self.0).bitmap == 0 } } // Make a new, empty HAMT - pub fn new() -> Self { - Hamt(Stem { - bitmap: 0, - typemap: 0, - buffer: null(), - }) + pub fn new(stack: &mut NockStack) -> Self { + unsafe { + let stem_ptr = stack.struct_alloc::>(1); + *stem_ptr = Stem { + bitmap: 0, + typemap: 0, + buffer: null_mut(), + }; + Hamt(stem_ptr) + } } /** @@ -278,7 +300,7 @@ impl Hamt { * in the HAMT */ pub fn lookup(&self, stack: &mut NockStack, n: &mut Noun) -> Option { - let mut stem = self.0; + let mut stem = unsafe { *self.0 }; let mut mug = mug_u32(stack, *n); 'lookup: loop { let chunk = mug & 0x1F; // 5 bits @@ -309,9 +331,9 @@ impl Hamt { pub fn insert(&self, stack: &mut NockStack, n: &mut Noun, t: T) -> Hamt { let mut mug = mug_u32(stack, *n); let mut depth = 0u8; - let mut stem = self.0; - let mut stem_ret = self.0; - let mut dest = &mut stem_ret as *mut Stem; + let mut stem = unsafe { *self.0 }; + let stem_ret = unsafe { stack.struct_alloc::>(1) }; + let mut dest = stem_ret; unsafe { 'insert: loop { let chunk = mug & 0x1F; // 5 bits @@ -439,17 +461,12 @@ impl Hamt { } } -impl Default for Hamt { - fn default() -> Self { - Self::new() - } -} - impl Preserve for Hamt { unsafe fn assert_in_stack(&self, stack: &NockStack) { - stack.assert_struct_is_in(self.0.buffer, self.0.size()); + stack.assert_struct_is_in(self.0, 1); + stack.assert_struct_is_in((*self.0).buffer, (*self.0).size()); 
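// A minimal, self-contained sketch of the addressing scheme described in the
// Stem doc comment above; `slot_index` is a hypothetical helper, not part of
// this change. A 5-bit chunk of the key's mug selects one of 32 logical
// positions; the entry's index in the compressed `buffer` is the number of
// bitmap bits set below that position.
fn slot_index(bitmap: u32, chunk: u32) -> Option<usize> {
    let bit = 1u32 << (chunk & 0x1F);
    if bitmap & bit == 0 {
        None // no entry stored for this chunk of the hash
    } else {
        // popcount of the bits strictly below `bit` gives the index in `buffer`
        Some((bitmap & (bit - 1)).count_ones() as usize)
    }
}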
let mut traversal_stack: [Option<(Stem, u32)>; 6] = [None; 6]; - traversal_stack[0] = Some((self.0, 0)); + traversal_stack[0] = Some(((*self.0), 0)); let mut traversal_depth = 1; 'check: loop { if traversal_depth == 0 { @@ -491,78 +508,85 @@ impl Preserve for Hamt { } unsafe fn preserve(&mut self, stack: &mut NockStack) { - if stack.is_in_frame(self.0.buffer) { - let dest_buffer = stack.struct_alloc_in_previous_frame(self.0.size()); - copy_nonoverlapping(self.0.buffer, dest_buffer, self.0.size()); - self.0.buffer = dest_buffer; - // Here we're using the Rust stack since the array is a fixed - // size. Thus it will be cleaned up if the Rust thread running - // this is killed, and is therefore not an issue vs. if it were allocated - // on the heap. - // - // In the past, this traversal stack was allocated in NockStack, but - // exactly the right way to do this is less clear with the split stack. - let mut traversal_stack: [Option<(Stem, u32)>; 6] = [None; 6]; - traversal_stack[0] = Some((self.0, 0)); - let mut traversal_depth = 1; - 'preserve: loop { - if traversal_depth == 0 { - break; - } - let (stem, mut position) = traversal_stack[traversal_depth - 1] - .expect("Attempted to access uninitialized array element"); - // can we loop over the size and count leading 0s remaining in the bitmap? - 'preserve_stem: loop { - if position >= 32 { - traversal_depth -= 1; - continue 'preserve; + if stack.is_in_frame(self.0) { + let dest_stem = stack.struct_alloc_in_previous_frame(1); + copy_nonoverlapping(self.0, dest_stem, 1); + self.0 = dest_stem; + if stack.is_in_frame((*dest_stem).buffer) { + let dest_buffer = stack.struct_alloc_in_previous_frame((*dest_stem).size()); + copy_nonoverlapping((*dest_stem).buffer, dest_buffer, (*dest_stem).size()); + (*dest_stem).buffer = dest_buffer; + // Here we're using the Rust stack since the array is a fixed + // size. Thus it will be cleaned up if the Rust thread running + // this is killed, and is therefore not an issue vs. if it were allocated + // on the heap. + // + // In the past, this traversal stack was allocated in NockStack, but + // exactly the right way to do this is less clear with the split stack. + let mut traversal_stack: [Option<(Stem, u32)>; 6] = [None; 6]; + traversal_stack[0] = Some(((*dest_stem), 0)); + let mut traversal_depth = 1; + 'preserve: loop { + if traversal_depth == 0 { + break; } - match stem.entry(position) { - None => { - position += 1; - continue 'preserve_stem; + let (stem, mut position) = traversal_stack[traversal_depth - 1] + .expect("Attempted to access uninitialized array element"); + // can we loop over the size and count leading 0s remaining in the bitmap? 
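// Sketch of one answer to the question in the comment above: visit only the
// occupied positions of a stem by repeatedly counting trailing zeros and
// clearing the lowest set bit, much as the new Persist impl for Hamt below
// walks its bitmaps with trailing_zeros(). `visit` is a hypothetical callback.
fn for_each_occupied(mut bitmap: u32, mut visit: impl FnMut(u32)) {
    while bitmap != 0 {
        let chunk = bitmap.trailing_zeros(); // position of the next occupied entry
        visit(chunk);
        bitmap &= bitmap - 1; // clear the lowest set bit and continue
    }
}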
+ 'preserve_stem: loop { + if position >= 32 { + traversal_depth -= 1; + continue 'preserve; } - Some((Left(next_stem), idx)) => { - if stack.is_in_frame(next_stem.buffer) { - let dest_buffer = - stack.struct_alloc_in_previous_frame(next_stem.size()); - copy_nonoverlapping( - next_stem.buffer, - dest_buffer, - next_stem.size(), - ); - let new_stem = Stem { - bitmap: next_stem.bitmap, - typemap: next_stem.typemap, - buffer: dest_buffer, - }; - *(stem.buffer.add(idx) as *mut Entry) = Entry { stem: new_stem }; - assert!(traversal_depth <= 5); // will increment - traversal_stack[traversal_depth - 1] = Some((stem, position + 1)); - traversal_stack[traversal_depth] = Some((new_stem, 0)); - traversal_depth += 1; - continue 'preserve; - } else { + match stem.entry(position) { + None => { position += 1; continue 'preserve_stem; } - } - Some((Right(leaf), idx)) => { - if stack.is_in_frame(leaf.buffer) { - let dest_buffer = stack.struct_alloc_in_previous_frame(leaf.len); - copy_nonoverlapping(leaf.buffer, dest_buffer, leaf.len); - let new_leaf = Leaf { - len: leaf.len, - buffer: dest_buffer, - }; - for pair in new_leaf.to_mut_slice().iter_mut() { - pair.0.preserve(stack); - pair.1.preserve(stack); + Some((Left(next_stem), idx)) => { + if stack.is_in_frame(next_stem.buffer) { + let dest_buffer = + stack.struct_alloc_in_previous_frame(next_stem.size()); + copy_nonoverlapping( + next_stem.buffer, + dest_buffer, + next_stem.size(), + ); + let new_stem = Stem { + bitmap: next_stem.bitmap, + typemap: next_stem.typemap, + buffer: dest_buffer, + }; + *stem.buffer.add(idx) = Entry { stem: new_stem }; + assert!(traversal_depth <= 5); // will increment + traversal_stack[traversal_depth - 1] = + Some((stem, position + 1)); + traversal_stack[traversal_depth] = Some((new_stem, 0)); + traversal_depth += 1; + continue 'preserve; + } else { + position += 1; + continue 'preserve_stem; } - *(stem.buffer.add(idx) as *mut Entry) = Entry { leaf: new_leaf }; } - position += 1; - continue 'preserve_stem; + Some((Right(leaf), idx)) => { + if stack.is_in_frame(leaf.buffer) { + let dest_buffer = + stack.struct_alloc_in_previous_frame(leaf.len); + copy_nonoverlapping(leaf.buffer, dest_buffer, leaf.len); + let new_leaf = Leaf { + len: leaf.len, + buffer: dest_buffer, + }; + for pair in new_leaf.to_mut_slice().iter_mut() { + pair.0.preserve(stack); + pair.1.preserve(stack); + } + *stem.buffer.add(idx) = Entry { leaf: new_leaf }; + } + position += 1; + continue 'preserve_stem; + } } } } @@ -570,3 +594,182 @@ impl Preserve for Hamt { } } } + +impl Persist for Hamt { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + if pma_contains(self.0, 1) { + return 0; + } + let mut bytes: usize = size_of::>(); + if pma_contains((*self.0).buffer, (*self.0).size()) { + return bytes; + }; + + bytes += (*self.0).size() * size_of::>(); + + let mut depth: usize = 0; + let mut traversal = [Stem { + bitmap: 0, + typemap: 0, + buffer: null_mut(), + }; 6]; + traversal[0] = *self.0; + + loop { + assert!(depth < 6); + if traversal[depth].bitmap == 0 { + if depth == 0 { + break bytes; + } + depth -= 1; + continue; + } + + let next_chunk = traversal[depth].bitmap.trailing_zeros(); + let next_type = traversal[depth].typemap & (1 << next_chunk) != 0; + let next_entry = *traversal[depth].buffer; + traversal[depth].bitmap >>= next_chunk + 1; + traversal[depth].typemap >>= next_chunk + 1; + traversal[depth].buffer = traversal[depth].buffer.add(1); + + if next_type { + // true->stem false->leaf + // found another stem + traversal[depth + 1] = 
next_entry.stem; + + if pma_contains(traversal[depth + 1].buffer, traversal[depth + 1].size()) { + continue; + } + + // count the buffer for the next stem + bytes += traversal[depth + 1].size() * size_of::>(); + depth += 1; + } else { + let mut leaf = next_entry.leaf; + + if leaf.len == 0 { + continue; + } + + if pma_contains(leaf.buffer, leaf.len) { + continue; + } + + bytes += size_of::<(Noun, T)>() * leaf.len; + + while leaf.len > 0 { + bytes += (*leaf.buffer).0.space_needed(stack); + bytes += (*leaf.buffer).1.space_needed(stack); + leaf.buffer = leaf.buffer.add(1); + leaf.len -= 1; + } + } + } + } + + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8) { + if pma_contains(self.0, 1) { + return; + } + let stem_ptr = *buffer as *mut Stem; + copy_nonoverlapping(self.0, stem_ptr, 1); + *buffer = stem_ptr.add(1) as *mut u8; + self.0 = stem_ptr; + + let stem_buffer_size = (*stem_ptr).size(); + if pma_contains((*stem_ptr).buffer, stem_buffer_size) { + return; + } + let stem_buffer_ptr = *buffer as *mut Entry; + copy_nonoverlapping((*stem_ptr).buffer, stem_buffer_ptr, stem_buffer_size); + *buffer = stem_buffer_ptr.add(stem_buffer_size) as *mut u8; + (*stem_ptr).buffer = stem_buffer_ptr; + + let mut depth: usize = 0; + let mut traversal = [Stem { + bitmap: 0, + typemap: 0, + buffer: null_mut(), + }; 6]; + + traversal[0] = *stem_ptr; + + loop { + if traversal[depth].bitmap == 0 { + if depth == 0 { + break; + } + depth -= 1; + continue; + } + + let next_chunk = traversal[depth].bitmap.trailing_zeros(); + let next_type = traversal[depth].typemap & (1 << next_chunk) != 0; + let next_entry_ptr = traversal[depth].buffer; + + traversal[depth].bitmap >>= next_chunk + 1; + traversal[depth].typemap >>= next_chunk + 1; + traversal[depth].buffer = traversal[depth].buffer.add(1); + + if next_type { + // Stem case + assert!(depth < 5); + + let stem_ptr: *mut Stem = &mut (*next_entry_ptr).stem; + let stem_size = (*stem_ptr).size(); + + if pma_contains((*stem_ptr).buffer, stem_size) { + continue; + } + + let stem_buffer_ptr = *buffer as *mut Entry; + + copy_nonoverlapping((*stem_ptr).buffer, stem_buffer_ptr, stem_size); + *buffer = stem_buffer_ptr.add(stem_size) as *mut u8; + + (*stem_ptr).buffer = stem_buffer_ptr; + traversal[depth + 1] = *stem_ptr; + depth += 1; + } else { + // Leaf case + let leaf_ptr: *mut Leaf = &mut (*next_entry_ptr).leaf; + + if (*leaf_ptr).len == 0 { + continue; + } + + if pma_contains((*leaf_ptr).buffer, (*leaf_ptr).len) { + continue; + } + + let leaf_buffer_ptr = *buffer as *mut (Noun, T); + + copy_nonoverlapping((*leaf_ptr).buffer, leaf_buffer_ptr, (*leaf_ptr).len); + *buffer = leaf_buffer_ptr.add((*leaf_ptr).len) as *mut u8; + + (*leaf_ptr).buffer = leaf_buffer_ptr; + + let mut leaf_idx = 0; + + while leaf_idx < (*leaf_ptr).len { + (*(*leaf_ptr).buffer.add(leaf_idx)) + .0 + .copy_to_buffer(stack, buffer); + (*(*leaf_ptr).buffer.add(leaf_idx)) + .1 + .copy_to_buffer(stack, buffer); + + leaf_idx += 1; + } + } + } + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.0 as u64 + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + Hamt(meta_handle as *mut Stem) + } +} diff --git a/rust/ares/src/interpreter.rs b/rust/ares/src/interpreter.rs index bf406af..358f396 100644 --- a/rust/ares/src/interpreter.rs +++ b/rust/ares/src/interpreter.rs @@ -7,7 +7,6 @@ use crate::jets::cold::Cold; use crate::jets::hot::Hot; use crate::jets::warm::Warm; use crate::jets::JetErr; -use crate::mem::unifying_equality; use crate::mem::NockStack; use 
crate::mem::Preserve; use crate::newt::Newt; @@ -15,6 +14,7 @@ use crate::noun; use crate::noun::{Atom, Cell, IndirectAtom, Noun, Slots, D, T}; use crate::serf::TERMINATOR; use crate::trace::{write_nock_trace, TraceInfo, TraceStack}; +use crate::unifying_equality::unifying_equality; use ares_macros::tas; use assert_no_alloc::assert_no_alloc; use bitvec::prelude::{BitSlice, Lsb0}; @@ -1304,9 +1304,9 @@ mod hint { use crate::jets; use crate::jets::cold; use crate::jets::nock::util::{mook, LEAF}; - use crate::mem::unifying_equality; use crate::noun::{tape, Atom, Cell, Noun, D, T}; use crate::serf::TERMINATOR; + use crate::unifying_equality::unifying_equality; use ares_macros::tas; use std::sync::atomic::Ordering; use std::sync::Arc; diff --git a/rust/ares/src/jets.rs b/rust/ares/src/jets.rs index 5bff6b1..00fc1a0 100644 --- a/rust/ares/src/jets.rs +++ b/rust/ares/src/jets.rs @@ -307,8 +307,9 @@ pub mod util { pub mod test { use super::*; use crate::hamt::Hamt; - use crate::mem::{unifying_equality, NockStack}; + use crate::mem::NockStack; use crate::noun::{Atom, Noun, D, T}; + use crate::unifying_equality::unifying_equality; use assert_no_alloc::assert_no_alloc; use ibig::UBig; @@ -316,9 +317,9 @@ pub mod util { let mut stack = NockStack::new(8 << 10 << 10, 0); let newt = Newt::new_mock(); let cold = Cold::new(&mut stack); - let warm = Warm::new(); + let warm = Warm::new(&mut stack); let hot = Hot::init(&mut stack, URBIT_HOT_STATE); - let cache = Hamt::::new(); + let cache = Hamt::::new(&mut stack); Context { stack, diff --git a/rust/ares/src/jets/bits.rs b/rust/ares/src/jets/bits.rs index 9b6b700..64bcf4d 100644 --- a/rust/ares/src/jets/bits.rs +++ b/rust/ares/src/jets/bits.rs @@ -3,7 +3,7 @@ use crate::interpreter::{Context, Error}; use crate::jets::util::*; use crate::jets::{JetErr, Result}; -use crate::noun::{DirectAtom, IndirectAtom, Noun, D}; +use crate::noun::{IndirectAtom, Noun, D}; use std::cmp; crate::gdb!(); @@ -207,17 +207,9 @@ pub fn jet_rev(context: &mut Context, subject: Noun) -> Result { let bits = len << boz; - /* 63 is the maximum number of bits for a direct atom */ - let mut output = if dat.is_direct() && bits < 64 { - unsafe { DirectAtom::new_unchecked(0).as_atom() } - } else { - unsafe { - IndirectAtom::new_raw(&mut context.stack, ((bits + 7) / 8) as usize, &0).as_atom() - } - }; - let src = dat.as_bitslice(); - let dest = output.as_bitslice_mut(); + let (mut output, dest) = + unsafe { IndirectAtom::new_raw_mut_bitslice(&mut context.stack, bits as usize) }; let len = len as usize; let total_len = len << boz; @@ -226,7 +218,7 @@ pub fn jet_rev(context: &mut Context, subject: Noun) -> Result { dest[start..end].copy_from_bitslice(&src[(total_len - end)..(total_len - start)]); } - Ok(unsafe { output.normalize() }.as_noun()) + Ok(unsafe { output.normalize_as_atom() }.as_noun()) } pub fn jet_rip(context: &mut Context, subject: Noun) -> Result { @@ -736,12 +728,15 @@ mod tests { fn test_rev() { let c = &mut init_context(); - let (_a0, a24, _a63, _a96, _a128) = atoms(&mut c.stack); + let (_a0, a24, _a63, a96, _a128) = atoms(&mut c.stack); let sam = T(&mut c.stack, &[D(0), D(60), a24]); assert_jet(c, jet_rev, sam, D(0xc2a6e1000000000)); let test = 0x1234567890123u64; let sam = T(&mut c.stack, &[D(3), D(7), D(test)]); assert_jet(c, jet_rev, sam, D(test.swap_bytes() >> 8)); + let sam = T(&mut c.stack, &[D(3), D(12), a96]); + let res = A(&mut c.stack, &ubig!(0x563412efbeadde150cb0cefa)); + assert_jet(c, jet_rev, sam, res); } #[test] diff --git a/rust/ares/src/jets/cold.rs 
b/rust/ares/src/jets/cold.rs index e4391bd..3be45ee 100644 --- a/rust/ares/src/jets/cold.rs +++ b/rust/ares/src/jets/cold.rs @@ -1,7 +1,10 @@ use crate::hamt::Hamt; -use crate::mem::{unifying_equality, NockStack, Preserve}; +use crate::mem::{NockStack, Preserve}; use crate::noun; use crate::noun::{Atom, DirectAtom, Noun, Slots, D, T}; +use crate::persist::{pma_contains, Persist}; +use crate::unifying_equality::unifying_equality; +use std::mem::size_of; use std::ptr::copy_nonoverlapping; use std::ptr::null_mut; @@ -31,6 +34,59 @@ struct BatteriesMem { parent_batteries: Batteries, } +impl Persist for Batteries { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + let mut bytes = 0; + let mut batteries = *self; + + loop { + if batteries.0.is_null() { + break; + } + if pma_contains(batteries.0, 1) { + break; + } + bytes += size_of::(); + bytes += (*batteries.0).battery.space_needed(stack); + bytes += (*batteries.0).parent_axis.space_needed(stack); + batteries = (*batteries.0).parent_batteries; + } + bytes + } + + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8) { + let mut dest = self; + loop { + if dest.0.is_null() { + break; + } + if pma_contains(dest.0, 1) { + break; + } + + let batteries_mem_ptr = *buffer as *mut BatteriesMem; + copy_nonoverlapping(dest.0, batteries_mem_ptr, 1); + *buffer = batteries_mem_ptr.add(1) as *mut u8; + + (*batteries_mem_ptr).battery.copy_to_buffer(stack, buffer); + (*batteries_mem_ptr) + .parent_axis + .copy_to_buffer(stack, buffer); + + dest.0 = batteries_mem_ptr; + dest = &mut (*dest.0).parent_batteries; + } + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.0 as u64 + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + Batteries(meta_handle as *mut BatteriesMem) + } +} + impl Preserve for Batteries { unsafe fn assert_in_stack(&self, stack: &NockStack) { if self.0.is_null() { @@ -143,6 +199,55 @@ struct BatteriesListMem { next: BatteriesList, } +impl Persist for BatteriesList { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + let mut bytes = 0; + let mut list = *self; + loop { + if list.0.is_null() { + break; + } + if pma_contains(list.0, 1) { + break; + } + bytes += size_of::(); + bytes += (*list.0).batteries.space_needed(stack); + + list = (*list.0).next; + } + bytes + } + + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8) { + let mut dest = self; + + loop { + if dest.0.is_null() { + break; + } + if pma_contains(dest.0, 1) { + break; + } + + let list_mem_ptr = *buffer as *mut BatteriesListMem; + copy_nonoverlapping(dest.0, list_mem_ptr, 1); + *buffer = list_mem_ptr.add(1) as *mut u8; + dest.0 = list_mem_ptr; + + (*dest.0).batteries.copy_to_buffer(stack, buffer); + dest = &mut (*dest.0).next; + } + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.0 as u64 + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + BatteriesList(meta_handle as *mut BatteriesListMem) + } +} + impl Preserve for BatteriesList { unsafe fn assert_in_stack(&self, stack: &NockStack) { if self.0.is_null() { @@ -215,6 +320,58 @@ struct NounListMem { next: NounList, } +impl Persist for NounList { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + let mut bytes: usize = 0; + let mut list = *self; + + loop { + if list.0.is_null() { + break; + } + if pma_contains(list.0, 1) { + break; + } + + bytes += size_of::(); + bytes += (*list.0).element.space_needed(stack); + + list = (*list.0).next; + } + bytes + } + + unsafe fn copy_to_buffer(&mut 
self, stack: &mut NockStack, buffer: &mut *mut u8) { + let mut dest = self; + + loop { + if dest.0.is_null() { + break; + } + if pma_contains(dest.0, 1) { + break; + } + + let noun_list_mem_ptr = *buffer as *mut NounListMem; + copy_nonoverlapping(dest.0, noun_list_mem_ptr, 1); + *buffer = noun_list_mem_ptr.add(1) as *mut u8; + + dest.0 = noun_list_mem_ptr; + (*dest.0).element.copy_to_buffer(stack, buffer); + + dest = &mut (*dest.0).next; + } + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.0 as u64 + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + NounList(meta_handle as *mut NounListMem) + } +} + impl Preserve for NounList { unsafe fn assert_in_stack(&self, stack: &NockStack) { if self.0.is_null() { @@ -292,6 +449,44 @@ struct ColdMem { path_to_batteries: Hamt, } +impl Persist for Cold { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + if pma_contains(self.0, 1) { + return 0; + } + + let mut bytes = size_of::(); + bytes += (*self.0).battery_to_paths.space_needed(stack); + bytes += (*self.0).root_to_paths.space_needed(stack); + bytes += (*self.0).path_to_batteries.space_needed(stack); + bytes + } + + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8) { + if pma_contains(self.0, 1) { + return; + } + + let cold_mem_ptr = *buffer as *mut ColdMem; + copy_nonoverlapping(self.0, cold_mem_ptr, 1); + *buffer = cold_mem_ptr.add(1) as *mut u8; + + self.0 = cold_mem_ptr; + + (*self.0).battery_to_paths.copy_to_buffer(stack, buffer); + (*self.0).root_to_paths.copy_to_buffer(stack, buffer); + (*self.0).path_to_batteries.copy_to_buffer(stack, buffer); + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.0 as u64 + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + Cold(meta_handle as *mut ColdMem) + } +} + impl Preserve for Cold { unsafe fn assert_in_stack(&self, stack: &NockStack) { stack.assert_struct_is_in(self.0, 1); @@ -319,9 +514,9 @@ impl Cold { } pub fn new(stack: &mut NockStack) -> Self { - let battery_to_paths = Hamt::new(); - let root_to_paths = Hamt::new(); - let path_to_batteries = Hamt::new(); + let battery_to_paths = Hamt::new(stack); + let root_to_paths = Hamt::new(stack); + let path_to_batteries = Hamt::new(stack); unsafe { let cold_mem_ptr: *mut ColdMem = stack.struct_alloc(1); *cold_mem_ptr = ColdMem { diff --git a/rust/ares/src/jets/nock.rs b/rust/ares/src/jets/nock.rs index 2dfd2d0..33c54f5 100644 --- a/rust/ares/src/jets/nock.rs +++ b/rust/ares/src/jets/nock.rs @@ -149,7 +149,7 @@ pub mod util { let cache_snapshot = context.cache; let scry_snapshot = context.scry_stack; - context.cache = Hamt::::new(); + context.cache = Hamt::::new(&mut context.stack); context.scry_stack = T(&mut context.stack, &[scry, context.scry_stack]); match interpret(context, subject, formula) { diff --git a/rust/ares/src/jets/warm.rs b/rust/ares/src/jets/warm.rs index aaeb056..b66db8b 100644 --- a/rust/ares/src/jets/warm.rs +++ b/rust/ares/src/jets/warm.rs @@ -86,8 +86,8 @@ impl Iterator for WarmEntry { impl Warm { #[allow(clippy::new_without_default)] - pub fn new() -> Self { - Warm(Hamt::new()) + pub fn new(stack: &mut NockStack) -> Self { + Warm(Hamt::new(stack)) } fn insert( @@ -112,7 +112,7 @@ impl Warm { } pub fn init(stack: &mut NockStack, cold: &mut Cold, hot: &Hot) -> Self { - let mut warm = Self::new(); + let mut warm = Self::new(stack); for (mut path, axis, jet) in *hot { let batteries_list = cold.find(stack, &mut path); for batteries in batteries_list { diff --git a/rust/ares/src/lib.rs b/rust/ares/src/lib.rs 
index 8393ff9..17b7223 100644 --- a/rust/ares/src/lib.rs +++ b/rust/ares/src/lib.rs @@ -12,8 +12,10 @@ pub mod newt; pub mod noun; pub mod serf; //pub mod bytecode; +pub mod persist; pub mod serialization; pub mod trace; +pub mod unifying_equality; /** Introduce useful functions for debugging * diff --git a/rust/ares/src/mem.rs b/rust/ares/src/mem.rs index 22909f5..31f81c5 100644 --- a/rust/ares/src/mem.rs +++ b/rust/ares/src/mem.rs @@ -5,7 +5,6 @@ use crate::noun::{Atom, Cell, CellMemory, IndirectAtom, Noun, NounAllocator}; use assert_no_alloc::permit_alloc; use either::Either::{self, Left, Right}; use ibig::Stack; -use libc::{c_void, memcmp}; use memmap::MmapMut; use std::alloc::Layout; use std::mem; @@ -50,6 +49,7 @@ pub struct NockStack { alloc_pointer: *mut u64, /** MMap which must be kept alive as long as this NockStack is */ memory: MmapMut, + /** PMA from which we will copy into the NockStack */ /** Whether or not pre_copy() has been called on the current stack frame. */ pc: bool, } @@ -142,6 +142,26 @@ impl NockStack { self.frame_pointer } + /** Current stack pointer of this NockStack */ + pub fn get_stack_pointer(&self) -> *const u64 { + self.stack_pointer + } + + /** Current alloc pointer of this NockStack */ + pub fn get_alloc_pointer(&self) -> *const u64 { + self.alloc_pointer + } + + /** Start of the memory range for this NockStack */ + pub fn get_start(&self) -> *const u64 { + self.start + } + + /** End of the memory range for this NockStack */ + pub fn get_size(&self) -> usize { + self.size + } + /** Checks if the current stack frame has West polarity */ #[inline] pub fn is_west(&self) -> bool { @@ -227,7 +247,7 @@ impl NockStack { } /** Pointer to where the previous stack pointer is saved in a frame */ - unsafe fn prev_stack_pointer_pointer(&self) -> *mut *mut u64 { + pub unsafe fn prev_stack_pointer_pointer(&self) -> *mut *mut u64 { if !self.pc { self.slot_pointer(STACK) as *mut *mut u64 } else { @@ -816,240 +836,6 @@ impl NockStack { } } -#[cfg(feature = "check_junior")] -#[macro_export] -macro_rules! assert_no_junior_pointers { - ( $x:expr, $y:expr ) => { - assert_no_alloc::permit_alloc(|| { - assert!($x.no_junior_pointers($y)); - }) - }; -} - -#[cfg(not(feature = "check_junior"))] -#[macro_export] -macro_rules! assert_no_junior_pointers { - ( $x:expr, $y:expr ) => {}; -} - -pub unsafe fn unifying_equality(stack: &mut NockStack, a: *mut Noun, b: *mut Noun) -> bool { - /* This version of unifying equality is not like that of vere. - * Vere does a tree comparison (accelerated by pointer equality and short-circuited by mug - * equality) and then unifies the nouns at the top level if they are equal. - * - * Here we recursively attempt to unify nouns. Pointer-equal nouns are already unified. - * Disequal mugs again short-circuit the unification and equality check. - * - * Since we expect atoms to be normalized, direct and indirect atoms do not unify with each - * other. For direct atoms, no unification is possible as there is no pointer involved in their - * representation. Equality is simply direct equality on the word representation. Indirect - * atoms require equality first of the size and then of the memory buffers' contents. - * - * Cell equality is tested (after mug and pointer equality) by attempting to unify the heads and tails, - * respectively, of cells, and then re-testing. If unification succeeds then the heads and - * tails will be pointer-wise equal and the cell itself can be unified. 
A failed unification of - * the head or the tail will already short-circuit the unification/equality test, so we will - * not return to re-test the pointer equality. - * - * When actually mutating references for unification, we must be careful to respect seniority. - * A reference to a more junior noun should always be replaced with a reference to a more - * senior noun, *never vice versa*, to avoid introducing references from more senior frames - * into more junior frames, which would result in incorrect operation of the copier. - */ - assert_acyclic!(*a); - assert_acyclic!(*b); - assert_no_forwarding_pointers!(*a); - assert_no_forwarding_pointers!(*b); - assert_no_junior_pointers!(stack, *a); - assert_no_junior_pointers!(stack, *b); - - // If the nouns are already word-equal we have nothing to do - if (*a).raw_equals(*b) { - return true; - }; - // If the nouns have cached mugs which are disequal we have nothing to do - if let (Ok(a_alloc), Ok(b_alloc)) = ((*a).as_allocated(), (*b).as_allocated()) { - if let (Some(a_mug), Some(b_mug)) = (a_alloc.get_cached_mug(), b_alloc.get_cached_mug()) { - if a_mug != b_mug { - return false; - }; - }; - }; - stack.frame_push(0); - *(stack.push::<(*mut Noun, *mut Noun)>()) = (a, b); - loop { - if stack.stack_is_empty() { - break; - }; - let (x, y): (*mut Noun, *mut Noun) = *(stack.top()); - if (*x).raw_equals(*y) { - stack.pop::<(*mut Noun, *mut Noun)>(); - continue; - }; - if let (Ok(x_alloc), Ok(y_alloc)) = ( - // equal direct atoms return true for raw_equals() - (*x).as_allocated(), - (*y).as_allocated(), - ) { - if let (Some(x_mug), Some(y_mug)) = (x_alloc.get_cached_mug(), y_alloc.get_cached_mug()) - { - if x_mug != y_mug { - break; // short-circuit, the mugs differ therefore the nouns must differ - } - }; - match (x_alloc.as_either(), y_alloc.as_either()) { - (Left(x_indirect), Left(y_indirect)) => { - let x_as_ptr = x_indirect.to_raw_pointer(); - let y_as_ptr = y_indirect.to_raw_pointer(); - if x_indirect.size() == y_indirect.size() - && memcmp( - x_indirect.data_pointer() as *const c_void, - y_indirect.data_pointer() as *const c_void, - x_indirect.size() << 3, - ) == 0 - { - let (_senior, junior) = senior_pointer_first(stack, x_as_ptr, y_as_ptr); - if x_as_ptr == junior { - *x = *y; - } else { - *y = *x; - } - stack.pop::<(*mut Noun, *mut Noun)>(); - continue; - } else { - break; - } - } - (Right(x_cell), Right(y_cell)) => { - let x_as_ptr = x_cell.to_raw_pointer() as *const u64; - let y_as_ptr = y_cell.to_raw_pointer() as *const u64; - if x_cell.head().raw_equals(y_cell.head()) - && x_cell.tail().raw_equals(y_cell.tail()) - { - let (_senior, junior) = senior_pointer_first(stack, x_as_ptr, y_as_ptr); - if x_as_ptr == junior { - *x = *y; - } else { - *y = *x; - } - stack.pop::<(*mut Noun, *mut Noun)>(); - continue; - } else { - /* THIS ISN'T AN INFINITE LOOP - * If we discover a disequality in either side, we will - * short-circuit the entire loop and reset the work stack. - * - * If both sides are equal, then we will discover pointer - * equality when we return and unify the cell. 
- */ - *(stack.push::<(*mut Noun, *mut Noun)>()) = - (x_cell.tail_as_mut(), y_cell.tail_as_mut()); - *(stack.push::<(*mut Noun, *mut Noun)>()) = - (x_cell.head_as_mut(), y_cell.head_as_mut()); - continue; - } - } - (_, _) => { - break; // cells don't unify with atoms - } - } - } else { - break; // direct atom not raw equal, so short circuit - } - } - stack.frame_pop(); - - assert_acyclic!(*a); - assert_acyclic!(*b); - assert_no_forwarding_pointers!(*a); - assert_no_forwarding_pointers!(*b); - assert_no_junior_pointers!(stack, *a); - assert_no_junior_pointers!(stack, *b); - - (*a).raw_equals(*b) -} - -unsafe fn senior_pointer_first( - stack: &NockStack, - a: *const u64, - b: *const u64, -) -> (*const u64, *const u64) { - let mut frame_pointer: *const u64 = stack.frame_pointer; - let mut stack_pointer: *const u64 = stack.stack_pointer; - let mut alloc_pointer: *const u64 = stack.alloc_pointer; - let prev_stack_pointer = *(stack.prev_stack_pointer_pointer()); - - let (mut high_pointer, mut low_pointer): (*const u64, *const u64) = if stack.is_west() { - (prev_stack_pointer, alloc_pointer) - } else { - (alloc_pointer, prev_stack_pointer) - }; - - loop { - if low_pointer.is_null() || high_pointer.is_null() { - // we found the bottom of the stack; check entirety of the stack - low_pointer = stack.start; - high_pointer = stack.start.add(stack.size); - } - - match ( - a < high_pointer && a >= low_pointer, - b < high_pointer && b >= low_pointer, - ) { - (true, true) => { - // both pointers are in the same frame, pick arbitrarily (lower in mem) - break lower_pointer_first(a, b); - } - (true, false) => break (b, a), // a is in the frame, b is not, so b is senior - (false, true) => break (a, b), // b is in the frame, a is not, so a is senior - (false, false) => { - // chase up the stack - #[allow(clippy::comparison_chain)] - // test to see if the frame under consideration is a west frame - if stack_pointer < alloc_pointer { - stack_pointer = *(frame_pointer.sub(STACK + 1)) as *const u64; - alloc_pointer = *(frame_pointer.sub(ALLOC + 1)) as *const u64; - frame_pointer = *(frame_pointer.sub(FRAME + 1)) as *const u64; - - // both pointers are in the PMA, pick arbitrarily (lower in mem) - if frame_pointer.is_null() { - break lower_pointer_first(a, b); - }; - - // previous allocation pointer - high_pointer = alloc_pointer; - // "previous previous" stack pointer. this is the other boundary of the previous allocation arena - low_pointer = *(frame_pointer.add(STACK)) as *const u64; - } else if stack_pointer > alloc_pointer { - stack_pointer = *(frame_pointer.add(STACK)) as *const u64; - alloc_pointer = *(frame_pointer.add(ALLOC)) as *const u64; - frame_pointer = *(frame_pointer.add(FRAME)) as *const u64; - - // both pointers are in the PMA, pick arbitrarily (lower in mem) - if frame_pointer.is_null() { - break lower_pointer_first(a, b); - }; - - // previous allocation pointer - low_pointer = alloc_pointer; - // "previous previous" stack pointer. 
this is the other boundary of the previous allocation arena - high_pointer = *(frame_pointer.sub(STACK + 1)) as *const u64; - } else { - panic!("senior_pointer_first: stack_pointer == alloc_pointer"); - } - } - } - } -} - -fn lower_pointer_first(a: *const u64, b: *const u64) -> (*const u64, *const u64) { - if a < b { - (a, b) - } else { - (b, a) - } -} - impl NounAllocator for NockStack { unsafe fn alloc_indirect(&mut self, words: usize) -> *mut u64 { self.indirect_alloc(words) diff --git a/rust/ares/src/noun.rs b/rust/ares/src/noun.rs index 72272fd..0c5008b 100644 --- a/rust/ares/src/noun.rs +++ b/rust/ares/src/noun.rs @@ -446,6 +446,11 @@ impl IndirectAtom { unsafe { *(self.to_raw_pointer().add(1)) as usize } } + /** Memory size of an indirect atom (including size + metadata fields) in 64-bit words */ + pub fn raw_size(&self) -> usize { + self.size() + 2 + } + pub fn bit_size(&self) -> usize { unsafe { ((self.size() - 1) << 6) + 64 @@ -906,6 +911,21 @@ impl Atom { *self } } + + /** Make an atom from a raw u64 + * + * # Safety + * + * Note that the [u64] parameter is *not*, in general, the value of the atom! + * + * In particular, anything with the high bit set will be treated as a tagged pointer. + * This method is only to be used to restore an atom from the raw [u64] representation + * returned by [Noun::as_raw], and should only be used if we are sure the restored noun is in + * fact an atom. + */ + pub unsafe fn from_raw(raw: u64) -> Atom { + Atom { raw } + } } impl fmt::Display for Atom { diff --git a/rust/ares/src/persist.rs b/rust/ares/src/persist.rs new file mode 100644 index 0000000..f13abc5 --- /dev/null +++ b/rust/ares/src/persist.rs @@ -0,0 +1,311 @@ +use crate::mem::NockStack; +use crate::noun::{Allocated, Atom, Cell, CellMemory, IndirectAtom, Noun}; +use ares_pma::*; +use either::Either::{Left, Right}; +use std::convert::TryInto; +use std::ffi::{c_void, CString}; +use std::mem::size_of; +use std::path::PathBuf; +use std::ptr::copy_nonoverlapping; +use std::sync::OnceLock; + +const PMA_MODE: mode_t = 0o600; // RW for user only +const PMA_FLAGS: ULONG = 0; // ignored for now + +const NOUN_MARKED: u64 = 1 << 63; + +/// Handle to a PMA +#[derive(Copy, Clone)] +struct PMAState(u64); // this is idiotic but necessary for Rust to let us put this in a oncelock + +static PMA: OnceLock = OnceLock::new(); + +fn get_pma_state() -> Option<*mut BT_state> { + PMA.get().map(|r| r.0 as *mut BT_state) +} + +fn pma_state_err() -> std::io::Error { + std::io::Error::new(std::io::ErrorKind::AlreadyExists, "PMA") +} + +#[cfg(unix)] +pub fn pma_open(path: PathBuf) -> Result<(), std::io::Error> { + let mut state: *mut BT_state = std::ptr::null_mut(); + + // correct for Unix thus cfg gated + let path_cstring = CString::new(path.into_os_string().as_encoded_bytes())?; + unsafe { + bt_state_new(&mut state); + let err = bt_state_open(state, path_cstring.as_ptr(), PMA_FLAGS, PMA_MODE); + if err == 0 { + PMA.set(PMAState(state as u64)) + .map_err(|state| state.0 as *mut BT_state) + .expect("PMA state already initialized to:"); + assert!(get_pma_state().is_some()); + Ok(()) + } else { + // XX need to free the state + Err(std::io::Error::from_raw_os_error(err)) + } + } +} + +#[cfg(windows)] +pub fn pma_open(path: PathBuf) -> Result { + unimplemented!() +} + +pub fn pma_close() -> Result<(), std::io::Error> { + // XX need a way to free the state after + let err = unsafe { bt_state_close(get_pma_state().ok_or_else(pma_state_err)?) 
}; + if err == 0 { + Ok(()) + } else { + Err(std::io::Error::from_raw_os_error(err)) + } +} + +#[inline] +pub fn pma_meta_get(field: usize) -> u64 { + unsafe { bt_meta_get(get_pma_state().unwrap(), field) } +} + +#[inline] +pub fn pma_meta_set(field: usize, val: u64) { + unsafe { bt_meta_set(get_pma_state().unwrap(), field, val) }; +} + +pub unsafe fn pma_contains(ptr: *const T, count: usize) -> bool { + if let Some(pma_state) = get_pma_state() { + bt_inbounds(pma_state, ptr as *mut c_void) != 0 + && bt_inbounds(pma_state, ptr.add(count) as *mut c_void) != 0 + } else { + false + } +} + +pub fn pma_sync() { + unsafe { + if bt_sync(get_pma_state().unwrap()) != 0 { + panic!("PMA sync failed but did not abort: this should never happen."); + } + } +} + +pub unsafe fn pma_dirty(ptr: *mut T, count: usize) { + let lo = bt_page_round_down(ptr); + let hi = bt_page_round_up(ptr.add(count)); + let e = bt_dirty(get_pma_state().unwrap(), lo, hi); + assert!(e == 0); +} + +/** + * This trait defines operations for copying a structure into the PMA. + * + * This is done in two phases. The [space_needed] phase counts how much space the structure needs in + * the PMA, not counting referenced structures already in the PMA. Then a buffer is allocated in + * the PMA of at least the computed size, and the [copy_to_buffer] phase copies the structure into + * this buffer. + * + * The phases are separated so that instances of the trait may compose, while still allocating a + * single buffer. Thus, in the instance for a HAMT, the [space_needed] method for the HAMT will + * call the [space_needed] method on each noun key, and on each value, as well as computing the + * size of the HAMT's own structures. Similarly, the [copy_to_buffer] method for the HAMT will call + * the [copy_to_buffer] method for the keys and values as it copies its own structures in. + */ +pub trait Persist { + /// Count how much space is needed, in bytes. May set marks so long as marks are cleaned up by + /// [copy_into_buffer] + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize; + + /// Copy into the provided buffer, which may be assumed to be at least as large as the size + /// returned by [space_needed] on the same structure. + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8); + + /// Persist an object into the PMA using [space_needed] and [copy_to_buffer], returning + /// a [u64] (probably a pointer or tagged pointer) that can be saved into metadata. 
+ unsafe fn save_to_pma(&mut self, stack: &mut NockStack) -> u64 { + unsafe { + let space = self.space_needed(stack); + + if space == 0 { + return self.handle_to_u64(); + } + + let space_as_pages = (space + (BT_PAGESIZE as usize - 1)) >> BT_PAGEBITS; + + let mut buffer = bt_malloc(get_pma_state().unwrap(), space_as_pages) as *mut u8; + let orig_buffer = buffer; + self.copy_to_buffer(stack, &mut buffer); + let space_isize: isize = space.try_into().unwrap(); + assert!(buffer.offset_from(orig_buffer) == space_isize); + self.handle_to_u64() + } + } + + unsafe fn handle_to_u64(&self) -> u64; + unsafe fn handle_from_u64(meta_handle: u64) -> Self; +} + +/// Ensure an allocated noun is marked and return if it was already marked +unsafe fn mark(a: Allocated) -> bool { + let metadata = a.get_metadata(); + a.set_metadata(metadata | NOUN_MARKED); + metadata & NOUN_MARKED != 0 +} + +/// Unmark an allocated noun +unsafe fn unmark(a: Allocated) { + let metadata = a.get_metadata(); + a.set_metadata(metadata & !NOUN_MARKED); +} + +impl Persist for Atom { + unsafe fn space_needed(&mut self, _stack: &mut NockStack) -> usize { + if let Ok(indirect) = self.as_indirect() { + let count = indirect.raw_size(); + if !pma_contains(indirect.to_raw_pointer(), count) && !mark(indirect.as_allocated()) { + return count * size_of::(); + } + } + 0 + } + + unsafe fn copy_to_buffer(&mut self, _stack: &mut NockStack, buffer: &mut *mut u8) { + if let Ok(mut indirect) = self.as_indirect() { + let count = indirect.raw_size(); + if !pma_contains(indirect.to_raw_pointer(), count) { + if let Some(forward) = indirect.forwarding_pointer() { + *self = forward.as_atom(); + } else { + let indirect_buffer_ptr = *buffer as *mut u64; + copy_nonoverlapping(indirect.to_raw_pointer(), indirect_buffer_ptr, count); + *buffer = indirect_buffer_ptr.add(count) as *mut u8; + + indirect.set_forwarding_pointer(indirect_buffer_ptr); + + *self = IndirectAtom::from_raw_pointer(indirect_buffer_ptr).as_atom(); + } + } + } + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.as_noun().as_raw() + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + Atom::from_raw(meta_handle) + } +} + +impl Persist for Noun { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + let mut space = 0usize; + stack.frame_push(0); + *(stack.push::()) = *self; + loop { + if stack.stack_is_empty() { + break; + } + let noun = *(stack.top::()); + stack.pop::(); + + match noun.as_either_atom_cell() { + Left(mut atom) => { + space += atom.space_needed(stack); + } + Right(cell) => { + if !pma_contains(cell.to_raw_pointer(), 1) && !mark(cell.as_allocated()) { + space += size_of::(); + (*stack.push::()) = cell.tail(); + (*stack.push::()) = cell.head(); + } + } + } + } + stack.frame_pop(); + space + } + + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8) { + let mut buffer_u64 = (*buffer) as *mut u64; + stack.frame_push(0); + *(stack.push::<*mut Noun>()) = self as *mut Noun; + + loop { + if stack.stack_is_empty() { + break; + } + + let dest = *(stack.top::<*mut Noun>()); + stack.pop::<*mut Noun>(); + + match (*dest).as_either_direct_allocated() { + Left(_direct) => {} + Right(allocated) => { + if let Some(a) = allocated.forwarding_pointer() { + *dest = a.as_noun(); + continue; + } + + match allocated.as_either() { + Left(mut indirect) => { + let count = indirect.raw_size(); + if pma_contains(indirect.to_raw_pointer(), count) { + continue; + } + + unmark(allocated); + copy_nonoverlapping(indirect.to_raw_pointer(), buffer_u64, 
count); + indirect.set_forwarding_pointer(buffer_u64); + *dest = IndirectAtom::from_raw_pointer(buffer_u64).as_noun(); + buffer_u64 = buffer_u64.add(count); + } + Right(mut cell) => { + if pma_contains(cell.to_raw_pointer(), 1) { + continue; + } + + unmark(allocated); + + let new_cell_mem = buffer_u64 as *mut CellMemory; + copy_nonoverlapping(cell.to_raw_pointer(), new_cell_mem, 1); + cell.set_forwarding_pointer(new_cell_mem); + + *dest = Cell::from_raw_pointer(new_cell_mem).as_noun(); + + *(stack.push::<*mut Noun>()) = &mut (*new_cell_mem).tail; + *(stack.push::<*mut Noun>()) = &mut (*new_cell_mem).head; + + buffer_u64 = new_cell_mem.add(1) as *mut u64; + } + } + } + } + } + *buffer = buffer_u64 as *mut u8; + stack.frame_pop(); + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.as_raw() + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + Noun::from_raw(meta_handle) + } +} + +/** Mask to mask out pointer bits not aligned with a BT_PAGESIZE page */ +const BT_PAGEBITS_MASK_OUT: u64 = !((1 << BT_PAGEBITS) - 1); + +// round an address down to a page boundary +fn bt_page_round_down(ptr: *mut T) -> *mut c_void { + ((ptr as u64) & BT_PAGEBITS_MASK_OUT) as *mut c_void +} + +// round an address up to a page boundary +fn bt_page_round_up(ptr: *mut T) -> *mut c_void { + (((ptr as u64) + (BT_PAGESIZE as u64) - 1) & BT_PAGEBITS_MASK_OUT) as *mut c_void +} diff --git a/rust/ares/src/pma/README.md b/rust/ares/src/pma/README.md deleted file mode 100644 index a2cd3d1..0000000 --- a/rust/ares/src/pma/README.md +++ /dev/null @@ -1,8 +0,0 @@ -## PMA - TODO - -Ported from development in a -[separate repo](https://github.com/ashelkovnykov/pma_malloc). README will be -updated after the final implementation is complete, which replaces the -array-based page directory with a B+ Tree one. Until then, please refer to the -README in the above-linked directory. - diff --git a/rust/ares/src/pma/malloc.c b/rust/ares/src/pma/malloc.c deleted file mode 100644 index 399e563..0000000 --- a/rust/ares/src/pma/malloc.c +++ /dev/null @@ -1,2167 +0,0 @@ -/** - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "includes/checksum.h" -#include "malloc.h" - -//============================================================================== -// CONFIGURABLE MACROS -//============================================================================== - -/** - * PMA_PAGE_SIZE = 1 << PMA_PAGE_SHIFT - * - * Should be configured to native page size. - */ -#define PMA_PAGE_SHIFT 12U - -/** - * PMA_MIN_ALLOC_SIZE = 1 << PMA_MIN_ALLOC_SHIFT - * - * Note that types/sizes in PMASharedPageHeader are currently hardcoded to this - * value being 4. - */ -#define PMA_MIN_ALLOC_SHIFT 4U - -/** - * How many bits per bitmap element. Change only if not 8 bits/byte - */ -#define PMA_BITMAP_BITS (8 * sizeof(uint8_t)) - -/** - * Increment block size for resizing the snapshot backing file (4 GiB in bytes). 
- * This is just the default increment; the backing file is extended by the - * smallest multiple of this value sufficient to fit the new allocation. - */ -#define PMA_SNAPSHOT_RESIZE_INC 0x100000000 - -//============================================================================== -// AUTO MACROS (do not manually configure) -//============================================================================== - -/** - * Number bytes per page - */ -#define PMA_PAGE_SIZE (1UL << PMA_PAGE_SHIFT) - -/** - * A mask for the offset of an address inside a page - */ -#define PMA_PAGE_MASK (PMA_PAGE_SIZE - 1) - -/** - * Minimum size of an allocation in bytes - * - * If this is too small, it's too much work to manage small allocations. - */ -#define PMA_MIN_ALLOC_SIZE (1U << PMA_MIN_ALLOC_SHIFT) - -/** - * PMA_MAX_SHARED_ALLOC = 1 << PMA_MAX_SHARED_SHIFT - * - * Should be log_2 of 1/4 of page size. Also the number of buckets in the array - * of shared page pointers. - */ -#define PMA_MAX_SHARED_SHIFT (PMA_PAGE_SHIFT - 2U) - -/** - * Max slot size (in bytes) for shared page allocations - * - * In the original phk_malloc code, this was set to 1/2 the size of a page. - * However, since shared page metadata is stored as a header inside the page - * itself, an allocation of 1/2 a page will use a full page anyway. Therefore, - * the limit is set to 1/4 of a page to remove the overhead of dealing with - * the shared page header for a page containing a single allocation. - */ -#define PMA_MAX_SHARED_ALLOC (1UL << PMA_MAX_SHARED_SHIFT) - -/** - * Number of buckets for shared page linked lists in the metadata page - */ -#define PMA_SHARED_BUCKETS (PMA_MAX_SHARED_SHIFT - PMA_MIN_ALLOC_SHIFT + 1) - -/** - * Round address down to beginning of containing page - */ -#define PAGE_ROUND_DOWN(foo) (foo & (~PMA_PAGE_MASK)) - -/** - * Round address up to beginning of next page - */ -#define PAGE_ROUND_UP(foo) ((foo + PMA_PAGE_MASK) & (~PMA_PAGE_MASK)) - -/** - * Convert pointer to index in page directory - */ -#define PTR_TO_INDEX(foo) ((((uint64_t)(foo)) - ((uint64_t)_pma_state->metadata->arena_start)) >> PMA_PAGE_SHIFT) - -/** - * Convert index in page directory to pointer - */ -#define INDEX_TO_PTR(foo) (void *)((char *)_pma_state->metadata->arena_start + ((foo) * PMA_PAGE_SIZE)) - -/** - * Flags to use for all mmap operations, excluding initial metadata page mapping - * - * We don't care to what memory the metadata pages are mapped, so long as it's - * before the memory arena, because we track it in the PMA process itself. - * However, to retain consistent pointers between ship shutdown & relaunch, we - * want all memory arena mmap mappings to go to the exact address to which we - * tell them. Another mapping already existing at one of those addresses is a - * fatal error. - * - * For more info, see https://www.man7.org/linux/man-pages/man2/mmap.2.html. - */ -#ifdef __linux__ - #define PMA_MMAP_FLAGS (MAP_SHARED | MAP_FIXED_NOREPLACE) -#else - #define PMA_MMAP_FLAGS (MAP_SHARED | MAP_FIXED) -#endif - -/** - * Magic code that identifies a file as an event snapshot file - */ -#define PMA_MAGIC_CODE 0xBADDECAFC0FFEE00 // i.e. 
all decaf coffee - -/** - * Version of the persistent memory arena which created an event snapshot (in - * case of breaking changes) - */ -#define PMA_DATA_VERSION 1 - -/** - * Representation of an empty byte for a byte in a bitmap (1 = empty, 0 = full) - */ -#define PMA_EMPTY_BITMAP 0xFF - -/** - * See PMASharedPageHeader for explanation - */ -#define PMA_BITMAP_SIZE 32 - -/** - * Max number of dpage offsets that can fit into a cache of free dpages stored - * as an array in a single page (when factoring in space used by metadata). - * - * 511 for 4 KiB page - */ -#define PMA_DPAGE_CACHE_SIZE ((PMA_PAGE_SIZE - sizeof(PMADPageCache)) / sizeof(uint64_t)) - -/** - * Max number of dirty page entries that can be stored in the extra space of the - * metadata page. Caching the dirty page entries and writing them as a part of - * the metadata allows us to solve the problem of desynchronization between the - * metadata and page directory without using B+ Trees. - * - * 164 for 4 KiB page - */ -// #define PMA_DIRTY_PAGE_LIMIT ((PMA_PAGE_SIZE - sizeof(PMAMetadata)) / sizeof(PMADirtyPageEntry)) -#define PMA_DIRTY_PAGE_LIMIT 164 - -/** - * Default settings for new PMA backing files - * - * See https://www.man7.org/linux/man-pages/man2/chmod.2.html for more info - * about individual flags. - * - * Start with a page directory big enough to hold 1 GiB of data: - * - * 1 GiB = 262144 page entries - * (up to) 16 bytes per page dir entry - * 4096 / 16 = 256 entries per page - * 262144 / 256 = 1024 pages - * 1024 * 4096 = 4194304 bytes - * - * Maximum size of page directory = 340 GiB - */ -#define PMA_SNAPSHOT_FILENAME "snap.bin" -#define PMA_PAGE_DIR_FILENAME "page.bin" -#define PMA_DEFAULT_DIR_NAME ".bin" -#define PMA_NEW_FILE_FLAGS (O_RDWR | O_CREAT) -#define PMA_LOAD_FILE_FLAGS (O_RDWR) -#define PMA_DIR_PERMISSIONS (S_IRWXU | S_IRWXG | S_IRWXO) -#define PMA_FILE_PERMISSIONS (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) -#define PMA_INIT_SNAP_SIZE 0x40000000 -#define PMA_INIT_DIR_SIZE 0x400000 - -/** - * Maximum possible size of the page directory. This is how big the page - * directory would need to be to reach all addressable virtual memory in Linux. - */ -#define PMA_MAXIMUM_DIR_SIZE 0x5500000000 - -/** - * Base address for the PMA. Lowest address not reserved by Linux. - */ -#ifdef __linux__ - #define PMA_SNAPSHOT_ADDR 0x10000 -#else - #define PMA_SNAPSHOT_ADDR 0x28000000000 -#endif - -/** - * Maximum file size on disk for the filesystem (16 TiB for ext4). - * - * TODO: need to automatically discover this and set it accordingly - */ -#define PMA_MAX_DISK_FILE_SIZE 0x100000000000 - -/** - * Maximum multiplier for resizing the snapshot backing file. - */ -#define PMA_MAX_RESIZE_FACTOR (PMA_MAX_DISK_FILE_SIZE / PMA_SNAPSHOT_RESIZE_INC) - -//============================================================================== -// HELPER MACROS -//============================================================================== - -/* TODO: these should just be funlike macros. 
The "save line" and goto is - unnecessary */ -/** - * Log error and return failure during new PMA bootstrap - */ -#define INIT_ERROR do { err_line = __LINE__; goto init_error; } while(0) - -/** - * Log error and return failure during existing PMA load - */ -#define LOAD_ERROR do { err_line = __LINE__; goto load_error; } while(0) - -/** - * Log error and return failure during PMA sync - */ -#define SYNC_ERROR do { err_line = __LINE__; goto sync_error; } while(0) - -/** - * Log warning to console - */ -#define WARNING(foo) _pma_warning(foo, address, __LINE__) - -//============================================================================== -// TYPES -//============================================================================== - -/** - * Page statuses used in page directory - */ -enum PMAPageStatus { - UNALLOCATED, - FREE, - SHARED, - FIRST, - FOLLOW -}; -typedef enum PMAPageStatus PMAPageStatus; - -/** - * Directory entry for a page in virtual memory - */ -typedef struct PMAPageDirEntry PMAPageDirEntry; -struct PMAPageDirEntry { - uint64_t offset; // Offset for page in backing file - PMAPageStatus status; // Status of page -}; - -/** - * Directory of pages in virtual memory - */ -typedef struct PMAPageDir PMAPageDir; -struct PMAPageDir { - uint64_t size; // Number of slots currently supported by page directory - uint64_t next_index; // Index of next open slot in (makes it easier to resize) - PMAPageDirEntry *entries; // Address to start of page directory as an array of entries -}; - -/** - * Shared allocation page - * - * A shared page is an array of slots of a single size. The metadata for each - * page is stored as a header within the page itself. - * - * On a 64-bit system, the alignment of this struct is 8. This is relevant to - * the currently hard-coded values for simplifying how slots work. The ideal - * size of a hard-coded bitmap, given the number of slots available in a page - * after subtracting the header, is 32 bytes: - * - * X = max # slots in page (min slot size = 16-bytes) - * (4096 - (11 + ceil(X/8))) > 16X - * (4096 - (11 + (X/8) + 1)) > 16X - * 4084 - X/8 > 16X - * 32672 - X > 128X - * 32672 > 129X - * 253.27 > X - * X = 253 - * bitmap bytes = ceil(253 div 8) = ceil(31.625) = 32 - * - * However, the alignment adds padding bytes in between the scalar and array - * struct members: - * (253 * 16) + 11 + 5 + 32 = 4096 - * - * In this case, this doesn't affect the total number of - * available slots, but it could if the members of the PMASharedPageHeader change. - */ -typedef struct PMASharedPageHeader PMASharedPageHeader; -struct PMASharedPageHeader { - struct PMASharedPageHeader *next; // Next shared page; forms a stack as additional pages of the same slot size are allocated - uint8_t dirty; // Dirty bit; necessary when allocating twice to the same page in one event - uint8_t size; // Slot size for this page = 2^size - uint8_t free; // Number of free slots in page - uint8_t bits[PMA_BITMAP_SIZE]; // Bitmap of which slots are free -}; - -/** - * Update to page directory state for an allocation. A limited number of such - * updates can be stored behind the header in the metadata page, allowing - * simultaneous copy-on-write semantics for the metadata and updates to the page - * directory without a B+ Tree. 
- */ -typedef struct PMADirtyPageEntry PMADirtyPageEntry; -struct PMADirtyPageEntry { - uint64_t index; // Index in page directory - uint64_t offset; // Offset on disk backing file - uint32_t num_pages; // Number of pages marked dirty (for multi-page allocations) - PMAPageStatus status; // Page status after sync -}; - -/** - * Free page cache node - * - * Nodes form a linked list of single free pages. A free page is an allocated - * page already backed by disk, but available for use (the old values were - * freed). - * - * Free pages are purposely not merged into runs, because two pages being - * adjacent in virtual memory does not mean that they are adjacent on disk, and - * disk locality is preferable for multi-page allocations. - * - * The caches for free single pages and free multi-page runs are split to save - * time: any free page will do for a shared page or single page allocation, but - * exact ranges are preferable for multi-page allocations. - */ -typedef struct PMASinglePageCache PMASinglePageCache; -struct PMASinglePageCache { - PMASinglePageCache *next; // Next node in list - void *page; // Pointer to free page -}; - -/** - * Free page run cache node - * - * Nodes form a linked list of free multi-page runs. A free page is an allocated - * page already backed by disk, but available for use (the old values were - * freed). - * - * Free pages are purposely not merged into runs, because two pages being - * adjacent in virtual memory does not mean that they are adjacent on disk, and - * disk locality is preferable for multi-page allocations (typically, when the - * OS experiences a page miss, the OS/hardware will fetch not just the missing - * page, but also several of the following [nearby?] pages). - * - * The caches for free single pages and free multi-page runs are split to save - * time: any free page will do for a shared page or single page allocation, but - * exact ranges are preferable for multi-page allocations. - */ -typedef struct PMAPageRunCache PMAPageRunCache; -struct PMAPageRunCache { - PMAPageRunCache *next; // Next node in list - void *page; // Pointer to start of page run - uint64_t length; // Number of pages in run -}; - -/** - * Free dpage cache - * - * A dpage is a page-sized block already allocated to the snapshot file on disk - * but without memory mapped to it. Reusing free dpages allows allocations - * without growing the backing file. - * - * The cache contains only individual dpages. Since multi-page allocations are - * never moved, their corresponding dpage allocations never change. When freed, - * multi-page allocations in the free page run cache still refer to the same - * contiguous block of dpages that they were assigned upon initial allocation. - */ -typedef struct PMADPageCache PMADPageCache; -struct PMADPageCache { - uint8_t dirty; // Has dpage cache already been copied to a new page with PROT_WRITE - uint16_t size; // Number of entries in queue - uint16_t head; // Index of front of queue - uint16_t tail; // Index of back of queue - uint64_t queue[]; // Cache of free dpages as queue; array of size PMA_DPAGE_CACHE_SIZE -}; - -/** - * Persistent Memory Arena/event snapshot metadata - */ -typedef struct PMAMetadata PMAMetadata; -struct PMAMetadata { - uint64_t magic_code; // Stamp identifying a file as a New Mars PMA file - uint32_t checksum; // Checksum value to detect corruption - uint32_t version; // Version of Vere (New Mars?) 
used to produce the backing file - uint64_t epoch; // Epoch ID of the most recently processed event - uint64_t event; // ID of the most recently processed event - uint64_t root; // Root after most recent event - void *arena_start; // Beginning of mapped address space - void *arena_end; // End of mapped address space (first address beyond mapped range) - PMASharedPageHeader *shared_pages[PMA_SHARED_BUCKETS]; // Shared allocation pages - PMADPageCache *dpage_cache; // Cache of free dpges as queue - uint64_t snapshot_size; // Size of the backing file - uint64_t next_offset; // Next open dpage in the backing file - uint8_t num_dirty_pages; // Counter of dirty page entries - uint64_t padding[2]; // sizeof(PMAMetadata) must be PMA_PAGE_SIZE - PMADirtyPageEntry dirty_pages[PMA_DIRTY_PAGE_LIMIT]; // Queue of changes not yet synced to page directory -}; -static_assert(sizeof(PMAMetadata) == PMA_PAGE_SIZE, "PMAMetadata must be a page in length"); - -/** - * Struct containing global data used by PMA - * - * Containment zone for what would otherwise be global variables. Global state - * stored in struct and passed around to functions that need it. Data that - * could otherwise go into the metadata, but is recomputable as derived state - * should go here. - */ -typedef struct PMAState PMAState; -struct PMAState { - PMAMetadata *metadata; // Metadata; contains current status of snapshot - uint64_t meta_page_offset; // Offset on disk of next metadata page to be replaced - PMAPageDir page_directory; // Page directory; maps virtual memory addresses to pages on disk - int snapshot_fd; // File descriptor for PMA backing file - int page_dir_fd; // File descriptor for page directory - PMASinglePageCache *free_pages; // Cache of free single pages - PMAPageRunCache *free_page_runs; // Cache of free multi-page runs -}; - - -//============================================================================== -// GLOBALS -//============================================================================== - -PMAState *_pma_state = NULL; - - -//============================================================================== -// FORWARD DECLARATIONS -//============================================================================== - -int _pma_verify_checksum(PMAMetadata *meta_page); -int _pma_sync_dirty_pages(int fd, uint8_t num_dirty_pages, PMADirtyPageEntry *dirty_pages); -int _pma_write_page_status(int fd, uint64_t index, PMAPageStatus status); -int _pma_write_page_offset(int fd, uint64_t index, uint64_t offset); -int _pma_update_free_pages(uint8_t num_dirty_pages, PMADirtyPageEntry *dirty_pages); -void *_pma_malloc_bytes(size_t size); -int _pma_malloc_shared_page(uint8_t bucket); -void *_pma_malloc_pages(size_t size); -void *_pma_malloc_single_page(PMAPageStatus status); -void *_pma_malloc_multi_pages(uint64_t num_pages); -void *_pma_get_cached_pages(uint64_t num_pages); -void *_pma_get_new_page(PMAPageStatus status); -void *_pma_get_new_pages(uint64_t num_pages); -int _pma_free_pages(void *address); -int _pma_free_bytes(void *address); -int _pma_copy_shared_page(void *address); -uint64_t _pma_get_single_dpage(void); -uint64_t _pma_get_cached_dpage(void); -int _pma_copy_dpage_cache(void); -uint64_t _pma_get_disk_dpage(void); -void _pma_copy_page(void *address, uint64_t offset, PMAPageStatus status, int fd); -void _pma_mark_page_dirty(uint64_t index, uint64_t offset, PMAPageStatus status, uint32_t num_pages); -int _pma_extend_snapshot_file(uint32_t multiplier); -void _pma_warning(const char *p, void *a, int l); -void 
_pma_state_free(void); -int _pma_state_malloc(void); - - -//============================================================================== -// PUBLIC FUNCTIONS -//============================================================================== - -// TODO: Replace errno codes with our own error codes - -// TODO: Inconsistent abort() calls; should better define when an error is fatal - -int -pma_init(const char *path) { - DIR *dir; - char *filepath; - PMAMetadata *meta_pages = 0; - void *page_dir = 0; - uint64_t meta_bytes; - int err; - int err_line; - int page_dir_fd = 0; - int snapshot_fd = 0; - - // Precompute metadata and page directory sizes in bytes - meta_bytes = 2 * PMA_PAGE_SIZE; - - // Allocate memory for state - if (_pma_state_malloc()) return -1; - - // - // Create backing files - // - - // Initialize dir and file path buffer - filepath = malloc( - strlen(path) + 1 + - strlen(PMA_DEFAULT_DIR_NAME) + 1 + - strlen(PMA_SNAPSHOT_FILENAME) + 1); - - // Create input directory, if necessary - dir = opendir(path); - if (dir == NULL) { - // Error if opening dir failed for reason other than it doesn't exist - if (ENOENT != errno) INIT_ERROR; - - // Error if creating dir failed - if (mkdir(path, PMA_DIR_PERMISSIONS)) INIT_ERROR; - } - - // Create file path for dir of backing files - sprintf(filepath, "%s/%s", path, PMA_DEFAULT_DIR_NAME); - - // Create dir for backing files, if necessary - dir = opendir(filepath); - if (dir == NULL) { - // Error if opening dir failed for reason other than it doesn't exist - if (ENOENT != errno) INIT_ERROR; - - // Error if creating dir failed - if (mkdir(filepath, PMA_DIR_PERMISSIONS)) INIT_ERROR; - } - - // Create backing file for snapshot - sprintf(filepath, "%s/%s/%s", path, PMA_DEFAULT_DIR_NAME, PMA_SNAPSHOT_FILENAME); - snapshot_fd = open(filepath, PMA_NEW_FILE_FLAGS, PMA_FILE_PERMISSIONS); - if (snapshot_fd == -1) INIT_ERROR; - - // Create backing file for page directory - sprintf(filepath, "%s/%s/%s", path, PMA_DEFAULT_DIR_NAME, PMA_PAGE_DIR_FILENAME); - page_dir_fd = open(filepath, PMA_NEW_FILE_FLAGS, PMA_FILE_PERMISSIONS); - if (page_dir_fd == -1) INIT_ERROR; - - // - // Set initial sizes for backing files - // - - // Set initial size of snapshot file - err = lseek(snapshot_fd, (PMA_INIT_SNAP_SIZE - 1), SEEK_SET); - if (err == -1) INIT_ERROR; - err = write(snapshot_fd, "", 1); - if (err != 1) INIT_ERROR; - - // Set initial size of page directory - err = lseek(page_dir_fd, (PMA_INIT_DIR_SIZE - 1), SEEK_SET); - if (err == -1) INIT_ERROR; - err = write(page_dir_fd, "", 1); - if (err != 1) INIT_ERROR; - - // - // Initialize snapshot and page directory - // - - /* - * The following links are useful for understanding the layout of virtual memory for a Linux process: - * https://www.sobyte.net/post/2022-08/linux-virtual-memory/ - * https://blog.holbertonschool.com/hack-the-virtual-memory-malloc-the-heap-the-program-break/ - * Chapters 2 & 3 - * - * Practically, on my machine, this translates to the following virtual memory layout: - * - ??? 
= 0x0000 0000 0000 - 0x0000 0000 ffff    64 KiB
- *    - empty = 0x0000 0001 0000 - 0x559f ffff ffff   ~85 TiB
- *    - data  = 0x55a0 0000 0000 - 0x560f ffff ffff   448 GiB
- *    - heap  = 0x5610 0000 0000 - 0x7f3f ffff ffff   ~41 TiB
- *    - libs  = 0x7f40 0000 0000 - 0x7f9f ffff ffff   384 GiB
- *    - stack = 0x7fa0 0000 0000 - 0x7ffb ffff ffff   368 GiB
- *    - vdso  = 0x7ffc 0000 0000 - 0x7fff ffff ffff    16 GiB
- * Note that these address ranges are rough approximations and the sizes are vastly larger for sections like 'data'
- * and 'vdso' than the actual memory section for the process because I'm documenting the range in which the section
- * can be found. Identical Linux processes will not have identical memory layouts due to Address Space Layout
- * Randomization.
- *
- * Without explicit arguments, calls to mmap will return addresses in the above 'stack' range, and successive calls
- * will grow down. I presume that this is due to the implementation of this proposal: https://lwn.net/Articles/91829/
- *
- * Given these circumstances, probably the easiest things to do are:
- *   1. mmap the snapshot to a low address (i.e. 0x1 0000) so that it can use all of the available space before the
- *      'data' section
- *   2. mmap the page directory using its maximum possible size (at least on Linux, it's okay to mmap a file to more
- *      pages than it actually occupies and have it grow into the space). Doing so eliminates the need to ever
- *      resize the mapping using mremap.
- *   3. mmap the page directory without a location hint. How big is this mmap? Given the above estimate of virtual
- *      memory available to the snapshot (85 TiB) and the ratio of snapshot size to page directory size (256:1), this
- *      mapping would be 340 GiB in size. Even assuming the kernel were not smart enough to work around the linked
- *      libs, this is still small enough to fit into the stack, according to the above memory section size estimates.
- */ - - // Init metadata pages - meta_pages = mmap( - NULL, - meta_bytes, - PROT_READ | PROT_WRITE, - MAP_SHARED, - snapshot_fd, - 0); - if (meta_pages == MAP_FAILED) INIT_ERROR; - - // Init page directory - page_dir = mmap( - NULL, - PMA_MAXIMUM_DIR_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED, - page_dir_fd, - 0); - if (page_dir == MAP_FAILED) INIT_ERROR; - - // Initialize simple metadata state - _pma_state->metadata->magic_code = PMA_MAGIC_CODE; - _pma_state->metadata->checksum = 0; - _pma_state->metadata->version = PMA_DATA_VERSION; - _pma_state->metadata->epoch = 0; - _pma_state->metadata->event = 0; - _pma_state->metadata->root = 0; - - // Initialize shared pages stacks - for(uint8_t i = 0; i < PMA_SHARED_BUCKETS; ++i) { - _pma_state->metadata->shared_pages[i] = NULL; - } - - // Initialize dirty page array - for(uint8_t i = 0; i < PMA_DIRTY_PAGE_LIMIT; ++i) { - _pma_state->metadata->dirty_pages[i].index = 0; - _pma_state->metadata->dirty_pages[i].offset = 0; - _pma_state->metadata->dirty_pages[i].num_pages = 0; - } - _pma_state->metadata->num_dirty_pages = 0; - - // Initialize snapshot page info - _pma_state->metadata->snapshot_size = PMA_INIT_SNAP_SIZE; - _pma_state->metadata->next_offset = meta_bytes + PMA_PAGE_SIZE; - - // Initialize arena start pointer - _pma_state->metadata->arena_start = (void *)PMA_SNAPSHOT_ADDR; - - // Manually allocate a page for the dpage cache - _pma_state->metadata->dpage_cache = mmap( - _pma_state->metadata->arena_start, - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - PMA_MMAP_FLAGS, - snapshot_fd, - meta_bytes); - if (_pma_state->metadata->dpage_cache == MAP_FAILED) INIT_ERROR; - - // Initialize arena end pointer - _pma_state->metadata->arena_end = ((char*)_pma_state->metadata->arena_start + PMA_PAGE_SIZE); - - // Setup initial dpage cache values - _pma_state->metadata->dpage_cache->dirty = 0; - _pma_state->metadata->dpage_cache->size = 0; - _pma_state->metadata->dpage_cache->head = 0; - _pma_state->metadata->dpage_cache->tail = 0; - - // - // Setup page directory - // - - _pma_state->page_directory.size = PMA_INIT_DIR_SIZE; - _pma_state->page_directory.next_index = 1; - _pma_state->page_directory.entries = (PMAPageDirEntry *)page_dir; - - // First page used by dpage cache - _pma_state->page_directory.entries[0].status = FIRST; - _pma_state->page_directory.entries[0].offset = meta_bytes; - - // - // Setup transient state - // - - // Replace the first metadata page, since they're identical - _pma_state->meta_page_offset = 0; - - // Initialize file descriptors - _pma_state->snapshot_fd = snapshot_fd; - _pma_state->page_dir_fd = page_dir_fd; - - // Initialize free page caches - _pma_state->free_pages = NULL; - _pma_state->free_page_runs = NULL; - - // - // Sync initial PMA state to disk - // - - // Sync dpage cache - err = msync( - _pma_state->metadata->dpage_cache, - PMA_PAGE_SIZE, - MS_SYNC); - if (err) INIT_ERROR; - - // Sync page directory - err = msync(_pma_state->page_directory.entries, PMA_PAGE_SIZE, MS_SYNC); - if (err) INIT_ERROR; - - // Compute checksum for metadata - _pma_state->metadata->checksum = crc_32((unsigned char*)_pma_state->metadata, PMA_PAGE_SIZE); - - // Copy and sync metadata to both buffers - memset(meta_pages, 0, meta_bytes); - memcpy(&meta_pages[0], _pma_state->metadata, PMA_PAGE_SIZE); - memcpy(&meta_pages[1], _pma_state->metadata, PMA_PAGE_SIZE); - if (msync(meta_pages, meta_bytes, MS_SYNC)) INIT_ERROR; - - // Remove PROT_WRITE permissions from snapshot and page directory - if (mprotect(meta_pages, meta_bytes, PROT_READ)) 
INIT_ERROR; - if (mprotect(_pma_state->metadata->dpage_cache, PMA_PAGE_SIZE, PROT_READ)) INIT_ERROR; - if (mprotect(page_dir, PMA_PAGE_SIZE, PROT_READ)) INIT_ERROR; - - // - // Done - // - - // Clean up - free(filepath); - munmap(meta_pages, meta_bytes); - - return 0; - -init_error: - fprintf(stderr, "(L%d) PMA initialization error: %s\n", err_line, strerror(errno)); - - if (meta_pages) munmap(meta_pages, meta_bytes); - if (page_dir) munmap(page_dir, PMA_INIT_DIR_SIZE); - if (snapshot_fd) close(snapshot_fd); - if (page_dir_fd) close(page_dir_fd); - free(filepath); - _pma_state_free(); - - return -1; -} - -PMARootState -pma_load(const char *path) { - PMAMetadata *newer_page; - PMAMetadata *older_page; - char *filepath; - void *address; - PMAMetadata *meta_pages = 0; - uint64_t index; - uint64_t meta_bytes; - int err; - int err_line; - int page_dir_fd = 0; - int snapshot_fd = 0; - - // Precompute metadata and page directory sizes in bytes - meta_bytes = 2 * PMA_PAGE_SIZE; - - // Allocate memory for state - if (_pma_state_malloc()) return (PMARootState){0}; - - // - // Open backing files - // - - // Initialize dir and file path buffer - filepath = malloc( - strlen(path) + 1 + - strlen(PMA_DEFAULT_DIR_NAME) + 1 + - strlen(PMA_SNAPSHOT_FILENAME) + 1); - - // Open backing file for snapshot - sprintf(filepath, "%s/%s/%s", path, PMA_DEFAULT_DIR_NAME, PMA_SNAPSHOT_FILENAME); - snapshot_fd = open(filepath, PMA_LOAD_FILE_FLAGS, PMA_FILE_PERMISSIONS); - if (snapshot_fd == -1) LOAD_ERROR; - _pma_state->snapshot_fd = snapshot_fd; - - // Open backing file for page directory - sprintf(filepath, "%s/%s/%s", path, PMA_DEFAULT_DIR_NAME, PMA_PAGE_DIR_FILENAME); - page_dir_fd = open(filepath, PMA_LOAD_FILE_FLAGS, PMA_FILE_PERMISSIONS); - if (page_dir_fd == -1) LOAD_ERROR; - _pma_state->page_dir_fd = page_dir_fd; - - // - // Verify file can be loaded - // - - // Read magic code - if (-1 == read(snapshot_fd, &_pma_state->metadata->magic_code, sizeof(uint64_t))) { - LOAD_ERROR; - } else if (_pma_state->metadata->magic_code != PMA_MAGIC_CODE) { - errno = EILSEQ; - LOAD_ERROR; - } - - // Read version - if (-1 == pread(snapshot_fd, &_pma_state->metadata->version, sizeof(uint32_t), 12)) { - LOAD_ERROR; - } else if (_pma_state->metadata->version != PMA_DATA_VERSION) { - // TODO: possibly upgrade - errno = EILSEQ; - LOAD_ERROR; - } - - // Load metadata pages - meta_pages = mmap( - NULL, - meta_bytes, - PROT_READ, - MAP_SHARED, - snapshot_fd, - 0); - if (meta_pages == MAP_FAILED) LOAD_ERROR; - - // Determine newer metadata page - newer_page = &meta_pages[0]; - older_page = &meta_pages[1]; - assert(newer_page->magic_code == PMA_MAGIC_CODE); assert(older_page->magic_code == PMA_MAGIC_CODE); - if ((newer_page->epoch < older_page->epoch) - || ((newer_page->epoch == older_page->epoch) - && (newer_page->event < older_page->event))) { - newer_page = &meta_pages[1]; - older_page = &meta_pages[0]; - } - - // Verify checksum for either page - if (!_pma_verify_checksum(newer_page)) { - if (_pma_verify_checksum(older_page)) { - newer_page = older_page; - } else { - errno = EILSEQ; - LOAD_ERROR; - } - } - - // Next page replaced is the older of the two pages - _pma_state->meta_page_offset = (newer_page == meta_pages) ? 
PMA_PAGE_SIZE : 0; - - // Update page directory using metadata dirty page list - err = _pma_sync_dirty_pages(page_dir_fd, _pma_state->metadata->num_dirty_pages, _pma_state->metadata->dirty_pages); - if (err) LOAD_ERROR; - - _pma_state->metadata->num_dirty_pages = 0; - - // - // Load page directory - // - - // mmap page directory - _pma_state->page_directory.entries = mmap( - NULL, - PMA_MAXIMUM_DIR_SIZE, - PROT_READ, - MAP_SHARED, - page_dir_fd, - 0); - if (_pma_state->page_directory.entries == MAP_FAILED) LOAD_ERROR; - - // - // Map pages and compute free page caches - // - - // get total number of indices - struct stat st; - fstat(page_dir_fd, &st); - _pma_state->page_directory.size = (st.st_size / sizeof(PMAPageDirEntry)) - 1; - - - index = 0; - while (index < _pma_state->page_directory.size) { - uint64_t count = 1; - - switch (_pma_state->page_directory.entries[index].status) { - case UNALLOCATED: - ++index; - continue; - - case FREE: - // While pages have FREE status AND are contiguous on disk, scan forward - ++index; - while ( - _pma_state->page_directory.entries[index].status == FREE && - _pma_state->page_directory.entries[index].offset == (_pma_state->page_directory.entries[index - 1].offset + PMA_PAGE_SIZE)) { - ++count; - ++index; - } - - // Add to appropriate free page cache - if (count == 1) { - PMASinglePageCache *free_page = malloc(sizeof *free_page); - - // Add it to the single-page cache - free_page->next = _pma_state->free_pages; - free_page->page = INDEX_TO_PTR(index - 1); - _pma_state->free_pages = free_page; - - } else { - PMAPageRunCache *page_run = malloc(sizeof *page_run); - - page_run->next = _pma_state->free_page_runs; - page_run->page = INDEX_TO_PTR(index - count); - page_run->length = count; - _pma_state->free_page_runs = page_run; - } - - // Map free pages (they're expected to be mapped but read only) - address = mmap( - INDEX_TO_PTR(index - count), - (PMA_PAGE_SIZE * count), - PROT_READ, - PMA_MMAP_FLAGS, - snapshot_fd, - _pma_state->page_directory.entries[index - count].offset); - - continue; - - case SHARED: - // Map immediately - address = mmap( - INDEX_TO_PTR(index), - PMA_PAGE_SIZE, - PROT_READ, - PMA_MMAP_FLAGS, - snapshot_fd, - _pma_state->page_directory.entries[index].offset); - if (address == MAP_FAILED) LOAD_ERROR; - - ++index; - - continue; - - case FIRST: - // While pages have FOLLOW status, scan forward - ++index; - while (_pma_state->page_directory.entries[index].status == FOLLOW) { - ++count; - ++index; - } - - // mmap entire block - address = mmap( - INDEX_TO_PTR(index - count), - (count * PMA_PAGE_SIZE), - PROT_READ, - PMA_MMAP_FLAGS, - snapshot_fd, - _pma_state->page_directory.entries[index - count].offset); - if (address == MAP_FAILED) LOAD_ERROR; - - continue; - - case FOLLOW: - // FOLLOW pages should be passed over correctly by FIRST case - default: - fprintf(stderr, "Index %" PRIu64 " invalid\n", index); - errno = EINVAL; - LOAD_ERROR; - } - } - - // Get next free index - _pma_state->page_directory.next_index = index; - - // - // Done - // - - // Clean up - munmap(meta_pages, meta_bytes); - free(filepath); - - return (PMARootState){ - .epoch = _pma_state->metadata->epoch, - .event = _pma_state->metadata->event, - .root = _pma_state->metadata->root, - }; - -load_error: - fprintf(stderr, "(L%d) Error loading PMA from %s: %s\n", err_line, path, strerror(errno)); - - if (meta_pages) munmap(meta_pages, meta_bytes); - if (_pma_state->page_directory.entries) { - munmap(_pma_state->page_directory.entries, PMA_MAXIMUM_DIR_SIZE); - } - if 
(_pma_state->metadata && _pma_state->metadata->arena_start) { - munmap(_pma_state->metadata->arena_start, - (uintptr_t)_pma_state->metadata->arena_end - - (uintptr_t)_pma_state->metadata->arena_start); - } - if (snapshot_fd > 0) close(snapshot_fd); - if (page_dir_fd > 0) close(page_dir_fd); - free(filepath); - _pma_state_free(); - - return (PMARootState){0}; -} - -int -pma_close(uint64_t epoch, uint64_t event, uint64_t root) { - // Sync changes to disk - if (pma_sync(epoch, event, root)) { - return -1; - } - - // Unmap page directory - munmap(_pma_state->page_directory.entries, PMA_MAXIMUM_DIR_SIZE); - - // Unmap snapshot - // XX should just be end - start? - munmap(_pma_state->metadata->arena_start, _pma_state->metadata->snapshot_size); - - // Close file descriptors - close(_pma_state->page_dir_fd); - close(_pma_state->snapshot_fd); - - // free pma state - _pma_state_free(); - - return 0; -} - -void * -pma_malloc(size_t size) { - void *result = NULL; - - /* MALLOC_LOCK */ - - if (!size) { - /* MALLOC_UNLOCK */ - return result; - } else if ((size + PMA_PAGE_SIZE) < size) { // Check for overflow - errno = ENOMEM; - } else if (size <= PMA_MAX_SHARED_ALLOC) { - result = _pma_malloc_bytes(size); - } else { - result = _pma_malloc_pages(size); - } - - /* MALLOC_UNLOCK */ - - return result; -} - -int -pma_free(void *address) { - uint64_t index; - - // TODO: This is legal for POSIX free, but would this ever happen for pma_free? - if (address == NULL) return 0; - - if (address < _pma_state->metadata->arena_start) { - WARNING("address too low to make sense"); - errno = EINVAL; - return -1; - } - if (address >= _pma_state->metadata->arena_end) { - WARNING("address too high to make sense"); - errno = EINVAL; - return -1; - } - - index = PTR_TO_INDEX(address); - switch (_pma_state->page_directory.entries[index].status) { - case UNALLOCATED: - // Something has definitely gone wrong if an address between arena_start - // and arena_end, with an index between 0 and next_free_index is - // unallocated - WARNING("address marked unallocated"); - errno = EINVAL; - return -1; - - case FREE: - WARNING("address already free"); - errno = EINVAL; - return -1; - - case SHARED: - return _pma_free_bytes(address); - - case FIRST: - return _pma_free_pages(address); - - case FOLLOW: - WARNING("address points to middle of multi-page allocation"); - errno = EINVAL; - return -1; - } - - return 0; -} - -int -pma_sync(uint64_t epoch, uint64_t event, uint64_t root) { - PMADPageCache *dpage_cache = _pma_state->metadata->dpage_cache; - ssize_t bytes_out; - int err; - int err_line; - - // Epoch & event may only increase - if ( - (epoch < _pma_state->metadata->epoch) || - ((epoch == _pma_state->metadata->epoch) && (event <= _pma_state->metadata->event))) { - errno = EINVAL; - return -1; - } - - // Clear dpage cache dirty bit and compute new size. This is the only place - // where the dpage cache active size should ever increase! 
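
Before the code below, a minimal standalone sketch of the wrap-around size computation it performs. CACHE_SIZE and queue_size are stand-in names (the real code uses PMA_DPAGE_CACHE_SIZE and works directly on the PMADPageCache fields); the point is that with uint16_t head/tail indices, tail - head underflows once the queue has wrapped, and adding the queue length back recovers the element count.

#include <assert.h>
#include <stdint.h>

#define CACHE_SIZE 511u   /* stands in for PMA_DPAGE_CACHE_SIZE */

static uint16_t queue_size(uint16_t head, uint16_t tail) {
  uint16_t size = (uint16_t)(tail - head);
  if (size > CACHE_SIZE) {   /* underflow: tail has already wrapped past head */
    size += CACHE_SIZE;      /* truncation back to uint16_t yields the count  */
  }
  return size;
}

int main(void) {
  assert(queue_size(3, 10) == 7);      /* no wrap: 7 entries                      */
  assert(queue_size(500, 4) == 15);    /* wrapped: slots 500..510 and 0..3 = 15   */
  return 0;
}
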
- if (dpage_cache->dirty) { - dpage_cache->dirty = 0; - dpage_cache->size = (dpage_cache->tail - dpage_cache->head); - if (dpage_cache->size > PMA_DPAGE_CACHE_SIZE) { - // Simple correction of integer underflow when queue wraps around - dpage_cache->size += PMA_DPAGE_CACHE_SIZE; - } - } - - // Sync dirty pages - for (uint8_t i = 0; i < _pma_state->metadata->num_dirty_pages; ++i) { - void *address = INDEX_TO_PTR(_pma_state->metadata->dirty_pages[i].index); - uint64_t bytes = (_pma_state->metadata->dirty_pages[i].num_pages * PMA_PAGE_SIZE); - - // Clear dirty bit for shared pages - if (_pma_state->metadata->dirty_pages[i].status == SHARED) { - ((PMASharedPageHeader*)address)->dirty = 0; - } - - err = msync(address, bytes, MS_SYNC); - if (err) SYNC_ERROR; - - if (mprotect(address, bytes, PROT_READ)) SYNC_ERROR; - } - - // Compute checksum - _pma_state->metadata->epoch = epoch; - _pma_state->metadata->event = event; - _pma_state->metadata->root = root; - _pma_state->metadata->checksum = 0; - _pma_state->metadata->checksum - = crc_32((unsigned char *)_pma_state->metadata, PMA_PAGE_SIZE); - - // Sync metadata - // - // Note: It's a long-standing Unix convention that while both write and - // pwrite return the number of bytes written, when operating on a file - // (as opposed to a pipe or socket) it is assumed that the entire - // buffer will be written. If this isn't the case, an error has - // occurred. - bytes_out = pwrite( - _pma_state->snapshot_fd, - _pma_state->metadata, - PMA_PAGE_SIZE, - _pma_state->meta_page_offset); - if (bytes_out != PMA_PAGE_SIZE) SYNC_ERROR; - - _pma_state->meta_page_offset = _pma_state->meta_page_offset ? 0 : PMA_PAGE_SIZE; - - // Sync dirty pages in page directory - err = _pma_sync_dirty_pages( - _pma_state->page_dir_fd, - _pma_state->metadata->num_dirty_pages, - _pma_state->metadata->dirty_pages); - if (err) SYNC_ERROR; - - // Update free page caches - err = _pma_update_free_pages(_pma_state->metadata->num_dirty_pages, _pma_state->metadata->dirty_pages); - if (err) SYNC_ERROR; - - // Reset dirty page array - _pma_state->metadata->num_dirty_pages = 0; - - return 0; - -sync_error: - fprintf(stderr, "(L%d) Error syncing PMA: %s\n", err_line, strerror(errno)); - - return -1; -} - -bool -pma_in_arena(void *address) { - return (address >= _pma_state->metadata->arena_start) - && (address < _pma_state->metadata->arena_end); -} - -//============================================================================== -// PRIVATE FUNCTIONS -//============================================================================== - -/** - * Verify that the checksum of a metadata page is valid - * - * Corruption or malicious interference is rare, so we assume that the checksum - * is correct and copy it into the global state in advance, then confirm its - * correctness there. - * - * @param meta_page Pointer to a metadata page loaded from disk - * - * @return Boolean (as int) for whether checksums match or not - */ -int -_pma_verify_checksum(PMAMetadata *meta_page) { - uint32_t checksum; - - // Copy metadata in advance of using it, since: 1) we expect the checksum to - // be valid; 2) we need to set the value of the checksum in the metadata to 0. - memcpy(_pma_state->metadata, meta_page, PMA_PAGE_SIZE); - - // Since we're computing the checksum on the object which itself includes the - // checksum, we treat the checksum as 0. 
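
A minimal standalone sketch of the zero-then-hash round trip described in the comment above. The Page struct and sum32() are stand-ins rather than PMAMetadata and crc_32(); only the pattern is the same: the stored checksum is computed over the page with its own checksum field zeroed, so verification must zero that field again before recomputing.

#include <assert.h>
#include <stdint.h>
#include <string.h>

typedef struct {
  uint64_t magic;
  uint32_t checksum;
  uint32_t version;
  uint64_t payload[13];
} Page;                                   /* stand-in, not PMAMetadata */

static uint32_t sum32(const unsigned char *buf, size_t len) {
  uint32_t s = 0;                         /* toy checksum, not crc_32  */
  for (size_t i = 0; i < len; i++) s = (s * 31) + buf[i];
  return s;
}

int main(void) {
  Page on_disk = { .magic = 0xBADDECAFC0FFEE00ULL, .version = 1, .payload = {7} };

  /* What sync does: zero the field, hash the whole page, store the result. */
  on_disk.checksum = 0;
  on_disk.checksum = sum32((unsigned char *)&on_disk, sizeof on_disk);

  /* What verification does: copy, zero the field again, recompute, compare. */
  Page scratch;
  memcpy(&scratch, &on_disk, sizeof scratch);
  scratch.checksum = 0;
  assert(sum32((unsigned char *)&scratch, sizeof scratch) == on_disk.checksum);
  return 0;
}
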
- _pma_state->metadata->checksum = 0; - - // Compute checksum - checksum = crc_32((unsigned char *)_pma_state->metadata, PMA_PAGE_SIZE); - - // Compare checksums - return (checksum == meta_page->checksum); -} - -/** - * Sync updates from the dirty page cache in the metadata page to the page - * directory - * - * This sync is technically the *first* step of a new event, since the page - * directory for a snapshot is not valid until all of the changes from the dirty - * page cache have been applied. The PMA can crash at any moment, therefore - * applying the changes to the page directory from the previous event is - * required before processing a new one. Note that applying these changes to the - * page directory is an idempotent operation - which is good since we could - * theoretically crash on the same event repeatedly. - * - * @param fd Page directory file descriptor - * @param num_dirty_pages Size of dirty page cache - * @param dirty_pages Dirty page cache as array - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_sync_dirty_pages(int fd, uint8_t num_dirty_pages, PMADirtyPageEntry *dirty_pages) { - PMAPageStatus cont_status; - uint64_t init_offset; - uint64_t index; - - for (uint8_t i = 0; i < num_dirty_pages; ++i) { - cont_status = (dirty_pages[i].status == FIRST) ? FOLLOW : dirty_pages[i].status; - init_offset = dirty_pages[i].offset; - index = dirty_pages[i].index; - - if (_pma_write_page_status(fd, index, dirty_pages[i].status)) return -1; - // Offset of 0 is code for "leave it alone" - if (init_offset) { - if (_pma_write_page_offset(fd, index, init_offset)) return -1; - } - - // The offset on disk doesn't actually matter for the continuation pages of - // a multi-page allocation, but it does matter for free page runs: just - // because two page runs are contiguous in memory, it doesn't mean they are - // contiguous on disk. An order of events like: - // - // [multi-page allocation] -> [shared-page allocation] -> [multi-page allocation] - // - // could produce a situation where the two multi-page allocations are - // adjacent in memory, but separated by one page on disk (because of - // copy-on-write using a new dpage during the shared page allocation). 
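
A small sketch of the byte offsets targeted by the pwrite() calls in _pma_write_page_status() and _pma_write_page_offset() further below, assuming the usual C layout in which the enum status field starts right after the 8-byte offset field. DirEntry is a stand-in type mirroring PMAPageDirEntry.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

enum Status { UNALLOCATED, FREE, SHARED, FIRST, FOLLOW };
struct DirEntry { uint64_t offset; enum Status status; };   /* mirrors PMAPageDirEntry */

int main(void) {
  uint64_t index = 42;

  /* Byte positions the two pwrite() calls would use for entry 42. */
  size_t offset_pos = index * sizeof(struct DirEntry);
  size_t status_pos = index * sizeof(struct DirEntry) + sizeof(uint64_t);

  /* status_pos lands on the status field only because the uint64_t offset
   * field comes first and is 8 bytes wide. */
  assert(status_pos - offset_pos == offsetof(struct DirEntry, status));
  return 0;
}
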
- for (uint32_t j = 1; j < dirty_pages[i].num_pages; ++j) { - assert((dirty_pages[i].status == FIRST) || (cont_status == FREE)); - - if (_pma_write_page_status(fd, (index + j), cont_status)) return -1; - // Offset of 0 is code for "leave it alone" - if (init_offset) { - if (_pma_write_page_offset(fd, (index + j), (init_offset + (j * PMA_PAGE_SIZE)))) return -1; - } - } - } - - return 0; -} - -/** - * Update page status of entry in page directory - * - * @param fd Page directory file descriptor - * @param index Directory index of entry - * @param status Page status - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_write_page_status(int fd, uint64_t index, PMAPageStatus status) { - ssize_t bytes_out; - - bytes_out = pwrite( - fd, - (const void *)&status, - sizeof(PMAPageStatus), - ((index * sizeof(PMAPageDirEntry)) + sizeof(uint64_t))); - if (bytes_out < 1) { - return -1; - } - - return 0; -} - -/** - * Update page offset of entry in page directory - * - * @param fd Page directory file descriptor - * @param index Directory index of entry - * @param offset Page offset on disk - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_write_page_offset(int fd, uint64_t index, uint64_t offset) { - ssize_t bytes_out; - - bytes_out = pwrite( - fd, - (const void *)&offset, - sizeof(uint64_t), - (index * sizeof(PMAPageDirEntry))); - if (bytes_out < 1) { - return -1; - } - - return 0; -} - -/** - * Add newly freed pages and page runs to the free page caches - * - * @param num_dirty_pages Size of dirty page cache - * @param dirty_pages Dirty page cache as array - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_update_free_pages(uint8_t num_dirty_pages, PMADirtyPageEntry *dirty_pages) { - PMASinglePageCache *free_page; - PMAPageRunCache *page_run; - - // TODO: Pull out common code between here and pma_load - for (uint8_t i = 0; i < num_dirty_pages; ++i) { - if (dirty_pages[i].status != FREE) continue; - - if (dirty_pages[i].num_pages > 1) { - page_run = malloc(sizeof *page_run); - if (page_run == NULL) return -1; - - page_run->next = _pma_state->free_page_runs; - page_run->page = INDEX_TO_PTR(dirty_pages[i].index); - page_run->length = dirty_pages[i].num_pages; - _pma_state->free_page_runs = page_run; - - } else { - free_page = malloc(sizeof *free_page); - if (free_page == NULL) return -1; - - free_page->next = _pma_state->free_pages; - free_page->page = INDEX_TO_PTR(dirty_pages[i].index); - _pma_state->free_pages = free_page; - } - } - - return 0; -} - -/** - * Allocate memory within a shared allocation page. 
- * - * @param size Size in bytes to allocate (must be <= 1/4 page) - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -_pma_malloc_bytes(size_t size) -{ - PMASharedPageHeader *shared_page; - uint16_t i, slot_size; - uint8_t bucket, byte, bit; - - assert(size <= PMA_MAX_SHARED_ALLOC); - - // Don't bother with anything less than the minimum allocation size - if (size < PMA_MIN_ALLOC_SIZE) { - size = PMA_MIN_ALLOC_SIZE; - } - - // Find the right bucket - bucket = 1; - if (size) { - i = size - 1; - while (i >>= 1) bucket++; - } - slot_size = (1 << bucket); - bucket = bucket - PMA_MIN_ALLOC_SHIFT; - - // Search for a shared page with open slots - shared_page = _pma_state->metadata->shared_pages[bucket]; - while ((shared_page != NULL) && (shared_page->free == 0)) { - shared_page = shared_page->next; - } - - // Make a new shared page if necessary - if (shared_page == NULL) { - if (_pma_malloc_shared_page(bucket)) { - return NULL; - } - - shared_page = _pma_state->metadata->shared_pages[bucket]; - - } else { - if (_pma_copy_shared_page(shared_page)) { - return NULL; - } - } - - assert(shared_page->free); - - // Find first empty slot using bitmap (1 = empty, 0 = full) - byte = 0; - while (shared_page->bits[byte] == 0) { - assert(byte < PMA_BITMAP_SIZE); - ++byte; - } - i = shared_page->bits[byte]; - bit = 0; - while (~i & 1U) { - i >>= 1; - ++bit; - } - - // Mark slot full - shared_page->bits[byte] -= (1 << bit); - --(shared_page->free); - - // Return slot - return (char *)shared_page + - (sizeof(PMASharedPageHeader)) + - (slot_size * ((PMA_BITMAP_BITS * byte) + bit)); -} - -/** - * Allocate a new shared allocation page. - * - * @param bucket Into which bucket in the shared allocation pages array the new - * page will go (which also corresponds to the size of the slots - * in the page) - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_malloc_shared_page(uint8_t bucket) -{ - PMASharedPageHeader *shared_page; - uint8_t shift; - - assert(bucket <= PMA_SHARED_BUCKETS); - - // Get a new writeable page - shared_page = (PMASharedPageHeader *)_pma_malloc_single_page(SHARED); - if (shared_page == NULL) { - return -1; - } - - // Compute shift - shift = bucket + PMA_MIN_ALLOC_SHIFT; - - // Initialize header for shared page - shared_page->dirty = 1; - shared_page->size = shift; - shared_page->free = ((PMA_PAGE_SIZE - sizeof(PMASharedPageHeader)) / (1 << shift)); - for (uint8_t i = 0; i < PMA_BITMAP_SIZE; ++i) { - shared_page->bits[i] = PMA_EMPTY_BITMAP; - } - - // Add new shared page to top of stack - shared_page->next = _pma_state->metadata->shared_pages[bucket]; - _pma_state->metadata->shared_pages[bucket] = shared_page; - - return 0; -} - -/** - * Allocate memory for a large object in one or more pages. - * - * @param size Size in bytes to allocate (must be > 1/4 page) - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -_pma_malloc_pages(size_t size) -{ - void *address; - uint64_t num_pages; - - assert(size > PMA_MAX_SHARED_ALLOC); - - // Round size up to nearest page boundary - size = PAGE_ROUND_UP(size); - num_pages = size >> PMA_PAGE_SHIFT; - - if (num_pages == 1) { - address = _pma_malloc_single_page(FIRST); - } else { - address = _pma_malloc_multi_pages(num_pages); - } - - return address; -} - -/** - * Allocate a single new page - * - * Reuse pages from the free page cache, if any are available. 
These pages are - * used for shared allocations and for "large" allocations that are between 1/4 - * and 1 page in size: (0.25, 1]. - * - * @param status Page status after allocation (SHARED or FIRST) - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -_pma_malloc_single_page(PMAPageStatus status) { - void *address; - PMASinglePageCache *free_page = _pma_state->free_pages; - - // Get an existing free page from cache, if available - if (free_page != NULL) { - address = free_page->page; - _pma_state->free_pages = free_page->next; - free(free_page); - - // Make the page writeable - mprotect(address, PMA_PAGE_SIZE, (PROT_READ | PROT_WRITE)); - - // Add page to dirty list - _pma_mark_page_dirty(PTR_TO_INDEX(address), 0, status, 1); - } else { - // Otherwise, allocate a new page - address = _pma_get_new_page(status); - } - - assert((((uint64_t)address) % PMA_PAGE_SIZE) == 0); - - return address; -} - -/** - * Allocate a contiguous block of multiple pages - * - * Reuse pages from the free page run cache, if any are available. - * - * @param num_pages # pages to allocate - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -_pma_malloc_multi_pages(uint64_t num_pages) { - void *address; - - address = _pma_get_cached_pages(num_pages); - if (!address) { - address = _pma_get_new_pages(num_pages); - } - - return address; -} - -/** - * Pull existing free pages from the free page run cache - * - * Does a pass over the entire cache to see if there is an exactly-sized page - * run. If so, it's used immediately. Otherwise, keeps track of the smallest - * page run that can be split to accommodate the requested allocation. - * - * @param num_pages # pages to allocate - * - * @return void* address of the newly allocated memory (NULL if none available) - */ -void * -_pma_get_cached_pages(uint64_t num_pages) { - PMAPageRunCache **pre_valid_ptr = NULL; - PMAPageRunCache **prev_node_ptr = &(_pma_state->free_page_runs); - PMAPageRunCache *page_run_cache = _pma_state->free_page_runs; - PMAPageRunCache *valid_page_run = NULL; - void *address = NULL; - - // Do a pass looking for an exactly-sized run. While doing this, also record the smallest run still big enough to fit - // our data. - while (page_run_cache != NULL) { - uint64_t run_length = page_run_cache->length; - - if (run_length == num_pages) { - valid_page_run = page_run_cache; - pre_valid_ptr = prev_node_ptr; - break; - - } else if (run_length > num_pages ) { - if ((valid_page_run == NULL) || (valid_page_run->length > run_length)) { - valid_page_run = page_run_cache; - pre_valid_ptr = prev_node_ptr; - } - } - - prev_node_ptr = &(page_run_cache->next); - page_run_cache = page_run_cache->next; - } - - // If run found... - if (valid_page_run != NULL) { - // Use it - address = valid_page_run->page; - - // If run larger than necessary by two pages... - if (valid_page_run->length > (num_pages + 1)) { - // Reduce it - valid_page_run->page = (uint8_t*)valid_page_run->page + (num_pages * PMA_PAGE_SIZE); - valid_page_run->length -= num_pages; - - // Otherwise... - } else { - // Update cache pointers: we're going to use the whole run or we're going - // to move the remaining page to the single-page cache. Either way, we're - // going to free the run object. - *pre_valid_ptr = valid_page_run->next; - - // If there's a page left... 
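        // (At this point valid_page_run->length is either num_pages or
        //  num_pages + 1: any strictly larger run was shrunk in place above,
        //  so at most one trailing page can remain, and it is pushed onto the
        //  single-page cache below.)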
- if (valid_page_run->length == (num_pages + 1)) { - PMASinglePageCache *trailing_page = (PMASinglePageCache *)malloc(sizeof(PMASinglePageCache)); - - // Add it to the single-page cache - trailing_page->next = _pma_state->free_pages; - trailing_page->page = ((char *)address + (num_pages * PMA_PAGE_SIZE)); - _pma_state->free_pages = trailing_page; - } - - free(valid_page_run); - } - - // Make pages writeable - mprotect(address, (num_pages * PMA_PAGE_SIZE), (PROT_READ | PROT_WRITE)); - - // Add pages to dirty list - _pma_mark_page_dirty(PTR_TO_INDEX(address), 0, FIRST, num_pages); - } - - return address; -} - -/** - * Allocate a single new page - * - * Allocates a new page in virtual memory. May or may not use a new dpage. - * - * @param status Page status after allocation (SHARED or FIRST) - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -_pma_get_new_page(PMAPageStatus status) { - void *address; - uint64_t offset; - - // Get a dpage to which to map the address - offset = _pma_get_single_dpage(); - if (!offset) { - return NULL; - } - - // Try to map next open memory address to dpage - address = mmap( - _pma_state->metadata->arena_end, - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - PMA_MMAP_FLAGS, - _pma_state->snapshot_fd, - offset); - if (address == MAP_FAILED) { - address = _pma_state->metadata->arena_end; - WARNING("mmap failed"); - abort(); - } - - assert(address == _pma_state->metadata->arena_end); - - // Record PMA expansion - _pma_state->metadata->arena_end = (uint8_t*)_pma_state->metadata->arena_end + PMA_PAGE_SIZE; - - // Add page to dirty list - _pma_mark_page_dirty(PTR_TO_INDEX(address), offset, status, 1); - - return address; -} - -/** - * Allocate multiple new pages - * - * Allocate 2 or more pages in virtual memory. May or may not use new dpages. - * - * @param num_pages # pages to allocate - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -_pma_get_new_pages(uint64_t num_pages) { - void *address; - uint64_t bytes = (num_pages * PMA_PAGE_SIZE); - uint64_t offset = _pma_state->metadata->next_offset; - uint64_t size = _pma_state->metadata->snapshot_size; - uint64_t new_size = (offset + bytes); - - // Get new dpages. Extend snapshot backing file first, if necessary. 
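
A quick standalone check of the resize-multiplier arithmetic used just below when a multi-page allocation overruns the current snapshot size. INC stands in for PMA_SNAPSHOT_RESIZE_INC (0x100000000, i.e. 4 GiB, per the header); the claim being exercised is that ((new_size - size) / INC) + 1 increments are always enough to cover new_size.

#include <assert.h>
#include <stdint.h>

#define INC 0x100000000ULL   /* stand-in for PMA_SNAPSHOT_RESIZE_INC (4 GiB) */

int main(void) {
  uint64_t size = 0x40000000ULL;                 /* current file size, 1 GiB */
  uint64_t new_size = size + 10 * INC + 123;     /* hypothetical requirement */

  uint32_t multiplier = (uint32_t)(((new_size - size) / INC) + 1);

  assert(multiplier == 11);                               /* 10 full increments + 1 */
  assert(size + (uint64_t)multiplier * INC >= new_size);  /* new size is covered    */
  return 0;
}
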
- if (new_size >= size) { - // Multi-page allocations maybe larger than the snapshot resize increment - uint32_t multiplier = ((new_size - size) / PMA_SNAPSHOT_RESIZE_INC) + 1; - - // Fail if snapshot file couldn't be extended - if (_pma_extend_snapshot_file(multiplier)) return NULL; - } - - // Try to map dpages to address - address = mmap( - _pma_state->metadata->arena_end, - bytes, - PROT_READ | PROT_WRITE, - PMA_MMAP_FLAGS, - _pma_state->snapshot_fd, - offset); - if (address == MAP_FAILED) { - address = _pma_state->metadata->arena_end; - WARNING("mmap failed"); - abort(); - } - - assert(address == _pma_state->metadata->arena_end); - - // Update offset of next open dpage - _pma_state->metadata->next_offset += bytes; - _pma_state->metadata->arena_end = (uint8_t*)_pma_state->metadata->arena_end + bytes; - - // Add allocated pages to dirty list - _pma_mark_page_dirty(PTR_TO_INDEX(address), offset, FIRST, num_pages); - - return address; -} - -/** - * Deallocate one or more pages of allocated memory - * - * @param address Address of block to deallocated - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_free_pages(void *address) { - uint32_t index = PTR_TO_INDEX(address); - uint32_t num_pages = 0; - - if ((uint64_t)address & PMA_PAGE_MASK) { - WARNING("address does not point to the root of a page"); - errno = EINVAL; - return -1; - } - - assert(_pma_state->page_directory.entries[index].status == FIRST); - - // Count number of pages in allocation - do { - ++num_pages; - } while (_pma_state->page_directory.entries[index + num_pages].status == FOLLOW); - - // Mark pages dirty - _pma_mark_page_dirty(index, 0, FREE, num_pages); - - return 0; -} - -/** - * Deallocate a block of memory in a shared allocation page. - * - * @param address Address of block to deallocated - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_free_bytes(void *address) { - PMASharedPageHeader *header = (PMASharedPageHeader *)((uint64_t)address & (~PMA_PAGE_MASK)); - uint8_t slot = ((((uint64_t)address & PMA_PAGE_MASK) - sizeof(PMASharedPageHeader)) / (1 << header->size)); - uint8_t byte = slot / PMA_BITMAP_BITS; - uint8_t bit = slot % PMA_BITMAP_BITS; - - // Copy-on-write - _pma_copy_shared_page(header); - - if (header->bits[byte] & (1 << bit)) { - WARNING("bucketized address already free"); - errno = EINVAL; - return -1; - } - - header->bits[byte] += (1 << bit); - ++header->free; - - return 0; -} - -/** - * Copy a shared allocation page - * - * @param address Virtual memory address of shared allocation page - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_copy_shared_page(void *address) { - PMASharedPageHeader *shared_page; - uint64_t offset; - - // Check if page has already been copied - shared_page = (PMASharedPageHeader*)address; - if (shared_page->dirty) { - return 0; - } - - offset = _pma_get_single_dpage(); - if (!offset) { - return -1; - } - - // Make sure dpage cache is writeable - if (!_pma_state->metadata->dpage_cache->dirty) { - if (_pma_copy_dpage_cache()) { - WARNING("dpage cache copy failed"); - abort(); - } - } - - // Copy page - _pma_copy_page(address, offset, SHARED, _pma_state->snapshot_fd); - - // Mark page dirty so it isn't copied again - shared_page->dirty = 1; - - return 0; -} - -/** - * Allocate a new dpage (disk page) - * - * Reuse a page from the free dpage cache, if any are available. 
- * - * @return 0 failure; errno set to error code - * @return uint64_t offset of new page in backing file - */ -uint64_t -_pma_get_single_dpage(void) { - uint64_t offset; - - // Get a cached dpage, if one is available - offset = _pma_get_cached_dpage(); - if (!offset) { - // Otherwise, get a new dpage from disk - // - // XX returns 0 on failure, should assert - offset = _pma_get_disk_dpage(); - } - - assert((offset % PMA_PAGE_SIZE) == 0); - - return offset; -} - -/** - * Pull a free dpage from the dpage cache - * - * @return offset of new page in backing file (0 if cache empty) - */ -uint64_t -_pma_get_cached_dpage(void) { - uint64_t offset; - uint16_t dirty = _pma_state->metadata->dpage_cache->dirty; - uint16_t size = _pma_state->metadata->dpage_cache->size; - uint16_t head; - - // If the cache is empty, or there's only one page in the cache and the cache - // hasn't been touched yet, then exit early. If the cache hasn't been touched - // yet, we'll need to copy-on-write the cache as well, so if there's only one - // page, don't even bother. - if ((size == 0) || ((size == 1) && !dirty)) { - return 0; - } - - // Special copy-on-write for dpage cache - if (!dirty) { - if (_pma_copy_dpage_cache()) { - void *address = _pma_state->metadata->dpage_cache; - WARNING(strerror(errno)); - abort(); - } - } - - // TODO: macros for dealing with cache? - // Pop page off queue; head can't be assigned earlier as _pma_copy_dpage_cache - // may also try to pop a page off of the queue - head = _pma_state->metadata->dpage_cache->head; - offset = _pma_state->metadata->dpage_cache->queue[head]; - assert(offset != 0); - _pma_state->metadata->dpage_cache->size -= 1; - _pma_state->metadata->dpage_cache->head = ((head + 1) % PMA_DPAGE_CACHE_SIZE); - - return offset; -} - -/** - * Copy the free dpage cache - * - * Free dpage cache needs to be copied using copy-on-write semantics when pages - * are added or removed. - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_copy_dpage_cache(void) { - void *address; - uint64_t offset; - uint16_t dirty = _pma_state->metadata->dpage_cache->dirty; - uint16_t size = _pma_state->metadata->dpage_cache->size; - uint16_t head = _pma_state->metadata->dpage_cache->head; - - assert(!dirty); - - address = _pma_state->metadata->dpage_cache; - - // If pages available in cache... - if (size) { - // Use a page from the cache and record that it was used afterwards - offset = _pma_state->metadata->dpage_cache->queue[head]; - assert(offset != 0); - - _pma_copy_page(address, offset, FIRST, _pma_state->snapshot_fd); - - _pma_state->metadata->dpage_cache->size -= 1; - _pma_state->metadata->dpage_cache->head = ((head + 1) % PMA_DPAGE_CACHE_SIZE); - - } else { - // Otherwise, get a brand new page from disk - offset = _pma_get_disk_dpage(); - if (!offset) return -1; - - _pma_copy_page(address, offset, FIRST, _pma_state->snapshot_fd); - } - - // Mark dpage cache dirty (aka writeable) - _pma_state->metadata->dpage_cache->dirty = 1; - - return 0; -} - -/** - * Get a new free dpage on disk - * - * May require extending the snapshot backing file on disk. - * - * @return offset of new page in backing file (0 if failure) - */ -uint64_t -_pma_get_disk_dpage(void) { - uint64_t offset = _pma_state->metadata->next_offset; - uint64_t size = _pma_state->metadata->snapshot_size; - - // Get a new dpage. Extend snapshot backing file first, if necessary. 
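
A standalone sketch of the "seek past the end and write one byte" idiom this file relies on whenever a backing file has to grow (pma_init() above and _pma_extend_snapshot_file() below do exactly this). The path used here is hypothetical; the sketch only demonstrates that the lseek()/write() pair materializes the new file size.

#include <assert.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

int main(void) {
  const char *path = "/tmp/pma_grow_demo.bin";       /* hypothetical path */
  int fd = open(path, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
  assert(fd != -1);

  off_t target = 1 << 20;                            /* grow to 1 MiB          */
  assert(lseek(fd, target - 1, SEEK_SET) != -1);     /* seek past current EOF  */
  assert(write(fd, "", 1) == 1);                     /* materialize the size   */

  struct stat st;
  assert(fstat(fd, &st) == 0 && st.st_size == target);

  close(fd);
  unlink(path);
  return 0;
}
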
- if (offset == size) { - // Fail if snapshot file couldn't be extended - if (_pma_extend_snapshot_file(1)) return 0; - } - - // Update offset of next open dpage - _pma_state->metadata->next_offset += PMA_PAGE_SIZE; - - return offset; -} - -/** - * Copy an existing page to a new dpage - * - * Core copy-on-write implementation. - * - * @param address Virtual memory address of existing page - * @param offset Offset of dpage in backing file to which to copy - * @param status Page status after copy (SHARED or FIRST) - * @param fd PMA file descriptor - */ -void -_pma_copy_page(void *address, uint64_t offset, PMAPageStatus status, int fd) { - void *new_address; - ssize_t bytes_out; - uint64_t index = PTR_TO_INDEX(address); - uint16_t tail = _pma_state->metadata->dpage_cache->tail; - - bytes_out = pwrite(fd, address, PMA_PAGE_SIZE, offset); - if (bytes_out != PMA_PAGE_SIZE) { - WARNING(strerror(errno)); - abort(); - } - - new_address = mmap( - address, - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - offset); - if (new_address == MAP_FAILED) { - WARNING(strerror(errno)); - abort(); - } - - assert(new_address == address); - - // Add previous dpage to cache - // Note: the dpage cache should always be writeable here, either because the - // dpage cache is the page we just copied, or because it was made - // writeable in advance by _pma_copy_shared_page - assert(_pma_state->page_directory.entries[index].offset != 0); - _pma_state->metadata->dpage_cache->queue[tail] = _pma_state->page_directory.entries[index].offset; - _pma_state->metadata->dpage_cache->tail = ((tail + 1) % PMA_DPAGE_CACHE_SIZE); - - // Add page to dirty page list - _pma_mark_page_dirty(index, offset, status, 1); -} - -/** - * Add entry to the dirty page store - * - * @param index Index of page in page directory - * @param offset Offset of page in PMA file - * @param status Status of pages - * @param num_pages Number of pages represented by this entry - */ -void -_pma_mark_page_dirty(uint64_t index, uint64_t offset, PMAPageStatus status, uint32_t num_pages) { - PMADirtyPageEntry *dirty_page = _pma_state->metadata->dirty_pages; - - dirty_page += _pma_state->metadata->num_dirty_pages++; - - assert(_pma_state->metadata->num_dirty_pages <= PMA_DIRTY_PAGE_LIMIT); - - dirty_page->index = index; - dirty_page->offset = offset; - dirty_page->status = status; - dirty_page->num_pages = num_pages; -} - -/** - * Extend the size of the PMA backing file on disk - * - * Note: while it's possible that a multiplier larger than 2^32 could be valid - * (i.e. using ZFS is the file system, so the backing file can be up to - * 16 EiB in size, and the PMA backing file extension increment is less - * than 4 GiB), it almost certainly would never be encountered (the user - * needs to allocate a 2 EiB file to the loom?). 
- * - * @param multiplier New size = old size * multiplier - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -_pma_extend_snapshot_file(uint32_t multiplier) { - off_t err; - ssize_t bytes; - uint64_t new_snapshot_size; - - // Reject invalid multipliers - if (!multiplier || (multiplier > PMA_MAX_RESIZE_FACTOR)) return -1; - - // Update size in metadata - new_snapshot_size = _pma_state->metadata->snapshot_size + (multiplier * PMA_SNAPSHOT_RESIZE_INC); - - // Extend snapshot file - err = lseek(_pma_state->snapshot_fd, (new_snapshot_size - 1), SEEK_SET); - if (err == -1) return -1; - - bytes = write(_pma_state->snapshot_fd, "", 1); - if (bytes < 1) return -1; - - _pma_state->metadata->snapshot_size = new_snapshot_size; - return 0; -} - -/** - * Log warning message to console. - * - * @param s Error message - * @param p Address which caused error - * @param l Line number - */ -void -_pma_warning(const char *s, void *p, int l) { - fprintf(stderr, "*** %d: %p - %s\n", l, p, s); -} - -/** - * Helper function to deallocate PMA state on shutdown. - */ -void -_pma_state_free(void) -{ - if (_pma_state) { - if (_pma_state->metadata) free(_pma_state->metadata); - free(_pma_state); - _pma_state = NULL; - } -} - -/** - * Helper function to allocate memory for PMA state. - * - * @return 1 allocated PMA state already exists - * @return 0 memory for new PMA state successfully allocated - */ -int -_pma_state_malloc(void) -{ - if (_pma_state != NULL) return 1; - PMAState *ret = calloc(1, sizeof *ret); - ret->metadata = calloc(1, sizeof *ret->metadata); - _pma_state = ret; - return 0; -} diff --git a/rust/ares/src/pma/malloc.h b/rust/ares/src/pma/malloc.h deleted file mode 100644 index bfb7d82..0000000 --- a/rust/ares/src/pma/malloc.h +++ /dev/null @@ -1,118 +0,0 @@ -/** - * Persistent Memory Arena for the New Mars Nock virtualization engine. 
- */ - -#pragma once - -#include -#include -#include - -//============================================================================== -// PROTOTYPES -//============================================================================== - -/** - * Struct returned from pma_load() - */ -typedef struct PMARootState PMARootState; -struct PMARootState { - uint64_t epoch; // Epoch ID of the most recently processed event - uint64_t event; // ID of the most recently processed event - uint64_t root; // Root after most recent event -}; - -/** - * Initialize a brand new PMA environment and event snapshot - * - * @param path File directory in which to create backing files for snapshot and - * page directory - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -pma_init(const char *path); - -/** - * Load an existing PMA environment and event snapshot - * - * @param path File directory from which to load the backing files for the - * snapshot and page directory - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -PMARootState -pma_load(const char *path); - -/** - * Safely unload the PMA after syncing changes to PMA state - * - * @param epoch Epoch of latest event successfully applied to state snapshot - * @param event Event number of latest event successfully applied to state - * snapshot - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -pma_close(uint64_t epoch, uint64_t event, uint64_t root); - -/** - * Allocate a new block of memory in the PMA - * - * @param size Size in bytes to allocate - * - * @return NULL failure; errno set to error code - * @return void* address of the newly allocated memory - */ -void * -pma_malloc(size_t size); - -/** - * Deallocate an existing block of memory in the PMA - * - * @param address Address of block to deallocated - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -pma_free(void *address); - -/** - * Sync changes to PMA state - * - * @param epoch Epoch of latest event successfully applied to state snapshot - * @param event Event number of latest event successfully applied to state - * snapshot - * - * @return 0 success - * @return -1 failure; errno set to error code - */ -int -pma_sync(uint64_t epoch, uint64_t event, uint64_t root); - -/** - * True if the address is in the PMA - */ -bool -pma_in_arena(void *address); - -/* - bp(X) where X is false will raise a SIGTRAP. If the process is being run - inside a debugger, this can be caught and ignored. It's equivalent to a - breakpoint. 
If run without a debugger, it will dump core, like an assert -*/ -#if defined(__i386__) || defined(__x86_64__) -#define bp(x) do { if(!(x)) __asm__ volatile("int $3"); } while (0) -#elif defined(__thumb__) -#define bp(x) do { if(!(x)) __asm__ volatile(".inst 0xde01"); } while (0) -#elif defined(__aarch64__) -#define bp(x) do { if(!(x)) __asm__ volatile(".inst 0xd4200000"); } while (0) -#elif defined(__arm__) -#define bp(x) do { if(!(x)) __asm__ volatile(".inst 0xe7f001f0"); } while (0) -#else -STATIC_ASSERT(0, "debugger break instruction unimplemented"); -#endif diff --git a/rust/ares/src/pma/test/internals.h b/rust/ares/src/pma/test/internals.h deleted file mode 100644 index cc0e343..0000000 --- a/rust/ares/src/pma/test/internals.h +++ /dev/null @@ -1,198 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -//============================================================================== -// MACROS -//============================================================================== - -#define PMA_PAGE_SHIFT 12U -#define PMA_MIN_ALLOC_SHIFT 4U -#define PMA_BITMAP_BITS (8 * sizeof(uint8_t)) -#define PMA_SNAPSHOT_RESIZE_INC 0x100000000 -#define PMA_PAGE_SIZE (1UL << PMA_PAGE_SHIFT) -#define PMA_PAGE_MASK (PMA_PAGE_SIZE - 1) -#define PMA_MIN_ALLOC_SIZE (1U << PMA_MIN_ALLOC_SHIFT) -#define PMA_MAX_SHARED_SHIFT (PMA_PAGE_SHIFT - 2U) -#define PMA_MAX_SHARED_ALLOC (1UL << PMA_MAX_SHARED_SHIFT) -#define PMA_SHARED_BUCKETS (PMA_MAX_SHARED_SHIFT - PMA_MIN_ALLOC_SHIFT + 1) -#define PAGE_ROUND_DOWN(foo) (foo & (~PMA_PAGE_MASK)) -#define PAGE_ROUND_UP(foo) ((foo + PMA_PAGE_MASK) & (~PMA_PAGE_MASK)) -#define PTR_TO_INDEX(foo) ((((uint64_t)(foo)) - ((uint64_t)_pma_state->metadata->arena_start)) >> PMA_PAGE_SHIFT) -#define INDEX_TO_PTR(foo) (void *)((char *)_pma_state->metadata->arena_start + ((foo) * PMA_PAGE_SIZE)) -#ifdef __linux__ - #define PMA_MMAP_FLAGS (MAP_SHARED | MAP_FIXED_NOREPLACE) -#else - #define PMA_MMAP_FLAGS (MAP_SHARED | MAP_FIXED) -#endif -#define PMA_MAGIC_CODE 0xBADDECAFC0FFEE00 // i.e. 
all decaf coffee -#define PMA_DATA_VERSION 1 -#define PMA_EMPTY_BITMAP 0xFF -#define PMA_BITMAP_SIZE 32 -#define PMA_DPAGE_CACHE_SIZE ((PMA_PAGE_SIZE - sizeof(PMADPageCache)) / sizeof(uint64_t)) -#define PMA_DIRTY_PAGE_LIMIT 164 -#define PMA_SNAPSHOT_FILENAME "snap.bin" -#define PMA_PAGE_DIR_FILENAME "page.bin" -#define PMA_DEFAULT_DIR_NAME ".bin" -#define PMA_NEW_FILE_FLAGS (O_RDWR | O_CREAT) -#define PMA_LOAD_FILE_FLAGS (O_RDWR -#define PMA_DIR_PERMISSIONS (S_IRWXU | S_IRWXG | S_IRWXO) -#define PMA_FILE_PERMISSIONS (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP) -#define PMA_INIT_SNAP_SIZE 0x40000000 -#define PMA_INIT_DIR_SIZE 0x400000 -#define PMA_MAXIMUM_DIR_SIZE 0x5500000000 -#ifdef __linux__ - #define PMA_SNAPSHOT_ADDR 0x10000 -#else - #define PMA_SNAPSHOT_ADDR 0x28000000000 -#endif -#define PMA_MAX_DISK_FILE_SIZE 0x100000000000 -#define PMA_MAX_RESIZE_FACTOR (PMA_MAX_DISK_FILE_SIZE / PMA_SNAPSHOT_RESIZE_INC) - - -//============================================================================== -// TYPES -//============================================================================== - -enum PMAPageStatus { - UNALLOCATED, - FREE, - SHARED, - FIRST, - FOLLOW -}; -typedef enum PMAPageStatus PMAPageStatus; - -typedef struct PMAPageDirEntry PMAPageDirEntry; -struct PMAPageDirEntry { - uint64_t offset; - PMAPageStatus status; -}; - -typedef struct PMAPageDir PMAPageDir; -struct PMAPageDir { - uint64_t size; - uint64_t next_index; - PMAPageDirEntry *entries; -}; - -typedef struct PMASharedPageHeader PMASharedPageHeader; -struct PMASharedPageHeader { - struct PMASharedPageHeader *next; - uint8_t dirty; - uint8_t size; - uint8_t free; - uint8_t bits[PMA_BITMAP_SIZE]; -}; - -typedef struct PMADirtyPageEntry PMADirtyPageEntry; -struct PMADirtyPageEntry { - uint64_t index; - uint64_t offset; - uint32_t num_pages; - PMAPageStatus status; -}; - -typedef struct PMASinglePageCache PMASinglePageCache; -struct PMASinglePageCache { - PMASinglePageCache *next; - void *page; -}; - -typedef struct PMAPageRunCache PMAPageRunCache; -struct PMAPageRunCache { - PMAPageRunCache *next; - void *page; - uint64_t length; -}; - -typedef struct PMADPageCache PMADPageCache; -struct PMADPageCache { - uint8_t dirty; - uint16_t size; - uint16_t head; - uint16_t tail; - uint64_t queue[]; -}; - -typedef struct PMAMetadata PMAMetadata; -struct PMAMetadata { - uint64_t magic_code; - uint32_t checksum; - uint32_t version; - uint64_t epoch; - uint64_t event; - uint64_t root; - void *arena_start; - void *arena_end; - PMASharedPageHeader *shared_pages[PMA_SHARED_BUCKETS]; - PMADPageCache *dpage_cache; - uint64_t snapshot_size; - uint64_t next_offset; - uint8_t num_dirty_pages; - uint64_t padding[2]; - PMADirtyPageEntry dirty_pages[PMA_DIRTY_PAGE_LIMIT]; -}; -static_assert(sizeof(PMAMetadata) == PMA_PAGE_SIZE, "PMAMetadata must be a page in length"); - -typedef struct PMAState PMAState; -struct PMAState { - PMAMetadata *metadata; - uint64_t meta_page_offset; - PMAPageDir page_directory; - int snapshot_fd; - int page_dir_fd; - PMASinglePageCache *free_pages; - PMAPageRunCache *free_page_runs; -}; - - -//============================================================================== -// GLOBALS -//============================================================================== - -extern PMAState *_pma_state; - - -//============================================================================== -// FUNCTIONS -//============================================================================== - -int _pma_verify_checksum(PMAMetadata *meta_page); 
-int _pma_sync_dirty_pages(int fd, uint8_t num_dirty_pages, PMADirtyPageEntry *dirty_pages); -int _pma_write_page_status(int fd, uint64_t index, PMAPageStatus status); -int _pma_write_page_offset(int fd, uint64_t index, uint64_t offset); -int _pma_update_free_pages(uint8_t num_dirty_pages, PMADirtyPageEntry *dirty_pages); -void *_pma_malloc_bytes(size_t size); -int _pma_malloc_shared_page(uint8_t bucket); -void *_pma_malloc_pages(size_t size); -void *_pma_malloc_single_page(PMAPageStatus status); -void *_pma_malloc_multi_pages(uint64_t num_pages); -void *_pma_get_cached_pages(uint64_t num_pages); -void *_pma_get_new_page(PMAPageStatus status); -void *_pma_get_new_pages(uint64_t num_pages); -int _pma_free_pages(void *address); -int _pma_free_bytes(void *address); -int _pma_copy_shared_page(void *address); -uint64_t _pma_get_single_dpage(void); -uint64_t _pma_get_cached_dpage(void); -int _pma_copy_dpage_cache(void); -uint64_t _pma_get_disk_dpage(void); -void _pma_copy_page(void *address, uint64_t offset, PMAPageStatus status, int fd); -void _pma_mark_page_dirty(uint64_t index, uint64_t offset, PMAPageStatus status, uint32_t num_pages); -int _pma_extend_snapshot_file(uint32_t multiplier); -void _pma_warning(const char *p, void *a, int l); -void _pma_state_free(void); -int _pma_state_malloc(void); diff --git a/rust/ares/src/pma/test/malloc.c b/rust/ares/src/pma/test/malloc.c deleted file mode 100644 index 158b5f7..0000000 --- a/rust/ares/src/pma/test/malloc.c +++ /dev/null @@ -1,1511 +0,0 @@ -#include -#include -#include -#include -#include - -#include "../malloc.h" -#include "../includes/checksum.h" -#include "internals.h" - -//============================================================================== -// CONFIGURABLE MACROS -//============================================================================== - -#define TEST_PMA_SNAPSHOT_TEMPLATE "test-snapshot-XXXXXX.bin" -#define TEST_PMA_SNAPSHOT_SUFFIX 4 - - -//============================================================================== -// TYPES -//============================================================================== - -typedef struct TestState TestState; -struct TestState { - char *dir; // Directory in which to generate test files -}; - - -//============================================================================== -// GLOBALS -//============================================================================== - -TestState *_test_state = NULL; - - -//============================================================================== -// FORWARD DECLARATIONS -//============================================================================== - -void test_pma_state_malloc_and_free(void); -void test_pma_extend_snapshot_file(void); -void test_pma_mark_page_dirty(void); -void test_pma_copy_page(void); -void test_pma_get_disk_dpage(void); -void test_pma_copy_dpage_cache(void); -void test_pma_get_cached_dpage(void); -void test_pma_copy_shared_page(void); -void test_pma_free_bytes(void); -void test_pma_free_pages(void); -void test_pma_get_new_pages(void); -void test_pma_get_new_page(void); -void test_pma_get_cached_pages(void); -void test_pma_malloc_single_page(void); -void test_pma_malloc_shared_page(void); -void test_pma_update_free_pages(void); -void test_pma_verify_checksum(void); -void test_pma_in_arena(void); -void test_pma_init(void); -void test_pma_sync(void); -void test_pma_load(void); - - -//============================================================================== -// MAIN & HELPERS 
-//============================================================================== - -void -test_pma(char* test_dir) { - // Set up test state - _test_state = malloc(sizeof(TestState)); - _test_state->dir = test_dir; - - // Run tests - test_pma_state_malloc_and_free(); - test_pma_extend_snapshot_file(); - test_pma_mark_page_dirty(); - test_pma_copy_page(); - test_pma_get_disk_dpage(); - test_pma_copy_dpage_cache(); - test_pma_get_cached_dpage(); - test_pma_copy_shared_page(); - test_pma_free_bytes(); - test_pma_free_pages(); - test_pma_get_new_pages(); - test_pma_get_new_page(); - test_pma_get_cached_pages(); - test_pma_malloc_single_page(); - test_pma_malloc_shared_page(); - test_pma_update_free_pages(); - test_pma_verify_checksum(); - test_pma_in_arena(); - test_pma_init(); - test_pma_sync(); - test_pma_load(); - - // Clean up - free(_test_state); - - // Done - printf("Unit tests PASSED\n"); -} - -int -_generate_test_snapshot(char **filename) { - size_t dir_len; - size_t file_len; - int fd; - - dir_len = strlen(_test_state->dir); - file_len = strlen(TEST_PMA_SNAPSHOT_TEMPLATE); - - *filename = malloc(dir_len + file_len + 1); - strcpy(*filename, _test_state->dir); - strcpy((*filename + dir_len), TEST_PMA_SNAPSHOT_TEMPLATE); - assert(*filename); - fd = mkstemps(*filename, TEST_PMA_SNAPSHOT_SUFFIX); - assert(fd > 0); - - return fd; -} - -void -_clean_up_test_snapshot(int fd, char *filename) { - close(fd); - unlink(filename); - free(filename); -} - - -//============================================================================== -// TESTS -//============================================================================== - -void -test_pma_state_malloc_and_free(void) { - int res = -1; - - // pre state malloc - assert(!_pma_state); - - // state malloc - res = _pma_state_malloc(); - assert(!res); - assert(_pma_state); - assert(_pma_state->metadata); - - // try state malloc again - res = _pma_state_malloc(); - assert(res == 1); - - // state free - _pma_state_free(); - assert(!_pma_state); - - // try state free again - _pma_state_free(); - - // free metadata separately - res = _pma_state_malloc(); - free(_pma_state->metadata); - _pma_state->metadata = NULL; - _pma_state_free(); -} - -void -test_pma_extend_snapshot_file(void) { - struct stat statbuf; - uint64_t multiplier; - int fd; - int ret; - char *filename = NULL; - - // Test 1: 0 multiplier - ret = _pma_extend_snapshot_file(0); - assert(ret == -1); - - // Test 2: massive multiplier - ret = _pma_extend_snapshot_file(0xffffffff); - assert(ret == -1); - - // Set up state & locals - _pma_state_malloc(); - _pma_state->metadata->snapshot_size = 0; - multiplier = 10; - - // Test 3: lseek fails; snapshot file doesn't exist - ret = _pma_extend_snapshot_file(multiplier); - assert(ret == -1); - assert(errno == ESPIPE); - - // Set up fd - errno = 0; - fd = _generate_test_snapshot(&filename); - close(fd); - fd = open(filename, O_RDONLY); - assert(fd > 0); - _pma_state->snapshot_fd = fd; - - // Test 4: write fails; snapshot file read only - errno = 0; - ret = _pma_extend_snapshot_file(multiplier); - assert(ret == -1); - assert(errno == EBADF); - close(fd); - - // Reset fd - fd = open(filename, O_RDWR); - assert(fd > 0); - _pma_state->snapshot_fd = fd; - - // Test 5: Successful - errno = 0; - ret = _pma_extend_snapshot_file(multiplier); - assert(ret == 0); - assert(errno == 0); - assert(fstat(fd, &statbuf) == 0); - assert((uint64_t)statbuf.st_size == (multiplier * PMA_SNAPSHOT_RESIZE_INC)); - assert((uint64_t)statbuf.st_size == 
_pma_state->metadata->snapshot_size); - - // Clean up - _clean_up_test_snapshot(fd, filename); - _pma_state_free(); -} - -void -test_pma_mark_page_dirty(void) { - PMADirtyPageEntry *dirty_page; - - // Set up state & locals - _pma_state_malloc(); - _pma_state->metadata->num_dirty_pages = 10; - dirty_page = (_pma_state->metadata->dirty_pages + 10); - dirty_page->index = 1; - dirty_page->offset = 2; - dirty_page->num_pages = 3; - dirty_page->status = FREE; - - // Test 1: mark page dirty - _pma_mark_page_dirty(4, 5, FIRST, 6); - assert(_pma_state->metadata->num_dirty_pages == 11); - assert(dirty_page->index == 4); - assert(dirty_page->offset == 5); - assert(dirty_page->num_pages == 6); - assert(dirty_page->status == FIRST); - - // Clean up - _pma_state_free(); -} - -void -test_pma_copy_page(void) { - const uint64_t page_uno_offset = 0; - const uint64_t page_dos_offset = PMA_PAGE_SIZE; - const uint64_t page_tre_offset = (2 * PMA_PAGE_SIZE); - const uint64_t file_size = (3 * PMA_PAGE_SIZE); - const uint16_t end_of_dpage_cache = (PMA_DPAGE_CACHE_SIZE - 1); - ssize_t bytes; - const int strlen = 6; - int fd; - const char *text_alpha = "ALPHA"; - const char *text_bravo = "BRAVO"; - const char *text_delta = "DELTA"; - char *filename; - char text_test[6] = { 0 }; - void *address; - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(12287 == lseek(fd, (file_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - assert(6 == pwrite(fd, text_alpha, strlen, 0)); - assert(6 == pwrite(fd, text_bravo, strlen, PMA_PAGE_SIZE)); - assert(6 == pwrite(fd, text_delta, strlen, (2 * PMA_PAGE_SIZE))); - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - - _pma_state->metadata->dpage_cache = calloc(1, PMA_PAGE_SIZE); - _pma_state->metadata->dpage_cache->tail = end_of_dpage_cache; - _pma_state->metadata->dpage_cache->queue[end_of_dpage_cache] = 0; - - _pma_state->page_directory.entries = calloc(2, sizeof(PMAPageDirEntry)); - _pma_state->page_directory.entries[1].offset = page_dos_offset; - - // Set up address - address = mmap( - INDEX_TO_PTR(1), - PMA_PAGE_SIZE, - PROT_READ, - MAP_SHARED | MAP_FIXED, - fd, - page_dos_offset); - assert(MAP_FAILED != address); - - // Test 1: copy page in backing file - _pma_copy_page(address, page_tre_offset, FIRST, fd); - assert(0 == _pma_state->metadata->dpage_cache->tail); - assert(4096 == _pma_state->metadata->dpage_cache->queue[end_of_dpage_cache]); - bytes = pread(fd, text_test, strlen, page_uno_offset); - assert(6 == bytes); - assert(0 == strcmp(text_alpha, text_test)); - bytes = pread(fd, text_test, strlen, page_dos_offset); - assert(6 == bytes); - assert(0 == strcmp(text_bravo, text_test)); - bytes = pread(fd, text_test, strlen, page_tre_offset); - assert(6 == bytes); - assert(0 == strcmp(text_bravo, text_test)); - - // Clean up - munmap(INDEX_TO_PTR(0), file_size); - free(_pma_state->metadata->dpage_cache); - _clean_up_test_snapshot(fd, filename); - _pma_state_free(); -} - -void -test_pma_get_disk_dpage(void) { - struct stat statbuf; - uint64_t init_size = 2 * PMA_PAGE_SIZE; - uint64_t next_offset; - int fd; - char *filename; - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->next_offset = init_size - PMA_PAGE_SIZE; - _pma_state->metadata->snapshot_size = init_size; - - // Test 1: get next dpage without extending snapshot backing file - next_offset = _pma_get_disk_dpage(); - assert(4096 == next_offset); - assert(8192 == _pma_state->metadata->next_offset); - - // Test 2: failure to 
extend backing file - next_offset = _pma_get_disk_dpage(); - assert(0 == next_offset); - assert(8192 == _pma_state->metadata->next_offset); - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(8191 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - _pma_state->snapshot_fd = fd; - - // Test 3: get next dpage after extending snapshot backing file - next_offset = _pma_get_disk_dpage(); - assert(8192 == next_offset); - assert(12288 == _pma_state->metadata->next_offset); - assert(0 == fstat(fd, &statbuf)); - assert((uint64_t)statbuf.st_size == (PMA_SNAPSHOT_RESIZE_INC + init_size)); - - // Clean up - free(_pma_state->page_directory.entries); - _clean_up_test_snapshot(fd, filename); - _pma_state_free(); -} - -void -test_pma_copy_dpage_cache(void) { - const uint64_t page_uno_offset = PMA_PAGE_SIZE; - const uint64_t page_dos_offset = (2 * PMA_PAGE_SIZE); - const uint64_t page_tre_offset = (3 * PMA_PAGE_SIZE); - const uint64_t init_size = 4 * PMA_PAGE_SIZE; - const uint64_t test_code = 0xcafebabe8008135; - uint64_t data_buffer; - ssize_t bytes; - int fd = 0; - char *filename = NULL; - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(16383 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->snapshot_size = init_size; - _pma_state->metadata->dpage_cache = mmap( - INDEX_TO_PTR(0), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_uno_offset); - _pma_state->metadata->dpage_cache->dirty = 0; - _pma_state->metadata->dpage_cache->size = 0; - _pma_state->metadata->dpage_cache->head = 1; - _pma_state->metadata->dpage_cache->tail = 2; - _pma_state->metadata->dpage_cache->queue[0] = test_code; - _pma_state->metadata->dpage_cache->queue[1] = page_dos_offset; - _pma_state->page_directory.entries = malloc(sizeof(PMAPageDirEntry)); - _pma_state->page_directory.entries[0].offset = page_uno_offset; - - // Test 1: free page cache empty, getting new page fails - _pma_state->metadata->next_offset = init_size; - assert(_pma_copy_dpage_cache()); - - // Test 2: free page cache empty, getting new page succeeds - _pma_state->snapshot_fd = fd; - _pma_state->metadata->next_offset = page_tre_offset; - assert(0 == _pma_copy_dpage_cache()); - assert(16384 == _pma_state->metadata->next_offset); - bytes = pread(fd, &data_buffer, 8, (page_tre_offset + 8)); - assert(8 == bytes); - assert(0xcafebabe8008135 == data_buffer); - - // Reset dpage cache dirty bit - _pma_state->metadata->dpage_cache->dirty = 0; - - // Test 3: free page cache has a page - _pma_state->metadata->dpage_cache->size = 1; - assert(0 == _pma_copy_dpage_cache()); - bytes = pread(fd, &data_buffer, 8, (page_dos_offset + 8)); - assert(8 == bytes); - assert(0xcafebabe8008135 == data_buffer); - - // Clean up - munmap(INDEX_TO_PTR(0), init_size); - free(_pma_state->page_directory.entries); - _clean_up_test_snapshot(fd, filename); - _pma_state_free(); -} - -void -test_pma_get_cached_dpage(void) { - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->dpage_cache = calloc(1, sizeof(PMADPageCache)); - - // Test 1: no pages in cache - _pma_state->metadata->dpage_cache->dirty = 1; - _pma_state->metadata->dpage_cache->size = 0; - assert(0 == _pma_get_cached_dpage()); - - // Test 2: only one page in cache and cache uncopied - _pma_state->metadata->dpage_cache->dirty = 0; - _pma_state->metadata->dpage_cache->size 
= 1; - assert(0 == _pma_get_cached_dpage()); - - // Test 3: successfully get page - _pma_state->metadata->dpage_cache->dirty = 1; - _pma_state->metadata->dpage_cache->size = 2; - _pma_state->metadata->dpage_cache->head = 0; - _pma_state->metadata->dpage_cache->tail = 1; - _pma_state->metadata->dpage_cache->queue[0] = 0xcafebabe8008135; - assert(0xcafebabe8008135 == _pma_get_cached_dpage()); - assert(1 == _pma_state->metadata->dpage_cache->size); - assert(1 == _pma_state->metadata->dpage_cache->head); - assert(1 == _pma_state->metadata->dpage_cache->tail); - - // Test 4: successfully get page & loop queue - _pma_state->metadata->dpage_cache->head = PMA_DPAGE_CACHE_SIZE - 1; - _pma_state->metadata->dpage_cache->queue[PMA_DPAGE_CACHE_SIZE - 1] = 0xdefaced0facade; - assert(0xdefaced0facade == _pma_get_cached_dpage()); - assert(0 == _pma_state->metadata->dpage_cache->size); - assert(0 == _pma_state->metadata->dpage_cache->head); - assert(1 == _pma_state->metadata->dpage_cache->tail); - - // Clean up - free(_pma_state->metadata->dpage_cache); - _pma_state_free(); -} - -void -test_pma_copy_shared_page(void) { - PMASharedPageHeader *clean_shared_page; - PMASharedPageHeader *dirty_shared_page; - ssize_t bytes; - const uint64_t init_size = 4 * PMA_PAGE_SIZE; - const uint64_t page_nul_offset = 0; - const uint64_t page_uno_offset = PMA_PAGE_SIZE; - const uint64_t page_dos_offset = (2 * PMA_PAGE_SIZE); - const uint64_t page_tre_offset = (3 * PMA_PAGE_SIZE); - const uint8_t page_uno_size = 10; - const uint8_t page_dos_size = 20; - uint8_t data_buffer; - int fd = 0; - char *filename = NULL; - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(16383 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->snapshot_size = init_size; - _pma_state->metadata->dpage_cache = mmap( - INDEX_TO_PTR(0), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_nul_offset); - _pma_state->metadata->dpage_cache->dirty = 1; - _pma_state->page_directory.entries = calloc(3, sizeof(PMAPageDirEntry)); - _pma_state->page_directory.entries[1].offset = page_uno_offset; - _pma_state->page_directory.entries[1].status = SHARED; - _pma_state->page_directory.entries[2].offset = page_dos_offset; - _pma_state->page_directory.entries[2].status = SHARED; - - // Set up shared pages - dirty_shared_page = mmap( - INDEX_TO_PTR(1), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_uno_offset); - dirty_shared_page->dirty = 1; - dirty_shared_page->size = page_uno_size; - - clean_shared_page = mmap( - INDEX_TO_PTR(2), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_dos_offset); - clean_shared_page->dirty = 0; - clean_shared_page->size = page_dos_size; - - // Test 1: don't copy if shared page already dirty - assert(0 == _pma_copy_shared_page(dirty_shared_page)); - - // Test 2: fail if a new dpage couldn't be acquired - _pma_state->metadata->dpage_cache->size = 0; - _pma_state->metadata->dpage_cache->head = 0; - _pma_state->metadata->dpage_cache->tail = 0; - assert(-1 == _pma_copy_shared_page(clean_shared_page)); - - // Test 3: success - _pma_state->snapshot_fd = fd; - _pma_state->metadata->dpage_cache->size = 1; - _pma_state->metadata->dpage_cache->tail = 1; - _pma_state->metadata->dpage_cache->queue[0] = page_tre_offset; - assert(0 == _pma_copy_shared_page(clean_shared_page)); - bytes = 
pread(fd, &data_buffer, 1, (page_uno_offset + 9)); - assert(1 == bytes); - assert(10 == data_buffer); - bytes = pread(fd, &data_buffer, 1, (page_dos_offset + 9)); - assert(1 == bytes); - assert(20 == data_buffer); - bytes = pread(fd, &data_buffer, 1, (page_tre_offset + 9)); - assert(1 == bytes); - assert(20 == data_buffer); - - // Clean up - free(_pma_state->page_directory.entries); - munmap(PMA_SNAPSHOT_ADDR, init_size); - _pma_state_free(); - _clean_up_test_snapshot(fd, filename); -} - -void -test_pma_free_bytes(void) { - PMASharedPageHeader *shared_page_16; - PMASharedPageHeader *shared_page_64; - PMASharedPageHeader *shared_page_256; - const uint64_t init_size = 3 * PMA_PAGE_SIZE; - const uint64_t page_uno_offset = 0; - const uint64_t page_dos_offset = PMA_PAGE_SIZE; - const uint64_t page_tre_offset = (2 * PMA_PAGE_SIZE); - const uint8_t page_uno_size = 4; - const uint8_t page_dos_size = 6; - const uint8_t page_tre_size = 8; - int fd = 0; - int ret; - char *filename = NULL; - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(12287 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->snapshot_size = init_size; - - // Set up shared pages - shared_page_16 = mmap( - INDEX_TO_PTR(0), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_uno_offset); - shared_page_16->dirty = 1; - shared_page_16->size = page_uno_size; - shared_page_16->free = 0; - for (int i = 0; i < PMA_BITMAP_SIZE; ++i) { - shared_page_16->bits[i] = 0; - } - - shared_page_64 = mmap( - INDEX_TO_PTR(1), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_dos_offset); - shared_page_64->dirty = 1; - shared_page_64->size = page_dos_size; - shared_page_64->free = 0; - for (int i = 0; i < PMA_BITMAP_SIZE; ++i) { - shared_page_64->bits[i] = 0; - } - - shared_page_256 = mmap( - INDEX_TO_PTR(2), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - page_tre_offset); - shared_page_256->dirty = 1; - shared_page_256->size = page_tre_size; - shared_page_256->free = 0; - for (int i = 0; i < PMA_BITMAP_SIZE; ++i) { - shared_page_256->bits[i] = 0; - } - - // Test 1: free slot 0 of shared page with slot size 16 - ret = _pma_free_bytes((char*)shared_page_16 + sizeof(PMASharedPageHeader)); - assert(0 == ret); - assert(1 == shared_page_16->free); - assert(0x01 == shared_page_16->bits[0]); - - // Test 2: free slot 8 of shared page with slot size 64 - ret = _pma_free_bytes((char*)shared_page_64 + sizeof(PMASharedPageHeader) + 448); - assert(0 == ret); - assert(1 == shared_page_64->free); - assert(0x80 == shared_page_64->bits[0]); - - // Test 3: free slot 15 of shared page with slot size 256 - ret = _pma_free_bytes((char*)shared_page_256 + sizeof(PMASharedPageHeader) + 3584); - assert(0 == ret); - assert(1 == shared_page_256->free); - assert(0x40 == shared_page_256->bits[1]); - - // Test 4: failure when freeing an already free slot - ret = _pma_free_bytes((char*)shared_page_16 + sizeof(PMASharedPageHeader)); - assert(-1 == ret); - - // Clean up - munmap(PMA_SNAPSHOT_ADDR, init_size); - _pma_state_free(); - _clean_up_test_snapshot(fd, filename); -} - -void -test_pma_free_pages(void) { - const uint64_t init_size = 3 * PMA_PAGE_SIZE; - const uint64_t solo_page_offset = 0; - const uint64_t duo_page_offset = PMA_PAGE_SIZE; - int fd = 0; - char *filename = NULL; - void *solo_page; - void *duo_page; - - 
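The `test_pma_free_bytes` cases above encode the shared-page bookkeeping: each shared page keeps a small bitmap in its header where a set bit marks a free slot, `size` is the log2 of the slot width, and `free` counts available slots. A simplified Rust model of just that bitmap arithmetic, consistent with the offsets asserted in the tests; the struct and function are stand-ins, not the real `PMASharedPageHeader` layout:

```rust
const BITMAP_BYTES: usize = 32;

struct SharedPage {
    size: u8,                 // log2 of the slot size (4 => 16-byte slots)
    free: u8,                 // count of free slots
    bits: [u8; BITMAP_BYTES], // one bit per slot; 1 = free
}

fn free_slot(page: &mut SharedPage, byte_offset_in_data: usize) -> Result<(), ()> {
    let slot = byte_offset_in_data >> page.size; // offset / slot_size
    let (byte, bit) = (slot / 8, slot % 8);
    if page.bits[byte] & (1 << bit) != 0 {
        return Err(()); // slot already marked free: double free
    }
    page.bits[byte] |= 1 << bit;
    page.free += 1;
    Ok(())
}
```

Test 2 above, for example, frees byte offset 448 in a page of 64-byte slots: 448 >> 6 is slot 7, which is bit 7 of bits[0], hence the asserted 0x80.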
// Set up backing file - fd = _generate_test_snapshot(&filename); - assert(12287 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->snapshot_size = init_size; - _pma_state->page_directory.entries = calloc(3, sizeof(PMAPageDirEntry)); - _pma_state->page_directory.entries[0].status = FIRST; - _pma_state->page_directory.entries[0].offset = solo_page_offset; - _pma_state->page_directory.entries[1].status = FIRST; - _pma_state->page_directory.entries[1].offset = duo_page_offset; - _pma_state->page_directory.entries[2].status = FOLLOW; - _pma_state->page_directory.entries[2].offset = duo_page_offset + PMA_PAGE_SIZE; - - // Set up pages - solo_page = mmap( - INDEX_TO_PTR(0), - PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - solo_page_offset); - - duo_page = mmap( - INDEX_TO_PTR(1), - 2 * PMA_PAGE_SIZE, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_FIXED, - fd, - duo_page_offset); - - // Test 1: fail when pointing to middle of page - assert(-1 == _pma_free_pages(solo_page + 1)); - - // Test 2: free single page allocation - assert(0 == _pma_free_pages(solo_page)); - - // test 3: free multi-page allocation - assert(0 == _pma_free_pages(duo_page)); - - // Clean up - munmap(PMA_SNAPSHOT_ADDR, init_size); - free(_pma_state->page_directory.entries); - _pma_state_free(); - _clean_up_test_snapshot(fd, filename); -} - -void -test_pma_get_new_pages(void) { - const uint64_t init_size = PMA_PAGE_SIZE; - const uint64_t num_pages = 2; - int fd = 0; - char *filename = NULL; - void* const address = PMA_SNAPSHOT_ADDR + PMA_PAGE_SIZE; - void* const arena_end = address + (2 * PMA_PAGE_SIZE); - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(4095 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - - // Set up state - _pma_state_malloc(); - _pma_state->snapshot_fd = fd; - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->arena_end = address; - _pma_state->metadata->snapshot_size = init_size; - _pma_state->metadata->next_offset = init_size; - - // Test 1: allocate new pages - assert(address == _pma_get_new_pages(num_pages)); - assert(12288 == _pma_state->metadata->next_offset); - assert(arena_end == _pma_state->metadata->arena_end); - - // Clean Up - munmap(address, num_pages * PMA_PAGE_SIZE); - _pma_state_free(); - _clean_up_test_snapshot(fd, filename); -} - -void -test_pma_get_new_page(void) { - const uint64_t init_size = 2 * PMA_PAGE_SIZE; - const uint64_t init_offset = PMA_PAGE_SIZE; - int fd = 0; - char *filename = NULL; - void* const address = PMA_SNAPSHOT_ADDR; - void* const arena_end = address + PMA_PAGE_SIZE; - - // Set up backing file - fd = _generate_test_snapshot(&filename); - assert(8191 == lseek(fd, (init_size - 1), SEEK_SET)); - assert(1 == write(fd, "", 1)); - - // Set up state - _pma_state_malloc(); - _pma_state->snapshot_fd = fd; - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->arena_end = address; - _pma_state->metadata->snapshot_size = init_size; - _pma_state->metadata->next_offset = init_offset; - - _pma_state->metadata->dpage_cache = calloc(1, sizeof(PMADPageCache)); - _pma_state->metadata->dpage_cache->size = 0; - - // Test 1: allocate new pages - assert(address == _pma_get_new_page(FIRST)); - assert(8192 == _pma_state->metadata->next_offset); - assert(arena_end == _pma_state->metadata->arena_end); - - // Clean 
Up - munmap(address, PMA_PAGE_SIZE); - free(_pma_state->metadata->dpage_cache); - _pma_state_free(); - _clean_up_test_snapshot(fd, filename); -} - -void -test_pma_get_cached_pages(void) { - PMAPageRunCache *test_0_cache; - PMAPageRunCache *test_1_cache; - PMAPageRunCache *test_2_cache; - PMAPageRunCache *test_3_cache; - PMAPageRunCache *test_4_cache; - PMAPageRunCache *test_5_cache; - PMAPageRunCache *wip_ptr; - void *address; - - // Set up state - _pma_state_malloc(); - - // Set up run caches for test - test_0_cache = NULL; - - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x30000; - wip_ptr->length = 6; - wip_ptr->next = NULL; - test_1_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x20000; - wip_ptr->length = 5; - wip_ptr->next = test_1_cache; - test_1_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x10000; - wip_ptr->length = 4; - wip_ptr->next = test_1_cache; - test_1_cache = wip_ptr; - - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x30000; - wip_ptr->length = 6; - wip_ptr->next = NULL; - test_2_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x20000; - wip_ptr->length = 4; - wip_ptr->next = test_2_cache; - test_2_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x10000; - wip_ptr->length = 5; - wip_ptr->next = test_2_cache; - test_2_cache = wip_ptr; - - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x30000; - wip_ptr->length = 4; - wip_ptr->next = NULL; - test_3_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x20000; - wip_ptr->length = 5; - wip_ptr->next = test_3_cache; - test_3_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x10000; - wip_ptr->length = 6; - wip_ptr->next = test_3_cache; - test_3_cache = wip_ptr; - - test_4_cache = calloc(1, sizeof(PMAPageRunCache)); - test_4_cache->page = 0x40000; - test_4_cache->length = 2; - // Invalid pointer; used to confirm that we stop searching when we find exact run - test_4_cache->next = 0x8fffffffffffffff; - - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x50000; - wip_ptr->length = 3; - wip_ptr->next = NULL; - test_5_cache = wip_ptr; - wip_ptr = calloc(1, sizeof(PMAPageRunCache)); - wip_ptr->page = 0x99000; - wip_ptr->length = 1; - wip_ptr->next = test_5_cache; - test_5_cache = wip_ptr; - - // Test 0: page run cache empty - _pma_state->free_page_runs = test_0_cache; - address = _pma_get_cached_pages(2); - assert(NULL == address); - - // Test 1: find run bigger than requested, by two pages, at the very beginning - _pma_state->free_page_runs = test_1_cache; - address = _pma_get_cached_pages(2); - assert(0x10000 == address); - assert(2 == _pma_state->free_page_runs->length); - assert(0x12000 == _pma_state->free_page_runs->page); - assert(5 == _pma_state->free_page_runs->next->length); - assert(0x20000 == _pma_state->free_page_runs->next->page); - assert(6 == _pma_state->free_page_runs->next->next->length); - assert(0x30000 == _pma_state->free_page_runs->next->next->page); - assert(NULL == _pma_state->free_page_runs->next->next->next); - - // Test 2: find run bigger than request, by two pages, in the middle - _pma_state->free_page_runs = test_2_cache; - address = _pma_get_cached_pages(2); - assert(0x20000 == address); - assert(5 == _pma_state->free_page_runs->length); - assert(0x10000 == _pma_state->free_page_runs->page); - assert(2 == 
_pma_state->free_page_runs->next->length); - assert(0x22000 == _pma_state->free_page_runs->next->page); - assert(6 == _pma_state->free_page_runs->next->next->length); - assert(0x30000 == _pma_state->free_page_runs->next->next->page); - assert(NULL == _pma_state->free_page_runs->next->next->next); - - // Test 3: find run bigger than requested, by two pages, at the very end - _pma_state->free_page_runs = test_3_cache; - address = _pma_get_cached_pages(2); - assert(0x30000 == address); - assert(6 == _pma_state->free_page_runs->length); - assert(0x10000 == _pma_state->free_page_runs->page); - assert(5 == _pma_state->free_page_runs->next->length); - assert(0x20000 == _pma_state->free_page_runs->next->page); - assert(2 == _pma_state->free_page_runs->next->next->length); - assert(0x32000 == _pma_state->free_page_runs->next->next->page); - assert(NULL == _pma_state->free_page_runs->next->next->next); - - // Test 4: find exactly sized run, as only entry in cache, and stop looking - _pma_state->free_page_runs = test_4_cache; - address = _pma_get_cached_pages(2); - assert(0x40000 == address); - assert(0x8fffffffffffffff == _pma_state->free_page_runs); - - // Test 5: find run bigger than request, by a single page - _pma_state->free_page_runs = test_5_cache; - address = _pma_get_cached_pages(2); - assert(0x50000 == address); - assert(1 == _pma_state->free_page_runs->length); - assert(0x99000 == _pma_state->free_page_runs->page); - assert(NULL == _pma_state->free_page_runs->next); - assert(0x52000 == _pma_state->free_pages->page); - assert(NULL == _pma_state->free_pages->next); - - // Clean up - while (test_1_cache != NULL) { - wip_ptr = test_1_cache; - test_1_cache = test_1_cache->next; - free(wip_ptr); - } - while (test_2_cache != NULL) { - wip_ptr = test_2_cache; - test_2_cache = test_2_cache->next; - free(wip_ptr); - } - while (test_3_cache != NULL) { - wip_ptr = test_3_cache; - test_3_cache = test_3_cache->next; - free(wip_ptr); - } - free(_pma_state->free_pages); - free(_pma_state->free_page_runs); - _pma_state_free(); -} - -void -test_pma_malloc_single_page(void) { - PMASinglePageCache *wip_ptr; - - // Set up state - _pma_state_malloc(); - - // Set up free page cache - wip_ptr = calloc(1, sizeof(PMASinglePageCache)); - wip_ptr->page = 0x20000; - wip_ptr->next = NULL; - _pma_state->free_pages = wip_ptr; - wip_ptr = calloc(1, sizeof(PMASinglePageCache)); - wip_ptr->page = 0x10000; - wip_ptr->next = _pma_state->free_pages; - _pma_state->free_pages = wip_ptr; - - // Test 1: get page from free page cache - assert(0x10000 == _pma_malloc_single_page(FIRST)); - assert(0x20000 == _pma_state->free_pages->page); - assert(NULL == _pma_state->free_pages->next); - - // Case when no pages in free page cache tested by test_pma_get_new_page - - // Clean up - free(_pma_state->free_pages); - _pma_state_free(); -} - -void -test_pma_malloc_shared_page(void) { - PMASinglePageCache *free_pages; - PMASinglePageCache *wip_ptr; - const uint64_t mmap_size = 2 * PMA_PAGE_SIZE; - const uint8_t test_1_bucket_size = 0; - const uint8_t test_2_bucket_size = 0; - const uint8_t test_3_bucket_size = 6; - void *shared_pages; - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - _pma_state->metadata->snapshot_size = PMA_PAGE_SIZE; - _pma_state->metadata->next_offset = PMA_PAGE_SIZE; - _pma_state->free_pages = NULL; - - _pma_state->metadata->dpage_cache = calloc(1, sizeof(PMADPageCache)); - _pma_state->metadata->dpage_cache->size = 0; - - // Set up shared pages - shared_pages = mmap( - 
PMA_SNAPSHOT_ADDR, - mmap_size, - PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, - -1, - 0); - assert(MAP_FAILED != shared_pages); - - // Set up free page cache - wip_ptr = calloc(1, sizeof(PMASinglePageCache)); - wip_ptr->page = (shared_pages + PMA_PAGE_SIZE); - wip_ptr->next = NULL; - free_pages = wip_ptr; - wip_ptr = calloc(1, sizeof(PMASinglePageCache)); - wip_ptr->page = shared_pages; - wip_ptr->next = free_pages; - free_pages = wip_ptr; - - // Test 1: could not allocate page - assert(-1 == _pma_malloc_shared_page(test_1_bucket_size)); - - // Test 2: 16 byte slots - _pma_state->free_pages = free_pages; - assert(0 == _pma_malloc_shared_page(test_2_bucket_size)); - assert(NULL != _pma_state->metadata->shared_pages[test_2_bucket_size]); - assert(1 == _pma_state->metadata->shared_pages[test_2_bucket_size]->dirty); - assert(4 == _pma_state->metadata->shared_pages[test_2_bucket_size]->size); - assert(253 == _pma_state->metadata->shared_pages[test_2_bucket_size]->free); - for (uint8_t i = 0; i < PMA_BITMAP_SIZE; ++i) { - assert(PMA_EMPTY_BITMAP == _pma_state->metadata->shared_pages[test_2_bucket_size]->bits[i]); - } - assert(NULL != _pma_state->free_pages); - assert((shared_pages + PMA_PAGE_SIZE) == _pma_state->free_pages->page); - assert(NULL == _pma_state->free_pages->next); - - // Test 3: 1024 byte slots - assert(0 == _pma_malloc_shared_page(test_3_bucket_size)); - assert(NULL != _pma_state->metadata->shared_pages[test_3_bucket_size]); - assert(1 == _pma_state->metadata->shared_pages[test_3_bucket_size]->dirty); - assert(10 == _pma_state->metadata->shared_pages[test_3_bucket_size]->size); - assert(3 == _pma_state->metadata->shared_pages[test_3_bucket_size]->free); - for (uint8_t i = 0; i < PMA_BITMAP_SIZE; ++i) { - assert(PMA_EMPTY_BITMAP == _pma_state->metadata->shared_pages[test_3_bucket_size]->bits[i]); - } - assert(NULL == _pma_state->free_pages); - - // Clean up - munmap(shared_pages, mmap_size); - _pma_state_free(); -} - -void -test_pma_update_free_pages(void) { - PMADirtyPageEntry test_1_dirty_pages[2]; - PMADirtyPageEntry test_2_dirty_page; - PMADirtyPageEntry test_3_dirty_page; - - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = PMA_SNAPSHOT_ADDR; - - // Set up dirty pages - test_1_dirty_pages[0].index = 1; - test_1_dirty_pages[0].num_pages = 1; - test_1_dirty_pages[0].status = SHARED; - test_1_dirty_pages[1].index = 1; - test_1_dirty_pages[1].num_pages = 2; - test_1_dirty_pages[1].status = FIRST; - - test_2_dirty_page.index = 2; - test_2_dirty_page.num_pages = 1; - test_2_dirty_page.status = FREE; - - test_3_dirty_page.index = 3; - test_3_dirty_page.num_pages = 2; - test_3_dirty_page.status = FREE; - - // Test 1: all dirty pages have non-free status - assert(0 == _pma_update_free_pages(2, test_1_dirty_pages)); - assert(NULL == _pma_state->free_pages); - assert(NULL == _pma_state->free_page_runs); - - // Test 2: add single page to free page cache - assert(0 == _pma_update_free_pages(1, &test_2_dirty_page)); - assert(NULL != _pma_state->free_pages); - assert(INDEX_TO_PTR(2) == _pma_state->free_pages->page); - assert(NULL == _pma_state->free_pages->next); - - // Test 3: add multiple free pages to free page runs cache - assert(0 == _pma_update_free_pages(1, &test_3_dirty_page)); - assert(NULL != _pma_state->free_page_runs); - assert(INDEX_TO_PTR(3) == _pma_state->free_page_runs->page); - assert(2 == _pma_state->free_page_runs->length); - assert(NULL == _pma_state->free_page_runs->next); - - // Clean up - free(_pma_state->free_pages); - 
free(_pma_state->free_page_runs); - _pma_state_free(); -} - -void -test_pma_verify_checksum(void) { - PMAMetadata fake_metadata_page; - - // Set up state - _pma_state_malloc(); - - // Test 1: good checksum - fake_metadata_page.checksum = 0; - fake_metadata_page.checksum = crc_32( - (unsigned char *)(&fake_metadata_page), - PMA_PAGE_SIZE); - assert(1 == _pma_verify_checksum(&fake_metadata_page)); - - // Test 2: bad checksum - fake_metadata_page.checksum = 0xbaddecaf; - assert(0 == _pma_verify_checksum(&fake_metadata_page)); - - // Clean up - _pma_state_free(); -} - -void -test_pma_in_arena(void) { - // Set up state - _pma_state_malloc(); - _pma_state->metadata->arena_start = 0x7fffffff; - _pma_state->metadata->arena_end = 0x80000001; - - // Test 1: before arena start - assert(0 == pma_in_arena(0x10000000)); - - // Test 2: equal to arena start - assert(1 == pma_in_arena(0x7fffffff)); - - // Test 3: in arena - assert(1 == pma_in_arena(0x80000000)); - - // Test 4: equal to arena end - assert(0 == pma_in_arena(0x80000001)); - - // Test 5: after arena end - assert(0 == pma_in_arena(0xffffffff)); - - // Clean up - _pma_state_free(); -} - -void -test_pma_init(void) { - struct stat page_dir_statbuf; - struct stat page_dir_statbuf_v; - struct stat snapshot_statbuf; - struct stat snapshot_statbuf_v; - size_t dir_len; - uint32_t checksum; - char *page_dir_path; - char *snapshot_path; - - // Set up - dir_len = strlen(_test_state->dir); - - page_dir_path = malloc(dir_len + 15); - sprintf(page_dir_path, "%s/%s/%s", _test_state->dir, PMA_DEFAULT_DIR_NAME, PMA_PAGE_DIR_FILENAME); - - snapshot_path = malloc(dir_len + 15); - sprintf(snapshot_path, "%s/%s/%s", _test_state->dir, PMA_DEFAULT_DIR_NAME, PMA_SNAPSHOT_FILENAME); - - // Test 1: successful initialization - assert(0 == pma_init(_test_state->dir)); - - fstat(_pma_state->page_dir_fd, &page_dir_statbuf); - stat(page_dir_path, &page_dir_statbuf_v); - assert(page_dir_statbuf_v.st_dev == page_dir_statbuf.st_dev); - assert(page_dir_statbuf_v.st_ino == page_dir_statbuf.st_ino); - - fstat(_pma_state->snapshot_fd, &snapshot_statbuf); - stat(snapshot_path, &snapshot_statbuf_v); - assert(snapshot_statbuf_v.st_dev == snapshot_statbuf.st_dev); - assert(snapshot_statbuf_v.st_ino == snapshot_statbuf.st_ino); - - assert(0x400000 == page_dir_statbuf.st_size); - assert(0x40000000 == snapshot_statbuf.st_size); - - assert(NULL == _pma_state->free_pages); - assert(NULL == _pma_state->free_page_runs); - assert(0 == _pma_state->meta_page_offset); - - assert(0x400000 == _pma_state->page_directory.size); - assert(1 == _pma_state->page_directory.next_index); - assert(FIRST == _pma_state->page_directory.entries[0].status); - assert(8192 == _pma_state->page_directory.entries[0].offset); - - assert(0xBADDECAFC0FFEE00 == _pma_state->metadata->magic_code); - assert(1 == _pma_state->metadata->version); - assert(0 == _pma_state->metadata->epoch); - assert(0 == _pma_state->metadata->event); - assert(0 == _pma_state->metadata->root); - assert(0x10000 == _pma_state->metadata->arena_start); - assert(0x11000 == _pma_state->metadata->arena_end); - assert(12288 == _pma_state->metadata->next_offset); - assert(0x10000 == _pma_state->metadata->dpage_cache); - assert(0 == _pma_state->metadata->dpage_cache->dirty); - assert(0 == _pma_state->metadata->dpage_cache->size); - assert(0 == _pma_state->metadata->dpage_cache->head); - assert(0 == _pma_state->metadata->dpage_cache->tail); - assert(0 == _pma_state->metadata->num_dirty_pages); - assert(0 == _pma_state->metadata->dirty_pages[0].index); - 
assert(0 == _pma_state->metadata->dirty_pages[0].offset); - assert(0 == _pma_state->metadata->dirty_pages[0].num_pages); - - checksum = _pma_state->metadata->checksum; - _pma_state->metadata->checksum = 0; - assert(checksum == crc_32((unsigned char*)_pma_state->metadata, PMA_PAGE_SIZE)); - - // Clean up - munmap(_pma_state->metadata->arena_start, _pma_state->metadata->snapshot_size); - munmap(_pma_state->page_directory.entries, PMA_MAXIMUM_DIR_SIZE); - - _pma_state_free(); - - unlink(snapshot_path); - free(snapshot_path); - - unlink(page_dir_path); - free(page_dir_path); -} - -void -test_pma_sync(void) { - PMAMetadata *metadata_page_1; - PMAMetadata *metadata_page_2; - PMASharedPageHeader *shared_page_16b; - size_t dir_len; - char *page_dir_path; - char *snapshot_path; - - // Set up - dir_len = strlen(_test_state->dir); - - page_dir_path = malloc(dir_len + 15); - sprintf(page_dir_path, "%s/%s/%s", _test_state->dir, PMA_DEFAULT_DIR_NAME, PMA_PAGE_DIR_FILENAME); - - snapshot_path = malloc(dir_len + 15); - sprintf(snapshot_path, "%s/%s/%s", _test_state->dir, PMA_DEFAULT_DIR_NAME, PMA_SNAPSHOT_FILENAME); - - pma_init(_test_state->dir); - _pma_state->metadata->epoch = 1; - _pma_state->metadata->event = 1; - - // Test 1: good event, bad epoch - assert(-1 == pma_sync(0, 2, 0)); - - // Test 2: good epoch, bad event - assert(-1 == pma_sync(1, 0, 0)); - - // Test 3: successful sync - _pma_state->metadata->epoch = 0; - _pma_state->metadata->event = 0; - - pma_malloc(16); - assert(1 == _pma_state->metadata->num_dirty_pages); - - assert(0 == pma_sync(1, 2, 3)); - assert(1 == _pma_state->metadata->epoch); - assert(2 == _pma_state->metadata->event); - assert(3 == _pma_state->metadata->root); - assert(0x12000 == _pma_state->metadata->arena_end); - assert(0x11000 == _pma_state->metadata->shared_pages[0]); - assert(NULL == _pma_state->metadata->shared_pages[1]); - assert(NULL == _pma_state->metadata->shared_pages[2]); - assert(NULL == _pma_state->metadata->shared_pages[3]); - assert(NULL == _pma_state->metadata->shared_pages[4]); - assert(NULL == _pma_state->metadata->shared_pages[5]); - assert(NULL == _pma_state->metadata->shared_pages[6]); - assert(0x10000 == _pma_state->metadata->dpage_cache); - assert(0 == _pma_state->metadata->num_dirty_pages); - assert(16384 == _pma_state->metadata->next_offset); - - metadata_page_1 = mmap( - NULL, - PMA_PAGE_SIZE, - PROT_READ, - MAP_SHARED, - _pma_state->snapshot_fd, - 0); - metadata_page_2 = mmap( - NULL, - PMA_PAGE_SIZE, - PROT_READ, - MAP_SHARED, - _pma_state->snapshot_fd, - 4096); - shared_page_16b = mmap( - NULL, - PMA_PAGE_SIZE, - PROT_READ, - MAP_SHARED, - _pma_state->snapshot_fd, - 12288); - - assert(metadata_page_1->magic_code == _pma_state->metadata->magic_code); - assert(metadata_page_1->checksum == _pma_state->metadata->checksum); - assert(metadata_page_1->version == _pma_state->metadata->version); - assert(metadata_page_1->epoch == _pma_state->metadata->epoch); - assert(metadata_page_1->event == _pma_state->metadata->event); - assert(metadata_page_1->root == _pma_state->metadata->root); - assert(metadata_page_1->arena_start == _pma_state->metadata->arena_start); - assert(metadata_page_1->arena_end == _pma_state->metadata->arena_end); - assert(metadata_page_1->dpage_cache == _pma_state->metadata->dpage_cache); - assert(metadata_page_1->snapshot_size == _pma_state->metadata->snapshot_size); - assert(metadata_page_1->next_offset == _pma_state->metadata->next_offset); - - assert(1 == metadata_page_1->num_dirty_pages); - assert(1 == 
metadata_page_1->dirty_pages[0].index); - assert(12288 == metadata_page_1->dirty_pages[0].offset); - assert(1 == metadata_page_1->dirty_pages[0].num_pages); - assert(SHARED == metadata_page_1->dirty_pages[0].status); - - assert(0 == metadata_page_2->epoch); - assert(0 == metadata_page_2->event); - assert(0 == metadata_page_2->root); - assert(0x11000 == metadata_page_2->arena_end); - assert(NULL == metadata_page_2->shared_pages[0]); - assert(NULL == metadata_page_2->shared_pages[1]); - assert(NULL == metadata_page_2->shared_pages[2]); - assert(NULL == metadata_page_2->shared_pages[3]); - assert(NULL == metadata_page_2->shared_pages[4]); - assert(NULL == metadata_page_2->shared_pages[5]); - assert(NULL == metadata_page_2->shared_pages[6]); - assert(0x10000 == metadata_page_2->dpage_cache); - assert(0 == metadata_page_2->num_dirty_pages); - assert(12288 == metadata_page_2->next_offset); - - assert(NULL == shared_page_16b->next); - assert(0 == shared_page_16b->dirty); - assert(4 == shared_page_16b->size); - assert(252 == shared_page_16b->free); - - // Clean up - munmap(metadata_page_1, PMA_PAGE_SIZE); - munmap(metadata_page_2, PMA_PAGE_SIZE); - - munmap(_pma_state->metadata->arena_start, _pma_state->metadata->snapshot_size); - munmap(_pma_state->page_directory.entries, PMA_MAXIMUM_DIR_SIZE); - - _pma_state_free(); - - unlink(snapshot_path); - free(snapshot_path); - - unlink(page_dir_path); - free(page_dir_path); -} - -void -test_pma_load(void) { - PMARootState res; - size_t dir_len; - const uint64_t bad_code = 0x600DDECAFC0FFEE0; - const uint64_t old_event = 0; - const uint32_t bad_checksum = 0; - const uint32_t bad_version = 1337; - int snapshot_fd; - char *bin_path; - char *page_dir_path; - char *snapshot_path; - - // Set up - dir_len = strlen(_test_state->dir); - - bin_path = malloc(dir_len + 6); - sprintf(bin_path, "%s/%s", _test_state->dir, PMA_DEFAULT_DIR_NAME); - - page_dir_path = malloc(dir_len + 15); - sprintf(page_dir_path, "%s/%s", bin_path, PMA_PAGE_DIR_FILENAME); - - snapshot_path = malloc(dir_len + 15); - sprintf(snapshot_path, "%s/%s", bin_path, PMA_SNAPSHOT_FILENAME); - - // Test 1: dir doesn't exist - rmdir(bin_path); - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(2 == errno); - errno = 0; - - // Test 2: snapshot doesn't exist - pma_init(_test_state->dir); - assert(0 == pma_close(0, 1, 0)); - unlink(snapshot_path); - - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(2 == errno); - - errno = 0; - _pma_state_free(); - unlink(page_dir_path); - - // Test 3: page directory doesn't exist - pma_init(_test_state->dir); - assert(0 == pma_close(0, 1, 0)); - unlink(page_dir_path); - - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(2 == errno); - - errno = 0; - _pma_state_free(); - unlink(snapshot_path); - - // Test 4: bad magic code - pma_init(_test_state->dir); - assert(0 == pma_close(0, 1, 0)); - snapshot_fd = open(snapshot_path, PMA_NEW_FILE_FLAGS, PMA_FILE_PERMISSIONS); - assert(0 < snapshot_fd); - pwrite(snapshot_fd, &bad_code, 8, 0); - - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(EILSEQ == errno); - - errno = 0; - close(snapshot_fd); - unlink(snapshot_path); - unlink(page_dir_path); - - // Test 5: bad version - pma_init(_test_state->dir); - assert(0 == pma_close(0, 1, 0)); - snapshot_fd = 
open(snapshot_path, PMA_NEW_FILE_FLAGS, PMA_FILE_PERMISSIONS); - assert(0 < snapshot_fd); - pwrite(snapshot_fd, &bad_version, 4, 12); - - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(EILSEQ == errno); - - errno = 0; - close(snapshot_fd); - unlink(snapshot_path); - unlink(page_dir_path); - - // Test 6: both metadata pages have invalid checksum - pma_init(_test_state->dir); - assert(0 == pma_close(0, 1, 0)); - snapshot_fd = open(snapshot_path, PMA_NEW_FILE_FLAGS, PMA_FILE_PERMISSIONS); - assert(0 < snapshot_fd); - pwrite(snapshot_fd, &bad_checksum, 4, 8); - pwrite(snapshot_fd, &bad_checksum, 4, (PMA_PAGE_SIZE + 8)); - - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(EILSEQ == errno); - - errno = 0; - close(snapshot_fd); - unlink(snapshot_path); - unlink(page_dir_path); - - // Test 7: first metadata page is newer but has bad checksum - pma_init(_test_state->dir); - assert(0 == pma_close(1, 2, 3)); - snapshot_fd = open(snapshot_path, PMA_NEW_FILE_FLAGS, PMA_FILE_PERMISSIONS); - assert(0 < snapshot_fd); - pwrite(snapshot_fd, &bad_checksum, 4, 8); - pwrite(snapshot_fd, &old_event, 8, (PMA_PAGE_SIZE + 24)); - - res = pma_load(_test_state->dir); - assert(0 == res.epoch); - assert(0 == res.event); - assert(0 == res.root); - assert(0 == _pma_state->meta_page_offset); - - assert(0 == pma_close(4, 4, 4)); - close(snapshot_fd); - unlink(snapshot_path); - unlink(page_dir_path); - - // Test 8: second metadata page is newer - pma_init(_test_state->dir); - assert(0 == pma_sync(1, 2, 3)); - assert(0 == pma_close(4, 5, 6)); - - res = pma_load(_test_state->dir); - assert(4 == res.epoch); - assert(5 == res.event); - assert(6 == res.root); - assert(0 == _pma_state->meta_page_offset); - - assert(0 == pma_close(7, 8, 9)); - close(snapshot_fd); - unlink(snapshot_path); - unlink(page_dir_path); - - // Clean up - free(bin_path); - free(snapshot_path); - free(page_dir_path); -} diff --git a/rust/ares/src/serf.rs b/rust/ares/src/serf.rs index 89c70be..b553771 100644 --- a/rust/ares/src/serf.rs +++ b/rust/ares/src/serf.rs @@ -10,13 +10,16 @@ use crate::mem::NockStack; use crate::mug::*; use crate::newt::Newt; use crate::noun::{Atom, Cell, DirectAtom, Noun, Slots, D, T}; +use crate::persist::pma_meta_set; +use crate::persist::{pma_meta_get, pma_open, pma_sync, Persist}; use crate::trace::*; use ares_macros::tas; use signal_hook; use signal_hook::consts::SIGINT; use std::fs::create_dir_all; use std::io; -use std::path::{Path, PathBuf}; +use std::mem::size_of; +use std::path::PathBuf; use std::result::Result; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -26,6 +29,57 @@ crate::gdb!(); const FLAG_TRACE: u32 = 1 << 8; +#[repr(usize)] +enum BTMetaField { + SnapshotVersion = 0, + Snapshot = 1, +} +struct Snapshot(pub *mut SnapshotMem); + +impl Persist for Snapshot { + unsafe fn space_needed(&mut self, stack: &mut NockStack) -> usize { + let mut arvo = (*(self.0)).arvo; + let mut cold = (*(self.0)).cold; + let arvo_space_needed = arvo.space_needed(stack); + let cold_space_needed = cold.space_needed(stack); + (((size_of::() + 7) >> 3) << 3) + arvo_space_needed + cold_space_needed + } + + unsafe fn copy_to_buffer(&mut self, stack: &mut NockStack, buffer: &mut *mut u8) { + let snapshot_buffer = *buffer as *mut SnapshotMem; + std::ptr::copy_nonoverlapping(self.0, snapshot_buffer, 1); + *self = Snapshot(snapshot_buffer); + *buffer = snapshot_buffer.add(1) as *mut 
u8; + + let mut arvo = (*snapshot_buffer).arvo; + arvo.copy_to_buffer(stack, buffer); + (*snapshot_buffer).arvo = arvo; + + let mut cold = (*snapshot_buffer).cold; + cold.copy_to_buffer(stack, buffer); + (*snapshot_buffer).cold = cold; + } + + unsafe fn handle_to_u64(&self) -> u64 { + self.0 as u64 + } + + unsafe fn handle_from_u64(meta_handle: u64) -> Self { + Snapshot(meta_handle as *mut SnapshotMem) + } +} + +#[repr(C)] +#[repr(packed)] +struct SnapshotMem { + pub epoch: u64, + pub event_num: u64, + pub arvo: Noun, + pub cold: Cold, +} + +const PMA_CURRENT_SNAPSHOT_VERSION: u64 = 1; + struct Context { epoch: u64, event_num: u64, @@ -35,27 +89,87 @@ struct Context { } impl Context { - pub fn new( - _snap_path: &Path, + pub fn load( + snap_path: PathBuf, trace_info: Option, constant_hot_state: &[HotEntry], + ) -> Context { + pma_open(snap_path).expect("serf: pma open failed"); + + let snapshot_version = pma_meta_get(BTMetaField::SnapshotVersion as usize); + + let snapshot = match snapshot_version { + 0 => None, + 1 => Some(unsafe { + Snapshot::handle_from_u64(pma_meta_get(BTMetaField::Snapshot as usize)) + }), + _ => panic!("Unsupported snapshot version"), + }; + + Context::new(trace_info, snapshot, constant_hot_state) + } + + pub unsafe fn save(&mut self) { + let handle = { + let mut snapshot = Snapshot({ + let snapshot_mem_ptr: *mut SnapshotMem = self.nock_context.stack.struct_alloc(1); + + // Save into PMA (does not sync) + (*snapshot_mem_ptr).epoch = self.epoch; + (*snapshot_mem_ptr).event_num = self.event_num; + (*snapshot_mem_ptr).arvo = self.arvo; + (*snapshot_mem_ptr).cold = self.nock_context.cold; + snapshot_mem_ptr + }); + + let handle = snapshot.save_to_pma(&mut self.nock_context.stack); + + self.epoch = (*snapshot.0).epoch; + self.arvo = (*snapshot.0).arvo; + self.event_num = (*snapshot.0).event_num; + self.nock_context.cold = (*snapshot.0).cold; + + handle + }; + pma_meta_set( + BTMetaField::SnapshotVersion as usize, + PMA_CURRENT_SNAPSHOT_VERSION, + ); + pma_meta_set(BTMetaField::Snapshot as usize, handle); + } + + fn new( + trace_info: Option, + snapshot: Option, + constant_hot_state: &[HotEntry], ) -> Self { - // TODO: switch to Pma when ready - let mut stack = NockStack::new(512 << 10 << 10, 0); + let mut stack = NockStack::new(1024 << 10 << 10, 0); + let newt = Newt::new(); + let cache = Hamt::::new(&mut stack); + + let (epoch, event_num, arvo, mut cold) = unsafe { + match snapshot { + Some(snapshot) => ( + (*(snapshot.0)).epoch, + (*(snapshot.0)).event_num, + (*(snapshot.0)).arvo, + (*(snapshot.0)).cold, + ), + None => (0, 0, D(0), Cold::new(&mut stack)), + } + }; - let cold = Cold::new(&mut stack); let hot = Hot::init(&mut stack, constant_hot_state); - - let (epoch, event_num, arvo) = (0, 0, D(0)); + let warm = Warm::init(&mut stack, &mut cold, &hot); let mug = mug_u32(&mut stack, arvo); let nock_context = interpreter::Context { stack, - newt: Newt::new(), + newt, cold, - warm: Warm::new(), + warm, hot, - cache: Hamt::::new(), + cache, scry_stack: D(0), trace_info, }; @@ -73,20 +187,35 @@ impl Context { // Setters // - pub fn event_update(&mut self, new_event_num: u64, new_arvo: Noun) { + /// + /// ## Safety + /// + /// calls save(), which invalidates all nouns not in the context + /// until [preserve_event_update_leftovers] is called to resolve forwarding pointers. 
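The `Persist` implementation above follows a simple contract: `space_needed` returns a conservative byte count with struct sizes rounded up to 8-byte words (the `((size_of::<T>() + 7) >> 3) << 3` idiom), and `copy_to_buffer` copies the object to the caller's cursor and then advances that cursor for whatever gets copied next. A minimal, hedged stand-in for that pattern; `Pair`, `word_aligned`, and `copy_pair_to_buffer` are illustrative names, not the crate's API:

```rust
use std::mem::size_of;

/// Round a byte count up to a whole number of 8-byte words, matching the
/// ((size + 7) >> 3) << 3 expression in Snapshot::space_needed above.
fn word_aligned(bytes: usize) -> usize {
    ((bytes + 7) >> 3) << 3
}

#[repr(C)]
#[derive(Clone, Copy)]
struct Pair {
    a: u64,
    b: u32,
}

/// Copy `val` to the front of the buffer and advance the cursor by the
/// word-aligned size so the next object stays 8-byte aligned.
unsafe fn copy_pair_to_buffer(val: &Pair, buffer: &mut *mut u8) -> *mut Pair {
    let dest = *buffer as *mut Pair;
    std::ptr::copy_nonoverlapping(val as *const Pair, dest, 1);
    *buffer = (*buffer).add(word_aligned(size_of::<Pair>()));
    dest
}
```

`Snapshot::copy_to_buffer` above does the same dance for `SnapshotMem` and then recurses into the `arvo` and `cold` fields with the same cursor, so the whole snapshot ends up in one contiguous PMA allocation sized by `space_needed`.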
+ pub unsafe fn event_update(&mut self, new_event_num: u64, new_arvo: Noun) { // XX: assert event numbers are continuous self.arvo = new_arvo; self.event_num = new_event_num; + self.save(); + + self.nock_context.cache = Hamt::new(&mut self.nock_context.stack); + self.nock_context.scry_stack = D(0); + + // XX save to PMA self.mug = mug_u32(&mut self.nock_context.stack, self.arvo); } - // - // Snapshot functions - // - - pub fn sync(&mut self) { - // TODO actually sync - eprintln!("serf: TODO sync"); + /// + /// ## Safety + /// + /// Preserves nouns and jet states in context and then calls [flip_top_frame]. + /// Other stack-allocated objects needing preservation should be preserved between + /// [event_update] and invocation of this function + pub unsafe fn preserve_event_update_leftovers(&mut self) { + let stack = &mut self.nock_context.stack; + stack.preserve(&mut self.nock_context.warm); + stack.preserve(&mut self.nock_context.hot); + stack.flip_top_frame(0); } // @@ -208,13 +337,13 @@ pub fn serf(constant_hot_state: &[HotEntry]) -> io::Result<()> { } } - let mut context = Context::new(&snap_path, trace_info, constant_hot_state); + let mut context = Context::load(snap_path, trace_info, constant_hot_state); context.ripe(); // Can't use for loop because it borrows newt while let Some(writ) = context.next() { // Reset the local cache and scry handler stack - context.nock_context.cache = Hamt::::new(); + context.nock_context.cache = Hamt::::new(&mut context.nock_context.stack); context.nock_context.scry_stack = D(0); let tag = slot(writ, 2)?.as_direct().unwrap(); @@ -229,8 +358,7 @@ pub fn serf(constant_hot_state: &[HotEntry]) -> io::Result<()> { } tas!(b"save") => { // XX what is eve for? - eprintln!("\r %save"); - context.sync(); + pma_sync(); } tas!(b"meld") => eprintln!("\r %meld: not implemented"), tas!(b"pack") => eprintln!("\r %pack: not implemented"), @@ -261,18 +389,6 @@ pub fn serf(constant_hot_state: &[HotEntry]) -> io::Result<()> { }; clear_interrupt(); - - // Persist data that should survive between events - // XX: Such data should go in the PMA once that's available, except - // the warm and hot state which should survive between events but not interpreter runs - unsafe { - let stack = &mut context.nock_context.stack; - stack.preserve(&mut context.arvo); - stack.preserve(&mut context.nock_context.cold); - stack.preserve(&mut context.nock_context.warm); - stack.preserve(&mut context.nock_context.hot); - stack.flip_top_frame(0); - } } Ok(()) @@ -365,7 +481,10 @@ fn play_life(context: &mut Context, eve: Noun) { let eved = lent(eve).expect("serf: play: boot event number failure") as u64; let arvo = slot(gat, 7).expect("serf: play: lifecycle didn't return initial Arvo"); - context.event_update(eved, arvo); + unsafe { + context.event_update(eved, arvo); + context.preserve_event_update_leftovers(); + } context.play_done(); } Err(error) => match error { @@ -384,6 +503,7 @@ fn play_list(context: &mut Context, mut lit: Noun) { let mut eve = context.event_num; while let Ok(cell) = lit.as_cell() { let ovo = cell.head(); + lit = cell.tail(); let trace_name = if context.nock_context.trace_info.is_some() { Some(format!("play [{}]", eve)) } else { @@ -398,13 +518,16 @@ fn play_list(context: &mut Context, mut lit: Noun) { .tail(); eve += 1; - context.event_update(eve, arvo); + unsafe { + context.event_update(eve, arvo); + context.nock_context.stack.preserve(&mut lit); + context.preserve_event_update_leftovers(); + } } Err(goof) => { return context.play_bail(goof); } } - lit = cell.tail(); } 
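+    // Every event in the list has been played; report completion.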
context.play_done(); } @@ -427,10 +550,14 @@ fn work(context: &mut Context, job: Noun) { match soft(context, job, trace_name) { Ok(res) => { let cell = res.as_cell().expect("serf: work: +slam returned atom"); - let fec = cell.head(); + let mut fec = cell.head(); let eve = context.event_num; - context.event_update(eve + 1, cell.tail()); + unsafe { + context.event_update(eve + 1, cell.tail()); + context.nock_context.stack.preserve(&mut fec); + context.preserve_event_update_leftovers(); + } context.work_done(fec); } Err(goof) => { @@ -447,14 +574,14 @@ fn work_swap(context: &mut Context, job: Noun, goof: Noun) { clear_interrupt(); let stack = &mut context.nock_context.stack; - context.nock_context.cache = Hamt::::new(); + context.nock_context.cache = Hamt::::new(stack); // crud ovo = [+(now) [%$ %arvo ~] [%crud goof ovo]] let job_cell = job.as_cell().expect("serf: work: job not a cell"); let job_now = job_cell.head().as_atom().expect("serf: work: now not atom"); let now = inc(stack, job_now).as_noun(); let wire = T(stack, &[D(0), D(tas!(b"arvo")), D(0)]); let crud = DirectAtom::new_panic(tas!(b"crud")); - let ovo = T(stack, &[now, wire, crud.as_noun(), goof, job_cell.tail()]); + let mut ovo = T(stack, &[now, wire, crud.as_noun(), goof, job_cell.tail()]); let trace_name = if context.nock_context.trace_info.is_some() { Some(work_trace_name( &mut context.nock_context.stack, @@ -468,10 +595,15 @@ fn work_swap(context: &mut Context, job: Noun, goof: Noun) { match soft(context, ovo, trace_name) { Ok(res) => { let cell = res.as_cell().expect("serf: work: crud +slam returned atom"); - let fec = cell.head(); + let mut fec = cell.head(); let eve = context.event_num; - context.event_update(eve + 1, cell.tail()); + unsafe { + context.event_update(eve + 1, cell.tail()); + context.nock_context.stack.preserve(&mut ovo); + context.nock_context.stack.preserve(&mut fec); + context.preserve_event_update_leftovers(); + } context.work_swap(ovo, fec); } Err(goof_crud) => { diff --git a/rust/ares/src/unifying_equality.rs b/rust/ares/src/unifying_equality.rs new file mode 100644 index 0000000..267a2f0 --- /dev/null +++ b/rust/ares/src/unifying_equality.rs @@ -0,0 +1,254 @@ +use crate::assert_acyclic; +use crate::assert_no_forwarding_pointers; +use crate::assert_no_junior_pointers; +use crate::mem::{NockStack, ALLOC, FRAME, STACK}; +use crate::noun::Noun; +use crate::persist::{pma_contains, pma_dirty}; +use either::Either::*; +use libc::{c_void, memcmp}; + +#[cfg(feature = "check_junior")] +#[macro_export] +macro_rules! assert_no_junior_pointers { + ( $x:expr, $y:expr ) => { + assert_no_alloc::permit_alloc(|| { + assert!($x.no_junior_pointers($y)); + }) + }; +} + +#[cfg(not(feature = "check_junior"))] +#[macro_export] +macro_rules! assert_no_junior_pointers { + ( $x:expr, $y:expr ) => {}; +} + +pub unsafe fn unifying_equality(stack: &mut NockStack, a: *mut Noun, b: *mut Noun) -> bool { + /* This version of unifying equality is not like that of vere. + * Vere does a tree comparison (accelerated by pointer equality and short-circuited by mug + * equality) and then unifies the nouns at the top level if they are equal. + * + * Here we recursively attempt to unify nouns. Pointer-equal nouns are already unified. + * Disequal mugs again short-circuit the unification and equality check. + * + * Since we expect atoms to be normalized, direct and indirect atoms do not unify with each + * other. For direct atoms, no unification is possible as there is no pointer involved in their + * representation. 
Equality is simply direct equality on the word representation. Indirect + * atoms require equality first of the size and then of the memory buffers' contents. + * + * Cell equality is tested (after mug and pointer equality) by attempting to unify the heads and tails, + * respectively, of cells, and then re-testing. If unification succeeds then the heads and + * tails will be pointer-wise equal and the cell itself can be unified. A failed unification of + * the head or the tail will already short-circuit the unification/equality test, so we will + * not return to re-test the pointer equality. + * + * When actually mutating references for unification, we must be careful to respect seniority. + * A reference to a more junior noun should always be replaced with a reference to a more + * senior noun, *never vice versa*, to avoid introducing references from more senior frames + * into more junior frames, which would result in incorrect operation of the copier. + */ + assert_acyclic!(*a); + assert_acyclic!(*b); + assert_no_forwarding_pointers!(*a); + assert_no_forwarding_pointers!(*b); + assert_no_junior_pointers!(stack, *a); + assert_no_junior_pointers!(stack, *b); + + // If the nouns are already word-equal we have nothing to do + if (*a).raw_equals(*b) { + return true; + }; + // If the nouns have cached mugs which are disequal we have nothing to do + if let (Ok(a_alloc), Ok(b_alloc)) = ((*a).as_allocated(), (*b).as_allocated()) { + if let (Some(a_mug), Some(b_mug)) = (a_alloc.get_cached_mug(), b_alloc.get_cached_mug()) { + if a_mug != b_mug { + return false; + }; + }; + }; + stack.frame_push(0); + *(stack.push::<(*mut Noun, *mut Noun)>()) = (a, b); + loop { + if stack.stack_is_empty() { + break; + }; + let (x, y): (*mut Noun, *mut Noun) = *(stack.top()); + if (*x).raw_equals(*y) { + stack.pop::<(*mut Noun, *mut Noun)>(); + continue; + }; + if let (Ok(x_alloc), Ok(y_alloc)) = ( + // equal direct atoms return true for raw_equals() + (*x).as_allocated(), + (*y).as_allocated(), + ) { + if let (Some(x_mug), Some(y_mug)) = (x_alloc.get_cached_mug(), y_alloc.get_cached_mug()) + { + if x_mug != y_mug { + break; // short-circuit, the mugs differ therefore the nouns must differ + } + }; + match (x_alloc.as_either(), y_alloc.as_either()) { + (Left(x_indirect), Left(y_indirect)) => { + let x_as_ptr = x_indirect.to_raw_pointer(); + let y_as_ptr = y_indirect.to_raw_pointer(); + if x_indirect.size() == y_indirect.size() + && memcmp( + x_indirect.data_pointer() as *const c_void, + y_indirect.data_pointer() as *const c_void, + x_indirect.size() << 3, + ) == 0 + { + let (_senior, junior) = senior_pointer_first(stack, x_as_ptr, y_as_ptr); + if x_as_ptr == junior { + if pma_contains(x, 1) { + pma_dirty(x, 1); + } + *x = *y; + } else { + if pma_contains(y, 1) { + pma_dirty(y, 1); + } + *y = *x; + } + stack.pop::<(*mut Noun, *mut Noun)>(); + continue; + } else { + break; + } + } + (Right(x_cell), Right(y_cell)) => { + let x_as_ptr = x_cell.to_raw_pointer() as *const u64; + let y_as_ptr = y_cell.to_raw_pointer() as *const u64; + if x_cell.head().raw_equals(y_cell.head()) + && x_cell.tail().raw_equals(y_cell.tail()) + { + let (_senior, junior) = senior_pointer_first(stack, x_as_ptr, y_as_ptr); + if x_as_ptr == junior { + if pma_contains(x, 1) { + pma_dirty(x, 1); + } + *x = *y; + } else { + if pma_contains(y, 1) { + pma_dirty(y, 1); + } + *y = *x; + } + stack.pop::<(*mut Noun, *mut Noun)>(); + continue; + } else { + /* THIS ISN'T AN INFINITE LOOP + * If we discover a disequality in either side, we will + * 
short-circuit the entire loop and reset the work stack. + * + * If both sides are equal, then we will discover pointer + * equality when we return and unify the cell. + */ + *(stack.push::<(*mut Noun, *mut Noun)>()) = + (x_cell.tail_as_mut(), y_cell.tail_as_mut()); + *(stack.push::<(*mut Noun, *mut Noun)>()) = + (x_cell.head_as_mut(), y_cell.head_as_mut()); + continue; + } + } + (_, _) => { + break; // cells don't unify with atoms + } + } + } else { + break; // direct atom not raw equal, so short circuit + } + } + stack.frame_pop(); + + assert_acyclic!(*a); + assert_acyclic!(*b); + assert_no_forwarding_pointers!(*a); + assert_no_forwarding_pointers!(*b); + assert_no_junior_pointers!(stack, *a); + assert_no_junior_pointers!(stack, *b); + + (*a).raw_equals(*b) +} + +unsafe fn senior_pointer_first( + stack: &NockStack, + a: *const u64, + b: *const u64, +) -> (*const u64, *const u64) { + let mut frame_pointer: *const u64 = stack.get_frame_pointer(); + let mut stack_pointer: *const u64 = stack.get_stack_pointer(); + let mut alloc_pointer: *const u64 = stack.get_alloc_pointer(); + let prev_stack_pointer = *(stack.prev_stack_pointer_pointer()); + + let (mut high_pointer, mut low_pointer): (*const u64, *const u64) = if stack.is_west() { + (prev_stack_pointer, alloc_pointer) + } else { + (alloc_pointer, prev_stack_pointer) + }; + + loop { + if low_pointer.is_null() || high_pointer.is_null() { + // we found the bottom of the stack; check entirety of the stack + low_pointer = stack.get_start(); + high_pointer = stack.get_start().add(stack.get_size()); + } + + match ( + a < high_pointer && a >= low_pointer, + b < high_pointer && b >= low_pointer, + ) { + (true, true) => { + // both pointers are in the same frame, pick arbitrarily (lower in mem) + break lower_pointer_first(a, b); + } + (true, false) => break (b, a), // a is in the frame, b is not, so b is senior + (false, true) => break (a, b), // b is in the frame, a is not, so a is senior + (false, false) => { + // chase up the stack + #[allow(clippy::comparison_chain)] + // test to see if the frame under consideration is a west frame + if stack_pointer < alloc_pointer { + stack_pointer = *(frame_pointer.sub(STACK + 1)) as *const u64; + alloc_pointer = *(frame_pointer.sub(ALLOC + 1)) as *const u64; + frame_pointer = *(frame_pointer.sub(FRAME + 1)) as *const u64; + + // both pointers are in the PMA, pick arbitrarily (lower in mem) + if frame_pointer.is_null() { + break lower_pointer_first(a, b); + }; + + // previous allocation pointer + high_pointer = alloc_pointer; + // "previous previous" stack pointer. this is the other boundary of the previous allocation arena + low_pointer = *(frame_pointer.add(STACK)) as *const u64; + } else if stack_pointer > alloc_pointer { + stack_pointer = *(frame_pointer.add(STACK)) as *const u64; + alloc_pointer = *(frame_pointer.add(ALLOC)) as *const u64; + frame_pointer = *(frame_pointer.add(FRAME)) as *const u64; + + // both pointers are in the PMA, pick arbitrarily (lower in mem) + if frame_pointer.is_null() { + break lower_pointer_first(a, b); + }; + + // previous allocation pointer + low_pointer = alloc_pointer; + // "previous previous" stack pointer. 
this is the other boundary of the previous allocation arena + high_pointer = *(frame_pointer.sub(STACK + 1)) as *const u64; + } else { + panic!("senior_pointer_first: stack_pointer == alloc_pointer"); + } + } + } + } +} + +fn lower_pointer_first(a: *const u64, b: *const u64) -> (*const u64, *const u64) { + if a < b { + (a, b) + } else { + (b, a) + } +} diff --git a/rust/ares_pma/Cargo.lock b/rust/ares_pma/Cargo.lock new file mode 100644 index 0000000..a4dfb7b --- /dev/null +++ b/rust/ares_pma/Cargo.lock @@ -0,0 +1,454 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "ares_pma" +version = "0.1.0" +dependencies = [ + "bindgen", + "cc", +] + +[[package]] +name = "bindgen" +version = "0.69.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ffcebc3849946a7170a05992aac39da343a90676ab392c51a4280981d6379c2" +dependencies = [ + "bitflags", + "cexpr", + "clang-sys", + "lazy_static", + "lazycell", + "log", + "peeking_take_while", + "prettyplease", + "proc-macro2", + "quote", + "regex", + "rustc-hash", + "shlex", + "syn", + "which", +] + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cexpr" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" +dependencies = [ + "nom", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clang-sys" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c688fc74432808e3eb684cae8830a86be1d66a2bd58e1f248ed0960a590baf6f" +dependencies = [ + "glob", + "libc", + "libloading", +] + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lazycell" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" + +[[package]] +name = "libc" +version = "0.2.150" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" + +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "969488b55f8ac402214f3f5fd243ebb7206cf82de60d3172994707a4bcc2b829" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "peeking_take_while" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" + +[[package]] +name = "prettyplease" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae005bd773ab59b4725093fd7df83fd7892f7d8eafb48dbd7de6e024e4215f9d" +dependencies = [ + "proc-macro2", + "syn", +] + +[[package]] +name = "proc-macro2" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "rustix" +version = "0.38.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc99bc2d4f1fed22595588a013687477aedf3cdcfb26558c559edb67b4d9b22e" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.48.0", +] + +[[package]] +name = "shlex" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7cee0529a6d40f580e7a5e6c495c8fbfe21b7b52795ed4bb5e62cdf92bc6380" + +[[package]] +name = "syn" +version = "2.0.39" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e78b90f2fcf45d3e842032ce32e3f2d1545ba6636271dcbf24fa306d87be7a" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "which" +version = "4.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" +dependencies = [ + "either", + "home", + "once_cell", + "rustix", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + 
"windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" diff --git a/rust/ares_pma/Cargo.toml b/rust/ares_pma/Cargo.toml new file mode 100644 index 0000000..94612e4 --- /dev/null +++ b/rust/ares_pma/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "ares_pma" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[build-dependencies] +bindgen = "0.69.1" +cc = "1.0" + +[features] +debug_prints = [] diff --git a/rust/ares_pma/build.rs 
b/rust/ares_pma/build.rs new file mode 100644 index 0000000..22ec4be --- /dev/null +++ b/rust/ares_pma/build.rs @@ -0,0 +1,83 @@ +extern crate bindgen; + +use std::env; +use std::path::PathBuf; + +fn main() { + let opt_level = env::var("OPT_LEVEL").unwrap(); + let define_debug = if env::var("CARGO_FEATURE_DEBUG_PRINTS").is_ok() { + "-DDEBUG" + } else { + "-UDEBUG" + }; + + // This is the directory where the `c` library is located. + let libdir_path = PathBuf::from("c-src") + // Canonicalize the path as `rustc-link-search` requires an absolute + // path. + .canonicalize() + .expect("cannot canonicalize path"); + let libdir_path_str = libdir_path.to_str().expect("Path is not a valid string"); + + // This is the path to the `c` headers file. + let headers_path = libdir_path.join("wrapper.h"); + let headers_path_str = headers_path.to_str().expect("Path is not a valid string"); + + println!("cargo:rerun-if-changed={}", libdir_path_str); + + let res = cc::Build::new() + .file( + libdir_path + .join("btree.c") + .to_str() + .expect("Path is not a valid string"), + ) + .file( + libdir_path + .join("lib") + .join("checksum.c") + .to_str() + .expect("Path is not a valid string"), + ) + .flag(format!("-O{}", opt_level).as_ref()) + .flag(define_debug) + .flag("-g3") + .flag("-Wall") + .flag("-Wextra") + .flag("-Wpedantic") + .flag("-Wformat=2") + .flag("-Wno-unused-parameter") + .flag("-Wshadow") + .flag("-Wwrite-strings") + .flag("-Wstrict-prototypes") + .flag("-Wold-style-definition") + .flag("-Wredundant-decls") + .flag("-Wnested-externs") + .flag("-Wmissing-include-dirs") + .try_compile("btree"); + + if let Err(err) = res { + panic!("{}", err); + } + + // The bindgen::Builder is the main entry point + // to bindgen, and lets you build up options for + // the resulting bindings. + let bindings = bindgen::Builder::default() + // The input header we would like to generate + // bindings for. + .header(headers_path_str) + // Tell cargo to invalidate the built crate whenever any of the + // included header files changed. + .parse_callbacks(Box::new(bindgen::CargoCallbacks::new())) + // Finish the builder and generate the bindings. + .generate() + // Unwrap the Result and panic on failure. + .expect("Unable to generate bindings"); + + // Write the bindings to the $OUT_DIR/bindings.rs file. 
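+    // OUT_DIR is set by cargo for build-script output files.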
+ let out_path = PathBuf::from(env::var("OUT_DIR").unwrap()).join("bindings.rs"); + bindings + .write_to_file(out_path) + .expect("Couldn't write bindings!"); +} diff --git a/rust/ares_pma/c-src/btest.c b/rust/ares_pma/c-src/btest.c new file mode 100644 index 0000000..0191df2 --- /dev/null +++ b/rust/ares_pma/c-src/btest.c @@ -0,0 +1,298 @@ +#include "btree.h" +#include "btree.c" + +#include +#include + +static void +_test_nodeinteg(BT_state *state, BT_findpath *path, + vaof_t lo, vaof_t hi, pgno_t pg) +{ + size_t childidx = 0; + BT_page *parent = 0; + + assert(SUCC(_bt_find(state, path, lo, hi))); + parent = path->path[path->depth]; + childidx = path->idx[path->depth]; + assert(parent->datk[childidx].fo == pg); + assert(parent->datk[childidx].va == lo); + assert(parent->datk[childidx+1].va == hi); +} + +static size_t +_mlist_sizep(BT_mlistnode *head) +/* calculate the size of the mlist in pages */ +{ + size_t sz = 0; + while (head) { + size_t sz_p = addr2off(head->hi) - addr2off(head->lo); + sz += sz_p; + head = head->next; + } + return sz; +} + +static size_t +_flist_sizep(BT_flistnode *head) +/* calculate the size of the flist in pages */ +{ + size_t sz = 0; + while (head) { + size_t sz_p = head->hi - head->lo; + sz += sz_p; + head = head->next; + } + return sz; +} + +static BT_mlistnode * +_mlist_copy(BT_state *state) +{ + BT_mlistnode *head = state->mlist; + BT_mlistnode *ret, *prev; + ret = prev = calloc(1, sizeof *ret); + memcpy(ret, head, sizeof *head); + ret->next = 0; + head = head->next; + while (head) { + BT_mlistnode *copy = calloc(1, sizeof *copy); + memcpy(copy, head, sizeof *head); + prev->next = copy; + prev = copy; + head = head->next; + } + return ret; +} + +static BT_nlistnode * +_nlist_copy(BT_state *state) +{ + BT_nlistnode *head = state->nlist; + BT_nlistnode *ret, *prev; + ret = prev = calloc(1, sizeof *ret); + memcpy(ret, head, sizeof *head); + ret->next = 0; + head = head->next; + while (head) { + BT_nlistnode *copy = calloc(1, sizeof *copy); + memcpy(copy, head, sizeof *head); + prev->next = copy; + prev = copy; + head = head->next; + } + return ret; +} + +static BT_flistnode * +_flist_copy(BT_state *state) +{ + BT_flistnode *head = state->flist; + BT_flistnode *ret, *prev; + ret = prev = calloc(1, sizeof *ret); + memcpy(ret, head, sizeof *head); + ret->next = 0; + head = head->next; + while (head) { + BT_flistnode *copy = calloc(1, sizeof *copy); + memcpy(copy, head, sizeof *head); + prev->next = copy; + prev = copy; + head = head->next; + } + return ret; +} + +static int +_mlist_eq(BT_mlistnode *l, BT_mlistnode *r) +{ + while (l && r) { + if (l->lo != r->lo) + bp(0); + if (l->hi != r->hi) + bp(0); + l = l->next; r = r->next; + } + if (l == 0 && r == 0) + return 1; + bp(0); +} + +static int +_nlist_eq(BT_nlistnode *l, BT_nlistnode *r) +{ + while (l && r) { + if (l->lo != r->lo) + bp(0); + if (l->hi != r->hi) + bp(0); + l = l->next; r = r->next; + } + if (l == 0 && r == 0) + return 1; + bp(0); +} + +static int +_flist_eq(BT_flistnode *l, BT_flistnode *r) +{ + while (l && r) { + if (l->lo != r->lo) + bp(0); + if (l->hi != r->hi) + bp(0); + l = l->next; r = r->next; + } + if (l == 0 && r == 0) + return 1; + bp(0); +} + +int main(int argc, char *argv[]) +{ + DPRINTF("PMA Max Storage: %lld", ((uint64_t)UINT32_MAX * BT_PAGESIZE) - BLK_BASE_LEN_TOTAL); + DPUTS("PMA Tests"); + + BT_state *state1; + BT_findpath path = {0}; + int rc = 0; + + + DPUTS("== test 1: insert"); + + bt_state_new(&state1); + if (mkdir("./pmatest1", 0774) == -1) + return errno; + 
assert(SUCC(bt_state_open(state1, "./pmatest1", 0, 0644))); + +#define LOWEST_ADDR 0x2aaa80; + vaof_t lo = LOWEST_ADDR; + vaof_t hi = 0xDEADBEEF; + pgno_t pg = 1; /* dummy value */ + for (size_t i = 0; i < BT_DAT_MAXKEYS * 4; ++i) { + _bt_insert(state1, lo, hi, pg); + _test_nodeinteg(state1, &path, lo, hi, pg); + lo++; pg++; + } + + bt_state_close(state1); + + + DPUTS("== test 2: malloc"); + BT_state *state2; + + bt_state_new(&state2); + if (mkdir("./pmatest2", 0774) == -1) + return errno; + assert(SUCC(bt_state_open(state2, "./pmatest2", 0, 0644))); + + void *t2a = bt_malloc(state2, 10); + bt_free(state2, t2a, (BT_page*)t2a + 10); + void *t2b = bt_malloc(state2, 10); + /* should have pulled the same pointer due to eager mlist coalescing */ + assert(t2a == t2b); + ZERO(&path, sizeof path); + _bt_find(state2, &path, addr2off(t2b), addr2off((BT_page *)t2b + 10)); +#define T2P1_PRNT0 (path.path[path.depth]) +#define T2P1_CIDX0 (path.idx[path.depth]) +#define T2P1_CIDX1 (path.idx[path.depth] + 1) + /* check length as represented in btree */ + assert(T2P1_PRNT0->datk[T2P1_CIDX1].va + - T2P1_PRNT0->datk[T2P1_CIDX0].va + == 10); + bt_free(state2, t2b, (BT_page*)t2b + 10); + ZERO(&path, sizeof path); + _bt_find(state2, &path, addr2off(t2b), addr2off((BT_page *)t2b + 10)); + /* fo should be zero (free) */ + assert(path.path[path.depth]->datk[path.idx[path.depth]].fo == 0); + /* should invoke deletion coalescing - 10 page free range in btree */ + void *t2c = bt_malloc(state2, 20); + + bt_state_close(state2); + + + DPUTS("== test 3: ephemeral structure restoration"); + BT_state *state3; + + bt_state_new(&state3); + if (mkdir("./pmatest3", 0774) == -1) + return errno; + assert(SUCC(bt_state_open(state3, "./pmatest3", 0, 0644))); + + typedef struct lohi_pair lohi_pair; + struct lohi_pair + { + BT_page *lo; + BT_page *hi; + }; + +#define ITERATIONS 1000 +#define MAXALLOCPG 0xFF + lohi_pair allocs[ITERATIONS] = {0}; + size_t alloc_sizp = 0; + size_t flist_sizp = _flist_sizep(state3->flist); + size_t mlist_sizp = _mlist_sizep(state3->mlist); + BT_meta *meta = state3->meta_pages[state3->which]; + BT_page *root = _node_get(state3, meta->root); + size_t N; + for (size_t i = 0; i < ITERATIONS; i++) { + /* malloc a random number of pages <= 256 and store in the allocs array */ + int pages = random(); + pages &= MAXALLOCPG; + pages += 1; + allocs[i].lo = bt_malloc(state3, pages); + allocs[i].hi = allocs[i].lo + pages; + alloc_sizp += pages; + /* validate size changes to mlist and flist */ + assert(_flist_sizep(state3->flist) + == (flist_sizp - alloc_sizp)); + assert(_mlist_sizep(state3->mlist) + == (mlist_sizp - alloc_sizp)); + N = _bt_numkeys(root); + assert(root->datk[N-2].fo == 0); + } + + /* sync the state */ + /* bt_sync(state3); */ + + /* TODO: close and reopen state. 
validate ephemeral structures */ + + flist_sizp = _flist_sizep(state3->flist); + mlist_sizp = _mlist_sizep(state3->mlist); + alloc_sizp = 0; + /* for (size_t i = 0; i < ITERATIONS / 2; i++) { */ + /* /\* free half of the allocations *\/ */ + /* bt_free(state3, allocs[i].lo, allocs[i].hi); */ + /* alloc_sizp += allocs[i].hi - allocs[i].lo; */ + /* /\* validate size changes to mlist *\/ */ + /* assert(_mlist_sizep(state3->mlist) */ + /* == (mlist_sizp + alloc_sizp)); */ + /* } */ + + /* copy ephemeral structures */ + BT_mlistnode *mlist_copy = _mlist_copy(state3); + BT_nlistnode *nlist_copy = _nlist_copy(state3); + BT_flistnode *flist_copy = _flist_copy(state3); + assert(_mlist_eq(mlist_copy, state3->mlist)); + assert(_nlist_eq(nlist_copy, state3->nlist)); + assert(_flist_eq(flist_copy, state3->flist)); + + meta = state3->meta_pages[state3->which]; + BT_meta metacopy = {0}; + memcpy(&metacopy, meta, sizeof metacopy); + + bt_state_close(state3); + + bt_state_new(&state3); + + assert(SUCC(bt_state_open(state3, "./pmatest3", 0, 0644))); + + /* compare for equality copies of ephemeral structures with restored ephemeral + structures */ + meta = state3->meta_pages[state3->which]; + assert(meta->root == metacopy.root); + assert(_mlist_eq(mlist_copy, state3->mlist)); + assert(_nlist_eq(nlist_copy, state3->nlist)); + assert(_flist_eq(flist_copy, state3->flist)); + + return 0; +} diff --git a/rust/ares_pma/c-src/btree.c b/rust/ares_pma/c-src/btree.c new file mode 100644 index 0000000..e5c9b0e --- /dev/null +++ b/rust/ares_pma/c-src/btree.c @@ -0,0 +1,3199 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "btree.h" +#include "lib/checksum.h" + +typedef uint32_t pgno_t; /* a page number */ +typedef uint32_t vaof_t; /* a virtual address offset */ +typedef uint32_t flag_t; +typedef unsigned char BYTE; + +//// =========================================================================== +//// tmp tmp tmp tmp tmp +/* ;;: remove -- for debugging */ +/* + bp(X) where X is false will raise a SIGTRAP. If the process is being run + inside a debugger, this can be caught and ignored. It's equivalent to a + breakpoint. If run without a debugger, it will dump core, like an assert +*/ +#ifdef DEBUG +#if defined(__i386__) || defined(__x86_64__) +#define bp(x) do { if(!(x)) __asm__ volatile("int $3"); } while (0) +#elif defined(__thumb__) +#define bp(x) do { if(!(x)) __asm__ volatile(".inst 0xde01"); } while (0) +#elif defined(__aarch64__) +#define bp(x) do { if(!(x)) __asm__ volatile(".inst 0xd4200000"); } while (0) +#elif defined(__arm__) +#define bp(x) do { if(!(x)) __asm__ volatile(".inst 0xe7f001f0"); } while (0) +#else +STATIC_ASSERT(0, "debugger break instruction unimplemented"); +#endif +#else +#define bp(x) ((void)(0)) +#endif + +/* coalescing of memory freelist currently prohibited since we haven't + implemented coalescing of btree nodes (necessary) */ +#define CAN_COALESCE 0 +/* ;;: remove once confident in logic and delete all code dependencies on + state->node_freelist */ + +/* prints a node before and after a call to _bt_insertdat */ +#define DEBUG_PRINTNODE 0 + +#define ZERO(s, n) memset((s), 0, (n)) + +#define S7(A, B, C, D, E, F, G) A##B##C##D##E##F##G +#define S6(A, B, C, D, E, F, ...) S7(A, B, C, D, E, F, __VA_ARGS__) +#define S5(A, B, C, D, E, ...) S6(A, B, C, D, E, __VA_ARGS__) +#define S4(A, B, C, D, ...) S5(A, B, C, D, __VA_ARGS__) +#define S3(A, B, C, ...) S4(A, B, C, __VA_ARGS__) +#define S2(A, B, ...) 
S3(A, B, __VA_ARGS__) +#define S(A, ...) S2(A, __VA_ARGS__) + +#define KBYTES(x) ((size_t)(x) << 10) +#define MBYTES(x) ((size_t)(x) << 20) +#define GBYTES(x) ((size_t)(x) << 30) +#define TBYTES(x) ((size_t)(x) << 40) +#define PBYTES(x) ((size_t)(x) << 50) + +/* 4K page in bytes */ +#define P2BYTES(x) ((size_t)(x) << BT_PAGEBITS) +/* the opposite of P2BYTES */ +#define B2PAGES(x) ((size_t)(x) >> BT_PAGEBITS) + + +#define __packed __attribute__((__packed__)) +#define UNUSED(x) ((void)(x)) + +#ifdef DEBUG +# define DPRINTF(fmt, ...) \ + fprintf(stderr, "%s:%d " fmt "\n", __func__, __LINE__, __VA_ARGS__) +#else +# define DPRINTF(fmt, ...) ((void) 0) +#endif +#define DPUTS(arg) DPRINTF("%s", arg) +#define TRACE(...) DPUTS("") + +#define BT_SUCC 0 +#define SUCC(x) ((x) == BT_SUCC) + +/* given a pointer p returns the low page-aligned addr */ +#define LO_ALIGN_PAGE(p) ((BT_page *)(((uintptr_t)p) & ~(BT_PAGESIZE - 1))) + + +#define BT_MAPADDR ((BYTE *) S(0x1000,0000,0000)) + +static inline vaof_t +addr2off(void *p) +/* convert a pointer into a 32-bit page offset */ +{ + uintptr_t pu = (uintptr_t)p; + assert(pu >= (uintptr_t)BT_MAPADDR); + pu -= (uintptr_t)BT_MAPADDR; + assert((pu & ((1 << BT_PAGEBITS) - 1)) == 0); /* p must be page-aligned */ + return (vaof_t)(pu >> BT_PAGEBITS); +} + +static inline void * +off2addr(vaof_t off) +/* convert a 32-bit page offset into a pointer */ +{ + uintptr_t pu = (uintptr_t)off << BT_PAGEBITS; + pu += (uintptr_t)BT_MAPADDR; + return (void *)pu; +} + +#define BT_PAGEWORD 32ULL +#define BT_NUMMETAS 2 /* 2 metapages */ +#define BT_META_SECTION_WIDTH (BT_NUMMETAS * BT_PAGESIZE) +#define BT_ADDRSIZE (BT_PAGESIZE << BT_PAGEWORD) +#define PMA_GROW_SIZE (BT_PAGESIZE * 1024 * 64) + +#define BT_NOPAGE 0 + +#define BT_PROT_CLEAN (PROT_READ) +#define BT_FLAG_CLEAN (MAP_FIXED | MAP_SHARED) +#define BT_PROT_FREE (PROT_NONE) +#define BT_FLAG_FREE (MAP_ANONYMOUS | MAP_SHARED | MAP_FIXED | MAP_NORESERVE) +#define BT_PROT_DIRTY (PROT_READ | PROT_WRITE) +#define BT_FLAG_DIRTY (MAP_FIXED | MAP_SHARED) + +/* + FO2BY: file offset to byte + get byte INDEX into pma map from file offset +*/ +#define FO2BY(fo) \ + ((uint64_t)(fo) << BT_PAGEBITS) + +/* + BY2FO: byte to file offset + get pgno from byte INDEX into pma map +*/ +#define BY2FO(p) \ + ((pgno_t)((p) >> BT_PAGEBITS)) + +/* + FO2PA: file offset to page + get a reference to a BT_page from a file offset + + ;;: can simplify: + + ((BT_page*)state->map)[fo] +*/ +#define FO2PA(map, fo) \ + ((BT_page *)&(map)[FO2BY(fo)]) + +/* NMEMB: number of members in array, a */ +#define NMEMB(a) \ + (sizeof(a) / sizeof(a[0])) + +#define offsetof(st, m) \ + __builtin_offsetof(st, m) + + +//// =========================================================================== +//// btree types + +/* + btree page header. all pages share this header. Though for metapages, you can + expect it to be zeroed out. 
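+  The dirty field is a bitmap with one bit per child entry: its 256 bytes hold
+  2048 bits (the bound asserted in _bt_ischilddirty/_bt_dirtychild), and bit i
+  is kept at dirty[i >> 3] under mask (1 << (i & 0x7)).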
+*/ +typedef struct BT_pageheader BT_pageheader; +struct BT_pageheader { + uint8_t dirty[256]; /* dirty bit map */ +} __packed; + +/* + btree key/value data format + + BT_dat is used to provide a view of the data section in a BT_page where data is + stored like: + va fo va fo + bytes 0 4 8 12 + + The convenience macros given an index into the data array do the following: + BT_dat_lo(i) returns ith va (low addr) + BT_dat_hi(i) returns i+1th va (high addr) + BT_dat_fo(i) returns ith file offset +*/ +typedef union BT_dat BT_dat; +union BT_dat { + vaof_t va; /* virtual address offset */ + pgno_t fo; /* file offset */ +}; + +/* like BT_dat but when a struct is more useful than a union */ +typedef struct BT_kv BT_kv; +struct BT_kv { + vaof_t va; + pgno_t fo; +}; + +/* ;;: todo, perhaps rather than an index, return the data directly and typecast?? */ +#define BT_dat_lo(i) ((i) * 2) +#define BT_dat_fo(i) ((i) * 2 + 1) +#define BT_dat_hi(i) ((i) * 2 + 2) + +#define BT_dat_lo2(I, dat) +#define BT_dat_fo2(I, dat) +#define BT_dat_hi2(I, dat) + +/* BT_dat_maxva: pointer to highest va in page data section */ +#define BT_dat_maxva(p) \ + ((void *)&(p)->datd[BT_dat_lo(BT_DAT_MAXKEYS)]) + +/* BT_dat_maxfo: pointer to highest fo in page data section */ +#define BT_dat_maxfo(p) \ + ((void *)&(p)->datd[BT_dat_fo(BT_DAT_MAXVALS)]) + +#define BT_DAT_MAXBYTES (BT_PAGESIZE - sizeof(BT_pageheader)) +#define BT_DAT_MAXENTRIES (BT_DAT_MAXBYTES / sizeof(BT_dat)) +#define BT_DAT_MAXKEYS (BT_DAT_MAXENTRIES / 2) +/* #define BT_DAT_MAXKEYS 10 */ +#define BT_DAT_MAXVALS BT_DAT_MAXKEYS +static_assert(BT_DAT_MAXENTRIES % 2 == 0); +/* we assume off_t is 64 bit */ +static_assert(sizeof(off_t) == sizeof(uint64_t)); + +/* + all pages in the memory arena consist of a header and data section +*/ +typedef struct BT_page BT_page; +struct BT_page { + BT_pageheader head; /* header */ + union { /* data section */ + BT_dat datd[BT_DAT_MAXENTRIES]; /* union view */ + BT_kv datk[BT_DAT_MAXKEYS]; /* struct view */ + BYTE datc[BT_DAT_MAXBYTES]; /* byte-level view */ + }; +}; +static_assert(sizeof(BT_page) == BT_PAGESIZE); +static_assert(BT_DAT_MAXBYTES % sizeof(BT_dat) == 0); + +#define BT_MAGIC 0xBADDBABE +#define BT_VERSION 1 +/* + a meta page is like any other page, but the data section is used to store + additional information +*/ +#define BLK_BASE_LEN0 (MBYTES(2) - BT_META_SECTION_WIDTH) +#define BLK_BASE_LEN1 (MBYTES(8)) +#define BLK_BASE_LEN2 (BLK_BASE_LEN1 * 4) +#define BLK_BASE_LEN3 (BLK_BASE_LEN2 * 4) +#define BLK_BASE_LEN4 (BLK_BASE_LEN3 * 4) +#define BLK_BASE_LEN5 (BLK_BASE_LEN4 * 4) +#define BLK_BASE_LEN6 (BLK_BASE_LEN5 * 4) +#define BLK_BASE_LEN7 (BLK_BASE_LEN6 * 4) +#define BLK_BASE_LEN_TOTAL ( \ + BT_META_SECTION_WIDTH + \ + BLK_BASE_LEN0 + \ + BLK_BASE_LEN1 + \ + BLK_BASE_LEN2 + \ + BLK_BASE_LEN3 + \ + BLK_BASE_LEN4 + \ + BLK_BASE_LEN5 + \ + BLK_BASE_LEN6 + \ + BLK_BASE_LEN7) +typedef struct BT_meta BT_meta; +struct BT_meta { +#define BT_NUMROOTS 32 + uint32_t magic; + uint32_t version; + pgno_t last_pg; /* last page used in file */ + uint32_t _pad0; + uint64_t txnid; + void *fix_addr; /* fixed addr of btree */ + pgno_t blk_base[8]; /* block base array for striped node partition */ + /* ;;: for the blk_base array, code may be simpler if this were an array of + BT_page *. 
*/ + uint8_t blk_cnt; /* currently highest valid block base */ + uint8_t depth; /* tree depth */ +#define BP_META ((uint8_t)0x02) + uint8_t flags; + uint8_t _pad1; + pgno_t root; + /* 64bit alignment manually checked - 72 bytes total above */ + uint64_t roots[BT_NUMROOTS]; /* for usage by ares */ + uint32_t chk; /* checksum */ +} __packed; +static_assert(sizeof(BT_meta) <= BT_DAT_MAXBYTES); + +/* the length of the metapage up to but excluding the checksum */ +#define BT_META_LEN (offsetof(BT_meta, chk)) + +#define BT_roots_bytelen (sizeof(BT_meta) - offsetof(BT_meta, roots)) + +typedef struct BT_mlistnode BT_mlistnode; +struct BT_mlistnode { + /* ;;: lo and hi might as well by (BT_page *) because we don't have any reason + to have finer granularity */ + BYTE *lo; /* low virtual address */ + BYTE *hi; /* high virtual address */ + BT_mlistnode *next; /* next freelist node */ +}; + +typedef struct BT_nlistnode BT_nlistnode; +struct BT_nlistnode { + BT_page *lo; /* low virtual address */ + BT_page *hi; /* high virtual address */ + BT_nlistnode *next; /* next freelist node */ +}; + +typedef struct BT_flistnode BT_flistnode; +struct BT_flistnode { + pgno_t lo; /* low pgno in persistent file */ + pgno_t hi; /* high pgno in persistent file */ + BT_flistnode *next; /* next freelist node */ +}; + +/* macro to access the metadata stored in a page's data section */ +#define METADATA(p) ((BT_meta *)(void *)(p)->datc) + +typedef struct BT_state BT_state; +struct BT_state { + int data_fd; + char *path; + void *fixaddr; + BYTE *map; + BT_meta *meta_pages[2]; /* double buffered */ + /* ;;: note, while meta_pages[which]->root stores a pgno, we may want to just + store a pointer to root in state in addition to avoid a _node_find on it + every time it's referenced */ + /* BT_page *root; */ + off_t file_size; /* the size of the pma file in bytes */ + pgno_t frontier; /* last non-free page in use by pma (exclusive) */ + unsigned int which; /* which double-buffered db are we using? */ + BT_nlistnode *nlist; /* node freelist */ + BT_mlistnode *mlist; /* memory freelist */ + BT_flistnode *flist; /* pma file freelist */ + BT_flistnode *pending_flist; + BT_nlistnode *pending_nlist; +}; + +/* + ;;: wrt to frontier: if you need to allocate space for data, push the frontier + out by that amount allocated. If you're allocating a new stripe, push it to + the end of that stripe. +*/ + + +//// =========================================================================== +//// btree internal routines + +static void _bt_printnode(BT_page *node) __attribute__((unused)); /* ;;: tmp */ +static int +_bt_insertdat(vaof_t lo, vaof_t hi, pgno_t fo, + BT_page *parent, size_t childidx); /* ;;: tmp */ + +static int _bt_flip_meta(BT_state *); + + +#define BT_MAXDEPTH 4 /* ;;: todo derive it */ +typedef struct BT_findpath BT_findpath; +struct BT_findpath { + BT_page *path[BT_MAXDEPTH]; + size_t idx[BT_MAXDEPTH]; + uint8_t depth; +}; + +/* _node_get: get a pointer to a node stored at file offset pgno */ +static BT_page * +_node_get(BT_state *state, pgno_t pgno) +{ + /* TODO: eventually, once we can store more than 2M of nodes, this will need + to reference the meta page's blk_base array to determine where a node is + mapped. i.e: + + - receive pgno + - find first pgno in blk_base that exceeds pgno : i + - sector that contains node is i-1 + - appropriately offset into i-1th fixed size partition: 2M, 8M, 16M, ... 
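+       (the fixed partition lengths are given by the BLK_BASE_LEN* defines above:
+       2M, then 8M, with each later stripe 4x the previous, i.e. 32M, 128M, ...)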
+ + */ + + /* for now, this works because the 2M sector is at the beginning of both the + memory arena and pma file + */ + if (pgno <= 1) return 0; /* no nodes stored at 0 and 1 (metapages) */ + /* TODO: when partition striping is implemented, a call beyond the furthest + block base should result in the allocation of a new block base */ + assert((pgno * BT_PAGESIZE) < MBYTES(2)); + return FO2PA(state->map, pgno); +} + +/* ;;: I don't think we should need this if _bt_nalloc also returns a disc offset */ +static pgno_t +_fo_get(BT_state *state, BT_page *node) +{ + uintptr_t vaddr = (uintptr_t)node; + uintptr_t start = (uintptr_t)state->map; + return BY2FO(vaddr - start); +} + +static void +_mlist_record_alloc(BT_state *state, void *lo, void *hi) +{ + BT_mlistnode **head = &state->mlist; + BYTE *lob = lo; + BYTE *hib = hi; + while (*head) { + /* found chunk */ + if ((*head)->lo <= lob && (*head)->hi >= hib) + break; + assert((*head)->next); + head = &(*head)->next; + } + + if (hib < (*head)->hi) { + if (lob > (*head)->lo) { + BT_mlistnode *left = *head; + BT_mlistnode *right = calloc(1, sizeof *right); + right->hi = left->hi; + right->lo = hib; + right->next = left->next; + left->hi = lob; + left->next = right; + } + else { + /* lob equal */ + (*head)->lo = hib; + } + } + else if (lob > (*head)->lo) { + /* hib equal */ + (*head)->hi = lob; + } + else { + /* equals */ + BT_mlistnode *next = (*head)->next; + free(*head); + *head = next; + } +} + +static void +_nlist_record_alloc(BT_state *state, BT_page *lo) +{ + BT_nlistnode **head = &state->nlist; + BT_page *hi = lo + 1; + while (*head) { + /* found chunk */ + if ((*head)->lo <= lo && (*head)->hi >= hi) + break; + assert((*head)->next); + head = &(*head)->next; + } + + if (hi < (*head)->hi) { + if (lo > (*head)->lo) { + BT_nlistnode *left = *head; + BT_nlistnode *right = calloc(1, sizeof *right); + right->hi = left->hi; + right->lo = hi; + right->next = left->next; + left->hi = lo; + left->next = right; + } + else { + /* lo equal */ + (*head)->lo = hi; + } + } + else if (lo > (*head)->lo) { + /* hi equal */ + (*head)->hi = lo; + } + else { + /* equals */ + BT_nlistnode *next = (*head)->next; + free(*head); + *head = next; + } +} + +static void +_flist_record_alloc(BT_state *state, pgno_t lo, pgno_t hi) +{ + BT_flistnode **head = &state->flist; + while (*head) { + /* found chunk */ + if ((*head)->lo <= lo && (*head)->hi >= hi) + break; + assert((*head)->next); + head = &(*head)->next; + } + + if (hi < (*head)->hi) { + if (lo > (*head)->lo) { + BT_flistnode *left = *head; + BT_flistnode *right = calloc(1, sizeof *right); + right->hi = left->hi; + right->lo = hi; + right->next = left->next; + left->hi = lo; + left->next = right; + } + else { + /* lo equal */ + (*head)->lo = hi; + } + } + else if (lo > (*head)->lo) { + /* hi equal */ + (*head)->hi = lo; + } + else { + /* equals */ + BT_flistnode *next = (*head)->next; + free(*head); + *head = next; + } +} + +static BT_page * +_bt_nalloc(BT_state *state) +/* allocate a node in the node freelist */ +{ + /* TODO: maybe change _bt_nalloc to return both a file and a node offset as + params to the function and make actual return value an error code. This is + to avoid forcing some callers to immediately use _fo_get */ + BT_nlistnode **n = &state->nlist; + BT_page *ret = 0; + + for (; *n; n = &(*n)->next) { + size_t sz_p = (*n)->hi - (*n)->lo; + + /* ;;: refactor? 
this is ridiculous */ + if (sz_p >= 1) { + ret = (*n)->lo; + _nlist_record_alloc(state, ret); + break; + } + } + + if (ret == 0) { + DPUTS("nlist out of mem!"); + return 0; + } + + /* make node writable */ + if (mprotect(ret, sizeof(BT_page), BT_PROT_DIRTY) != 0) { + DPRINTF("mprotect of node: %p failed with %s", ret, strerror(errno)); + abort(); + } + + return ret; +} + +static int +_node_cow(BT_state *state, BT_page *node, pgno_t *pgno) +{ + BT_page *ret = _bt_nalloc(state); /* ;;: todo: assert node has no dirty entries */ + memcpy(ret->datk, node->datk, sizeof node->datk[0] * BT_DAT_MAXKEYS); + *pgno = _fo_get(state, ret); + return BT_SUCC; +} + +static void * +_bt_bsearch(BT_page *page, vaof_t va) __attribute((unused)); + +/* binary search a page's data section for a va. Returns a pointer to the found BT_dat */ +static void * +_bt_bsearch(BT_page *page, vaof_t va) +{ + /* ;;: todo: actually bsearch rather than linear */ + for (BT_kv *kv = &page->datk[0]; kv <= (BT_kv *)BT_dat_maxva(page); kv++) { + if (kv->va == va) + return kv; + } + + return 0; +} + +static size_t +_bt_childidx(BT_page *node, vaof_t lo, vaof_t hi) +/* looks up the child index in a parent node. If not found, return is + BT_DAT_MAXKEYS */ +{ + size_t i = 0; + for (; i < BT_DAT_MAXKEYS - 1; i++) { + vaof_t llo = node->datk[i].va; + vaof_t hhi = node->datk[i+1].va; + if (llo <= lo && hhi >= hi) + return i; + } + return BT_DAT_MAXKEYS; +} + +/* ;;: find returns a path to nodes that things should be in if they are there. */ +/* a leaf has a meta page depth eq to findpath depth */ +static int +_bt_find2(BT_state *state, + BT_page *node, + BT_findpath *path, + uint8_t maxdepth, + vaof_t lo, + vaof_t hi) +{ + /* ;;: meta node stores depth (node or leaf?) + look at root node and binsearch BT_dats where low is <= lo and high is >= hi + If at depth of metapage (a leaf), then done + otherwise grab node, increment depth, save node in path + */ + if (path->depth > maxdepth) + return ENOENT; + + assert(node != 0); + + size_t i; + if ((i = _bt_childidx(node, lo, hi)) == BT_DAT_MAXKEYS) + return ENOENT; + + if (path->depth == maxdepth) { + path->idx[path->depth] = i; + path->path[path->depth] = node; + return BT_SUCC; + } + /* then branch */ + else { + pgno_t fo = node->datk[i].fo; + BT_page *child = _node_get(state, fo); + path->idx[path->depth] = i; + path->path[path->depth] = node; + path->depth++; + return _bt_find2(state, child, path, maxdepth, lo, hi); + } +} + +static void +_bt_root_new(BT_meta *meta, BT_page *root) +{ + /* The first usable address in the PMA is just beyond the btree segment */ + root->datk[0].va = B2PAGES(BLK_BASE_LEN_TOTAL); + root->datk[0].fo = 0; + root->datk[1].va = UINT32_MAX; + root->datk[1].fo = 0; +} + +static int +_bt_find(BT_state *state, BT_findpath *path, vaof_t lo, vaof_t hi) +{ + path->depth = 1; + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + uint8_t maxdepth = meta->depth; + return _bt_find2(state, root, path, maxdepth, lo, hi); +} + +static int +_bt_findpath_is_root(BT_findpath *path) __attribute((unused)); + +static int +_bt_findpath_is_root(BT_findpath *path) +{ + assert(path != 0); + return path->depth == 0; +} + +/* _bt_numkeys: find next empty space in node's data section. Returned as + index into node->datk. 
If the node is full, return is BT_DAT_MAXKEYS */ +static size_t +_bt_numkeys(BT_page *node) +{ + size_t i = 1; + for (; i < BT_DAT_MAXKEYS; i++) { + if (node->datk[i].va == 0) break; + } + return i; +} + +static int +_bt_datshift(BT_page *node, size_t i, size_t n) +/* shift data segment at i over by n KVs */ +{ + assert(i+n < BT_DAT_MAXKEYS); /* check buffer overflow */ + size_t siz = sizeof node->datk[0]; + size_t bytelen = (BT_DAT_MAXKEYS - i - n) * siz; + memmove(&node->datk[i+n], &node->datk[i], bytelen); + ZERO(&node->datk[i], n * siz); /* NB: not completely necessary */ + return BT_SUCC; +} + +/* _bt_split_datcopy: copy right half of left node to right node */ +static int +_bt_split_datcopy(BT_page *left, BT_page *right) +{ + size_t mid = BT_DAT_MAXKEYS / 2; + size_t bytelen = mid * sizeof(left->datk[0]); + /* copy rhs of left to right */ + memcpy(right->datk, &left->datk[mid], bytelen); + /* zero rhs of left */ + ZERO(&left->datk[mid], bytelen); /* ;;: note, this would be unnecessary if we stored node.N */ + /* the last entry in left should be the first entry in right */ + left->datk[mid].va = right->datk[0].va; + + return BT_SUCC; +} + +static int +_bt_ischilddirty(BT_page *parent, size_t child_idx) +{ + assert(child_idx < 2048); + uint8_t flag = parent->head.dirty[child_idx >> 3]; + return flag & (1 << (child_idx & 0x7)); +} + +/* ;;: todo: name the 0x8 and 4 literals and/or generalize */ +static int +_bt_dirtychild(BT_page *parent, size_t child_idx) +{ + assert(child_idx < 2048); + /* although there's nothing theoretically wrong with dirtying a dirty node, + there's probably a bug if we do it since a we only dirty a node when it's + alloced after a split or CoWed */ + assert(!_bt_ischilddirty(parent, child_idx)); + uint8_t *flag = &parent->head.dirty[child_idx >> 3]; + *flag |= 1 << (child_idx & 0x7); + return BT_SUCC; +} + +static int +_bt_cleanchild(BT_page *parent, size_t child_idx) +{ + assert(_bt_ischilddirty(parent, child_idx)); + uint8_t *flag = &parent->head.dirty[child_idx >> 3]; + *flag ^= 1 << (child_idx & 0x7); + return BT_SUCC; +} + +/* ;:: assert that the node is dirty when splitting */ +static int +_bt_split_child(BT_state *state, BT_page *parent, size_t i, pgno_t *newchild) +{ + /* ;;: todo: better error handling */ + assert(_bt_ischilddirty(parent, i)); + + int rc = BT_SUCC; + size_t N; + BT_page *left = _node_get(state, parent->datk[i].fo); + BT_page *right = _bt_nalloc(state); + if (right == 0) + return ENOMEM; + if (!SUCC(rc = _bt_split_datcopy(left, right))) + return rc; + + /* adjust high address of left node in parent */ + N = _bt_numkeys(left); + + /* insert reference to right child into parent node */ + N = _bt_numkeys(right); + vaof_t lo = right->datk[0].va; + vaof_t hi = right->datk[N-1].va; + + _bt_insertdat(lo, hi, _fo_get(state, right), parent, i); + + /* dirty right child */ + size_t ridx = _bt_childidx(parent, lo, hi); + assert(ridx == i+1); /* 0x100000020100;;: tmp? 
*/ + _bt_dirtychild(parent, ridx); + + /* ;;: fix this */ + *newchild = _fo_get(state, right); + + return BT_SUCC; +} + +static int +_bt_rebalance(BT_state *state, BT_page *node) __attribute((unused)); + +static int +_bt_rebalance(BT_state *state, BT_page *node) +{ + return 255; +} + +/* insert lo, hi, and fo in parent's data section for childidx */ +static int +_bt_insertdat(vaof_t lo, vaof_t hi, pgno_t fo, + BT_page *parent, size_t childidx) +{ +#if DEBUG_PRINTNODE + DPRINTF("BEFORE INSERT lo %" PRIu32 " hi %" PRIu32 " fo %" PRIu32, lo, hi, fo); + _bt_printnode(parent); +#endif + + /* ;;: TODO confirm this logic is appropriate for branch nodes. (It /should/ + be correct for leaf nodes) */ + vaof_t llo = parent->datk[childidx].va; + vaof_t hhi = parent->datk[childidx+1].va; + + /* NB: it can be assumed that llo <= lo and hi <= hhi because this routine is + called using an index found with _bt_childidx */ + + /* duplicate */ + if (llo == lo && hhi == hi) { + parent->datk[childidx].fo = fo; + return BT_SUCC; + } + + if (llo == lo) { + _bt_datshift(parent, childidx + 1, 1); + vaof_t oldfo = parent->datk[childidx].fo; + parent->datk[childidx].fo = fo; + parent->datk[childidx+1].va = hi; + parent->datk[childidx+1].fo = (oldfo == 0) + ? 0 + : oldfo + (hi - llo); + } + else if (hhi == hi) { + _bt_datshift(parent, childidx + 1, 1); + parent->datk[childidx+1].va = lo; + parent->datk[childidx+1].fo = fo; + } + else { + _bt_datshift(parent, childidx + 1, 2); + parent->datk[childidx+1].va = lo; + parent->datk[childidx+1].fo = fo; + parent->datk[childidx+2].va = hi; + pgno_t lfo = parent->datk[childidx].fo; + vaof_t lva = parent->datk[childidx].va; + parent->datk[childidx+2].fo = (lfo == 0) + ? 0 + : lfo + (hi - lva); + } + +#if DEBUG_PRINTNODE + DPUTS("AFTER INSERT"); + _bt_printnode(parent); +#endif + return BT_SUCC; +} + + +//// =========================================================================== +//// wip - deletion coalescing + +/* ;;: todo: rename routines */ + +int +_bt_delco_1pass_0(BT_state *state, vaof_t lo, vaof_t hi, + BT_page *node, uint8_t depth, uint8_t maxdepth) +{ + /* Perform a dfs search on all ranges that fall within lo and hi */ + + size_t N = _bt_numkeys(node); + size_t loidx = 0; + size_t hiidx = 0; + + /* first find the entry that matches lo */ + size_t i; + for (i = 0; i < N-1; i++) { + vaof_t hhi = node->datk[i+1].va; + if (hhi > lo) { + loidx = i; + break; + } + } + + /* and then the entry that matches hi */ + for (; i < N; i++) { + vaof_t hhi = node->datk[i].va; + if (hhi >= hi) { + hiidx = i; + break; + } + } + + /* node->datk[loidx] - node->datk[hiidx] are the bounds on which to perform + the dfs */ + for (i = loidx; i < hiidx; i++) { + pgno_t pg = node->datk[i].fo; + + /* if at the leaf level, terminate with failure if pg is not free */ + if (depth == maxdepth) { + if (pg != 0) return 1; + else continue; + } + + /* otherwise, dfs the child node */ + BT_page *child = _node_get(state, pg); + if (!SUCC(_bt_delco_1pass_0(state, lo, hi, child, depth+1, maxdepth))) + return 1; + } + + /* whether we're at a leaf or a branch, by now all pages corresponding to the + hi-lo range must be free */ + return BT_SUCC; +} + +/* ;;: since this is called by another recursive function _bt_delco that first + finds if a split exists, this /could/ take a pgno to avoid unnecessarily + rewalking the tree. not a big deal though as is. */ +static int +_bt_delco_1pass(BT_state *state, vaof_t lo, vaof_t hi) +/* returns true if the leaves in the given range are all free (pgno of 0). 
false + otherwise. This must be the case for an insert into an overlapping range to + succeed */ +{ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + return _bt_delco_1pass_0(state, lo, hi, root, 1, meta->depth); +} + +static void +_mlist_insert(BT_state *state, void *lo, void *hi) +{ + BT_mlistnode **dst = &state->mlist; + BT_mlistnode **prev_dst = 0; + BYTE *lob = lo; + BYTE *hib = hi; + + while(*dst) { + if (hib == (*dst)->lo) { + (*dst)->lo = lob; + /* check if we can coalesce with left neighbor */ + if (prev_dst != 0) { + bp(0); /* ;;: note, this case should not hit. keeping for debugging. */ + /* dst equals &(*prev_dst)->next */ + assert(*prev_dst != 0); + if ((*prev_dst)->hi == lob) { + (*prev_dst)->hi = (*dst)->hi; + (*prev_dst)->next = (*dst)->next; + free(*dst); + } + } + return; + } + if (lob == (*dst)->hi) { + (*dst)->hi = hi; + /* check if we can coalesce with right neighbor */ + if ((*dst)->next != 0) { + if (hib == (*dst)->next->lo) { + (*dst)->hi = (*dst)->next->hi; + BT_mlistnode *dst_next = (*dst)->next; + (*dst)->next = (*dst)->next->next; + free(dst_next); + } + } + return; + } + if (hib > (*dst)->lo) { + assert(lob > (*dst)->hi); + assert(hib > (*dst)->hi); + prev_dst = dst; + dst = &(*dst)->next; + continue; + } + + /* otherwise, insert discontinuous node */ + BT_mlistnode *new = calloc(1, sizeof *new); + new->lo = lob; + new->hi = hib; + new->next = *dst; + *dst = new; + return; + } + + /* found end of list */ + BT_mlistnode *new = calloc(1, sizeof *new); + new->lo = lob; + new->hi = hib; + new->next = 0; + (*dst) = new; +} + +static void +_nlist_insert2(BT_state *state, BT_nlistnode **dst, BT_page *lo, BT_page *hi) +{ + BT_nlistnode **prev_dst = 0; + + while(*dst) { + if (hi == (*dst)->lo) { + (*dst)->lo = lo; + /* check if we can coalesce with left neighbor */ + if (prev_dst != 0) { + bp(0); /* ;;: note, this case should not hit. keeping for debugging. 
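+             If the freed run also bordered the previous node, the
+             lo == (*dst)->hi case would have matched on the prior iteration
+             and returned there, so reaching this branch implies a gap on the
+             left.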
*/ + /* dst equals &(*prev_dst)->next */ + assert(*prev_dst != 0); + if ((*prev_dst)->hi == lo) { + (*prev_dst)->hi = (*dst)->hi; + (*prev_dst)->next = (*dst)->next; + free(*dst); + } + } + return; + } + if (lo == (*dst)->hi) { + (*dst)->hi = hi; + /* check if we can coalesce with right neighbor */ + if ((*dst)->next != 0) { + if (hi == (*dst)->next->lo) { + (*dst)->hi = (*dst)->next->hi; + BT_nlistnode *dst_next = (*dst)->next; + (*dst)->next = (*dst)->next->next; + free(dst_next); + } + } + return; + } + if (hi > (*dst)->lo) { + assert(lo > (*dst)->hi); + assert(hi > (*dst)->hi); + prev_dst = dst; + dst = &(*dst)->next; + continue; + } + + /* otherwise, insert discontinuous node */ + BT_nlistnode *new = calloc(1, sizeof *new); + new->lo = lo; + new->hi = hi; + new->next = *dst; + *dst = new; + return; + } +} + +static void +_nlist_insert(BT_state *state, BT_nlistnode **dst, pgno_t nodepg) +{ + BT_page *lo = _node_get(state, nodepg); + BT_page *hi = _node_get(state, nodepg+1); + _nlist_insert2(state, dst, lo, hi); +} + +static void +_pending_nlist_merge(BT_state *state) +{ + BT_nlistnode *src_head = state->pending_nlist; + BT_nlistnode *prev = 0; + while (src_head) { + _nlist_insert2(state, &state->nlist, src_head->lo, src_head->hi); + prev = src_head; + src_head = src_head->next; + free(prev); + } +} + +static void +_flist_insert(BT_flistnode **dst, pgno_t lo, pgno_t hi) +{ + BT_flistnode **prev_dst = 0; + + while(*dst) { + if (hi == (*dst)->lo) { + (*dst)->lo = lo; + /* check if we can coalesce with left neighbor */ + if (prev_dst != 0) { + bp(0); /* ;;: note, this case should not hit. keeping for debugging. */ + /* dst equals &(*prev_dst)->next */ + assert(*prev_dst != 0); + if ((*prev_dst)->hi == lo) { + (*prev_dst)->hi = (*dst)->hi; + (*prev_dst)->next = (*dst)->next; + free(*dst); + } + } + return; + } + if (lo == (*dst)->hi) { + (*dst)->hi = hi; + /* check if we can coalesce with right neighbor */ + if ((*dst)->next != 0) { + if (hi == (*dst)->next->lo) { + (*dst)->hi = (*dst)->next->hi; + BT_flistnode *dst_next = (*dst)->next; + (*dst)->next = (*dst)->next->next; + free(dst_next); + } + } + return; + } + if (hi > (*dst)->lo) { + assert(lo > (*dst)->hi); + assert(hi > (*dst)->hi); + prev_dst = dst; + dst = &(*dst)->next; + continue; + } + + /* otherwise, insert discontinuous node */ + BT_flistnode *new = calloc(1, sizeof *new); + new->lo = lo; + new->hi = hi; + new->next = *dst; + *dst = new; + return; + } +} + +static void +_pending_flist_merge(BT_state *state) +{ + BT_flistnode *src_head = state->pending_flist; + BT_flistnode *prev = 0; + while (src_head) { + _flist_insert(&state->flist, src_head->lo, src_head->hi); + prev = src_head; + src_head = src_head->next; + free(prev); + } +} + + +/* ;;: todo move shit around */ +static void +_bt_delco_droptree2(BT_state *state, pgno_t nodepg, + uint8_t depth, uint8_t maxdepth, int isdirty) +{ + int ischilddirty = 0; + + /* branch */ + if (depth != maxdepth) { + BT_page *node = _node_get(state, nodepg); + for (size_t i = 0; i < BT_DAT_MAXKEYS; i++) { + BT_kv entry = node->datk[i]; + if (entry.fo == 0) + break; /* done */ + ischilddirty = _bt_ischilddirty(node, i); + _bt_delco_droptree2(state, entry.fo, depth+1, maxdepth, ischilddirty); + } + } + + /* branch and leaf */ + if (isdirty) { + _nlist_insert(state, &state->nlist, nodepg); + } + else { + _nlist_insert(state, &state->pending_nlist, nodepg); + } +} + +static void +_bt_delco_droptree(BT_state *state, pgno_t nodepg, uint8_t depth, int isdirty) +{ + /* completely drop a tree. 
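+     Every node in the subtree is handed back to a node freelist: the live
+     nlist if that node was dirtied in this transaction, the pending_nlist
+     otherwise (the latter only becomes reusable after the next sync).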
Assume that all leaves under the tree are free + (pgno = 0) */ + assert(nodepg >= 2); + BT_meta *meta = state->meta_pages[state->which]; + _bt_delco_droptree2(state, nodepg, depth, meta->depth, isdirty); +} + +static void +_bt_delco_trim_rsubtree_lhs2(BT_state *state, vaof_t lo, vaof_t hi, + pgno_t nodepg, uint8_t depth, uint8_t maxdepth) +{ + BT_page *node = _node_get(state, nodepg); + size_t hiidx = 0; + size_t N = _bt_numkeys(node); + + /* find hi idx of range */ + size_t i; + for (i = 0; i < N; i++) { + vaof_t hhi = node->datk[i].va; + if (hhi >= hi) { + hiidx = i; + break; + } + } + + /* set the lo address of datk[hiidx] to hi */ + node->datk[hiidx-1].va = hi; + + /* drop the subtrees left of the range */ + if (depth != maxdepth) { + for (i = 0; i < hiidx-1; i++) { + pgno_t childpg = node->datk[i].fo; + if (childpg == 0) + break; + int ischilddirty = _bt_ischilddirty(node, i); + _bt_delco_droptree(state, childpg, depth+1, ischilddirty); + } + } + + /* memmove the buffer so the found range is the first in the node */ + BYTE *dst = (BYTE *)&node->datk[0].va; + BYTE *src = (BYTE *)&node->datk[hiidx-1].va; + BYTE *end = (BYTE *)&node->datk[BT_DAT_MAXKEYS-1].fo; + size_t len = end - src; + + memmove(dst, src, len); + + /* ;;: TODO add temporary asserts for testing? */ + + /* and now zero the moved range */ + ZERO(dst+len, end-(dst+len)); + + /* done if this is a leaf */ + if (depth == maxdepth) + return; + /* otherwise, recur on subtree */ + pgno_t rsubtree = node->datk[hiidx].fo; + _bt_delco_trim_rsubtree_lhs2(state, lo, hi, rsubtree, depth+1, maxdepth); +} + +static void +_bt_delco_trim_rsubtree_lhs(BT_state *state, vaof_t lo, vaof_t hi, + pgno_t nodepg, uint8_t depth) +{ + BT_meta *meta = state->meta_pages[state->which]; + _bt_delco_trim_rsubtree_lhs2(state, lo, hi, nodepg, depth, meta->depth); +} + +static void +_bt_delco_trim_lsubtree_rhs2(BT_state *state, vaof_t lo, vaof_t hi, + pgno_t nodepg, uint8_t depth, uint8_t maxdepth) +{ + BT_page *node = _node_get(state, nodepg); + size_t N = _bt_numkeys(node); + size_t loidx = 0; + + /* find low idx of range */ + size_t i; + for (i = 0; i < N-1; i++) { + vaof_t hhi = node->datk[i+1].va; + if (hhi > lo) { + loidx = i; + break; + } + } + + /* set the hi address of datk[loidx] to hi */ + node->datk[loidx+1].va = hi; + + /* drop the subtrees right of the range */ + if (depth != maxdepth) { + /* recur and droptree for branches */ + for (i = loidx+1; i < N-1; i++) { + pgno_t childpg = node->datk[i].fo; + if (childpg == 0) + break; + int ischilddirty = _bt_ischilddirty(node, i); + _bt_delco_droptree(state, childpg, depth+1, ischilddirty); + } + } + + /* always zero rhs whether node is a leaf or a branch */ + BYTE *beg = (BYTE *)&node->datk[loidx+1].fo; + BYTE *end = (BYTE *)&node->datk[BT_DAT_MAXKEYS-1].fo; + size_t len = end - beg; + + ZERO(beg, len); + /* ;;: this won't zero the last fo, but that should be fine. 
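+     The assert immediately below documents that expectation.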
remove the assert + when you're confident it /is/ fine */ + assert(node->datk[BT_DAT_MAXKEYS-1].fo == 0); + + /* done if this is a leaf */ + if (depth == maxdepth) + return; + /* otherwise, recur on the left subtree */ + pgno_t lsubtree = node->datk[loidx].fo; + _bt_delco_trim_lsubtree_rhs2(state, lo, hi, lsubtree, depth+1, maxdepth); +} + +static void +_bt_delco_trim_lsubtree_rhs(BT_state *state, vaof_t lo, vaof_t hi, + pgno_t nodepg, uint8_t depth) +{ + BT_meta *meta = state->meta_pages[state->which]; + _bt_delco_trim_lsubtree_rhs2(state, lo, hi, nodepg, depth, meta->depth); +} + +static void +_bt_delco(BT_state *state, vaof_t lo, vaof_t hi, + pgno_t nodepg, uint8_t depth, uint8_t maxdepth) +{ + /* ;;: "find_internal_splits" in the original algorithm */ + BT_page *node = _node_get(state, nodepg); + size_t N = _bt_numkeys(node); + + size_t loidx = 0; + size_t hiidx = 0; + pgno_t lsubtree = 0; + pgno_t rsubtree = 0; + + /* find low idx of range */ + for (size_t i = 0; i < N-1; i++) { + vaof_t hhi = node->datk[i+1].va; + if (hhi > lo) { + loidx = i; + break; + } + } + + /* find high idx of range */ + for (size_t i = loidx; i < N; i++) { + vaof_t hhi = node->datk[i].va; + if (hhi >= hi) { + assert(i > 0); + hiidx = i - 1; + break; + } + } + + /* non-split range and at leaf. done */ + if (depth == maxdepth + && hiidx == loidx) { + return; + } + + lsubtree = node->datk[loidx].fo; + rsubtree = node->datk[hiidx].fo; + + if (depth < maxdepth) { + /* guarantee path is dirty by CoWing node if not */ + + /* ;;: refactor? code duplication?? */ + if (!_bt_ischilddirty(node, loidx)) { + BT_page *child = _node_get(state, lsubtree); + pgno_t newpg; + _node_cow(state, child, &newpg); + lsubtree = node->datk[loidx].fo = newpg; + _bt_dirtychild(node, loidx); + } + + if (!_bt_ischilddirty(node, hiidx)) { + BT_page *child = _node_get(state, rsubtree); + pgno_t newpg; + _node_cow(state, child, &newpg); + rsubtree = node->datk[hiidx].fo = newpg; + _bt_dirtychild(node, hiidx); + } + } + + /* non-split range, recurse to child tree */ + if (hiidx == loidx) { + pgno_t childpg = node->datk[loidx].fo; + _bt_delco(state, lo, hi, childpg, depth+1, maxdepth); + } + + /* split range discovered */ + if (hiidx > loidx) { + /* run first pass to guarantee range is completely free */ + if (!SUCC(_bt_delco_1pass(state, lo, hi))) { + /* attempted insert on split range that cannot be coalesced */ + assert(0); + } + + /* set leftmost boundary va to hi */ + node->datk[loidx+1].va = hi; + + /* set the lo side of the right boundary to hi */ + node->datk[hiidx].va = hi; + + /* drop all trees between the two subtrees */ + for (size_t i = loidx+1; i < hiidx; i++) { + pgno_t childpg = node->datk[i].fo; + int ischilddirty = _bt_ischilddirty(node, i); + _bt_delco_droptree(state, childpg, depth+1, ischilddirty); + } + + /* move buffer */ + BYTE *dst = (BYTE *)&node->datk[loidx+1].va; + BYTE *src = (BYTE *)&node->datk[hiidx].va; + BYTE *end = (BYTE *)&node->datk[BT_DAT_MAXKEYS-1].fo; + size_t len = end - src; + memmove(dst, src, len); + ZERO(dst+len, end-(dst+len)); + + /* unless at leaf trim left subtree then trim right subtree */ + if (depth < maxdepth) { + _bt_delco_trim_lsubtree_rhs(state, lo, hi, lsubtree, depth+1); + _bt_delco_trim_rsubtree_lhs(state, lo, hi, rsubtree, depth+1); + } + + /* done */ + return; + } +} + +/* ;;: todo, update meta->depth when we add a row. Should this be done in + _bt_rebalance? 
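+   NB: the root-split path in _bt_insert already bumps meta->depth; this todo
+   concerns depth changes driven by rebalancing/merging.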
*/
+static int
+_bt_insert2(BT_state *state, vaof_t lo, vaof_t hi, pgno_t fo,
+            BT_page *node, size_t depth)
+{
+  /* ;;: to be written in such a way that node is guaranteed both dirty and
+     non-full */
+
+  /* ;;: remember:
+     - You need to CoW+dirty a node when you insert a non-dirty node.
+     - You need to insert into a node when:
+       - It's a leaf
+       - It's a branch and you CoWed the child
+     - Hence, all nodes in a path to a leaf being inserted into need to already
+       be dirty or explicitly CoWed. Splitting doesn't actually factor into this
+       decision afaict.
+  */
+
+  assert(node);
+
+  int rc = 255;
+  size_t N = 0;
+  size_t childidx = _bt_childidx(node, lo, hi);
+  assert(childidx != BT_DAT_MAXKEYS);
+  BT_meta *meta = state->meta_pages[state->which];
+
+  if (depth < meta->depth) {
+    pgno_t childpgno = node->datk[childidx].fo;
+    BT_page *child = _node_get(state, childpgno);
+    N = _bt_numkeys(child);
+  }
+
+  /* nullcond: node is a leaf */
+  if (meta->depth == depth) {
+    /* guaranteed non-full and dirty by n-1 recursive call, so just insert */
+    return _bt_insertdat(lo, hi, fo, node, childidx);
+  }
+
+  /* do we need to CoW the child node? (copy the child, not this node) */
+  if (!_bt_ischilddirty(node, childidx)) {
+    BT_page *child = _node_get(state, node->datk[childidx].fo);
+    pgno_t pgno;
+    _node_cow(state, child, &pgno);
+    node->datk[childidx].fo = pgno;
+    _bt_dirtychild(node, childidx);
+  }
+
+  /* do we need to split the child node? */
+  if (N >= BT_DAT_MAXKEYS - 2) {
+    pgno_t rchild_pgno;
+    if (!SUCC(rc = _bt_split_child(state, node, childidx, &rchild_pgno)))
+      return rc;
+
+    /* since we split the child's data, recalculate the child idx */
+    /* ;;: note, this can be simplified into a conditional i++ */
+    childidx = _bt_childidx(node, lo, hi);
+
+  }
+
+  /* the child is now guaranteed non-full (split) and dirty. Recurse */
+  BT_page *child = _node_get(state, node->datk[childidx].fo);
+  return _bt_insert2(state, lo, hi, fo, child, depth+1);
+}
+
+static int
+_bt_insert(BT_state *state, vaof_t lo, vaof_t hi, pgno_t fo)
+/* handles CoWing/splitting of the root page since it's special cased. Then
+   passes the child matching hi/lo to _bt_insert2 */
+{
+
+  BT_meta *meta = state->meta_pages[state->which];
+  BT_page *root = _node_get(state, meta->root);
+
+  /* the root MUST be dirty (zero checksum in metapage) */
+  assert(meta->chk == 0);
+
+  size_t N = _bt_numkeys(root);
+
+  /* perform deletion coalescing (and preemptively guarantee path is dirty) if
+     inserting a non-zero (non-free) page */
+  if (fo != 0) {
+    _bt_delco(state, lo, hi, meta->root, 1, meta->depth);
+  }
+
+  /* CoW root's child if it isn't already dirty */
+  size_t childidx = _bt_childidx(root, lo, hi);
+  assert(childidx != BT_DAT_MAXKEYS); /* ;;: this should catch the case of
+                                         improperly inserting into a split
+                                         range. Should we do it earlier or
+                                         differently?
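+                                         (_bt_childidx returns BT_DAT_MAXKEYS
+                                         only when no single entry spans
+                                         [lo, hi), i.e. the range still
+                                         straddles a split that _bt_delco did
+                                         not coalesce.)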
*/ + if (meta->depth > 1 + && !_bt_ischilddirty(root, childidx)) { + BT_page *child = _node_get(state, root->datk[childidx].fo); + pgno_t newchildpg; + _node_cow(state, child, &newchildpg); + root->datk[childidx].fo = newchildpg; + _bt_dirtychild(root, childidx); + } + + /* before calling into recursive insert, handle root splitting since it's + special cased (2 allocs) */ + if (N >= BT_DAT_MAXKEYS - 2) { /* ;;: remind, fix all these conditions to be - 2 */ + pgno_t pg = 0; + + /* the old root is now the left child of the new root */ + BT_page *left = root; + BT_page *right = _bt_nalloc(state); + BT_page *rootnew = _bt_nalloc(state); + + /* split root's data across left and right nodes */ + _bt_split_datcopy(left, right); + /* save left and right in new root's .data */ + pg = _fo_get(state, left); + rootnew->datk[0].fo = pg; + rootnew->datk[0].va = 0; + pg = _fo_get(state, right); + rootnew->datk[1].fo = pg; + rootnew->datk[1].va = right->datk[0].va; + rootnew->datk[2].va = UINT32_MAX; + /* dirty new root's children */ + _bt_dirtychild(rootnew, 0); + _bt_dirtychild(rootnew, 1); + /* update meta page information. (root and depth) */ + pg = _fo_get(state, rootnew); + meta->root = pg; + meta->depth += 1; + root = rootnew; + } + + /* + meta is dirty + root is dirty and split if necessary + root's child in insert path is dirty and split if necessary + finally, recurse on child + */ + return _bt_insert2(state, lo, hi, fo, root, 1); + /* return _bt_insert2(state, lo, hi, fo, child, 1); */ +} + +/* ;;: wip */ +/* ;;: inspired by lmdb's MDB_pageparent. While seemingly unnecessary for + _bt_insert, this may be useful for _bt_delete when we implement deletion + coalescing */ +typedef struct BT_ppage BT_ppage; +struct BT_ppage { + BT_page *node; + BT_page *parent; +}; + +static int +_bt_delete(BT_state *state, vaof_t lo, vaof_t hi) __attribute((unused)); + +static int +_bt_delete(BT_state *state, vaof_t lo, vaof_t hi) +{ + /* ;;: tmp, implement coalescing of zero ranges and merging/rebalancing of + nodes */ + return _bt_insert(state, lo, hi, 0); +} + +static int +_mlist_new(BT_state *state) +{ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + /* assert(root->datk[0].fo == 0); */ + size_t N = _bt_numkeys(root); + + vaof_t lo = root->datk[0].va; + vaof_t hi = root->datk[N-1].va; + + BT_mlistnode *head = calloc(1, sizeof *head); + + head->next = 0; + head->lo = off2addr(lo); + head->hi = off2addr(hi); + state->mlist = head; + + return BT_SUCC; +} + +#if 0 +static int +_flist_grow(BT_state *state, BT_flistnode *space) +/* growing the flist consists of expanding the backing persistent file, pushing + that space onto the disk freelist, and updating the dimension members in + BT_state */ +{ + /* ;;: I don't see any reason to grow the backing file non-linearly, but we + may want to adjust the size of the amount grown based on performance + testing. */ + if (-1 == lseek(state->data_fd, state->file_size + PMA_GROW_SIZE, SEEK_SET)) + return errno; + if (-1 == write(state->data_fd, "", 1)) + return errno; + + + /* find the last node in the disk freelist */ + BT_flistnode *tail = state->flist; + for (; tail->next; tail = tail->next) + ; + + pgno_t lastpgfree = tail->hi; + + /* ;;: TODO, make sure you are certain of this logic. Further, add assertions + regarding relative positions of state->file_size, state->frontier, and + lastpgfree + + we MAY call into this routine even if there is freespace on the end + because it's possible that freespace isn't large enough. 
We may also call + into this routine when the frontier exceeds the last free pg because + that's just how freelists work. ofc, frontier should never exceed + file_size. what other assertions?? + + */ + + /* if the frontier (last pg in use) is less than the last page free, we should + coalesce the new node with the tail. */ + if (state->frontier <= lastpgfree) { + tail->hi += PMA_GROW_SIZE; /* ;;: THIS IS INCORRECT */ + } + /* otherwise, a new node needs to be allocated */ + else { + BT_flistnode *new = calloc(1, sizeof *new); + /* since the frontier exceeds the last pg free, new freespace should + naturally be allocated at the frontier */ + new->pg = state->frontier; + new->hi = PMA_GROW_SIZE; + tail->next = new; + } + + /* finally, update the file size */ + state->file_size += PMA_GROW_SIZE; + + return BT_SUCC; +} +#endif + +static int +_flist_new(BT_state *state) +#define FLIST_PG_START ((BT_META_SECTION_WIDTH + BLK_BASE_LEN0) / BT_PAGESIZE) +{ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + /* assert(root->datk[0].fo == 0); */ + size_t N = _bt_numkeys(root); + + vaof_t lo = root->datk[0].va; + vaof_t hi = root->datk[N-1].va; + size_t len = hi - lo; + + BT_flistnode *head = calloc(1, sizeof *head); + head->next = 0; + head->lo = FLIST_PG_START; + head->hi = FLIST_PG_START + len; + state->flist = head; + + return BT_SUCC; +} + +static int +_nlist_new(BT_state *state) +{ + BT_nlistnode *head = calloc(1, sizeof *head); + + /* the size of a new node freelist is just the first stripe length */ + head->lo = &((BT_page *)state->map)[BT_NUMMETAS]; + head->hi = head->lo + B2PAGES(BLK_BASE_LEN0); + head->next = 0; + + state->nlist = head; + + return BT_SUCC; +} + +static int +_nlist_delete(BT_state *state) +{ + BT_nlistnode *head, *prev; + head = prev = state->nlist; + while (head->next) { + prev = head; + head = head->next; + free(prev); + } + state->nlist = 0; + return BT_SUCC; +} + +#if 0 +static BT_nlistnode * +_nlist_read_prev(BT_nlistnode *head, BT_nlistnode *curr) +{ + /* find nlist node preceding curr and return it */ + BT_nlistnode *p, *n; + p = head; + n = head->next; + for (; n; p = n, n = n->next) { + if (n == curr) + return p; + } + return 0; +} + +/* TODO this is a pretty bad algorithm in terms of time complexity. It should be + fixed, but isn't necessary now as our nlist is quite small. You may want to + consider making nlist doubly linked or incorporate a sort and merge step. */ +static int +_nlist_read2(BT_state *state, BT_page *node, uint8_t maxdepth, + BT_nlistnode *head, uint8_t depth) +/* recursively walk all nodes in the btree. Allocating new nlist nodes when a + node is found to be in a stripe unaccounted for. 
For each node found, + split/shrink the appropriate node to account for the allocated page */ +{ + BT_nlistnode *p, *n; + p = head; + n = head->next; + + /* find the nlist node that fits the current btree node */ + for (; n; p = n, n = n->next) { + if (p->va <= node && p->va + p->sz > node) + break; + } + + /* if the nlist node is only one page wide, it needs to be freed */ + if (p->sz == 1) { + BT_nlistnode *prev = _nlist_read_prev(head, p); + prev->next = p->next; + free(p); + goto e; + } + + /* if the btree node resides at the end of the nlist node, just shrink it */ + BT_page *last = p->va + p->sz - 1; + if (last == node) { + p->sz -= 1; + goto e; + } + + /* if the btree node resides at the start of the nlist node, likewise shrink + it and update the va */ + if (p->va == node) { + p->sz -= 1; + p->va += 1; + goto e; + } + + /* otherwise, need to split the current nlist node */ + BT_nlistnode *right = calloc(1, sizeof *right); + size_t lsz = node - p->va; + size_t rsz = (p->va + p->sz) - node; + /* remove 1 page from the right nlist node's size to account for the allocated + btree node */ + rsz -= 1; + assert(lsz > 0 && rsz > 0); + + /* update the size of the left node. And set the size and va of the right + node. Finally, insert the new nlist node into the nlist. */ + p->sz = lsz; + right->sz = rsz; + right->va = node + 1; + right->next = p->next; + p->next = right; + + e: + /* if at a leaf, we're finished */ + if (depth == maxdepth) { + return BT_SUCC; + } + + /* otherwise iterate over all child nodes, recursively constructing the + list */ + int rc = BT_SUCC; + for (size_t i = 0; i < BT_DAT_MAXKEYS; i++) { + BT_kv kv = node->datk[i]; + BT_page *child = _node_get(state, node->datk[i].fo); + if (!child) continue; + if (!SUCC(rc = _nlist_read2(state, + child, + maxdepth, + head, + depth+1))) + return rc; + } + + /* all children traversed */ + return BT_SUCC; +} + +static int +_nlist_read(BT_state *state) +{ + /* ;;: this should theoretically be simpler than _mlist_read. right? We can + derive the stripes that contain nodes from the block base array stored in + the metapage. What else do we need to know? -- the parts of each stripe + that are free or in use. How can we discover that? + + 1) Without storing any per-page metadata, we could walk the entire tree + from the root. Check the page number of the node. And modify the freelist + accordingly. + + 2) If we stored per-page metadata, this would be simpler. Linearly traverse + each stripe and check if the page is BT_NODE or BT_FREE. + + -- are there downsides to (2)? The only advantage to this would be quicker + startup. So for now, going to traverse all nodes and for each node, + traverse the nlist and split it appropriately. 
+ */ + + int rc = BT_SUCC; + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + + /* ;;: since partition striping isn't implemented yet, simplifying code by + assuming all nodes reside in the 2M region */ + BT_nlistnode *head = calloc(1, sizeof *head); + head->sz = BLK_BASE_LEN0; + head->va = &((BT_page *)state->map)[BT_NUMMETAS]; + head->next = 0; + + if (!SUCC(rc = _nlist_read2(state, root, meta->depth, head, 1))) + return rc; + + state->nlist = head; + + return rc; +} + +static BT_mlistnode * +_mlist_read2(BT_state *state, BT_page *node, uint8_t maxdepth, uint8_t depth) +{ + /* leaf */ + if (depth == maxdepth) { + BT_mlistnode *head, *prev; + head = prev = calloc(1, sizeof *head); + + size_t i = 0; + BT_kv *kv = &node->datk[i]; + while (i < BT_DAT_MAXKEYS - 1) { +#if CAN_COALESCE + /* free and contiguous with previous mlist node: merge */ + if (kv->fo == 0 + && addr2off(prev->va) + prev->sz == kv->va) { + vaof_t hi = node->datk[i+1].va; + vaof_t lo = kv->va; + size_t len = hi - lo; + prev->sz += len; + } + /* free but not contiguous with previous mlist node: append new node */ + else if (kv->fo == 0) { +#endif + BT_mlistnode *new = calloc(1, sizeof *new); + vaof_t hi = node->datk[i+1].va; + vaof_t lo = kv->va; + size_t len = hi - lo; + new->sz = len; + new->va = off2addr(lo); + prev->next = new; + prev = new; +#if CAN_COALESCE + } +#endif + + kv = &node->datk[++i]; + } + return head; + } + + /* branch */ + size_t i = 0; + BT_mlistnode *head, *prev; + head = prev = 0; + for (; i < BT_DAT_MAXKEYS; ++i) { + BT_kv kv = node->datk[i]; + if (kv.fo == BT_NOPAGE) + continue; + BT_page *child = _node_get(state, kv.fo); + BT_mlistnode *new = _mlist_read2(state, child, maxdepth, depth+1); + if (head == 0) { + head = prev = new; + } + else { + /* just blindly append and unify the ends afterward */ + prev->next = new; + } + } + return 0; +} + +static int +_mlist_read(BT_state *state) +{ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + uint8_t maxdepth = meta->depth; + BT_mlistnode *head = _mlist_read2(state, root, maxdepth, 1); + + /* + trace the full freelist and unify nodes one last time + NB: linking the leaf nodes would make this unnecessary + */ +#if CAN_COALESCE + BT_mlistnode *p = head; + BT_mlistnode *n = head->next; + while (n) { + size_t llen = P2BYTES(p->sz); + uintptr_t laddr = (uintptr_t)p->va; + uintptr_t raddr = (uintptr_t)n->va; + /* contiguous: unify */ + if (laddr + llen == raddr) { + p->sz += n->sz; + p->next = n->next; + free(n); + } + } +#endif + + state->mlist = head; + return BT_SUCC; +} +#endif + +static int +_mlist_delete(BT_state *state) +{ + BT_mlistnode *head, *prev; + head = prev = state->mlist; + while (head->next) { + prev = head; + head = head->next; + free(prev); + } + state->mlist = 0; + return BT_SUCC; +} + +#if 0 +BT_flistnode * +_flist_read2(BT_state *state, BT_page *node, uint8_t maxdepth, uint8_t depth) +{ + size_t N = _bt_numkeys(node); + /* leaf */ + if (depth == maxdepth) { + BT_flistnode *head, *prev; + head = prev = calloc(1, sizeof(*head)); + + /* ;;: fixme the head won't get populated in this logic */ + size_t i = 0; + BT_kv *kv = &node->datk[i]; + while (i < N-1) { + /* Just blindly append nodes since they aren't guaranteed sorted */ + BT_flistnode *new = calloc(1, sizeof *new); + vaof_t hi = node->datk[i+1].va; + vaof_t lo = kv->va; + size_t len = hi - lo; + pgno_t fo = kv->fo; + new->sz = len; + new->pg = fo; + prev->next = new; + prev = new; + + kv = 
&node->datk[++i]; + } + for (size_t i = 0; i < N-1; i++) { + vaof_t hi = node->datk[i+1].va; + vaof_t lo = node->datk[i].va; + size_t len = hi - lo; + pgno_t fo = node->datk[i].fo; + /* not free */ + if (fo != 0) + continue; + } + return head; + } + + /* branch */ + size_t i = 0; + BT_flistnode *head, *prev; + head = prev = 0; + for (; i < N; ++i) { + BT_kv kv = node->datk[i]; + if (kv.fo == BT_NOPAGE) + continue; + BT_page *child = _node_get(state, kv.fo); + BT_flistnode *new = _flist_read2(state, child, maxdepth, depth+1); + if (head == 0) { + head = prev = new; + } + else { + /* just blindly append and unify the ends afterward */ + prev->next = new; + } + } + return 0; +} + +static int +_flist_read(BT_state *state) +{ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + uint8_t maxdepth = meta->depth; + BT_flistnode *head = _flist_read2(state, root, maxdepth, 1); + /* ;;: infinite loop with proper starting depth of 1. -- fix that! */ + /* BT_flistnode *head = _flist_read2(state, root, maxdepth, 1); */ + + if (head == 0) + return BT_SUCC; + + /* sort the freelist */ + _flist_mergesort(head); + + /* merge contiguous regions after sorting */ + BT_flistnode *p = head; + BT_flistnode *n = head->next; + while (n) { + size_t llen = p->sz; + pgno_t lfo = p->pg; + pgno_t rfo = n->pg; + /* contiguous: unify */ + if (lfo + llen == rfo) { + p->sz += n->sz; + p->next = n->next; + free(n); + } + } + + state->flist = head; + return BT_SUCC; +} +#endif + +static int +_flist_delete(BT_state *state) +{ + BT_flistnode *head, *prev; + head = prev = state->flist; + while (head->next) { + prev = head; + head = head->next; + free(prev); + } + state->flist = 0; + return BT_SUCC; +} + +#define CLOSE_FD(fd) \ + do { \ + close(fd); \ + fd = -1; \ + } while(0) + +/* TODO: move to lib */ +static uint32_t +nonzero_crc_32(void *dat, size_t len) +{ + unsigned char nonce = 0; + uint32_t chk = crc_32(dat, len); + + do { + if (nonce > 8) + abort(); + chk = update_crc_32(chk, nonce++); + } while (chk == 0); + + return chk; +} + +static void +_bt_state_restore_maps2(BT_state *state, BT_page *node, + uint8_t depth, uint8_t maxdepth) +{ + size_t N = _bt_numkeys(node); + + /* leaf */ + if (depth == maxdepth) { + for (size_t i = 0; i < N-1; i++) { + vaof_t lo = node->datk[i].va; + vaof_t hi = node->datk[i+1].va; + pgno_t pg = node->datk[i].fo; + + BYTE *loaddr = off2addr(lo); + BYTE *hiaddr = off2addr(hi); + size_t bytelen = hiaddr - loaddr; + off_t offset = P2BYTES(pg); + + if (pg != 0) { + /* not freespace, map readonly data on disk */ + if (loaddr != + mmap(loaddr, + bytelen, + BT_PROT_CLEAN, + BT_FLAG_CLEAN, + state->data_fd, + offset)) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", loaddr, strerror(errno)); + abort(); + } + } + else { + /* freespace, map no access */ + if (loaddr != + mmap(loaddr, + bytelen, + BT_PROT_FREE, + BT_FLAG_FREE, + 0, 0)) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", loaddr, strerror(errno)); + abort(); + } + } + } + return; + } + + /* branch - dfs all subtrees */ + for (size_t i = 0; i < N-1; i++) { + /* ;;: assuming node stripes when partition striping is implemented will be + 1:1 mapped to disk for simplicity. If that is not the case, they should + be handled here. 
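+       As written, only leaf-level data ranges get fresh mmap calls here;
+       interior nodes are reached through the fixed node-stripe mapping
+       established in _bt_state_load.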
*/ + pgno_t pg = node->datk[i].fo; + BT_page *child = _node_get(state, pg); + _bt_state_restore_maps2(state, child, depth+1, maxdepth); + } +} + +static void +_bt_state_restore_maps(BT_state *state) +/* restores the memory map of the btree since data can be arbitrarily located */ +{ + /* TODO: add checks to ensure data isn't mapped into an invalid location + (e.g. a node stripe) */ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + _bt_state_restore_maps2(state, root, 1, meta->depth); +} + +static int +_bt_state_meta_which(BT_state *state) +{ + BT_meta *m1 = state->meta_pages[0]; + BT_meta *m2 = state->meta_pages[1]; + int which = -1; + + if (m1->chk == 0) { + /* first is dirty */ + which = 1; + } + else if (m2->chk == 0) { + /* second is dirty */ + which = 0; + } + else if (m1->txnid > m2->txnid) { + /* first is most recent */ + which = 0; + } + else if (m1->txnid < m2->txnid) { + /* second is most recent */ + which = 1; + } + else { + /* invalid state */ + return EINVAL; + } + + /* checksum the metapage found and abort if checksum doesn't match */ + BT_meta *meta = state->meta_pages[which]; + uint32_t chk = nonzero_crc_32(meta, BT_META_LEN); + if (chk != meta->chk) { + abort(); + } + + /* set which in state */ + state->which = which; + + return BT_SUCC; +} + +static int +_bt_state_read_header(BT_state *state) +{ + BT_meta *m1, *m2; + int rc = 1; + BYTE metas[BT_PAGESIZE*2] = {0}; + m1 = state->meta_pages[0]; + m2 = state->meta_pages[1]; + + TRACE(); + + if (pread(state->data_fd, metas, BT_PAGESIZE*2, 0) + != BT_PAGESIZE*2) { + /* new pma */ + return ENOENT; + } + + /* validate magic */ + if (m1->magic != BT_MAGIC) { + DPRINTF("metapage 0x%pX inconsistent magic: 0x%" PRIX32, m1, m1->magic); + return EINVAL; + } + if (m2->magic != BT_MAGIC) { + DPRINTF("metapage 0x%pX inconsistent magic: 0x%" PRIX32, m2, m2->magic); + return EINVAL; + } + + /* validate flags */ + if ((m1->flags & BP_META) != BP_META) { + DPRINTF("metapage 0x%pX missing meta page flag", m1); + return EINVAL; + } + if ((m2->flags & BP_META) != BP_META) { + DPRINTF("metapage 0x%pX missing meta page flag", m2); + return EINVAL; + } + + /* validate binary version */ + if (m1->version != BT_VERSION) { + DPRINTF("version mismatch on metapage: 0x%pX, metapage version: %" PRIu32 ", binary version %u", + m1, m1->version, BT_VERSION); + return EINVAL; + } + + /* validate binary version */ + if (m2->version != BT_VERSION) { + DPRINTF("version mismatch on metapage: 0x%pX, metapage version: %" PRIu32 ", binary version %u", + m2, m2->version, BT_VERSION); + return EINVAL; + } + + if (!SUCC(rc = _bt_state_meta_which(state))) + return rc; + + return BT_SUCC; +} + +static int +_bt_state_meta_new(BT_state *state) +#define INITIAL_ROOTPG 2 +{ + BT_page *p1, *p2, *root; + BT_meta meta = {0}; + + TRACE(); + + /* open the metapage region for writing */ + if (mprotect(BT_MAPADDR, BT_META_SECTION_WIDTH, + BT_PROT_DIRTY) != 0) { + DPRINTF("mprotect of metapage section failed with %s", strerror(errno)); + abort(); + } + + /* initialize the block base array */ + meta.blk_base[0] = BT_PAGESIZE * BT_NUMMETAS; + + root = _bt_nalloc(state); + _bt_root_new(&meta, root); + + /* initialize meta struct */ + meta.magic = BT_MAGIC; + meta.version = BT_VERSION; + meta.last_pg = 1; + meta.txnid = 0; + meta.fix_addr = BT_MAPADDR; + meta.blk_cnt = 1; + meta.depth = 1; + meta.flags = BP_META; + meta.root = _fo_get(state, root); + assert(meta.root == INITIAL_ROOTPG); /* ;;: remove?? 
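+      Pages 0 and 1 are the two metapages, so the first node handed out by
+      _bt_nalloc should land at pgno 2.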
*/ + + /* initialize the metapages */ + p1 = &((BT_page *)state->map)[0]; + p2 = &((BT_page *)state->map)[1]; + + /* copy the metadata into the metapages */ + memcpy(METADATA(p1), &meta, sizeof meta); + /* ;;: todo, should the second metapage actually share a .root with the + first?? */ + memcpy(METADATA(p2), &meta, sizeof meta); + + /* only the active metapage should be writable (first page) */ + if (mprotect(BT_MAPADDR, BT_META_SECTION_WIDTH, BT_PROT_CLEAN) != 0) { + DPRINTF("mprotect of metapage section failed with %s", strerror(errno)); + abort(); + } + if (mprotect(BT_MAPADDR, BT_PAGESIZE, + BT_PROT_DIRTY) != 0) { + DPRINTF("mprotect of current metapage failed with %s", strerror(errno)); + abort(); + } + + return BT_SUCC; +} + +static void +_freelist_restore2(BT_state *state, BT_page *node, + uint8_t depth, uint8_t maxdepth) +{ + size_t N = _bt_numkeys(node); + + /* leaf */ + if (depth == maxdepth) { + for (size_t i = 0; i < N-1; i++) { + /* if allocated */ + if (node->datk[i].fo != 0) { + /* record allocated memory range */ + BT_page *lo = off2addr(node->datk[i].va); + BT_page *hi = off2addr(node->datk[i+1].va); + _mlist_record_alloc(state, lo, hi); + /* record allocated file range */ + ssize_t siz_p = hi - lo; + assert(siz_p > 0); + assert(siz_p < UINT32_MAX); + pgno_t lofo = node->datk[i].fo; + pgno_t hifo = lofo + (pgno_t)siz_p; + _flist_record_alloc(state, lofo, hifo); + } + } + return; + } + /* branch */ + for (size_t i = 0; i < N-1; i++) { + pgno_t fo = node->datk[i].fo; + if (fo != 0) { + /* record allocated node */ + BT_page *child = _node_get(state, fo); + _nlist_record_alloc(state, child); + _freelist_restore2(state, child, depth+1, maxdepth); + } + } +} + +static void +_freelist_restore(BT_state *state) +/* restores the mlist, nlist, and mlist */ +{ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + assert(SUCC(_nlist_new(state))); + assert(SUCC(_mlist_new(state))); + assert(SUCC(_flist_new(state))); + /* first record root's allocation */ + _nlist_record_alloc(state, root); + _freelist_restore2(state, root, 1, meta->depth); +} + +static int +_bt_state_load(BT_state *state) +{ + int rc; + int new = 0; + BT_page *p; + struct stat stat; + + TRACE(); + + /* map first node stripe (along with metapages) as read only */ + state->map = mmap(BT_MAPADDR, + BT_META_SECTION_WIDTH + BLK_BASE_LEN0, + BT_PROT_CLEAN, + BT_FLAG_CLEAN, + state->data_fd, + 0); + + p = (BT_page *)state->map; + state->meta_pages[0] = METADATA(p); + state->meta_pages[1] = METADATA(p + 1); + + if (!SUCC(rc = _bt_state_read_header(state))) { + if (rc != ENOENT) return rc; + DPUTS("creating new db"); + state->file_size = PMA_GROW_SIZE; + new = 1; + if(ftruncate(state->data_fd, PMA_GROW_SIZE)) { + return errno; + } + } + + if (state->map != BT_MAPADDR) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", BT_MAPADDR, strerror(errno)); + abort(); + } + + BYTE *nullspace_addr = BT_MAPADDR + (BT_META_SECTION_WIDTH + BLK_BASE_LEN0); + size_t nullspace_len = BLK_BASE_LEN_TOTAL - (BT_META_SECTION_WIDTH + BLK_BASE_LEN0); + if (nullspace_addr != mmap(nullspace_addr, + nullspace_len, + BT_PROT_FREE, + BT_FLAG_FREE, + 0, 0)) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", nullspace_addr, strerror(errno)); + abort(); + } + + /* new db, so populate metadata */ + if (new) { + /* ;;: move this logic to _flist_new */ + if (-1 == lseek(state->data_fd, state->file_size, SEEK_SET)) + return errno; + if (-1 == write(state->data_fd, "", 1)) + return errno; + + 
state->file_size = PMA_GROW_SIZE; + + assert(SUCC(_nlist_new(state))); + + if (!SUCC(rc = _bt_state_meta_new(state))) { + munmap(state->map, BT_ADDRSIZE); + return rc; + } + + assert(SUCC(_mlist_new(state))); + assert(SUCC(_flist_new(state))); + } + else { + /* restore data memory maps */ + _bt_state_restore_maps(state); + + /* restore ephemeral freelists */ + _freelist_restore(state); + + /* Dirty the metapage and root page */ + assert(SUCC(_bt_flip_meta(state))); + + /* Set the file length */ + // XX make sure the flist is updated with this! + if (fstat(state->data_fd, &stat) != 0) + return errno; + + state->file_size = stat.st_size; + } + + return BT_SUCC; +} + +/* ;;: TODO, when persistence has been implemented, _bt_falloc will probably + need to handle extension of the file with appropriate striping. i.e. if no + space is found on the freelist, save the last entry, expand the file size, + and set last_entry->next to a new node representing the newly added file + space */ +static pgno_t +_bt_falloc(BT_state *state, size_t pages) +{ + /* walk the persistent file freelist and return a pgno with sufficient + contiguous space for pages */ + BT_flistnode **n = &state->flist; + pgno_t ret = 0; + + /* first fit */ + for (; *n; n = &(*n)->next) { + size_t sz_p = (*n)->hi - (*n)->lo; + + if (sz_p >= pages) { + ret = (*n)->lo; + pgno_t hi = ret + pages; + _flist_record_alloc(state, ret, hi); + break; + } + } + + if (ret == 0) { + DPUTS("flist out of mem!"); + return UINT32_MAX; + } + + return ret; +} + +static int +_bt_sync_hasdirtypage(BT_state *state, BT_page *node) __attribute((unused)); + +static int +_bt_sync_hasdirtypage(BT_state *state, BT_page *node) +/* ;;: could be more efficiently replaced by a gcc vectorized builtin */ +{ + for (size_t i = 0; i < NMEMB(node->head.dirty); i++) { + if (node->head.dirty[i] != 0) + return 1; + } + + return 0; +} + +static int +_bt_sync_leaf(BT_state *state, BT_page *node) +{ + /* msync all of a leaf's data that is dirty. The caller is expected to sync + the node itself and mark it as clean in the parent. */ + size_t i = 0; + size_t N = _bt_numkeys(node); + + for (i = 0; i < N-1; i++) { + if (!_bt_ischilddirty(node, i)) + continue; /* not dirty. nothing to do */ + + /* ;;: we don't actually need the page, do we? */ + /* pgno_t pg = node->datk[i].fo; */ + vaof_t lo = node->datk[i].va; + vaof_t hi = node->datk[i+1].va; + size_t bytelen = P2BYTES(hi - lo); + void *addr = off2addr(lo); + + /* sync the page */ + if (msync(addr, bytelen, MS_SYNC) != 0) { + DPRINTF("msync of leaf: %p failed with %s", addr, strerror(errno)); + abort(); + } + + /* mprotect the data */ + if (mprotect(addr, bytelen, BT_PROT_CLEAN) != 0) { + DPRINTF("mprotect of leaf data failed with %s", strerror(errno)); + abort(); + } + + /* and clean the dirty bit */ + _bt_cleanchild(node, i); + } + + /* ;;: all data pages synced. should we now sync the node as well? No, I think + that should be the caller's responsibility */ + + /* ;;: it is probably faster to scan the dirty bit set and derive the datk idx + rather than iterate over the full datk array and check if it is dirty. This + was simpler to implement for now though. */ + /* while (_bt_sync_hasdirtypage(state, node)) { */ + /* ... */ + /* } */ + + return BT_SUCC; +} + +static int +_bt_sync_meta(BT_state *state) +/* syncs the metapage and performs necessary checksumming. 
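+   The commit protocol, roughly: (1) _bt_sync msyncs dirty data and nodes
+   bottom-up; (2) this routine stamps the active metapage with a fresh txnid
+   and a non-zero checksum and msyncs it; (3) _bt_flip_meta zeroes the other
+   metapage's chk, copies the metadata over, CoWs a new root, and makes that
+   metapage the active (writable) one. On restore, _bt_state_meta_which picks
+   the metapage with a non-zero chk, preferring the higher txnid.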
Additionally, flips + the which */ +{ + BT_meta *meta = state->meta_pages[state->which]; + uint32_t chk; + int rc; + + /* increment the txnid */ + meta->txnid += 1; + + /* checksum the metapage */ + chk = nonzero_crc_32(meta, BT_META_LEN); + /* ;;: todo: guarantee the chk cannot be zero */ + + meta->chk = chk; + + /* sync the metapage */ + if (msync(LO_ALIGN_PAGE(meta), sizeof(BT_page), MS_SYNC) != 0) { + DPRINTF("msync of metapage: %p failed with %s", meta, strerror(errno)); + abort(); + } + + // ensure we have a new dirty metapage and root node + /* finally, make old metapage clean */ + rc = _bt_flip_meta(state); + + if (mprotect(LO_ALIGN_PAGE(meta), sizeof(BT_page), BT_PROT_CLEAN) != 0) { + DPRINTF("mprotect of old metapage failed with %s", strerror(errno)); + abort(); + } + + return rc; +} + +static int _bt_flip_meta(BT_state *state) { + BT_meta *meta = state->meta_pages[state->which]; + BT_meta *newmeta; + int newwhich; + + /* zero the new metapage's checksum */ + newwhich = state->which ? 0 : 1; + newmeta = state->meta_pages[newwhich]; + + /* mprotect dirty new metapage */ + if (mprotect(LO_ALIGN_PAGE(newmeta), sizeof(BT_page), BT_PROT_DIRTY) != 0) { + DPRINTF("mprotect of new metapage failed with %s", strerror(errno)); + abort(); + } + + newmeta->chk = 0; + + /* copy over metapage to new metapage excluding the checksum */ + memcpy(newmeta, meta, BT_META_LEN); + + /* CoW a new root since the root referred to by the metapage should always be + dirty */ + BT_page *root; + pgno_t newrootpg; + root = _node_get(state, newmeta->root); + if (!SUCC(_node_cow(state, root, &newrootpg))) + abort(); + + newmeta->root = newrootpg; + + /* switch the metapage we're referring to */ + state->which = newwhich; + + return BT_SUCC; +} + +static int +_bt_sync(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth) +/* recursively syncs the subtree under node. The caller is expected to sync node + itself and mark it clean. */ +{ + int rc = 0; + size_t N = _bt_numkeys(node); + + /* leaf */ + if (depth == maxdepth) { + _bt_sync_leaf(state, node); + goto e; + } + + /* do dfs */ + for (size_t i = 0; i < N-1; i++) { + if (!_bt_ischilddirty(node, i)) + continue; /* not dirty. 
nothing to do */ + + BT_page *child = _node_get(state, node->datk[i].fo); + + /* recursively sync the child's data */ + if ((rc = _bt_sync(state, child, depth+1, maxdepth))) + return rc; + + /* sync the child node */ + if (msync(child, sizeof(BT_page), MS_SYNC) != 0) { + DPRINTF("msync of child node: %p failed with %s", child, strerror(errno)); + abort(); + } + + /* unset child dirty bit */ + _bt_cleanchild(node, i); + } + + e: + /* all modifications done in node, mark it read-only */ + if (mprotect(node, sizeof(BT_page), BT_PROT_CLEAN) != 0) { + DPRINTF("mprotect of node failed with %s", strerror(errno)); + abort(); + } + + return BT_SUCC; +} + + +//// =========================================================================== +//// btree external routines + +int +bt_state_new(BT_state **state) +{ + // TRACE(); + + BT_state *s = calloc(1, sizeof *s); + s->data_fd = -1; + s->fixaddr = BT_MAPADDR; + *state = s; + return BT_SUCC; +} + +#define DATANAME "/data.pma" +int +bt_state_open(BT_state *state, const char *path, ULONG flags, mode_t mode) +{ + int oflags, rc; + char *dpath; + + TRACE(); + UNUSED(flags); + + oflags = O_RDWR | O_CREAT; + dpath = malloc(strlen(path) + sizeof(DATANAME)); + if (!dpath) return ENOMEM; + sprintf(dpath, "%s" DATANAME, path); + + if ((state->data_fd = open(dpath, oflags, mode)) == -1) + return errno; + + if (!SUCC(rc = _bt_state_load(state))) + goto e; + + state->path = strdup(dpath); + + e: + /* cleanup FDs stored in state if anything failed */ + if (!SUCC(rc)) { + if (state->data_fd != -1) CLOSE_FD(state->data_fd); + } + + free(dpath); + return rc; +} + +int +bt_state_close(BT_state *state) +{ + int rc; + bt_sync(state); + + _mlist_delete(state); + _flist_delete(state); + _nlist_delete(state); + + if ((rc = munmap(state->map, BT_ADDRSIZE)) != 0) { + rc = errno; + return rc; + } + if (state->data_fd != -1) CLOSE_FD(state->data_fd); + + ZERO(state, sizeof *state); + + return BT_SUCC; +} + +void * +bt_malloc(BT_state *state, size_t pages) +{ + BT_mlistnode **n = &state->mlist; + void *ret = 0; + /* first fit */ + for (; *n; n = &(*n)->next) { + size_t sz_p = addr2off((*n)->hi) - addr2off((*n)->lo); + + if (sz_p >= pages) { + ret = (*n)->lo; + BT_page *hi = ((BT_page *)ret) + pages; + _mlist_record_alloc(state, ret, hi); + break; + } + // XX return early if nothing suitable found in freelist + } + if (ret == 0) { + DPUTS("mlist out of mem!"); + return 0; + } + + pgno_t pgno = _bt_falloc(state, pages); + bp(pgno != 0); + _bt_insert(state, + addr2off(ret), + addr2off(ret) + pages, + pgno); + + DPRINTF("map %p to offset 0x%zx bytes (0x%zx pages)\n", ret, P2BYTES(pgno), pages); + if (ret != + mmap(ret, + P2BYTES(pages), + BT_PROT_DIRTY, + BT_FLAG_DIRTY, + state->data_fd, + P2BYTES(pgno))) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", ret, strerror(errno)); + abort(); + } + bp(ret != 0); + return ret; +} + +// XX need to mmap fixed/anon/no_reserve and prot_none +void +bt_free(BT_state *state, void *lo, void *hi) +{ + vaof_t looff = addr2off(lo); + vaof_t hioff = addr2off(hi); + pgno_t lopg, hipg; + BT_findpath path = {0}; + + if (!SUCC(_bt_find(state, &path, looff, hioff))) { + DPRINTF("Failed to find range: (%p, %p)", lo, hi); + abort(); + } + + /* insert null into btree */ + _bt_insert(state, looff, hioff, 0); + /* insert freed range into mlist */ + _mlist_insert(state, lo, hi); + /* insert freed range into flist */ + BT_page *leaf = path.path[path.depth]; + size_t childidx = path.idx[path.depth]; + int isdirty = _bt_ischilddirty(leaf, childidx); + BT_kv 
kv = leaf->datk[childidx]; + vaof_t offset = looff - kv.va; + lopg = kv.fo + offset; + hipg = lopg + (looff - hioff); + if (isdirty) { + _flist_insert(&state->flist, lopg, hipg); + } + else { + _flist_insert(&state->pending_flist, lopg, hipg); + } + + /* ;;: is this correct? Shouldn't this actually happen when we merge the + pending_mlist on sync? */ + size_t bytelen = (BYTE *)hi - (BYTE *)lo; + + if (lo != + mmap(lo, + bytelen, + BT_PROT_FREE, + BT_FLAG_FREE, + 0, 0)) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", lo, strerror(errno)); + abort(); + } +} + +// XX need to mprotect PROT_READ all ranges synced including root/meta +int +bt_sync(BT_state *state) +{ + /* as is often the case, handling the metapage/root is a special case, which + is done here. Syncing any other page of the tree is done in _bt_sync */ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = _node_get(state, meta->root); + int rc = 0; + + if ((rc = _bt_sync(state, root, 1, meta->depth))) + return rc; + + /* merge the pending freelists */ + _pending_nlist_merge(state); + _pending_flist_merge(state); + + /* sync the root page */ + if (msync(root, sizeof(BT_page), MS_SYNC) != 0) { + DPRINTF("msync of root: %p failed with %s", root, strerror(errno)); + abort(); + } + + /* make root read-only */ + if (mprotect(root, sizeof(BT_page), BT_PROT_CLEAN) != 0) { + DPRINTF("mprotect of root failed with %s", strerror(errno)); + abort(); + } + + /* then sync the metapage */ + if ((rc = _bt_sync_meta(state))) + return rc; + + return BT_SUCC; +} + +uint64_t +bt_meta_get(BT_state *state, size_t idx) +{ + BT_meta *meta = state->meta_pages[state->which]; + assert((uintptr_t)&(meta->roots[idx]) - (uintptr_t)meta <= sizeof *meta); + return meta->roots[idx]; +} + +void +bt_meta_set(BT_state *state, size_t idx, uint64_t val) +{ + BT_meta *meta = state->meta_pages[state->which]; + assert((uintptr_t)&(meta->roots[idx]) - (uintptr_t)meta <= sizeof *meta); + meta->roots[idx] = val; +} + +int +_bt_range_of(BT_state *state, vaof_t p, vaof_t **lo, vaof_t **hi, + pgno_t nodepg, uint8_t depth, uint8_t maxdepth) +{ + BT_page *node = _node_get(state, nodepg); + size_t N = _bt_numkeys(node); + + vaof_t llo = 0; + vaof_t hhi = 0; + pgno_t pg = 0; + size_t i; + for (i = 0; i < N-1; i++) { + llo = node->datk[i].va; + hhi = node->datk[i+1].va; + pg = node->datk[i].fo; + if (llo <= p && hhi > p) { + break; + } + } + /* not found */ + if (i == N-1) + return 1; + + if (depth == maxdepth) { + **lo = llo; + **hi = hhi; + return BT_SUCC; + } + + return _bt_range_of(state, p, lo, hi, pg, depth+1, maxdepth); +} + +int +bt_range_of(BT_state *state, void *p, void **lo, void **hi) +{ + /* traverse tree looking for lo <= p and hi > p. 
return that range as a pair + of pointers NOT as two vaof_t + + 0: succ (found) + 1: otherwise + */ + + BT_meta *meta = state->meta_pages[state->which]; + pgno_t root = meta->root; + vaof_t *loret = 0; + vaof_t *hiret = 0; + vaof_t poff = addr2off(p); + int rc = 0; + if (!SUCC(rc = _bt_range_of(state, poff, &loret, &hiret, root, 1, meta->depth))) { + return rc; + } + *lo = off2addr(*loret); + *hi = off2addr(*hiret); + return BT_SUCC; +} + +/** + +pseudocode from ed: + +bt_dirty(btree, lo, hi): + loop: + (range_lo, range_hi) = find_range_for_pointer(btree, lo); + dirty_hi = min(hi, range_hi); + new_start_fo = data_cow(btree, lo, dirty_hi); + lo := range_hi; + if dirty_hi == hi then break; + +// precondition: given range does not cross a tree boundary +data_cow(btree, lo, hi): + (range_lo, range_hi, fo) = bt_find(btree, lo, hi); + size = lo - hi; + new_fo = data_alloc(btree.data_free, size); + + // puts data in the unified buffer cache without having to map virtual memory + write(fd, new_fo, size * BT_PAGESIZE, to_ptr(lo)); + + // maps new file offset with same data back into same memory + mmap(fd, new_fo, size, to_ptr(lo)); + + bt_insert(btree, lo, hi, new_fo); + + offset = lo - range_lo; + freelist_insert(btree.pending_data_flist, fo + offset, fo + offset + size); + return new_fo + +**/ + +static pgno_t +_bt_data_cow(BT_state *state, vaof_t lo, vaof_t hi, pgno_t pg) +{ + size_t len = hi - lo; + size_t bytelen = P2BYTES(len); + pgno_t newpg = _bt_falloc(state, len); + BYTE *loaddr = off2addr(lo); + off_t offset = P2BYTES(newpg); + + /* write call puts data in the unified buffer cache without having to map + virtual memory */ + if (pwrite(state->data_fd, loaddr, bytelen, offset) != (ssize_t)bytelen) + abort(); + + /* maps new file offset with same data back into memory */ + if (loaddr != + mmap(loaddr, + bytelen, + BT_PROT_DIRTY, + BT_FLAG_DIRTY, + state->data_fd, + offset)) { + DPRINTF("mmap: failed to map at addr %p, errno: %s", loaddr, strerror(errno)); + abort(); + } + + _bt_insert(state, lo, hi, newpg); + + _flist_insert(&state->pending_flist, pg, pg + len); + + return newpg; +} + +#define MIN(x, y) ((x) > (y) ? (y) : (x)) + +static int +_bt_dirty(BT_state *state, vaof_t lo, vaof_t hi, pgno_t nodepg, + uint8_t depth, uint8_t maxdepth) +{ + BT_page *node = _node_get(state, nodepg); + size_t N = _bt_numkeys(node); + size_t loidx = BT_DAT_MAXKEYS; // 0 is a valid loidx! + size_t hiidx = 0; + + /* find loidx of range */ + for (size_t i = 0; i < N-1; i++) { + vaof_t hhi = node->datk[i+1].va; + if (hhi > lo) { + loidx = i; + break; + } + } + assert(loidx < BT_DAT_MAXKEYS); + + /* find hiidx (exclusive) of range */ + for (size_t i = loidx+1; i < N; i++) { + vaof_t hhi = node->datk[i].va; + if (hhi >= hi) { + hiidx = i; + break; + } + } + assert(hiidx != 0); + + /* found a range in node that contains (lo-hi). May span multiple entries */ + /* leaf: base case. 
cow the data */ + if (depth == maxdepth) { + for (size_t i = loidx; i < hiidx; i++) { + vaof_t llo = node->datk[i].va; + vaof_t hhi = MIN(node->datk[i+1].va, hi); + pgno_t pg = node->datk[i].fo; + pgno_t newpg = _bt_data_cow(state, llo, hhi, pg); + _bt_insert(state, llo, hhi, newpg); + } + } else { + for (size_t i = loidx; i < hiidx; i++) { + /* branch: recursive case */ + pgno_t childpg = node->datk[i].fo; + /* iteratively recurse on all entries */ + _bt_dirty(state, lo, hi, childpg, depth+1, maxdepth); + } + } + return BT_SUCC; +} + +int +bt_dirty(BT_state *state, void *lo, void *hi) +{ + /* takes a range and ensures that entire range is CoWed */ + /* if part of the range is free then return 1 */ + BT_meta *meta = state->meta_pages[state->which]; + vaof_t looff = addr2off(lo); + vaof_t hioff = addr2off(hi); + + return _bt_dirty(state, looff, hioff, meta->root, 1, meta->depth); +} + +int +bt_next_alloc(BT_state *state, void *p, void **lo, void **hi) +/* if p is free, sets lo and hi to the bounds of the next adjacent allocated + space. If p is allocated, sets lo and hi to the bounds of the allocated space + it falls in. */ +{ + BT_mlistnode *head = state->mlist; + BYTE *pb = p; + BYTE* pma_end; + while (head) { + /* at last free block, different logic applies */ + if (head->next == 0) + goto end; + + /* p is in a free range, return the allocated hole after it */ + if (head->lo <= pb + && head->hi > pb) { + goto found; + } + + /* p is alloced, return this hole */ + if (head->next->lo > pb + && head->hi <= pb) { + goto found; + } + + head = head->next; + } + + /* not found */ + return 1; + + found: + /* the alloced space begins at the end of the free block */ + *lo = head->hi; + /* ... and ends at the start of the next free block */ + *hi = head->next->lo; + return BT_SUCC; + + end: + pma_end = (void *)((uintptr_t)BT_MAPADDR + BT_ADDRSIZE); + assert(head->hi <= pma_end); + /* no alloced region between tail of freelist and end of pma memory space */ + if (head->hi == pma_end) + return 1; + + /* otherwise, return the alloced region between the tail of the freelist and + the end of the memory arena */ + *lo = head->hi; + *hi = pma_end; + return BT_SUCC; +} + +void +bt_bounds(BT_state *state, void **lo, void **hi) +{ + *lo = BT_MAPADDR; + *hi = (void *)((uintptr_t)BT_MAPADDR + BT_ADDRSIZE); +} + +int +bt_inbounds(BT_state *state, void *p) +/* 1: if in the bounds of the PMA, 0 otherwise */ +{ + return p >= (void *)BT_MAPADDR + && p < (void *)((uintptr_t)BT_MAPADDR + BT_ADDRSIZE); +} + + +//// =========================================================================== +//// tests + +/* ;;: obv this should be moved to a separate file */ +static void +_sham_sync_clean(BT_page *node) +{ + for (uint8_t *dit = &node->head.dirty[0] + ; dit < &node->head.dirty[sizeof(node->head.dirty) - 1] + ; dit++) { + *dit = 0; + } +} + +static void +_sham_sync2(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth) +{ + if (depth == maxdepth) return; + + /* clean node */ + _sham_sync_clean(node); + + /* then recurse and clean all children with DFS */ + size_t N = _bt_numkeys(node); + for (size_t i = 1; i < N; ++i) { + BT_kv kv = node->datk[i]; + pgno_t childpg = kv.fo; + BT_page *child = _node_get(state, childpg); + _sham_sync2(state, child, depth+1, maxdepth); + } +} + +static void +_sham_sync(BT_state *state) __attribute((unused)); + +static void +_sham_sync(BT_state *state) +{ + /* walk the tree and unset the dirty bit from all pages */ + BT_meta *meta = state->meta_pages[state->which]; + BT_page *root = 
+
+
+//// ===========================================================================
+//// tests
+
+/* ;;: obviously this should be moved to a separate file */
+static void
+_sham_sync_clean(BT_page *node)
+{
+  for (uint8_t *dit = &node->head.dirty[0]
+         ; dit < &node->head.dirty[sizeof(node->head.dirty) - 1]
+         ; dit++) {
+    *dit = 0;
+  }
+}
+
+static void
+_sham_sync2(BT_state *state, BT_page *node, uint8_t depth, uint8_t maxdepth)
+{
+  if (depth == maxdepth) return;
+
+  /* clean node */
+  _sham_sync_clean(node);
+
+  /* then recurse and clean all children with DFS */
+  size_t N = _bt_numkeys(node);
+  for (size_t i = 1; i < N; ++i) {
+    BT_kv kv = node->datk[i];
+    pgno_t childpg = kv.fo;
+    BT_page *child = _node_get(state, childpg);
+    _sham_sync2(state, child, depth+1, maxdepth);
+  }
+}
+
+static void
+_sham_sync(BT_state *state) __attribute__((unused));
+
+static void
+_sham_sync(BT_state *state)
+{
+  /* walk the tree and unset the dirty bit on all pages */
+  BT_meta *meta = state->meta_pages[state->which];
+  BT_page *root = _node_get(state, meta->root);
+  meta->chk = nonzero_crc_32(meta, BT_META_LEN);
+  _sham_sync2(state, root, 1, meta->depth);
+}
+
+static void
+_bt_printnode(BT_page *node)
+{
+  fprintf(stderr, "node: %p\n", (void*)node);
+  fprintf(stderr, "data: \n");
+  for (size_t i = 0; i < BT_DAT_MAXKEYS; ++i) {
+    if (i && node->datk[i].va == 0)
+      break;
+    fprintf(stderr, "[%5zu] %10x %10x\n", i, node->datk[i].va, node->datk[i].fo);
+  }
+}
+
+/*
+  _bt_state_restore_maps2
+  if pg 0:
+    mmap MAP_ANONYMOUS | MAP_FIXED | MAP_NORESERVE
+         PROT_NONE
+
+  if pg !0:
+    mmap MAP_SHARED | MAP_FIXED
+         PROT_READ
+
+
+  ------------------
+
+  the three routines that modify the data maps are:
+
+  bt_malloc:
+
+    MAP_SHARED | MAP_FIXED
+    PROT_READ | PROT_WRITE
+
+  _bt_data_cow:
+
+    MAP_SHARED | MAP_FIXED
+    PROT_READ | PROT_WRITE
+
+  bt_sync:
+
+    (mprotect)
+    PROT_READ
+
+  bt_free:
+
+    MAP_ANONYMOUS | MAP_FIXED | MAP_NORESERVE
+    PROT_NONE
+
+  -----------------
+
+  8 linear mappings (striping)
+
+  when we _bt_nalloc: mprotect(PROT_READ | PROT_WRITE)
+
+  when we free a node: mprotect(PROT_NONE)
+
+  additionally, when we sync, all allocated nodes: mprotect(PROT_READ)
+*/
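The comment above summarizes the mapping and protection protocol for the data region. As a rough, Linux-flavored sketch of what those states look like as calls (the helper names are invented; MAP_PRIVATE is added only because MAP_ANONYMOUS needs a sharing flag, and everything else follows the comment), assuming a page-aligned range backed by fd at file offset off:

#include <stddef.h>
#include <sys/types.h>
#include <sys/mman.h>

/* free region: anonymous, unreserved, and inaccessible until reused */
static int demo_map_free(void *lo, size_t len)
{
  void *p = mmap(lo, len, PROT_NONE,
                 MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE,
                 -1, 0);
  return p == MAP_FAILED ? -1 : 0;
}

/* allocated or CoWed region: shared file mapping, readable and writable */
static int demo_map_dirty(void *lo, size_t len, int fd, off_t off)
{
  void *p = mmap(lo, len, PROT_READ | PROT_WRITE,
                 MAP_SHARED | MAP_FIXED, fd, off);
  return p == MAP_FAILED ? -1 : 0;
}

/* after a sync: drop write permission so the synced snapshot stays read-only */
static int demo_protect_synced(void *lo, size_t len)
{
  return mprotect(lo, len, PROT_READ);
}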
diff --git a/rust/ares_pma/c-src/btree.h b/rust/ares_pma/c-src/btree.h
new file mode 100644
index 0000000..aad81e0
--- /dev/null
+++ b/rust/ares_pma/c-src/btree.h
@@ -0,0 +1,87 @@
+#ifndef __BTREE_H__
+#define __BTREE_H__
+#include <sys/types.h>
+#include <stdint.h>
+
+struct BT_state;
+typedef struct BT_state BT_state;
+
+#define BT_PAGEBITS 14ULL
+#define BT_PAGESIZE (1ULL << BT_PAGEBITS) /* 16K */
+
+typedef unsigned long ULONG;
+
+//// ===========================================================================
+//// btree external routines
+
+/**
+ * Instantiate an opaque BT_state handle
+ */
+int bt_state_new(BT_state **state);
+
+/**
+ * Open the persistent state, or create it if one doesn't exist
+ */
+int bt_state_open(BT_state *state, const char *path, ULONG flags, mode_t mode);
+
+/**
+ * Close the persistent state
+ */
+int bt_state_close(BT_state *state);
+
+/**
+ * Allocate persistent memory space
+ */
+void * bt_malloc(BT_state *state, size_t pages);
+
+/**
+ * Free persistent memory space
+ */
+void bt_free(BT_state *state, void *lo, void *hi);
+
+/**
+ * Sync a snapshot of the persistent memory to disk.
+ * This will **exit the process** on failure to avoid data corruption.
+ */
+int bt_sync(BT_state *state);
+
+/**
+ * Get a metadata entry
+ */
+uint64_t bt_meta_get(BT_state *state, size_t idx);
+
+/**
+ * Set a metadata entry
+ */
+void bt_meta_set(BT_state *state, size_t idx, uint64_t val);
+
+/**
+ * Return the allocation range in the btree that a pointer lives in
+ */
+int bt_range_of(BT_state *state, void *p, void **lo, void **hi);
+
+/**
+ * Ensure a region of memory is "dirty", i.e. can be mutated.
+ *
+ * A successful call to bt_dirty ensures that the memory range can be mutated
+ * until the next call to `bt_sync()`
+ */
+int bt_dirty(BT_state *state, void *lo, void *hi);
+
+/**
+ * Given a pointer, return the containing region of allocated memory, or the
+ * next highest allocated region if the pointer points to free memory
+ */
+int bt_next_alloc(BT_state *state, void *p, void **lo, void **hi);
+
+/**
+ * Return the memory bounds of the persistent-memory B-tree
+ */
+void bt_bounds(BT_state *state, void **lo, void **hi);
+
+/**
+ * Return whether a pointer is within the persistent-memory B-tree
+ */
+int bt_inbounds(BT_state *state, void *p);
+
+#endif
diff --git a/rust/ares/src/pma/includes/checksum.c b/rust/ares_pma/c-src/lib/checksum.c
similarity index 100%
rename from rust/ares/src/pma/includes/checksum.c
rename to rust/ares_pma/c-src/lib/checksum.c
diff --git a/rust/ares/src/pma/includes/checksum.h b/rust/ares_pma/c-src/lib/checksum.h
similarity index 100%
rename from rust/ares/src/pma/includes/checksum.h
rename to rust/ares_pma/c-src/lib/checksum.h
diff --git a/rust/ares_pma/c-src/wrapper.h b/rust/ares_pma/c-src/wrapper.h
new file mode 100644
index 0000000..5c56c79
--- /dev/null
+++ b/rust/ares_pma/c-src/wrapper.h
@@ -0,0 +1 @@
+#include "btree.h"
diff --git a/rust/ares_pma/src/lib.rs b/rust/ares_pma/src/lib.rs
new file mode 100644
index 0000000..a38a13a
--- /dev/null
+++ b/rust/ares_pma/src/lib.rs
@@ -0,0 +1,5 @@
+#![allow(non_upper_case_globals)]
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+
+include!(concat!(env!("OUT_DIR"), "/bindings.rs"));
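To make the new header concrete, a minimal usage sketch follows. Only the function names, signatures, and the 16K page size come from btree.h; the path, flags, and mode arguments, and the convention that 0 signals success, are assumptions.

#include <assert.h>
#include <string.h>
#include "btree.h"

int main(void)
{
  BT_state *state;
  assert(bt_state_new(&state) == 0);                 /* assuming 0 == success */
  assert(bt_state_open(state, "./pma-demo", 0, 0644) == 0);

  /* allocations are made in whole pages (BT_PAGESIZE = 16K) */
  char *buf = bt_malloc(state, 2);
  memset(buf, 0x2a, 2 * BT_PAGESIZE);

  assert(bt_sync(state) == 0);                       /* snapshot to disk */

  /* per the bt_dirty/bt_sync contract above, a synced range must be
     re-dirtied before it is written again */
  assert(bt_dirty(state, buf, buf + 2 * BT_PAGESIZE) == 0);
  buf[0] = 0;

  bt_free(state, buf, buf + 2 * BT_PAGESIZE);
  return bt_state_close(state);
}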