From 1d1b09520ca0a288f417c24a8d328d6682faaafc Mon Sep 17 00:00:00 2001
From: Luke Boswell
Date: Fri, 6 Sep 2024 08:53:10 +1000
Subject: [PATCH] add heap as a new crate

---
 Cargo.lock                     |  23 +++-
 Cargo.toml                     |   1 +
 crates/roc_std_heap/Cargo.toml |  13 ++
 crates/roc_std_heap/src/lib.rs | 239 +++++++++++++++++++++++++++++++++
 4 files changed, 273 insertions(+), 3 deletions(-)
 create mode 100644 crates/roc_std_heap/Cargo.toml
 create mode 100644 crates/roc_std_heap/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index f4299c78b8..1925825b62 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1399,9 +1399,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
 name = "libc"
-version = "0.2.149"
+version = "0.2.158"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b"
+checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
 
 [[package]]
 name = "libloading"
@@ -1540,6 +1540,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "memmap2"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fe751422e4a8caa417e13c3ea66452215d7d63e19e604f4980461212f3ae1322"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.6.5"
@@ -2705,7 +2714,7 @@ dependencies = [
  "indoc",
  "libc",
  "mach_object",
- "memmap2",
+ "memmap2 0.5.10",
  "object",
  "roc_collections",
  "roc_error_macros",
@@ -3133,6 +3142,14 @@ dependencies = [
  "static_assertions",
 ]
 
+[[package]]
+name = "roc_std_heap"
+version = "0.0.1"
+dependencies = [
+ "memmap2 0.9.4",
+ "roc_std",
+]
+
 [[package]]
 name = "roc_target"
 version = "0.0.1"
diff --git a/Cargo.toml b/Cargo.toml
index a02dc7b7de..6dafb57e8e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -29,6 +29,7 @@ members = [
     "crates/wasm_module",
     "crates/wasm_interp",
     "crates/language_server",
+    "crates/roc_std_heap",
 ]
 
 exclude = [
diff --git a/crates/roc_std_heap/Cargo.toml b/crates/roc_std_heap/Cargo.toml
new file mode 100644
index 0000000000..885643722c
--- /dev/null
+++ b/crates/roc_std_heap/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "roc_std_heap"
+description = "Rust representation of a threadsafe refcounted heap for Roc that avoids a wrapping Mutex and RefCell"
+
+authors = ["The Roc Contributors"]
+edition = "2021"
+license = "UPL-1.0"
+repository = "https://github.com/roc-lang/roc"
+version = "0.0.1"
+
+[dependencies]
+roc_std = { path = "../roc_std" }
+memmap2 = "=0.9.4"
diff --git a/crates/roc_std_heap/src/lib.rs b/crates/roc_std_heap/src/lib.rs
new file mode 100644
index 0000000000..eaab3065b8
--- /dev/null
+++ b/crates/roc_std_heap/src/lib.rs
@@ -0,0 +1,239 @@
+//! This heap module provides simple single-type heaps.
+//! These are used for allocating and deallocating resources beyond the scope of Roc.
+//!
+//! For example, Roc cannot free file resources itself.
+//! Instead, they will be tracked and reference counted in a side heap.
+//! When freed, the underlying resource can be released.
+//!
+//! To make checking resource types quick, each heap allocates from a single mmap.
+//! Then a simple range check can confirm whether a pointer points into a specific heap.
+
+use memmap2::MmapMut;
+use roc_std::RocBox;
+use std::{
+    cell::UnsafeCell,
+    ffi::c_void,
+    io::{Error, ErrorKind, Result},
+    marker::PhantomData,
+    mem, ptr,
+    sync::Mutex,
+};
+
+const REFCOUNT_ONE: usize = isize::MIN as usize;
+
+/// ThreadSafeRefcountedResourceHeap is a threadsafe version of the refcounted heap that avoids a wrapping Mutex and RefCell.
+/// This is very important for dealloc performance.
+/// No lock is needed to check if a pointer is in range of the underlying mmap.
+/// This leads to a solid perf bump over the naive lock-everywhere solution.
+/// By contrast, alloc and dealloc always take a mutex, but they are called much more rarely.
+/// If perf becomes a problem once basic-cli has threading, we should consider sharding the heap by thread.
+pub struct ThreadSafeRefcountedResourceHeap<T> {
+    heap: UnsafeCell<RefcountedResourceHeap<T>>,
+    guard: Mutex<()>,
+}
+
+impl<T> ThreadSafeRefcountedResourceHeap<T> {
+    pub fn new(max_elements: usize) -> Result<ThreadSafeRefcountedResourceHeap<T>> {
+        RefcountedResourceHeap::new(max_elements).map(|heap| ThreadSafeRefcountedResourceHeap {
+            heap: UnsafeCell::new(heap),
+            guard: Mutex::new(()),
+        })
+    }
+
+    pub fn alloc_for(self: &Self, data: T) -> Result<RocBox<()>> {
+        let _g = self.guard.lock().unwrap();
+        unsafe { &mut *self.heap.get() }.alloc_for(data)
+    }
+
+    pub fn dealloc<U>(self: &Self, ptr: *const U) {
+        let _g = self.guard.lock().unwrap();
+        unsafe { &mut *self.heap.get() }.dealloc(ptr)
+    }
+
+    // This is safe to call at any time with no lock!
+    pub fn in_range<U>(self: &Self, ptr: *const U) -> bool {
+        unsafe { &*self.heap.get() }.in_range(ptr)
+    }
+
+    pub fn box_to_resource<'a>(data: RocBox<()>) -> &'a mut T {
+        RefcountedResourceHeap::box_to_resource(data)
+    }
+}
+
+unsafe impl<T> Sync for ThreadSafeRefcountedResourceHeap<T> {}
+unsafe impl<T> Send for ThreadSafeRefcountedResourceHeap<T> {}
+
+#[repr(C)]
+struct Refcounted<T>(usize, T);
+
+/// RefcountedResourceHeap is a wrapper around Heap for data that Roc stores with a refcount.
+/// It returns a pointer to just after the refcount (what a `Box {}` would expect in Roc).
+pub struct RefcountedResourceHeap<T>(Heap<Refcounted<T>>);
+
+impl<T> RefcountedResourceHeap<T> {
+    pub fn new(max_elements: usize) -> Result<RefcountedResourceHeap<T>> {
+        Heap::new(max_elements).map(|heap| RefcountedResourceHeap(heap))
+    }
+
+    pub fn alloc_for(self: &mut Self, data: T) -> Result<RocBox<()>> {
+        self.0.alloc().map(|alloc_ptr| {
+            unsafe { std::ptr::write(alloc_ptr, Refcounted(REFCOUNT_ONE, data)) };
+            let box_ptr = alloc_ptr as usize + mem::size_of::<usize>();
+            unsafe { std::mem::transmute(box_ptr) }
+        })
+    }
+
+    pub fn dealloc<U>(self: &mut Self, ptr: *const U) {
+        self.0.dealloc(ptr as _);
+    }
+
+    pub fn in_range<U>(self: &Self, ptr: *const U) -> bool {
+        self.0.in_range(ptr as _)
+    }
+
+    pub fn box_to_resource<'a>(data: RocBox<()>) -> &'a mut T {
+        let box_ptr: usize = unsafe { std::mem::transmute(data) };
+
+        let alloc_ptr = (box_ptr - mem::size_of::<usize>()) as *mut Refcounted<T>;
+        let alloc: &mut Refcounted<T> = unsafe { &mut *alloc_ptr };
+        &mut alloc.1
+    }
+}
+
+/// The Heap is one mmap of data that can be interpreted multiple ways.
+///
+/// It can be viewed as a list of unions between `T` and `usize`.
+/// In the case of a `T`, it is an allocated element.
+/// In the case of a `usize`, it is part of the free list.
+/// The value of the `usize` points to the next free node.
+///
+/// Note: If we ever need better multithreaded performance,
+/// we could shard the heap and lock individual shards.
+pub struct Heap<T> {
+    data: MmapMut,
+    elements: usize,
+    max_elements: usize,
+    free_list: *const c_void,
+    phantom: PhantomData<T>,
+}
+
+unsafe impl<T> Send for Heap<T> {}
+
+impl<T> Heap<T> {
+    pub fn new(max_elements: usize) -> Result<Heap<T>> {
+        debug_assert!(max_elements > 0);
+
+        let max_bytes = max_elements * Self::node_size();
+        Ok(Self {
+            data: MmapMut::map_anon(max_bytes)?,
+            elements: 0,
+            max_elements,
+            free_list: ptr::null(),
+            phantom: PhantomData::default(),
+        })
+    }
+
+    pub fn alloc(self: &mut Self) -> Result<*mut T> {
+        if self.free_list != ptr::null() {
+            // Open slot on the free list.
+            let root = self.free_list as *const *const c_void;
+            let next = unsafe { *root };
+            self.free_list = next;
+
+            // Convert root into a `*mut T` for use.
+            return Ok(root as *mut T);
+        }
+
+        // If memory is still available, allocate at the end.
+        if self.elements < self.max_elements {
+            let offset = self.elements * Self::node_size();
+            let elem_ptr = unsafe { self.data.as_mut_ptr().offset(offset as isize) };
+            self.elements += 1;
+            return Ok(elem_ptr as *mut T);
+        }
+
+        return Err(Error::from(ErrorKind::OutOfMemory));
+    }
+
+    pub fn dealloc(self: &mut Self, elem_ptr: *mut T) {
+        debug_assert!(self.in_range(elem_ptr));
+
+        // Just push the freed value to the start of the free list.
+        let old_root = self.free_list;
+        self.free_list = elem_ptr as *const c_void;
+        unsafe { *(self.free_list as *mut *const c_void) = old_root };
+
+        unsafe {
+            // Free the underlying resource.
+            std::ptr::drop_in_place(elem_ptr);
+        }
+    }
+
+    pub fn in_range(self: &Self, elem_ptr: *mut T) -> bool {
+        let start = self.data.as_ptr();
+        let offset = self.elements * Self::node_size();
+        let end = unsafe { start.offset(offset as isize) };
+        (start as usize) <= (elem_ptr as usize) && (elem_ptr as usize) < (end as usize)
+    }
+
+    const fn node_size() -> usize {
+        let a = mem::size_of::<T>();
+        let b = mem::size_of::<usize>();
+        if a > b {
+            a
+        } else {
+            b
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::u128;
+
+    use super::*;
+
+    #[test]
+    fn alloc_to_limit() {
+        let limit = 4;
+        let mut heap = Heap::<u128>::new(limit).unwrap();
+        let mut ptrs = vec![];
+        loop {
+            match heap.alloc() {
+                Ok(ptr) => ptrs.push(ptr),
+                Err(_) => break,
+            }
+        }
+
+        assert_eq!(ptrs.len(), limit);
+        for ptr in ptrs {
+            assert!(heap.in_range(ptr));
+        }
+    }
+
+    #[test]
+    fn reuse_freed_elems() {
+        let limit = 4;
+        let mut heap = Heap::<u128>::new(limit).unwrap();
+        let a = heap.alloc().unwrap();
+        let b = heap.alloc().unwrap();
+        let c = heap.alloc().unwrap();
+        let d = heap.alloc().unwrap();
+
+        heap.dealloc(c);
+        assert_eq!(c, heap.alloc().unwrap());
+
+        assert!(heap.alloc().is_err());
+
+        heap.dealloc(d);
+        heap.dealloc(a);
+        heap.dealloc(b);
+
+        // These should be reused in reverse order.
+        assert_eq!(b, heap.alloc().unwrap());
+        assert_eq!(a, heap.alloc().unwrap());
+        assert_eq!(d, heap.alloc().unwrap());
+
+        assert!(heap.alloc().is_err());
+    }
+}
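
Usage sketch (not part of the patch): since the crate is host-facing, a short example may help reviewers see the intended wiring. Everything below is illustrative: the `File` element type, the capacity of 1024, and the `file_heap`/`host_open`/`host_read`/`host_dealloc` names are hypothetical stand-ins for whatever a platform such as basic-cli would define; only the `ThreadSafeRefcountedResourceHeap` calls come from the code in this patch.

use roc_std::RocBox;
use roc_std_heap::ThreadSafeRefcountedResourceHeap;
use std::ffi::c_void;
use std::fs::File;
use std::sync::OnceLock;

// One process-wide heap per resource type. The heap is a fixed-size mmap,
// so the element limit (1024 here, arbitrary) caps how many files can be
// tracked at once.
fn file_heap() -> &'static ThreadSafeRefcountedResourceHeap<File> {
    static HEAP: OnceLock<ThreadSafeRefcountedResourceHeap<File>> = OnceLock::new();
    HEAP.get_or_init(|| {
        ThreadSafeRefcountedResourceHeap::new(1024).expect("failed to mmap file heap")
    })
}

// Host effect that opens a file: the File lives in the side heap, and Roc
// only ever sees an opaque refcounted box.
fn host_open(path: &str) -> RocBox<()> {
    let file = File::open(path).expect("open failed");
    file_heap().alloc_for(file).expect("file heap exhausted")
}

// When Roc passes the box back into another effect, recover the File.
fn host_read(boxed: RocBox<()>) -> &'static mut File {
    ThreadSafeRefcountedResourceHeap::<File>::box_to_resource(boxed)
}

// In the host's dealloc hook, the lock-free `in_range` decides whether the
// pointer belongs to this heap. If it does, `dealloc` runs File's Drop
// (closing the descriptor) and pushes the node onto the free list.
unsafe fn host_dealloc(c_ptr: *mut c_void) {
    if file_heap().in_range(c_ptr) {
        file_heap().dealloc(c_ptr);
    } else {
        // Otherwise, fall through to the platform's normal free path.
    }
}

The design point this exercises: `in_range` takes no lock, so the hot dealloc path pays for the mutex only when the pointer actually belongs to the resource heap.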