From 298fde7e42bc5484c23691be4f3a41aba79f95d1 Mon Sep 17 00:00:00 2001 From: Folkert Date: Fri, 18 Feb 2022 23:44:36 +0100 Subject: [PATCH] split out RocStr --- roc_std/src/lib.rs | 361 +---------------------------------------- roc_std/src/roc_str.rs | 350 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 354 insertions(+), 357 deletions(-) create mode 100644 roc_std/src/roc_str.rs diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 4f5f480c58..4eecf04adb 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -2,14 +2,15 @@ #![no_std] use core::convert::From; use core::ffi::c_void; -use core::fmt::{self, Display, Formatter}; +use core::fmt; use core::mem::{ManuallyDrop, MaybeUninit}; -use core::ops::{Deref, DerefMut, Drop}; -use core::{mem, ptr, slice}; +use core::ops::Drop; mod roc_list; +mod roc_str; pub use roc_list::RocList; +pub use roc_str::RocStr; // A list of C functions that are being imported extern "C" { @@ -33,16 +34,6 @@ pub enum RocOrder { Lt = 2, } -//#[macro_export] -//macro_rules! roclist { -// () => ( -// $crate::RocList::default() -// ); -// ($($x:expr),+ $(,)?) 
=> ( -// $crate::RocList::from_slice(&[$($x),+]) -// ); -//} - #[derive(Clone, Copy, Debug)] pub enum Storage { ReadOnly, @@ -50,350 +41,6 @@ pub enum Storage { Capacity(usize), } -#[repr(C)] -pub struct RocStr { - elements: *mut u8, - length: usize, -} - -impl RocStr { - pub fn len(&self) -> usize { - if self.is_small_str() { - let bytes = self.length.to_ne_bytes(); - let last_byte = bytes[mem::size_of::() - 1]; - - (last_byte ^ 0b1000_0000) as usize - } else { - self.length - } - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn is_small_str(&self) -> bool { - (self.length as isize) < 0 - } - - pub fn get(&self, index: usize) -> Option<&u8> { - if index < self.len() { - Some(unsafe { - let raw = if self.is_small_str() { - self.get_small_str_ptr().add(index) - } else { - self.elements.add(index) - }; - - &*raw - }) - } else { - None - } - } - - pub fn get_bytes(&self) -> *const u8 { - if self.is_small_str() { - self.get_small_str_ptr() - } else { - self.elements - } - } - - pub fn storage(&self) -> Option { - use core::cmp::Ordering::*; - - if self.is_small_str() { - return None; - } - - unsafe { - let value = *self.get_storage_ptr(); - - // NOTE doesn't work with elements of 16 or more bytes - match isize::cmp(&(value as isize), &0) { - Equal => Some(Storage::ReadOnly), - Less => Some(Storage::Refcounted(value)), - Greater => Some(Storage::Capacity(value as usize)), - } - } - } - - fn get_storage_ptr(&self) -> *const isize { - let ptr = self.elements as *const isize; - - unsafe { ptr.offset(-1) } - } - - fn get_storage_ptr_mut(&mut self) -> *mut isize { - self.get_storage_ptr() as *mut isize - } - - fn get_element_ptr(elements: *const u8) -> *const usize { - let elem_alignment = core::mem::align_of::(); - let ptr = elements as *const usize; - - unsafe { - if elem_alignment <= core::mem::align_of::() { - ptr.add(1) - } else { - // If elements have an alignment bigger than usize (e.g. 
an i128), - // we will have necessarily allocated two usize slots worth of - // space for the storage value (with the first usize slot being - // padding for alignment's sake), and we need to skip past both. - ptr.add(2) - } - } - } - - fn get_small_str_ptr(&self) -> *const u8 { - (self as *const Self).cast() - } - - fn get_small_str_ptr_mut(&mut self) -> *mut u8 { - (self as *mut Self).cast() - } - - fn from_slice_with_capacity_str(slice: &[u8], capacity: usize) -> Self { - assert!( - slice.len() <= capacity, - "RocStr::from_slice_with_capacity_str length bigger than capacity {} {}", - slice.len(), - capacity - ); - if capacity < core::mem::size_of::() { - let mut rocstr = Self::default(); - let target_ptr = rocstr.get_small_str_ptr_mut(); - let source_ptr = slice.as_ptr() as *const u8; - for index in 0..slice.len() { - unsafe { - *target_ptr.add(index) = *source_ptr.add(index); - } - } - // Write length and small string bit to last byte of length. - let mut bytes = rocstr.length.to_ne_bytes(); - bytes[mem::size_of::() - 1] = capacity as u8 ^ 0b1000_0000; - rocstr.length = usize::from_ne_bytes(bytes); - - rocstr - } else { - let ptr = slice.as_ptr(); - let element_bytes = capacity; - - let num_bytes = core::mem::size_of::() + element_bytes; - - let elements = unsafe { - let raw_ptr = roc_alloc(num_bytes, core::mem::size_of::() as u32) as *mut u8; - // write the capacity - let capacity_ptr = raw_ptr as *mut usize; - *capacity_ptr = capacity; - - let raw_ptr = Self::get_element_ptr(raw_ptr as *mut u8); - - // write the refcount - let refcount_ptr = raw_ptr as *mut isize; - *(refcount_ptr.offset(-1)) = isize::MIN; - - { - // NOTE: using a memcpy here causes weird issues - let target_ptr = raw_ptr as *mut u8; - let source_ptr = ptr as *const u8; - let length = slice.len(); - - for index in 0..length { - *target_ptr.add(index) = *source_ptr.add(index); - } - } - - raw_ptr as *mut u8 - }; - - Self { - length: slice.len(), - elements, - } - } - } - - pub fn 
from_slice(slice: &[u8]) -> Self { - Self::from_slice_with_capacity_str(slice, slice.len()) - } - - pub fn as_slice(&self) -> &[u8] { - if self.is_empty() { - &[] - } else if self.is_small_str() { - unsafe { core::slice::from_raw_parts(self.get_small_str_ptr(), self.len()) } - } else { - unsafe { core::slice::from_raw_parts(self.elements, self.length) } - } - } - - pub fn as_mut_slice(&mut self) -> &mut [u8] { - if self.is_empty() { - &mut [] - } else if self.is_small_str() { - unsafe { core::slice::from_raw_parts_mut(self.get_small_str_ptr_mut(), self.len()) } - } else { - unsafe { core::slice::from_raw_parts_mut(self.elements, self.length) } - } - } - - pub fn as_str(&self) -> &str { - let slice = self.as_slice(); - - unsafe { core::str::from_utf8_unchecked(slice) } - } - - pub fn as_mut_str(&mut self) -> &mut str { - let slice = self.as_mut_slice(); - - unsafe { core::str::from_utf8_unchecked_mut(slice) } - } - - /// Write a CStr (null-terminated) representation of this RocStr into - /// the given buffer. - /// - /// # Safety - /// This assumes the given buffer has enough space, so make sure you only - /// pass in a pointer to an allocation that's at least as long as this Str! 
- pub unsafe fn write_c_str(&self, buf: *mut char) { - if self.is_small_str() { - ptr::copy_nonoverlapping(self.get_small_str_ptr(), buf as *mut u8, self.len()); - } else { - ptr::copy_nonoverlapping(self.elements, buf as *mut u8, self.len()); - } - - // null-terminate - *(buf.add(self.len())) = '\0'; - } -} - -impl Deref for RocStr { - type Target = str; - - fn deref(&self) -> &str { - self.as_str() - } -} - -impl DerefMut for RocStr { - fn deref_mut(&mut self) -> &mut str { - self.as_mut_str() - } -} - -impl Default for RocStr { - fn default() -> Self { - Self { - length: isize::MIN as usize, - elements: core::ptr::null_mut(), - } - } -} - -impl From<&str> for RocStr { - fn from(str: &str) -> Self { - Self::from_slice(str.as_bytes()) - } -} - -impl Display for RocStr { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - self.as_str().fmt(f) - } -} - -impl fmt::Debug for RocStr { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - // RocStr { is_small_str: false, storage: Refcounted(3), elements: [ 1,2,3,4] } - - match core::str::from_utf8(self.as_slice()) { - Ok(string) => f - .debug_struct("RocStr") - .field("is_small_str", &self.is_small_str()) - .field("storage", &self.storage()) - .field("string_contents", &string) - .finish(), - Err(_) => f - .debug_struct("RocStr") - .field("is_small_str", &self.is_small_str()) - .field("storage", &self.storage()) - .field("byte_contents", &self.as_slice()) - .finish(), - } - } -} - -impl PartialEq for RocStr { - fn eq(&self, other: &Self) -> bool { - self.as_slice() == other.as_slice() - } -} - -impl Eq for RocStr {} - -impl Clone for RocStr { - fn clone(&self) -> Self { - if self.is_small_str() { - Self { - elements: self.elements, - length: self.length, - } - } else { - let capacity_size = core::mem::size_of::(); - let copy_length = self.length + capacity_size; - let elements = unsafe { - // We use *mut u8 here even though technically these are - // usize-aligned (due to the refcount slot). 
- // This avoids any potential edge cases around there somehow - // being unreadable memory after the last byte, which would - // potentially get read when reading bytes at a time. - let raw_ptr = - roc_alloc(copy_length, core::mem::size_of::() as u32) as *mut u8; - let dest_slice = slice::from_raw_parts_mut(raw_ptr, copy_length); - let src_ptr = self.elements.offset(-(capacity_size as isize)) as *mut u8; - let src_slice = slice::from_raw_parts(src_ptr, copy_length); - - dest_slice.copy_from_slice(src_slice); - - *(raw_ptr as *mut usize) = self.length; - - (raw_ptr as *mut u8).add(capacity_size) - }; - - Self { - elements, - length: self.length, - } - } - } -} - -impl Drop for RocStr { - fn drop(&mut self) { - if !self.is_small_str() { - let storage_ptr = self.get_storage_ptr_mut(); - - unsafe { - let storage_val = *storage_ptr; - - if storage_val == REFCOUNT_1 || storage_val > 0 { - // If we have no more references, or if this was unique, - // deallocate it. - roc_dealloc(storage_ptr as *mut c_void, mem::align_of::() as u32); - } else if storage_val < 0 { - // If this still has more references, decrement one. - *storage_ptr = storage_val - 1; - } - - // The only remaining option is that this is in readonly memory, - // in which case we shouldn't attempt to do anything to it. - } - } - } -} - /// Like a Rust `Result`, but following Roc's ABI instead of Rust's. /// (Using Rust's `Result` instead of this will not work properly with Roc code!) 
///
diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs
new file mode 100644
--- /dev/null
+++ b/roc_std/src/roc_str.rs
@@ -0,0 +1,342 @@
+use core::ffi::c_void;
+use core::fmt::{self, Display, Formatter};
+use core::ops::{Deref, DerefMut, Drop};
+use core::{mem, ptr, slice};
+
+use crate::{roc_alloc, roc_dealloc, Storage, REFCOUNT_1};
+
+/// Flag bit stored in the last byte of a small string, alongside its length.
+const SMALL_STR_FLAG: u8 = 0b1000_0000;
+
+/// A string following Roc's ABI.
+///
+/// When the top bit of `length` is set the value is a "small string": its
+/// bytes are stored inline in the struct itself, and the last byte of `length`
+/// holds the byte count combined with `SMALL_STR_FLAG`. Otherwise `elements`
+/// points at `length` heap bytes that are preceded by one `isize` storage slot
+/// (see `RocStr::storage`).
+#[repr(C)]
+pub struct RocStr {
+    elements: *mut u8,
+    length: usize,
+}
+
+impl RocStr {
+    /// Returns the length of the string in bytes.
+    pub fn len(&self) -> usize {
+        if self.is_small_str() {
+            let bytes = self.length.to_ne_bytes();
+
+            (bytes[bytes.len() - 1] ^ SMALL_STR_FLAG) as usize
+        } else {
+            self.length
+        }
+    }
+
+    /// Returns `true` if the string contains no bytes.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns `true` if the bytes are stored inline rather than on the heap.
+    pub fn is_small_str(&self) -> bool {
+        (self.length as isize) < 0
+    }
+
+    /// Returns the byte at `index`, or `None` if `index` is out of bounds.
+    pub fn get(&self, index: usize) -> Option<&u8> {
+        self.as_slice().get(index)
+    }
+
+    /// Returns a pointer to the first byte of the string.
+    pub fn get_bytes(&self) -> *const u8 {
+        if self.is_small_str() {
+            self.get_small_str_ptr()
+        } else {
+            self.elements
+        }
+    }
+
+    /// Reads the storage slot of a heap string; small strings return `None`.
+    ///
+    /// Zero maps to `ReadOnly`, a negative value to `Refcounted`, and a
+    /// positive value to `Capacity`.
+    pub fn storage(&self) -> Option<Storage> {
+        use core::cmp::Ordering;
+
+        if self.is_small_str() {
+            return None;
+        }
+
+        // SAFETY: relies on heap strings being preceded by one storage slot.
+        let value = unsafe { *self.get_storage_ptr() };
+
+        Some(match value.cmp(&0) {
+            Ordering::Equal => Storage::ReadOnly,
+            Ordering::Less => Storage::Refcounted(value),
+            Ordering::Greater => Storage::Capacity(value as usize),
+        })
+    }
+
+    /// Address of the storage slot directly in front of the heap bytes.
+    fn get_storage_ptr(&self) -> *const isize {
+        let ptr = self.elements as *const isize;
+
+        // SAFETY: only meaningful for heap strings, whose allocation starts
+        // one slot before `elements`.
+        unsafe { ptr.offset(-1) }
+    }
+
+    fn get_storage_ptr_mut(&mut self) -> *mut isize {
+        self.get_storage_ptr() as *mut isize
+    }
+
+    /// Given the start of an allocation, returns where the string bytes begin.
+    fn get_element_ptr(elements: *const u8) -> *const usize {
+        // `u8` elements never need more alignment than `usize`, so exactly
+        // one storage slot precedes them.
+        unsafe { (elements as *const usize).add(1) }
+    }
+
+    fn get_small_str_ptr(&self) -> *const u8 {
+        (self as *const Self).cast()
+    }
+
+    fn get_small_str_ptr_mut(&mut self) -> *mut u8 {
+        (self as *mut Self).cast()
+    }
+
+    /// Builds a string from `slice`, reserving room for `capacity` bytes.
+    ///
+    /// # Panics
+    /// Panics if `slice` is longer than `capacity`.
+    fn from_slice_with_capacity_str(slice: &[u8], capacity: usize) -> Self {
+        assert!(
+            slice.len() <= capacity,
+            "RocStr::from_slice_with_capacity_str length bigger than capacity {} {}",
+            slice.len(),
+            capacity
+        );
+
+        if capacity < mem::size_of::<Self>() {
+            let mut rocstr = Self::default();
+            let target_ptr = rocstr.get_small_str_ptr_mut();
+            let source_ptr = slice.as_ptr();
+
+            for index in 0..slice.len() {
+                // SAFETY: `index < slice.len() <= capacity < size_of::<Self>()`.
+                unsafe {
+                    *target_ptr.add(index) = *source_ptr.add(index);
+                }
+            }
+
+            // Store the number of bytes actually written (not the requested
+            // capacity) together with the small-string flag.
+            let mut bytes = rocstr.length.to_ne_bytes();
+            let last = bytes.len() - 1;
+            bytes[last] = slice.len() as u8 ^ SMALL_STR_FLAG;
+            rocstr.length = usize::from_ne_bytes(bytes);
+
+            rocstr
+        } else {
+            let num_bytes = mem::size_of::<usize>() + capacity;
+
+            let elements = unsafe {
+                let raw_ptr = roc_alloc(num_bytes, mem::size_of::<usize>() as u32) as *mut u8;
+                let target_ptr = Self::get_element_ptr(raw_ptr) as *mut u8;
+
+                // write the refcount into the storage slot
+                *(target_ptr as *mut isize).offset(-1) = isize::MIN;
+
+                // NOTE: using a memcpy here causes weird issues
+                let source_ptr = slice.as_ptr();
+
+                for index in 0..slice.len() {
+                    *target_ptr.add(index) = *source_ptr.add(index);
+                }
+
+                target_ptr
+            };
+
+            Self {
+                length: slice.len(),
+                elements,
+            }
+        }
+    }
+
+    /// Creates a string holding a copy of `slice`.
+    pub fn from_slice(slice: &[u8]) -> Self {
+        Self::from_slice_with_capacity_str(slice, slice.len())
+    }
+
+    /// Borrows the string's bytes.
+    pub fn as_slice(&self) -> &[u8] {
+        if self.is_empty() {
+            &[]
+        } else {
+            // SAFETY: `get_bytes` points at `len` bytes owned by this string.
+            unsafe { slice::from_raw_parts(self.get_bytes(), self.len()) }
+        }
+    }
+
+    /// Mutably borrows the string's bytes.
+    pub fn as_mut_slice(&mut self) -> &mut [u8] {
+        if self.is_empty() {
+            &mut []
+        } else if self.is_small_str() {
+            unsafe { slice::from_raw_parts_mut(self.get_small_str_ptr_mut(), self.len()) }
+        } else {
+            unsafe { slice::from_raw_parts_mut(self.elements, self.length) }
+        }
+    }
+
+    /// Borrows the contents as a `&str` without checking for valid UTF-8.
+    pub fn as_str(&self) -> &str {
+        let slice = self.as_slice();
+
+        unsafe { core::str::from_utf8_unchecked(slice) }
+    }
+
+    /// Mutable counterpart of `as_str`; UTF-8 validity is not checked.
+    pub fn as_mut_str(&mut self) -> &mut str {
+        let slice = self.as_mut_slice();
+
+        unsafe { core::str::from_utf8_unchecked_mut(slice) }
+    }
+
+    /// Write a CStr (null-terminated) representation of this RocStr into
+    /// the given buffer.
+    ///
+    /// # Safety
+    /// `buf` must point to a writable allocation of at least
+    /// `self.len() + 1` bytes: the string bytes plus the terminating NUL.
+    pub unsafe fn write_c_str(&self, buf: *mut char) {
+        // A Rust `char` is four bytes wide, so offsets must be computed on a
+        // byte pointer; `buf.add(n)` would step `4 * n` bytes.
+        let dest = buf as *mut u8;
+        let len = self.len();
+
+        ptr::copy_nonoverlapping(self.get_bytes(), dest, len);
+
+        // null-terminate
+        *dest.add(len) = 0;
+    }
+}
+
+impl Deref for RocStr {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        self.as_str()
+    }
+}
+
+impl DerefMut for RocStr {
+    fn deref_mut(&mut self) -> &mut str {
+        self.as_mut_str()
+    }
+}
+
+impl Default for RocStr {
+    /// The empty small string: only the small-string flag is set.
+    fn default() -> Self {
+        Self {
+            length: isize::MIN as usize,
+            elements: core::ptr::null_mut(),
+        }
+    }
+}
+
+impl From<&str> for RocStr {
+    fn from(str: &str) -> Self {
+        Self::from_slice(str.as_bytes())
+    }
+}
+
+impl Display for RocStr {
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        self.as_str().fmt(f)
+    }
+}
+
+impl fmt::Debug for RocStr {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // e.g. RocStr { is_small_str: true, storage: None, string_contents: "abc" }
+        let mut out = f.debug_struct("RocStr");
+
+        out.field("is_small_str", &self.is_small_str())
+            .field("storage", &self.storage());
+
+        // Fall back to the raw bytes when the contents are not valid UTF-8.
+        match core::str::from_utf8(self.as_slice()) {
+            Ok(string) => out.field("string_contents", &string),
+            Err(_) => out.field("byte_contents", &self.as_slice()),
+        };
+
+        out.finish()
+    }
+}
+
+impl PartialEq for RocStr {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_slice() == other.as_slice()
+    }
+}
+
+impl Eq for RocStr {}
+
+impl Clone for RocStr {
+    fn clone(&self) -> Self {
+        if self.is_small_str() {
+            Self {
+                elements: self.elements,
+                length: self.length,
+            }
+        } else {
+            let capacity_size = mem::size_of::<usize>();
+            let num_bytes = capacity_size + self.length;
+
+            let elements = unsafe {
+                let raw_ptr = roc_alloc(num_bytes, mem::size_of::<usize>() as u32) as *mut u8;
+
+                // The copy's storage slot holds the string length.
+                *(raw_ptr as *mut usize) = self.length;
+
+                let target_ptr = raw_ptr.add(capacity_size);
+                ptr::copy_nonoverlapping(self.elements, target_ptr, self.length);
+
+                target_ptr
+            };
+
+            Self {
+                elements,
+                length: self.length,
+            }
+        }
+    }
+}
+
+impl Drop for RocStr {
+    fn drop(&mut self) {
+        if !self.is_small_str() {
+            let storage_ptr = self.get_storage_ptr_mut();
+
+            unsafe {
+                let storage_val = *storage_ptr;
+
+                if storage_val == REFCOUNT_1 || storage_val > 0 {
+                    // If we have no more references, or if this was unique,
+                    // deallocate it.
+                    roc_dealloc(storage_ptr as *mut c_void, mem::align_of::<usize>() as u32);
+                } else if storage_val < 0 {
+                    // If this still has more references, decrement one.
+                    *storage_ptr = storage_val - 1;
+                }
+
+                // The only remaining option is that this is in readonly memory,
+                // in which case we shouldn't attempt to do anything to it.
+            }
+        }
+    }
+}