mirror of
https://github.com/roc-lang/roc.git
synced 2024-09-22 08:17:40 +03:00
Merge pull request #6232 from roc-lang/roc-std-slices
expose list/string slices to rust users
This commit is contained in:
commit
d4a1dbc2d6
@ -128,9 +128,13 @@ mod glue_cli_run {
|
||||
multiple_modules:"multiple-modules" => indoc!(r#"
|
||||
combined was: Combined { s1: DepStr1::S("hello"), s2: DepStr2::R("world") }
|
||||
"#),
|
||||
arguments:"arguments" => indoc!(r#"
|
||||
Answer was: 84
|
||||
"#),
|
||||
// issue https://github.com/roc-lang/roc/issues/6121
|
||||
// TODO: re-enable this test. Currently it is flaking on macos x86-64 with a bad exit code.
|
||||
// nested_record:"nested-record" => "Record was: Outer { y: \"foo\", z: [1, 2], x: Inner { b: 24.0, a: 5 } }\n",
|
||||
// enumeration:"enumeration" => "tag_union was: MyEnum::Foo, Bar is: MyEnum::Bar, Baz is: MyEnum::Baz\n",
|
||||
// arguments:"arguments" => indoc!(r#"
|
||||
// Answer was: 84
|
||||
// "#),
|
||||
closures:"closures" => indoc!(r#"
|
||||
Answer was: 672
|
||||
"#),
|
||||
|
@ -12,6 +12,7 @@ use core::{
|
||||
ops::{Deref, DerefMut},
|
||||
ptr::{self, NonNull},
|
||||
};
|
||||
use std::ops::Range;
|
||||
|
||||
use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage};
|
||||
|
||||
@ -208,7 +209,7 @@ impl<T> RocList<T> {
|
||||
}
|
||||
|
||||
/// Useful for doing memcpy on the elements. Returns NULL if list is empty.
|
||||
pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T {
|
||||
pub(crate) fn ptr_to_first_elem(&self) -> *const T {
|
||||
unsafe { core::mem::transmute(self.elements) }
|
||||
}
|
||||
|
||||
@ -222,6 +223,15 @@ impl<T> RocList<T> {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
pub(crate) fn ptr_to_refcount(&self) -> *mut c_void {
|
||||
if self.is_seamless_slice() {
|
||||
((self.capacity_or_ref_ptr << 1) - std::mem::size_of::<usize>()) as *mut _
|
||||
} else {
|
||||
unsafe { self.ptr_to_first_elem().cast::<usize>().sub(1) as *mut _ }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void {
|
||||
unsafe {
|
||||
alloc_ptr
|
||||
@ -230,6 +240,44 @@ impl<T> RocList<T> {
|
||||
.cast()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append(&mut self, value: T) {
|
||||
self.push(value)
|
||||
}
|
||||
|
||||
pub fn push(&mut self, value: T) {
|
||||
if self.capacity() <= self.len() {
|
||||
// reserve space for (at least!) one more element
|
||||
self.reserve(1);
|
||||
}
|
||||
|
||||
let elements = self.elements.unwrap().as_ptr();
|
||||
let append_ptr = unsafe { elements.add(self.len()) };
|
||||
|
||||
unsafe {
|
||||
// Write the element into the slot, without dropping it.
|
||||
ptr::write(append_ptr, ManuallyDrop::new(value));
|
||||
}
|
||||
|
||||
// It's important that the length is increased one by one, to
|
||||
// make sure that we don't drop uninitialized elements, even when
|
||||
// a incrementing the reference count panics.
|
||||
self.length += 1;
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `bytes` must be allocated for `cap` elements
|
||||
/// - `bytes` must be initialized for `len` elements
|
||||
/// - `bytes` must be preceded by a correctly-aligned refcount (usize)
|
||||
/// - `cap` >= `len`
|
||||
pub unsafe fn from_raw_parts(bytes: *mut T, len: usize, cap: usize) -> Self {
|
||||
Self {
|
||||
elements: NonNull::new(bytes.cast()),
|
||||
length: len,
|
||||
capacity_or_ref_ptr: cap,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> RocList<T>
|
||||
@ -323,6 +371,38 @@ where
|
||||
}
|
||||
|
||||
impl<T> RocList<T> {
|
||||
#[track_caller]
|
||||
pub fn slice_range(&self, range: Range<usize>) -> Self {
|
||||
match self.try_slice_range(range) {
|
||||
Some(x) => x,
|
||||
None => panic!("slice index out of range"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_slice_range(&self, range: Range<usize>) -> Option<Self> {
|
||||
if self.as_slice().get(range.start..range.end).is_none() {
|
||||
None
|
||||
} else {
|
||||
// increment the refcount
|
||||
std::mem::forget(self.clone());
|
||||
|
||||
let element_ptr = self.as_slice()[range.start..]
|
||||
.as_ptr()
|
||||
.cast::<ManuallyDrop<T>>();
|
||||
|
||||
let capacity_or_ref_ptr =
|
||||
(self.ptr_to_first_elem() as usize) >> 1 | isize::MIN as usize;
|
||||
|
||||
let roc_list = RocList {
|
||||
elements: NonNull::new(element_ptr as *mut ManuallyDrop<T>),
|
||||
length: range.end - range.start,
|
||||
capacity_or_ref_ptr,
|
||||
};
|
||||
|
||||
Some(roc_list)
|
||||
}
|
||||
}
|
||||
|
||||
/// Increase a RocList's capacity by at least the requested number of elements (possibly more).
|
||||
///
|
||||
/// May return a new RocList, if the provided one was not unique.
|
||||
@ -333,7 +413,7 @@ impl<T> RocList<T> {
|
||||
|
||||
match self.elements_and_storage() {
|
||||
Some((elements, storage)) => {
|
||||
if storage.get().is_unique() {
|
||||
if storage.get().is_unique() && !self.is_seamless_slice() {
|
||||
unsafe {
|
||||
let old_alloc = self.ptr_to_allocation();
|
||||
|
||||
|
@ -19,8 +19,9 @@ use core::{
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
use std::ffi::{CStr, CString};
|
||||
use std::{ops::Range, ptr::NonNull};
|
||||
|
||||
use crate::RocList;
|
||||
use crate::{roc_realloc, RocList};
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct RocStr(RocStrInner);
|
||||
@ -73,8 +74,34 @@ impl RocStr {
|
||||
Self(RocStrInner { small_string })
|
||||
} else {
|
||||
let heap_allocated = RocList::from_slice(slice);
|
||||
let big_string = unsafe { std::mem::transmute(heap_allocated) };
|
||||
Self(RocStrInner {
|
||||
heap_allocated: ManuallyDrop::new(heap_allocated),
|
||||
heap_allocated: ManuallyDrop::new(big_string),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// # Safety
|
||||
///
|
||||
/// - `bytes` must be allocated for `cap` bytes
|
||||
/// - `bytes` must be initialized for `len` bytes
|
||||
/// - `bytes` must be preceded by a correctly-aligned refcount (usize)
|
||||
/// - `bytes` must represent valid UTF-8
|
||||
/// - `cap` >= `len`
|
||||
pub unsafe fn from_raw_parts(bytes: *mut u8, len: usize, cap: usize) -> Self {
|
||||
if len <= SmallString::CAPACITY {
|
||||
unsafe {
|
||||
let slice = std::slice::from_raw_parts(bytes, len);
|
||||
let small_string = SmallString::try_from_utf8_bytes(slice).unwrap_unchecked();
|
||||
Self(RocStrInner { small_string })
|
||||
}
|
||||
} else {
|
||||
Self(RocStrInner {
|
||||
heap_allocated: ManuallyDrop::new(BigString {
|
||||
elements: unsafe { NonNull::new_unchecked(bytes) },
|
||||
length: len,
|
||||
capacity_or_alloc_ptr: cap,
|
||||
}),
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -93,7 +120,7 @@ impl RocStr {
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
match self.as_enum_ref() {
|
||||
RocStrInnerRef::HeapAllocated(roc_list) => roc_list.capacity(),
|
||||
RocStrInnerRef::HeapAllocated(big_string) => big_string.capacity(),
|
||||
RocStrInnerRef::SmallString(_) => SmallString::CAPACITY,
|
||||
}
|
||||
}
|
||||
@ -137,10 +164,12 @@ impl RocStr {
|
||||
/// There is no way to tell how many references it has and if it is safe to free.
|
||||
/// As such, only values that should have a static lifetime for the entire application run
|
||||
/// should be considered for marking read-only.
|
||||
pub unsafe fn set_readonly(&self) {
|
||||
match self.as_enum_ref() {
|
||||
RocStrInnerRef::HeapAllocated(roc_list) => unsafe { roc_list.set_readonly() },
|
||||
RocStrInnerRef::SmallString(_) => {}
|
||||
pub unsafe fn set_readonly(&mut self) {
|
||||
if self.is_small_str() {
|
||||
/* do nothing */
|
||||
} else {
|
||||
let big = unsafe { &mut self.0.heap_allocated };
|
||||
big.set_readonly()
|
||||
}
|
||||
}
|
||||
|
||||
@ -167,7 +196,7 @@ impl RocStr {
|
||||
} else {
|
||||
// The requested capacity won't fit in a small string; we need to go big.
|
||||
RocStr(RocStrInner {
|
||||
heap_allocated: ManuallyDrop::new(RocList::with_capacity(bytes)),
|
||||
heap_allocated: ManuallyDrop::new(BigString::with_capacity(bytes)),
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -182,21 +211,33 @@ impl RocStr {
|
||||
|
||||
if target_cap > SmallString::CAPACITY {
|
||||
// The requested capacity won't fit in a small string; we need to go big.
|
||||
let mut roc_list = RocList::with_capacity(target_cap);
|
||||
let mut big_string = BigString::with_capacity(target_cap);
|
||||
|
||||
roc_list.extend_from_slice(small_str.as_bytes());
|
||||
unsafe {
|
||||
std::ptr::copy_nonoverlapping(
|
||||
self.as_bytes().as_ptr(),
|
||||
big_string.ptr_to_first_elem(),
|
||||
self.len(),
|
||||
)
|
||||
};
|
||||
|
||||
*self = RocStr(RocStrInner {
|
||||
heap_allocated: ManuallyDrop::new(roc_list),
|
||||
big_string.length = self.len();
|
||||
big_string.capacity_or_alloc_ptr = target_cap;
|
||||
|
||||
let mut updated = RocStr(RocStrInner {
|
||||
heap_allocated: ManuallyDrop::new(big_string),
|
||||
});
|
||||
|
||||
mem::swap(self, &mut updated);
|
||||
mem::forget(updated);
|
||||
}
|
||||
} else {
|
||||
let mut roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };
|
||||
let mut big_string = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };
|
||||
|
||||
roc_list.reserve(bytes);
|
||||
big_string.reserve(bytes);
|
||||
|
||||
let mut updated = RocStr(RocStrInner {
|
||||
heap_allocated: ManuallyDrop::new(roc_list),
|
||||
heap_allocated: ManuallyDrop::new(big_string),
|
||||
});
|
||||
|
||||
mem::swap(self, &mut updated);
|
||||
@ -204,12 +245,57 @@ impl RocStr {
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn slice_range(&self, range: Range<usize>) -> Self {
|
||||
match self.try_slice_range(range) {
|
||||
Some(x) => x,
|
||||
None => panic!("slice index out of range"),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn try_slice_range(&self, range: Range<usize>) -> Option<Self> {
|
||||
if self.as_str().get(range.start..range.end).is_none() {
|
||||
None
|
||||
} else if range.end - range.start <= SmallString::CAPACITY && self.is_small_str() {
|
||||
let slice = &self.as_bytes()[range];
|
||||
let small_string =
|
||||
unsafe { SmallString::try_from_utf8_bytes(slice).unwrap_unchecked() };
|
||||
|
||||
// NOTE decrements `self`
|
||||
Some(RocStr(RocStrInner { small_string }))
|
||||
} else {
|
||||
// increment the refcount
|
||||
std::mem::forget(self.clone());
|
||||
|
||||
let big = unsafe { &self.0.heap_allocated };
|
||||
let ptr = unsafe { (self.as_bytes().as_ptr() as *mut u8).add(range.start) };
|
||||
|
||||
let heap_allocated = ManuallyDrop::new(BigString {
|
||||
elements: unsafe { NonNull::new_unchecked(ptr) },
|
||||
length: (isize::MIN as usize) | (range.end - range.start),
|
||||
capacity_or_alloc_ptr: (big.ptr_to_first_elem() as usize) >> 1,
|
||||
});
|
||||
|
||||
Some(RocStr(RocStrInner { heap_allocated }))
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits the string at the first occurrence of `delimiter`.
///
/// Returns the parts before and after the delimiter as slices of `self`
/// (via `slice_range`), or `None` when the delimiter does not occur.
pub fn split_once(&self, delimiter: &str) -> Option<(Self, Self)> {
    let (head, tail) = self.as_str().split_once(delimiter)?;

    // `head` starts at byte 0; `tail` ends at the last byte of the string.
    let before = self.slice_range(0..head.len());
    let after = self.slice_range(self.len() - tail.len()..self.len());

    Some((before, after))
}
|
||||
|
||||
pub fn split_whitespace(&self) -> SplitWhitespace<'_> {
|
||||
SplitWhitespace(self.as_str().char_indices().peekable(), self)
|
||||
}
|
||||
|
||||
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
|
||||
fn first_nul_byte(&self) -> Option<usize> {
|
||||
match self.as_enum_ref() {
|
||||
RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().position(|byte| *byte == 0),
|
||||
RocStrInnerRef::SmallString(small_string) => small_string.first_nul_byte(),
|
||||
}
|
||||
self.as_bytes().iter().position(|byte| *byte == 0)
|
||||
}
|
||||
|
||||
// If the string is under this many bytes, the with_terminator family
|
||||
@ -267,60 +353,49 @@ impl RocStr {
|
||||
};
|
||||
|
||||
match self.as_enum_ref() {
|
||||
RocStrInnerRef::HeapAllocated(roc_list) => {
|
||||
RocStrInnerRef::HeapAllocated(big_string) => {
|
||||
unsafe {
|
||||
match roc_list.storage() {
|
||||
Some(storage) if storage.is_unique() => {
|
||||
// The backing RocList was unique, so we can mutate it in-place.
|
||||
let len = roc_list.len();
|
||||
let ptr = if len < roc_list.capacity() {
|
||||
// We happen to have excess capacity already, so we will be able
|
||||
// to write the terminator into the first byte of excess capacity.
|
||||
roc_list.ptr_to_first_elem() as *mut u8
|
||||
} else {
|
||||
// We always have an allocation that's even bigger than necessary,
|
||||
// because the refcount bytes take up more than the 1B needed for
|
||||
// the terminator. We just need to shift the bytes over on top
|
||||
// of the refcount.
|
||||
let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8;
|
||||
if big_string.is_unique() {
|
||||
// The backing RocList was unique, so we can mutate it in-place.
|
||||
let len = big_string.len();
|
||||
let ptr = if len < big_string.capacity() {
|
||||
// We happen to have excess capacity already, so we will be able
|
||||
// to write the terminator into the first byte of excess capacity.
|
||||
big_string.ptr_to_first_elem() as *mut u8
|
||||
} else {
|
||||
// We always have an allocation that's even bigger than necessary,
|
||||
// because the refcount bytes take up more than the 1B needed for
|
||||
// the terminator. We just need to shift the bytes over on top
|
||||
// of the refcount.
|
||||
let alloc_ptr = big_string.ptr_to_allocation() as *mut u8;
|
||||
|
||||
// First, copy the bytes over the original allocation - effectively
|
||||
// shifting everything over by one `usize`. Now we no longer have a
|
||||
// refcount (but the terminated won't use that anyway), but we do
|
||||
// have a free `usize` at the end.
|
||||
//
|
||||
// IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
|
||||
// because the regions definitely overlap!
|
||||
ptr::copy(roc_list.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
|
||||
|
||||
alloc_ptr
|
||||
};
|
||||
|
||||
terminate(ptr, len)
|
||||
}
|
||||
Some(_) => {
|
||||
let len = roc_list.len();
|
||||
|
||||
// The backing list was not unique, so we can't mutate it in-place.
|
||||
// ask for `len + 1` to store the original string and the terminator
|
||||
with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
|
||||
let alloc_ptr = alloc_ptr as *mut u8;
|
||||
let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8;
|
||||
|
||||
// memcpy the bytes into the stack allocation
|
||||
ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
|
||||
|
||||
terminate(alloc_ptr, len)
|
||||
})
|
||||
}
|
||||
None => {
|
||||
// The backing list was empty.
|
||||
// First, copy the bytes over the original allocation - effectively
|
||||
// shifting everything over by one `usize`. Now we no longer have a
|
||||
// refcount (but the terminated won't use that anyway), but we do
|
||||
// have a free `usize` at the end.
|
||||
//
|
||||
// No need to do a heap allocation for an empty string - we
|
||||
// can just do a stack allocation that will live for the
|
||||
// duration of the function.
|
||||
func([terminator].as_mut_ptr(), 0)
|
||||
}
|
||||
// IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
|
||||
// because the regions definitely overlap!
|
||||
ptr::copy(big_string.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
|
||||
|
||||
alloc_ptr
|
||||
};
|
||||
|
||||
terminate(ptr, len)
|
||||
} else {
|
||||
let len = big_string.len();
|
||||
|
||||
// The backing list was not unique, so we can't mutate it in-place.
|
||||
// ask for `len + 1` to store the original string and the terminator
|
||||
with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
|
||||
let alloc_ptr = alloc_ptr as *mut u8;
|
||||
let elem_ptr = big_string.ptr_to_first_elem() as *mut u8;
|
||||
|
||||
// memcpy the bytes into the stack allocation
|
||||
std::ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
|
||||
|
||||
terminate(alloc_ptr, len)
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -485,57 +560,46 @@ impl RocStr {
|
||||
};
|
||||
|
||||
match self.as_enum_ref() {
|
||||
RocStrInnerRef::HeapAllocated(roc_list) => {
|
||||
let len = roc_list.len();
|
||||
RocStrInnerRef::HeapAllocated(big_string) => {
|
||||
let len = big_string.len();
|
||||
|
||||
unsafe {
|
||||
match roc_list.storage() {
|
||||
Some(storage) if storage.is_unique() => {
|
||||
// The backing RocList was unique, so we can mutate it in-place.
|
||||
if big_string.is_unique() {
|
||||
// The backing RocList was unique, so we can mutate it in-place.
|
||||
|
||||
// We need 1 extra elem for the terminator. It must be an elem,
|
||||
// not a byte, because we'll be providing a pointer to elems.
|
||||
let needed_bytes = (len + 1) * size_of::<E>();
|
||||
// We need 1 extra elem for the terminator. It must be an elem,
|
||||
// not a byte, because we'll be providing a pointer to elems.
|
||||
let needed_bytes = (len + 1) * size_of::<E>();
|
||||
|
||||
// We can use not only the capacity on the heap, but also
|
||||
// the bytes originally used for the refcount.
|
||||
let available_bytes = roc_list.capacity() + size_of::<Storage>();
|
||||
// We can use not only the capacity on the heap, but also
|
||||
// the bytes originally used for the refcount.
|
||||
let available_bytes = big_string.capacity() + size_of::<Storage>();
|
||||
|
||||
if needed_bytes < available_bytes {
|
||||
debug_assert!(align_of::<Storage>() >= align_of::<E>());
|
||||
if needed_bytes < available_bytes {
|
||||
debug_assert!(align_of::<Storage>() >= align_of::<E>());
|
||||
|
||||
// We happen to have sufficient excess capacity already,
|
||||
// so we will be able to write the new elements as well as
|
||||
// the terminator into the existing allocation.
|
||||
let ptr = roc_list.ptr_to_allocation() as *mut E;
|
||||
let answer = terminate(ptr, self.as_str());
|
||||
// We happen to have sufficient excess capacity already,
|
||||
// so we will be able to write the new elements as well as
|
||||
// the terminator into the existing allocation.
|
||||
let ptr = big_string.ptr_to_allocation() as *mut E;
|
||||
let answer = terminate(ptr, self.as_str());
|
||||
|
||||
// We cannot rely on the RocStr::drop implementation, because
|
||||
// it tries to use the refcount - which we just overwrote
|
||||
// with string bytes.
|
||||
mem::forget(self);
|
||||
crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);
|
||||
// We cannot rely on the RocStr::drop implementation, because
|
||||
// it tries to use the refcount - which we just overwrote
|
||||
// with string bytes.
|
||||
mem::forget(self);
|
||||
crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);
|
||||
|
||||
answer
|
||||
} else {
|
||||
// We didn't have sufficient excess capacity already,
|
||||
// so we need to do either a new stack allocation or a new
|
||||
// heap allocation.
|
||||
fallback(self.as_str())
|
||||
}
|
||||
}
|
||||
Some(_) => {
|
||||
// The backing list was not unique, so we can't mutate it in-place.
|
||||
answer
|
||||
} else {
|
||||
// We didn't have sufficient excess capacity already,
|
||||
// so we need to do either a new stack allocation or a new
|
||||
// heap allocation.
|
||||
fallback(self.as_str())
|
||||
}
|
||||
None => {
|
||||
// The backing list was empty.
|
||||
//
|
||||
// No need to do a heap allocation for an empty string - we
|
||||
// can just do a stack allocation that will live for the
|
||||
// duration of the function.
|
||||
func([terminator].as_mut_ptr() as *mut E, "")
|
||||
}
|
||||
} else {
|
||||
// The backing list was not unique, so we can't mutate it in-place.
|
||||
fallback(self.as_str())
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -558,12 +622,44 @@ impl RocStr {
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator returned by `RocStr::split_whitespace`.
///
/// Field 0 walks the `(byte offset, char)` pairs of the source string; field 1
/// is the source string itself, from which each piece is sliced.
pub struct SplitWhitespace<'a>(
    std::iter::Peekable<std::str::CharIndices<'a>>,
    &'a RocStr,
);
|
||||
|
||||
impl Iterator for SplitWhitespace<'_> {
|
||||
type Item = RocStr;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let start = 'blk: {
|
||||
while let Some((pos, c)) = self.0.peek() {
|
||||
if c.is_whitespace() {
|
||||
self.0.next();
|
||||
} else {
|
||||
break 'blk *pos;
|
||||
}
|
||||
}
|
||||
|
||||
return None;
|
||||
};
|
||||
|
||||
let end = 'blk: {
|
||||
for (pos, c) in self.0.by_ref() {
|
||||
if c.is_whitespace() {
|
||||
break 'blk pos;
|
||||
}
|
||||
}
|
||||
|
||||
break 'blk self.1.len();
|
||||
};
|
||||
|
||||
self.1.try_slice_range(start..end)
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for RocStr {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
match self.as_enum_ref() {
|
||||
RocStrInnerRef::HeapAllocated(h) => unsafe { core::str::from_utf8_unchecked(h) },
|
||||
RocStrInnerRef::HeapAllocated(h) => h.as_str(),
|
||||
RocStrInnerRef::SmallString(s) => s,
|
||||
}
|
||||
}
|
||||
@ -697,6 +793,203 @@ impl From<SendSafeRocStr> for RocStr {
|
||||
}
|
||||
}
|
||||
|
||||
/// Heap-backed representation of a `RocStr` that does not fit inline.
///
/// The field order is part of the `#[repr(C)]` layout and must not change.
#[repr(C)]
struct BigString {
    /// Pointer to the first byte of the string's contents.
    elements: NonNull<u8>,
    /// Length in bytes; the most significant bit is set when this string is a
    /// seamless slice of another allocation.
    length: usize,
    /// The capacity in bytes or, for a seamless slice, the address of the
    /// allocation's first byte shifted right by one.
    capacity_or_alloc_ptr: usize,
}
|
||||
|
||||
const SEAMLESS_SLICE_BIT: usize = isize::MIN as usize;
|
||||
|
||||
impl BigString {
|
||||
fn len(&self) -> usize {
|
||||
self.length & !SEAMLESS_SLICE_BIT
|
||||
}
|
||||
|
||||
fn capacity(&self) -> usize {
|
||||
if self.is_seamless_slice() {
|
||||
self.len()
|
||||
} else {
|
||||
self.capacity_or_alloc_ptr
|
||||
}
|
||||
}
|
||||
|
||||
fn is_seamless_slice(&self) -> bool {
|
||||
(self.length as isize) < 0
|
||||
}
|
||||
|
||||
fn ptr_to_first_elem(&self) -> *mut u8 {
|
||||
unsafe { core::mem::transmute(self.elements) }
|
||||
}
|
||||
|
||||
fn ptr_to_allocation(&self) -> *mut usize {
|
||||
// these are the same because the alignment of u8 is just 1
|
||||
self.ptr_to_refcount()
|
||||
}
|
||||
|
||||
fn ptr_to_refcount(&self) -> *mut usize {
|
||||
if self.is_seamless_slice() {
|
||||
unsafe { ((self.capacity_or_alloc_ptr << 1) as *mut usize).sub(1) }
|
||||
} else {
|
||||
unsafe { self.ptr_to_first_elem().cast::<usize>().sub(1) }
|
||||
}
|
||||
}
|
||||
|
||||
fn as_bytes(&self) -> &[u8] {
|
||||
unsafe { std::slice::from_raw_parts(self.ptr_to_first_elem(), self.len()) }
|
||||
}
|
||||
|
||||
fn as_str(&self) -> &str {
|
||||
unsafe { std::str::from_utf8_unchecked(self.as_bytes()) }
|
||||
}
|
||||
|
||||
fn is_unique(&self) -> bool {
|
||||
if self.capacity() == 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
let ptr = self.ptr_to_refcount();
|
||||
let rc = unsafe { std::ptr::read(ptr) as isize };
|
||||
|
||||
rc == isize::MIN
|
||||
}
|
||||
|
||||
fn is_readonly(&self) -> bool {
|
||||
if self.capacity() == 0 {
|
||||
return true;
|
||||
}
|
||||
|
||||
let ptr = self.ptr_to_refcount();
|
||||
let rc = unsafe { std::ptr::read(ptr) as isize };
|
||||
|
||||
rc == 0
|
||||
}
|
||||
|
||||
fn set_readonly(&mut self) {
|
||||
assert_ne!(self.capacity(), 0);
|
||||
|
||||
let ptr = self.ptr_to_refcount();
|
||||
unsafe { std::ptr::write(ptr, 0) }
|
||||
}
|
||||
|
||||
fn inc(&mut self, n: usize) {
|
||||
let ptr = self.ptr_to_refcount();
|
||||
unsafe {
|
||||
let value = std::ptr::read(ptr);
|
||||
std::ptr::write(ptr, Ord::max(0, ((value as isize) + n as isize) as usize));
|
||||
}
|
||||
}
|
||||
|
||||
fn dec(&mut self) {
|
||||
if self.capacity() == 0 {
|
||||
// no valid allocation, elements pointer is dangling
|
||||
return;
|
||||
}
|
||||
|
||||
let ptr = self.ptr_to_refcount();
|
||||
unsafe {
|
||||
let value = std::ptr::read(ptr) as isize;
|
||||
match value {
|
||||
0 => {
|
||||
// static lifetime, do nothing
|
||||
}
|
||||
isize::MIN => {
|
||||
// refcount becomes zero; free allocation
|
||||
crate::roc_dealloc(self.ptr_to_allocation().cast(), 1);
|
||||
}
|
||||
_ => {
|
||||
std::ptr::write(ptr, (value - 1) as usize);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn with_capacity(cap: usize) -> Self {
|
||||
let mut this = Self {
|
||||
elements: NonNull::dangling(),
|
||||
length: 0,
|
||||
capacity_or_alloc_ptr: 0,
|
||||
};
|
||||
|
||||
this.reserve(cap);
|
||||
|
||||
this
|
||||
}
|
||||
|
||||
/// Increase a BigString's capacity by at least the requested number of elements (possibly more).
|
||||
///
|
||||
/// May return a new BigString, if the provided one was not unique.
|
||||
fn reserve(&mut self, n: usize) {
|
||||
let align = std::mem::size_of::<usize>();
|
||||
let desired_cap = self.len() + n;
|
||||
let desired_alloc = align + desired_cap;
|
||||
|
||||
if self.is_unique() && !self.is_seamless_slice() {
|
||||
if self.capacity() >= desired_cap {
|
||||
return;
|
||||
}
|
||||
|
||||
let new_alloc = unsafe {
|
||||
roc_realloc(
|
||||
self.ptr_to_allocation().cast(),
|
||||
desired_alloc as _,
|
||||
align + self.capacity(),
|
||||
align as _,
|
||||
)
|
||||
};
|
||||
|
||||
let elements = unsafe { NonNull::new_unchecked(new_alloc.cast::<u8>().add(align)) };
|
||||
|
||||
let mut this = Self {
|
||||
elements,
|
||||
length: self.len(),
|
||||
capacity_or_alloc_ptr: desired_cap,
|
||||
};
|
||||
|
||||
std::mem::swap(&mut this, self);
|
||||
std::mem::forget(this);
|
||||
} else {
|
||||
let ptr = unsafe { crate::roc_alloc(desired_alloc, align as _) } as *mut u8;
|
||||
let elements = unsafe { NonNull::new_unchecked(ptr.cast::<u8>().add(align)) };
|
||||
|
||||
unsafe {
|
||||
// Copy the old elements to the new allocation.
|
||||
std::ptr::copy_nonoverlapping(self.ptr_to_first_elem(), ptr.add(align), self.len());
|
||||
}
|
||||
|
||||
let mut this = Self {
|
||||
elements,
|
||||
length: self.len(),
|
||||
capacity_or_alloc_ptr: desired_cap,
|
||||
};
|
||||
|
||||
std::mem::swap(&mut this, self);
|
||||
std::mem::drop(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for BigString {
|
||||
fn clone(&self) -> Self {
|
||||
let mut this = Self {
|
||||
elements: self.elements,
|
||||
length: self.length,
|
||||
capacity_or_alloc_ptr: self.capacity_or_alloc_ptr,
|
||||
};
|
||||
|
||||
this.inc(1);
|
||||
|
||||
this
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for BigString {
|
||||
fn drop(&mut self) {
|
||||
self.dec()
|
||||
}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
union RocStrInner {
|
||||
// TODO: this really should be separated from the List type.
|
||||
@ -704,12 +997,12 @@ union RocStrInner {
|
||||
// Currently, there are work arounds in RocList to handle both via removing the highest bit of length in many cases.
|
||||
// With glue changes, we should probably rewrite these cleanly to match what is in the zig bitcode.
|
||||
// It is definitely a bit stale now and I think the storage mechanism can be quite confusing with our extra pieces of state.
|
||||
heap_allocated: ManuallyDrop<RocList<u8>>,
|
||||
heap_allocated: ManuallyDrop<BigString>,
|
||||
small_string: SmallString,
|
||||
}
|
||||
|
||||
enum RocStrInnerRef<'a> {
|
||||
HeapAllocated(&'a RocList<u8>),
|
||||
HeapAllocated(&'a BigString),
|
||||
SmallString(&'a SmallString),
|
||||
}
|
||||
|
||||
@ -756,17 +1049,6 @@ impl SmallString {
|
||||
fn len(&self) -> usize {
|
||||
usize::from(self.len & !RocStr::MASK)
|
||||
}
|
||||
|
||||
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
|
||||
fn first_nul_byte(&self) -> Option<usize> {
|
||||
for (index, byte) in self.bytes[0..self.len()].iter().enumerate() {
|
||||
if *byte == 0 {
|
||||
return Some(index);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for SmallString {
|
||||
|
@ -358,6 +358,79 @@ mod test_roc_std {
|
||||
let roc_list = RocList::<RocStr>::empty();
|
||||
assert!(roc_list.is_unique());
|
||||
}
|
||||
|
||||
/// Slicing a list from the start, from the middle, and slicing a slice.
#[test]
fn slicing_and_dicing_list() {
    let example = RocList::from_slice(b"chaos is a ladder");

    // a slice anchored at the start of the list
    let prefix = example.slice_range(0..5);
    assert_eq!(prefix.as_slice(), b"chaos");

    // a slice from the middle of the list
    let middle = example.slice_range(6..10);
    assert_eq!(middle.as_slice(), b"is a");

    // a slice taken from another slice
    let first = example.slice_range(0..5);
    let nested = first.slice_range(0..3);
    assert_eq!(nested.as_slice(), b"cha");
}
|
||||
|
||||
/// Slicing a string from the start, from the middle, and slicing a slice.
#[test]
fn slicing_and_dicing_str() {
    let example = RocStr::from("chaos is a ladder");

    // a slice anchored at the start of the string
    let prefix = example.slice_range(0..5);
    assert_eq!(prefix.as_str(), "chaos");

    // a slice from the middle of the string
    let middle = example.slice_range(6..10);
    assert_eq!(middle.as_str(), "is a");

    // a slice taken from another slice
    let first = example.slice_range(0..5);
    let nested = first.slice_range(0..3);
    assert_eq!(nested.as_str(), "cha");
}
|
||||
|
||||
/// Pushing onto a plain list, onto a shared slice, and onto a slice that has
/// become the only reference to its allocation.
#[test]
fn roc_list_push() {
    let mut example = RocList::from_slice(&[1, 2, 3]);

    // pushing onto the list itself
    example.push(4);
    assert_eq!(example.as_slice(), &[1, 2, 3, 4]);

    // pushing onto a slice while the original is still alive
    let mut sliced = example.slice_range(0..3);
    sliced.push(5);
    assert_eq!(sliced.as_slice(), &[1, 2, 3, 5]);

    // the original must be unaffected by the push onto the slice
    assert_eq!(example.as_slice(), &[1, 2, 3, 4]);

    drop(sliced);

    let mut sliced = example.slice_range(0..3);

    // dropping the original leaves the slice as the only reference
    drop(example);

    sliced.push(5);
    assert_eq!(sliced.as_slice(), &[1, 2, 3, 5]);
}
|
||||
|
||||
/// Splitting on single spaces yields each word as its own `RocStr`.
#[test]
fn split_whitespace() {
    let example = RocStr::from("chaos is a ladder");

    let split: Vec<_> = example.split_whitespace().collect();

    let expected: Vec<RocStr> = ["chaos", "is", "a", "ladder"]
        .iter()
        .copied()
        .map(RocStr::from)
        .collect();

    assert_eq!(split, expected);
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
Loading…
Reference in New Issue
Block a user