Merge pull request #3194 from rtfeldman/cstr

roc_std improvements
This commit is contained in:
Folkert de Vries 2022-06-12 17:04:48 +02:00 committed by GitHub
commit b1308f71cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 1243 additions and 220 deletions

1
Cargo.lock generated
View File

@ -4120,6 +4120,7 @@ dependencies = [
name = "roc_std"
version = "0.1.0"
dependencies = [
"arrayvec 0.7.2",
"static_assertions 0.1.1",
]

View File

@ -421,7 +421,6 @@ pub fn rebuild_host(
let env_home = env::var("HOME").unwrap_or_else(|_| "".to_string());
let env_cpath = env::var("CPATH").unwrap_or_else(|_| "".to_string());
if zig_host_src.exists() {
// Compile host.zig

View File

@ -70,7 +70,7 @@ impl FromWasmerMemory for RocStr {
&memory_bytes[big_elem_ptr..][..big_length]
};
unsafe { RocStr::from_slice(slice) }
unsafe { RocStr::from_slice_unchecked(slice) }
}
}

View File

@ -90,7 +90,7 @@ use roc_std::{RocList, RocStr};
// "#
// ),
// RocStr::from_slice(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"),
// RocStr::from_slice_unchecked(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"),
// RocStr
// );
// }
@ -108,7 +108,7 @@ use roc_std::{RocList, RocStr};
// _ -> ""
// "#
// ),
// RocStr::from_slice(b"JJJ"),
// RocStr::from_slice_unchecked(b"JJJ"),
// RocStr
// );
// }
@ -122,8 +122,8 @@ use roc_std::{RocList, RocStr};
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice(b"01234567789abcdefghi"),
// RocStr::from_slice(b"01234567789abcdefghi")
// RocStr::from_slice_unchecked(b"01234567789abcdefghi"),
// RocStr::from_slice_unchecked(b"01234567789abcdefghi")
// ]),
// RocList<RocStr>
// );
@ -135,8 +135,8 @@ use roc_std::{RocList, RocStr};
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice(b"01234567789abcdefghi "),
// RocStr::from_slice(b" 01234567789abcdefghi")
// RocStr::from_slice_unchecked(b"01234567789abcdefghi "),
// RocStr::from_slice_unchecked(b" 01234567789abcdefghi")
// ]),
// RocList<RocStr>
// );
@ -151,9 +151,9 @@ use roc_std::{RocList, RocStr};
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice(b"J"),
// RocStr::from_slice(b"J"),
// RocStr::from_slice(b"J")
// RocStr::from_slice_unchecked(b"J"),
// RocStr::from_slice_unchecked(b"J"),
// RocStr::from_slice_unchecked(b"J")
// ]),
// RocList<RocStr>
// );
@ -169,7 +169,7 @@ use roc_std::{RocList, RocStr};
// "than the delimiter which happens to be very very long"
// "#
// ),
// RocList::from_slice(&[RocStr::from_slice(b"string to split is shorter")]),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"string to split is shorter")]),
// RocList<RocStr>
// );
// }
@ -182,7 +182,7 @@ use roc_std::{RocList, RocStr};
// Str.split "" ""
// "#
// ),
// RocList::from_slice(&[RocStr::from_slice(b"")]),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"")]),
// RocList<RocStr>
// );
// }
@ -195,7 +195,7 @@ use roc_std::{RocList, RocStr};
// Str.split "a," ","
// "#
// ),
// RocList::from_slice(&[RocStr::from_slice(b"a"), RocStr::from_slice(b"")]),
// RocList::from_slice(&[RocStr::from_slice_unchecked(b"a"), RocStr::from_slice_unchecked(b"")]),
// RocList<RocStr>
// )
// }
@ -224,9 +224,9 @@ use roc_std::{RocList, RocStr};
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice(b"1"),
// RocStr::from_slice(b"2"),
// RocStr::from_slice(b"")
// RocStr::from_slice_unchecked(b"1"),
// RocStr::from_slice_unchecked(b"2"),
// RocStr::from_slice_unchecked(b"")
// ]),
// RocList<RocStr>
// );
@ -243,9 +243,9 @@ use roc_std::{RocList, RocStr};
// "#
// ),
// RocList::from_slice(&[
// RocStr::from_slice(b"3"),
// RocStr::from_slice(b"4"),
// RocStr::from_slice(b"")
// RocStr::from_slice_unchecked(b"3"),
// RocStr::from_slice_unchecked(b"4"),
// RocStr::from_slice_unchecked(b"")
// ]),
// RocList<RocStr>
// );
@ -261,7 +261,7 @@ use roc_std::{RocList, RocStr};
// "Second string that is also fairly long. Two long strings test things that might not appear with short strings."
// "#
// ),
// RocStr::from_slice(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."),
// RocStr::from_slice_unchecked(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."),
// RocStr
// );
// }
@ -498,7 +498,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -513,7 +513,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("abc~".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("abc~".as_bytes()),
// roc_std::RocStr
// );
// }
@ -528,7 +528,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("∆".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("∆".as_bytes()),
// roc_std::RocStr
// );
// }
@ -543,7 +543,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("∆œ¬".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("∆œ¬".as_bytes()),
// roc_std::RocStr
// );
// }
@ -558,7 +558,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("💖".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("💖".as_bytes()),
// roc_std::RocStr
// );
// }
@ -573,7 +573,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("💖🤠🚀".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("💖🤠🚀".as_bytes()),
// roc_std::RocStr
// );
// }
@ -588,7 +588,7 @@ fn str_starts_with_false_small_str() {
// Err _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("💖b∆".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("💖b∆".as_bytes()),
// roc_std::RocStr
// );
// }
@ -607,7 +607,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -626,7 +626,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -645,7 +645,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -664,7 +664,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -683,7 +683,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -702,7 +702,7 @@ fn str_starts_with_false_small_str() {
// _ -> ""
// "#
// ),
// roc_std::RocStr::from_slice("a".as_bytes()),
// roc_std::RocStr::from_slice_unchecked("a".as_bytes()),
// roc_std::RocStr
// );
// }
@ -744,7 +744,7 @@ fn str_equality() {
// printExpr expr
// "#
// ),
// RocStr::from_slice(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"),
// RocStr::from_slice_unchecked(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"),
// RocStr
// );
// }

View File

@ -3,6 +3,7 @@
use core::ffi::c_void;
use roc_std::RocStr;
use std::ffi::CStr;
use std::mem::ManuallyDrop;
use std::os::raw::c_char;
extern "C" {
@ -56,7 +57,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut
#[no_mangle]
pub extern "C" fn rust_main() -> i32 {
unsafe {
let mut roc_str = RocStr::default();
let mut roc_str = ManuallyDrop::new(RocStr::default());
roc_main(&mut roc_str);
let len = roc_str.len();
@ -65,6 +66,8 @@ pub extern "C" fn rust_main() -> i32 {
if libc::write(1, str_bytes, len) < 0 {
panic!("Writing to stdout failed!");
}
ManuallyDrop::drop(&mut roc_str)
}
// Exit code

7
roc_std/Cargo.lock generated
View File

@ -20,6 +20,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "cfg-if"
version = "1.0.0"
@ -193,6 +199,7 @@ checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
name = "roc_std"
version = "0.1.0"
dependencies = [
"arrayvec",
"indoc",
"libc",
"pretty_assertions",

View File

@ -10,6 +10,7 @@ version = "0.1.0"
[dependencies]
static_assertions = "0.1"
arrayvec = "0.7.2"
[dev-dependencies]
indoc = "1.0.3"
@ -21,3 +22,4 @@ libc = "0.2.106"
[features]
default = ["platform"]
platform = []
no_std = []

View File

@ -1,19 +1,24 @@
#![crate_type = "lib"]
// #![no_std]
#![cfg_attr(feature = "no_std", no_std)]
use core::cmp::Ordering;
use core::ffi::c_void;
use core::fmt;
use core::fmt::{self, Debug};
use core::hash::{Hash, Hasher};
use core::mem::{ManuallyDrop, MaybeUninit};
use core::ops::Drop;
use core::str;
use std::hash::{Hash, Hasher};
use std::io::Write;
use arrayvec::ArrayString;
mod roc_box;
mod roc_list;
mod roc_str;
mod storage;
pub use roc_box::RocBox;
pub use roc_list::RocList;
pub use roc_str::RocStr;
pub use roc_str::{InteriorNulError, RocStr};
pub use storage::Storage;
// A list of C functions that are being imported
@ -27,17 +32,23 @@ extern "C" {
alignment: u32,
) -> *mut c_void;
pub fn roc_dealloc(ptr: *mut c_void, alignment: u32);
pub fn roc_panic(c_ptr: *mut c_void, tag_id: u32);
pub fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void;
pub fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void;
}
/// # Safety
/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
#[cfg(not(feature = "platform"))]
#[no_mangle]
pub unsafe extern "C" fn roc_alloc(_size: usize, _alignment: u32) -> *mut c_void {
unimplemented!("It is not valid to call roc alloc from within the compiler. Please use the \"platform\" feature if this is a platform.")
}
/// # Safety
/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
#[cfg(not(feature = "platform"))]
#[no_mangle]
pub unsafe extern "C" fn roc_realloc(
_ptr: *mut c_void,
_new_size: usize,
@ -46,13 +57,37 @@ pub unsafe extern "C" fn roc_realloc(
) -> *mut c_void {
unimplemented!("It is not valid to call roc realloc from within the compiler. Please use the \"platform\" feature if this is a platform.")
}
/// # Safety
/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
#[cfg(not(feature = "platform"))]
#[no_mangle]
pub unsafe extern "C" fn roc_dealloc(_ptr: *mut c_void, _alignment: u32) {
unimplemented!("It is not valid to call roc dealloc from within the compiler. Please use the \"platform\" feature if this is a platform.")
}
/// # Safety
/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
#[cfg(not(feature = "platform"))]
#[no_mangle]
pub unsafe extern "C" fn roc_panic(_c_ptr: *mut c_void, _tag_id: u32) {
    // Underscore-prefix the parameters: they are intentionally unused, and the
    // compiler would otherwise warn on every build of the non-platform feature set.
    unimplemented!("It is not valid to call roc panic from within the compiler. Please use the \"platform\" feature if this is a platform.")
}
/// # Safety
/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
#[cfg(not(feature = "platform"))]
#[no_mangle]
// `unsafe extern "C"` so the definition matches the ABI of the `extern "C"`
// declaration of this symbol (and the doc comment's claim that it is unsafe);
// a plain `pub fn` with #[no_mangle] would export the Rust ABI instead.
pub unsafe extern "C" fn roc_memcpy(_dst: *mut c_void, _src: *mut c_void, _n: usize) -> *mut c_void {
    unimplemented!("It is not valid to call roc memcpy from within the compiler. Please use the \"platform\" feature if this is a platform.")
}
/// # Safety
/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
#[cfg(not(feature = "platform"))]
#[no_mangle]
// `unsafe extern "C"` so the definition matches the ABI of the `extern "C"`
// declaration of this symbol (and the doc comment's claim that it is unsafe);
// a plain `pub fn` with #[no_mangle] would export the Rust ABI instead.
pub unsafe extern "C" fn roc_memset(_dst: *mut c_void, _c: i32, _n: usize) -> *mut c_void {
    unimplemented!("It is not valid to call roc memset from within the compiler. Please use the \"platform\" feature if this is a platform.")
}
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum RocOrder {
@ -71,15 +106,23 @@ pub struct RocResult<T, E> {
tag: RocResultTag,
}
impl<T, E> core::fmt::Debug for RocResult<T, E>
impl<T, E> Debug for RocResult<T, E>
where
T: core::fmt::Debug,
E: core::fmt::Debug,
T: Debug,
E: Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self.as_result_of_refs() {
Ok(payload) => write!(f, "RocOk({:?})", payload),
Err(payload) => write!(f, "RocErr({:?})", payload),
Ok(payload) => {
f.write_str("RocOk(")?;
payload.fmt(f)?;
f.write_str(")")
}
Err(payload) => {
f.write_str("RocErr(")?;
payload.fmt(f)?;
f.write_str(")")
}
}
}
}
@ -306,73 +349,62 @@ impl RocDec {
self.0
}
fn to_str_helper(&self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> usize {
fn to_str_helper(self, string: &mut ArrayString<{ Self::MAX_STR_LENGTH }>) -> &str {
use std::fmt::Write;
if self.as_i128() == 0 {
write!(&mut bytes[..], "{}", "0").unwrap();
return 1;
return "0";
}
let is_negative = (self.as_i128() < 0) as usize;
static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19);
// The :019 in the following write! is computed as Self::DECIMAL_PLACES + 1. If you change
// Self::DECIMAL_PLACES, this assert should remind you to change that format string as
// well.
//
// Self::DECIMAL_PLACES, this assert should remind you to change that format string as well.
static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19);
// By using the :019 format, we're guaranteeing that numbers less than 1, say 0.01234
// get their leading zeros placed in bytes for us. i.e. bytes = b"0012340000000000000"
write!(&mut bytes[..], "{:019}", self.as_i128()).unwrap();
// get their leading zeros placed in bytes for us. i.e. `string = b"0012340000000000000"`
write!(string, "{:019}", self.as_i128()).unwrap();
// If self represents 1234.5678, then bytes is b"1234567800000000000000".
let mut i = Self::MAX_STR_LENGTH - 1;
// Find the last place where we have actual data.
while bytes[i] == 0 {
i = i - 1;
}
// At this point i is 21 because bytes[21] is the final '0' in b"1234567800000000000000".
let is_negative = self.as_i128() < 0;
let decimal_location = string.len() - Self::DECIMAL_PLACES + (is_negative as usize);
let decimal_location = i - Self::DECIMAL_PLACES + 1 + is_negative;
// decimal_location = 4
// skip trailing zeros
let last_nonzero_byte = string.trim_end_matches('0').len();
while bytes[i] == ('0' as u8) && i >= decimal_location {
bytes[i] = 0;
i = i - 1;
}
// Now i = 7, because bytes[7] = '8', and bytes = b"12345678"
if i < decimal_location {
if last_nonzero_byte < decimal_location {
// This means that we've removed trailing zeros and are left with an integer. Our
// convention is to print these without a decimal point or trailing zeros, so we're done.
return i + 1;
string.truncate(decimal_location);
return string.as_str();
}
let ret = i + 1;
while i >= decimal_location {
bytes[i + 1] = bytes[i];
i = i - 1;
}
bytes[i + 1] = bytes[i];
// Now i = 4, and bytes = b"123455678"
// otherwise, we're dealing with a fraction, and need to insert the decimal dot
bytes[decimal_location] = '.' as u8;
// Finally bytes = b"1234.5678"
// truncate all extra zeros off
string.truncate(last_nonzero_byte);
ret + 1
// push a dummy character so we have space for the decimal dot
string.push('$');
// Safety: at any time, the string only contains ascii characters, so it is always valid utf8
let bytes = unsafe { string.as_bytes_mut() };
// shift the fractional part by one
bytes.copy_within(decimal_location..last_nonzero_byte, decimal_location + 1);
// and put in the decimal dot in the right place
bytes[decimal_location] = b'.';
string.as_str()
}
pub fn to_str(&self) -> RocStr {
let mut bytes = [0 as u8; Self::MAX_STR_LENGTH];
let last_idx = self.to_str_helper(&mut bytes);
unsafe { RocStr::from_slice(&bytes[0..last_idx]) }
RocStr::from(self.to_str_helper(&mut ArrayString::new()))
}
}
impl fmt::Display for RocDec {
fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut bytes = [0 as u8; Self::MAX_STR_LENGTH];
let last_idx = self.to_str_helper(&mut bytes);
let result = unsafe { str::from_utf8_unchecked(&bytes[0..last_idx]) };
write!(fmtr, "{}", result)
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.to_str_helper(&mut ArrayString::new()))
}
}
@ -394,52 +426,37 @@ impl From<I128> for i128 {
impl fmt::Debug for I128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let i128: i128 = (*self).into();
i128.fmt(f)
i128::from(*self).fmt(f)
}
}
impl fmt::Display for I128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let i128: i128 = (*self).into();
i128.fmt(f)
Debug::fmt(&i128::from(*self), f)
}
}
impl PartialEq for I128 {
fn eq(&self, other: &Self) -> bool {
let i128_self: i128 = (*self).into();
let i128_other: i128 = (*other).into();
i128_self.eq(&i128_other)
i128::from(*self).eq(&i128::from(*other))
}
}
impl PartialOrd for I128 {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
let i128_self: i128 = (*self).into();
let i128_other: i128 = (*other).into();
i128_self.partial_cmp(&i128_other)
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
i128::from(*self).partial_cmp(&i128::from(*other))
}
}
impl Ord for I128 {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let i128_self: i128 = (*self).into();
let i128_other: i128 = (*other).into();
i128_self.cmp(&i128_other)
fn cmp(&self, other: &Self) -> Ordering {
i128::from(*self).cmp(&i128::from(*other))
}
}
impl Hash for I128 {
fn hash<H: Hasher>(&self, state: &mut H) {
let i128: i128 = (*self).into();
i128.hash(state);
i128::from(*self).hash(state);
}
}
@ -461,51 +478,36 @@ impl From<U128> for u128 {
impl fmt::Debug for U128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let u128: u128 = (*self).into();
u128.fmt(f)
u128::from(*self).fmt(f)
}
}
impl fmt::Display for U128 {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let u128: u128 = (*self).into();
u128.fmt(f)
Debug::fmt(&u128::from(*self), f)
}
}
impl PartialEq for U128 {
fn eq(&self, other: &Self) -> bool {
let u128_self: u128 = (*self).into();
let u128_other: u128 = (*other).into();
u128_self.eq(&u128_other)
u128::from(*self).eq(&u128::from(*other))
}
}
impl PartialOrd for U128 {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
let u128_self: u128 = (*self).into();
let u128_other: u128 = (*other).into();
u128_self.partial_cmp(&u128_other)
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
u128::from(*self).partial_cmp(&u128::from(*other))
}
}
impl Ord for U128 {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let u128_self: u128 = (*self).into();
let u128_other: u128 = (*other).into();
u128_self.cmp(&u128_other)
fn cmp(&self, other: &Self) -> Ordering {
u128::from(*self).cmp(&u128::from(*other))
}
}
impl Hash for U128 {
fn hash<H: Hasher>(&self, state: &mut H) {
let u128: u128 = (*self).into();
u128.hash(state);
u128::from(*self).hash(state);
}
}

168
roc_std/src/roc_box.rs Normal file
View File

@ -0,0 +1,168 @@
#![deny(unsafe_op_in_unsafe_fn)]
use crate::{roc_alloc, roc_dealloc, storage::Storage};
use core::{
cell::Cell,
cmp::{self, Ordering},
fmt::Debug,
mem,
ops::Deref,
ptr::{self, NonNull},
};
#[repr(C)]
pub struct RocBox<T> {
    contents: NonNull<T>,
}

impl<T> RocBox<T> {
    /// Allocate a refcounted box via `roc_alloc` and move `contents` into it.
    ///
    /// Allocation layout: a `Storage` refcount header at the start, then
    /// `alloc_alignment()` bytes of padding, then the `T` value itself.
    pub fn new(contents: T) -> Self {
        let alignment = Self::alloc_alignment();
        let bytes = mem::size_of::<T>() + alignment;

        let ptr = unsafe { roc_alloc(bytes, alignment as u32) };

        if ptr.is_null() {
            todo!("Call roc_panic with the info that an allocation failed.");
        }

        // Initialize the reference count.
        let refcount_one = Storage::new_reference_counted();
        unsafe { ptr.cast::<Storage>().write(refcount_one) };

        let contents = unsafe {
            let contents_ptr = ptr.cast::<u8>().add(alignment).cast::<T>();

            // Use `write`, not `*contents_ptr = contents`: assignment through the
            // pointer would first drop the pointee, but this memory is
            // uninitialized, so that would be undefined behavior for any `T`
            // with a `Drop` impl. `write` moves the value in without dropping.
            contents_ptr.write(contents);

            // We already verified that the original alloc pointer was non-null,
            // and this one is the alloc pointer with `alignment` bytes added to it,
            // so it should be non-null too.
            NonNull::new_unchecked(contents_ptr)
        };

        Self { contents }
    }

    /// Alignment of the allocation: large enough for both `T` and the
    /// `Storage` refcount header that precedes it.
    #[inline(always)]
    fn alloc_alignment() -> usize {
        mem::align_of::<T>().max(mem::align_of::<Storage>())
    }

    /// Move the contents out of the box.
    ///
    /// NOTE(review): `self` is still dropped when this function returns, and
    /// `Drop` also drops the stored `T` once the refcount hits zero — for a
    /// uniquely-owned box that looks like a double drop of the contents.
    /// Confirm the intended semantics with the callers before relying on this.
    pub fn into_inner(self) -> T {
        unsafe { ptr::read(self.contents.as_ptr() as *mut T) }
    }

    /// Borrow the `Storage` refcount header that lives `alloc_alignment()`
    /// bytes before `contents` in the same allocation.
    fn storage(&self) -> &Cell<Storage> {
        let alignment = Self::alloc_alignment();

        unsafe {
            &*self
                .contents
                .as_ptr()
                .cast::<u8>()
                .sub(alignment)
                .cast::<Cell<Storage>>()
        }
    }
}
impl<T> Deref for RocBox<T> {
    type Target = T;

    /// Borrow the boxed value.
    fn deref(&self) -> &Self::Target {
        // SAFETY: `contents` always points to a live, initialized `T`
        // for as long as this `RocBox` exists.
        unsafe { &*self.contents.as_ptr() }
    }
}
impl<T, U> PartialEq<RocBox<U>> for RocBox<T>
where
T: PartialEq<U>,
{
fn eq(&self, other: &RocBox<U>) -> bool {
self.deref() == other.deref()
}
}
impl<T: Eq> Eq for RocBox<T> {}
impl<T, U> PartialOrd<RocBox<U>> for RocBox<T>
where
T: PartialOrd<U>,
{
fn partial_cmp(&self, other: &RocBox<U>) -> Option<cmp::Ordering> {
let self_contents = unsafe { self.contents.as_ref() };
let other_contents = unsafe { other.contents.as_ref() };
self_contents.partial_cmp(other_contents)
}
}
impl<T> Ord for RocBox<T>
where
T: Ord,
{
fn cmp(&self, other: &Self) -> Ordering {
let self_contents = unsafe { self.contents.as_ref() };
let other_contents = unsafe { other.contents.as_ref() };
self_contents.cmp(other_contents)
}
}
impl<T> Debug for RocBox<T>
where
T: Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
self.deref().fmt(f)
}
}
impl<T> Clone for RocBox<T> {
    /// Cheap clone: bumps the reference count and shares the allocation;
    /// the boxed `T` itself is not copied.
    fn clone(&self) -> Self {
        let storage = self.storage();
        let mut new_storage = storage.get();

        // Increment the reference count — unless the storage is readonly,
        // which presumably marks a permanently-shared constant whose refcount
        // must never be touched (TODO confirm against Storage's docs).
        if !new_storage.is_readonly() {
            new_storage.increment_reference_count();
            storage.set(new_storage);
        }

        Self {
            contents: self.contents,
        }
    }
}
impl<T> Drop for RocBox<T> {
    /// Decrements the reference count; when this was the last reference,
    /// drops the boxed `T` and releases the allocation via `roc_dealloc`.
    fn drop(&mut self) {
        let storage = self.storage();
        let contents = self.contents;

        // Decrease the box's reference count.
        let mut new_storage = storage.get();
        let needs_dealloc = new_storage.decrease();

        if needs_dealloc {
            unsafe {
                // Drop the stored contents.
                let contents_ptr = contents.as_ptr();

                // `ptr::read` moves the T out so a normal `drop` can run on it;
                // the bytes left behind are never touched again before dealloc.
                mem::drop::<T>(ptr::read(contents_ptr));

                let alignment = Self::alloc_alignment();

                // Release the memory. The allocation starts `alignment` bytes
                // before `contents` (where the Storage header lives).
                roc_dealloc(
                    contents.as_ptr().cast::<u8>().sub(alignment).cast(),
                    alignment as u32,
                );
            }
        } else if !new_storage.is_readonly() {
            // Still shared: write the decremented refcount back.
            // (Readonly storage is never written to.)
            storage.set(new_storage);
        }
    }
}

View File

@ -3,12 +3,12 @@
use core::{
cell::Cell,
cmp::{self, Ordering},
ffi::c_void,
fmt::Debug,
intrinsics::copy_nonoverlapping,
mem::{self, ManuallyDrop},
ops::Deref,
ptr,
ptr::NonNull,
ptr::{self, NonNull},
};
use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage};
@ -21,14 +21,61 @@ pub struct RocList<T> {
}
impl<T> RocList<T> {
#[inline(always)]
fn alloc_alignment() -> u32 {
mem::align_of::<T>().max(mem::align_of::<Storage>()) as u32
}
pub fn empty() -> Self {
RocList {
Self {
elements: None,
length: 0,
capacity: 0,
}
}
/// Create an empty RocList with enough space preallocated to store
/// the requested number of elements.
pub fn with_capacity(num_elems: usize) -> Self {
    let elements = Self::elems_with_capacity(num_elems);

    Self {
        elements: Some(elements),
        length: 0,
        capacity: num_elems,
    }
}
/// Used for both roc_alloc and roc_realloc - given the number of elements,
/// returns the number of bytes needed to allocate: the elements themselves
/// plus the leading `Storage` refcount header.
fn alloc_bytes(num_elems: usize) -> usize {
    num_elems * mem::size_of::<T>() + mem::size_of::<Storage>()
}
// Allocate backing storage for `num_elems` elements via `roc_alloc`,
// initialize the refcount header, and return a pointer to the first
// element slot. Panics (todo!) if the allocator returns null.
fn elems_with_capacity(num_elems: usize) -> NonNull<ManuallyDrop<T>> {
    let alloc_ptr = unsafe { roc_alloc(Self::alloc_bytes(num_elems), Self::alloc_alignment()) };

    Self::elems_from_allocation(NonNull::new(alloc_ptr).unwrap_or_else(|| {
        todo!("Call roc_panic with the info that an allocation failed.");
    }))
}
// Given a fresh raw allocation, write a refcount-of-one `Storage` header at
// its start and return the pointer to where the elements begin. The elements
// themselves are NOT initialized by this function.
fn elems_from_allocation(allocation: NonNull<c_void>) -> NonNull<ManuallyDrop<T>> {
    let alloc_ptr = allocation.as_ptr();

    unsafe {
        let elem_ptr = Self::elem_ptr_from_alloc_ptr(alloc_ptr).cast::<ManuallyDrop<T>>();

        // Initialize the reference count.
        alloc_ptr
            .cast::<Storage>()
            .write(Storage::new_reference_counted());

        // The original alloc pointer was non-null, and this one is the alloc pointer
        // with `alignment` bytes added to it, so it should be non-null too.
        NonNull::new_unchecked(elem_ptr)
    }
}
/// Returns the number of elements currently stored in the list.
pub fn len(&self) -> usize {
    self.length
}
@ -41,21 +88,141 @@ impl<T> RocList<T> {
self.len() == 0
}
/// Note that there is no way to convert directly to a Vec.
///
/// This is because RocList values are not allocated using the system allocator, so
/// handing off any heap-allocated bytes to a Vec would not work because its Drop
/// implementation would try to free those bytes using the wrong allocator.
///
/// Instead, if you want a Rust Vec, you need to do a fresh allocation and copy the
/// bytes over - in other words, calling this `as_slice` method and then calling `to_vec`
/// on that.
pub fn as_slice(&self) -> &[T] {
&*self
}
#[inline(always)]
fn elements_and_storage(&self) -> Option<(NonNull<ManuallyDrop<T>>, &Cell<Storage>)> {
let elements = self.elements?;
let storage = unsafe { &*elements.as_ptr().cast::<Cell<Storage>>().sub(1) };
let storage = unsafe { &*self.ptr_to_allocation().cast::<Cell<Storage>>() };
Some((elements, storage))
}
pub(crate) fn storage(&self) -> Option<Storage> {
self.elements_and_storage()
.map(|(_, storage)| storage.get())
}
/// Useful for doing memcpy on the elements. Returns NULL if list is empty.
///
/// # Safety
/// The returned pointer is only valid while `self`'s allocation is alive.
pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T {
    // Transmutes Option<NonNull<ManuallyDrop<T>>> into *const T, relying on
    // the null-pointer niche so that `None` becomes NULL and `Some(p)`
    // becomes `p` — presumably guaranteed by NonNull's layout; TODO confirm.
    unsafe { core::mem::transmute(self.elements) }
}
/// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty.
///
/// # Safety
/// The returned pointer is only valid while `self`'s allocation is alive.
pub(crate) unsafe fn ptr_to_allocation(&self) -> *mut c_void {
    // The allocation begins alloc_alignment() bytes before the first element
    // (that's where the Storage refcount header lives).
    // NOTE(review): when the list is empty this subtracts from NULL, which the
    // doc comment says yields NULL — pointer arithmetic on NULL is UB in
    // principle; confirm callers never use this on an empty list.
    unsafe {
        self.ptr_to_first_elem()
            .cast::<u8>()
            .sub(Self::alloc_alignment() as usize) as *mut _
    }
}
// Inverse of `ptr_to_allocation`: given the start of the raw allocation,
// step past the Storage header padding to where the elements begin.
unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void {
    unsafe {
        alloc_ptr
            .cast::<u8>()
            .add(Self::alloc_alignment() as usize)
            .cast()
    }
}
}
impl<T> RocList<T>
where
T: Clone,
{
/// Increase a RocList's capacity by at least the requested number of elements (possibly more).
///
/// May move the elements to a fresh allocation if the existing one was shared
/// (not unique) or could not be grown in place; `self` is updated either way.
pub fn reserve(&mut self, num_elems: usize) {
    let new_len = num_elems + self.length;
    let new_elems;
    let old_elements_ptr;

    match self.elements_and_storage() {
        Some((elements, storage)) => {
            if storage.get().is_unique() {
                unsafe {
                    let old_alloc = self.ptr_to_allocation();

                    // Try to reallocate in-place.
                    let new_alloc = roc_realloc(
                        old_alloc,
                        Self::alloc_bytes(new_len),
                        Self::alloc_bytes(self.capacity),
                        Self::alloc_alignment(),
                    );

                    if new_alloc == old_alloc {
                        // We successfully reallocated in-place; record the new
                        // capacity (otherwise the reserved space would be
                        // forgotten and a later extend would reallocate again).
                        self.capacity = new_len;

                        return;
                    } else {
                        // We got back a different allocation; copy the existing elements
                        // into it. We don't need to increment their refcounts because
                        // the existing allocation that referenced them is now gone and
                        // no longer referencing them.
                        new_elems = Self::elems_from_allocation(
                            NonNull::new(new_alloc).unwrap_or_else(|| {
                                todo!("Reallocation failed");
                            }),
                        );
                    }

                    // Note that realloc automatically deallocates the old allocation,
                    // so we don't need to call roc_dealloc here.
                }
            } else {
                // The allocation is shared, so make a new one.
                new_elems = Self::elems_with_capacity(new_len);
                old_elements_ptr = elements.as_ptr();

                unsafe {
                    // Copy the old elements to the new allocation.
                    copy_nonoverlapping(old_elements_ptr, new_elems.as_ptr(), self.length);
                }

                // Decrease the current allocation's reference count.
                let mut new_storage = storage.get();
                let needs_dealloc = new_storage.decrease();

                if needs_dealloc {
                    // Unlike in Drop, do *not* decrement the refcounts of all the elements!
                    // The new allocation is referencing them, so instead of incrementing them
                    // all just to decrement them again here, we neither increment nor decrement them.
                    unsafe {
                        roc_dealloc(self.ptr_to_allocation(), Self::alloc_alignment());
                    }
                } else if !new_storage.is_readonly() {
                    // Write the storage back.
                    storage.set(new_storage);
                }
            }
        }
        None => {
            // This is an empty list, so `reserve` is the same as `with_capacity`.
            self.update_to(Self::with_capacity(new_len));

            return;
        }
    }

    // Swap in the new allocation without running Drop on the old `self`
    // (its allocation has already been reallocated or refcount-released above).
    self.update_to(Self {
        elements: Some(new_elems),
        length: self.length,
        capacity: new_len,
    });
}
pub fn from_slice(slice: &[T]) -> Self {
let mut list = Self::empty();
list.extend_from_slice(slice);
@ -64,27 +231,37 @@ where
pub fn extend_from_slice(&mut self, slice: &[T]) {
// TODO: Can we do better for ZSTs? Alignment might be a problem.
if slice.is_empty() {
return;
}
let alignment = cmp::max(mem::align_of::<T>(), mem::align_of::<Storage>());
let elements_offset = alignment;
let new_size = elements_offset + mem::size_of::<T>() * (self.len() + slice.len());
let new_ptr = if let Some((elements, storage)) = self.elements_and_storage() {
let new_len = self.len() + slice.len();
let non_null_elements = if let Some((elements, storage)) = self.elements_and_storage() {
// Decrement the list's reference count.
let mut copy = storage.get();
let is_unique = copy.decrease();
if is_unique {
// If the memory is not shared, we can reuse the memory.
let old_size = elements_offset + mem::size_of::<T>() * self.len();
unsafe {
let ptr = elements.as_ptr().cast::<u8>().sub(alignment).cast();
roc_realloc(ptr, new_size, old_size, alignment as u32).cast()
// If we have enough capacity, we can add to the existing elements in-place.
if self.capacity() >= slice.len() {
elements
} else {
// There wasn't enough capacity, so we need a new allocation.
// Since this is a unique RocList, we can use realloc here.
let new_ptr = unsafe {
roc_realloc(
storage.as_ptr().cast(),
Self::alloc_bytes(new_len),
Self::alloc_bytes(self.capacity),
Self::alloc_alignment(),
)
};
self.capacity = new_len;
Self::elems_from_allocation(NonNull::new(new_ptr).unwrap_or_else(|| {
todo!("Reallocation failed");
}))
}
} else {
if !copy.is_readonly() {
@ -93,49 +270,19 @@ where
}
// Allocate new memory.
let new_ptr = unsafe { roc_alloc(new_size, alignment as u32) };
let new_elements = unsafe {
new_ptr
.cast::<u8>()
.add(alignment)
.cast::<ManuallyDrop<T>>()
};
// Initialize the reference count.
unsafe {
let storage_ptr = new_elements.cast::<Storage>().sub(1);
storage_ptr.write(Storage::new_reference_counted());
}
let new_elements = Self::elems_with_capacity(slice.len());
// Copy the old elements to the new allocation.
unsafe {
copy_nonoverlapping(elements.as_ptr(), new_elements, self.length);
copy_nonoverlapping(elements.as_ptr(), new_elements.as_ptr(), self.length);
}
new_ptr
new_elements
}
} else {
// Allocate new memory.
let new_ptr = unsafe { roc_alloc(new_size, alignment as u32) };
let new_elements = unsafe { new_ptr.cast::<u8>().add(elements_offset).cast::<T>() };
// Initialize the reference count.
unsafe {
let storage_ptr = new_elements.cast::<Storage>().sub(1);
storage_ptr.write(Storage::new_reference_counted());
}
new_ptr
Self::elems_with_capacity(slice.len())
};
let elements = unsafe {
new_ptr
.cast::<u8>()
.add(elements_offset)
.cast::<ManuallyDrop<T>>()
};
let non_null_elements = NonNull::new(elements).unwrap();
self.elements = Some(non_null_elements);
let elements = self.elements.unwrap().as_ptr();
@ -159,6 +306,16 @@ where
self.capacity = self.length
}
/// Replace self with a new version, without letting `drop` run in between.
fn update_to(&mut self, updated: Self) {
    // Move `updated` into place and take the previous value out, then forget
    // the previous value so its `Drop` never runs — callers have already
    // disposed of (or handed off) the old allocation, and dropping it here
    // would release it a second time.
    let old = mem::replace(self, updated);
    mem::forget(old);
}
}
impl<T> Deref for RocList<T> {
@ -282,22 +439,15 @@ impl<T> Drop for RocList<T> {
for index in 0..self.len() {
let elem_ptr = elements.as_ptr().add(index);
mem::drop::<T>(ManuallyDrop::take(&mut *elem_ptr));
ManuallyDrop::drop(&mut *elem_ptr);
}
let alignment = cmp::max(mem::align_of::<T>(), mem::align_of::<Storage>());
// Release the memory.
roc_dealloc(
elements.as_ptr().cast::<u8>().sub(alignment).cast(),
alignment as u32,
);
}
} else {
if !new_storage.is_readonly() {
// Write the storage back.
storage.set(new_storage);
roc_dealloc(self.ptr_to_allocation(), Self::alloc_alignment());
}
} else if !new_storage.is_readonly() {
// Write the storage back.
storage.set(new_storage);
}
}
}

View File

@ -1,18 +1,51 @@
#![deny(unsafe_op_in_unsafe_fn)]
use core::{
cmp,
convert::TryFrom,
fmt::Debug,
mem::{size_of, ManuallyDrop},
fmt,
hash::{self, Hash},
mem::{self, size_of, ManuallyDrop},
ops::{Deref, DerefMut},
ptr,
};
use std::hash::Hash;
#[cfg(not(feature = "no_std"))]
use std::ffi::{CStr, CString};
use crate::RocList;
#[repr(transparent)]
pub struct RocStr(RocStrInner);
/// Run `closure` with a scratch buffer big enough to hold `length` bytes,
/// handed out as a `*mut E`.
///
/// Requests under `RocStr::TEMP_STR_MAX_STACK_BYTES` are served from the
/// stack; larger ones fall back to a temporary heap allocation which is
/// freed before returning. The closure must not use the pointer after it
/// returns.
fn with_stack_bytes<F, E, T>(length: usize, closure: F) -> T
where
    F: FnOnce(*mut E) -> T,
{
    use crate::{roc_alloc, roc_dealloc};
    use core::mem::MaybeUninit;

    if length < RocStr::TEMP_STR_MAX_STACK_BYTES {
        // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array
        // has become stabilized, use that here in order to do a precise
        // stack allocation instead of always over-allocating to 64B.
        //
        // BUGFIX: back the buffer with u64s rather than u8s so the pointer we
        // hand out is 8-byte aligned. A plain `[u8; 64]` only guarantees 1-byte
        // alignment, which would be undefined behavior to use as e.g. `*mut u16`
        // (as the UTF-16 callers of this function do).
        debug_assert!(core::mem::align_of::<E>() <= core::mem::align_of::<u64>());

        let mut bytes: MaybeUninit<[u64; RocStr::TEMP_STR_MAX_STACK_BYTES / 8]> =
            MaybeUninit::uninit();

        closure(bytes.as_mut_ptr() as *mut E)
    } else {
        let align = core::mem::align_of::<E>() as u32;
        // The string is too long to stack-allocate, so
        // do a heap allocation and then free it afterwards.
        let ptr = unsafe { roc_alloc(length, align) } as *mut E;
        let answer = closure(ptr);

        // Free the heap allocation.
        unsafe { roc_dealloc(ptr.cast(), align) };

        answer
    }
}
impl RocStr {
pub const SIZE: usize = core::mem::size_of::<Self>();
pub const MASK: u8 = 0b1000_0000;
@ -28,8 +61,8 @@ impl RocStr {
/// # Safety
///
/// `slice` must be valid UTF-8.
pub unsafe fn from_slice(slice: &[u8]) -> Self {
if let Some(small_string) = unsafe { SmallString::try_from(slice) } {
pub unsafe fn from_slice_unchecked(slice: &[u8]) -> Self {
if let Some(small_string) = unsafe { SmallString::try_from_utf8_bytes(slice) } {
Self(RocStrInner { small_string })
} else {
let heap_allocated = RocList::from_slice(slice);
@ -51,6 +84,13 @@ impl RocStr {
}
}
/// Number of bytes this string can hold without reallocating.
/// Small (inline) strings always report the fixed inline capacity.
pub fn capacity(&self) -> usize {
    if let RocStrInnerRef::HeapAllocated(roc_list) = self.as_enum_ref() {
        roc_list.capacity()
    } else {
        SmallString::CAPACITY
    }
}
pub fn len(&self) -> usize {
match self.as_enum_ref() {
RocStrInnerRef::HeapAllocated(h) => h.len(),
@ -62,9 +102,417 @@ impl RocStr {
self.len() == 0
}
/// Note that there is no way to convert directly to a String.
///
/// This is because RocStr values are not allocated using the system allocator, so
/// handing off any heap-allocated bytes to a String would not work because its Drop
/// implementation would try to free those bytes using the wrong allocator.
///
/// Instead, if you want a Rust String, you need to do a fresh allocation and copy the
/// bytes over - in other words, calling this `as_str` method and then calling `to_string`
/// on that.
pub fn as_str(&self) -> &str {
    self.deref()
}
/// Create an empty RocStr with enough space preallocated to store
/// the requested number of bytes.
pub fn with_capacity(bytes: usize) -> Self {
    if bytes > SmallString::CAPACITY {
        // Too big for the inline representation; allocate on the heap.
        RocStr(RocStrInner {
            heap_allocated: ManuallyDrop::new(RocList::with_capacity(bytes)),
        })
    } else {
        // The inline small-string representation already has enough room.
        RocStr(RocStrInner {
            small_string: SmallString::empty(),
        })
    }
}
/// Increase a RocStr's capacity by at least the requested number of bytes (possibly more).
///
/// Mutates in place. If the string is currently small (inline) and the target
/// capacity still fits inline, this is a no-op; otherwise the contents are moved
/// into a heap-allocated representation with the requested capacity.
pub fn reserve(&mut self, bytes: usize) {
    if self.is_small_str() {
        // is_small_str() just told us which union variant is active, so reading
        // small_string here is sound. This copies the inline bytes out by value
        // (assumes SmallString has no drop glue - it's a byte array + len byte).
        let small_str = unsafe { self.0.small_string };
        let target_cap = small_str.len() + bytes;

        if target_cap > SmallString::CAPACITY {
            // The requested capacity won't fit in a small string; we need to go big.
            let mut roc_list = RocList::with_capacity(target_cap);

            roc_list.extend_from_slice(small_str.as_bytes());

            *self = RocStr(RocStrInner {
                heap_allocated: ManuallyDrop::new(roc_list),
            });
        }
    } else {
        // Move the heap list out of the union so we can grow it. After this,
        // self's union holds stale bits that must never be dropped.
        let mut roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) };

        roc_list.reserve(bytes);

        let mut updated = RocStr(RocStrInner {
            heap_allocated: ManuallyDrop::new(roc_list),
        });

        // Swap the grown string into self, then forget the swapped-out value:
        // after the swap, `updated` holds the stale union bits we emptied with
        // ManuallyDrop::take above, so running its Drop would be a double-free.
        mem::swap(self, &mut updated);
        mem::forget(updated);
    }
}
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
fn first_nul_byte(&self) -> Option<usize> {
    match self.as_enum_ref() {
        RocStrInnerRef::SmallString(small_string) => small_string.first_nul_byte(),
        RocStrInnerRef::HeapAllocated(roc_list) => {
            roc_list.iter().position(|&byte| byte == 0)
        }
    }
}
// If the string is under this many bytes, the with_terminator family
// of methods will allocate the terminated string on the stack when
// the RocStr is non-unique. (Longer strings fall back to a temporary
// heap allocation instead; see with_stack_bytes.)
const TEMP_STR_MAX_STACK_BYTES: usize = 64;
/// Like calling with_utf8_terminator passing \0 for the terminator,
/// except it can fail because a RocStr may contain \0 characters,
/// which a nul-terminated string must not.
pub fn utf8_nul_terminated<T, F: Fn(*mut u8, usize) -> T>(
    self,
    func: F,
) -> Result<T, InteriorNulError> {
    match self.first_nul_byte() {
        Some(pos) => Err(InteriorNulError { pos, roc_str: self }),
        None => Ok(self.with_utf8_terminator(b'\0', func)),
    }
}
/// Turn this RocStr into a UTF-8 `*mut u8`, terminate it with the given character
/// (commonly either `b'\n'` or b`\0`) and then provide access to that
/// `*mut u8` (as well as its length) for the duration of a given function. This is
/// designed to be an efficient way to turn a `RocStr` received from an application into
/// either the nul-terminated UTF-8 `char*` needed by UNIX syscalls, or into a
/// newline-terminated string to write to stdout or stderr (for a "println"-style effect).
///
/// **NOTE:** The length passed to the function is the same value that `RocStr::len` will
/// return; it does not count the terminator. So to convert it to a nul-terminated slice
/// of Rust bytes (for example), call `slice::from_raw_parts` passing the given length + 1.
///
/// This operation achieves efficiency by reusing allocated bytes from the RocStr itself,
/// and sometimes allocating on the stack. It does not allocate on the heap when given a
/// a small string or a string with unique refcount, but may allocate when given a large
/// string with non-unique refcount. (It will do a stack allocation if the string is under
/// 64 bytes; the stack allocation will only live for the duration of the called function.)
///
/// If the given (owned) RocStr is unique, this can overwrite the underlying bytes
/// to terminate the string in-place. Small strings have an extra byte at the end
/// where the length is stored, which can be replaced with the terminator. Heap-allocated
/// strings can have excess capacity which can hold a terminator, or if they have no
/// excess capacity, all the bytes can be shifted over the refcount in order to free up
/// a `usize` worth of free space at the end - which can easily fit a 1-byte terminator.
pub fn with_utf8_terminator<T, F: Fn(*mut u8, usize) -> T>(self, terminator: u8, func: F) -> T {
// Note that this function does not use with_terminator because it can be
// more efficient than that - due to knowing that it's already in UTF-8 and always
// has room for a 1-byte terminator in the existing allocation (either in the refcount
// bytes, or, in a small string, in the length at the end of the string).
let terminate = |alloc_ptr: *mut u8, len: usize| unsafe {
*(alloc_ptr.add(len)) = terminator;
func(alloc_ptr, len)
};
match self.as_enum_ref() {
RocStrInnerRef::HeapAllocated(roc_list) => {
unsafe {
match roc_list.storage() {
Some(storage) if storage.is_unique() => {
// The backing RocList was unique, so we can mutate it in-place.
let len = roc_list.len();
let ptr = if len < roc_list.capacity() {
// We happen to have excess capacity already, so we will be able
// to write the terminator into the first byte of excess capacity.
roc_list.ptr_to_first_elem() as *mut u8
} else {
// We always have an allocation that's even bigger than necessary,
// because the refcount bytes take up more than the 1B needed for
// the terminator. We just need to shift the bytes over on top
// of the refcount.
let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8;
// First, copy the bytes over the original allocation - effectively
// shifting everything over by one `usize`. Now we no longer have a
// refcount (but the terminated won't use that anyway), but we do
// have a free `usize` at the end.
//
// IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping
// because the regions definitely overlap!
ptr::copy(roc_list.ptr_to_first_elem() as *mut u8, alloc_ptr, len);
alloc_ptr
};
terminate(ptr, len)
}
Some(_) => {
let len = roc_list.len();
// The backing list was not unique, so we can't mutate it in-place.
// ask for `len + 1` to store the original string and the terminator
with_stack_bytes(len + 1, |alloc_ptr: *mut u8| {
let alloc_ptr = alloc_ptr as *mut u8;
let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8;
// memcpy the bytes into the stack allocation
ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len);
terminate(alloc_ptr, len)
})
}
None => {
// The backing list was empty.
//
// No need to do a heap allocation for an empty string - we
// can just do a stack allocation that will live for the
// duration of the function.
func([terminator].as_mut_ptr(), 0)
}
}
}
}
RocStrInnerRef::SmallString(small_str) => {
let mut bytes = small_str.bytes;
// Even if the small string is at capacity, there will be room to write
// a terminator in the byte that's used to store the length.
terminate(bytes.as_mut_ptr() as *mut u8, small_str.len())
}
}
}
/// Like calling with_utf16_terminator passing \0 for the terminator,
/// except it can fail because a RocStr may contain \0 characters,
/// which a nul-terminated string must not.
pub fn utf16_nul_terminated<T, F: Fn(*mut u16, usize) -> T>(
    self,
    func: F,
) -> Result<T, InteriorNulError> {
    match self.first_nul_byte() {
        Some(pos) => Err(InteriorNulError { pos, roc_str: self }),
        None => Ok(self.with_utf16_terminator(0, func)),
    }
}
/// Turn this RocStr into a nul-terminated UTF-16 `*mut u16` and then provide access to
/// that `*mut u16` (as well as its length) for the duration of a given function. This is
/// designed to be an efficient way to turn a RocStr received from an application into
/// the nul-terminated UTF-16 `wchar_t*` needed by Windows API calls.
///
/// **NOTE:** The length passed to the function is the same value that `RocStr::len` will
/// return; it does not count the terminator. So to convert it to a nul-terminated
/// slice of u16 code units, call `slice::from_raw_parts` passing the given length + 1.
///
/// NOTE(review): `RocStr::len` is a UTF-8 *byte* count. A UTF-16 encoding never needs
/// more code units than that, so the buffer is big enough - but for non-ASCII text the
/// written code units end before index `len`, leaving unwritten units between the
/// content and the terminator at index `len`. Confirm this is the intended contract.
///
/// This operation achieves efficiency by reusing allocated bytes from the RocStr itself,
/// and sometimes allocating on the stack. It does not allocate on the heap when given a
/// small string or a string with unique refcount, but may allocate when given a large
/// string with non-unique refcount. (It will do a stack allocation if the string is under
/// 64 bytes; the stack allocation will only live for the duration of the called function.)
///
/// Because this works on an owned RocStr, it's able to overwrite the underlying bytes
/// to nul-terminate the string in-place. Small strings have an extra byte at the end
/// where the length is stored, which can become 0 for nul-termination. Heap-allocated
/// strings can have excess capacity which can hold a terminator, or if they have no
/// excess capacity, all the bytes can be shifted over the refcount in order to free up
/// a `usize` worth of free space at the end - which can easily fit a terminator.
pub fn with_utf16_terminator<T, F: Fn(*mut u16, usize) -> T>(
    self,
    terminator: u16,
    func: F,
) -> T {
    self.with_terminator(terminator, |dest_ptr: *mut u16, str_slice: &str| {
        // Translate UTF-8 source bytes into UTF-16 and write them into the destination.
        for (index, wchar) in str_slice.encode_utf16().enumerate() {
            unsafe {
                *(dest_ptr.add(index)) = wchar;
            }
        }

        func(dest_ptr, str_slice.len())
    })
}
/// Convert this RocStr into a nul-terminated UTF-16 buffer with forward
/// slashes rewritten to backslashes (for Windows path APIs), then run `func`
/// over it. Fails if the string contains an interior \0.
pub fn with_windows_path<T, F: Fn(*mut u16, usize) -> T>(
    self,
    func: F,
) -> Result<T, InteriorNulError> {
    match self.first_nul_byte() {
        Some(pos) => Err(InteriorNulError { pos, roc_str: self }),
        None => Ok(
            self.with_terminator(0u16, |dest_ptr: *mut u16, str_slice: &str| {
                // Translate UTF-8 source bytes into UTF-16, converting path
                // separators on the fly, and write them into the destination.
                for (index, wchar) in str_slice.encode_utf16().enumerate() {
                    // Replace slashes with backslashes
                    let wchar = if wchar == '/' as u16 {
                        '\\' as u16
                    } else {
                        wchar
                    };

                    unsafe {
                        *(dest_ptr.add(index)) = wchar;
                    }
                }

                func(dest_ptr, str_slice.len())
            }),
        ),
    }
}
/// Generic version of with_utf8_terminator and with_utf16_terminator. The given
/// function will be passed a pointer to elements of type E. The pointer will have
/// enough room to hold one element for each byte of the given `&str`'s length,
/// plus the terminator element.
///
/// The terminator will be written right after the end of the space for the other elements,
/// but all the memory in that space before the terminator will be uninitialized. This means
/// if you want to do something like copy the contents of the `&str` into there, that will
/// need to be done explicitly.
///
/// The terminator is always written - even if there are no other elements present before it.
/// (In such a case, the `&str` argument will be empty and the `*mut E` will point directly
/// to the terminator).
///
/// One use for this is to convert slashes to backslashes in Windows paths;
/// this function provides the most efficient way to do that, because no extra
/// iteration pass is necessary; the conversion can be done after each translation
/// of a UTF-8 character to UTF-16. Here's how that would look:
///
/// use roc_std::{RocStr, InteriorNulError};
///
/// pub fn with_windows_path<T, F: Fn(*mut u16, usize) -> T>(
///     roc_str: RocStr,
///     func: F,
/// ) -> Result<T, InteriorNulError> {
///     let answer = roc_str.with_terminator(0u16, |dest_ptr: *mut u16, str_slice: &str| {
///         // Translate UTF-8 source bytes into UTF-16 and write them into the destination.
///         for (index, mut wchar) in str_slice.encode_utf16().enumerate() {
///             // Replace slashes with backslashes
///             if wchar == '/' as u16 {
///                 wchar = '\\' as u16
///             };
///
///             unsafe {
///                 *(dest_ptr.add(index)) = wchar;
///             }
///         }
///
///         func(dest_ptr, str_slice.len())
///     });
///
///     Ok(answer)
/// }
pub fn with_terminator<E: Copy, A, F: Fn(*mut E, &str) -> A>(
    self,
    terminator: E,
    func: F,
) -> A {
    use crate::Storage;
    use core::mem::align_of;

    // Write the terminator one element past the string's length, then hand the
    // buffer (and the source &str) to the caller's function.
    let terminate = |alloc_ptr: *mut E, str_slice: &str| unsafe {
        *(alloc_ptr.add(str_slice.len())) = terminator;

        func(alloc_ptr, str_slice)
    };

    // When we don't have an existing allocation that can work, fall back on this.
    // It uses either a stack allocation, or, if that would be too big, a heap allocation.
    let fallback = |str_slice: &str| {
        // We need 1 extra elem for the terminator. It must be an elem,
        // not a byte, because we'll be providing a pointer to elems.
        let needed_bytes = (str_slice.len() + 1) * size_of::<E>();

        with_stack_bytes(needed_bytes, |alloc_ptr: *mut E| {
            terminate(alloc_ptr, str_slice)
        })
    };

    match self.as_enum_ref() {
        RocStrInnerRef::HeapAllocated(roc_list) => {
            let len = roc_list.len();

            unsafe {
                match roc_list.storage() {
                    Some(storage) if storage.is_unique() => {
                        // The backing RocList was unique, so we can mutate it in-place.

                        // We need 1 extra elem for the terminator. It must be an elem,
                        // not a byte, because we'll be providing a pointer to elems.
                        let needed_bytes = (len + 1) * size_of::<E>();

                        // We can use not only the capacity on the heap, but also
                        // the bytes originally used for the refcount.
                        let available_bytes = roc_list.capacity() + size_of::<Storage>();

                        // NOTE(review): strict `<` means the exactly-equal case takes the
                        // fallback path even though the bytes would just fit - confirm
                        // whether that is deliberate.
                        if needed_bytes < available_bytes {
                            debug_assert!(align_of::<Storage>() >= align_of::<E>());

                            // We happen to have sufficient excess capacity already,
                            // so we will be able to write the new elements as well as
                            // the terminator into the existing allocation.
                            //
                            // NOTE(review): `ptr` starts at the allocation (over the
                            // refcount) while `self.as_str()` points into that same
                            // allocation, so the destination the callee writes to
                            // overlaps the source it reads from - confirm callers
                            // account for this.
                            let ptr = roc_list.ptr_to_allocation() as *mut E;
                            let answer = terminate(ptr, self.as_str());

                            // We cannot rely on the RocStr::drop implementation, because
                            // it tries to use the refcount - which we just overwrote
                            // with string bytes.
                            mem::forget(self);

                            // NOTE(review): this frees with align_of::<E>() even though the
                            // allocation was created with RocList's alignment; the hosts in
                            // this repo free via libc::free and ignore the alignment arg,
                            // but an alignment-aware allocator would care - confirm.
                            crate::roc_dealloc(ptr.cast(), mem::align_of::<E>() as u32);

                            answer
                        } else {
                            // We didn't have sufficient excess capacity already,
                            // so we need to do either a new stack allocation or a new
                            // heap allocation.
                            fallback(self.as_str())
                        }
                    }
                    Some(_) => {
                        // The backing list was not unique, so we can't mutate it in-place.
                        fallback(self.as_str())
                    }
                    None => {
                        // The backing list was empty.
                        //
                        // No need to do a heap allocation for an empty string - we
                        // can just do a stack allocation that will live for the
                        // duration of the function.
                        func([terminator].as_mut_ptr() as *mut E, "")
                    }
                }
            }
        }
        RocStrInnerRef::SmallString(small_str) => {
            let len = small_str.len();

            // We need 1 extra elem for the terminator. It must be an elem,
            // not a byte, because we'll be providing a pointer to elems.
            let needed_bytes = (len + 1) * size_of::<E>();
            let available_bytes = size_of::<SmallString>();

            if needed_bytes < available_bytes {
                // Write in place over the inline bytes; since `self` is consumed,
                // clobbering the trailing length byte would be harmless.
                terminate(small_str.bytes.as_ptr() as *mut E, self.as_str())
            } else {
                fallback(self.as_str())
            }
        }
    }
}
}
impl Deref for RocStr {
@ -78,6 +526,35 @@ impl Deref for RocStr {
}
}
/// This can fail because a CStr may contain invalid UTF-8 characters
#[cfg(not(feature = "no_std"))]
impl TryFrom<&CStr> for RocStr {
    type Error = core::str::Utf8Error;

    fn try_from(c_str: &CStr) -> Result<Self, Self::Error> {
        let utf8 = c_str.to_str()?;

        Ok(RocStr::from(utf8))
    }
}
/// This can fail because a CString may contain invalid UTF-8 characters
#[cfg(not(feature = "no_std"))]
impl TryFrom<CString> for RocStr {
    type Error = core::str::Utf8Error;

    fn try_from(c_string: CString) -> Result<Self, Self::Error> {
        c_string.as_c_str().to_str().map(RocStr::from)
    }
}
#[cfg(not(feature = "no_std"))]
/// Like https://doc.rust-lang.org/std/ffi/struct.NulError.html but
/// only for interior nuls, not for missing nul terminators.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InteriorNulError {
    /// Byte index of the first interior \0 found in the string.
    pub pos: usize,
    /// The original string, handed back so the caller can recover ownership.
    pub roc_str: RocStr,
}
impl Default for RocStr {
fn default() -> Self {
Self::empty()
@ -86,7 +563,7 @@ impl Default for RocStr {
impl From<&str> for RocStr {
fn from(s: &str) -> Self {
unsafe { Self::from_slice(s.as_bytes()) }
unsafe { Self::from_slice_unchecked(s.as_bytes()) }
}
}
@ -99,18 +576,24 @@ impl PartialEq for RocStr {
impl Eq for RocStr {}
impl PartialOrd for RocStr {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
self.as_str().partial_cmp(other.as_str())
}
}
impl Ord for RocStr {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
fn cmp(&self, other: &Self) -> cmp::Ordering {
self.as_str().cmp(other.as_str())
}
}
impl Debug for RocStr {
impl fmt::Debug for RocStr {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
self.deref().fmt(f)
}
}
impl fmt::Display for RocStr {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
self.deref().fmt(f)
}
@ -168,7 +651,7 @@ impl SmallString {
/// # Safety
///
/// `slice` must be valid UTF-8.
unsafe fn try_from(slice: &[u8]) -> Option<Self> {
unsafe fn try_from_utf8_bytes(slice: &[u8]) -> Option<Self> {
// Check the size of the slice.
let len_as_u8 = u8::try_from(slice.len()).ok()?;
if (len_as_u8 as usize) > Self::CAPACITY {
@ -191,6 +674,17 @@ impl SmallString {
/// Length in bytes of the inline string (the flag bit in the length
/// byte is masked off).
fn len(&self) -> usize {
    (self.len & !RocStr::MASK) as usize
}
/// Returns the index of the first interior \0 byte in the string, or None if there are none.
fn first_nul_byte(&self) -> Option<usize> {
    // Only scan the initialized portion; bytes past len() are not part of the
    // string. Iterator::position mirrors how the heap-allocated case does this.
    self.bytes[..self.len()].iter().position(|&byte| byte == 0)
}
}
impl Deref for SmallString {
@ -210,7 +704,7 @@ impl DerefMut for SmallString {
}
impl Hash for RocStr {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}

View File

@ -1,6 +1,14 @@
use core::num::NonZeroIsize;
const REFCOUNT_1: isize = isize::MIN;
/// # Safety
///
/// isize::MIN is definitely not zero. This can become
/// https://doc.rust-lang.org/std/num/struct.NonZeroIsize.html#associatedconstant.MIN
/// once it has been stabilized.
const REFCOUNT_1: NonZeroIsize = unsafe { NonZeroIsize::new_unchecked(isize::MIN) };
const _ASSERT_STORAGE_SIZE: () =
assert!(std::mem::size_of::<isize>() == std::mem::size_of::<Storage>());
#[derive(Clone, Copy, Debug)]
pub enum Storage {
@ -10,7 +18,7 @@ pub enum Storage {
impl Storage {
pub fn new_reference_counted() -> Self {
Self::ReferenceCounted(NonZeroIsize::new(REFCOUNT_1).unwrap())
Self::ReferenceCounted(REFCOUNT_1)
}
/// Increment the reference count.
@ -37,11 +45,10 @@ impl Storage {
match self {
Storage::Readonly => false,
Storage::ReferenceCounted(rc) => {
let rc_as_isize = rc.get();
if rc_as_isize == REFCOUNT_1 {
if *rc == REFCOUNT_1 {
true
} else {
*rc = NonZeroIsize::new(rc_as_isize - 1).unwrap();
*rc = NonZeroIsize::new(rc.get() - 1).unwrap();
false
}
}
@ -51,4 +58,8 @@ impl Storage {
/// True for statically-allocated (never freed, never mutated) storage.
pub fn is_readonly(&self) -> bool {
    match self {
        Self::Readonly => true,
        Self::ReferenceCounted(_) => false,
    }
}
pub fn is_unique(&self) -> bool {
matches!(self, Self::ReferenceCounted(REFCOUNT_1))
}
}

View File

@ -7,6 +7,8 @@ extern crate roc_std;
use core::ffi::c_void;
const ROC_SMALL_STR_CAPACITY: usize = core::mem::size_of::<roc_std::RocStr>() - 1;
#[no_mangle]
pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void {
libc::malloc(size)
@ -27,10 +29,37 @@ pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) {
libc::free(c_ptr)
}
// Panic hook the Roc-generated code calls on a Roc-side crash; only
// compiled into test builds here.
#[cfg(test)]
#[no_mangle]
pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) {
    use std::ffi::CStr;
    use std::os::raw::c_char;

    match tag_id {
        0 => {
            // Tag 0: payload is a nul-terminated C string holding the message.
            let c_str = CStr::from_ptr(c_ptr as *const c_char);
            // unwrap: in tests we'd rather crash loudly on a non-UTF-8 message.
            let string = c_str.to_str().unwrap();
            panic!("roc_panic during test: {}", string);
        }
        // NOTE(review): other payload tags are unhandled - confirm which tag
        // ids the compiler can emit here before relying on this in tests.
        _ => todo!(),
    }
}
// Host-provided memcpy shim required by Roc-generated code; delegates
// straight to libc. Test builds only.
#[cfg(test)]
#[no_mangle]
pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void {
    libc::memcpy(dst, src, n)
}
// Host-provided memset shim required by Roc-generated code; delegates
// straight to libc. Test builds only.
#[cfg(test)]
#[no_mangle]
pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void {
    libc::memset(dst, c, n)
}
#[cfg(test)]
mod test_roc_std {
use roc_std::RocResult;
use roc_std::RocStr;
use roc_std::{RocBox, RocDec, RocList, RocResult, RocStr};
fn roc_str_byte_representation(string: &RocStr) -> [u8; RocStr::SIZE] {
unsafe { core::mem::transmute_copy(string) }
@ -86,9 +115,45 @@ mod test_roc_std {
#[test]
fn empty_string_capacity() {
let string = RocStr::from("");
let string = RocStr::empty();
assert_eq!(string.capacity(), 0);
assert_eq!(string.capacity(), super::ROC_SMALL_STR_CAPACITY);
}
#[test]
fn reserve_small_str() {
    // Reserving past the inline capacity must promote to a heap string
    // with exactly the requested capacity.
    let mut the_str = RocStr::empty();

    the_str.reserve(42);

    assert_eq!(the_str.capacity(), 42);
}
#[test]
fn reserve_big_str() {
    // A large reservation goes straight to the heap representation.
    let mut the_str = RocStr::empty();

    the_str.reserve(5000);

    assert_eq!(the_str.capacity(), 5000);
}
#[test]
fn reserve_small_list() {
    // Reserving on an empty list allocates exactly the requested capacity.
    let mut the_list = RocList::<RocStr>::empty();

    the_list.reserve(42);

    assert_eq!(the_list.capacity(), 42);
}
#[test]
fn reserve_big_list() {
    // A large reservation on a list is honored exactly as well.
    let mut the_list = RocList::<RocStr>::empty();

    the_list.reserve(5000);

    assert_eq!(the_list.capacity(), 5000);
}
#[test]
@ -123,4 +188,125 @@ mod test_roc_std {
assert!(!roc_result.is_ok());
assert!(roc_result.is_err());
}
#[test]
fn create_roc_box() {
    // Round-trip a value through RocBox.
    let value = 42i32;
    let boxed = RocBox::new(value);

    assert_eq!(boxed.into_inner(), value)
}
#[test]
fn roc_dec_fmt() {
    // Display of RocDec should render the full fixed-point value with no
    // scientific notation. Use `.to_string()` rather than `format!("{}", …)`
    // - it goes through the same Display impl and is the idiomatic form.
    assert_eq!(
        RocDec::MIN.to_string(),
        "-1701411834604692317316.87303715884105728"
    );

    let half = RocDec::from_str("0.5").unwrap();
    assert_eq!(half.to_string(), "0.5");

    let ten = RocDec::from_str("10").unwrap();
    assert_eq!(ten.to_string(), "10");

    let example = RocDec::from_str("1234.5678").unwrap();
    assert_eq!(example.to_string(), "1234.5678");
}
}
#[cfg(test)]
mod with_terminator {
    use core::slice;
    use roc_std::RocStr;
    use std::ffi::CStr;

    /// Round-trip `string` through both terminator APIs (optionally after
    /// reserving extra capacity) and verify the terminated buffers decode
    /// back to the original input.
    fn verify_temp_c(string: &str, excess_capacity: usize) {
        let mut roc_str = RocStr::from(string);

        if excess_capacity > 0 {
            roc_str.reserve(excess_capacity);
        }

        // utf8_nul_terminated
        {
            let answer = roc_str.clone().utf8_nul_terminated(|ptr, len| {
                let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) };
                let c_str = CStr::from_bytes_with_nul(bytes).unwrap();

                assert_eq!(c_str.to_str(), Ok(string));

                42
            });

            assert_eq!(Ok(42), answer);
        }

        // utf16_nul_terminated
        {
            let answer = roc_str.utf16_nul_terminated(|ptr, len| {
                let bytes: &[u16] = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) };

                // Verify that it's nul-terminated
                assert_eq!(bytes[len], 0);

                // BUGFIX: compare the decoded UTF-16 against the ORIGINAL input.
                // Previously the decoded value was bound to a variable that
                // shadowed the `string` parameter and was then compared with
                // itself, so this assertion always passed vacuously.
                let decoded = String::from_utf16(&bytes[0..len]).unwrap();

                assert_eq!(decoded.as_str(), string);

                42
            });

            assert_eq!(Ok(42), answer);
        }
    }

    #[test]
    fn empty_string() {
        verify_temp_c("", 0);
    }

    /// e.g. "a" or "abc" or "abcdefg" etc.
    fn string_for_len(len: usize) -> String {
        let first_index: usize = 97; // start with ASCII lowercase "a"
        let bytes: Vec<u8> = (0..len)
            .map(|index| {
                let letter = (index % 26) + first_index;

                letter.try_into().unwrap()
            })
            .collect();

        assert_eq!(bytes.len(), len);

        // The bytes should contain no nul characters.
        assert!(bytes.iter().all(|byte| *byte != 0));

        String::from_utf8(bytes).unwrap()
    }

    #[test]
    fn small_strings() {
        for len in 1..=super::ROC_SMALL_STR_CAPACITY {
            verify_temp_c(&string_for_len(len), 0);
        }
    }

    #[test]
    fn no_excess_capacity() {
        // This is small enough that it should be a stack allocation for UTF-8
        verify_temp_c(&string_for_len(33), 0);

        // This is big enough that it should be a heap allocation for UTF-8 and UTF-16
        verify_temp_c(&string_for_len(65), 0);
    }

    #[test]
    fn with_excess_capacity() {
        // We should be able to use the excess capacity for all of these.
        verify_temp_c(&string_for_len(33), 1);
        verify_temp_c(&string_for_len(33), 33);
        verify_temp_c(&string_for_len(65), 1);
        verify_temp_c(&string_for_len(65), 64);
    }
}