diff --git a/Cargo.lock b/Cargo.lock index 95dacacb39..4b6266847f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4120,6 +4120,7 @@ dependencies = [ name = "roc_std" version = "0.1.0" dependencies = [ + "arrayvec 0.7.2", "static_assertions 0.1.1", ] diff --git a/compiler/build/src/link.rs b/compiler/build/src/link.rs index f9aff2e0fb..ad66b5f5ba 100644 --- a/compiler/build/src/link.rs +++ b/compiler/build/src/link.rs @@ -421,7 +421,6 @@ pub fn rebuild_host( let env_home = env::var("HOME").unwrap_or_else(|_| "".to_string()); let env_cpath = env::var("CPATH").unwrap_or_else(|_| "".to_string()); - if zig_host_src.exists() { // Compile host.zig diff --git a/compiler/test_gen/src/helpers/from_wasmer_memory.rs b/compiler/test_gen/src/helpers/from_wasmer_memory.rs index 489ab2d541..e8f5a3da5f 100644 --- a/compiler/test_gen/src/helpers/from_wasmer_memory.rs +++ b/compiler/test_gen/src/helpers/from_wasmer_memory.rs @@ -70,7 +70,7 @@ impl FromWasmerMemory for RocStr { &memory_bytes[big_elem_ptr..][..big_length] }; - unsafe { RocStr::from_slice(slice) } + unsafe { RocStr::from_slice_unchecked(slice) } } } diff --git a/compiler/test_gen/src/wasm_str.rs b/compiler/test_gen/src/wasm_str.rs index 18466ec2a4..94ee2dc116 100644 --- a/compiler/test_gen/src/wasm_str.rs +++ b/compiler/test_gen/src/wasm_str.rs @@ -90,7 +90,7 @@ use roc_std::{RocList, RocStr}; // "# // ), -// RocStr::from_slice(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"), +// RocStr::from_slice_unchecked(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"), // RocStr // ); // } @@ -108,7 +108,7 @@ use roc_std::{RocList, RocStr}; // _ -> "" // "# // ), -// RocStr::from_slice(b"JJJ"), +// RocStr::from_slice_unchecked(b"JJJ"), // RocStr // ); // } @@ -122,8 +122,8 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"01234567789abcdefghi"), -// RocStr::from_slice(b"01234567789abcdefghi") +// RocStr::from_slice_unchecked(b"01234567789abcdefghi"), +// RocStr::from_slice_unchecked(b"01234567789abcdefghi") // ]), // 
RocList // ); @@ -135,8 +135,8 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"01234567789abcdefghi "), -// RocStr::from_slice(b" 01234567789abcdefghi") +// RocStr::from_slice_unchecked(b"01234567789abcdefghi "), +// RocStr::from_slice_unchecked(b" 01234567789abcdefghi") // ]), // RocList // ); @@ -151,9 +151,9 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"J"), -// RocStr::from_slice(b"J"), -// RocStr::from_slice(b"J") +// RocStr::from_slice_unchecked(b"J"), +// RocStr::from_slice_unchecked(b"J"), +// RocStr::from_slice_unchecked(b"J") // ]), // RocList // ); @@ -169,7 +169,7 @@ use roc_std::{RocList, RocStr}; // "than the delimiter which happens to be very very long" // "# // ), -// RocList::from_slice(&[RocStr::from_slice(b"string to split is shorter")]), +// RocList::from_slice(&[RocStr::from_slice_unchecked(b"string to split is shorter")]), // RocList // ); // } @@ -182,7 +182,7 @@ use roc_std::{RocList, RocStr}; // Str.split "" "" // "# // ), -// RocList::from_slice(&[RocStr::from_slice(b"")]), +// RocList::from_slice(&[RocStr::from_slice_unchecked(b"")]), // RocList // ); // } @@ -195,7 +195,7 @@ use roc_std::{RocList, RocStr}; // Str.split "a," "," // "# // ), -// RocList::from_slice(&[RocStr::from_slice(b"a"), RocStr::from_slice(b"")]), +// RocList::from_slice(&[RocStr::from_slice_unchecked(b"a"), RocStr::from_slice_unchecked(b"")]), // RocList // ) // } @@ -224,9 +224,9 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"1"), -// RocStr::from_slice(b"2"), -// RocStr::from_slice(b"") +// RocStr::from_slice_unchecked(b"1"), +// RocStr::from_slice_unchecked(b"2"), +// RocStr::from_slice_unchecked(b"") // ]), // RocList // ); @@ -243,9 +243,9 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"3"), -// RocStr::from_slice(b"4"), -// RocStr::from_slice(b"") +// 
RocStr::from_slice_unchecked(b"3"), +// RocStr::from_slice_unchecked(b"4"), +// RocStr::from_slice_unchecked(b"") // ]), // RocList // ); @@ -261,7 +261,7 @@ use roc_std::{RocList, RocStr}; // "Second string that is also fairly long. Two long strings test things that might not appear with short strings." // "# // ), -// RocStr::from_slice(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."), +// RocStr::from_slice_unchecked(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."), // RocStr // ); // } @@ -498,7 +498,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -513,7 +513,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("abc~".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("abc~".as_bytes()), // roc_std::RocStr // ); // } @@ -528,7 +528,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("∆".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("∆".as_bytes()), // roc_std::RocStr // ); // } @@ -543,7 +543,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("∆œ¬".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("∆œ¬".as_bytes()), // roc_std::RocStr // ); // } @@ -558,7 +558,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("💖".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("💖".as_bytes()), // roc_std::RocStr // ); // } @@ -573,7 +573,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// 
roc_std::RocStr::from_slice("💖🤠🚀".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("💖🤠🚀".as_bytes()), // roc_std::RocStr // ); // } @@ -588,7 +588,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("💖b∆".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("💖b∆".as_bytes()), // roc_std::RocStr // ); // } @@ -607,7 +607,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -626,7 +626,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -645,7 +645,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -664,7 +664,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -683,7 +683,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -702,7 +702,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -744,7 +744,7 @@ fn str_equality() { // printExpr expr // "# // ), -// RocStr::from_slice(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"), +// RocStr::from_slice_unchecked(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"), // RocStr // ); // } diff --git a/examples/hello-world/rust-platform/src/lib.rs 
b/examples/hello-world/rust-platform/src/lib.rs index 7df8fe563c..e657725529 100644 --- a/examples/hello-world/rust-platform/src/lib.rs +++ b/examples/hello-world/rust-platform/src/lib.rs @@ -3,6 +3,7 @@ use core::ffi::c_void; use roc_std::RocStr; use std::ffi::CStr; +use std::mem::ManuallyDrop; use std::os::raw::c_char; extern "C" { @@ -56,7 +57,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut #[no_mangle] pub extern "C" fn rust_main() -> i32 { unsafe { - let mut roc_str = RocStr::default(); + let mut roc_str = ManuallyDrop::new(RocStr::default()); roc_main(&mut roc_str); let len = roc_str.len(); @@ -65,6 +66,8 @@ pub extern "C" fn rust_main() -> i32 { if libc::write(1, str_bytes, len) < 0 { panic!("Writing to stdout failed!"); } + + ManuallyDrop::drop(&mut roc_str) } // Exit code diff --git a/roc_std/Cargo.lock b/roc_std/Cargo.lock index fe1a8e26d8..0295a22bc4 100644 --- a/roc_std/Cargo.lock +++ b/roc_std/Cargo.lock @@ -20,6 +20,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + [[package]] name = "cfg-if" version = "1.0.0" @@ -193,6 +199,7 @@ checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" name = "roc_std" version = "0.1.0" dependencies = [ + "arrayvec", "indoc", "libc", "pretty_assertions", diff --git a/roc_std/Cargo.toml b/roc_std/Cargo.toml index 5f18ae3060..f296f56a0a 100644 --- a/roc_std/Cargo.toml +++ b/roc_std/Cargo.toml @@ -10,6 +10,7 @@ version = "0.1.0" [dependencies] static_assertions = "0.1" +arrayvec = "0.7.2" [dev-dependencies] indoc = "1.0.3" @@ -21,3 +22,4 @@ libc = "0.2.106" [features] default = ["platform"] platform = [] +no_std = [] diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index a38fd75c84..18a35720db 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -1,19 +1,24 @@ #![crate_type 
= "lib"] -// #![no_std] +#![cfg_attr(feature = "no_std", no_std)] + +use core::cmp::Ordering; use core::ffi::c_void; -use core::fmt; +use core::fmt::{self, Debug}; +use core::hash::{Hash, Hasher}; use core::mem::{ManuallyDrop, MaybeUninit}; use core::ops::Drop; use core::str; -use std::hash::{Hash, Hasher}; -use std::io::Write; +use arrayvec::ArrayString; + +mod roc_box; mod roc_list; mod roc_str; mod storage; +pub use roc_box::RocBox; pub use roc_list::RocList; -pub use roc_str::RocStr; +pub use roc_str::{InteriorNulError, RocStr}; pub use storage::Storage; // A list of C functions that are being imported @@ -27,17 +32,23 @@ extern "C" { alignment: u32, ) -> *mut c_void; pub fn roc_dealloc(ptr: *mut c_void, alignment: u32); + pub fn roc_panic(c_ptr: *mut c_void, tag_id: u32); + pub fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void; + pub fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void; } /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] +#[no_mangle] pub unsafe extern "C" fn roc_alloc(_size: usize, _alignment: u32) -> *mut c_void { unimplemented!("It is not valid to call roc alloc from within the compiler. Please use the \"platform\" feature if this is a platform.") } + /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] +#[no_mangle] pub unsafe extern "C" fn roc_realloc( _ptr: *mut c_void, _new_size: usize, @@ -46,13 +57,37 @@ pub unsafe extern "C" fn roc_realloc( ) -> *mut c_void { unimplemented!("It is not valid to call roc realloc from within the compiler. Please use the \"platform\" feature if this is a platform.") } + /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. 
 #[cfg(not(feature = "platform"))]
+#[no_mangle]
 pub unsafe extern "C" fn roc_dealloc(_ptr: *mut c_void, _alignment: u32) {
     unimplemented!("It is not valid to call roc dealloc from within the compiler. Please use the \"platform\" feature if this is a platform.")
 }
 
+#[cfg(not(feature = "platform"))]
+#[no_mangle]
+pub unsafe extern "C" fn roc_panic(_c_ptr: *mut c_void, _tag_id: u32) {
+    unimplemented!("It is not valid to call roc panic from within the compiler. Please use the \"platform\" feature if this is a platform.")
+}
+
+/// # Safety
+/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
+#[cfg(not(feature = "platform"))]
+#[no_mangle]
+pub unsafe extern "C" fn roc_memcpy(_dst: *mut c_void, _src: *mut c_void, _n: usize) -> *mut c_void {
+    unimplemented!("It is not valid to call roc memcpy from within the compiler. Please use the \"platform\" feature if this is a platform.")
+}
+
+/// # Safety
+/// This is only marked unsafe to typecheck without warnings in the rest of the code here.
+#[cfg(not(feature = "platform"))]
+#[no_mangle]
+pub unsafe extern "C" fn roc_memset(_dst: *mut c_void, _c: i32, _n: usize) -> *mut c_void {
+    unimplemented!("It is not valid to call roc memset from within the compiler. 
Please use the \"platform\" feature if this is a platform.") +} + #[repr(u8)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum RocOrder { @@ -71,15 +106,23 @@ pub struct RocResult { tag: RocResultTag, } -impl core::fmt::Debug for RocResult +impl Debug for RocResult where - T: core::fmt::Debug, - E: core::fmt::Debug, + T: Debug, + E: Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.as_result_of_refs() { - Ok(payload) => write!(f, "RocOk({:?})", payload), - Err(payload) => write!(f, "RocErr({:?})", payload), + Ok(payload) => { + f.write_str("RocOk(")?; + payload.fmt(f)?; + f.write_str(")") + } + Err(payload) => { + f.write_str("RocErr(")?; + payload.fmt(f)?; + f.write_str(")") + } } } } @@ -306,73 +349,62 @@ impl RocDec { self.0 } - fn to_str_helper(&self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> usize { + fn to_str_helper(self, string: &mut ArrayString<{ Self::MAX_STR_LENGTH }>) -> &str { + use std::fmt::Write; + if self.as_i128() == 0 { - write!(&mut bytes[..], "{}", "0").unwrap(); - return 1; + return "0"; } - let is_negative = (self.as_i128() < 0) as usize; - - static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19); // The :019 in the following write! is computed as Self::DECIMAL_PLACES + 1. If you change - // Self::DECIMAL_PLACES, this assert should remind you to change that format string as - // well. - // + // Self::DECIMAL_PLACES, this assert should remind you to change that format string as well. + static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19); + // By using the :019 format, we're guaranteeing that numbers less than 1, say 0.01234 - // get their leading zeros placed in bytes for us. i.e. bytes = b"0012340000000000000" - write!(&mut bytes[..], "{:019}", self.as_i128()).unwrap(); + // get their leading zeros placed in bytes for us. i.e. 
`string = b"0012340000000000000"` + write!(string, "{:019}", self.as_i128()).unwrap(); - // If self represents 1234.5678, then bytes is b"1234567800000000000000". - let mut i = Self::MAX_STR_LENGTH - 1; - // Find the last place where we have actual data. - while bytes[i] == 0 { - i = i - 1; - } - // At this point i is 21 because bytes[21] is the final '0' in b"1234567800000000000000". + let is_negative = self.as_i128() < 0; + let decimal_location = string.len() - Self::DECIMAL_PLACES + (is_negative as usize); - let decimal_location = i - Self::DECIMAL_PLACES + 1 + is_negative; - // decimal_location = 4 + // skip trailing zeros + let last_nonzero_byte = string.trim_end_matches('0').len(); - while bytes[i] == ('0' as u8) && i >= decimal_location { - bytes[i] = 0; - i = i - 1; - } - // Now i = 7, because bytes[7] = '8', and bytes = b"12345678" - - if i < decimal_location { + if last_nonzero_byte < decimal_location { // This means that we've removed trailing zeros and are left with an integer. Our // convention is to print these without a decimal point or trailing zeros, so we're done. - return i + 1; + string.truncate(decimal_location); + return string.as_str(); } - let ret = i + 1; - while i >= decimal_location { - bytes[i + 1] = bytes[i]; - i = i - 1; - } - bytes[i + 1] = bytes[i]; - // Now i = 4, and bytes = b"123455678" + // otherwise, we're dealing with a fraction, and need to insert the decimal dot - bytes[decimal_location] = '.' 
as u8; - // Finally bytes = b"1234.5678" + // truncate all extra zeros off + string.truncate(last_nonzero_byte); - ret + 1 + // push a dummy character so we have space for the decimal dot + string.push('$'); + + // Safety: at any time, the string only contains ascii characters, so it is always valid utf8 + let bytes = unsafe { string.as_bytes_mut() }; + + // shift the fractional part by one + bytes.copy_within(decimal_location..last_nonzero_byte, decimal_location + 1); + + // and put in the decimal dot in the right place + bytes[decimal_location] = b'.'; + + string.as_str() } pub fn to_str(&self) -> RocStr { - let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; - let last_idx = self.to_str_helper(&mut bytes); - unsafe { RocStr::from_slice(&bytes[0..last_idx]) } + RocStr::from(self.to_str_helper(&mut ArrayString::new())) } } impl fmt::Display for RocDec { - fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; - let last_idx = self.to_str_helper(&mut bytes); - let result = unsafe { str::from_utf8_unchecked(&bytes[0..last_idx]) }; - write!(fmtr, "{}", result) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.to_str_helper(&mut ArrayString::new())) } } @@ -394,52 +426,37 @@ impl From for i128 { impl fmt::Debug for I128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let i128: i128 = (*self).into(); - - i128.fmt(f) + i128::from(*self).fmt(f) } } impl fmt::Display for I128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let i128: i128 = (*self).into(); - - i128.fmt(f) + Debug::fmt(&i128::from(*self), f) } } impl PartialEq for I128 { fn eq(&self, other: &Self) -> bool { - let i128_self: i128 = (*self).into(); - let i128_other: i128 = (*other).into(); - - i128_self.eq(&i128_other) + i128::from(*self).eq(&i128::from(*other)) } } impl PartialOrd for I128 { - fn partial_cmp(&self, other: &Self) -> Option { - let i128_self: i128 = (*self).into(); - let 
i128_other: i128 = (*other).into(); - - i128_self.partial_cmp(&i128_other) + fn partial_cmp(&self, other: &Self) -> Option { + i128::from(*self).partial_cmp(&i128::from(*other)) } } impl Ord for I128 { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let i128_self: i128 = (*self).into(); - let i128_other: i128 = (*other).into(); - - i128_self.cmp(&i128_other) + fn cmp(&self, other: &Self) -> Ordering { + i128::from(*self).cmp(&i128::from(*other)) } } impl Hash for I128 { fn hash(&self, state: &mut H) { - let i128: i128 = (*self).into(); - - i128.hash(state); + i128::from(*self).hash(state); } } @@ -461,51 +478,36 @@ impl From for u128 { impl fmt::Debug for U128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let u128: u128 = (*self).into(); - - u128.fmt(f) + u128::from(*self).fmt(f) } } impl fmt::Display for U128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let u128: u128 = (*self).into(); - - u128.fmt(f) + Debug::fmt(&u128::from(*self), f) } } impl PartialEq for U128 { fn eq(&self, other: &Self) -> bool { - let u128_self: u128 = (*self).into(); - let u128_other: u128 = (*other).into(); - - u128_self.eq(&u128_other) + u128::from(*self).eq(&u128::from(*other)) } } impl PartialOrd for U128 { - fn partial_cmp(&self, other: &Self) -> Option { - let u128_self: u128 = (*self).into(); - let u128_other: u128 = (*other).into(); - - u128_self.partial_cmp(&u128_other) + fn partial_cmp(&self, other: &Self) -> Option { + u128::from(*self).partial_cmp(&u128::from(*other)) } } impl Ord for U128 { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let u128_self: u128 = (*self).into(); - let u128_other: u128 = (*other).into(); - - u128_self.cmp(&u128_other) + fn cmp(&self, other: &Self) -> Ordering { + u128::from(*self).cmp(&u128::from(*other)) } } impl Hash for U128 { fn hash(&self, state: &mut H) { - let u128: u128 = (*self).into(); - - u128.hash(state); + u128::from(*self).hash(state); } } diff --git a/roc_std/src/roc_box.rs 
b/roc_std/src/roc_box.rs
new file mode 100644
index 0000000000..5eea65fb5e
--- /dev/null
+++ b/roc_std/src/roc_box.rs
@@ -0,0 +1,168 @@
+#![deny(unsafe_op_in_unsafe_fn)]
+
+use crate::{roc_alloc, roc_dealloc, storage::Storage};
+use core::{
+    cell::Cell,
+    cmp::{self, Ordering},
+    fmt::Debug,
+    mem,
+    ops::Deref,
+    ptr::{self, NonNull},
+};
+
+#[repr(C)]
+pub struct RocBox<T> {
+    contents: NonNull<T>,
+}
+
+impl<T> RocBox<T> {
+    pub fn new(contents: T) -> Self {
+        let alignment = Self::alloc_alignment();
+        let bytes = mem::size_of::<T>() + alignment;
+
+        let ptr = unsafe { roc_alloc(bytes, alignment as u32) };
+
+        if ptr.is_null() {
+            todo!("Call roc_panic with the info that an allocation failed.");
+        }
+
+        // Initialize the reference count.
+        let refcount_one = Storage::new_reference_counted();
+        unsafe { ptr.cast::<Storage>().write(refcount_one) };
+
+        let contents = unsafe {
+            let contents_ptr = ptr.cast::<u8>().add(alignment).cast::<T>();
+
+            contents_ptr.write(contents);
+
+            // We already verified that the original alloc pointer was non-null,
+            // and this one is the alloc pointer with `alignment` bytes added to it,
+            // so it should be non-null too.
+            NonNull::new_unchecked(contents_ptr)
+        };
+
+        Self { contents }
+    }
+
+    #[inline(always)]
+    fn alloc_alignment() -> usize {
+        mem::align_of::<T>().max(mem::align_of::<Storage>())
+    }
+
+    pub fn into_inner(self) -> T {
+        unsafe { ptr::read(self.contents.as_ptr() as *mut T) }
+    }
+
+    fn storage(&self) -> &Cell<Storage> {
+        let alignment = Self::alloc_alignment();
+
+        unsafe {
+            &*self
+                .contents
+                .as_ptr()
+                .cast::<u8>()
+                .sub(alignment)
+                .cast::<Cell<Storage>>()
+        }
+    }
+}
+
+impl<T> Deref for RocBox<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { self.contents.as_ref() }
+    }
+}
+
+impl<T, U> PartialEq<RocBox<U>> for RocBox<T>
+where
+    T: PartialEq<U>,
+{
+    fn eq(&self, other: &RocBox<U>) -> bool {
+        self.deref() == other.deref()
+    }
+}
+
+impl<T> Eq for RocBox<T> where T: Eq {}
+
+impl<T, U> PartialOrd<RocBox<U>> for RocBox<T>
+where
+    T: PartialOrd<U>,
+{
+    fn partial_cmp(&self, other: &RocBox<U>) -> Option<Ordering> {
+        let self_contents = unsafe { self.contents.as_ref() };
+        let other_contents = unsafe { other.contents.as_ref() };
+
+        self_contents.partial_cmp(other_contents)
+    }
+}
+
+impl<T> Ord for RocBox<T>
+where
+    T: Ord,
+{
+    fn cmp(&self, other: &Self) -> Ordering {
+        let self_contents = unsafe { self.contents.as_ref() };
+        let other_contents = unsafe { other.contents.as_ref() };
+
+        self_contents.cmp(other_contents)
+    }
+}
+
+impl<T> Debug for RocBox<T>
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.deref().fmt(f)
+    }
+}
+
+impl<T> Clone for RocBox<T> {
+    fn clone(&self) -> Self {
+        let storage = self.storage();
+        let mut new_storage = storage.get();
+
+        // Increment the reference count
+        if !new_storage.is_readonly() {
+            new_storage.increment_reference_count();
+            storage.set(new_storage);
+        }
+
+        Self {
+            contents: self.contents,
+        }
+    }
+}
+
+impl<T> Drop for RocBox<T> {
+    fn drop(&mut self) {
+        let storage = self.storage();
+        let contents = self.contents;
+
+        // Decrease the list's reference count.
+        let mut new_storage = storage.get();
+        let needs_dealloc = new_storage.decrease();
+
+        if needs_dealloc {
+            unsafe {
+                // Drop the stored contents.
+                let contents_ptr = contents.as_ptr();
+
+                mem::drop::<T>(ptr::read(contents_ptr));
+
+                let alignment = Self::alloc_alignment();
+
+                // Release the memory.
+                roc_dealloc(
+                    contents.as_ptr().cast::<u8>().sub(alignment).cast(),
+                    alignment as u32,
+                );
+            }
+        } else if !new_storage.is_readonly() {
+            // Write the storage back.
+            storage.set(new_storage);
+        }
+    }
+}
diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs
index 5abdfa69be..9be5b1fe9d 100644
--- a/roc_std/src/roc_list.rs
+++ b/roc_std/src/roc_list.rs
@@ -3,12 +3,12 @@
 use core::{
     cell::Cell,
     cmp::{self, Ordering},
+    ffi::c_void,
     fmt::Debug,
     intrinsics::copy_nonoverlapping,
     mem::{self, ManuallyDrop},
     ops::Deref,
-    ptr,
-    ptr::NonNull,
+    ptr::{self, NonNull},
 };
 
 use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage};
@@ -21,14 +21,61 @@ pub struct RocList<T> {
 }
 
 impl<T> RocList<T> {
+    #[inline(always)]
+    fn alloc_alignment() -> u32 {
+        mem::align_of::<T>().max(mem::align_of::<Storage>()) as u32
+    }
+
     pub fn empty() -> Self {
-        RocList {
+        Self {
             elements: None,
             length: 0,
             capacity: 0,
         }
     }
 
+    /// Create an empty RocList with enough space preallocated to store
+    /// the requested number of elements.
+    pub fn with_capacity(num_elems: usize) -> Self {
+        Self {
+            elements: Some(Self::elems_with_capacity(num_elems)),
+            length: 0,
+            capacity: num_elems,
+        }
+    }
+
+    /// Used for both roc_alloc and roc_realloc - given the number of elements,
+    /// returns the number of bytes needed to allocate, taking into account both the
+    /// size of the elements as well as the size of Storage.
+ fn alloc_bytes(num_elems: usize) -> usize { + mem::size_of::() + (num_elems * mem::size_of::()) + } + + fn elems_with_capacity(num_elems: usize) -> NonNull> { + let alloc_ptr = unsafe { roc_alloc(Self::alloc_bytes(num_elems), Self::alloc_alignment()) }; + + Self::elems_from_allocation(NonNull::new(alloc_ptr).unwrap_or_else(|| { + todo!("Call roc_panic with the info that an allocation failed."); + })) + } + + fn elems_from_allocation(allocation: NonNull) -> NonNull> { + let alloc_ptr = allocation.as_ptr(); + + unsafe { + let elem_ptr = Self::elem_ptr_from_alloc_ptr(alloc_ptr).cast::>(); + + // Initialize the reference count. + alloc_ptr + .cast::() + .write(Storage::new_reference_counted()); + + // The original alloc pointer was non-null, and this one is the alloc pointer + // with `alignment` bytes added to it, so it should be non-null too. + NonNull::new_unchecked(elem_ptr) + } + } + pub fn len(&self) -> usize { self.length } @@ -41,21 +88,141 @@ impl RocList { self.len() == 0 } + /// Note that there is no way to convert directly to a Vec. + /// + /// This is because RocList values are not allocated using the system allocator, so + /// handing off any heap-allocated bytes to a Vec would not work because its Drop + /// implementation would try to free those bytes using the wrong allocator. + /// + /// Instead, if you want a Rust Vec, you need to do a fresh allocation and copy the + /// bytes over - in other words, calling this `as_slice` method and then calling `to_vec` + /// on that. pub fn as_slice(&self) -> &[T] { &*self } + #[inline(always)] fn elements_and_storage(&self) -> Option<(NonNull>, &Cell)> { let elements = self.elements?; - let storage = unsafe { &*elements.as_ptr().cast::>().sub(1) }; + let storage = unsafe { &*self.ptr_to_allocation().cast::>() }; Some((elements, storage)) } + + pub(crate) fn storage(&self) -> Option { + self.elements_and_storage() + .map(|(_, storage)| storage.get()) + } + + /// Useful for doing memcpy on the elements. 
Returns NULL if list is empty. + pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T { + unsafe { core::mem::transmute(self.elements) } + } + + /// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty. + pub(crate) unsafe fn ptr_to_allocation(&self) -> *mut c_void { + unsafe { + self.ptr_to_first_elem() + .cast::() + .sub(Self::alloc_alignment() as usize) as *mut _ + } + } + + unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void { + unsafe { + alloc_ptr + .cast::() + .add(Self::alloc_alignment() as usize) + .cast() + } + } } impl RocList where T: Clone, { + /// Increase a RocList's capacity by at least the requested number of elements (possibly more). + /// + /// May return a new RocList, if the provided one was not unique. + pub fn reserve(&mut self, num_elems: usize) { + let new_len = num_elems + self.length; + let new_elems; + let old_elements_ptr; + + match self.elements_and_storage() { + Some((elements, storage)) => { + if storage.get().is_unique() { + unsafe { + let old_alloc = self.ptr_to_allocation(); + + // Try to reallocate in-place. + let new_alloc = roc_realloc( + old_alloc, + Self::alloc_bytes(new_len), + Self::alloc_bytes(self.capacity), + Self::alloc_alignment(), + ); + + if new_alloc == old_alloc { + // We successfully reallocated in-place; we're done! + return; + } else { + // We got back a different allocation; copy the existing elements + // into it. We don't need to increment their refcounts because + // The existing allocation that references to them is now gone and + // no longer referencing them. + new_elems = Self::elems_from_allocation( + NonNull::new(new_alloc).unwrap_or_else(|| { + todo!("Reallocation failed"); + }), + ); + } + + // Note that realloc automatically deallocates the old allocation, + // so we don't need to call roc_dealloc here. 
+ } + } else { + // Make a new allocation + new_elems = Self::elems_with_capacity(new_len); + old_elements_ptr = elements.as_ptr(); + + unsafe { + // Copy the old elements to the new allocation. + copy_nonoverlapping(old_elements_ptr, new_elems.as_ptr(), self.length); + } + + // Decrease the current allocation's reference count. + let mut new_storage = storage.get(); + let needs_dealloc = new_storage.decrease(); + + if needs_dealloc { + // Unlike in Drop, do *not* decrement the refcounts of all the elements! + // The new allocation is referencing them, so instead of incrementing them all + // all just to decrement them again here, we neither increment nor decrement them. + unsafe { + roc_dealloc(self.ptr_to_allocation(), Self::alloc_alignment()); + } + } else if !new_storage.is_readonly() { + // Write the storage back. + storage.set(new_storage); + } + } + } + None => { + // This is an empty list, so `reserve` is the same as `with_capacity`. + self.update_to(Self::with_capacity(new_len)); + + return; + } + } + + self.update_to(Self { + elements: Some(new_elems), + length: self.length, + capacity: new_len, + }); + } + pub fn from_slice(slice: &[T]) -> Self { let mut list = Self::empty(); list.extend_from_slice(slice); @@ -64,27 +231,37 @@ where pub fn extend_from_slice(&mut self, slice: &[T]) { // TODO: Can we do better for ZSTs? Alignment might be a problem. - if slice.is_empty() { return; } - let alignment = cmp::max(mem::align_of::(), mem::align_of::()); - let elements_offset = alignment; - - let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); - - let new_ptr = if let Some((elements, storage)) = self.elements_and_storage() { + let new_len = self.len() + slice.len(); + let non_null_elements = if let Some((elements, storage)) = self.elements_and_storage() { // Decrement the list's refence count. let mut copy = storage.get(); let is_unique = copy.decrease(); if is_unique { - // If the memory is not shared, we can reuse the memory. 
- let old_size = elements_offset + mem::size_of::() * self.len(); - unsafe { - let ptr = elements.as_ptr().cast::().sub(alignment).cast(); - roc_realloc(ptr, new_size, old_size, alignment as u32).cast() + // If we have enough capacity, we can add to the existing elements in-place. + if self.capacity() >= slice.len() { + elements + } else { + // There wasn't enough capacity, so we need a new allocation. + // Since this is a unique RocList, we can use realloc here. + let new_ptr = unsafe { + roc_realloc( + storage.as_ptr().cast(), + Self::alloc_bytes(new_len), + Self::alloc_bytes(self.capacity), + Self::alloc_alignment(), + ) + }; + + self.capacity = new_len; + + Self::elems_from_allocation(NonNull::new(new_ptr).unwrap_or_else(|| { + todo!("Reallocation failed"); + })) } } else { if !copy.is_readonly() { @@ -93,49 +270,19 @@ where } // Allocate new memory. - let new_ptr = unsafe { roc_alloc(new_size, alignment as u32) }; - let new_elements = unsafe { - new_ptr - .cast::() - .add(alignment) - .cast::>() - }; - - // Initialize the reference count. - unsafe { - let storage_ptr = new_elements.cast::().sub(1); - storage_ptr.write(Storage::new_reference_counted()); - } + let new_elements = Self::elems_with_capacity(slice.len()); // Copy the old elements to the new allocation. unsafe { - copy_nonoverlapping(elements.as_ptr(), new_elements, self.length); + copy_nonoverlapping(elements.as_ptr(), new_elements.as_ptr(), self.length); } - new_ptr + new_elements } } else { - // Allocate new memory. - let new_ptr = unsafe { roc_alloc(new_size, alignment as u32) }; - let new_elements = unsafe { new_ptr.cast::().add(elements_offset).cast::() }; - - // Initialize the reference count. 
- unsafe { - let storage_ptr = new_elements.cast::().sub(1); - storage_ptr.write(Storage::new_reference_counted()); - } - - new_ptr + Self::elems_with_capacity(slice.len()) }; - let elements = unsafe { - new_ptr - .cast::() - .add(elements_offset) - .cast::>() - }; - - let non_null_elements = NonNull::new(elements).unwrap(); self.elements = Some(non_null_elements); let elements = self.elements.unwrap().as_ptr(); @@ -159,6 +306,16 @@ where self.capacity = self.length } + + /// Replace self with a new version, without letting `drop` run in between. + fn update_to(&mut self, mut updated: Self) { + // We want to replace `self` with `updated` in a way that makes sure + // `self`'s `drop` never runs. This is the proper way to do that: + // swap them, and then forget the "updated" one (which is now pointing + // to the original allocation). + mem::swap(self, &mut updated); + mem::forget(updated); + } } impl Deref for RocList { @@ -282,22 +439,15 @@ impl Drop for RocList { for index in 0..self.len() { let elem_ptr = elements.as_ptr().add(index); - mem::drop::(ManuallyDrop::take(&mut *elem_ptr)); + ManuallyDrop::drop(&mut *elem_ptr); } - let alignment = cmp::max(mem::align_of::(), mem::align_of::()); - // Release the memory. - roc_dealloc( - elements.as_ptr().cast::().sub(alignment).cast(), - alignment as u32, - ); - } - } else { - if !new_storage.is_readonly() { - // Write the storage back. - storage.set(new_storage); + roc_dealloc(self.ptr_to_allocation(), Self::alloc_alignment()); } + } else if !new_storage.is_readonly() { + // Write the storage back. 
+ storage.set(new_storage); } } } diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 00f3a52fe7..863d33f5b0 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -1,18 +1,51 @@ #![deny(unsafe_op_in_unsafe_fn)] use core::{ + cmp, convert::TryFrom, - fmt::Debug, - mem::{size_of, ManuallyDrop}, + fmt, + hash::{self, Hash}, + mem::{self, size_of, ManuallyDrop}, ops::{Deref, DerefMut}, + ptr, }; -use std::hash::Hash; + +#[cfg(not(feature = "no_std"))] +use std::ffi::{CStr, CString}; use crate::RocList; #[repr(transparent)] pub struct RocStr(RocStrInner); +fn with_stack_bytes(length: usize, closure: F) -> T +where + F: FnOnce(*mut E) -> T, +{ + use crate::{roc_alloc, roc_dealloc}; + use core::mem::MaybeUninit; + + if length < RocStr::TEMP_STR_MAX_STACK_BYTES { + // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array + // has become stabilized, use that here in order to do a precise + // stack allocation instead of always over-allocating to 64B. + let mut bytes: MaybeUninit<[u8; RocStr::TEMP_STR_MAX_STACK_BYTES]> = MaybeUninit::uninit(); + + closure(bytes.as_mut_ptr() as *mut E) + } else { + let align = core::mem::align_of::() as u32; + // The string is too long to stack-allocate, so + // do a heap allocation and then free it afterwards. + let ptr = unsafe { roc_alloc(length, align) } as *mut E; + let answer = closure(ptr); + + // Free the heap allocation. + unsafe { roc_dealloc(ptr.cast(), align) }; + + answer + } +} + impl RocStr { pub const SIZE: usize = core::mem::size_of::(); pub const MASK: u8 = 0b1000_0000; @@ -28,8 +61,8 @@ impl RocStr { /// # Safety /// /// `slice` must be valid UTF-8. 
- pub unsafe fn from_slice(slice: &[u8]) -> Self { - if let Some(small_string) = unsafe { SmallString::try_from(slice) } { + pub unsafe fn from_slice_unchecked(slice: &[u8]) -> Self { + if let Some(small_string) = unsafe { SmallString::try_from_utf8_bytes(slice) } { Self(RocStrInner { small_string }) } else { let heap_allocated = RocList::from_slice(slice); @@ -51,6 +84,13 @@ impl RocStr { } } + pub fn capacity(&self) -> usize { + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => roc_list.capacity(), + RocStrInnerRef::SmallString(_) => SmallString::CAPACITY, + } + } + pub fn len(&self) -> usize { match self.as_enum_ref() { RocStrInnerRef::HeapAllocated(h) => h.len(), @@ -62,9 +102,417 @@ impl RocStr { self.len() == 0 } + /// Note that there is no way to convert directly to a String. + /// + /// This is because RocStr values are not allocated using the system allocator, so + /// handing off any heap-allocated bytes to a String would not work because its Drop + /// implementation would try to free those bytes using the wrong allocator. + /// + /// Instead, if you want a Rust String, you need to do a fresh allocation and copy the + /// bytes over - in other words, calling this `as_str` method and then calling `to_string` + /// on that. pub fn as_str(&self) -> &str { &*self } + + /// Create an empty RocStr with enough space preallocated to store + /// the requested number of bytes. + pub fn with_capacity(bytes: usize) -> Self { + if bytes <= SmallString::CAPACITY { + RocStr(RocStrInner { + small_string: SmallString::empty(), + }) + } else { + // The requested capacity won't fit in a small string; we need to go big. + RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(RocList::with_capacity(bytes)), + }) + } + } + + /// Increase a RocStr's capacity by at least the requested number of bytes (possibly more). + /// + /// May return a new RocStr, if the provided one was not unique. 
+ pub fn reserve(&mut self, bytes: usize) { + if self.is_small_str() { + let small_str = unsafe { self.0.small_string }; + let target_cap = small_str.len() + bytes; + + if target_cap > SmallString::CAPACITY { + // The requested capacity won't fit in a small string; we need to go big. + let mut roc_list = RocList::with_capacity(target_cap); + + roc_list.extend_from_slice(small_str.as_bytes()); + + *self = RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(roc_list), + }); + } + } else { + let mut roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) }; + + roc_list.reserve(bytes); + + let mut updated = RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(roc_list), + }); + + mem::swap(self, &mut updated); + mem::forget(updated); + } + } + + /// Returns the index of the first interior \0 byte in the string, or None if there are none. + fn first_nul_byte(&self) -> Option { + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().position(|byte| *byte == 0), + RocStrInnerRef::SmallString(small_string) => small_string.first_nul_byte(), + } + } + + // If the string is under this many bytes, the with_terminator family + // of methods will allocate the terminated string on the stack when + // the RocStr is non-unique. + const TEMP_STR_MAX_STACK_BYTES: usize = 64; + + /// Like calling with_utf8_terminator passing \0 for the terminator, + /// except it can fail because a RocStr may contain \0 characters, + /// which a nul-terminated string must not. + pub fn utf8_nul_terminated T>( + self, + func: F, + ) -> Result { + if let Some(pos) = self.first_nul_byte() { + Err(InteriorNulError { pos, roc_str: self }) + } else { + Ok(self.with_utf8_terminator(b'\0', func)) + } + } + + /// Turn this RocStr into a UTF-8 `*mut u8`, terminate it with the given character + /// (commonly either `b'\n'` or b`\0`) and then provide access to that + /// `*mut u8` (as well as its length) for the duration of a given function. 
This is + /// designed to be an efficient way to turn a `RocStr` received from an application into + /// either the nul-terminated UTF-8 `char*` needed by UNIX syscalls, or into a + /// newline-terminated string to write to stdout or stderr (for a "println"-style effect). + /// + /// **NOTE:** The length passed to the function is the same value that `RocStr::len` will + /// return; it does not count the terminator. So to convert it to a nul-terminated slice + /// of Rust bytes (for example), call `slice::from_raw_parts` passing the given length + 1. + /// + /// This operation achieves efficiency by reusing allocated bytes from the RocStr itself, + /// and sometimes allocating on the stack. It does not allocate on the heap when given a + /// a small string or a string with unique refcount, but may allocate when given a large + /// string with non-unique refcount. (It will do a stack allocation if the string is under + /// 64 bytes; the stack allocation will only live for the duration of the called function.) + /// + /// If the given (owned) RocStr is unique, this can overwrite the underlying bytes + /// to terminate the string in-place. Small strings have an extra byte at the end + /// where the length is stored, which can be replaced with the terminator. Heap-allocated + /// strings can have excess capacity which can hold a terminator, or if they have no + /// excess capacity, all the bytes can be shifted over the refcount in order to free up + /// a `usize` worth of free space at the end - which can easily fit a 1-byte terminator. + pub fn with_utf8_terminator T>(self, terminator: u8, func: F) -> T { + // Note that this function does not use with_terminator because it can be + // more efficient than that - due to knowing that it's already in UTF-8 and always + // has room for a 1-byte terminator in the existing allocation (either in the refcount + // bytes, or, in a small string, in the length at the end of the string). 
+ + let terminate = |alloc_ptr: *mut u8, len: usize| unsafe { + *(alloc_ptr.add(len)) = terminator; + + func(alloc_ptr, len) + }; + + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => { + unsafe { + match roc_list.storage() { + Some(storage) if storage.is_unique() => { + // The backing RocList was unique, so we can mutate it in-place. + let len = roc_list.len(); + let ptr = if len < roc_list.capacity() { + // We happen to have excess capacity already, so we will be able + // to write the terminator into the first byte of excess capacity. + roc_list.ptr_to_first_elem() as *mut u8 + } else { + // We always have an allocation that's even bigger than necessary, + // because the refcount bytes take up more than the 1B needed for + // the terminator. We just need to shift the bytes over on top + // of the refcount. + let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8; + + // First, copy the bytes over the original allocation - effectively + // shifting everything over by one `usize`. Now we no longer have a + // refcount (but the terminated won't use that anyway), but we do + // have a free `usize` at the end. + // + // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping + // because the regions definitely overlap! + ptr::copy(roc_list.ptr_to_first_elem() as *mut u8, alloc_ptr, len); + + alloc_ptr + }; + + terminate(ptr, len) + } + Some(_) => { + let len = roc_list.len(); + + // The backing list was not unique, so we can't mutate it in-place. + // ask for `len + 1` to store the original string and the terminator + with_stack_bytes(len + 1, |alloc_ptr: *mut u8| { + let alloc_ptr = alloc_ptr as *mut u8; + let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8; + + // memcpy the bytes into the stack allocation + ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len); + + terminate(alloc_ptr, len) + }) + } + None => { + // The backing list was empty. 
+ // + // No need to do a heap allocation for an empty string - we + // can just do a stack allocation that will live for the + // duration of the function. + func([terminator].as_mut_ptr(), 0) + } + } + } + } + RocStrInnerRef::SmallString(small_str) => { + let mut bytes = small_str.bytes; + + // Even if the small string is at capacity, there will be room to write + // a terminator in the byte that's used to store the length. + terminate(bytes.as_mut_ptr() as *mut u8, small_str.len()) + } + } + } + + /// Like calling with_utf16_terminator passing \0 for the terminator, + /// except it can fail because a RocStr may contain \0 characters, + /// which a nul-terminated string must not. + pub fn utf16_nul_terminated T>( + self, + func: F, + ) -> Result { + if let Some(pos) = self.first_nul_byte() { + Err(InteriorNulError { pos, roc_str: self }) + } else { + Ok(self.with_utf16_terminator(0, func)) + } + } + + /// Turn this RocStr into a nul-terminated UTF-16 `*mut u16` and then provide access to + /// that `*mut u16` (as well as its length) for the duration of a given function. This is + /// designed to be an efficient way to turn a RocStr received from an application into + /// the nul-terminated UTF-16 `wchar_t*` needed by Windows API calls. + /// + /// **NOTE:** The length passed to the function is the same value that `RocStr::len` will + /// return; it does not count the terminator. So to convert it to a nul-terminated + /// slice of Rust bytes, call `slice::from_raw_parts` passing the given length + 1. + /// + /// This operation achieves efficiency by reusing allocated bytes from the RocStr itself, + /// and sometimes allocating on the stack. It does not allocate on the heap when given a + /// a small string or a string with unique refcount, but may allocate when given a large + /// string with non-unique refcount. (It will do a stack allocation if the string is under + /// 64 bytes; the stack allocation will only live for the duration of the called function.) 
+ /// + /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes + /// to nul-terminate the string in-place. Small strings have an extra byte at the end + /// where the length is stored, which can become 0 for nul-termination. Heap-allocated + /// strings can have excess capacity which can hold a termiator, or if they have no + /// excess capacity, all the bytes can be shifted over the refcount in order to free up + /// a `usize` worth of free space at the end - which can easily fit a terminator. + /// + /// This operation can fail because a RocStr may contain \0 characters, which a + /// nul-terminated string must not. + pub fn with_utf16_terminator T>( + self, + terminator: u16, + func: F, + ) -> T { + self.with_terminator(terminator, |dest_ptr: *mut u16, str_slice: &str| { + // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + for (index, wchar) in str_slice.encode_utf16().enumerate() { + unsafe { + *(dest_ptr.add(index)) = wchar; + } + } + + func(dest_ptr, str_slice.len()) + }) + } + + pub fn with_windows_path T>( + self, + func: F, + ) -> Result { + if let Some(pos) = self.first_nul_byte() { + Err(InteriorNulError { pos, roc_str: self }) + } else { + let answer = self.with_terminator(0u16, |dest_ptr: *mut u16, str_slice: &str| { + // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + for (index, mut wchar) in str_slice.encode_utf16().enumerate() { + // Replace slashes with backslashes + if wchar == '/' as u16 { + wchar = '\\' as u16 + }; + + unsafe { + *(dest_ptr.add(index)) = wchar; + } + } + + func(dest_ptr, str_slice.len()) + }); + + Ok(answer) + } + } + + /// Generic version of temp_c_utf8 and temp_c_utf16. The given function will be + /// passed a pointer to elements of type E. The pointer will have enough room to hold + /// one element for each byte of the given `&str`'s length, plus the terminator element. 
+ /// + /// The terminator will be written right after the end of the space for the other elements, + /// but all the memory in that space before the terminator will be uninitialized. This means + /// if you want to do something like copy the contents of the `&str` into there, that will + /// need to be done explicitly. + /// + /// The terminator is always written - even if there are no other elements present before it. + /// (In such a case, the `&str` argument will be empty and the `*mut E` will point directly + /// to the terminator). + /// + /// One use for this is to convert slashes to backslashes in Windows paths; + /// this function provides the most efficient way to do that, because no extra + /// iteration pass is necessary; the conversion can be done after each translation + /// of a UTF-8 character to UTF-16. Here's how that would look: + /// + /// use roc_std::{RocStr, InteriorNulError}; + /// + /// pub fn with_windows_path T>( + /// roc_str: RocStr, + /// func: F, + /// ) -> Result { + /// let answer = roc_str.with_terminator(0u16, |dest_ptr: *mut u16, str_slice: &str| { + /// // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + /// for (index, mut wchar) in str_slice.encode_utf16().enumerate() { + /// // Replace slashes with backslashes + /// if wchar == '/' as u16 { + /// wchar = '\\' as u16 + /// }; + /// + /// unsafe { + /// *(dest_ptr.add(index)) = wchar; + /// } + /// } + /// + /// func(dest_ptr, str_slice.len()) + /// }); + /// + /// Ok(answer) + /// } + pub fn with_terminator A>( + self, + terminator: E, + func: F, + ) -> A { + use crate::Storage; + use core::mem::align_of; + + let terminate = |alloc_ptr: *mut E, str_slice: &str| unsafe { + *(alloc_ptr.add(str_slice.len())) = terminator; + + func(alloc_ptr, str_slice) + }; + + // When we don't have an existing allocation that can work, fall back on this. + // It uses either a stack allocation, or, if that would be too big, a heap allocation. 
+ let fallback = |str_slice: &str| { + // We need 1 extra elem for the terminator. It must be an elem, + // not a byte, because we'll be providing a pointer to elems. + let needed_bytes = (str_slice.len() + 1) * size_of::(); + + with_stack_bytes(needed_bytes, |alloc_ptr: *mut E| { + terminate(alloc_ptr, str_slice) + }) + }; + + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => { + let len = roc_list.len(); + + unsafe { + match roc_list.storage() { + Some(storage) if storage.is_unique() => { + // The backing RocList was unique, so we can mutate it in-place. + + // We need 1 extra elem for the terminator. It must be an elem, + // not a byte, because we'll be providing a pointer to elems. + let needed_bytes = (len + 1) * size_of::(); + + // We can use not only the capacity on the heap, but also + // the bytes originally used for the refcount. + let available_bytes = roc_list.capacity() + size_of::(); + + if needed_bytes < available_bytes { + debug_assert!(align_of::() >= align_of::()); + + // We happen to have sufficient excess capacity already, + // so we will be able to write the new elements as well as + // the terminator into the existing allocation. + let ptr = roc_list.ptr_to_allocation() as *mut E; + let answer = terminate(ptr, self.as_str()); + + // We cannot rely on the RocStr::drop implementation, because + // it tries to use the refcount - which we just overwrote + // with string bytes. + mem::forget(self); + crate::roc_dealloc(ptr.cast(), mem::align_of::() as u32); + + answer + } else { + // We didn't have sufficient excess capacity already, + // so we need to do either a new stack allocation or a new + // heap allocation. + fallback(self.as_str()) + } + } + Some(_) => { + // The backing list was not unique, so we can't mutate it in-place. + fallback(self.as_str()) + } + None => { + // The backing list was empty. 
+ // + // No need to do a heap allocation for an empty string - we + // can just do a stack allocation that will live for the + // duration of the function. + func([terminator].as_mut_ptr() as *mut E, "") + } + } + } + } + RocStrInnerRef::SmallString(small_str) => { + let len = small_str.len(); + + // We need 1 extra elem for the terminator. It must be an elem, + // not a byte, because we'll be providing a pointer to elems. + let needed_bytes = (len + 1) * size_of::(); + let available_bytes = size_of::(); + + if needed_bytes < available_bytes { + terminate(small_str.bytes.as_ptr() as *mut E, self.as_str()) + } else { + fallback(self.as_str()) + } + } + } + } } impl Deref for RocStr { @@ -78,6 +526,35 @@ impl Deref for RocStr { } } +/// This can fail because a CStr may contain invalid UTF-8 characters +#[cfg(not(feature = "no_std"))] +impl TryFrom<&CStr> for RocStr { + type Error = core::str::Utf8Error; + + fn try_from(c_str: &CStr) -> Result { + c_str.to_str().map(RocStr::from) + } +} + +/// This can fail because a CString may contain invalid UTF-8 characters +#[cfg(not(feature = "no_std"))] +impl TryFrom for RocStr { + type Error = core::str::Utf8Error; + + fn try_from(c_string: CString) -> Result { + c_string.to_str().map(RocStr::from) + } +} + +#[cfg(not(feature = "no_std"))] +/// Like https://doc.rust-lang.org/std/ffi/struct.NulError.html but +/// only for interior nuls, not for missing nul terminators. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InteriorNulError { + pub pos: usize, + pub roc_str: RocStr, +} + impl Default for RocStr { fn default() -> Self { Self::empty() @@ -86,7 +563,7 @@ impl Default for RocStr { impl From<&str> for RocStr { fn from(s: &str) -> Self { - unsafe { Self::from_slice(s.as_bytes()) } + unsafe { Self::from_slice_unchecked(s.as_bytes()) } } } @@ -99,18 +576,24 @@ impl PartialEq for RocStr { impl Eq for RocStr {} impl PartialOrd for RocStr { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { self.as_str().partial_cmp(other.as_str()) } } impl Ord for RocStr { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { + fn cmp(&self, other: &Self) -> cmp::Ordering { self.as_str().cmp(other.as_str()) } } -impl Debug for RocStr { +impl fmt::Debug for RocStr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.deref().fmt(f) + } +} + +impl fmt::Display for RocStr { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.deref().fmt(f) } @@ -168,7 +651,7 @@ impl SmallString { /// # Safety /// /// `slice` must be valid UTF-8. - unsafe fn try_from(slice: &[u8]) -> Option { + unsafe fn try_from_utf8_bytes(slice: &[u8]) -> Option { // Check the size of the slice. let len_as_u8 = u8::try_from(slice.len()).ok()?; if (len_as_u8 as usize) > Self::CAPACITY { @@ -191,6 +674,17 @@ impl SmallString { fn len(&self) -> usize { usize::from(self.len & !RocStr::MASK) } + + /// Returns the index of the first interior \0 byte in the string, or None if there are none. 
+ fn first_nul_byte(&self) -> Option { + for (index, byte) in self.bytes[0..self.len()].iter().enumerate() { + if *byte == 0 { + return Some(index); + } + } + + None + } } impl Deref for SmallString { @@ -210,7 +704,7 @@ impl DerefMut for SmallString { } impl Hash for RocStr { - fn hash(&self, state: &mut H) { + fn hash(&self, state: &mut H) { self.as_str().hash(state) } } diff --git a/roc_std/src/storage.rs b/roc_std/src/storage.rs index 2be9976fca..5ec9882a97 100644 --- a/roc_std/src/storage.rs +++ b/roc_std/src/storage.rs @@ -1,6 +1,14 @@ use core::num::NonZeroIsize; -const REFCOUNT_1: isize = isize::MIN; +/// # Safety +/// +/// isize::MIN is definitely not zero. This can become +/// https://doc.rust-lang.org/std/num/struct.NonZeroIsize.html#associatedconstant.MIN +/// once it has been stabilized. +const REFCOUNT_1: NonZeroIsize = unsafe { NonZeroIsize::new_unchecked(isize::MIN) }; + +const _ASSERT_STORAGE_SIZE: () = + assert!(std::mem::size_of::() == std::mem::size_of::()); #[derive(Clone, Copy, Debug)] pub enum Storage { @@ -10,7 +18,7 @@ pub enum Storage { impl Storage { pub fn new_reference_counted() -> Self { - Self::ReferenceCounted(NonZeroIsize::new(REFCOUNT_1).unwrap()) + Self::ReferenceCounted(REFCOUNT_1) } /// Increment the reference count. 
@@ -37,11 +45,10 @@ impl Storage { match self { Storage::Readonly => false, Storage::ReferenceCounted(rc) => { - let rc_as_isize = rc.get(); - if rc_as_isize == REFCOUNT_1 { + if *rc == REFCOUNT_1 { true } else { - *rc = NonZeroIsize::new(rc_as_isize - 1).unwrap(); + *rc = NonZeroIsize::new(rc.get() - 1).unwrap(); false } } @@ -51,4 +58,8 @@ impl Storage { pub fn is_readonly(&self) -> bool { matches!(self, Self::Readonly) } + + pub fn is_unique(&self) -> bool { + matches!(self, Self::ReferenceCounted(REFCOUNT_1)) + } } diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 2466deb628..0c510f0d09 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -7,6 +7,8 @@ extern crate roc_std; use core::ffi::c_void; +const ROC_SMALL_STR_CAPACITY: usize = core::mem::size_of::() - 1; + #[no_mangle] pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { libc::malloc(size) @@ -27,10 +29,37 @@ pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { libc::free(c_ptr) } +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { + use std::ffi::CStr; + use std::os::raw::c_char; + + match tag_id { + 0 => { + let c_str = CStr::from_ptr(c_ptr as *const c_char); + let string = c_str.to_str().unwrap(); + panic!("roc_panic during test: {}", string); + } + _ => todo!(), + } +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void { + libc::memcpy(dst, src, n) +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void { + libc::memset(dst, c, n) +} + #[cfg(test)] mod test_roc_std { - use roc_std::RocResult; - use roc_std::RocStr; + use roc_std::{RocBox, RocDec, RocList, RocResult, RocStr}; fn roc_str_byte_representation(string: &RocStr) -> [u8; RocStr::SIZE] { unsafe { core::mem::transmute_copy(string) } @@ -86,9 
+115,45 @@ mod test_roc_std { #[test] fn empty_string_capacity() { - let string = RocStr::from(""); + let string = RocStr::empty(); - assert_eq!(string.capacity(), 0); + assert_eq!(string.capacity(), super::ROC_SMALL_STR_CAPACITY); + } + + #[test] + fn reserve_small_str() { + let mut roc_str = RocStr::empty(); + + roc_str.reserve(42); + + assert_eq!(roc_str.capacity(), 42); + } + + #[test] + fn reserve_big_str() { + let mut roc_str = RocStr::empty(); + + roc_str.reserve(5000); + + assert_eq!(roc_str.capacity(), 5000); + } + + #[test] + fn reserve_small_list() { + let mut roc_list = RocList::::empty(); + + roc_list.reserve(42); + + assert_eq!(roc_list.capacity(), 42); + } + + #[test] + fn reserve_big_list() { + let mut roc_list = RocList::::empty(); + + roc_list.reserve(5000); + + assert_eq!(roc_list.capacity(), 5000); } #[test] @@ -123,4 +188,125 @@ mod test_roc_std { assert!(!roc_result.is_ok()); assert!(roc_result.is_err()); } + + #[test] + fn create_roc_box() { + let contents = 42i32; + let roc_box = RocBox::new(contents); + + assert_eq!(roc_box.into_inner(), contents) + } + + #[test] + fn roc_dec_fmt() { + assert_eq!( + format!("{}", RocDec::MIN), + "-1701411834604692317316.87303715884105728" + ); + + let half = RocDec::from_str("0.5").unwrap(); + assert_eq!(format!("{}", half), "0.5"); + + let ten = RocDec::from_str("10").unwrap(); + assert_eq!(format!("{}", ten), "10"); + + let example = RocDec::from_str("1234.5678").unwrap(); + assert_eq!(format!("{}", example), "1234.5678"); + } +} + +#[cfg(test)] +mod with_terminator { + use core::slice; + use roc_std::RocStr; + use std::ffi::CStr; + + fn verify_temp_c(string: &str, excess_capacity: usize) { + let mut roc_str = RocStr::from(string); + + if excess_capacity > 0 { + roc_str.reserve(excess_capacity); + } + + // utf8_nul_terminated + { + let answer = roc_str.clone().utf8_nul_terminated(|ptr, len| { + let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + let c_str = 
CStr::from_bytes_with_nul(bytes).unwrap(); + + assert_eq!(c_str.to_str(), Ok(string)); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + + // utf16_nul_terminated + { + let answer = roc_str.utf16_nul_terminated(|ptr, len| { + let bytes: &[u16] = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + + // Verify that it's nul-terminated + assert_eq!(bytes[len], 0); + + let string = String::from_utf16(&bytes[0..len]).unwrap(); + + assert_eq!(string.as_str(), string); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + } + + #[test] + fn empty_string() { + verify_temp_c("", 0); + } + + /// e.g. "a" or "abc" or "abcdefg" etc. + fn string_for_len(len: usize) -> String { + let first_index: usize = 97; // start with ASCII lowercase "a" + let bytes: Vec = (0..len) + .map(|index| { + let letter = (index % 26) + first_index; + + letter.try_into().unwrap() + }) + .collect(); + + assert_eq!(bytes.len(), len); + + // The bytes should contain no nul characters. + assert!(bytes.iter().all(|byte| *byte != 0)); + + String::from_utf8(bytes).unwrap() + } + + #[test] + fn small_strings() { + for len in 1..=super::ROC_SMALL_STR_CAPACITY { + verify_temp_c(&string_for_len(len), 0); + } + } + + #[test] + fn no_excess_capacity() { + // This is small enough that it should be a stack allocation for UTF-8 + verify_temp_c(&string_for_len(33), 0); + + // This is big enough that it should be a heap allocation for UTF-8 and UTF-16 + verify_temp_c(&string_for_len(65), 0); + } + + #[test] + fn with_excess_capacity() { + // We should be able to use the excess capacity for all of these. + verify_temp_c(&string_for_len(33), 1); + verify_temp_c(&string_for_len(33), 33); + verify_temp_c(&string_for_len(65), 1); + verify_temp_c(&string_for_len(65), 64); + } }