From 84bb2cd11d846ec509a2ae950dad894683f52dcd Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 31 May 2022 21:50:38 -0400 Subject: [PATCH 01/65] Don't use Rust std in roc_std Debug impl --- roc_std/src/lib.rs | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index a38fd75c84..45befb8924 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -1,7 +1,7 @@ #![crate_type = "lib"] // #![no_std] use core::ffi::c_void; -use core::fmt; +use core::fmt::{self, Debug}; use core::mem::{ManuallyDrop, MaybeUninit}; use core::ops::Drop; use core::str; @@ -71,15 +71,23 @@ pub struct RocResult { tag: RocResultTag, } -impl core::fmt::Debug for RocResult +impl Debug for RocResult where - T: core::fmt::Debug, - E: core::fmt::Debug, + T: Debug, + E: Debug, { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.as_result_of_refs() { - Ok(payload) => write!(f, "RocOk({:?})", payload), - Err(payload) => write!(f, "RocErr({:?})", payload), + Ok(payload) => { + f.write_str("RocOk(")?; + payload.fmt(f)?; + f.write_str(")") + } + Err(payload) => { + f.write_str("RocErr(")?; + payload.fmt(f)?; + f.write_str(")") + } } } } From 5e6f9614649188550cf39910a41070b48dc91eae Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 31 May 2022 21:51:44 -0400 Subject: [PATCH 02/65] Use core over std more consistently in roc_std --- roc_std/src/lib.rs | 67 ++++++++++++------------------------------ roc_std/src/roc_str.rs | 2 +- 2 files changed, 20 insertions(+), 49 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 45befb8924..09360005e2 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -1,12 +1,13 @@ #![crate_type = "lib"] // #![no_std] + +use core::cmp::Ordering; use core::ffi::c_void; use core::fmt::{self, Debug}; +use core::hash::{Hash, Hasher}; use core::mem::{ManuallyDrop, MaybeUninit}; use core::ops::Drop; use core::str; -use std::hash::{Hash, Hasher}; -use std::io::Write; mod roc_list; mod roc_str; @@ -402,52 +403,37 @@ impl From for i128 { impl fmt::Debug for I128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let i128: i128 = (*self).into(); - - i128.fmt(f) + i128::from(*self).fmt(f) } } impl fmt::Display for I128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let i128: i128 = (*self).into(); - - i128.fmt(f) + Debug::fmt(&i128::from(*self), f) } } impl PartialEq for I128 { fn eq(&self, other: &Self) -> bool { - let i128_self: i128 = (*self).into(); - let i128_other: i128 = (*other).into(); - - i128_self.eq(&i128_other) + i128::from(*self).eq(&i128::from(*other)) } } impl PartialOrd for I128 { - fn partial_cmp(&self, other: &Self) -> Option { - let i128_self: i128 = (*self).into(); - let i128_other: i128 = (*other).into(); - - i128_self.partial_cmp(&i128_other) + fn partial_cmp(&self, other: &Self) -> Option { + i128::from(*self).partial_cmp(&i128::from(*other)) } } impl Ord for I128 { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let i128_self: i128 = (*self).into(); - let i128_other: i128 = (*other).into(); - - i128_self.cmp(&i128_other) + fn cmp(&self, other: &Self) -> Ordering { + i128::from(*self).cmp(&i128::from(*other)) } } impl Hash for I128 { fn hash(&self, state: &mut H) { - let i128: i128 = (*self).into(); - - i128.hash(state); + i128::from(*self).hash(state); } } @@ -469,51 +455,36 @@ impl From for u128 { impl fmt::Debug for U128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let u128: u128 = (*self).into(); - - u128.fmt(f) + u128::from(*self).fmt(f) } } impl fmt::Display for U128 { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let u128: u128 = (*self).into(); - - u128.fmt(f) + Debug::fmt(&u128::from(*self), f) } } impl PartialEq for U128 { fn eq(&self, other: &Self) -> bool { - let u128_self: u128 = (*self).into(); - let u128_other: u128 = (*other).into(); - - u128_self.eq(&u128_other) + u128::from(*self).eq(&u128::from(*other)) } } impl PartialOrd for U128 { - fn partial_cmp(&self, other: &Self) -> Option { - let u128_self: u128 = (*self).into(); - let u128_other: u128 = (*other).into(); - - u128_self.partial_cmp(&u128_other) + fn partial_cmp(&self, other: &Self) -> Option { + u128::from(*self).partial_cmp(&u128::from(*other)) } } impl Ord for U128 { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - let u128_self: u128 = (*self).into(); - let u128_other: u128 = (*other).into(); - - u128_self.cmp(&u128_other) + fn cmp(&self, other: &Self) -> Ordering { + u128::from(*self).cmp(&u128::from(*other)) } } impl Hash for U128 { fn hash(&self, state: &mut H) { - let u128: u128 = (*self).into(); - - u128.hash(state); + u128::from(*self).hash(state); } } diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 00f3a52fe7..f1c95e0f7f 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -3,10 +3,10 @@ use core::{ convert::TryFrom, fmt::Debug, + hash::Hash, mem::{size_of, ManuallyDrop}, ops::{Deref, DerefMut}, }; -use std::hash::Hash; use crate::RocList; From 819ae960eec4c8e7371f058cc86b9acd3ee44b4a Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 31 May 2022 21:52:18 -0400 Subject: [PATCH 03/65] Add no_std feature to roc_std --- roc_std/Cargo.toml | 1 + roc_std/src/lib.rs | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/roc_std/Cargo.toml b/roc_std/Cargo.toml index 5f18ae3060..20b9cb6fd6 100644 --- a/roc_std/Cargo.toml +++ b/roc_std/Cargo.toml @@ -21,3 +21,4 @@ libc = "0.2.106" [features] default = ["platform"] platform = [] +no_std = [] diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 09360005e2..a35e4533c5 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -1,5 +1,5 @@ #![crate_type = "lib"] -// #![no_std] +#![cfg_attr(feature = "no_std", no_std)] use core::cmp::Ordering; use core::ffi::c_void; @@ -315,19 +315,25 @@ impl RocDec { self.0 } + #[cfg(not(feature = "no_std"))] fn to_str_helper(&self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> usize { + // TODO there is probably some way to implement this logic without std::io::Write, + // which in turn would make this method work with no_std. + use std::io::Write; + if self.as_i128() == 0 { write!(&mut bytes[..], "{}", "0").unwrap(); + return 1; } let is_negative = (self.as_i128() < 0) as usize; - static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19); // The :019 in the following write! is computed as Self::DECIMAL_PLACES + 1. If you change // Self::DECIMAL_PLACES, this assert should remind you to change that format string as // well. - // + static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19); + // By using the :019 format, we're guaranteeing that numbers less than 1, say 0.01234 // get their leading zeros placed in bytes for us. i.e. bytes = b"0012340000000000000" write!(&mut bytes[..], "{:019}", self.as_i128()).unwrap(); @@ -369,6 +375,7 @@ impl RocDec { ret + 1 } + #[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. pub fn to_str(&self) -> RocStr { let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; let last_idx = self.to_str_helper(&mut bytes); @@ -376,6 +383,7 @@ impl RocDec { } } +#[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. impl fmt::Display for RocDec { fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; From ab99a583a82e30004e3bb9825980d90447266620 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 31 May 2022 22:13:08 -0400 Subject: [PATCH 04/65] Rename some functions to make it clearer they're unchecked --- .../src/helpers/from_wasmer_memory.rs | 2 +- compiler/test_gen/src/wasm_str.rs | 66 +++++++++---------- roc_std/src/lib.rs | 2 +- roc_std/src/roc_str.rs | 8 +-- 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/compiler/test_gen/src/helpers/from_wasmer_memory.rs b/compiler/test_gen/src/helpers/from_wasmer_memory.rs index 489ab2d541..e8f5a3da5f 100644 --- a/compiler/test_gen/src/helpers/from_wasmer_memory.rs +++ b/compiler/test_gen/src/helpers/from_wasmer_memory.rs @@ -70,7 +70,7 @@ impl FromWasmerMemory for RocStr { &memory_bytes[big_elem_ptr..][..big_length] }; - unsafe { RocStr::from_slice(slice) } + unsafe { RocStr::from_slice_unchecked(slice) } } } diff --git a/compiler/test_gen/src/wasm_str.rs b/compiler/test_gen/src/wasm_str.rs index 18466ec2a4..94ee2dc116 100644 --- a/compiler/test_gen/src/wasm_str.rs +++ b/compiler/test_gen/src/wasm_str.rs @@ -90,7 +90,7 @@ use roc_std::{RocList, RocStr}; // "# // ), -// RocStr::from_slice(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"), +// RocStr::from_slice_unchecked(b"JJJJJJJJJJJJJJJJJJJJJJJJJ"), // RocStr // ); // } @@ -108,7 +108,7 @@ use roc_std::{RocList, RocStr}; // _ -> "" // "# // ), -// RocStr::from_slice(b"JJJ"), +// RocStr::from_slice_unchecked(b"JJJ"), // RocStr // ); // } @@ -122,8 +122,8 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"01234567789abcdefghi"), -// RocStr::from_slice(b"01234567789abcdefghi") +// RocStr::from_slice_unchecked(b"01234567789abcdefghi"), +// RocStr::from_slice_unchecked(b"01234567789abcdefghi") // ]), // RocList // ); @@ -135,8 +135,8 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"01234567789abcdefghi "), -// RocStr::from_slice(b" 01234567789abcdefghi") +// RocStr::from_slice_unchecked(b"01234567789abcdefghi "), +// RocStr::from_slice_unchecked(b" 01234567789abcdefghi") // ]), // RocList // ); @@ -151,9 +151,9 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"J"), -// RocStr::from_slice(b"J"), -// RocStr::from_slice(b"J") +// RocStr::from_slice_unchecked(b"J"), +// RocStr::from_slice_unchecked(b"J"), +// RocStr::from_slice_unchecked(b"J") // ]), // RocList // ); @@ -169,7 +169,7 @@ use roc_std::{RocList, RocStr}; // "than the delimiter which happens to be very very long" // "# // ), -// RocList::from_slice(&[RocStr::from_slice(b"string to split is shorter")]), +// RocList::from_slice(&[RocStr::from_slice_unchecked(b"string to split is shorter")]), // RocList // ); // } @@ -182,7 +182,7 @@ use roc_std::{RocList, RocStr}; // Str.split "" "" // "# // ), -// RocList::from_slice(&[RocStr::from_slice(b"")]), +// RocList::from_slice(&[RocStr::from_slice_unchecked(b"")]), // RocList // ); // } @@ -195,7 +195,7 @@ use roc_std::{RocList, RocStr}; // Str.split "a," "," // "# // ), -// RocList::from_slice(&[RocStr::from_slice(b"a"), RocStr::from_slice(b"")]), +// RocList::from_slice(&[RocStr::from_slice_unchecked(b"a"), RocStr::from_slice_unchecked(b"")]), // RocList // ) // } @@ -224,9 +224,9 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"1"), -// RocStr::from_slice(b"2"), -// RocStr::from_slice(b"") +// RocStr::from_slice_unchecked(b"1"), +// RocStr::from_slice_unchecked(b"2"), +// RocStr::from_slice_unchecked(b"") // ]), // RocList // ); @@ -243,9 +243,9 @@ use roc_std::{RocList, RocStr}; // "# // ), // RocList::from_slice(&[ -// RocStr::from_slice(b"3"), -// RocStr::from_slice(b"4"), -// RocStr::from_slice(b"") +// RocStr::from_slice_unchecked(b"3"), +// RocStr::from_slice_unchecked(b"4"), +// RocStr::from_slice_unchecked(b"") // ]), // RocList // ); @@ -261,7 +261,7 @@ use roc_std::{RocList, RocStr}; // "Second string that is also fairly long. Two long strings test things that might not appear with short strings." // "# // ), -// RocStr::from_slice(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."), +// RocStr::from_slice_unchecked(b"First string that is fairly long. Longer strings make for different errors. Second string that is also fairly long. Two long strings test things that might not appear with short strings."), // RocStr // ); // } @@ -498,7 +498,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -513,7 +513,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("abc~".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("abc~".as_bytes()), // roc_std::RocStr // ); // } @@ -528,7 +528,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("∆".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("∆".as_bytes()), // roc_std::RocStr // ); // } @@ -543,7 +543,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("∆œ¬".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("∆œ¬".as_bytes()), // roc_std::RocStr // ); // } @@ -558,7 +558,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("💖".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("💖".as_bytes()), // roc_std::RocStr // ); // } @@ -573,7 +573,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("💖🤠🚀".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("💖🤠🚀".as_bytes()), // roc_std::RocStr // ); // } @@ -588,7 +588,7 @@ fn str_starts_with_false_small_str() { // Err _ -> "" // "# // ), -// roc_std::RocStr::from_slice("💖b∆".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("💖b∆".as_bytes()), // roc_std::RocStr // ); // } @@ -607,7 +607,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -626,7 +626,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -645,7 +645,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -664,7 +664,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -683,7 +683,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -702,7 +702,7 @@ fn str_starts_with_false_small_str() { // _ -> "" // "# // ), -// roc_std::RocStr::from_slice("a".as_bytes()), +// roc_std::RocStr::from_slice_unchecked("a".as_bytes()), // roc_std::RocStr // ); // } @@ -744,7 +744,7 @@ fn str_equality() { // printExpr expr // "# // ), -// RocStr::from_slice(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"), +// RocStr::from_slice_unchecked(b"Add (Add (Val 3) (Val 1)) (Add (Val 1) (Var 1))"), // RocStr // ); // } diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index a35e4533c5..37ccba18f4 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -379,7 +379,7 @@ impl RocDec { pub fn to_str(&self) -> RocStr { let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; let last_idx = self.to_str_helper(&mut bytes); - unsafe { RocStr::from_slice(&bytes[0..last_idx]) } + unsafe { RocStr::from_slice_unchecked(&bytes[0..last_idx]) } } } diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index f1c95e0f7f..5af7766c9e 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -28,8 +28,8 @@ impl RocStr { /// # Safety /// /// `slice` must be valid UTF-8. - pub unsafe fn from_slice(slice: &[u8]) -> Self { - if let Some(small_string) = unsafe { SmallString::try_from(slice) } { + pub unsafe fn from_slice_unchecked(slice: &[u8]) -> Self { + if let Some(small_string) = unsafe { SmallString::try_from_utf8_bytes(slice) } { Self(RocStrInner { small_string }) } else { let heap_allocated = RocList::from_slice(slice); @@ -86,7 +86,7 @@ impl Default for RocStr { impl From<&str> for RocStr { fn from(s: &str) -> Self { - unsafe { Self::from_slice(s.as_bytes()) } + unsafe { Self::from_slice_unchecked(s.as_bytes()) } } } @@ -168,7 +168,7 @@ impl SmallString { /// # Safety /// /// `slice` must be valid UTF-8. - unsafe fn try_from(slice: &[u8]) -> Option { + unsafe fn try_from_utf8_bytes(slice: &[u8]) -> Option { // Check the size of the slice. let len_as_u8 = u8::try_from(slice.len()).ok()?; if (len_as_u8 as usize) > Self::CAPACITY { From 225d5b574167c69482dc649ae864981a318a6e73 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 31 May 2022 22:16:01 -0400 Subject: [PATCH 05/65] Add TryFrom<&CStr> and CString for RocStr --- roc_std/src/roc_str.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 5af7766c9e..f5917a19d3 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -8,6 +8,9 @@ use core::{ ops::{Deref, DerefMut}, }; +#[cfg(not(feature = "no_std"))] +use std::ffi::{CStr, CString}; + use crate::RocList; #[repr(transparent)] @@ -78,6 +81,24 @@ impl Deref for RocStr { } } +#[cfg(not(feature = "no_std"))] +impl core::convert::TryFrom<&CStr> for RocStr { + type Error = core::str::Utf8Error; + + fn try_from(c_str: &CStr) -> Result { + c_str.to_str().map(RocStr::from) + } +} + +#[cfg(not(feature = "no_std"))] +impl core::convert::TryFrom for RocStr { + type Error = core::str::Utf8Error; + + fn try_from(c_string: CString) -> Result { + c_string.to_str().map(RocStr::from) + } +} + impl Default for RocStr { fn default() -> Self { Self::empty() From 94c9a592fcb714d9c8958f93596d501ca520dc52 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 10:03:51 -0400 Subject: [PATCH 06/65] Use libc in roc_std except when no_std is enabled --- Cargo.lock | 1 + roc_std/Cargo.lock | 7 +++++++ roc_std/Cargo.toml | 5 +++-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57203a4c28..81117d6e3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4110,6 +4110,7 @@ dependencies = [ name = "roc_std" version = "0.1.0" dependencies = [ + "libc", "static_assertions 0.1.1", ] diff --git a/roc_std/Cargo.lock b/roc_std/Cargo.lock index 5494d0afd5..fe1a8e26d8 100644 --- a/roc_std/Cargo.lock +++ b/roc_std/Cargo.lock @@ -198,8 +198,15 @@ dependencies = [ "pretty_assertions", "quickcheck", "quickcheck_macros", + "static_assertions", ] +[[package]] +name = "static_assertions" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f406d6ee68db6796e11ffd7b4d171864c58b7451e79ef9460ea33c287a1f89a7" + [[package]] name = "syn" version = "1.0.86" diff --git a/roc_std/Cargo.toml b/roc_std/Cargo.toml index 20b9cb6fd6..89859b4bc0 100644 --- a/roc_std/Cargo.toml +++ b/roc_std/Cargo.toml @@ -10,15 +10,16 @@ version = "0.1.0" [dependencies] static_assertions = "0.1" +libc = { version = "0.2.106", optional = true } [dev-dependencies] indoc = "1.0.3" pretty_assertions = "1.0.0" quickcheck = "1.0.3" quickcheck_macros = "1.0.0" -libc = "0.2.106" [features] -default = ["platform"] +default = ["platform", "std"] platform = [] no_std = [] +std = ["libc"] From 1f086c464cab060e82ff7d2c887b37aff64eb106 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 10:05:52 -0400 Subject: [PATCH 07/65] Link roc_memcpy and roc_memset in roc_std --- roc_std/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 37ccba18f4..adc494318d 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -28,6 +28,8 @@ extern "C" { alignment: u32, ) -> *mut c_void; pub fn roc_dealloc(ptr: *mut c_void, alignment: u32); + pub fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void; + pub fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void; } /// # Safety From a456e51a6e38fc8da8564a5ffb34db52c811eca0 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 10:06:30 -0400 Subject: [PATCH 08/65] Add to_cstring and to_cstring_unchecked to RocStr --- roc_std/src/roc_list.rs | 11 ++++++ roc_std/src/roc_str.rs | 87 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 1 deletion(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 5abdfa69be..01559f024f 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -3,6 +3,7 @@ use core::{ cell::Cell, cmp::{self, Ordering}, + ffi::c_void, fmt::Debug, intrinsics::copy_nonoverlapping, mem::{self, ManuallyDrop}, @@ -50,6 +51,16 @@ impl RocList { let storage = unsafe { &*elements.as_ptr().cast::>().sub(1) }; Some((elements, storage)) } + + /// Useful for doing memcpy on the elements. Returns NULL if list is empty. + pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const c_void { + unsafe { core::mem::transmute(self.elements) } + } + + /// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty. + pub(crate) unsafe fn ptr_to_allocation(&self) -> *const c_void { + unsafe { (self.ptr_to_first_elem() as *const usize).sub(1).cast() } + } } impl RocList diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index f5917a19d3..54274f9b12 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -11,7 +11,7 @@ use core::{ #[cfg(not(feature = "no_std"))] use std::ffi::{CStr, CString}; -use crate::RocList; +use crate::{roc_alloc, roc_memcpy, RocList}; #[repr(transparent)] pub struct RocStr(RocStrInner); @@ -54,6 +54,13 @@ impl RocStr { } } + fn iter_bytes(&self) -> impl ExactSizeIterator + '_ { + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().copied(), + RocStrInnerRef::SmallString(small_str) => small_str.bytes.iter().copied(), + } + } + pub fn len(&self) -> usize { match self.as_enum_ref() { RocStrInnerRef::HeapAllocated(h) => h.len(), @@ -68,6 +75,80 @@ impl RocStr { pub fn as_str(&self) -> &str { &*self } + + #[cfg(not(feature = "no_std"))] + pub fn to_cstring(self) -> Option { + if self.iter_bytes().any(|byte| byte == 0) { + None + } else { + Some(unsafe { self.to_cstring_unchecked() }) + } + } + + /// C strings must not have any \0 bytes in them. This does not check for that, + /// and instead assumes that the RocStr has no \0 bytes in the middle. + #[cfg(not(feature = "no_std"))] + pub unsafe fn to_cstring_unchecked(self) -> CString { + use crate::Storage; + use core::{ffi::c_void, mem}; + + let len; + let alloc_ptr = match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => { + len = roc_list.len(); + + // We already have an allocation that's even bigger than necessary, because + // the refcount bytes take up more than the 1B needed for the \0 at the end. + unsafe { + let alloc_ptr = roc_list.ptr_to_allocation() as *mut libc::c_void; + + // First, copy the bytes over the original allocation - effectively scooting + // everything over by one `usize`. Now we no longer have a refcount (but the + // CString wouldn't use that anyway), but we do have a free `usize` at the end. + // + // IMPORTANT: Must use memmove instead of memcpy because the regions overlap. + // Passing overlapping regions to memcpy is undefined behavior! + libc::memmove(alloc_ptr, roc_list.ptr_to_first_elem(), len); + + alloc_ptr + } + } + RocStrInnerRef::SmallString(small_str) => { + let align = mem::align_of::() as u32; + + len = small_str.len(); + + // Make a new allocation, then copy the bytes over and null-terminate. + unsafe { + // Use len + 1 to make room for the null terminateor. + let alloc_ptr = roc_alloc(len + 1, align); + + // Copy the contents of the small string into the new allocation + roc_memcpy(alloc_ptr, small_str.bytes_ptr() as *mut c_void, len); + + // Null-terminate + *((alloc_ptr as *mut u8).add(len)) = 0; + + alloc_ptr + } + } + }; + + let c_string = unsafe { + // Null-terminate the string by writing a zero to the end, where we now + // have a free `usize` worth of space. Don't write an entire `usize` in + // there, because it might not be aligned properly (depending on whether + // len() happens to be aligned to `usize`). Null-termination only needs + // 1 byte anyway. + *(alloc_ptr as *mut u8).add(len) = 0; + + CString::from_raw(alloc_ptr as *mut std::os::raw::c_char) + }; + + core::mem::forget(self); + + c_string + } } impl Deref for RocStr { @@ -212,6 +293,10 @@ impl SmallString { fn len(&self) -> usize { usize::from(self.len & !RocStr::MASK) } + + fn bytes_ptr(&self) -> *const u8 { + self.bytes.as_ptr() + } } impl Deref for SmallString { From 88cbb9fafd23ada7908b1b10a3369532c25d7b3b Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 12:02:22 -0400 Subject: [PATCH 09/65] Make RocList::ptr_to_first_elem return T over c_void --- roc_std/src/roc_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 01559f024f..250886010e 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -53,7 +53,7 @@ impl RocList { } /// Useful for doing memcpy on the elements. Returns NULL if list is empty. - pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const c_void { + pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T { unsafe { core::mem::transmute(self.elements) } } From 7cb7a5889962891c9770f4deec1ef300e1d88bd1 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 12:02:53 -0400 Subject: [PATCH 10/65] Replace less-safe/useful functions with into_temp_c_str --- roc_std/src/roc_str.rs | 170 ++++++++++++++++++++++++----------------- 1 file changed, 100 insertions(+), 70 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 54274f9b12..56c94c063c 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -11,7 +11,7 @@ use core::{ #[cfg(not(feature = "no_std"))] use std::ffi::{CStr, CString}; -use crate::{roc_alloc, roc_memcpy, RocList}; +use crate::RocList; #[repr(transparent)] pub struct RocStr(RocStrInner); @@ -54,10 +54,10 @@ impl RocStr { } } - fn iter_bytes(&self) -> impl ExactSizeIterator + '_ { + pub fn capacity(&self) -> usize { match self.as_enum_ref() { - RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().copied(), - RocStrInnerRef::SmallString(small_str) => small_str.bytes.iter().copied(), + RocStrInnerRef::HeapAllocated(roc_list) => roc_list.capacity(), + RocStrInnerRef::SmallString(_) => SmallString::CAPACITY, } } @@ -76,78 +76,97 @@ impl RocStr { &*self } - #[cfg(not(feature = "no_std"))] - pub fn to_cstring(self) -> Option { - if self.iter_bytes().any(|byte| byte == 0) { - None - } else { - Some(unsafe { self.to_cstring_unchecked() }) + /// Returns the index of the first interior \0 byte in the string, or None if there are none. + fn first_nul_byte(&self) -> Option { + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => roc_list.iter().position(|byte| *byte == 0), + RocStrInnerRef::SmallString(small_string) => small_string.first_nul_byte(), } } - /// C strings must not have any \0 bytes in them. This does not check for that, - /// and instead assumes that the RocStr has no \0 bytes in the middle. + /// Without allocating any new heap memory, destructively turn this RocStr into a + /// &CStr and then provide access to that &CStr for the duration of a given function. + /// + /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes + /// to null-terminate the string in-place. Small strings have an extra byte at the end + /// where the length is stored, which can become 0 for null-termination. Heap-allocated + /// strings can have excess capacity which can hold a null termiator, or if they have no + /// excess capacity, all the bytes can be shifted over the refcount in order to free up + /// a `usize` worth of free space at the end - which can easily fit a null terminator. + /// + /// This operation can fail because a RocStr may contain \0 characters, which a CStr cannot. + /// Also it can fail due to the RocStr not being null-terminated, which it might + /// coincidentally be if it has excess capacity and the first byte of excess capacity is 0. #[cfg(not(feature = "no_std"))] - pub unsafe fn to_cstring_unchecked(self) -> CString { - use crate::Storage; - use core::{ffi::c_void, mem}; + pub fn into_temp_c_str T>(self, func: F) -> Result { + let opt_first_nul_byte_pos = self.first_nul_byte(); - let len; - let alloc_ptr = match self.as_enum_ref() { - RocStrInnerRef::HeapAllocated(roc_list) => { - len = roc_list.len(); + if let Some(pos) = opt_first_nul_byte_pos { + Err(InteriorNulError { pos, roc_str: self }) + } else { + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => { + let len = roc_list.len(); - // We already have an allocation that's even bigger than necessary, because - // the refcount bytes take up more than the 1B needed for the \0 at the end. - unsafe { - let alloc_ptr = roc_list.ptr_to_allocation() as *mut libc::c_void; + unsafe { + if len == 0 { + // No need to do a heap allocation for an empty &CStr - we + // can just do a stack allocation that will live for the + // duration of the function. + let c_str = CStr::from_bytes_with_nul_unchecked(&[0]); - // First, copy the bytes over the original allocation - effectively scooting - // everything over by one `usize`. Now we no longer have a refcount (but the - // CString wouldn't use that anyway), but we do have a free `usize` at the end. - // - // IMPORTANT: Must use memmove instead of memcpy because the regions overlap. - // Passing overlapping regions to memcpy is undefined behavior! - libc::memmove(alloc_ptr, roc_list.ptr_to_first_elem(), len); + Ok(func(c_str)) + } else if len < roc_list.capacity() { + // We happen to have excess capacity, so write a null terminator + // into the first byte of excess capacity. + let ptr = roc_list.ptr_to_first_elem(); - alloc_ptr + *((ptr.add(len)) as *mut u8) = 0; + + let c_str = CStr::from_bytes_with_nul_unchecked(roc_list.as_slice()); + + Ok(func(c_str)) + } else { + // We always have an allocation that's even bigger than necessary, + // because the refcount bytes take up more than the 1B needed for + // the \0 at the end. We just need to shift the bytes over on top + // of the refcount. + let alloc_ptr = roc_list.ptr_to_allocation() as *mut libc::c_void; + + // First, copy the bytes over the original allocation - effectively + // shifting everything over by one `usize`. Now we no longer have a + // refcount (but the &CStr won't use that anyway), but we do have a + // free `usize` at the end. + // + // IMPORTANT: Must use memmove instead of memcpy because the regions + // overlap. Passing overlapping regions to memcpy is undefined! + libc::memmove(alloc_ptr, roc_list.ptr_to_first_elem().cast(), len); + + let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); + let c_str = CStr::from_bytes_with_nul_unchecked(bytes); + + Ok(func(c_str)) + } + } + } + RocStrInnerRef::SmallString(small_str) => { + // Set the length byte to 0 to guarantee null-termination. + // We may alredy happen to be null-terminated if small_str.len() is + // less than the maximum small string length, but we can't assume that, + // and checking first would be more expensive than always writing the 0. + unsafe { + let mut bytes = small_str.bytes; + let ptr = bytes.as_mut_ptr(); + + *((ptr.add(small_str.len())) as *mut u8) = 0; + + let c_str = CStr::from_bytes_with_nul_unchecked(&bytes); + + Ok(func(c_str)) + } } } - RocStrInnerRef::SmallString(small_str) => { - let align = mem::align_of::() as u32; - - len = small_str.len(); - - // Make a new allocation, then copy the bytes over and null-terminate. - unsafe { - // Use len + 1 to make room for the null terminateor. - let alloc_ptr = roc_alloc(len + 1, align); - - // Copy the contents of the small string into the new allocation - roc_memcpy(alloc_ptr, small_str.bytes_ptr() as *mut c_void, len); - - // Null-terminate - *((alloc_ptr as *mut u8).add(len)) = 0; - - alloc_ptr - } - } - }; - - let c_string = unsafe { - // Null-terminate the string by writing a zero to the end, where we now - // have a free `usize` worth of space. Don't write an entire `usize` in - // there, because it might not be aligned properly (depending on whether - // len() happens to be aligned to `usize`). Null-termination only needs - // 1 byte anyway. - *(alloc_ptr as *mut u8).add(len) = 0; - - CString::from_raw(alloc_ptr as *mut std::os::raw::c_char) - }; - - core::mem::forget(self); - - c_string + } } } @@ -162,8 +181,9 @@ impl Deref for RocStr { } } +/// This can fail because a CStr may contain invalid UTF-8 characters #[cfg(not(feature = "no_std"))] -impl core::convert::TryFrom<&CStr> for RocStr { +impl TryFrom<&CStr> for RocStr { type Error = core::str::Utf8Error; fn try_from(c_str: &CStr) -> Result { @@ -171,8 +191,9 @@ impl core::convert::TryFrom<&CStr> for RocStr { } } +/// This can fail because a CString may contain invalid UTF-8 characters #[cfg(not(feature = "no_std"))] -impl core::convert::TryFrom for RocStr { +impl TryFrom for RocStr { type Error = core::str::Utf8Error; fn try_from(c_string: CString) -> Result { @@ -180,6 +201,14 @@ impl core::convert::TryFrom for RocStr { } } +#[cfg(not(feature = "no_std"))] +/// Like https://doc.rust-lang.org/std/ffi/struct.NulError.html but +/// only for interior nulls, not for missing null terminators. +pub struct InteriorNulError { + pub pos: usize, + pub roc_str: RocStr, +} + impl Default for RocStr { fn default() -> Self { Self::empty() @@ -294,8 +323,9 @@ impl SmallString { usize::from(self.len & !RocStr::MASK) } - fn bytes_ptr(&self) -> *const u8 { - self.bytes.as_ptr() + /// Returns the index of the first interior \0 byte in the string, or None if there are none. + fn first_nul_byte(&self) -> Option { + self.bytes.iter().position(|byte| *byte == 0) } } From 695cb7b1545a6405aeee61dc300e3e7014045ce5 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 15:50:31 -0400 Subject: [PATCH 11/65] Avoid a conditional in roc_std::Storage --- roc_std/src/storage.rs | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/roc_std/src/storage.rs b/roc_std/src/storage.rs index 2be9976fca..cd1d265c3a 100644 --- a/roc_std/src/storage.rs +++ b/roc_std/src/storage.rs @@ -1,6 +1,11 @@ use core::num::NonZeroIsize; -const REFCOUNT_1: isize = isize::MIN; +/// # Safety +/// +/// isize::MIN is definitely not zero. This can become +/// https://doc.rust-lang.org/std/num/struct.NonZeroIsize.html#associatedconstant.MIN +/// once it has become stable. +const REFCOUNT_1: NonZeroIsize = unsafe { NonZeroIsize::new_unchecked(isize::MIN) }; #[derive(Clone, Copy, Debug)] pub enum Storage { @@ -10,7 +15,7 @@ pub enum Storage { impl Storage { pub fn new_reference_counted() -> Self { - Self::ReferenceCounted(NonZeroIsize::new(REFCOUNT_1).unwrap()) + Self::ReferenceCounted(REFCOUNT_1) } /// Increment the reference count. @@ -37,11 +42,10 @@ impl Storage { match self { Storage::Readonly => false, Storage::ReferenceCounted(rc) => { - let rc_as_isize = rc.get(); - if rc_as_isize == REFCOUNT_1 { + if *rc == REFCOUNT_1 { true } else { - *rc = NonZeroIsize::new(rc_as_isize - 1).unwrap(); + *rc = NonZeroIsize::new(rc.get() - 1).unwrap(); false } } From 7e49724687ea74fe069ac1769baeecb1aeaec409 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 15:50:43 -0400 Subject: [PATCH 12/65] Add roc_std::Storage::is_unique --- roc_std/src/storage.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/roc_std/src/storage.rs b/roc_std/src/storage.rs index cd1d265c3a..4332f8581c 100644 --- a/roc_std/src/storage.rs +++ b/roc_std/src/storage.rs @@ -4,7 +4,7 @@ use core::num::NonZeroIsize; /// /// isize::MIN is definitely not zero. This can become /// https://doc.rust-lang.org/std/num/struct.NonZeroIsize.html#associatedconstant.MIN -/// once it has become stable. +/// once it has been stabilized. const REFCOUNT_1: NonZeroIsize = unsafe { NonZeroIsize::new_unchecked(isize::MIN) }; #[derive(Clone, Copy, Debug)] @@ -55,4 +55,8 @@ impl Storage { pub fn is_readonly(&self) -> bool { matches!(self, Self::Readonly) } + + pub fn is_unique(&self) -> bool { + matches!(self, Self::ReferenceCounted(REFCOUNT_1)) + } } From cc951a38be043d16f65f8672ddffc7255756346b Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 16:35:20 -0400 Subject: [PATCH 13/65] Add some roc_std documentation --- roc_std/src/roc_list.rs | 9 +++++++++ roc_std/src/roc_str.rs | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 250886010e..64ab755b95 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -42,6 +42,15 @@ impl RocList { self.len() == 0 } + /// Note that there is no way to convert directly to a Vec. + /// + /// This is because RocList values are not allocated using the system allocator, so + /// handing off any heap-allocated bytes to a Vec would not work because its Drop + /// implementation would try to free those bytes using the wrong allocator. + /// + /// Instead, if you want a Rust Vec, you need to do a fresh allocation and copy the + /// bytes over - in other words, calling this `as_slice` method and then calling `to_vec` + /// on that. pub fn as_slice(&self) -> &[T] { &*self } diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 56c94c063c..38ce496e35 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -72,6 +72,15 @@ impl RocStr { self.len() == 0 } + /// Note that there is no way to convert directly to a String. + /// + /// This is because RocStr values are not allocated using the system allocator, so + /// handing off any heap-allocated bytes to a String would not work because its Drop + /// implementation would try to free those bytes using the wrong allocator. + /// + /// Instead, if you want a Rust String, you need to do a fresh allocation and copy the + /// bytes over - in other words, calling this `as_str` method and then calling `to_string` + /// on that. pub fn as_str(&self) -> &str { &*self } From 6ea8ef4fd046924eb944ee0a24f41d279d5fbb14 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 16:35:34 -0400 Subject: [PATCH 14/65] Handle unique refcounts in into_temp_c_str --- roc_std/src/roc_list.rs | 4 +- roc_std/src/roc_str.rs | 175 ++++++++++++++++++++++++++++------------ 2 files changed, 125 insertions(+), 54 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 64ab755b95..25006f6583 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -55,7 +55,9 @@ impl RocList { &*self } - fn elements_and_storage(&self) -> Option<(NonNull>, &Cell)> { + pub(crate) fn elements_and_storage( + &self, + ) -> Option<(NonNull>, &Cell)> { let elements = self.elements?; let storage = unsafe { &*elements.as_ptr().cast::>().sub(1) }; Some((elements, storage)) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 38ce496e35..b602967d31 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -93,8 +93,14 @@ impl RocStr { } } - /// Without allocating any new heap memory, destructively turn this RocStr into a - /// &CStr and then provide access to that &CStr for the duration of a given function. + // If the string is under this many bytes, the into_temp_c_str method will allocate the + // CStr on the stack when the RocStr is non-unique. + const TEMP_CSTR_MAX_STACK_BYTES: usize = 64; + + /// Turn this RocStr into a &CStr and then provide access to that &CStr for the duration + /// of a given function. This does not allocate when given a small string or a string with + /// unique refcount, but may allocate when given a large string with non-unique refcount. + /// (It will do a stack allocation if the string is under 64 bytes.) /// /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes /// to null-terminate the string in-place. Small strings have an extra byte at the end @@ -108,71 +114,134 @@ impl RocStr { /// coincidentally be if it has excess capacity and the first byte of excess capacity is 0. #[cfg(not(feature = "no_std"))] pub fn into_temp_c_str T>(self, func: F) -> Result { - let opt_first_nul_byte_pos = self.first_nul_byte(); + use core::mem::MaybeUninit; - if let Some(pos) = opt_first_nul_byte_pos { - Err(InteriorNulError { pos, roc_str: self }) - } else { - match self.as_enum_ref() { - RocStrInnerRef::HeapAllocated(roc_list) => { - let len = roc_list.len(); + use crate::{roc_alloc, roc_dealloc}; - unsafe { - if len == 0 { + if let Some(pos) = self.first_nul_byte() { + return Err(InteriorNulError { pos, roc_str: self }); + } + + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => { + unsafe { + match roc_list.elements_and_storage() { + Some((_, storage)) if storage.get().is_unique() => { + // The backing RocList was unique, so we can mutate it in-place. + let len = roc_list.len(); + + if len < roc_list.capacity() { + // We happen to have excess capacity, so write a null terminator + // into the first byte of excess capacity. + let ptr = roc_list.ptr_to_first_elem(); + + *((ptr.add(len)) as *mut u8) = 0; + + let c_str = + CStr::from_bytes_with_nul_unchecked(roc_list.as_slice()); + + Ok(func(c_str)) + } else { + // We always have an allocation that's even bigger than necessary, + // because the refcount bytes take up more than the 1B needed for + // the \0 at the end. We just need to shift the bytes over on top + // of the refcount. + let alloc_ptr = roc_list.ptr_to_allocation() as *mut _; + + // First, copy the bytes over the original allocation - effectively + // shifting everything over by one `usize`. Now we no longer have a + // refcount (but the &CStr won't use that anyway), but we do have a + // free `usize` at the end. + // + // IMPORTANT: Must use memmove instead of memcpy because the regions + // overlap. Passing overlapping regions to memcpy is undefined! + libc::memmove(alloc_ptr, roc_list.ptr_to_first_elem().cast(), len); + + let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); + let c_str = CStr::from_bytes_with_nul_unchecked(bytes); + + Ok(func(c_str)) + } + } + Some(_) => { + use crate::roc_memcpy; + + // The backing list was not unique, so we can't mutate it in-place. + let len = roc_list.len(); + + if len < Self::TEMP_CSTR_MAX_STACK_BYTES { + // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array + // has become stabilized, use that here in order to do a precise + // stack allocation instead of always over-allocating to 64B. + let mut bytes: MaybeUninit<[u8; Self::TEMP_CSTR_MAX_STACK_BYTES]> = + MaybeUninit::uninit(); + let alloc_ptr = bytes.as_mut_ptr() as *mut u8; + let elem_ptr = roc_list.ptr_to_first_elem() as *mut _; + + // memcpy the bytes into the stack allocation + roc_memcpy(alloc_ptr.cast(), elem_ptr, len); + + // Null-terminate the new allocation. + *(alloc_ptr.add(len)) = 0; + + // Convert the bytes to &CStr + let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); + let c_str = CStr::from_bytes_with_nul_unchecked(bytes); + + Ok(func(c_str)) + } else { + // The string is too long to stack-allocate, so + // do a heap allocation and then free it afterwards. + let align = core::mem::align_of::() as u32; + let alloc_ptr = roc_alloc(len, align); + let elem_ptr = roc_list.ptr_to_first_elem() as *mut _; + + // memcpy the bytes into the heap allocation + roc_memcpy(alloc_ptr.cast(), elem_ptr, len); + + // Null-terminate the new allocation. + *((alloc_ptr as *mut u8).add(len)) = 0; + + // Convert the bytes to &CStr + let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); + let c_str = CStr::from_bytes_with_nul_unchecked(bytes); + + // Pass the &CStr to the function to get the answer. + let answer = func(c_str); + + // Free the heap allocation. + roc_dealloc(alloc_ptr, align); + + Ok(answer) + } + } + None => { + // The backing list was empty. + // // No need to do a heap allocation for an empty &CStr - we // can just do a stack allocation that will live for the // duration of the function. let c_str = CStr::from_bytes_with_nul_unchecked(&[0]); - Ok(func(c_str)) - } else if len < roc_list.capacity() { - // We happen to have excess capacity, so write a null terminator - // into the first byte of excess capacity. - let ptr = roc_list.ptr_to_first_elem(); - - *((ptr.add(len)) as *mut u8) = 0; - - let c_str = CStr::from_bytes_with_nul_unchecked(roc_list.as_slice()); - - Ok(func(c_str)) - } else { - // We always have an allocation that's even bigger than necessary, - // because the refcount bytes take up more than the 1B needed for - // the \0 at the end. We just need to shift the bytes over on top - // of the refcount. - let alloc_ptr = roc_list.ptr_to_allocation() as *mut libc::c_void; - - // First, copy the bytes over the original allocation - effectively - // shifting everything over by one `usize`. Now we no longer have a - // refcount (but the &CStr won't use that anyway), but we do have a - // free `usize` at the end. - // - // IMPORTANT: Must use memmove instead of memcpy because the regions - // overlap. Passing overlapping regions to memcpy is undefined! - libc::memmove(alloc_ptr, roc_list.ptr_to_first_elem().cast(), len); - - let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); - let c_str = CStr::from_bytes_with_nul_unchecked(bytes); - Ok(func(c_str)) } } } - RocStrInnerRef::SmallString(small_str) => { - // Set the length byte to 0 to guarantee null-termination. - // We may alredy happen to be null-terminated if small_str.len() is - // less than the maximum small string length, but we can't assume that, - // and checking first would be more expensive than always writing the 0. - unsafe { - let mut bytes = small_str.bytes; - let ptr = bytes.as_mut_ptr(); + } + RocStrInnerRef::SmallString(small_str) => { + // Set the length byte to 0 to guarantee null-termination. + // We may alredy happen to be null-terminated if small_str.len() is + // less than the maximum small string length, but we can't assume that, + // and checking first would be more expensive than always writing the 0. + unsafe { + let mut bytes = small_str.bytes; + let ptr = bytes.as_mut_ptr(); - *((ptr.add(small_str.len())) as *mut u8) = 0; + *((ptr.add(small_str.len())) as *mut u8) = 0; - let c_str = CStr::from_bytes_with_nul_unchecked(&bytes); + let c_str = CStr::from_bytes_with_nul_unchecked(&bytes); - Ok(func(c_str)) - } + Ok(func(c_str)) } } } From 336af38f214df82ececdb222b76810d5140858a4 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 17:32:29 -0400 Subject: [PATCH 15/65] Provide roc_alloc etc during roc_std tests --- roc_std/src/lib.rs | 80 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index adc494318d..1ea74be46d 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -18,7 +18,7 @@ pub use roc_str::RocStr; pub use storage::Storage; // A list of C functions that are being imported -#[cfg(feature = "platform")] +#[cfg(all(feature = "platform", not(test)))] extern "C" { pub fn roc_alloc(size: usize, alignment: u32) -> *mut c_void; pub fn roc_realloc( @@ -28,19 +28,73 @@ extern "C" { alignment: u32, ) -> *mut c_void; pub fn roc_dealloc(ptr: *mut c_void, alignment: u32); + pub fn roc_panic(c_ptr: *mut c_void, tag_id: u32); pub fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void; pub fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void; } +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { + return libc::malloc(size); +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_realloc( + c_ptr: *mut c_void, + new_size: usize, + _old_size: usize, + _alignment: u32, +) -> *mut c_void { + return libc::realloc(c_ptr, new_size); +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { + return libc::free(c_ptr); +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { + use std::ffi::CStr; + use std::os::raw::c_char; + + match tag_id { + 0 => { + let c_str = CStr::from_ptr(c_ptr as *const c_char); + let string = c_str.to_str().unwrap(); + panic!("roc_panic during test: {}", string); + } + _ => todo!(), + } +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void { + libc::memcpy(dst, src, n) +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void { + libc::memset(dst, c, n) +} + /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] +#[no_mangle] pub unsafe extern "C" fn roc_alloc(_size: usize, _alignment: u32) -> *mut c_void { unimplemented!("It is not valid to call roc alloc from within the compiler. Please use the \"platform\" feature if this is a platform.") } /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] +#[no_mangle] pub unsafe extern "C" fn roc_realloc( _ptr: *mut c_void, _new_size: usize, @@ -49,13 +103,37 @@ pub unsafe extern "C" fn roc_realloc( ) -> *mut c_void { unimplemented!("It is not valid to call roc realloc from within the compiler. Please use the \"platform\" feature if this is a platform.") } + /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] +#[no_mangle] pub unsafe extern "C" fn roc_dealloc(_ptr: *mut c_void, _alignment: u32) { unimplemented!("It is not valid to call roc dealloc from within the compiler. Please use the \"platform\" feature if this is a platform.") } +#[cfg(not(feature = "platform"))] +#[no_mangle] +pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { + unimplemented!("It is not valid to call roc panic from within the compiler. Please use the \"platform\" feature if this is a platform.") +} + +/// # Safety +/// This is only marked unsafe to typecheck without warnings in the rest of the code here. +#[cfg(not(feature = "platform"))] +#[no_mangle] +pub fn roc_memcpy(_dst: *mut c_void, _src: *mut c_void, _n: usize) -> *mut c_void { + unimplemented!("It is not valid to call roc memcpy from within the compiler. Please use the \"platform\" feature if this is a platform.") +} + +/// # Safety +/// This is only marked unsafe to typecheck without warnings in the rest of the code here. +#[cfg(not(feature = "platform"))] +#[no_mangle] +pub fn roc_memset(_dst: *mut c_void, _c: i32, _n: usize) -> *mut c_void { + unimplemented!("It is not valid to call roc memset from within the compiler. Please use the \"platform\" feature if this is a platform.") +} + #[repr(u8)] #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum RocOrder { From 78b4d6f22b0808afdc4e53199e96779b9a5acb71 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 17:33:35 -0400 Subject: [PATCH 16/65] Add some RocStr tests --- roc_std/src/roc_str.rs | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index b602967d31..9ce67aa848 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -282,6 +282,7 @@ impl TryFrom for RocStr { #[cfg(not(feature = "no_std"))] /// Like https://doc.rust-lang.org/std/ffi/struct.NulError.html but /// only for interior nulls, not for missing null terminators. +#[derive(Debug, Clone, PartialEq, Eq)] pub struct InteriorNulError { pub pos: usize, pub roc_str: RocStr, @@ -428,3 +429,39 @@ impl Hash for RocStr { self.as_str().hash(state) } } + +#[cfg(test)] +mod into_temp_c_str { + use super::RocStr; + use core::mem; + + #[test] + fn empty_string() { + let is_empty = RocStr::empty().into_temp_c_str(|c_str| c_str.to_bytes().is_empty()); + + assert_eq!(Ok(true), is_empty); + } + + #[test] + fn small_string_all_lengths() { + for len in 1..mem::size_of::() { + let bytes: Vec = (1..=len as u8).collect(); + + assert_eq!(bytes.len(), len); + + // The bytes should contain no nul characters. + assert!(bytes.iter().all(|byte| *byte != 0)); + + // e.g. "1" or "12" or "12345" etc. + let string = String::from_utf8(bytes).unwrap(); + + let answer = RocStr::from(string.as_str()).into_temp_c_str(|c_str| { + assert_eq!(c_str.to_str(), Ok(string.as_str())); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + } +} From b7dc265d45436dcb85c080918177a93de0c780d0 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 17:48:22 -0400 Subject: [PATCH 17/65] Move RocStr tests into test_roc_std --- roc_std/src/lib.rs | 54 ++-------------------------- roc_std/src/roc_str.rs | 36 ------------------- roc_std/tests/test_roc_std.rs | 67 +++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 88 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 1ea74be46d..6fcd5d5c81 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -18,7 +18,7 @@ pub use roc_str::RocStr; pub use storage::Storage; // A list of C functions that are being imported -#[cfg(all(feature = "platform", not(test)))] +#[cfg(feature = "platform")] extern "C" { pub fn roc_alloc(size: usize, alignment: u32) -> *mut c_void; pub fn roc_realloc( @@ -33,57 +33,6 @@ extern "C" { pub fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void; } -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { - return libc::malloc(size); -} - -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn roc_realloc( - c_ptr: *mut c_void, - new_size: usize, - _old_size: usize, - _alignment: u32, -) -> *mut c_void { - return libc::realloc(c_ptr, new_size); -} - -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { - return libc::free(c_ptr); -} - -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { - use std::ffi::CStr; - use std::os::raw::c_char; - - match tag_id { - 0 => { - let c_str = CStr::from_ptr(c_ptr as *const c_char); - let string = c_str.to_str().unwrap(); - panic!("roc_panic during test: {}", string); - } - _ => todo!(), - } -} - -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void { - libc::memcpy(dst, src, n) -} - -#[cfg(test)] -#[no_mangle] -pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void { - libc::memset(dst, c, n) -} - /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] @@ -91,6 +40,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut pub unsafe extern "C" fn roc_alloc(_size: usize, _alignment: u32) -> *mut c_void { unimplemented!("It is not valid to call roc alloc from within the compiler. Please use the \"platform\" feature if this is a platform.") } + /// # Safety /// This is only marked unsafe to typecheck without warnings in the rest of the code here. #[cfg(not(feature = "platform"))] diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 9ce67aa848..56104393d2 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -429,39 +429,3 @@ impl Hash for RocStr { self.as_str().hash(state) } } - -#[cfg(test)] -mod into_temp_c_str { - use super::RocStr; - use core::mem; - - #[test] - fn empty_string() { - let is_empty = RocStr::empty().into_temp_c_str(|c_str| c_str.to_bytes().is_empty()); - - assert_eq!(Ok(true), is_empty); - } - - #[test] - fn small_string_all_lengths() { - for len in 1..mem::size_of::() { - let bytes: Vec = (1..=len as u8).collect(); - - assert_eq!(bytes.len(), len); - - // The bytes should contain no nul characters. - assert!(bytes.iter().all(|byte| *byte != 0)); - - // e.g. "1" or "12" or "12345" etc. - let string = String::from_utf8(bytes).unwrap(); - - let answer = RocStr::from(string.as_str()).into_temp_c_str(|c_str| { - assert_eq!(c_str.to_str(), Ok(string.as_str())); - - 42 - }); - - assert_eq!(Ok(42), answer); - } - } -} diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 2466deb628..a6edbcf85c 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -27,6 +27,34 @@ pub unsafe extern "C" fn roc_dealloc(c_ptr: *mut c_void, _alignment: u32) { libc::free(c_ptr) } +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_panic(c_ptr: *mut c_void, tag_id: u32) { + use std::ffi::CStr; + use std::os::raw::c_char; + + match tag_id { + 0 => { + let c_str = CStr::from_ptr(c_ptr as *const c_char); + let string = c_str.to_str().unwrap(); + panic!("roc_panic during test: {}", string); + } + _ => todo!(), + } +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_memcpy(dst: *mut c_void, src: *mut c_void, n: usize) -> *mut c_void { + libc::memcpy(dst, src, n) +} + +#[cfg(test)] +#[no_mangle] +pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut c_void { + libc::memset(dst, c, n) +} + #[cfg(test)] mod test_roc_std { use roc_std::RocResult; @@ -124,3 +152,42 @@ mod test_roc_std { assert!(roc_result.is_err()); } } + +#[cfg(test)] +mod into_temp_c_str { + use roc_std::RocStr; + + #[test] + fn empty_string() { + let answer = RocStr::empty().into_temp_c_str(|c_str| { + assert_eq!(c_str.to_str(), Ok("")); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + + #[test] + fn small_string_all_lengths() { + for len in 1..(core::mem::size_of::() - 1) { + let bytes: Vec = (1..=len as u8).collect(); + + assert_eq!(bytes.len(), len); + + // The bytes should contain no nul characters. + assert!(bytes.iter().all(|byte| *byte != 0)); + + // e.g. "1" or "12" or "12345" etc. + let string = String::from_utf8(bytes).unwrap(); + + let answer = RocStr::from(string.as_str()).into_temp_c_str(|c_str| { + assert_eq!(c_str.to_str(), Ok(string.as_str())); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + } +} From 4db002acafc7ddf78fedb8a875f3227388e693a3 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 17:52:15 -0400 Subject: [PATCH 18/65] Fix empty_string_capacity test --- roc_std/tests/test_roc_std.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index a6edbcf85c..9adad6d215 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -7,6 +7,8 @@ extern crate roc_std; use core::ffi::c_void; +const ROC_SMALL_STR_CAPACITY: usize = core::mem::size_of::() - 1; + #[no_mangle] pub unsafe extern "C" fn roc_alloc(size: usize, _alignment: u32) -> *mut c_void { libc::malloc(size) @@ -114,9 +116,9 @@ mod test_roc_std { #[test] fn empty_string_capacity() { - let string = RocStr::from(""); + let string = RocStr::empty(); - assert_eq!(string.capacity(), 0); + assert_eq!(string.capacity(), super::ROC_SMALL_STR_CAPACITY); } #[test] @@ -170,7 +172,7 @@ mod into_temp_c_str { #[test] fn small_string_all_lengths() { - for len in 1..(core::mem::size_of::() - 1) { + for len in 1..super::ROC_SMALL_STR_CAPACITY { let bytes: Vec = (1..=len as u8).collect(); assert_eq!(bytes.len(), len); From 2eaaab11608ef413095ba82951d5bbbb788c12ec Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 17:52:21 -0400 Subject: [PATCH 19/65] Fix bug in into_temp_c_str --- roc_std/src/roc_str.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 56104393d2..96bebdc885 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -234,12 +234,13 @@ impl RocStr { // less than the maximum small string length, but we can't assume that, // and checking first would be more expensive than always writing the 0. unsafe { + let len = small_str.len(); let mut bytes = small_str.bytes; let ptr = bytes.as_mut_ptr(); *((ptr.add(small_str.len())) as *mut u8) = 0; - let c_str = CStr::from_bytes_with_nul_unchecked(&bytes); + let c_str = CStr::from_bytes_with_nul_unchecked(&bytes[0..=len]); Ok(func(c_str)) } @@ -404,7 +405,13 @@ impl SmallString { /// Returns the index of the first interior \0 byte in the string, or None if there are none. fn first_nul_byte(&self) -> Option { - self.bytes.iter().position(|byte| *byte == 0) + for (index, byte) in self.bytes[0..self.len()].iter().enumerate() { + if *byte == 0 { + return Some(index); + } + } + + None } } From 0cc1047877040e32fa31dcf2aa38cc249ef6994c Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 20:17:23 -0400 Subject: [PATCH 20/65] Allow converting RocStr to CStr in no_std --- Cargo.lock | 1 - roc_std/Cargo.toml | 5 +- roc_std/src/roc_str.rs | 128 ++++++++++++++++------------------ roc_std/tests/test_roc_std.rs | 12 +++- 4 files changed, 73 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 81117d6e3b..57203a4c28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4110,7 +4110,6 @@ dependencies = [ name = "roc_std" version = "0.1.0" dependencies = [ - "libc", "static_assertions 0.1.1", ] diff --git a/roc_std/Cargo.toml b/roc_std/Cargo.toml index 89859b4bc0..20b9cb6fd6 100644 --- a/roc_std/Cargo.toml +++ b/roc_std/Cargo.toml @@ -10,16 +10,15 @@ version = "0.1.0" [dependencies] static_assertions = "0.1" -libc = { version = "0.2.106", optional = true } [dev-dependencies] indoc = "1.0.3" pretty_assertions = "1.0.0" quickcheck = "1.0.3" quickcheck_macros = "1.0.0" +libc = "0.2.106" [features] -default = ["platform", "std"] +default = ["platform"] platform = [] no_std = [] -std = ["libc"] diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 96bebdc885..ec4b8ccee2 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -6,6 +6,7 @@ use core::{ hash::Hash, mem::{size_of, ManuallyDrop}, ops::{Deref, DerefMut}, + ptr, }; #[cfg(not(feature = "no_std"))] @@ -97,23 +98,34 @@ impl RocStr { // CStr on the stack when the RocStr is non-unique. const TEMP_CSTR_MAX_STACK_BYTES: usize = 64; - /// Turn this RocStr into a &CStr and then provide access to that &CStr for the duration - /// of a given function. This does not allocate when given a small string or a string with - /// unique refcount, but may allocate when given a large string with non-unique refcount. - /// (It will do a stack allocation if the string is under 64 bytes.) + /// Turn this RocStr into a nul-terminated UTF-8 `*mut i8` and then provide access to that + /// `*mut i8` (as well as its length) for the duration of a given function. This is + /// designed to be an efficient way to turn a RocStr received from an application into + /// the nul-terminated UTF-8 `char*` needed by UNIX syscalls. + /// + /// **NOTE:** The length passed to the function is the same value that `RocStr::len` will + /// return; it does not count the nul terminator. So to convert it to a nul-terminated + /// slice of Rust bytes, call `slice::from_raw_parts` passing the given length + 1. + /// + /// This operation achieves efficiency by reusing allocated bytes from the RocStr itself, + /// and sometimes allocating on the stack. It does not allocate on the heap when given a + /// a small string or a string with unique refcount, but may allocate when given a large + /// string with non-unique refcount. (It will do a stack allocation if the string is under + /// 64 bytes; the stack allocation will only live for the duration of the called function.) /// /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes - /// to null-terminate the string in-place. Small strings have an extra byte at the end - /// where the length is stored, which can become 0 for null-termination. Heap-allocated - /// strings can have excess capacity which can hold a null termiator, or if they have no + /// to nul-terminate the string in-place. Small strings have an extra byte at the end + /// where the length is stored, which can become 0 for nul-termination. Heap-allocated + /// strings can have excess capacity which can hold a nul termiator, or if they have no /// excess capacity, all the bytes can be shifted over the refcount in order to free up - /// a `usize` worth of free space at the end - which can easily fit a null terminator. + /// a `usize` worth of free space at the end - which can easily fit a nul terminator. /// - /// This operation can fail because a RocStr may contain \0 characters, which a CStr cannot. - /// Also it can fail due to the RocStr not being null-terminated, which it might - /// coincidentally be if it has excess capacity and the first byte of excess capacity is 0. - #[cfg(not(feature = "no_std"))] - pub fn into_temp_c_str T>(self, func: F) -> Result { + /// This operation can fail because a RocStr may contain \0 characters, which a + /// nul-terminated string must not. + pub fn temp_c_utf8 T>( + self, + func: F, + ) -> Result { use core::mem::MaybeUninit; use crate::{roc_alloc, roc_dealloc}; @@ -129,43 +141,35 @@ impl RocStr { Some((_, storage)) if storage.get().is_unique() => { // The backing RocList was unique, so we can mutate it in-place. let len = roc_list.len(); - - if len < roc_list.capacity() { - // We happen to have excess capacity, so write a null terminator - // into the first byte of excess capacity. - let ptr = roc_list.ptr_to_first_elem(); - - *((ptr.add(len)) as *mut u8) = 0; - - let c_str = - CStr::from_bytes_with_nul_unchecked(roc_list.as_slice()); - - Ok(func(c_str)) + let ptr = if len < roc_list.capacity() { + // We happen to have excess capacity already, so we will be able + // to write the \0 into the first byte of excess capacity. + roc_list.ptr_to_first_elem() as *mut i8 } else { // We always have an allocation that's even bigger than necessary, // because the refcount bytes take up more than the 1B needed for // the \0 at the end. We just need to shift the bytes over on top // of the refcount. - let alloc_ptr = roc_list.ptr_to_allocation() as *mut _; + let alloc_ptr = roc_list.ptr_to_allocation() as *mut i8; // First, copy the bytes over the original allocation - effectively // shifting everything over by one `usize`. Now we no longer have a // refcount (but the &CStr won't use that anyway), but we do have a // free `usize` at the end. // - // IMPORTANT: Must use memmove instead of memcpy because the regions - // overlap. Passing overlapping regions to memcpy is undefined! - libc::memmove(alloc_ptr, roc_list.ptr_to_first_elem().cast(), len); + // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping + // because the regions definitely overlap! + ptr::copy(roc_list.ptr_to_first_elem() as *mut i8, alloc_ptr, len); - let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); - let c_str = CStr::from_bytes_with_nul_unchecked(bytes); + alloc_ptr + }; - Ok(func(c_str)) - } + // nul-terminate + *(ptr.add(len)) = 0; + + Ok(func(ptr, len)) } Some(_) => { - use crate::roc_memcpy; - // The backing list was not unique, so we can't mutate it in-place. let len = roc_list.len(); @@ -173,44 +177,38 @@ impl RocStr { // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array // has become stabilized, use that here in order to do a precise // stack allocation instead of always over-allocating to 64B. - let mut bytes: MaybeUninit<[u8; Self::TEMP_CSTR_MAX_STACK_BYTES]> = + let mut bytes: MaybeUninit<[i8; Self::TEMP_CSTR_MAX_STACK_BYTES]> = MaybeUninit::uninit(); - let alloc_ptr = bytes.as_mut_ptr() as *mut u8; - let elem_ptr = roc_list.ptr_to_first_elem() as *mut _; + let alloc_ptr = bytes.as_mut_ptr() as *mut i8; + let elem_ptr = roc_list.ptr_to_first_elem() as *mut i8; // memcpy the bytes into the stack allocation - roc_memcpy(alloc_ptr.cast(), elem_ptr, len); + ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len); - // Null-terminate the new allocation. + // nul-terminate the new allocation. *(alloc_ptr.add(len)) = 0; // Convert the bytes to &CStr - let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); - let c_str = CStr::from_bytes_with_nul_unchecked(bytes); - Ok(func(c_str)) + Ok(func(alloc_ptr, len)) } else { // The string is too long to stack-allocate, so // do a heap allocation and then free it afterwards. - let align = core::mem::align_of::() as u32; - let alloc_ptr = roc_alloc(len, align); - let elem_ptr = roc_list.ptr_to_first_elem() as *mut _; + let align = core::mem::align_of::() as u32; + let alloc_ptr = roc_alloc(len, align) as *mut i8; + let elem_ptr = roc_list.ptr_to_first_elem() as *mut i8; // memcpy the bytes into the heap allocation - roc_memcpy(alloc_ptr.cast(), elem_ptr, len); + ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len); - // Null-terminate the new allocation. - *((alloc_ptr as *mut u8).add(len)) = 0; - - // Convert the bytes to &CStr - let bytes = std::slice::from_raw_parts(alloc_ptr.cast(), len); - let c_str = CStr::from_bytes_with_nul_unchecked(bytes); + // nul-terminate the new allocation. + *(alloc_ptr.add(len)) = 0; // Pass the &CStr to the function to get the answer. - let answer = func(c_str); + let answer = func(alloc_ptr, len); // Free the heap allocation. - roc_dealloc(alloc_ptr, align); + roc_dealloc(alloc_ptr.cast(), align); Ok(answer) } @@ -218,31 +216,27 @@ impl RocStr { None => { // The backing list was empty. // - // No need to do a heap allocation for an empty &CStr - we + // No need to do a heap allocation for an empty string - we // can just do a stack allocation that will live for the // duration of the function. - let c_str = CStr::from_bytes_with_nul_unchecked(&[0]); - - Ok(func(c_str)) + Ok(func([0i8].as_mut_ptr(), 0)) } } } } RocStrInnerRef::SmallString(small_str) => { - // Set the length byte to 0 to guarantee null-termination. - // We may alredy happen to be null-terminated if small_str.len() is + // Set the length byte to 0 to guarantee nul-termination. + // We may alredy happen to be nul-terminated if small_str.len() is // less than the maximum small string length, but we can't assume that, // and checking first would be more expensive than always writing the 0. unsafe { let len = small_str.len(); let mut bytes = small_str.bytes; - let ptr = bytes.as_mut_ptr(); + let ptr = bytes.as_mut_ptr() as *mut i8; - *((ptr.add(small_str.len())) as *mut u8) = 0; + *(ptr.add(small_str.len())) = 0; - let c_str = CStr::from_bytes_with_nul_unchecked(&bytes[0..=len]); - - Ok(func(c_str)) + Ok(func(ptr, len)) } } } @@ -282,7 +276,7 @@ impl TryFrom for RocStr { #[cfg(not(feature = "no_std"))] /// Like https://doc.rust-lang.org/std/ffi/struct.NulError.html but -/// only for interior nulls, not for missing null terminators. +/// only for interior nulls, not for missing nul terminators. #[derive(Debug, Clone, PartialEq, Eq)] pub struct InteriorNulError { pub pos: usize, diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 9adad6d215..07ce13c1ca 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -157,11 +157,16 @@ mod test_roc_std { #[cfg(test)] mod into_temp_c_str { + use core::slice; use roc_std::RocStr; + use std::ffi::CStr; #[test] fn empty_string() { - let answer = RocStr::empty().into_temp_c_str(|c_str| { + let answer = RocStr::empty().temp_c_utf8(|ptr, len| { + let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); + assert_eq!(c_str.to_str(), Ok("")); 42 @@ -183,7 +188,10 @@ mod into_temp_c_str { // e.g. "1" or "12" or "12345" etc. let string = String::from_utf8(bytes).unwrap(); - let answer = RocStr::from(string.as_str()).into_temp_c_str(|c_str| { + let answer = RocStr::from(string.as_str()).temp_c_utf8(|ptr, len| { + let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); + assert_eq!(c_str.to_str(), Ok(string.as_str())); 42 From 5b105da4bcde35fed3925a45a01a34b5db2ed446 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 21:30:20 -0400 Subject: [PATCH 21/65] Add RocStr::temp_c_utf16 --- roc_std/src/roc_str.rs | 210 ++++++++++++++++++++++++++++++++++------- 1 file changed, 176 insertions(+), 34 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index ec4b8ccee2..fd717fa5bd 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -17,6 +17,35 @@ use crate::RocList; #[repr(transparent)] pub struct RocStr(RocStrInner); +macro_rules! with_stack_bytes { + ($len:expr, $align_type:ty, $closure:expr) => { + { + use $crate::RocStr; + + if $len < RocStr::TEMP_CSTR_MAX_STACK_BYTES { + // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array + // has become stabilized, use that here in order to do a precise + // stack allocation instead of always over-allocating to 64B. + let mut bytes: MaybeUninit<[u8; RocStr::TEMP_CSTR_MAX_STACK_BYTES]> = + MaybeUninit::uninit(); + + $closure(bytes.as_mut_ptr() as *mut u8) + } else { + let align = core::mem::align_of::<$align_type>() as u32; + // The string is too long to stack-allocate, so + // do a heap allocation and then free it afterwards. + let ptr = roc_alloc($len, align) as *mut u8; + let answer = $closure(ptr); + + // Free the heap allocation. + roc_dealloc(ptr.cast(), align); + + answer + } + } + }; +} + impl RocStr { pub const SIZE: usize = core::mem::size_of::(); pub const MASK: u8 = 0b1000_0000; @@ -95,7 +124,7 @@ impl RocStr { } // If the string is under this many bytes, the into_temp_c_str method will allocate the - // CStr on the stack when the RocStr is non-unique. + // C string on the stack when the RocStr is non-unique. const TEMP_CSTR_MAX_STACK_BYTES: usize = 64; /// Turn this RocStr into a nul-terminated UTF-8 `*mut i8` and then provide access to that @@ -122,7 +151,7 @@ impl RocStr { /// /// This operation can fail because a RocStr may contain \0 characters, which a /// nul-terminated string must not. - pub fn temp_c_utf8 T>( + pub fn temp_c_utf8 T>( self, func: F, ) -> Result { @@ -154,7 +183,7 @@ impl RocStr { // First, copy the bytes over the original allocation - effectively // shifting everything over by one `usize`. Now we no longer have a - // refcount (but the &CStr won't use that anyway), but we do have a + // refcount (but the C string won't use that anyway), but we do have a // free `usize` at the end. // // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping @@ -170,16 +199,12 @@ impl RocStr { Ok(func(ptr, len)) } Some(_) => { - // The backing list was not unique, so we can't mutate it in-place. let len = roc_list.len(); - if len < Self::TEMP_CSTR_MAX_STACK_BYTES { - // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array - // has become stabilized, use that here in order to do a precise - // stack allocation instead of always over-allocating to 64B. - let mut bytes: MaybeUninit<[i8; Self::TEMP_CSTR_MAX_STACK_BYTES]> = - MaybeUninit::uninit(); - let alloc_ptr = bytes.as_mut_ptr() as *mut i8; + // The backing list was not unique, so we can't mutate it in-place. + // The backing list was not unique, so we can't mutate it in-place. + with_stack_bytes!(len, i8, |alloc_ptr| { + let alloc_ptr = alloc_ptr as *mut i8; let elem_ptr = roc_list.ptr_to_first_elem() as *mut i8; // memcpy the bytes into the stack allocation @@ -188,30 +213,8 @@ impl RocStr { // nul-terminate the new allocation. *(alloc_ptr.add(len)) = 0; - // Convert the bytes to &CStr - Ok(func(alloc_ptr, len)) - } else { - // The string is too long to stack-allocate, so - // do a heap allocation and then free it afterwards. - let align = core::mem::align_of::() as u32; - let alloc_ptr = roc_alloc(len, align) as *mut i8; - let elem_ptr = roc_list.ptr_to_first_elem() as *mut i8; - - // memcpy the bytes into the heap allocation - ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len); - - // nul-terminate the new allocation. - *(alloc_ptr.add(len)) = 0; - - // Pass the &CStr to the function to get the answer. - let answer = func(alloc_ptr, len); - - // Free the heap allocation. - roc_dealloc(alloc_ptr.cast(), align); - - Ok(answer) - } + }) } None => { // The backing list was empty. @@ -241,6 +244,145 @@ impl RocStr { } } } + + /// Turn this RocStr into a nul-terminated UTF-16 `*mut u16` and then provide access to + /// that `*mut u16` (as well as its length) for the duration of a given function. This is + /// designed to be an efficient way to turn a RocStr received from an application into + /// the nul-terminated UTF-16 `wchar_t*` needed by Windows API calls. + /// + /// **NOTE:** The length passed to the function is the same value that `RocStr::len` will + /// return; it does not count the nul terminator. So to convert it to a nul-terminated + /// slice of Rust bytes, call `slice::from_raw_parts` passing the given length + 1. + /// + /// This operation achieves efficiency by reusing allocated bytes from the RocStr itself, + /// and sometimes allocating on the stack. It does not allocate on the heap when given a + /// a small string or a string with unique refcount, but may allocate when given a large + /// string with non-unique refcount. (It will do a stack allocation if the string is under + /// 64 bytes; the stack allocation will only live for the duration of the called function.) + /// + /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes + /// to nul-terminate the string in-place. Small strings have an extra byte at the end + /// where the length is stored, which can become 0 for nul-termination. Heap-allocated + /// strings can have excess capacity which can hold a nul termiator, or if they have no + /// excess capacity, all the bytes can be shifted over the refcount in order to free up + /// a `usize` worth of free space at the end - which can easily fit a nul terminator. + /// + /// This operation can fail because a RocStr may contain \0 characters, which a + /// nul-terminated string must not. + pub fn temp_c_utf16 T>( + self, + func: F, + ) -> Result { + use crate::{roc_alloc, roc_dealloc, Storage}; + use core::mem::{align_of, MaybeUninit}; + + if let Some(pos) = self.first_nul_byte() { + return Err(InteriorNulError { pos, roc_str: self }); + } + + // When we don't have an existing allocation that can work, fall back on this. + // It uses either a stack allocation, or, if that would be too big, a heap allocation. + macro_rules! fallback { + ($len:expr) => { + with_stack_bytes!($len, u16, |alloc_ptr| { + let alloc_ptr = alloc_ptr as *mut u16; + + self.write_utf16_c(alloc_ptr); + + Ok(func(alloc_ptr, $len)) + }) + }; + } + + match self.as_enum_ref() { + RocStrInnerRef::HeapAllocated(roc_list) => { + let len = roc_list.len(); + + unsafe { + match roc_list.elements_and_storage() { + Some((_, storage)) if storage.get().is_unique() => { + // The backing RocList was unique, so we can mutate it in-place. + + // We need 1 extra u16 for the nul terminator. It must be a u16, + // not a u8, because we'll be providing a pointer to u16s. + let needed_bytes = (len + 1) * size_of::(); + + // We can use not only the capacity on the heap, but also + // the bytes originally used for the refcount. + let available_bytes = roc_list.capacity() + size_of::(); + + if needed_bytes < available_bytes { + debug_assert!(align_of::() >= align_of::()); + + // We happen to have sufficient excess capacity already, + // so we will be able to write the UTF-16 chars as well as + // the nul terminator into the existing allocation. + let ptr = roc_list.ptr_to_allocation() as *mut u16; + + self.write_utf16_c(ptr); + + Ok(func(ptr, len)) + } else { + // We didn't have sufficient excess capacity already, + // so we need to do either a new stack allocation or a new + // heap allocation. + fallback!(len) + } + } + Some(_) => { + // The backing list was not unique, so we can't mutate it in-place. + fallback!(len) + } + None => { + // The backing list was empty. + // + // No need to do a heap allocation for an empty string - we + // can just do a stack allocation that will live for the + // duration of the function. + Ok(func([0u16].as_mut_ptr(), 0)) + } + } + } + } + RocStrInnerRef::SmallString(small_str) => { + let len = small_str.len(); + + // We need 1 extra u16 for the nul terminator. It must be a u16, + // not a u8, because we'll be providing a pointer to u16s. + let needed_bytes = (len + 1) * size_of::(); + let available_bytes = size_of::(); + + unsafe { + if needed_bytes < available_bytes { + let mut bytes = small_str.bytes; + let ptr = bytes.as_mut_ptr() as *mut u16; + + self.write_utf16_c(ptr); + + Ok(func(ptr, len)) + } else { + fallback!(len) + } + } + } + } + } + + unsafe fn write_utf16_c(&self, dest_ptr: *mut u16) { + let str_slice = self.as_str(); + + // Write the UTF-8 source bytes into UTF-16 destination bytes. + for (index, wchar) in str_slice.encode_utf16().enumerate() { + unsafe { + *(dest_ptr.add(index)) = wchar; + } + } + + // nul-terminate + unsafe { + *(dest_ptr.add(str_slice.len())) = 0; + } + } } impl Deref for RocStr { From 307e6b12bbd0d0fde9e5a7d5f461fd46ebe0b3bc Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 22:15:08 -0400 Subject: [PATCH 22/65] Test large strings in roc_std --- roc_std/tests/test_roc_std.rs | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 07ce13c1ca..d91b51e959 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -175,18 +175,23 @@ mod into_temp_c_str { assert_eq!(Ok(42), answer); } + /// e.g. "1" or "12" or "12345" etc. + fn string_for_len(len: usize) -> String { + let bytes: Vec = (1..=len as u8).collect(); + + assert_eq!(bytes.len(), len); + + // The bytes should contain no nul characters. + assert!(bytes.iter().all(|byte| *byte != 0)); + + String::from_utf8(bytes).unwrap() + } + #[test] - fn small_string_all_lengths() { - for len in 1..super::ROC_SMALL_STR_CAPACITY { - let bytes: Vec = (1..=len as u8).collect(); - - assert_eq!(bytes.len(), len); - - // The bytes should contain no nul characters. - assert!(bytes.iter().all(|byte| *byte != 0)); - - // e.g. "1" or "12" or "12345" etc. - let string = String::from_utf8(bytes).unwrap(); + fn no_excess_capacity() { + // Test all the small strings, and also one large string + for len in 1..=(super::ROC_SMALL_STR_CAPACITY + 1) { + let string = string_for_len(len); let answer = RocStr::from(string.as_str()).temp_c_utf8(|ptr, len| { let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; From 3f01f766f52e88a4301b5b812528378be7c8d171 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 2 Jun 2022 22:33:15 -0400 Subject: [PATCH 23/65] Test temp_utf16 RocStr functions --- roc_std/tests/test_roc_std.rs | 76 +++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index d91b51e959..b3152063fa 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -156,23 +156,50 @@ mod test_roc_std { } #[cfg(test)] -mod into_temp_c_str { +mod temp_c_str { use core::slice; use roc_std::RocStr; use std::ffi::CStr; + fn verify_temp_c(string: &str) { + // temp_c_utf8 + { + let roc_str = RocStr::from(string); + let answer = roc_str.temp_c_utf8(|ptr, len| { + let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); + + assert_eq!(c_str.to_str(), Ok(string)); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + + // temp_c_utf16 + { + let roc_str = RocStr::from(string); + let answer = roc_str.temp_c_utf16(|ptr, len| { + let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + + // Verify that it's nul-terminated + assert_eq!(bytes[len], 0); + + let string = String::from_utf16(&bytes[0..len]).unwrap(); + + assert_eq!(string.as_str(), string); + + 42 + }); + + assert_eq!(Ok(42), answer); + } + } + #[test] fn empty_string() { - let answer = RocStr::empty().temp_c_utf8(|ptr, len| { - let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; - let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); - - assert_eq!(c_str.to_str(), Ok("")); - - 42 - }); - - assert_eq!(Ok(42), answer); + verify_temp_c(""); } /// e.g. "1" or "12" or "12345" etc. @@ -188,21 +215,18 @@ mod into_temp_c_str { } #[test] - fn no_excess_capacity() { - // Test all the small strings, and also one large string - for len in 1..=(super::ROC_SMALL_STR_CAPACITY + 1) { - let string = string_for_len(len); - - let answer = RocStr::from(string.as_str()).temp_c_utf8(|ptr, len| { - let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; - let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); - - assert_eq!(c_str.to_str(), Ok(string.as_str())); - - 42 - }); - - assert_eq!(Ok(42), answer); + fn small_strings() { + for len in 1..=super::ROC_SMALL_STR_CAPACITY { + verify_temp_c(&string_for_len(len)); } } + + #[test] + fn no_excess_capacity() { + // This is small enough that it should be a stack allocation for UTF-8 + verify_temp_c(&string_for_len(33)); + + // This is big enough that it should be a heap allocation for UTF-8 and UTF-16 + verify_temp_c(&string_for_len(65)); + } } From 6031a0ee101df080da12eda179a90325c1208639 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 3 Jun 2022 09:14:37 -0400 Subject: [PATCH 24/65] Extract RocList::ALLOC_ALIGNMENT constant --- roc_std/src/roc_list.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 25006f6583..b51098f9fb 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -22,6 +22,8 @@ pub struct RocList { } impl RocList { + const ALLOC_ALIGNMENT: usize = mem::align_of::().max(mem::align_of::()); + pub fn empty() -> Self { RocList { elements: None, @@ -91,7 +93,7 @@ where return; } - let alignment = cmp::max(mem::align_of::(), mem::align_of::()); + let alignment = Self::ALLOC_ALIGNMENT; let elements_offset = alignment; let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); @@ -307,7 +309,7 @@ impl Drop for RocList { mem::drop::(ManuallyDrop::take(&mut *elem_ptr)); } - let alignment = cmp::max(mem::align_of::(), mem::align_of::()); + let alignment = Self::ALLOC_ALIGNMENT; // Release the memory. roc_dealloc( From 38d5002c1345126552cd57d953dd109217d3a387 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 3 Jun 2022 12:14:06 -0400 Subject: [PATCH 25/65] Refactor how RocList does alignment --- roc_std/src/roc_list.rs | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index b51098f9fb..2aad94b3e1 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -22,7 +22,10 @@ pub struct RocList { } impl RocList { - const ALLOC_ALIGNMENT: usize = mem::align_of::().max(mem::align_of::()); + #[inline(always)] + fn alloc_alignment() -> usize { + mem::align_of::().max(mem::align_of::()) + } pub fn empty() -> Self { RocList { @@ -57,14 +60,18 @@ impl RocList { &*self } - pub(crate) fn elements_and_storage( - &self, - ) -> Option<(NonNull>, &Cell)> { + #[inline(always)] + fn elements_and_storage(&self) -> Option<(NonNull>, &Cell)> { let elements = self.elements?; let storage = unsafe { &*elements.as_ptr().cast::>().sub(1) }; Some((elements, storage)) } + pub(crate) fn storage(&self) -> Option { + self.elements_and_storage() + .map(|(_, storage)| storage.get()) + } + /// Useful for doing memcpy on the elements. Returns NULL if list is empty. pub(crate) unsafe fn ptr_to_first_elem(&self) -> *const T { unsafe { core::mem::transmute(self.elements) } @@ -93,7 +100,7 @@ where return; } - let alignment = Self::ALLOC_ALIGNMENT; + let alignment = Self::alloc_alignment(); let elements_offset = alignment; let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); @@ -309,7 +316,7 @@ impl Drop for RocList { mem::drop::(ManuallyDrop::take(&mut *elem_ptr)); } - let alignment = Self::ALLOC_ALIGNMENT; + let alignment = Self::alloc_alignment(); // Release the memory. roc_dealloc( From 308cb47b262761242e977bd963831fa531810ed0 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 3 Jun 2022 12:14:41 -0400 Subject: [PATCH 26/65] Add generic RocStr::temp_nul_terminated function --- roc_std/src/roc_str.rs | 207 +++++++++++++++++++++++++---------------- 1 file changed, 127 insertions(+), 80 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index fd717fa5bd..ed3d554224 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -18,7 +18,7 @@ use crate::RocList; pub struct RocStr(RocStrInner); macro_rules! with_stack_bytes { - ($len:expr, $align_type:ty, $closure:expr) => { + ($len:expr, $elem_type:ty, $closure:expr) => { { use $crate::RocStr; @@ -29,12 +29,12 @@ macro_rules! with_stack_bytes { let mut bytes: MaybeUninit<[u8; RocStr::TEMP_CSTR_MAX_STACK_BYTES]> = MaybeUninit::uninit(); - $closure(bytes.as_mut_ptr() as *mut u8) + $closure(bytes.as_mut_ptr() as *mut $elem_type) } else { - let align = core::mem::align_of::<$align_type>() as u32; + let align = core::mem::align_of::<$elem_type>() as u32; // The string is too long to stack-allocate, so // do a heap allocation and then free it afterwards. - let ptr = roc_alloc($len, align) as *mut u8; + let ptr = roc_alloc($len, align) as *mut $elem_type; let answer = $closure(ptr); // Free the heap allocation. @@ -155,10 +155,21 @@ impl RocStr { self, func: F, ) -> Result { + // Note that this function does not use temp_nul_terminated because it can be + // more efficient than that - due to knowing that it's already in UTF-8 and always + // has room for a nul terminator in the existing allocation (either in the refcount + // bytes, or, in a small string, in the length at the end of the string). + use core::mem::MaybeUninit; use crate::{roc_alloc, roc_dealloc}; + let nul_terminate = |alloc_ptr: *mut i8, len: usize| unsafe { + *(alloc_ptr.add(len)) = 0; + + Ok(func(alloc_ptr, len)) + }; + if let Some(pos) = self.first_nul_byte() { return Err(InteriorNulError { pos, roc_str: self }); } @@ -166,8 +177,8 @@ impl RocStr { match self.as_enum_ref() { RocStrInnerRef::HeapAllocated(roc_list) => { unsafe { - match roc_list.elements_and_storage() { - Some((_, storage)) if storage.get().is_unique() => { + match roc_list.storage() { + Some(storage) if storage.is_unique() => { // The backing RocList was unique, so we can mutate it in-place. let len = roc_list.len(); let ptr = if len < roc_list.capacity() { @@ -193,10 +204,7 @@ impl RocStr { alloc_ptr }; - // nul-terminate - *(ptr.add(len)) = 0; - - Ok(func(ptr, len)) + nul_terminate(ptr, len) } Some(_) => { let len = roc_list.len(); @@ -210,10 +218,7 @@ impl RocStr { // memcpy the bytes into the stack allocation ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len); - // nul-terminate the new allocation. - *(alloc_ptr.add(len)) = 0; - - Ok(func(alloc_ptr, len)) + nul_terminate(alloc_ptr, len) }) } None => { @@ -228,19 +233,11 @@ impl RocStr { } } RocStrInnerRef::SmallString(small_str) => { - // Set the length byte to 0 to guarantee nul-termination. - // We may alredy happen to be nul-terminated if small_str.len() is - // less than the maximum small string length, but we can't assume that, - // and checking first would be more expensive than always writing the 0. - unsafe { - let len = small_str.len(); - let mut bytes = small_str.bytes; - let ptr = bytes.as_mut_ptr() as *mut i8; + let mut bytes = small_str.bytes; - *(ptr.add(small_str.len())) = 0; - - Ok(func(ptr, len)) - } + // Even if the small string is at capacity, there will be room to write + // a nul terminator in the byte that's used to store the length. + nul_terminate(bytes.as_mut_ptr() as *mut i8, small_str.len()) } } } @@ -273,6 +270,28 @@ impl RocStr { self, func: F, ) -> Result { + self.temp_nul_terminated(|dest_ptr: *mut u16, str_slice: &str| { + // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + for (index, wchar) in str_slice.encode_utf16().enumerate() { + unsafe { + *(dest_ptr.add(index)) = wchar; + } + } + + func(dest_ptr, str_slice.len()) + }) + } + + /// Generic version of temp_c_utf8 and temp_c_utf16. + /// + /// One use for this is to convert slashes to backslashes in Windows paths; + /// this function provides the most efficient way to do that, because no extra + /// iteration pass is necessary; the conversion can be done after each translation + /// of a UTF-8 character to UTF-16. + pub fn temp_nul_terminated A>( + self, + func: F, + ) -> Result { use crate::{roc_alloc, roc_dealloc, Storage}; use core::mem::{align_of, MaybeUninit}; @@ -280,58 +299,56 @@ impl RocStr { return Err(InteriorNulError { pos, roc_str: self }); } + let nul_terminate = |alloc_ptr: *mut E, str_slice: &str| unsafe { + *(alloc_ptr.add(str_slice.len())) = E::zero(); + + Ok(func(alloc_ptr, str_slice)) + }; + // When we don't have an existing allocation that can work, fall back on this. // It uses either a stack allocation, or, if that would be too big, a heap allocation. - macro_rules! fallback { - ($len:expr) => { - with_stack_bytes!($len, u16, |alloc_ptr| { - let alloc_ptr = alloc_ptr as *mut u16; - - self.write_utf16_c(alloc_ptr); - - Ok(func(alloc_ptr, $len)) - }) - }; - } + let fallback = |str_slice: &str| unsafe { + with_stack_bytes!(str_slice.len(), E, |alloc_ptr: *mut E| { + nul_terminate(alloc_ptr, str_slice) + }) + }; match self.as_enum_ref() { RocStrInnerRef::HeapAllocated(roc_list) => { let len = roc_list.len(); unsafe { - match roc_list.elements_and_storage() { - Some((_, storage)) if storage.get().is_unique() => { + match roc_list.storage() { + Some(storage) if storage.is_unique() => { // The backing RocList was unique, so we can mutate it in-place. - // We need 1 extra u16 for the nul terminator. It must be a u16, - // not a u8, because we'll be providing a pointer to u16s. - let needed_bytes = (len + 1) * size_of::(); + // We need 1 extra elem for the nul terminator. It must be an elem, + // not a byte, because we'll be providing a pointer to elems. + let needed_bytes = (len + 1) * size_of::(); // We can use not only the capacity on the heap, but also // the bytes originally used for the refcount. let available_bytes = roc_list.capacity() + size_of::(); if needed_bytes < available_bytes { - debug_assert!(align_of::() >= align_of::()); + debug_assert!(align_of::() >= align_of::()); // We happen to have sufficient excess capacity already, // so we will be able to write the UTF-16 chars as well as // the nul terminator into the existing allocation. - let ptr = roc_list.ptr_to_allocation() as *mut u16; + let ptr = roc_list.ptr_to_allocation() as *mut E; - self.write_utf16_c(ptr); - - Ok(func(ptr, len)) + nul_terminate(ptr, self.as_str()) } else { // We didn't have sufficient excess capacity already, // so we need to do either a new stack allocation or a new // heap allocation. - fallback!(len) + fallback(self.as_str()) } } Some(_) => { // The backing list was not unique, so we can't mutate it in-place. - fallback!(len) + fallback(self.as_str()) } None => { // The backing list was empty. @@ -339,7 +356,7 @@ impl RocStr { // No need to do a heap allocation for an empty string - we // can just do a stack allocation that will live for the // duration of the function. - Ok(func([0u16].as_mut_ptr(), 0)) + Ok(func([E::zero()].as_mut_ptr() as *mut E, "")) } } } @@ -347,42 +364,19 @@ impl RocStr { RocStrInnerRef::SmallString(small_str) => { let len = small_str.len(); - // We need 1 extra u16 for the nul terminator. It must be a u16, - // not a u8, because we'll be providing a pointer to u16s. - let needed_bytes = (len + 1) * size_of::(); + // We need 1 extra elem for the nul terminator. It must be an elem, + // not a byte, because we'll be providing a pointer to elems. + let needed_bytes = (len + 1) * size_of::(); let available_bytes = size_of::(); - unsafe { - if needed_bytes < available_bytes { - let mut bytes = small_str.bytes; - let ptr = bytes.as_mut_ptr() as *mut u16; - - self.write_utf16_c(ptr); - - Ok(func(ptr, len)) - } else { - fallback!(len) - } + if needed_bytes < available_bytes { + nul_terminate(small_str.bytes.as_ptr() as *mut E, self.as_str()) + } else { + fallback(self.as_str()) } } } } - - unsafe fn write_utf16_c(&self, dest_ptr: *mut u16) { - let str_slice = self.as_str(); - - // Write the UTF-8 source bytes into UTF-16 destination bytes. - for (index, wchar) in str_slice.encode_utf16().enumerate() { - unsafe { - *(dest_ptr.add(index)) = wchar; - } - } - - // nul-terminate - unsafe { - *(dest_ptr.add(str_slice.len())) = 0; - } - } } impl Deref for RocStr { @@ -572,3 +566,56 @@ impl Hash for RocStr { self.as_str().hash(state) } } + +/// This is a struct that cannot be instantiated outside this module. +/// Its purpose is to guarantee that the UnicodeCodePoint trait is +/// only implemented for integers of 32 bits or fewer. +#[repr(transparent)] +pub struct NotExtensible; + +pub trait UnicodeCodePoint { + fn get_zero() -> (NotExtensible, Self); + + fn zero() -> Self + where + Self: Sized, + { + Self::get_zero().1 + } +} + +impl UnicodeCodePoint for u8 { + fn get_zero() -> (NotExtensible, Self) { + (NotExtensible, 0) + } +} + +impl UnicodeCodePoint for i8 { + fn get_zero() -> (NotExtensible, Self) { + (NotExtensible, 0) + } +} + +impl UnicodeCodePoint for u16 { + fn get_zero() -> (NotExtensible, Self) { + (NotExtensible, 0) + } +} + +impl UnicodeCodePoint for i16 { + fn get_zero() -> (NotExtensible, Self) { + (NotExtensible, 0) + } +} + +impl UnicodeCodePoint for u32 { + fn get_zero() -> (NotExtensible, Self) { + (NotExtensible, 0) + } +} + +impl UnicodeCodePoint for i32 { + fn get_zero() -> (NotExtensible, Self) { + (NotExtensible, 0) + } +} From ce259292140145f54352c8105fc849d33f0a8ce1 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 3 Jun 2022 12:32:40 -0400 Subject: [PATCH 27/65] Add more documentation to RocStr::temp_nul_terminated --- roc_std/src/lib.rs | 2 +- roc_std/src/roc_str.rs | 52 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 6fcd5d5c81..feb687831b 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -14,7 +14,7 @@ mod roc_str; mod storage; pub use roc_list::RocList; -pub use roc_str::RocStr; +pub use roc_str::{InteriorNulError, RocStr}; pub use storage::Storage; // A list of C functions that are being imported diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index ed3d554224..a1d71b2d39 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -282,12 +282,60 @@ impl RocStr { }) } - /// Generic version of temp_c_utf8 and temp_c_utf16. + pub fn temp_c_windows_path T>( + self, + func: F, + ) -> Result { + self.temp_nul_terminated(|dest_ptr: *mut u16, str_slice: &str| { + // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + for (index, mut wchar) in str_slice.encode_utf16().enumerate() { + // Replace slashes with backslashes + if wchar == '/' as u16 { + wchar = '\\' as u16 + }; + + unsafe { + *(dest_ptr.add(index)) = wchar; + } + } + + func(dest_ptr, str_slice.len()) + }) + } + + /// Generic version of temp_c_utf8 and temp_c_utf16. The given function will be + /// passed a pointer to elements of type E. The number of elements will be equal to + /// the length of the `&str` given as the other parameter. There will be a `0` element + /// at the end of those elements, even if there are no elements (in which case the + /// `&str` argument will be empty and the `*mut E` will point to a `0`). /// /// One use for this is to convert slashes to backslashes in Windows paths; /// this function provides the most efficient way to do that, because no extra /// iteration pass is necessary; the conversion can be done after each translation - /// of a UTF-8 character to UTF-16. + /// of a UTF-8 character to UTF-16. Here's how that would look: + /// + /// use roc_std::{RocStr, InteriorNulError}; + /// + /// pub fn temp_windows_path T>( + /// roc_str: RocStr, + /// func: F, + /// ) -> Result { + /// roc_str.temp_nul_terminated(|dest_ptr: *mut u16, str_slice: &str| { + /// // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + /// for (index, mut wchar) in str_slice.encode_utf16().enumerate() { + /// // Replace slashes with backslashes + /// if wchar == '/' as u16 { + /// wchar = '\\' as u16 + /// }; + /// + /// unsafe { + /// *(dest_ptr.add(index)) = wchar; + /// } + /// } + /// + /// func(dest_ptr, str_slice.len()) + /// }) + /// } pub fn temp_nul_terminated A>( self, func: F, From 983f2b9fad3db3a3b61debb505406e85962f93ff Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 3 Jun 2022 12:51:33 -0400 Subject: [PATCH 28/65] Add RocList::with_capacity --- roc_std/src/roc_list.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 2aad94b3e1..fcc9d3ca93 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -35,6 +35,32 @@ impl RocList { } } + /// Create an empty RocList with enough space preallocated to store + /// the requested number of elements. + pub fn with_capacity(elems: usize) -> Self { + let alignment = Self::alloc_alignment(); + + // Allocate new memory. + let non_null_elements = unsafe { + let ptr = roc_alloc(elems, alignment as u32); + let elements = ptr.cast::().add(alignment).cast::>(); + + // Initialize the reference count. + let storage_ptr = elements.cast::().sub(1); + storage_ptr.write(Storage::new_reference_counted()); + + NonNull::new(elements).unwrap_or_else(|| { + todo!("Call roc_panic with the info that an allocation failed."); + }) + }; + + RocList { + elements: Some(non_null_elements), + length: 0, + capacity: elems, + } + } + pub fn len(&self) -> usize { self.length } From e2012e96b7d9227044db969763330953a245f3a2 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Sat, 4 Jun 2022 14:01:59 -0400 Subject: [PATCH 29/65] Add RocList::reserve and RocStr::reserve --- roc_std/src/roc_list.rs | 171 ++++++++++++++++++++++++++++------------ roc_std/src/roc_str.rs | 45 +++++++++++ 2 files changed, 166 insertions(+), 50 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index fcc9d3ca93..00f0665f68 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -8,8 +8,7 @@ use core::{ intrinsics::copy_nonoverlapping, mem::{self, ManuallyDrop}, ops::Deref, - ptr, - ptr::NonNull, + ptr::{self, NonNull}, }; use crate::{roc_alloc, roc_dealloc, roc_realloc, storage::Storage}; @@ -28,7 +27,7 @@ impl RocList { } pub fn empty() -> Self { - RocList { + Self { elements: None, length: 0, capacity: 0, @@ -38,11 +37,24 @@ impl RocList { /// Create an empty RocList with enough space preallocated to store /// the requested number of elements. pub fn with_capacity(elems: usize) -> Self { + Self { + elements: Some(Self::elems_with_capacity(elems)), + length: 0, + capacity: elems, + } + } + + fn elems_with_capacity(elems: usize) -> NonNull> { + let alignment = Self::alloc_alignment(); + let alloc_ptr = unsafe { roc_alloc(elems * mem::size_of::(), alignment as u32) }; + + Self::elems_from_allocation(alloc_ptr) + } + + fn elems_from_allocation(ptr: *mut c_void) -> NonNull> { let alignment = Self::alloc_alignment(); - // Allocate new memory. - let non_null_elements = unsafe { - let ptr = roc_alloc(elems, alignment as u32); + unsafe { let elements = ptr.cast::().add(alignment).cast::>(); // Initialize the reference count. @@ -52,12 +64,6 @@ impl RocList { NonNull::new(elements).unwrap_or_else(|| { todo!("Call roc_panic with the info that an allocation failed."); }) - }; - - RocList { - elements: Some(non_null_elements), - length: 0, - capacity: elems, } } @@ -113,6 +119,97 @@ impl RocList where T: Clone, { + /// Increase a RocList's capacity by at least the requested number of elements (possibly more). + /// + /// May return a new RocList, if the provided one was not unique. + pub fn reserve(self, num_elems: usize) -> Self { + let new_elems; + let old_elements_ptr; + + match self.elements_and_storage() { + Some((elements, storage)) => { + if storage.get().is_unique() { + unsafe { + let original_ptr = self.ptr_to_allocation(); + + // Try to reallocate in-place. + let new_ptr = roc_realloc( + original_ptr as *mut _, + num_elems, + self.length, + Self::alloc_alignment() as u32, + ); + + if new_ptr == original_ptr as *mut _ { + // We successfully reallocated in-place; we're done! + return self; + } else if new_ptr.is_null() { + todo!("Reallocation failed"); + } else { + // We got back a different allocation; copy the existing elements + // into it. We don't need to increment their refcounts because + // The existing allocation that references to them is now gone and + // no longer referencing them. + new_elems = Self::elems_from_allocation(new_ptr); + old_elements_ptr = elements.as_ptr(); + } + + // Note that realloc automatically deallocates the old allocation, + // so we don't need to call roc_dealloc here. + } + } else { + // Make a new allocation + new_elems = Self::elems_with_capacity(num_elems); + old_elements_ptr = elements.as_ptr(); + + // Decrease the current allocation's reference count. + let mut new_storage = storage.get(); + let needs_dealloc = new_storage.decrease(); + + if needs_dealloc { + let alignment = Self::alloc_alignment(); + + // Unlike in Drop, do *not* decrement the refcounts of all the elements! + // The new allocation is referencing them, so instead of incrementing them all + // all just to decrement them again here, we neither increment nor decrement them. + unsafe { + // Release the memory. + roc_dealloc( + old_elements_ptr.cast::().sub(alignment).cast(), + alignment as u32, + ); + } + } else { + if !new_storage.is_readonly() { + // Write the storage back. + storage.set(new_storage); + } + } + } + } + None => { + // This is an empty list, so `reserve` is the same as `with_capacity`. + return Self::with_capacity(num_elems); + } + } + + unsafe { + // Copy the old elements to the new allocation. + copy_nonoverlapping(old_elements_ptr, new_elems.as_ptr(), self.length); + } + + let length = self.length; + + // We already manually cleaned up `self` by now, so don't let its Drop run too. + mem::forget(self); + + Self { + elements: Some(new_elems), + length, + capacity: num_elems, + } + } + pub fn from_slice(slice: &[T]) -> Self { let mut list = Self::empty(); list.extend_from_slice(slice); @@ -131,18 +228,22 @@ where let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); - let new_ptr = if let Some((elements, storage)) = self.elements_and_storage() { + let non_null_elements = if let Some((elements, storage)) = self.elements_and_storage() { // Decrement the list's refence count. let mut copy = storage.get(); let is_unique = copy.decrease(); if is_unique { // If the memory is not shared, we can reuse the memory. - let old_size = elements_offset + mem::size_of::() * self.len(); - unsafe { + let old_size = elements_offset + (mem::size_of::() * self.len()); + + let new_ptr = unsafe { let ptr = elements.as_ptr().cast::().sub(alignment).cast(); + roc_realloc(ptr, new_size, old_size, alignment as u32).cast() - } + }; + + Self::elems_from_allocation(new_ptr) } else { if !copy.is_readonly() { // Write the decremented reference count back. @@ -150,49 +251,19 @@ where } // Allocate new memory. - let new_ptr = unsafe { roc_alloc(new_size, alignment as u32) }; - let new_elements = unsafe { - new_ptr - .cast::() - .add(alignment) - .cast::>() - }; - - // Initialize the reference count. - unsafe { - let storage_ptr = new_elements.cast::().sub(1); - storage_ptr.write(Storage::new_reference_counted()); - } + let new_elements = Self::elems_with_capacity(new_size); // Copy the old elements to the new allocation. unsafe { - copy_nonoverlapping(elements.as_ptr(), new_elements, self.length); + copy_nonoverlapping(elements.as_ptr(), new_elements.as_ptr(), self.length); } - new_ptr + new_elements } } else { - // Allocate new memory. - let new_ptr = unsafe { roc_alloc(new_size, alignment as u32) }; - let new_elements = unsafe { new_ptr.cast::().add(elements_offset).cast::() }; - - // Initialize the reference count. - unsafe { - let storage_ptr = new_elements.cast::().sub(1); - storage_ptr.write(Storage::new_reference_counted()); - } - - new_ptr + Self::elems_with_capacity(new_size) }; - let elements = unsafe { - new_ptr - .cast::() - .add(elements_offset) - .cast::>() - }; - - let non_null_elements = NonNull::new(elements).unwrap(); self.elements = Some(non_null_elements); let elements = self.elements.unwrap().as_ptr(); diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index a1d71b2d39..58518b819b 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -115,6 +115,51 @@ impl RocStr { &*self } + /// Create an empty RocStr with enough space preallocated to store + /// the requested number of bytes. + pub fn with_capacity(bytes: usize) -> Self { + if bytes <= SmallString::CAPACITY { + RocStr(RocStrInner { + small_string: SmallString::empty(), + }) + } else { + // The requested capacity won't fit in a small string; we need to go big. + RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(RocList::with_capacity(bytes)), + }) + } + } + + /// Increase a RocStr's capacity by at least the requested number of bytes (possibly more). + /// + /// May return a new RocStr, if the provided one was not unique. + pub fn reserve(mut self, bytes: usize) -> Self { + if self.is_small_str() { + let small_str = unsafe { self.0.small_string }; + let target_cap = small_str.len() + bytes; + + if target_cap <= SmallString::CAPACITY { + // The small string already has enough capacity; return it unmodified. + self + } else { + // The requested capacity won't fit in a small string; we need to go big. + let mut roc_list = RocList::with_capacity(target_cap); + + roc_list.extend_from_slice(&small_str.bytes); + + RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(roc_list), + }) + } + } else { + let roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) }; + + RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(roc_list.reserve(bytes)), + }) + } + } + /// Returns the index of the first interior \0 byte in the string, or None if there are none. fn first_nul_byte(&self) -> Option { match self.as_enum_ref() { From f43f15bacc98af12d3b5b2ed6a75e82e9e6dfcc2 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Sat, 4 Jun 2022 14:56:10 -0400 Subject: [PATCH 30/65] Add roc_std::RocBox --- roc_std/src/lib.rs | 1 + roc_std/src/roc_box.rs | 171 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 roc_std/src/roc_box.rs diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index feb687831b..2651f4c6d1 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -9,6 +9,7 @@ use core::mem::{ManuallyDrop, MaybeUninit}; use core::ops::Drop; use core::str; +mod roc_box; mod roc_list; mod roc_str; mod storage; diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs new file mode 100644 index 0000000000..b65ffc9b04 --- /dev/null +++ b/roc_std/src/roc_box.rs @@ -0,0 +1,171 @@ +#![deny(unsafe_op_in_unsafe_fn)] + +use core::{ + cell::Cell, + cmp::{self, Ordering}, + fmt::Debug, + mem::{self, ManuallyDrop}, + ops::Deref, + ptr::{self, NonNull}, +}; + +use crate::{roc_alloc, roc_dealloc, storage::Storage}; + +#[repr(C)] +pub struct RocBox { + contents: NonNull>, +} + +impl RocBox { + pub fn init(contents: T) -> Self { + let alignment = Self::alloc_alignment(); + let bytes = mem::size_of_val(&contents) + alignment; + + let ptr = unsafe { roc_alloc(bytes, alignment as u32) }; + + // Initialize the reference count. + unsafe { + let storage_ptr = ptr.cast::(); + + storage_ptr.write(Storage::new_reference_counted()); + } + + let contents = unsafe { + let contents_ptr = ptr.cast::().add(alignment).cast::>(); + + *contents_ptr = ManuallyDrop::new(contents); + + if true { + todo!("Increment the refcount of `contents`, and also do that in RocList extend_from_slice."); + } + + NonNull::new(contents_ptr).unwrap_or_else(|| { + todo!("Call roc_panic with the info that an allocation failed."); + }) + }; + + Self { contents } + } + + #[inline(always)] + fn alloc_alignment() -> usize { + mem::align_of::().max(mem::align_of::()) + } + + pub fn into_inner(self) -> T { + unsafe { ptr::read(self.contents.as_ptr() as *mut T) } + } + + fn storage(&self) -> &Cell { + unsafe { + &*self + .contents + .as_ptr() + .cast::() + .sub(mem::size_of::()) + .cast::>() + } + } +} + +impl Deref for RocBox { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { self.contents.as_ref() } + } +} + +impl PartialEq> for RocBox +where + T: PartialEq, +{ + fn eq(&self, other: &RocBox) -> bool { + self.deref() == other.deref() + } +} + +impl Eq for RocBox where T: Eq {} + +impl PartialOrd> for RocBox +where + T: PartialOrd, +{ + fn partial_cmp(&self, other: &RocBox) -> Option { + let self_contents = unsafe { self.contents.as_ref() }; + let other_contents = unsafe { other.contents.as_ref() }; + + self_contents.partial_cmp(other_contents) + } +} + +impl Ord for RocBox +where + T: Ord, +{ + fn cmp(&self, other: &Self) -> Ordering { + let self_contents = unsafe { self.contents.as_ref() }; + let other_contents = unsafe { other.contents.as_ref() }; + + self_contents.cmp(other_contents) + } +} + +impl Debug for RocBox +where + T: Debug, +{ + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.deref().fmt(f) + } +} + +impl Clone for RocBox { + fn clone(&self) -> Self { + let storage = self.storage(); + let mut new_storage = storage.get(); + + // Increment the reference count + if !new_storage.is_readonly() { + new_storage.increment_reference_count(); + storage.set(new_storage); + } + + Self { + contents: self.contents, + } + } +} + +impl Drop for RocBox { + fn drop(&mut self) { + let storage = self.storage(); + let contents = self.contents; + + // Decrease the list's reference count. + let mut new_storage = storage.get(); + let needs_dealloc = new_storage.decrease(); + + if needs_dealloc { + unsafe { + // Drop the stored contents. + let contents_ptr = contents.as_ptr(); + + mem::drop::(ManuallyDrop::take(&mut *contents_ptr)); + + let alignment = Self::alloc_alignment(); + + // Release the memory. + roc_dealloc( + contents.as_ptr().cast::().sub(alignment).cast(), + alignment as u32, + ); + } + } else { + if !new_storage.is_readonly() { + // Write the storage back. + storage.set(new_storage); + } + } + } +} From 7d2e0d91925deb4e49db1a0a95be206500b12e50 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Sat, 4 Jun 2022 15:10:47 -0400 Subject: [PATCH 31/65] Use NonNull more to verify that (re)allocations succeeded --- roc_std/src/roc_box.rs | 9 +++++---- roc_std/src/roc_list.rs | 35 +++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs index b65ffc9b04..6ffb2f7205 100644 --- a/roc_std/src/roc_box.rs +++ b/roc_std/src/roc_box.rs @@ -17,7 +17,7 @@ pub struct RocBox { } impl RocBox { - pub fn init(contents: T) -> Self { + pub fn new(contents: T) -> Self { let alignment = Self::alloc_alignment(); let bytes = mem::size_of_val(&contents) + alignment; @@ -39,9 +39,10 @@ impl RocBox { todo!("Increment the refcount of `contents`, and also do that in RocList extend_from_slice."); } - NonNull::new(contents_ptr).unwrap_or_else(|| { - todo!("Call roc_panic with the info that an allocation failed."); - }) + // We already verified that the original alloc pointer was non-null, + // and this one is the alloc pointer with `alignment` bytes added to it, + // so it should be non-null too. + NonNull::new_unchecked(contents_ptr) }; Self { contents } diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 00f0665f68..d60c8d21cb 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -48,22 +48,29 @@ impl RocList { let alignment = Self::alloc_alignment(); let alloc_ptr = unsafe { roc_alloc(elems * mem::size_of::(), alignment as u32) }; - Self::elems_from_allocation(alloc_ptr) + Self::elems_from_allocation(NonNull::new(alloc_ptr).unwrap_or_else(|| { + todo!("Call roc_panic with the info that an allocation failed."); + })) } - fn elems_from_allocation(ptr: *mut c_void) -> NonNull> { + fn elems_from_allocation(allocation: NonNull) -> NonNull> { let alignment = Self::alloc_alignment(); + let alloc_ptr = allocation.as_ptr(); unsafe { - let elements = ptr.cast::().add(alignment).cast::>(); + let elem_ptr = alloc_ptr + .cast::() + .add(alignment) + .cast::>(); // Initialize the reference count. - let storage_ptr = elements.cast::().sub(1); - storage_ptr.write(Storage::new_reference_counted()); + alloc_ptr + .cast::() + .write(Storage::new_reference_counted()); - NonNull::new(elements).unwrap_or_else(|| { - todo!("Call roc_panic with the info that an allocation failed."); - }) + // The original alloc pointer was non-null, and this one is the alloc pointer + // with `alignment` bytes added to it, so it should be non-null too. + NonNull::new_unchecked(elem_ptr) } } @@ -143,14 +150,16 @@ where if new_ptr == original_ptr as *mut _ { // We successfully reallocated in-place; we're done! return self; - } else if new_ptr.is_null() { - todo!("Reallocation failed"); } else { // We got back a different allocation; copy the existing elements // into it. We don't need to increment their refcounts because // The existing allocation that references to them is now gone and // no longer referencing them. - new_elems = Self::elems_from_allocation(new_ptr); + new_elems = Self::elems_from_allocation( + NonNull::new(new_ptr).unwrap_or_else(|| { + todo!("Reallocation failed"); + }), + ); old_elements_ptr = elements.as_ptr(); } @@ -243,7 +252,9 @@ where roc_realloc(ptr, new_size, old_size, alignment as u32).cast() }; - Self::elems_from_allocation(new_ptr) + Self::elems_from_allocation(NonNull::new(new_ptr).unwrap_or_else(|| { + todo!("Reallocation failed"); + })) } else { if !copy.is_readonly() { // Write the decremented reference count back. From 8e28eea4aeb8b826196142c71d398c85038d2e6e Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:18:52 -0400 Subject: [PATCH 32/65] Panic if allocation failed --- roc_std/src/roc_box.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs index 6ffb2f7205..30e3c45875 100644 --- a/roc_std/src/roc_box.rs +++ b/roc_std/src/roc_box.rs @@ -23,6 +23,10 @@ impl RocBox { let ptr = unsafe { roc_alloc(bytes, alignment as u32) }; + if ptr.is_null() { + todo!("Call roc_panic with the info that an allocation failed."); + } + // Initialize the reference count. unsafe { let storage_ptr = ptr.cast::(); From d20d337dd29a5c8f69bf250eed25ebc672e955ee Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:19:41 -0400 Subject: [PATCH 33/65] Make nul terminator code work with any terminator This is specifically useful for newline termination --- roc_std/src/roc_str.rs | 151 +++++++++++++++++++--------------- roc_std/tests/test_roc_std.rs | 4 +- 2 files changed, 88 insertions(+), 67 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 58518b819b..a67632866c 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -172,14 +172,29 @@ impl RocStr { // C string on the stack when the RocStr is non-unique. const TEMP_CSTR_MAX_STACK_BYTES: usize = 64; - /// Turn this RocStr into a nul-terminated UTF-8 `*mut i8` and then provide access to that - /// `*mut i8` (as well as its length) for the duration of a given function. This is - /// designed to be an efficient way to turn a RocStr received from an application into - /// the nul-terminated UTF-8 `char*` needed by UNIX syscalls. + /// Like with_utf8_terminator, except it can fail because a RocStr may + /// contain \0 characters, which a nul-terminated string must not. + pub fn utf8_nul_terminated T>( + self, + func: F, + ) -> Result { + if let Some(pos) = self.first_nul_byte() { + Err(InteriorNulError { pos, roc_str: self }) + } else { + Ok(self.with_utf8_terminator(b'\0', func)) + } + } + + /// Turn this RocStr into a UTF-8 `*mut u8`, terminate it with the given character + /// (almost always either `b'\n'` or b`\0`) and then provide access to that + /// `*mut u8` (as well as its length) for the duration of a given function. This is + /// designed to be an efficient way to turn a `RocStr` received from an application into + /// either the nul-terminated UTF-8 `char*` needed by UNIX syscalls, or into a + /// newline-terminated string to write to stdout or stderr (for a "println"-style effect). /// /// **NOTE:** The length passed to the function is the same value that `RocStr::len` will - /// return; it does not count the nul terminator. So to convert it to a nul-terminated - /// slice of Rust bytes, call `slice::from_raw_parts` passing the given length + 1. + /// return; it does not count the terminator. So to convert it to a nul-terminated slice + /// of Rust bytes (for example), call `slice::from_raw_parts` passing the given length + 1. /// /// This operation achieves efficiency by reusing allocated bytes from the RocStr itself, /// and sometimes allocating on the stack. It does not allocate on the heap when given a @@ -189,36 +204,25 @@ impl RocStr { /// /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes /// to nul-terminate the string in-place. Small strings have an extra byte at the end - /// where the length is stored, which can become 0 for nul-termination. Heap-allocated - /// strings can have excess capacity which can hold a nul termiator, or if they have no + /// where the length is stored, which can be replaced with the terminator. Heap-allocated + /// strings can have excess capacity which can hold a termiator, or if they have no /// excess capacity, all the bytes can be shifted over the refcount in order to free up - /// a `usize` worth of free space at the end - which can easily fit a nul terminator. - /// - /// This operation can fail because a RocStr may contain \0 characters, which a - /// nul-terminated string must not. - pub fn temp_c_utf8 T>( - self, - func: F, - ) -> Result { - // Note that this function does not use temp_nul_terminated because it can be + /// a `usize` worth of free space at the end - which can easily fit a 1-byte terminator. + pub fn with_utf8_terminator T>(self, terminator: u8, func: F) -> T { + // Note that this function does not use with_terminator because it can be // more efficient than that - due to knowing that it's already in UTF-8 and always - // has room for a nul terminator in the existing allocation (either in the refcount + // has room for a 1-byte terminator in the existing allocation (either in the refcount // bytes, or, in a small string, in the length at the end of the string). - use core::mem::MaybeUninit; use crate::{roc_alloc, roc_dealloc}; - let nul_terminate = |alloc_ptr: *mut i8, len: usize| unsafe { - *(alloc_ptr.add(len)) = 0; + let terminate = |alloc_ptr: *mut u8, len: usize| unsafe { + *(alloc_ptr.add(len)) = terminator; - Ok(func(alloc_ptr, len)) + func(alloc_ptr, len) }; - if let Some(pos) = self.first_nul_byte() { - return Err(InteriorNulError { pos, roc_str: self }); - } - match self.as_enum_ref() { RocStrInnerRef::HeapAllocated(roc_list) => { unsafe { @@ -229,13 +233,13 @@ impl RocStr { let ptr = if len < roc_list.capacity() { // We happen to have excess capacity already, so we will be able // to write the \0 into the first byte of excess capacity. - roc_list.ptr_to_first_elem() as *mut i8 + roc_list.ptr_to_first_elem() as *mut u8 } else { // We always have an allocation that's even bigger than necessary, // because the refcount bytes take up more than the 1B needed for // the \0 at the end. We just need to shift the bytes over on top // of the refcount. - let alloc_ptr = roc_list.ptr_to_allocation() as *mut i8; + let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8; // First, copy the bytes over the original allocation - effectively // shifting everything over by one `usize`. Now we no longer have a @@ -244,26 +248,26 @@ impl RocStr { // // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping // because the regions definitely overlap! - ptr::copy(roc_list.ptr_to_first_elem() as *mut i8, alloc_ptr, len); + ptr::copy(roc_list.ptr_to_first_elem() as *mut u8, alloc_ptr, len); alloc_ptr }; - nul_terminate(ptr, len) + terminate(ptr, len) } Some(_) => { let len = roc_list.len(); // The backing list was not unique, so we can't mutate it in-place. // The backing list was not unique, so we can't mutate it in-place. - with_stack_bytes!(len, i8, |alloc_ptr| { - let alloc_ptr = alloc_ptr as *mut i8; - let elem_ptr = roc_list.ptr_to_first_elem() as *mut i8; + with_stack_bytes!(len, u8, |alloc_ptr| { + let alloc_ptr = alloc_ptr as *mut u8; + let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8; // memcpy the bytes into the stack allocation ptr::copy_nonoverlapping(elem_ptr, alloc_ptr, len); - nul_terminate(alloc_ptr, len) + terminate(alloc_ptr, len) }) } None => { @@ -272,7 +276,7 @@ impl RocStr { // No need to do a heap allocation for an empty string - we // can just do a stack allocation that will live for the // duration of the function. - Ok(func([0i8].as_mut_ptr(), 0)) + func([terminator].as_mut_ptr(), 0) } } } @@ -282,11 +286,24 @@ impl RocStr { // Even if the small string is at capacity, there will be room to write // a nul terminator in the byte that's used to store the length. - nul_terminate(bytes.as_mut_ptr() as *mut i8, small_str.len()) + terminate(bytes.as_mut_ptr() as *mut u8, small_str.len()) } } } + /// Like with_utf16_terminator, except it can fail because a RocStr may + /// contain \0 characters, which a nul-terminated string must not. + pub fn utf16_nul_terminated T>( + self, + func: F, + ) -> Result { + if let Some(pos) = self.first_nul_byte() { + Err(InteriorNulError { pos, roc_str: self }) + } else { + Ok(self.with_utf16_terminator(0, func)) + } + } + /// Turn this RocStr into a nul-terminated UTF-16 `*mut u16` and then provide access to /// that `*mut u16` (as well as its length) for the duration of a given function. This is /// designed to be an efficient way to turn a RocStr received from an application into @@ -311,11 +328,12 @@ impl RocStr { /// /// This operation can fail because a RocStr may contain \0 characters, which a /// nul-terminated string must not. - pub fn temp_c_utf16 T>( + pub fn with_utf16_terminator T>( self, + terminator: u16, func: F, - ) -> Result { - self.temp_nul_terminated(|dest_ptr: *mut u16, str_slice: &str| { + ) -> T { + self.with_terminator(terminator, |dest_ptr: *mut u16, str_slice: &str| { // Translate UTF-8 source bytes into UTF-16 and write them into the destination. for (index, wchar) in str_slice.encode_utf16().enumerate() { unsafe { @@ -327,25 +345,31 @@ impl RocStr { }) } - pub fn temp_c_windows_path T>( + pub fn with_windows_path T>( self, func: F, ) -> Result { - self.temp_nul_terminated(|dest_ptr: *mut u16, str_slice: &str| { - // Translate UTF-8 source bytes into UTF-16 and write them into the destination. - for (index, mut wchar) in str_slice.encode_utf16().enumerate() { - // Replace slashes with backslashes - if wchar == '/' as u16 { - wchar = '\\' as u16 - }; + if let Some(pos) = self.first_nul_byte() { + Err(InteriorNulError { pos, roc_str: self }) + } else { + let answer = self.with_terminator(0, |dest_ptr: *mut u16, str_slice: &str| { + // Translate UTF-8 source bytes into UTF-16 and write them into the destination. + for (index, mut wchar) in str_slice.encode_utf16().enumerate() { + // Replace slashes with backslashes + if wchar == '/' as u16 { + wchar = '\\' as u16 + }; - unsafe { - *(dest_ptr.add(index)) = wchar; + unsafe { + *(dest_ptr.add(index)) = wchar; + } } - } - func(dest_ptr, str_slice.len()) - }) + func(dest_ptr, str_slice.len()) + }); + + Ok(answer) + } } /// Generic version of temp_c_utf8 and temp_c_utf16. The given function will be @@ -381,28 +405,25 @@ impl RocStr { /// func(dest_ptr, str_slice.len()) /// }) /// } - pub fn temp_nul_terminated A>( + pub fn with_terminator A>( self, + terminator: E, func: F, - ) -> Result { + ) -> A { use crate::{roc_alloc, roc_dealloc, Storage}; use core::mem::{align_of, MaybeUninit}; - if let Some(pos) = self.first_nul_byte() { - return Err(InteriorNulError { pos, roc_str: self }); - } + let terminate = |alloc_ptr: *mut E, str_slice: &str| unsafe { + *(alloc_ptr.add(str_slice.len())) = terminator; - let nul_terminate = |alloc_ptr: *mut E, str_slice: &str| unsafe { - *(alloc_ptr.add(str_slice.len())) = E::zero(); - - Ok(func(alloc_ptr, str_slice)) + func(alloc_ptr, str_slice) }; // When we don't have an existing allocation that can work, fall back on this. // It uses either a stack allocation, or, if that would be too big, a heap allocation. let fallback = |str_slice: &str| unsafe { with_stack_bytes!(str_slice.len(), E, |alloc_ptr: *mut E| { - nul_terminate(alloc_ptr, str_slice) + terminate(alloc_ptr, str_slice) }) }; @@ -431,7 +452,7 @@ impl RocStr { // the nul terminator into the existing allocation. let ptr = roc_list.ptr_to_allocation() as *mut E; - nul_terminate(ptr, self.as_str()) + terminate(ptr, self.as_str()) } else { // We didn't have sufficient excess capacity already, // so we need to do either a new stack allocation or a new @@ -449,7 +470,7 @@ impl RocStr { // No need to do a heap allocation for an empty string - we // can just do a stack allocation that will live for the // duration of the function. - Ok(func([E::zero()].as_mut_ptr() as *mut E, "")) + func([terminator].as_mut_ptr() as *mut E, "") } } } @@ -463,7 +484,7 @@ impl RocStr { let available_bytes = size_of::(); if needed_bytes < available_bytes { - nul_terminate(small_str.bytes.as_ptr() as *mut E, self.as_str()) + terminate(small_str.bytes.as_ptr() as *mut E, self.as_str()) } else { fallback(self.as_str()) } diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index b3152063fa..604f853b73 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -165,7 +165,7 @@ mod temp_c_str { // temp_c_utf8 { let roc_str = RocStr::from(string); - let answer = roc_str.temp_c_utf8(|ptr, len| { + let answer = roc_str.utf8_nul_terminated(|ptr, len| { let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); @@ -180,7 +180,7 @@ mod temp_c_str { // temp_c_utf16 { let roc_str = RocStr::from(string); - let answer = roc_str.temp_c_utf16(|ptr, len| { + let answer = roc_str.utf16_nul_terminated(|ptr, len| { let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; // Verify that it's nul-terminated From 872bceabe407a3654e5faf9351bb39ab241184b2 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:21:11 -0400 Subject: [PATCH 34/65] Drop obsolete UnicodeCodePoint trait --- roc_std/src/roc_str.rs | 53 ------------------------------------------ 1 file changed, 53 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index a67632866c..e0bac40146 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -680,56 +680,3 @@ impl Hash for RocStr { self.as_str().hash(state) } } - -/// This is a struct that cannot be instantiated outside this module. -/// Its purpose is to guarantee that the UnicodeCodePoint trait is -/// only implemented for integers of 32 bits or fewer. -#[repr(transparent)] -pub struct NotExtensible; - -pub trait UnicodeCodePoint { - fn get_zero() -> (NotExtensible, Self); - - fn zero() -> Self - where - Self: Sized, - { - Self::get_zero().1 - } -} - -impl UnicodeCodePoint for u8 { - fn get_zero() -> (NotExtensible, Self) { - (NotExtensible, 0) - } -} - -impl UnicodeCodePoint for i8 { - fn get_zero() -> (NotExtensible, Self) { - (NotExtensible, 0) - } -} - -impl UnicodeCodePoint for u16 { - fn get_zero() -> (NotExtensible, Self) { - (NotExtensible, 0) - } -} - -impl UnicodeCodePoint for i16 { - fn get_zero() -> (NotExtensible, Self) { - (NotExtensible, 0) - } -} - -impl UnicodeCodePoint for u32 { - fn get_zero() -> (NotExtensible, Self) { - (NotExtensible, 0) - } -} - -impl UnicodeCodePoint for i32 { - fn get_zero() -> (NotExtensible, Self) { - (NotExtensible, 0) - } -} From 8755a390eb8552e6eece62627326d7edcd2047ed Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:32:31 -0400 Subject: [PATCH 35/65] clippy --- roc_std/src/lib.rs | 20 ++++++++++---------- roc_std/src/roc_box.rs | 8 +++----- roc_std/src/roc_list.rs | 16 ++++++---------- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 2651f4c6d1..3a653da53e 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -347,13 +347,13 @@ impl RocDec { } #[cfg(not(feature = "no_std"))] - fn to_str_helper(&self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> usize { + fn to_str_helper(self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> usize { // TODO there is probably some way to implement this logic without std::io::Write, // which in turn would make this method work with no_std. use std::io::Write; if self.as_i128() == 0 { - write!(&mut bytes[..], "{}", "0").unwrap(); + write!(&mut bytes[..], "0").unwrap(); return 1; } @@ -372,17 +372,17 @@ impl RocDec { // If self represents 1234.5678, then bytes is b"1234567800000000000000". let mut i = Self::MAX_STR_LENGTH - 1; // Find the last place where we have actual data. - while bytes[i] == 0 { - i = i - 1; + while bytes[i] == b'0' { + i -= 1; } // At this point i is 21 because bytes[21] is the final '0' in b"1234567800000000000000". let decimal_location = i - Self::DECIMAL_PLACES + 1 + is_negative; // decimal_location = 4 - while bytes[i] == ('0' as u8) && i >= decimal_location { + while bytes[i] == 0 && i >= decimal_location { bytes[i] = 0; - i = i - 1; + i -= 1; } // Now i = 7, because bytes[7] = '8', and bytes = b"12345678" @@ -395,12 +395,12 @@ impl RocDec { let ret = i + 1; while i >= decimal_location { bytes[i + 1] = bytes[i]; - i = i - 1; + i -= 1; } bytes[i + 1] = bytes[i]; // Now i = 4, and bytes = b"123455678" - bytes[decimal_location] = '.' as u8; + bytes[decimal_location] = b'.'; // Finally bytes = b"1234.5678" ret + 1 @@ -408,7 +408,7 @@ impl RocDec { #[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. pub fn to_str(&self) -> RocStr { - let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; + let mut bytes = [0; Self::MAX_STR_LENGTH]; let last_idx = self.to_str_helper(&mut bytes); unsafe { RocStr::from_slice_unchecked(&bytes[0..last_idx]) } } @@ -417,7 +417,7 @@ impl RocDec { #[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. impl fmt::Display for RocDec { fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut bytes = [0 as u8; Self::MAX_STR_LENGTH]; + let mut bytes = [0; Self::MAX_STR_LENGTH]; let last_idx = self.to_str_helper(&mut bytes); let result = unsafe { str::from_utf8_unchecked(&bytes[0..last_idx]) }; write!(fmtr, "{}", result) diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs index 30e3c45875..b1d465a451 100644 --- a/roc_std/src/roc_box.rs +++ b/roc_std/src/roc_box.rs @@ -166,11 +166,9 @@ impl Drop for RocBox { alignment as u32, ); } - } else { - if !new_storage.is_readonly() { - // Write the storage back. - storage.set(new_storage); - } + } else if !new_storage.is_readonly() { + // Write the storage back. + storage.set(new_storage); } } } diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index d60c8d21cb..252bc0d67f 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -188,11 +188,9 @@ where alignment as u32, ); } - } else { - if !new_storage.is_readonly() { - // Write the storage back. - storage.set(new_storage); - } + } else if !new_storage.is_readonly() { + // Write the storage back. + storage.set(new_storage); } } } @@ -432,11 +430,9 @@ impl Drop for RocList { alignment as u32, ); } - } else { - if !new_storage.is_readonly() { - // Write the storage back. - storage.set(new_storage); - } + } else if !new_storage.is_readonly() { + // Write the storage back. + storage.set(new_storage); } } } From 4f8cea7c7a3942a49ab935d99c7b95631afaa82c Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:39:58 -0400 Subject: [PATCH 36/65] Rename a constant --- roc_std/src/roc_str.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index e0bac40146..dd171718c5 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -22,11 +22,11 @@ macro_rules! with_stack_bytes { { use $crate::RocStr; - if $len < RocStr::TEMP_CSTR_MAX_STACK_BYTES { + if $len < RocStr::TEMP_STR_MAX_STACK_BYTES { // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array // has become stabilized, use that here in order to do a precise // stack allocation instead of always over-allocating to 64B. - let mut bytes: MaybeUninit<[u8; RocStr::TEMP_CSTR_MAX_STACK_BYTES]> = + let mut bytes: MaybeUninit<[u8; RocStr::TEMP_STR_MAX_STACK_BYTES]> = MaybeUninit::uninit(); $closure(bytes.as_mut_ptr() as *mut $elem_type) @@ -168,9 +168,10 @@ impl RocStr { } } - // If the string is under this many bytes, the into_temp_c_str method will allocate the - // C string on the stack when the RocStr is non-unique. - const TEMP_CSTR_MAX_STACK_BYTES: usize = 64; + // If the string is under this many bytes, the with_terminator family + // of methods will allocate the terminated string on the stack when + // the RocStr is non-unique. + const TEMP_STR_MAX_STACK_BYTES: usize = 64; /// Like with_utf8_terminator, except it can fail because a RocStr may /// contain \0 characters, which a nul-terminated string must not. From 96fddac6ebf1b20f38272721479b5891c7939f44 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:40:59 -0400 Subject: [PATCH 37/65] Improve some documentation and comments --- roc_std/src/roc_str.rs | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index dd171718c5..2d43ed06a7 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -173,8 +173,9 @@ impl RocStr { // the RocStr is non-unique. const TEMP_STR_MAX_STACK_BYTES: usize = 64; - /// Like with_utf8_terminator, except it can fail because a RocStr may - /// contain \0 characters, which a nul-terminated string must not. + /// Like calling with_utf8_terminator passing \0 for the terminator, + /// except it can fail because a RocStr may contain \0 characters, + /// which a nul-terminated string must not. pub fn utf8_nul_terminated T>( self, func: F, @@ -187,7 +188,7 @@ impl RocStr { } /// Turn this RocStr into a UTF-8 `*mut u8`, terminate it with the given character - /// (almost always either `b'\n'` or b`\0`) and then provide access to that + /// (commonly either `b'\n'` or b`\0`) and then provide access to that /// `*mut u8` (as well as its length) for the duration of a given function. This is /// designed to be an efficient way to turn a `RocStr` received from an application into /// either the nul-terminated UTF-8 `char*` needed by UNIX syscalls, or into a @@ -203,10 +204,10 @@ impl RocStr { /// string with non-unique refcount. (It will do a stack allocation if the string is under /// 64 bytes; the stack allocation will only live for the duration of the called function.) /// - /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes - /// to nul-terminate the string in-place. Small strings have an extra byte at the end + /// If the given (owned) RocStr is unique, this can overwrite the underlying bytes + /// to terminate the string in-place. Small strings have an extra byte at the end /// where the length is stored, which can be replaced with the terminator. Heap-allocated - /// strings can have excess capacity which can hold a termiator, or if they have no + /// strings can have excess capacity which can hold a terminator, or if they have no /// excess capacity, all the bytes can be shifted over the refcount in order to free up /// a `usize` worth of free space at the end - which can easily fit a 1-byte terminator. pub fn with_utf8_terminator T>(self, terminator: u8, func: F) -> T { @@ -244,8 +245,8 @@ impl RocStr { // First, copy the bytes over the original allocation - effectively // shifting everything over by one `usize`. Now we no longer have a - // refcount (but the C string won't use that anyway), but we do have a - // free `usize` at the end. + // refcount (but the terminated won't use that anyway), but we do + // have a free `usize` at the end. // // IMPORTANT: Must use ptr::copy instead of ptr::copy_nonoverlapping // because the regions definitely overlap! @@ -259,7 +260,6 @@ impl RocStr { Some(_) => { let len = roc_list.len(); - // The backing list was not unique, so we can't mutate it in-place. // The backing list was not unique, so we can't mutate it in-place. with_stack_bytes!(len, u8, |alloc_ptr| { let alloc_ptr = alloc_ptr as *mut u8; @@ -286,14 +286,15 @@ impl RocStr { let mut bytes = small_str.bytes; // Even if the small string is at capacity, there will be room to write - // a nul terminator in the byte that's used to store the length. + // a terminator in the byte that's used to store the length. terminate(bytes.as_mut_ptr() as *mut u8, small_str.len()) } } } - /// Like with_utf16_terminator, except it can fail because a RocStr may - /// contain \0 characters, which a nul-terminated string must not. + /// Like calling with_utf16_terminator passing \0 for the terminator, + /// except it can fail because a RocStr may contain \0 characters, + /// which a nul-terminated string must not. pub fn utf16_nul_terminated T>( self, func: F, @@ -311,7 +312,7 @@ impl RocStr { /// the nul-terminated UTF-16 `wchar_t*` needed by Windows API calls. /// /// **NOTE:** The length passed to the function is the same value that `RocStr::len` will - /// return; it does not count the nul terminator. So to convert it to a nul-terminated + /// return; it does not count the terminator. So to convert it to a nul-terminated /// slice of Rust bytes, call `slice::from_raw_parts` passing the given length + 1. /// /// This operation achieves efficiency by reusing allocated bytes from the RocStr itself, @@ -323,9 +324,9 @@ impl RocStr { /// Because this works on an owned RocStr, it's able to overwrite the underlying bytes /// to nul-terminate the string in-place. Small strings have an extra byte at the end /// where the length is stored, which can become 0 for nul-termination. Heap-allocated - /// strings can have excess capacity which can hold a nul termiator, or if they have no + /// strings can have excess capacity which can hold a termiator, or if they have no /// excess capacity, all the bytes can be shifted over the refcount in order to free up - /// a `usize` worth of free space at the end - which can easily fit a nul terminator. + /// a `usize` worth of free space at the end - which can easily fit a terminator. /// /// This operation can fail because a RocStr may contain \0 characters, which a /// nul-terminated string must not. @@ -437,7 +438,7 @@ impl RocStr { Some(storage) if storage.is_unique() => { // The backing RocList was unique, so we can mutate it in-place. - // We need 1 extra elem for the nul terminator. It must be an elem, + // We need 1 extra elem for the terminator. It must be an elem, // not a byte, because we'll be providing a pointer to elems. let needed_bytes = (len + 1) * size_of::(); @@ -450,7 +451,7 @@ impl RocStr { // We happen to have sufficient excess capacity already, // so we will be able to write the UTF-16 chars as well as - // the nul terminator into the existing allocation. + // the terminator into the existing allocation. let ptr = roc_list.ptr_to_allocation() as *mut E; terminate(ptr, self.as_str()) @@ -479,7 +480,7 @@ impl RocStr { RocStrInnerRef::SmallString(small_str) => { let len = small_str.len(); - // We need 1 extra elem for the nul terminator. It must be an elem, + // We need 1 extra elem for the terminator. It must be an elem, // not a byte, because we'll be providing a pointer to elems. let needed_bytes = (len + 1) * size_of::(); let available_bytes = size_of::(); @@ -527,7 +528,7 @@ impl TryFrom for RocStr { #[cfg(not(feature = "no_std"))] /// Like https://doc.rust-lang.org/std/ffi/struct.NulError.html but -/// only for interior nulls, not for missing nul terminators. +/// only for interior nuls, not for missing nul terminators. #[derive(Debug, Clone, PartialEq, Eq)] pub struct InteriorNulError { pub pos: usize, From 534bf35d59b430a156a94c78fd0debf493bbbf0d Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Mon, 6 Jun 2022 20:57:40 -0400 Subject: [PATCH 38/65] Use Self::alloc_alignment to find allocation pointer This should avoid problems where the element size is smaller than the size of the storage (e.g. a list of u8s) --- roc_std/src/roc_list.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 252bc0d67f..9dc53d53b1 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -102,7 +102,13 @@ impl RocList { #[inline(always)] fn elements_and_storage(&self) -> Option<(NonNull>, &Cell)> { let elements = self.elements?; - let storage = unsafe { &*elements.as_ptr().cast::>().sub(1) }; + let storage = unsafe { + &*elements + .as_ptr() + .cast::() + .sub(Self::alloc_alignment()) + .cast::>() + }; Some((elements, storage)) } @@ -118,7 +124,11 @@ impl RocList { /// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty. pub(crate) unsafe fn ptr_to_allocation(&self) -> *const c_void { - unsafe { (self.ptr_to_first_elem() as *const usize).sub(1).cast() } + unsafe { + (self.ptr_to_first_elem() as *const u8) + .sub(Self::alloc_alignment()) + .cast() + } } } From 091412dc4a4eb29dd0d6c9c3c4b6a51d2ac56264 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 00:51:12 -0400 Subject: [PATCH 39/65] Have to_str_helper return &str --- roc_std/src/lib.rs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 3a653da53e..76e8387869 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -347,15 +347,13 @@ impl RocDec { } #[cfg(not(feature = "no_std"))] - fn to_str_helper(self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> usize { + fn to_str_helper(self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> &str { // TODO there is probably some way to implement this logic without std::io::Write, // which in turn would make this method work with no_std. use std::io::Write; if self.as_i128() == 0 { - write!(&mut bytes[..], "0").unwrap(); - - return 1; + return "0"; } let is_negative = (self.as_i128() < 0) as usize; @@ -389,7 +387,7 @@ impl RocDec { if i < decimal_location { // This means that we've removed trailing zeros and are left with an integer. Our // convention is to print these without a decimal point or trailing zeros, so we're done. - return i + 1; + return unsafe { str::from_utf8_unchecked(&bytes[0..i + 1]) }; } let ret = i + 1; @@ -403,24 +401,23 @@ impl RocDec { bytes[decimal_location] = b'.'; // Finally bytes = b"1234.5678" - ret + 1 + unsafe { str::from_utf8_unchecked(&bytes[0..ret + 1]) } } #[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. pub fn to_str(&self) -> RocStr { let mut bytes = [0; Self::MAX_STR_LENGTH]; - let last_idx = self.to_str_helper(&mut bytes); - unsafe { RocStr::from_slice_unchecked(&bytes[0..last_idx]) } + + RocStr::from(self.to_str_helper(&mut bytes)) } } #[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. impl fmt::Display for RocDec { - fn fmt(&self, fmtr: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut bytes = [0; Self::MAX_STR_LENGTH]; - let last_idx = self.to_str_helper(&mut bytes); - let result = unsafe { str::from_utf8_unchecked(&bytes[0..last_idx]) }; - write!(fmtr, "{}", result) + + f.write_str(self.to_str_helper(&mut bytes)) } } From 4238e309cdcf944671f9830e9d6e60decc41fc5a Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 13:42:00 -0400 Subject: [PATCH 40/65] Fix RocDec::to_str implementation Made some changes to appease clippy, and they accidentally caused a regression. --- roc_std/src/lib.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 76e8387869..1ea6ca8d51 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -356,8 +356,6 @@ impl RocDec { return "0"; } - let is_negative = (self.as_i128() < 0) as usize; - // The :019 in the following write! is computed as Self::DECIMAL_PLACES + 1. If you change // Self::DECIMAL_PLACES, this assert should remind you to change that format string as // well. @@ -370,16 +368,17 @@ impl RocDec { // If self represents 1234.5678, then bytes is b"1234567800000000000000". let mut i = Self::MAX_STR_LENGTH - 1; // Find the last place where we have actual data. - while bytes[i] == b'0' { + while bytes[i] == 0 { i -= 1; } // At this point i is 21 because bytes[21] is the final '0' in b"1234567800000000000000". - let decimal_location = i - Self::DECIMAL_PLACES + 1 + is_negative; + let is_negative = self.as_i128() < 0; + let decimal_location = i - Self::DECIMAL_PLACES + 1 + (is_negative as usize); // decimal_location = 4 - while bytes[i] == 0 && i >= decimal_location { - bytes[i] = 0; + while bytes[i] == b'0' && i >= decimal_location { + bytes[i] = b'0'; i -= 1; } // Now i = 7, because bytes[7] = '8', and bytes = b"12345678" From 6d259ebd458fb07e4bd814336b84a9affc9f826b Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 14:12:40 -0400 Subject: [PATCH 41/65] Fix some RocList::reserve logic --- roc_std/src/roc_list.rs | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 9dc53d53b1..84f8f0f24c 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -235,34 +235,39 @@ where pub fn extend_from_slice(&mut self, slice: &[T]) { // TODO: Can we do better for ZSTs? Alignment might be a problem. - if slice.is_empty() { return; } let alignment = Self::alloc_alignment(); let elements_offset = alignment; - let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); - let non_null_elements = if let Some((elements, storage)) = self.elements_and_storage() { // Decrement the list's refence count. let mut copy = storage.get(); let is_unique = copy.decrease(); if is_unique { - // If the memory is not shared, we can reuse the memory. - let old_size = elements_offset + (mem::size_of::() * self.len()); + // If we have enough capacity, we can add to the existing elements in-place. + if self.capacity() >= new_size { + elements + } else { + // There wasn't enough capacity, so we need a new allocation. + // Since this is a unique RocList, we can use realloc here. + let old_size = elements_offset + (mem::size_of::() * self.len()); - let new_ptr = unsafe { - let ptr = elements.as_ptr().cast::().sub(alignment).cast(); + let new_ptr = unsafe { + let ptr = elements.as_ptr().cast::().sub(alignment).cast(); - roc_realloc(ptr, new_size, old_size, alignment as u32).cast() - }; + roc_realloc(ptr, new_size, old_size, alignment as u32).cast() + }; - Self::elems_from_allocation(NonNull::new(new_ptr).unwrap_or_else(|| { - todo!("Reallocation failed"); - })) + self.capacity = new_size; + + Self::elems_from_allocation(NonNull::new(new_ptr).unwrap_or_else(|| { + todo!("Reallocation failed"); + })) + } } else { if !copy.is_readonly() { // Write the decremented reference count back. From 5a3ace1ce1b4d5c59faaf922f73f9eb7b157eab5 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 14:21:07 -0400 Subject: [PATCH 42/65] Make RocList::reserve take &mut instead of owned --- roc_std/src/roc_list.rs | 19 ++++++++----------- roc_std/src/roc_str.rs | 23 +++++++++++------------ roc_std/tests/test_roc_std.rs | 35 ++++++++++++++++++++++++----------- 3 files changed, 43 insertions(+), 34 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 84f8f0f24c..1423f15875 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -139,7 +139,7 @@ where /// Increase a RocList's capacity by at least the requested number of elements (possibly more). /// /// May return a new RocList, if the provided one was not unique. - pub fn reserve(self, num_elems: usize) -> Self { + pub fn reserve(&mut self, num_elems: usize) { let new_elems; let old_elements_ptr; @@ -159,7 +159,7 @@ where if new_ptr == original_ptr as *mut _ { // We successfully reallocated in-place; we're done! - return self; + return; } else { // We got back a different allocation; copy the existing elements // into it. We don't need to increment their refcounts because @@ -206,7 +206,9 @@ where } None => { // This is an empty list, so `reserve` is the same as `with_capacity`. - return Self::with_capacity(num_elems); + *self = Self::with_capacity(num_elems); + + return; } } @@ -215,16 +217,11 @@ where copy_nonoverlapping(old_elements_ptr, new_elems.as_ptr(), self.length); } - let length = self.length; - - // We already manually cleaned up `self` by now, so don't let its Drop run too. - mem::forget(self); - - Self { + *self = Self { elements: Some(new_elems), - length, + length: self.length, capacity: num_elems, - } + }; } pub fn from_slice(slice: &[T]) -> Self { diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 2d43ed06a7..913ee30f83 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -133,30 +133,29 @@ impl RocStr { /// Increase a RocStr's capacity by at least the requested number of bytes (possibly more). /// /// May return a new RocStr, if the provided one was not unique. - pub fn reserve(mut self, bytes: usize) -> Self { + pub fn reserve(&mut self, bytes: usize) { if self.is_small_str() { let small_str = unsafe { self.0.small_string }; let target_cap = small_str.len() + bytes; - if target_cap <= SmallString::CAPACITY { - // The small string already has enough capacity; return it unmodified. - self - } else { + if target_cap > SmallString::CAPACITY { // The requested capacity won't fit in a small string; we need to go big. let mut roc_list = RocList::with_capacity(target_cap); - roc_list.extend_from_slice(&small_str.bytes); + roc_list.extend_from_slice(&small_str.as_bytes()); - RocStr(RocStrInner { + *self = RocStr(RocStrInner { heap_allocated: ManuallyDrop::new(roc_list), - }) + }); } } else { - let roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) }; + let mut roc_list = unsafe { ManuallyDrop::take(&mut self.0.heap_allocated) }; - RocStr(RocStrInner { - heap_allocated: ManuallyDrop::new(roc_list.reserve(bytes)), - }) + roc_list.reserve(bytes); + + *self = RocStr(RocStrInner { + heap_allocated: ManuallyDrop::new(roc_list), + }); } } diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 604f853b73..dc95a75190 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -121,6 +121,15 @@ mod test_roc_std { assert_eq!(string.capacity(), super::ROC_SMALL_STR_CAPACITY); } + #[test] + fn reserve() { + let mut roc_str = RocStr::empty(); + + roc_str.reserve(42); + + assert_eq!(roc_str.capacity(), 42); + } + #[test] fn roc_result_to_rust_result() { let greeting = "Hello, World!"; @@ -156,16 +165,21 @@ mod test_roc_std { } #[cfg(test)] -mod temp_c_str { +mod with_terminator { use core::slice; use roc_std::RocStr; use std::ffi::CStr; - fn verify_temp_c(string: &str) { - // temp_c_utf8 + fn verify_temp_c(string: &str, excess_capacity: usize) { + let mut roc_str = RocStr::from(string); + + if excess_capacity > 0 { + roc_str.reserve(excess_capacity); + } + + // utf8_nul_terminated { - let roc_str = RocStr::from(string); - let answer = roc_str.utf8_nul_terminated(|ptr, len| { + let answer = roc_str.clone().utf8_nul_terminated(|ptr, len| { let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; let c_str = CStr::from_bytes_with_nul(bytes).unwrap(); @@ -177,9 +191,8 @@ mod temp_c_str { assert_eq!(Ok(42), answer); } - // temp_c_utf16 + // utf16_nul_terminated { - let roc_str = RocStr::from(string); let answer = roc_str.utf16_nul_terminated(|ptr, len| { let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; @@ -199,7 +212,7 @@ mod temp_c_str { #[test] fn empty_string() { - verify_temp_c(""); + verify_temp_c("", 0); } /// e.g. "1" or "12" or "12345" etc. @@ -217,16 +230,16 @@ mod temp_c_str { #[test] fn small_strings() { for len in 1..=super::ROC_SMALL_STR_CAPACITY { - verify_temp_c(&string_for_len(len)); + verify_temp_c(&string_for_len(len), 0); } } #[test] fn no_excess_capacity() { // This is small enough that it should be a stack allocation for UTF-8 - verify_temp_c(&string_for_len(33)); + verify_temp_c(&string_for_len(33), 0); // This is big enough that it should be a heap allocation for UTF-8 and UTF-16 - verify_temp_c(&string_for_len(65)); + verify_temp_c(&string_for_len(65), 0); } } From 8e67c6a6449dd008cbf63628f181c31333ebbfc0 Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 8 Jun 2022 15:45:24 +0200 Subject: [PATCH 43/65] no_std decimal formatting --- Cargo.lock | 1 + roc_std/Cargo.lock | 7 ++++ roc_std/Cargo.toml | 1 + roc_std/src/lib.rs | 73 ++++++++++++++--------------------- roc_std/tests/test_roc_std.rs | 18 +++++++++ 5 files changed, 57 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 57203a4c28..8aa5f66c8b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4110,6 +4110,7 @@ dependencies = [ name = "roc_std" version = "0.1.0" dependencies = [ + "arrayvec 0.7.2", "static_assertions 0.1.1", ] diff --git a/roc_std/Cargo.lock b/roc_std/Cargo.lock index fe1a8e26d8..0295a22bc4 100644 --- a/roc_std/Cargo.lock +++ b/roc_std/Cargo.lock @@ -20,6 +20,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + [[package]] name = "cfg-if" version = "1.0.0" @@ -193,6 +199,7 @@ checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" name = "roc_std" version = "0.1.0" dependencies = [ + "arrayvec", "indoc", "libc", "pretty_assertions", diff --git a/roc_std/Cargo.toml b/roc_std/Cargo.toml index 20b9cb6fd6..f296f56a0a 100644 --- a/roc_std/Cargo.toml +++ b/roc_std/Cargo.toml @@ -10,6 +10,7 @@ version = "0.1.0" [dependencies] static_assertions = "0.1" +arrayvec = "0.7.2" [dev-dependencies] indoc = "1.0.3" diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 1ea6ca8d51..76b534604c 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -9,6 +9,8 @@ use core::mem::{ManuallyDrop, MaybeUninit}; use core::ops::Drop; use core::str; +use arrayvec::ArrayString; + mod roc_box; mod roc_list; mod roc_str; @@ -346,77 +348,62 @@ impl RocDec { self.0 } - #[cfg(not(feature = "no_std"))] - fn to_str_helper(self, bytes: &mut [u8; Self::MAX_STR_LENGTH]) -> &str { - // TODO there is probably some way to implement this logic without std::io::Write, - // which in turn would make this method work with no_std. - use std::io::Write; + fn to_str_helper(self, string: &mut ArrayString<{ Self::MAX_STR_LENGTH }>) -> &str { + use std::fmt::Write; if self.as_i128() == 0 { return "0"; } // The :019 in the following write! is computed as Self::DECIMAL_PLACES + 1. If you change - // Self::DECIMAL_PLACES, this assert should remind you to change that format string as - // well. + // Self::DECIMAL_PLACES, this assert should remind you to change that format string as well. static_assertions::const_assert!(Self::DECIMAL_PLACES + 1 == 19); // By using the :019 format, we're guaranteeing that numbers less than 1, say 0.01234 - // get their leading zeros placed in bytes for us. i.e. bytes = b"0012340000000000000" - write!(&mut bytes[..], "{:019}", self.as_i128()).unwrap(); - - // If self represents 1234.5678, then bytes is b"1234567800000000000000". - let mut i = Self::MAX_STR_LENGTH - 1; - // Find the last place where we have actual data. - while bytes[i] == 0 { - i -= 1; - } - // At this point i is 21 because bytes[21] is the final '0' in b"1234567800000000000000". + // get their leading zeros placed in bytes for us. i.e. `string = b"0012340000000000000"` + write!(string, "{:019}", self.as_i128()).unwrap(); let is_negative = self.as_i128() < 0; - let decimal_location = i - Self::DECIMAL_PLACES + 1 + (is_negative as usize); - // decimal_location = 4 + let decimal_location = string.len() - Self::DECIMAL_PLACES + (is_negative as usize); - while bytes[i] == b'0' && i >= decimal_location { - bytes[i] = b'0'; - i -= 1; - } - // Now i = 7, because bytes[7] = '8', and bytes = b"12345678" + // skip trailing zeros + let last_nonzero_byte = string.trim_end_matches('0').len(); - if i < decimal_location { + if last_nonzero_byte < decimal_location { // This means that we've removed trailing zeros and are left with an integer. Our // convention is to print these without a decimal point or trailing zeros, so we're done. - return unsafe { str::from_utf8_unchecked(&bytes[0..i + 1]) }; + string.truncate(decimal_location); + return string.as_str(); } - let ret = i + 1; - while i >= decimal_location { - bytes[i + 1] = bytes[i]; - i -= 1; - } - bytes[i + 1] = bytes[i]; - // Now i = 4, and bytes = b"123455678" + // otherwise, we're dealing with a fraction, and need to insert the decimal dot + // truncate all extra zeros off + string.truncate(last_nonzero_byte); + + // push a dummy character so we have space for the decimal dot + string.push('$'); + + // Safety: at any time, the string only contains ascii characters, so it is always valid utf8 + let bytes = unsafe { string.as_bytes_mut() }; + + // shift the fractional part by one + bytes.copy_within(decimal_location..last_nonzero_byte, decimal_location + 1); + + // and put in the decimal dot in the right place bytes[decimal_location] = b'.'; - // Finally bytes = b"1234.5678" - unsafe { str::from_utf8_unchecked(&bytes[0..ret + 1]) } + string.as_str() } - #[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. pub fn to_str(&self) -> RocStr { - let mut bytes = [0; Self::MAX_STR_LENGTH]; - - RocStr::from(self.to_str_helper(&mut bytes)) + RocStr::from(self.to_str_helper(&mut ArrayString::new())) } } -#[cfg(not(feature = "no_std"))] // to_str_helper currently uses std, but might not need to. impl fmt::Display for RocDec { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let mut bytes = [0; Self::MAX_STR_LENGTH]; - - f.write_str(self.to_str_helper(&mut bytes)) + f.write_str(self.to_str_helper(&mut ArrayString::new())) } } diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index dc95a75190..f356df3d7e 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -59,6 +59,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut #[cfg(test)] mod test_roc_std { + use roc_std::RocDec; use roc_std::RocResult; use roc_std::RocStr; @@ -162,6 +163,23 @@ mod test_roc_std { assert!(!roc_result.is_ok()); assert!(roc_result.is_err()); } + + #[test] + fn roc_dec_fmt() { + assert_eq!( + format!("{}", RocDec::MIN), + "-1701411834604692317316.87303715884105728" + ); + + let half = RocDec::from_str("0.5").unwrap(); + assert_eq!(format!("{}", half), "0.5"); + + let ten = RocDec::from_str("10").unwrap(); + assert_eq!(format!("{}", ten), "10"); + + let example = RocDec::from_str("1234.5678").unwrap(); + assert_eq!(format!("{}", example), "1234.5678"); + } } #[cfg(test)] From 8336f118f6b31b527586f4dd8083f393e25f81bc Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 21:05:08 -0400 Subject: [PATCH 44/65] Add Display impl for RocStr --- roc_std/src/roc_str.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 913ee30f83..31d9c8a358 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -2,7 +2,7 @@ use core::{ convert::TryFrom, - fmt::Debug, + fmt, hash::Hash, mem::{size_of, ManuallyDrop}, ops::{Deref, DerefMut}, @@ -233,12 +233,12 @@ impl RocStr { let len = roc_list.len(); let ptr = if len < roc_list.capacity() { // We happen to have excess capacity already, so we will be able - // to write the \0 into the first byte of excess capacity. + // to write the terminator into the first byte of excess capacity. roc_list.ptr_to_first_elem() as *mut u8 } else { // We always have an allocation that's even bigger than necessary, // because the refcount bytes take up more than the 1B needed for - // the \0 at the end. We just need to shift the bytes over on top + // the terminator. We just need to shift the bytes over on top // of the refcount. let alloc_ptr = roc_list.ptr_to_allocation() as *mut u8; @@ -566,7 +566,13 @@ impl Ord for RocStr { } } -impl Debug for RocStr { +impl fmt::Debug for RocStr { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.deref().fmt(f) + } +} + +impl fmt::Display for RocStr { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { self.deref().fmt(f) } From 7537f4e4c0ec692ad83121402f66387e0656c303 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 21:05:26 -0400 Subject: [PATCH 45/65] Make more human-readable test strings --- roc_std/tests/test_roc_std.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index f356df3d7e..b37bb84efa 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -233,9 +233,16 @@ mod with_terminator { verify_temp_c("", 0); } - /// e.g. "1" or "12" or "12345" etc. + /// e.g. "a" or "abc" or "abcdefg" etc. fn string_for_len(len: usize) -> String { - let bytes: Vec = (1..=len as u8).collect(); + let first_index: usize = 97; // start with ASCII lowercase "a" + let bytes: Vec = (0..len) + .map(|index| { + let letter = (index % 26) + first_index; + + letter.try_into().unwrap() + }) + .collect(); assert_eq!(bytes.len(), len); From 083603ad099cc9988fabaf27e4564ab35bec6458 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Tue, 7 Jun 2022 21:38:24 -0400 Subject: [PATCH 46/65] Use RocList::ptr_to_allocation more consistently --- roc_std/src/roc_list.rs | 72 +++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 43 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 1423f15875..3aa55d635b 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -22,8 +22,8 @@ pub struct RocList { impl RocList { #[inline(always)] - fn alloc_alignment() -> usize { - mem::align_of::().max(mem::align_of::()) + fn alloc_alignment() -> u32 { + mem::align_of::().max(mem::align_of::()) as u32 } pub fn empty() -> Self { @@ -36,17 +36,17 @@ impl RocList { /// Create an empty RocList with enough space preallocated to store /// the requested number of elements. - pub fn with_capacity(elems: usize) -> Self { + pub fn with_capacity(num_elems: usize) -> Self { Self { - elements: Some(Self::elems_with_capacity(elems)), + elements: Some(Self::elems_with_capacity(num_elems)), length: 0, - capacity: elems, + capacity: num_elems, } } - fn elems_with_capacity(elems: usize) -> NonNull> { - let alignment = Self::alloc_alignment(); - let alloc_ptr = unsafe { roc_alloc(elems * mem::size_of::(), alignment as u32) }; + fn elems_with_capacity(num_elems: usize) -> NonNull> { + let num_bytes = num_elems * mem::size_of::(); + let alloc_ptr = unsafe { roc_alloc(num_bytes, Self::alloc_alignment()) }; Self::elems_from_allocation(NonNull::new(alloc_ptr).unwrap_or_else(|| { todo!("Call roc_panic with the info that an allocation failed."); @@ -54,14 +54,10 @@ impl RocList { } fn elems_from_allocation(allocation: NonNull) -> NonNull> { - let alignment = Self::alloc_alignment(); let alloc_ptr = allocation.as_ptr(); unsafe { - let elem_ptr = alloc_ptr - .cast::() - .add(alignment) - .cast::>(); + let elem_ptr = Self::elem_ptr_from_alloc_ptr(alloc_ptr).cast::>(); // Initialize the reference count. alloc_ptr @@ -102,13 +98,7 @@ impl RocList { #[inline(always)] fn elements_and_storage(&self) -> Option<(NonNull>, &Cell)> { let elements = self.elements?; - let storage = unsafe { - &*elements - .as_ptr() - .cast::() - .sub(Self::alloc_alignment()) - .cast::>() - }; + let storage = unsafe { &*self.ptr_to_allocation().cast::>() }; Some((elements, storage)) } @@ -123,10 +113,19 @@ impl RocList { } /// Useful for doing memcpy on the underlying allocation. Returns NULL if list is empty. - pub(crate) unsafe fn ptr_to_allocation(&self) -> *const c_void { + pub(crate) unsafe fn ptr_to_allocation(&self) -> *mut c_void { unsafe { - (self.ptr_to_first_elem() as *const u8) - .sub(Self::alloc_alignment()) + self.ptr_to_first_elem() + .cast::() + .sub(Self::alloc_alignment() as usize) as *mut _ + } + } + + unsafe fn elem_ptr_from_alloc_ptr(alloc_ptr: *mut c_void) -> *mut c_void { + unsafe { + alloc_ptr + .cast::() + .add(Self::alloc_alignment() as usize) .cast() } } @@ -151,13 +150,13 @@ where // Try to reallocate in-place. let new_ptr = roc_realloc( - original_ptr as *mut _, + original_ptr, num_elems, self.length, - Self::alloc_alignment() as u32, + Self::alloc_alignment(), ); - if new_ptr == original_ptr as *mut _ { + if new_ptr == original_ptr { // We successfully reallocated in-place; we're done! return; } else { @@ -186,17 +185,11 @@ where let needs_dealloc = new_storage.decrease(); if needs_dealloc { - let alignment = Self::alloc_alignment(); - // Unlike in Drop, do *not* decrement the refcounts of all the elements! // The new allocation is referencing them, so instead of incrementing them all // all just to decrement them again here, we neither increment nor decrement them. unsafe { - // Release the memory. - roc_dealloc( - old_elements_ptr.cast::().sub(alignment).cast(), - alignment as u32, - ); + roc_dealloc(self.ptr_to_allocation(), Self::alloc_alignment()); } } else if !new_storage.is_readonly() { // Write the storage back. @@ -237,7 +230,7 @@ where } let alignment = Self::alloc_alignment(); - let elements_offset = alignment; + let elements_offset = alignment as usize; let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); let non_null_elements = if let Some((elements, storage)) = self.elements_and_storage() { // Decrement the list's refence count. @@ -254,9 +247,7 @@ where let old_size = elements_offset + (mem::size_of::() * self.len()); let new_ptr = unsafe { - let ptr = elements.as_ptr().cast::().sub(alignment).cast(); - - roc_realloc(ptr, new_size, old_size, alignment as u32).cast() + roc_realloc(self.ptr_to_allocation(), new_size, old_size, alignment).cast() }; self.capacity = new_size; @@ -434,13 +425,8 @@ impl Drop for RocList { mem::drop::(ManuallyDrop::take(&mut *elem_ptr)); } - let alignment = Self::alloc_alignment(); - // Release the memory. - roc_dealloc( - elements.as_ptr().cast::().sub(alignment).cast(), - alignment as u32, - ); + roc_dealloc(self.ptr_to_allocation(), Self::alloc_alignment()); } } else if !new_storage.is_readonly() { // Write the storage back. From 4941fa3c37feae4d9b7682184806acc1145f4087 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Wed, 8 Jun 2022 09:18:58 -0400 Subject: [PATCH 47/65] Drop ManualyDrop from RocBox implementation --- roc_std/src/roc_box.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs index b1d465a451..a91a72d76e 100644 --- a/roc_std/src/roc_box.rs +++ b/roc_std/src/roc_box.rs @@ -1,19 +1,18 @@ #![deny(unsafe_op_in_unsafe_fn)] +use crate::{roc_alloc, roc_dealloc, storage::Storage}; use core::{ cell::Cell, cmp::{self, Ordering}, fmt::Debug, - mem::{self, ManuallyDrop}, + mem, ops::Deref, ptr::{self, NonNull}, }; -use crate::{roc_alloc, roc_dealloc, storage::Storage}; - #[repr(C)] pub struct RocBox { - contents: NonNull>, + contents: NonNull, } impl RocBox { @@ -35,9 +34,9 @@ impl RocBox { } let contents = unsafe { - let contents_ptr = ptr.cast::().add(alignment).cast::>(); + let contents_ptr = ptr.cast::().add(alignment).cast::(); - *contents_ptr = ManuallyDrop::new(contents); + *contents_ptr = contents; if true { todo!("Increment the refcount of `contents`, and also do that in RocList extend_from_slice."); @@ -156,7 +155,7 @@ impl Drop for RocBox { // Drop the stored contents. let contents_ptr = contents.as_ptr(); - mem::drop::(ManuallyDrop::take(&mut *contents_ptr)); + mem::drop::(ptr::read(contents_ptr)); let alignment = Self::alloc_alignment(); From 2547d84e75d50fddd717cd5d31b902ee5376841c Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Wed, 8 Jun 2022 09:19:07 -0400 Subject: [PATCH 48/65] Drop unnecessary note about refcounts Turns out this is not actually necessary because Clone bumps refcount now! --- roc_std/src/roc_box.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs index a91a72d76e..63da66a1b8 100644 --- a/roc_std/src/roc_box.rs +++ b/roc_std/src/roc_box.rs @@ -38,10 +38,6 @@ impl RocBox { *contents_ptr = contents; - if true { - todo!("Increment the refcount of `contents`, and also do that in RocList extend_from_slice."); - } - // We already verified that the original alloc pointer was non-null, // and this one is the alloc pointer with `alignment` bytes added to it, // so it should be non-null too. From d82cd51c519d1a82b7ff076e601f2e1568b7372b Mon Sep 17 00:00:00 2001 From: Folkert Date: Wed, 8 Jun 2022 22:06:27 +0200 Subject: [PATCH 49/65] fix incorrect allocation sizes in roc_str --- roc_std/src/roc_str.rs | 77 ++++++++++++++++++----------------- roc_std/tests/test_roc_std.rs | 8 ++-- 2 files changed, 45 insertions(+), 40 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 31d9c8a358..4913e9f685 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -17,33 +17,32 @@ use crate::RocList; #[repr(transparent)] pub struct RocStr(RocStrInner); -macro_rules! with_stack_bytes { - ($len:expr, $elem_type:ty, $closure:expr) => { - { - use $crate::RocStr; +fn with_stack_bytes(length: usize, closure: F) -> T +where + F: FnOnce(*mut E) -> T, +{ + use crate::{roc_alloc, roc_dealloc}; + use core::mem::MaybeUninit; - if $len < RocStr::TEMP_STR_MAX_STACK_BYTES { - // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array - // has become stabilized, use that here in order to do a precise - // stack allocation instead of always over-allocating to 64B. - let mut bytes: MaybeUninit<[u8; RocStr::TEMP_STR_MAX_STACK_BYTES]> = - MaybeUninit::uninit(); + if length < RocStr::TEMP_STR_MAX_STACK_BYTES { + // TODO: once https://doc.rust-lang.org/std/mem/union.MaybeUninit.html#method.uninit_array + // has become stabilized, use that here in order to do a precise + // stack allocation instead of always over-allocating to 64B. + let mut bytes: MaybeUninit<[u8; RocStr::TEMP_STR_MAX_STACK_BYTES]> = MaybeUninit::uninit(); - $closure(bytes.as_mut_ptr() as *mut $elem_type) - } else { - let align = core::mem::align_of::<$elem_type>() as u32; - // The string is too long to stack-allocate, so - // do a heap allocation and then free it afterwards. - let ptr = roc_alloc($len, align) as *mut $elem_type; - let answer = $closure(ptr); + closure(bytes.as_mut_ptr() as *mut E) + } else { + let align = core::mem::align_of::() as u32; + // The string is too long to stack-allocate, so + // do a heap allocation and then free it afterwards. + let ptr = unsafe { roc_alloc(length, align) } as *mut E; + let answer = closure(ptr); - // Free the heap allocation. - roc_dealloc(ptr.cast(), align); + // Free the heap allocation. + unsafe { roc_dealloc(ptr.cast(), align) }; - answer - } - } - }; + answer + } } impl RocStr { @@ -142,7 +141,7 @@ impl RocStr { // The requested capacity won't fit in a small string; we need to go big. let mut roc_list = RocList::with_capacity(target_cap); - roc_list.extend_from_slice(&small_str.as_bytes()); + roc_list.extend_from_slice(small_str.as_bytes()); *self = RocStr(RocStrInner { heap_allocated: ManuallyDrop::new(roc_list), @@ -214,9 +213,6 @@ impl RocStr { // more efficient than that - due to knowing that it's already in UTF-8 and always // has room for a 1-byte terminator in the existing allocation (either in the refcount // bytes, or, in a small string, in the length at the end of the string). - use core::mem::MaybeUninit; - - use crate::{roc_alloc, roc_dealloc}; let terminate = |alloc_ptr: *mut u8, len: usize| unsafe { *(alloc_ptr.add(len)) = terminator; @@ -260,7 +256,8 @@ impl RocStr { let len = roc_list.len(); // The backing list was not unique, so we can't mutate it in-place. - with_stack_bytes!(len, u8, |alloc_ptr| { + // ask for `len + 1` to store the original string and the terminator + with_stack_bytes(len + 1, |alloc_ptr: *mut u8| { let alloc_ptr = alloc_ptr as *mut u8; let elem_ptr = roc_list.ptr_to_first_elem() as *mut u8; @@ -353,7 +350,7 @@ impl RocStr { if let Some(pos) = self.first_nul_byte() { Err(InteriorNulError { pos, roc_str: self }) } else { - let answer = self.with_terminator(0, |dest_ptr: *mut u16, str_slice: &str| { + let answer = self.with_terminator(0u16, |dest_ptr: *mut u16, str_slice: &str| { // Translate UTF-8 source bytes into UTF-16 and write them into the destination. for (index, mut wchar) in str_slice.encode_utf16().enumerate() { // Replace slashes with backslashes @@ -386,11 +383,11 @@ impl RocStr { /// /// use roc_std::{RocStr, InteriorNulError}; /// - /// pub fn temp_windows_path T>( + /// pub fn with_windows_path T>( /// roc_str: RocStr, /// func: F, /// ) -> Result { - /// roc_str.temp_nul_terminated(|dest_ptr: *mut u16, str_slice: &str| { + /// let answer = roc_str.with_terminator(0u16, |dest_ptr: *mut u16, str_slice: &str| { /// // Translate UTF-8 source bytes into UTF-16 and write them into the destination. /// for (index, mut wchar) in str_slice.encode_utf16().enumerate() { /// // Replace slashes with backslashes @@ -404,15 +401,17 @@ impl RocStr { /// } /// /// func(dest_ptr, str_slice.len()) - /// }) + /// }); + /// + /// Ok(answer) /// } pub fn with_terminator A>( self, terminator: E, func: F, ) -> A { - use crate::{roc_alloc, roc_dealloc, Storage}; - use core::mem::{align_of, MaybeUninit}; + use crate::Storage; + use core::mem::align_of; let terminate = |alloc_ptr: *mut E, str_slice: &str| unsafe { *(alloc_ptr.add(str_slice.len())) = terminator; @@ -422,8 +421,12 @@ impl RocStr { // When we don't have an existing allocation that can work, fall back on this. // It uses either a stack allocation, or, if that would be too big, a heap allocation. - let fallback = |str_slice: &str| unsafe { - with_stack_bytes!(str_slice.len(), E, |alloc_ptr: *mut E| { + let fallback = |str_slice: &str| { + // We need 1 extra elem for the terminator. It must be an elem, + // not a byte, because we'll be providing a pointer to elems. + let needed_bytes = (str_slice.len() + 1) * size_of::(); + + with_stack_bytes(needed_bytes, |alloc_ptr: *mut E| { terminate(alloc_ptr, str_slice) }) }; @@ -445,7 +448,7 @@ impl RocStr { // the bytes originally used for the refcount. let available_bytes = roc_list.capacity() + size_of::(); - if needed_bytes < available_bytes { + if dbg!(needed_bytes < available_bytes) { debug_assert!(align_of::() >= align_of::()); // We happen to have sufficient excess capacity already, diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index b37bb84efa..d7a0d5ee95 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -195,6 +195,8 @@ mod with_terminator { roc_str.reserve(excess_capacity); } + dbg!(roc_str.len()); + // utf8_nul_terminated { let answer = roc_str.clone().utf8_nul_terminated(|ptr, len| { @@ -212,7 +214,7 @@ mod with_terminator { // utf16_nul_terminated { let answer = roc_str.utf16_nul_terminated(|ptr, len| { - let bytes = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; + let bytes: &[u16] = unsafe { slice::from_raw_parts(ptr.cast(), len + 1) }; // Verify that it's nul-terminated assert_eq!(bytes[len], 0); @@ -261,8 +263,8 @@ mod with_terminator { #[test] fn no_excess_capacity() { - // This is small enough that it should be a stack allocation for UTF-8 - verify_temp_c(&string_for_len(33), 0); + // // This is small enough that it should be a stack allocation for UTF-8 + // verify_temp_c(&string_for_len(33), 0); // This is big enough that it should be a heap allocation for UTF-8 and UTF-16 verify_temp_c(&string_for_len(65), 0); From 058a9dd0e89480fb66f73dfb4dd02ad95c9a4b63 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 08:50:47 -0400 Subject: [PATCH 50/65] Drop some dbg!s --- roc_std/src/roc_str.rs | 2 +- roc_std/tests/test_roc_std.rs | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 4913e9f685..e6fdc5c923 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -448,7 +448,7 @@ impl RocStr { // the bytes originally used for the refcount. let available_bytes = roc_list.capacity() + size_of::(); - if dbg!(needed_bytes < available_bytes) { + if needed_bytes < available_bytes { debug_assert!(align_of::() >= align_of::()); // We happen to have sufficient excess capacity already, diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index d7a0d5ee95..7f71cd454b 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -195,8 +195,6 @@ mod with_terminator { roc_str.reserve(excess_capacity); } - dbg!(roc_str.len()); - // utf8_nul_terminated { let answer = roc_str.clone().utf8_nul_terminated(|ptr, len| { From c34db827c3c914f7c8fb417fc2d20eae12834190 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 22:15:07 -0400 Subject: [PATCH 51/65] Fix RocList::reserve --- roc_std/src/roc_list.rs | 54 +++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 3aa55d635b..1c9a15f020 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -44,9 +44,15 @@ impl RocList { } } + /// Used for both roc_alloc and roc_realloc - given the number of elements, + /// returns the number of bytes needed to allocate, taking into account both the + /// size of the elements as well as the size of Storage. + fn alloc_bytes(num_elems: usize) -> usize { + mem::size_of::() + (num_elems * mem::size_of::()) + } + fn elems_with_capacity(num_elems: usize) -> NonNull> { - let num_bytes = num_elems * mem::size_of::(); - let alloc_ptr = unsafe { roc_alloc(num_bytes, Self::alloc_alignment()) }; + let alloc_ptr = unsafe { roc_alloc(Self::alloc_bytes(num_elems), Self::alloc_alignment()) }; Self::elems_from_allocation(NonNull::new(alloc_ptr).unwrap_or_else(|| { todo!("Call roc_panic with the info that an allocation failed."); @@ -139,6 +145,7 @@ where /// /// May return a new RocList, if the provided one was not unique. pub fn reserve(&mut self, num_elems: usize) { + let new_len = num_elems + self.length; let new_elems; let old_elements_ptr; @@ -146,17 +153,18 @@ where Some((elements, storage)) => { if storage.get().is_unique() { unsafe { - let original_ptr = self.ptr_to_allocation(); + let old_alloc = self.ptr_to_allocation(); + old_elements_ptr = elements.as_ptr(); // Try to reallocate in-place. - let new_ptr = roc_realloc( - original_ptr, - num_elems, - self.length, + let new_alloc = roc_realloc( + old_alloc, + Self::alloc_bytes(new_len), + Self::alloc_bytes(self.capacity), Self::alloc_alignment(), ); - if new_ptr == original_ptr { + if new_alloc == old_alloc { // We successfully reallocated in-place; we're done! return; } else { @@ -165,11 +173,10 @@ where // The existing allocation that references to them is now gone and // no longer referencing them. new_elems = Self::elems_from_allocation( - NonNull::new(new_ptr).unwrap_or_else(|| { + NonNull::new(new_alloc).unwrap_or_else(|| { todo!("Reallocation failed"); }), ); - old_elements_ptr = elements.as_ptr(); } // Note that realloc automatically deallocates the old allocation, @@ -177,7 +184,7 @@ where } } else { // Make a new allocation - new_elems = Self::elems_with_capacity(num_elems); + new_elems = Self::elems_with_capacity(new_len); old_elements_ptr = elements.as_ptr(); // Decrease the current allocation's reference count. @@ -199,7 +206,7 @@ where } None => { // This is an empty list, so `reserve` is the same as `with_capacity`. - *self = Self::with_capacity(num_elems); + *self = Self::with_capacity(new_len); return; } @@ -213,7 +220,7 @@ where *self = Self { elements: Some(new_elems), length: self.length, - capacity: num_elems, + capacity: new_len, }; } @@ -229,9 +236,7 @@ where return; } - let alignment = Self::alloc_alignment(); - let elements_offset = alignment as usize; - let new_size = elements_offset + mem::size_of::() * (self.len() + slice.len()); + let new_len = self.len() + slice.len(); let non_null_elements = if let Some((elements, storage)) = self.elements_and_storage() { // Decrement the list's refence count. let mut copy = storage.get(); @@ -239,18 +244,21 @@ where if is_unique { // If we have enough capacity, we can add to the existing elements in-place. - if self.capacity() >= new_size { + if self.capacity() >= slice.len() { elements } else { // There wasn't enough capacity, so we need a new allocation. // Since this is a unique RocList, we can use realloc here. - let old_size = elements_offset + (mem::size_of::() * self.len()); - let new_ptr = unsafe { - roc_realloc(self.ptr_to_allocation(), new_size, old_size, alignment).cast() + roc_realloc( + storage.as_ptr().cast(), + Self::alloc_bytes(new_len), + Self::alloc_bytes(self.capacity), + Self::alloc_alignment(), + ) }; - self.capacity = new_size; + self.capacity = new_len; Self::elems_from_allocation(NonNull::new(new_ptr).unwrap_or_else(|| { todo!("Reallocation failed"); @@ -263,7 +271,7 @@ where } // Allocate new memory. - let new_elements = Self::elems_with_capacity(new_size); + let new_elements = Self::elems_with_capacity(slice.len()); // Copy the old elements to the new allocation. unsafe { @@ -273,7 +281,7 @@ where new_elements } } else { - Self::elems_with_capacity(new_size) + Self::elems_with_capacity(slice.len()) }; self.elements = Some(non_null_elements); From fe5ed634dbb5647c2373ff224b108e398768b327 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 22:17:09 -0400 Subject: [PATCH 52/65] Add some reserve() roc_std tests --- roc_std/tests/test_roc_std.rs | 37 +++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 7f71cd454b..883802628e 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -59,9 +59,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut #[cfg(test)] mod test_roc_std { - use roc_std::RocDec; - use roc_std::RocResult; - use roc_std::RocStr; + use roc_std::{RocDec, RocList, RocResult, RocStr}; fn roc_str_byte_representation(string: &RocStr) -> [u8; RocStr::SIZE] { unsafe { core::mem::transmute_copy(string) } @@ -123,7 +121,7 @@ mod test_roc_std { } #[test] - fn reserve() { + fn reserve_small_str() { let mut roc_str = RocStr::empty(); roc_str.reserve(42); @@ -131,6 +129,33 @@ mod test_roc_std { assert_eq!(roc_str.capacity(), 42); } + #[test] + fn reserve_big_str() { + let mut roc_str = RocStr::empty(); + + roc_str.reserve(5000); + + assert_eq!(roc_str.capacity(), 5000); + } + + #[test] + fn reserve_small_list() { + let mut roc_list = RocList::::empty(); + + roc_list.reserve(42); + + assert_eq!(roc_list.capacity(), 42); + } + + #[test] + fn reserve_big_list() { + let mut roc_list = RocList::::empty(); + + roc_list.reserve(5000); + + assert_eq!(roc_list.capacity(), 5000); + } + #[test] fn roc_result_to_rust_result() { let greeting = "Hello, World!"; @@ -261,8 +286,8 @@ mod with_terminator { #[test] fn no_excess_capacity() { - // // This is small enough that it should be a stack allocation for UTF-8 - // verify_temp_c(&string_for_len(33), 0); + // This is small enough that it should be a stack allocation for UTF-8 + verify_temp_c(&string_for_len(33), 0); // This is big enough that it should be a heap allocation for UTF-8 and UTF-16 verify_temp_c(&string_for_len(65), 0); From 67bec4828e197c91b63a4bfa3d3560439045127e Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 22:19:59 -0400 Subject: [PATCH 53/65] Use ManuallyDrop::drop --- roc_std/src/roc_list.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 1c9a15f020..f2314a66a9 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -430,7 +430,7 @@ impl Drop for RocList { for index in 0..self.len() { let elem_ptr = elements.as_ptr().add(index); - mem::drop::(ManuallyDrop::take(&mut *elem_ptr)); + ManuallyDrop::drop(&mut *elem_ptr); } // Release the memory. From a668cd67e0e909a4c5b46de7755bde2031a98d0b Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 22:25:06 -0400 Subject: [PATCH 54/65] Add a RocBox test and pub use it --- roc_std/src/lib.rs | 1 + roc_std/tests/test_roc_std.rs | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/roc_std/src/lib.rs b/roc_std/src/lib.rs index 76b534604c..18a35720db 100644 --- a/roc_std/src/lib.rs +++ b/roc_std/src/lib.rs @@ -16,6 +16,7 @@ mod roc_list; mod roc_str; mod storage; +pub use roc_box::RocBox; pub use roc_list::RocList; pub use roc_str::{InteriorNulError, RocStr}; pub use storage::Storage; diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 883802628e..4b66f3c012 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -59,7 +59,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut #[cfg(test)] mod test_roc_std { - use roc_std::{RocDec, RocList, RocResult, RocStr}; + use roc_std::{RocBox, RocDec, RocList, RocResult, RocStr}; fn roc_str_byte_representation(string: &RocStr) -> [u8; RocStr::SIZE] { unsafe { core::mem::transmute_copy(string) } @@ -189,6 +189,14 @@ mod test_roc_std { assert!(roc_result.is_err()); } + #[test] + fn create_roc_box() { + let contents = 42; + let roc_box = RocBox::new(contents); + + assert_eq!(roc_box.into_inner(), contents) + } + #[test] fn roc_dec_fmt() { assert_eq!( From 4eb7e0931cecd866b57eaf681668a98d58a1363d Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 22:41:59 -0400 Subject: [PATCH 55/65] Improve some documentation --- roc_std/src/roc_str.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index e6fdc5c923..bb42ebd566 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -371,10 +371,17 @@ impl RocStr { } /// Generic version of temp_c_utf8 and temp_c_utf16. The given function will be - /// passed a pointer to elements of type E. The number of elements will be equal to - /// the length of the `&str` given as the other parameter. There will be a `0` element - /// at the end of those elements, even if there are no elements (in which case the - /// `&str` argument will be empty and the `*mut E` will point to a `0`). + /// passed a pointer to elements of type E. The pointer will have enough room to hold + /// one element for each byte of the given `&str`'s length, plus the terminator element. + /// + /// The terminator will be written right after the end of the space for the other elements, + /// but all the memory in that space before the terminator will be uninitialized. This means + /// if you want to do something like copy the contents of the `&str` into there, that will + /// need to be done explicitly. + /// + /// The terminator is always written - even if there are no other elements present before it. + /// (In such a case, the `&str` argument will be empty and the `*mut E` will point directly + /// to the terminator). /// /// One use for this is to convert slashes to backslashes in Windows paths; /// this function provides the most efficient way to do that, because no extra From 8f49c866981b9b39be2e315ba24dc920baefd6d2 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Thu, 9 Jun 2022 22:55:41 -0400 Subject: [PATCH 56/65] Add with_excess_capacity test --- roc_std/tests/test_roc_std.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 4b66f3c012..044c2f2dac 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -300,4 +300,13 @@ mod with_terminator { // This is big enough that it should be a heap allocation for UTF-8 and UTF-16 verify_temp_c(&string_for_len(65), 0); } + + #[test] + fn with_excess_capacity() { + // We should be able to use the excess capacity for all of these. + verify_temp_c(&string_for_len(33), 1); + verify_temp_c(&string_for_len(33), 33); + verify_temp_c(&string_for_len(65), 1); + verify_temp_c(&string_for_len(65), 64); + } } From 6bf74629e2cc18ccde761d3fbcb5539042730ba2 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Fri, 10 Jun 2022 19:59:27 -0400 Subject: [PATCH 57/65] cargo fmt --- compiler/build/src/link.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler/build/src/link.rs b/compiler/build/src/link.rs index f9aff2e0fb..ad66b5f5ba 100644 --- a/compiler/build/src/link.rs +++ b/compiler/build/src/link.rs @@ -421,7 +421,6 @@ pub fn rebuild_host( let env_home = env::var("HOME").unwrap_or_else(|_| "".to_string()); let env_cpath = env::var("CPATH").unwrap_or_else(|_| "".to_string()); - if zig_host_src.exists() { // Compile host.zig From dcdffcc48971a6c41702a962e9dec18564f9ee6e Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 11 Jun 2022 20:08:06 +0200 Subject: [PATCH 58/65] remove undesired drop --- roc_std/src/roc_list.rs | 19 ++++++++++++------- roc_std/src/roc_str.rs | 5 ++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index f2314a66a9..e36702f5a5 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -154,7 +154,6 @@ where if storage.get().is_unique() { unsafe { let old_alloc = self.ptr_to_allocation(); - old_elements_ptr = elements.as_ptr(); // Try to reallocate in-place. let new_alloc = roc_realloc( @@ -187,6 +186,11 @@ where new_elems = Self::elems_with_capacity(new_len); old_elements_ptr = elements.as_ptr(); + unsafe { + // Copy the old elements to the new allocation. + copy_nonoverlapping(old_elements_ptr, new_elems.as_ptr(), self.length); + } + // Decrease the current allocation's reference count. let mut new_storage = storage.get(); let needs_dealloc = new_storage.decrease(); @@ -212,16 +216,17 @@ where } } - unsafe { - // Copy the old elements to the new allocation. - copy_nonoverlapping(old_elements_ptr, new_elems.as_ptr(), self.length); - } + let length = self.length; - *self = Self { + let mut updated = Self { elements: Some(new_elems), - length: self.length, + length, capacity: new_len, }; + + std::mem::swap(self, &mut updated); + + std::mem::forget(updated); } pub fn from_slice(slice: &[T]) -> Self { diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index bb42ebd566..94e6af9c61 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -152,9 +152,12 @@ impl RocStr { roc_list.reserve(bytes); - *self = RocStr(RocStrInner { + let mut updated = RocStr(RocStrInner { heap_allocated: ManuallyDrop::new(roc_list), }); + + std::mem::swap(self, &mut updated); + std::mem::forget(updated); } } From de24a1ca748d438c0235424d69c78bcec4704841 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 11 Jun 2022 21:29:10 +0200 Subject: [PATCH 59/65] fix incorrect offset used to look up refcount --- roc_std/src/roc_box.rs | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/roc_std/src/roc_box.rs b/roc_std/src/roc_box.rs index 63da66a1b8..5eea65fb5e 100644 --- a/roc_std/src/roc_box.rs +++ b/roc_std/src/roc_box.rs @@ -18,7 +18,7 @@ pub struct RocBox { impl RocBox { pub fn new(contents: T) -> Self { let alignment = Self::alloc_alignment(); - let bytes = mem::size_of_val(&contents) + alignment; + let bytes = mem::size_of::() + alignment; let ptr = unsafe { roc_alloc(bytes, alignment as u32) }; @@ -27,11 +27,8 @@ impl RocBox { } // Initialize the reference count. - unsafe { - let storage_ptr = ptr.cast::(); - - storage_ptr.write(Storage::new_reference_counted()); - } + let refcount_one = Storage::new_reference_counted(); + unsafe { ptr.cast::().write(refcount_one) }; let contents = unsafe { let contents_ptr = ptr.cast::().add(alignment).cast::(); @@ -57,12 +54,14 @@ impl RocBox { } fn storage(&self) -> &Cell { + let alignment = Self::alloc_alignment(); + unsafe { &*self .contents .as_ptr() .cast::() - .sub(mem::size_of::()) + .sub(alignment) .cast::>() } } From 065c637de4405329154d5fbbd1ab24eaad7fd4fa Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 11 Jun 2022 21:32:32 +0200 Subject: [PATCH 60/65] free memory if realloc fails --- roc_std/src/roc_list.rs | 3 +++ roc_std/src/storage.rs | 3 +++ roc_std/tests/test_roc_std.rs | 2 +- 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index e36702f5a5..9cfa1cc2be 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -166,6 +166,9 @@ where if new_alloc == old_alloc { // We successfully reallocated in-place; we're done! return; + } else if new_alloc.is_null() { + roc_dealloc(old_alloc, Self::alloc_alignment()); + panic!("realloc failed"); } else { // We got back a different allocation; copy the existing elements // into it. We don't need to increment their refcounts because diff --git a/roc_std/src/storage.rs b/roc_std/src/storage.rs index 4332f8581c..5ec9882a97 100644 --- a/roc_std/src/storage.rs +++ b/roc_std/src/storage.rs @@ -7,6 +7,9 @@ use core::num::NonZeroIsize; /// once it has been stabilized. const REFCOUNT_1: NonZeroIsize = unsafe { NonZeroIsize::new_unchecked(isize::MIN) }; +const _ASSERT_STORAGE_SIZE: () = + assert!(std::mem::size_of::() == std::mem::size_of::()); + #[derive(Clone, Copy, Debug)] pub enum Storage { Readonly, diff --git a/roc_std/tests/test_roc_std.rs b/roc_std/tests/test_roc_std.rs index 044c2f2dac..0c510f0d09 100644 --- a/roc_std/tests/test_roc_std.rs +++ b/roc_std/tests/test_roc_std.rs @@ -191,7 +191,7 @@ mod test_roc_std { #[test] fn create_roc_box() { - let contents = 42; + let contents = 42i32; let roc_box = RocBox::new(contents); assert_eq!(roc_box.into_inner(), contents) From ecebe3b7d30e41529578cdc719610b98f7e66960 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 12 Jun 2022 14:25:49 +0200 Subject: [PATCH 61/65] manually free memory when the refcount has been erased --- roc_std/src/roc_list.rs | 3 --- roc_std/src/roc_str.rs | 9 ++++++++- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index 9cfa1cc2be..e36702f5a5 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -166,9 +166,6 @@ where if new_alloc == old_alloc { // We successfully reallocated in-place; we're done! return; - } else if new_alloc.is_null() { - roc_dealloc(old_alloc, Self::alloc_alignment()); - panic!("realloc failed"); } else { // We got back a different allocation; copy the existing elements // into it. We don't need to increment their refcounts because diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 94e6af9c61..c99737a2da 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -466,7 +466,14 @@ impl RocStr { // the terminator into the existing allocation. let ptr = roc_list.ptr_to_allocation() as *mut E; - terminate(ptr, self.as_str()) + let result = terminate(ptr, self.as_str()); + + // we cannot rely on the RocStr::drop implementation, because the + // refcount was overwritten with string bytes + std::mem::forget(self); + crate::roc_dealloc(ptr.cast(), std::mem::align_of::() as u32); + + result } else { // We didn't have sufficient excess capacity already, // so we need to do either a new stack allocation or a new From af69c52846c52277006696aacebe3d5a324c2230 Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Sun, 12 Jun 2022 09:04:38 -0400 Subject: [PATCH 62/65] Fix how RocList updates in-place in another case --- roc_std/src/roc_list.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/roc_std/src/roc_list.rs b/roc_std/src/roc_list.rs index e36702f5a5..9be5b1fe9d 100644 --- a/roc_std/src/roc_list.rs +++ b/roc_std/src/roc_list.rs @@ -210,23 +210,17 @@ where } None => { // This is an empty list, so `reserve` is the same as `with_capacity`. - *self = Self::with_capacity(new_len); + self.update_to(Self::with_capacity(new_len)); return; } } - let length = self.length; - - let mut updated = Self { + self.update_to(Self { elements: Some(new_elems), - length, + length: self.length, capacity: new_len, - }; - - std::mem::swap(self, &mut updated); - - std::mem::forget(updated); + }); } pub fn from_slice(slice: &[T]) -> Self { @@ -312,6 +306,16 @@ where self.capacity = self.length } + + /// Replace self with a new version, without letting `drop` run in between. + fn update_to(&mut self, mut updated: Self) { + // We want to replace `self` with `updated` in a way that makes sure + // `self`'s `drop` never runs. This is the proper way to do that: + // swap them, and then forget the "updated" one (which is now pointing + // to the original allocation). + mem::swap(self, &mut updated); + mem::forget(updated); + } } impl Deref for RocList { From 018ddcdf3acaac109813ba56470fa93e5b4ada8f Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Sun, 12 Jun 2022 09:11:10 -0400 Subject: [PATCH 63/65] Update some comments --- roc_std/src/roc_str.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index c99737a2da..4447d729e7 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -462,18 +462,19 @@ impl RocStr { debug_assert!(align_of::() >= align_of::()); // We happen to have sufficient excess capacity already, - // so we will be able to write the UTF-16 chars as well as + // so we will be able to write the new elements as well as // the terminator into the existing allocation. let ptr = roc_list.ptr_to_allocation() as *mut E; + let answer = terminate(ptr, self.as_str()); - let result = terminate(ptr, self.as_str()); + // We cannot rely on the RocStr::drop implementation, because + // it tries to use the refcount - which we just overwrote + // with string bytes. - // we cannot rely on the RocStr::drop implementation, because the - // refcount was overwritten with string bytes std::mem::forget(self); crate::roc_dealloc(ptr.cast(), std::mem::align_of::() as u32); - result + answer } else { // We didn't have sufficient excess capacity already, // so we need to do either a new stack allocation or a new From b08e704614a06451327fc418f1292d27fbc4aa2a Mon Sep 17 00:00:00 2001 From: Richard Feldman Date: Sun, 12 Jun 2022 09:11:21 -0400 Subject: [PATCH 64/65] Use core:: over std:: in some roc_std places --- roc_std/src/roc_str.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/roc_std/src/roc_str.rs b/roc_std/src/roc_str.rs index 4447d729e7..863d33f5b0 100644 --- a/roc_std/src/roc_str.rs +++ b/roc_std/src/roc_str.rs @@ -1,10 +1,11 @@ #![deny(unsafe_op_in_unsafe_fn)] use core::{ + cmp, convert::TryFrom, fmt, - hash::Hash, - mem::{size_of, ManuallyDrop}, + hash::{self, Hash}, + mem::{self, size_of, ManuallyDrop}, ops::{Deref, DerefMut}, ptr, }; @@ -156,8 +157,8 @@ impl RocStr { heap_allocated: ManuallyDrop::new(roc_list), }); - std::mem::swap(self, &mut updated); - std::mem::forget(updated); + mem::swap(self, &mut updated); + mem::forget(updated); } } @@ -470,9 +471,8 @@ impl RocStr { // We cannot rely on the RocStr::drop implementation, because // it tries to use the refcount - which we just overwrote // with string bytes. - - std::mem::forget(self); - crate::roc_dealloc(ptr.cast(), std::mem::align_of::() as u32); + mem::forget(self); + crate::roc_dealloc(ptr.cast(), mem::align_of::() as u32); answer } else { @@ -576,13 +576,13 @@ impl PartialEq for RocStr { impl Eq for RocStr {} impl PartialOrd for RocStr { - fn partial_cmp(&self, other: &Self) -> Option { + fn partial_cmp(&self, other: &Self) -> Option { self.as_str().partial_cmp(other.as_str()) } } impl Ord for RocStr { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { + fn cmp(&self, other: &Self) -> cmp::Ordering { self.as_str().cmp(other.as_str()) } } @@ -704,7 +704,7 @@ impl DerefMut for SmallString { } impl Hash for RocStr { - fn hash(&self, state: &mut H) { + fn hash(&self, state: &mut H) { self.as_str().hash(state) } } From 8d85d873261bb929f3a93d4370ce41a480c216d3 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sun, 12 Jun 2022 16:09:44 +0200 Subject: [PATCH 65/65] extend lifetime of string till end of block --- examples/hello-world/rust-platform/src/lib.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/hello-world/rust-platform/src/lib.rs b/examples/hello-world/rust-platform/src/lib.rs index 7df8fe563c..e657725529 100644 --- a/examples/hello-world/rust-platform/src/lib.rs +++ b/examples/hello-world/rust-platform/src/lib.rs @@ -3,6 +3,7 @@ use core::ffi::c_void; use roc_std::RocStr; use std::ffi::CStr; +use std::mem::ManuallyDrop; use std::os::raw::c_char; extern "C" { @@ -56,7 +57,7 @@ pub unsafe extern "C" fn roc_memset(dst: *mut c_void, c: i32, n: usize) -> *mut #[no_mangle] pub extern "C" fn rust_main() -> i32 { unsafe { - let mut roc_str = RocStr::default(); + let mut roc_str = ManuallyDrop::new(RocStr::default()); roc_main(&mut roc_str); let len = roc_str.len(); @@ -65,6 +66,8 @@ pub extern "C" fn rust_main() -> i32 { if libc::write(1, str_bytes, len) < 0 { panic!("Writing to stdout failed!"); } + + ManuallyDrop::drop(&mut roc_str) } // Exit code