Merge pull request #2751 from rtfeldman/remove-identstr-allocation

remove allocation in Symbol creation
This commit is contained in:
Richard Feldman 2022-03-19 21:19:21 -04:00 committed by GitHub
commit 32c1b5f0bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 43 additions and 18 deletions

4
Cargo.lock generated
View File

@ -3647,6 +3647,9 @@ dependencies = [
[[package]]
name = "roc_ident"
version = "0.1.0"
dependencies = [
"arrayvec 0.7.2",
]
[[package]]
name = "roc_linker"
@ -3702,6 +3705,7 @@ dependencies = [
name = "roc_module"
version = "0.1.0"
dependencies = [
"arrayvec 0.7.2",
"bumpalo",
"lazy_static",
"roc_collections",

View File

@ -4,3 +4,6 @@ version = "0.1.0"
authors = ["The Roc Contributors"]
license = "UPL-1.0"
edition = "2018"
[dependencies]
arrayvec = "0.7.2"

View File

@ -20,9 +20,6 @@ use std::os::raw::c_char;
/// a UTF-8 string). This design works on little-endian targets, but a different
/// design for storing length might be necessary on big-endian targets.
// For big-endian, field order must be swapped!
// Otherwise, the discriminant byte will be in the wrong place.
#[cfg(target_endian = "little")]
#[repr(C)]
pub struct IdentStr {
elements: *const u8,
@ -30,6 +27,9 @@ pub struct IdentStr {
}
impl IdentStr {
// Reserve 1 byte for the discriminant
const SMALL_STR_BYTES: usize = std::mem::size_of::<Self>() - 1;
pub fn len(&self) -> usize {
let bytes = self.length.to_ne_bytes();
let last_byte = bytes[mem::size_of::<usize>() - 1];
@ -82,22 +82,34 @@ impl IdentStr {
(self as *const IdentStr).cast()
}
fn from_str(str: &str) -> Self {
/// Builds an inline ("small string") `IdentStr` directly from raw bytes.
///
/// The bytes are copied to the front of a zeroed, struct-sized buffer and the
/// final byte is set to `u8::MAX - len`, which carries both the length and
/// the small-string marker.
///
/// # Panics
///
/// Panics if `slice` is longer than `Self::SMALL_STR_BYTES`.
#[inline(always)]
const fn small_str_from_bytes(slice: &[u8]) -> Self {
    let count = slice.len();
    assert!(count <= Self::SMALL_STR_BYTES);

    let mut raw = [0u8; mem::size_of::<Self>()];

    // A manual `while` loop is required here: `for` loops and iterator
    // adaptors are not usable in a `const fn`.
    let mut idx = 0;
    while idx < count {
        raw[idx] = slice[idx];
        idx += 1;
    }

    // The last byte of the struct holds the length together with the
    // small-string bit.
    raw[Self::SMALL_STR_BYTES] = u8::MAX - count as u8;

    // SAFETY: `raw` is exactly `size_of::<Self>()` bytes, so the sizes match.
    // NOTE(review): presumably every bit pattern is a valid `IdentStr`
    // (pointer + usize fields) — confirm against the struct definition.
    unsafe { mem::transmute::<[u8; mem::size_of::<Self>()], Self>(raw) }
}
#[allow(clippy::should_implement_trait)]
pub fn from_str(str: &str) -> Self {
let slice = str.as_bytes();
let len = slice.len();
match len.cmp(&mem::size_of::<Self>()) {
Ordering::Less => {
let mut bytes = [0; mem::size_of::<Self>()];
// Copy the bytes from the slice into bytes.
bytes[..len].copy_from_slice(slice);
// Write length and small string bit to last byte of length.
bytes[mem::size_of::<usize>() * 2 - 1] = u8::MAX - len as u8;
unsafe { mem::transmute::<[u8; mem::size_of::<Self>()], Self>(bytes) }
}
Ordering::Less => Self::small_str_from_bytes(slice),
Ordering::Equal => {
// This fits in a small string, and is exactly long enough to
// take up the entire available struct

View File

@ -14,3 +14,4 @@ bumpalo = { version = "3.8.0", features = ["collections"] }
lazy_static = "1.4.0"
static_assertions = "1.1.0"
snafu = { version = "0.6.10", features = ["backtraces"] }
arrayvec = "0.7.2"

View File

@ -634,12 +634,17 @@ impl IdentIds {
/// This is used, for example, during canonicalization of an Expr::Closure
/// to generate a unique symbol to refer to that closure.
pub fn gen_unique(&mut self) -> IdentId {
// NOTE(review): this page renders the diff without +/- markers; the TODO and
// the `to_string()` line directly below look like the pre-change (removed)
// lines of this hunk, with the replacement following them — confirm against
// the repository.
// TODO convert this directly from u32 into IdentStr,
// without allocating an extra string along the way like this.
let ident = self.next_generated_name.to_string().into();
// Brings `fmt::Write` into scope so `write!` can target the buffer below.
use std::fmt::Write;
// Use the current counter value as the generated name, then advance the
// counter for the next call.
let index: u32 = self.next_generated_name;
self.next_generated_name += 1;
// `index` is a u32, so its decimal form is at most 10 characters
// (u32::MAX is "4294967295"); a capacity of 10 therefore always suffices.
let mut buffer: arrayvec::ArrayString<10> = arrayvec::ArrayString::new();
// The unwrap is expected never to fire: the buffer holds any u32 in decimal.
write!(buffer, "{}", index).unwrap();
// Build the identifier from the formatted digits held in `buffer`.
let ident = Ident(IdentStr::from_str(buffer.as_str()));
self.add(ident)
}