Add RocStr.capacity and isNullTerminated

This commit is contained in:
Richard Feldman 2021-06-21 19:05:03 -04:00
parent d11f42743c
commit ebf7e6e3b9

View File

@ -217,6 +217,82 @@ pub const RocStr = extern struct {
return self.len() == 0;
}
// If a string happens to be null-terminated already, then we can pass its
// bytes directly to functions (e.g. for opening files) that require
// null-terminated strings. Otherwise, we need to allocate and copy a new
// null-terminated string, which has a much higher performance cost!
fn isNullTerminated(self: RocStr) bool {
const len = self.len();
const longest_small_str = @sizeOf(RocStr) - 1;
// NOTE: We want to compare length here, *NOT* check for is_small_str!
// This is because we explicitly want the empty string to be handled in
// this branch, even though the empty string is not a small string.
//
// (The other branch dereferences the bytes pointer, which is not safe
// to do for the empty string.)
if (len <= longest_small_str) {
// If we're a small string, then usually the next byte after the
// end of the string will be zero. (Small strings set all their
// unused bytes to 0, so that comparison for equality can be fast.)
//
// However, empty strings are *not* null terminated, so if this is
// empty, it should return false.
//
// Also, if we are exactly a maximum-length small string,
// then the next byte is off the end of the struct;
// in that case, we are also not null-terminated!
return len != 0 and len != longest_small_str;
} else {
// This is a big string, and it's not empty, so we can safely
// dereference the pointer.
const ptr: [*]usize = @ptrCast([*]usize, @alignCast(8, self.str_bytes));
const capacity_or_refcount: isize = (ptr - 1)[0];
// If capacity_or_refcount is positive, then it's a capacity value.
//
// If we have excess capacity, then we can safely read the next
// byte after the end of the string. Maybe it happens to be zero!
if (capacity_or_refcount > @intCast(isize, len)) {
return self.str_bytes[len] == 0;
} else {
// This string was refcounted or immortal; we can't safely read
// the next byte, so assume the string is not null-terminated.
return false;
}
}
}
// Returns (@sizeOf(RocStr) - 1) for small strings and the empty string.
// Returns 0 for refcounted stirngs and immortal strings.
// Returns the stored capacity value for all other strings.
pub fn capacity(self: RocStr) usize {
const len = self.len();
const longest_small_str = @sizeOf(RocStr) - 1;
if (len <= longest_small_str) {
// Note that although empty strings technically have the full
// capacity of a small string available, they aren't marked as small
// strings, so if you want to make use of that capacity, you need
// to first change its flag to mark it as a small string!
return longest_small_str;
} else {
const ptr: [*]usize = @ptrCast([*]usize, @alignCast(8, self.str_bytes));
const capacity_or_refcount: isize = (ptr - 1)[0];
if (capacity_or_refcount > 0) {
// If capacity_or_refcount is positive, that means it's a
// capacity value.
return capacity_or_refcount;
} else {
// This is either a refcount or else this big string is stored
// in a readonly section; either way, it has no capacity,
// because we cannot mutate it in-place!
return 0;
}
}
}
pub fn isUnique(self: RocStr) bool {
// the empty list is unique (in the sense that copying it will not leak memory)
if (self.isEmpty()) {