Fix countGraphemes wiring; reorganize Zig builtins; streamline how we export Zig functions

This commit is contained in:
Jared Ramirez 2020-11-06 16:43:32 -06:00
parent f34235e050
commit e112a406a2
11 changed files with 519 additions and 512 deletions

View File

@ -110,6 +110,14 @@ mod repl_eval {
);
}
#[test]
fn str_count_graphemes() {
    // The expected output is a grapheme count, so the expression under test
    // must call Str.countGraphemes (not Str.concat, which takes two strings
    // and returns a Str).
    expect_success(
        "Str.countGraphemes \"å🤔\"",
        "2 : Int",
    );
}
#[test]
fn literal_empty_list() {
expect_success("[]", "[] : List *");

View File

@ -1,476 +1,33 @@
const builtin = @import("builtin");
const std = @import("std");
const math = std.math;
const unicode = std.unicode;
const testing = std.testing;
const expectEqual = testing.expectEqual;
const expect = testing.expect;
const roc_builtins_namespace = "roc_builtins";
// Num Module
const num = @import("num.zig");
comptime { exportNumFn(num.atan, "atan"); }
comptime { exportNumFn(num.isFinite, "is_finite"); }
comptime { exportNumFn(num.powInt, "pow_int"); }
comptime { exportNumFn(num.acos, "acos"); }
comptime { exportNumFn(num.asin, "asin"); }
// MATH
const math_namespace = roc_builtins_namespace ++ ".math";
// Str Module
const str = @import("str.zig");
comptime { exportStrFn(str.strSplitInPlace, "str_split_in_place"); }
comptime { exportStrFn(str.countSegments, "count_segements"); }
comptime { exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters"); }
comptime { @export(atan, .{ .name = math_namespace ++ ".atan", .linkage = .Strong }); }
fn atan(num: f64) callconv(.C) f64 {
return math.atan(num);
}
comptime { @export(isFinite, .{ .name = math_namespace ++ ".is_finite", .linkage = .Strong }); }
fn isFinite(num: f64) callconv(.C) bool {
return math.isFinite(num);
}
comptime { @export(powInt, .{ .name = math_namespace ++ ".pow_int", .linkage = .Strong }); }
fn powInt(base: i64, exp: i64) callconv(.C) i64 {
return math.pow(i64, base, exp);
}
comptime { @export(acos, .{ .name = math_namespace ++ ".acos", .linkage = .Strong }); }
fn acos(num: f64) callconv(.C) f64 {
return math.acos(num);
}
comptime { @export(asin, .{ .name = math_namespace ++ ".asin", .linkage = .Strong }); }
fn asin(num: f64) callconv(.C) f64 {
return math.asin(num);
}
// STR
const str_namespace = roc_builtins_namespace ++ ".str";
// Str.split
const RocStr = struct {
str_bytes_ptrs: [*]u8,
str_len: usize,
pub fn init(bytes: [*]u8, len: usize) RocStr {
return RocStr {
.str_bytes_ptrs = bytes,
.str_len = len
};
}
pub fn eq(self: *RocStr, other: RocStr) bool {
if (self.str_len != other.str_len) {
return false;
}
var areEq: bool = true;
var index: usize = 0;
while (index < self.str_len and areEq) {
areEq = areEq and self.str_bytes_ptrs[index] == other.str_bytes_ptrs[index];
index = index + 1;
}
return areEq;
}
test "RocStr.eq: equal" {
const str1_len = 3;
var str1: [str1_len]u8 = "abc".*;
const str1_ptr: [*]u8 = &str1;
var roc_str1 = RocStr.init(str1_ptr, str1_len);
const str2_len = 3;
var str2: [str2_len]u8 = "abc".*;
const str2_ptr: [*]u8 = &str2;
var roc_str2 = RocStr.init(str2_ptr, str2_len);
expect(roc_str1.eq(roc_str2));
}
test "RocStr.eq: not equal different length" {
const str1_len = 4;
var str1: [str1_len]u8 = "abcd".*;
const str1_ptr: [*]u8 = &str1;
var roc_str1 = RocStr.init(str1_ptr, str1_len);
const str2_len = 3;
var str2: [str2_len]u8 = "abc".*;
const str2_ptr: [*]u8 = &str2;
var roc_str2 = RocStr.init(str2_ptr, str2_len);
expect(!roc_str1.eq(roc_str2));
}
test "RocStr.eq: not equal same length" {
const str1_len = 3;
var str1: [str1_len]u8 = "acb".*;
const str1_ptr: [*]u8 = &str1;
var roc_str1 = RocStr.init(str1_ptr, str1_len);
const str2_len = 3;
var str2: [str2_len]u8 = "abc".*;
const str2_ptr: [*]u8 = &str2;
var roc_str2 = RocStr.init(str2_ptr, str2_len);
expect(!roc_str1.eq(roc_str2));
}
};
comptime { @export(strSplitInPlace, .{ .name = str_namespace ++ ".str_split_in_place", .linkage = .Strong }); }
fn strSplitInPlace(
array: [*]RocStr,
array_len: usize,
str_bytes_ptrs: [*]u8,
str_len: usize,
delimiter_bytes: [*]u8,
delimiter_len: usize
) callconv(.C) void {
var ret_array_index : usize = 0;
var sliceStart_index : usize = 0;
var str_index : usize = 0;
if (str_len > delimiter_len) {
const end_index : usize = str_len - delimiter_len;
while (str_index <= end_index) {
var delimiter_index : usize = 0;
var matches_delimiter = true;
while (delimiter_index < delimiter_len) {
var delimiterChar = delimiter_bytes[delimiter_index];
var strChar = str_bytes_ptrs[str_index + delimiter_index];
if (delimiterChar != strChar) {
matches_delimiter = false;
break;
}
delimiter_index += 1;
}
if (matches_delimiter) {
array[ret_array_index] = RocStr.init(str_bytes_ptrs + sliceStart_index, str_index - sliceStart_index);
sliceStart_index = str_index + delimiter_len;
ret_array_index += 1;
str_index += delimiter_len;
} else {
str_index += 1;
}
}
}
array[ret_array_index] = RocStr.init(str_bytes_ptrs + sliceStart_index, str_len - sliceStart_index);
}
test "strSplitInPlace: no delimiter" {
// Str.split "abc" "!" == [ "abc" ]
var str: [3]u8 = "abc".*;
const str_ptr: [*]u8 = &str;
var delimiter: [1]u8 = "!".*;
const delimiter_ptr: [*]u8 = &delimiter;
var array: [1]RocStr = undefined;
const array_ptr: [*]RocStr = &array;
strSplitInPlace(
array_ptr,
1,
str_ptr,
3,
delimiter_ptr,
1
);
var expected = [1]RocStr{
RocStr.init(str_ptr, 3),
};
expectEqual(array.len, expected.len);
expect(array[0].eq(expected[0]));
}
test "strSplitInPlace: delimiter on sides" {
// Str.split "tttghittt" "ttt" == [ "", "ghi", "" ]
const str_len: usize = 9;
var str: [str_len]u8 = "tttghittt".*;
const str_ptr: [*]u8 = &str;
const delimiter_len = 3;
var delimiter: [delimiter_len]u8 = "ttt".*;
const delimiter_ptr: [*]u8 = &delimiter;
const array_len : usize = 3;
var array: [array_len]RocStr = [_]RocStr{
undefined ,
undefined,
undefined,
};
const array_ptr: [*]RocStr = &array;
strSplitInPlace(
array_ptr,
array_len,
str_ptr,
str_len,
delimiter_ptr,
delimiter_len
);
const expected_str_len: usize = 3;
var expected_str: [expected_str_len]u8 = "ghi".*;
const expected_str_ptr: [*]u8 = &expected_str;
var expectedRocStr = RocStr.init(expected_str_ptr, expected_str_len);
expectEqual(array.len, 3);
expectEqual(array[0].str_len, 0);
expect(array[1].eq(expectedRocStr));
expectEqual(array[2].str_len, 0);
}
test "strSplitInPlace: three pieces" {
// Str.split "a!b!c" "!" == [ "a", "b", "c" ]
const str_len: usize = 5;
var str: [str_len]u8 = "a!b!c".*;
const str_ptr: [*]u8 = &str;
const delimiter_len = 1;
var delimiter: [delimiter_len]u8 = "!".*;
const delimiter_ptr: [*]u8 = &delimiter;
const array_len : usize = 3;
var array: [array_len]RocStr = undefined;
const array_ptr: [*]RocStr = &array;
strSplitInPlace(
array_ptr,
array_len,
str_ptr,
str_len,
delimiter_ptr,
delimiter_len
);
var a: [1]u8 = "a".*;
const a_ptr: [*]u8 = &a;
var b: [1]u8 = "b".*;
const b_ptr: [*]u8 = &b;
var c: [1]u8 = "c".*;
const c_ptr: [*]u8 = &c;
var expected_array = [array_len]RocStr{
RocStr{
.str_bytes_ptrs = a_ptr,
.str_len = 1,
},
RocStr{
.str_bytes_ptrs = b_ptr,
.str_len = 1,
},
RocStr{
.str_bytes_ptrs = c_ptr,
.str_len = 1,
}
};
expectEqual(expected_array.len, array.len);
expect(array[0].eq(expected_array[0]));
expect(array[1].eq(expected_array[1]));
expect(array[2].eq(expected_array[2]));
}
// This is used for `Str.split : Str, Str -> Array Str
// It is used to count how many segments the input `_str`
// needs to be broken into, so that we can allocate a array
// of that size. It always returns at least 1.
comptime { @export(countSegments, .{ .name = str_namespace ++ ".count_segements", .linkage = .Strong }); }
fn countSegments(
str_bytes_ptrs: [*]u8,
str_len: usize,
delimiter_bytes: [*]u8,
delimiter_len: usize
) callconv(.C) i64 {
var count: i64 = 1;
if (str_len > delimiter_len) {
var str_index: usize = 0;
const end_cond: usize = str_len - delimiter_len;
while (str_index < end_cond) {
var delimiter_index: usize = 0;
var matches_delimiter = true;
while (delimiter_index < delimiter_len) {
const delimiterChar = delimiter_bytes[delimiter_index];
const strChar = str_bytes_ptrs[str_index + delimiter_index];
if (delimiterChar != strChar) {
matches_delimiter = false;
break;
}
delimiter_index += 1;
}
if (matches_delimiter) {
count += 1;
}
str_index += 1;
}
}
return count;
}
test "countSegments: long delimiter" {
// Str.split "str" "delimiter" == [ "str" ]
// 1 segment
const str_len: usize = 3;
var str: [str_len]u8 = "str".*;
const str_ptr: [*]u8 = &str;
const delimiter_len = 9;
var delimiter: [delimiter_len]u8 = "delimiter".*;
const delimiter_ptr: [*]u8 = &delimiter;
const segments_count = countSegments(
str_ptr,
str_len,
delimiter_ptr,
delimiter_len
);
expectEqual(segments_count, 1);
}
test "countSegments: delimiter at start" {
// Str.split "hello there" "hello" == [ "", " there" ]
// 2 segments
const str_len: usize = 11;
var str: [str_len]u8 = "hello there".*;
const str_ptr: [*]u8 = &str;
const delimiter_len = 5;
var delimiter: [delimiter_len]u8 = "hello".*;
const delimiter_ptr: [*]u8 = &delimiter;
const segments_count = countSegments(
str_ptr,
str_len,
delimiter_ptr,
delimiter_len
);
expectEqual(segments_count, 2);
}
test "countSegments: delimiter interspered" {
// Str.split "a!b!c" "!" == [ "a", "b", "c" ]
// 3 segments
const str_len: usize = 5;
var str: [str_len]u8 = "a!b!c".*;
const str_ptr: [*]u8 = &str;
const delimiter_len = 1;
var delimiter: [delimiter_len]u8 = "!".*;
const delimiter_ptr: [*]u8 = &delimiter;
const segments_count = countSegments(
str_ptr,
str_len,
delimiter_ptr,
delimiter_len
);
expectEqual(segments_count, 3);
}
// Str.countGraphemeClusters
const grapheme = @import("grapheme.zig");
comptime { @export(countGraphemeClusters, .{ .name = str_namespace ++ ".count_grapheme_clusters", .linkage = .Strong }); }
fn countGraphemeClusters(bytes_ptr: [*]u8, bytes_len: usize) callconv(.C) usize {
var bytes = bytes_ptr[0..bytes_len];
var iter = (unicode.Utf8View.init(bytes) catch unreachable).iterator();
var count: usize = 0;
var grapheme_break_state: ?grapheme.BoundClass = null;
var grapheme_break_state_ptr = &grapheme_break_state;
var opt_last_codepoint: ?u21 = null;
while (iter.nextCodepoint()) |cur_codepoint| {
if (opt_last_codepoint) |last_codepoint| {
var did_break = grapheme.isGraphemeBreak(
last_codepoint,
cur_codepoint,
grapheme_break_state_ptr
);
if (did_break) {
count += 1;
grapheme_break_state = null;
}
}
opt_last_codepoint = cur_codepoint;
}
if (bytes_len != 0) {
count += 1;
}
return count;
}
test "countGraphemeClusters: empty string" {
var bytes_arr = "".*;
var bytes_len = bytes_arr.len;
var bytes_ptr: [*]u8 = &bytes_arr;
var count = countGraphemeClusters(bytes_ptr, bytes_len);
expectEqual(count, 0);
}
test "countGraphemeClusters: ascii characters" {
var bytes_arr = "abcd".*;
var bytes_len = bytes_arr.len;
var bytes_ptr: [*]u8 = &bytes_arr;
var count = countGraphemeClusters(bytes_ptr, bytes_len);
expectEqual(count, 4);
}
test "countGraphemeClusters: utf8 characters" {
var bytes_arr = "ãxā".*;
var bytes_len = bytes_arr.len;
var bytes_ptr: [*]u8 = &bytes_arr;
var count = countGraphemeClusters(bytes_ptr, bytes_len);
expectEqual(count, 3);
}
test "countGraphemeClusters: emojis" {
var bytes_arr = "🤔🤔🤔".*;
var bytes_len = bytes_arr.len;
var bytes_ptr: [*]u8 = &bytes_arr;
var count = countGraphemeClusters(bytes_ptr, bytes_len);
expectEqual(count, 3);
}
test "countGraphemeClusters: emojis and ut8 characters" {
var bytes_arr = "🤔å🤔¥🤔ç".*;
var bytes_len = bytes_arr.len;
var bytes_ptr: [*]u8 = &bytes_arr;
var count = countGraphemeClusters(bytes_ptr, bytes_len);
expectEqual(count, 6);
}
test "countGraphemeClusters: emojis, ut8, and ascii characters" {
var bytes_arr = "6🤔å🤔e¥🤔çpp".*;
var bytes_len = bytes_arr.len;
var bytes_ptr: [*]u8 = &bytes_arr;
var count = countGraphemeClusters(bytes_ptr, bytes_len);
expectEqual(count, 10);
// Export helpers - Must be run inside a comptime
// Exports `fn_target` with strong linkage under the symbol name
// "roc_builtins." followed by `fn_name`.
// Wraps `@export`, so it has to be invoked from inside a `comptime` block.
fn exportBuiltinFn(comptime fn_target: anytype, comptime fn_name: []const u8) void {
@export(fn_target, .{ .name = "roc_builtins." ++ fn_name, .linkage = .Strong });
}
// Exports a Num builtin: prefixes `fn_name` with "num." and forwards to
// `exportBuiltinFn`, giving the symbol "roc_builtins.num.<fn_name>".
fn exportNumFn(comptime fn_target: anytype, comptime fn_name: []const u8) void {
exportBuiltinFn(fn_target, "num." ++ fn_name);
}
// Exports a Str builtin: prefixes `fn_name` with "str." and forwards to
// `exportBuiltinFn`, giving the symbol "roc_builtins.str.<fn_name>".
fn exportStrFn(comptime fn_target: anytype, comptime fn_name: []const u8) void {
exportBuiltinFn(fn_target, "str." ++ fn_name);
}
// Run all tests in imported modules
// https://github.com/ziglang/zig/blob/master/lib/std/std.zig#L94
test "" {
testing.refAllDecls(@This());

View File

@ -0,0 +1,22 @@
const std = @import("std");
const math = std.math;
/// Returns the arctangent of `num` by delegating to `std.math.atan`.
/// Uses the C calling convention.
pub fn atan(num: f64) callconv(.C) f64 {
return math.atan(num);
}
/// Returns the result of `std.math.isFinite` for `num`.
/// Uses the C calling convention.
pub fn isFinite(num: f64) callconv(.C) bool {
return math.isFinite(num);
}
/// Raises `base` to the power `exp` using `std.math.pow` on `i64`.
/// Uses the C calling convention.
/// NOTE(review): what happens for a negative `exp`, or when the result does
/// not fit in an i64, depends on `std.math.pow`'s integer path — confirm
/// callers cannot reach an error/unreachable case.
pub fn powInt(base: i64, exp: i64) callconv(.C) i64 {
return math.pow(i64, base, exp);
}
/// Returns the arccosine of `num` by delegating to `std.math.acos`.
/// Uses the C calling convention.
pub fn acos(num: f64) callconv(.C) f64 {
return math.acos(num);
}
/// Returns the arcsine of `num` by delegating to `std.math.asin`.
/// Uses the C calling convention.
pub fn asin(num: f64) callconv(.C) f64 {
return math.asin(num);
}

View File

@ -0,0 +1,437 @@
const std = @import("std");
const unicode = std.unicode;
const testing = std.testing;
const expectEqual = testing.expectEqual;
const expect = testing.expect;
/// A string described by a raw pointer and a byte length; it does not own
/// or copy the bytes it points at.
///
/// Declared `extern` because `[*]RocStr` crosses the C-ABI boundary in
/// `strSplitInPlace`: a plain Zig `struct` has no guaranteed field order or
/// size, so only an `extern struct` gives a layout the caller can rely on.
const RocStr = extern struct {
    str_bytes_ptrs: [*]u8,
    str_len: usize,

    /// Wraps the `len` bytes starting at `bytes`. Nothing is copied.
    pub fn init(bytes: [*]u8, len: usize) RocStr {
        return RocStr{
            .str_bytes_ptrs = bytes,
            .str_len = len,
        };
    }

    /// Returns true when both strings have the same length and the same
    /// bytes. The receiver is only read, so a const pointer is enough and
    /// `eq` can also be called on `const` values.
    pub fn eq(self: *const RocStr, other: RocStr) bool {
        if (self.str_len != other.str_len) {
            return false;
        }

        return std.mem.eql(
            u8,
            self.str_bytes_ptrs[0..self.str_len],
            other.str_bytes_ptrs[0..other.str_len],
        );
    }

    test "RocStr.eq: equal" {
        const str1_len = 3;
        var str1: [str1_len]u8 = "abc".*;
        const str1_ptr: [*]u8 = &str1;
        var roc_str1 = RocStr.init(str1_ptr, str1_len);

        const str2_len = 3;
        var str2: [str2_len]u8 = "abc".*;
        const str2_ptr: [*]u8 = &str2;
        const roc_str2 = RocStr.init(str2_ptr, str2_len);

        expect(roc_str1.eq(roc_str2));
    }

    test "RocStr.eq: not equal different length" {
        const str1_len = 4;
        var str1: [str1_len]u8 = "abcd".*;
        const str1_ptr: [*]u8 = &str1;
        var roc_str1 = RocStr.init(str1_ptr, str1_len);

        const str2_len = 3;
        var str2: [str2_len]u8 = "abc".*;
        const str2_ptr: [*]u8 = &str2;
        const roc_str2 = RocStr.init(str2_ptr, str2_len);

        expect(!roc_str1.eq(roc_str2));
    }

    test "RocStr.eq: not equal same length" {
        const str1_len = 3;
        var str1: [str1_len]u8 = "acb".*;
        const str1_ptr: [*]u8 = &str1;
        var roc_str1 = RocStr.init(str1_ptr, str1_len);

        const str2_len = 3;
        var str2: [str2_len]u8 = "abc".*;
        const str2_ptr: [*]u8 = &str2;
        const roc_str2 = RocStr.init(str2_ptr, str2_len);

        expect(!roc_str1.eq(roc_str2));
    }
};
// Str.split
/// Splits the `str_len` bytes at `str_bytes_ptrs` on every occurrence of the
/// `delimiter_len` bytes at `delimiter_bytes`, writing one `RocStr` per
/// segment into `array`. Segments point into the original string; nothing is
/// copied or allocated.
///
/// - `array` / `array_len`: destination buffer and its capacity. At most
///   `array_len` entries are written; if the string holds more delimiters
///   than the buffer has room for, the final entry receives the unsplit
///   remainder instead of writing past the end of the buffer.
/// - A string that is not longer than the delimiter is returned as a single
///   segment (same rule as `countSegments`).
/// - An empty delimiter never matches, so the string comes back as a single
///   segment instead of looping forever.
pub fn strSplitInPlace(
    array: [*]RocStr,
    array_len: usize,
    str_bytes_ptrs: [*]u8,
    str_len: usize,
    delimiter_bytes: [*]u8,
    delimiter_len: usize
) callconv(.C) void {
    // Nowhere to write even the single mandatory segment.
    if (array_len == 0) {
        return;
    }

    var ret_array_index: usize = 0;
    var slice_start_index: usize = 0;

    if (delimiter_len > 0 and str_len > delimiter_len) {
        const delimiter = delimiter_bytes[0..delimiter_len];
        const end_index: usize = str_len - delimiter_len;
        var str_index: usize = 0;

        // Always keep one slot free for the trailing segment written below.
        while (str_index <= end_index and ret_array_index + 1 < array_len) {
            const candidate = str_bytes_ptrs[str_index .. str_index + delimiter_len];

            if (std.mem.eql(u8, candidate, delimiter)) {
                array[ret_array_index] = RocStr.init(
                    str_bytes_ptrs + slice_start_index,
                    str_index - slice_start_index,
                );
                slice_start_index = str_index + delimiter_len;
                ret_array_index += 1;
                // Matches never overlap: resume right after the delimiter.
                str_index += delimiter_len;
            } else {
                str_index += 1;
            }
        }
    }

    // Whatever follows the last delimiter (possibly nothing) is the final segment.
    array[ret_array_index] = RocStr.init(
        str_bytes_ptrs + slice_start_index,
        str_len - slice_start_index,
    );
}
test "strSplitInPlace: no delimiter" {
    // Str.split "abc" "!" == [ "abc" ]
    var input: [3]u8 = "abc".*;
    var separator: [1]u8 = "!".*;
    var segments: [1]RocStr = undefined;

    strSplitInPlace(&segments, 1, &input, 3, &separator, 1);

    // With no delimiter present the only segment is the whole input
    const whole_input = RocStr.init(&input, 3);

    expectEqual(segments.len, 1);
    expect(segments[0].eq(whole_input));
}
test "strSplitInPlace: delimiter on sides" {
    // Str.split "tttghittt" "ttt" == [ "", "ghi", "" ]
    var input: [9]u8 = "tttghittt".*;
    var separator: [3]u8 = "ttt".*;
    var segments: [3]RocStr = undefined;

    strSplitInPlace(&segments, 3, &input, 9, &separator, 3);

    var middle: [3]u8 = "ghi".*;
    const expected_middle = RocStr.init(&middle, 3);

    // Leading and trailing delimiters produce empty segments on both ends
    expectEqual(segments.len, 3);
    expectEqual(segments[0].str_len, 0);
    expect(segments[1].eq(expected_middle));
    expectEqual(segments[2].str_len, 0);
}
test "strSplitInPlace: three pieces" {
    // Str.split "a!b!c" "!" == [ "a", "b", "c" ]
    var input: [5]u8 = "a!b!c".*;
    var separator: [1]u8 = "!".*;
    var segments: [3]RocStr = undefined;

    strSplitInPlace(&segments, 3, &input, 5, &separator, 1);

    var first: [1]u8 = "a".*;
    var second: [1]u8 = "b".*;
    var third: [1]u8 = "c".*;
    const wanted = [3]RocStr{
        RocStr.init(&first, 1),
        RocStr.init(&second, 1),
        RocStr.init(&third, 1),
    };

    expectEqual(wanted.len, segments.len);
    expect(segments[0].eq(wanted[0]));
    expect(segments[1].eq(wanted[1]));
    expect(segments[2].eq(wanted[2]));
}
/// Used by `Str.split : Str, Str -> Array Str` to find out how many segments
/// the input string will be broken into, so that an array of exactly that
/// size can be allocated before `strSplitInPlace` fills it.
///
/// The scan mirrors `strSplitInPlace`:
/// - the last position where the delimiter can start (`str_len - delimiter_len`)
///   is included, so a delimiter at the very end of the string is counted;
/// - after a match the scan resumes past the delimiter, so overlapping
///   occurrences are not counted twice;
/// - a string that is not longer than the delimiter is a single segment;
/// - an empty delimiter never matches.
///
/// Always returns at least 1.
pub fn countSegments(
    str_bytes_ptrs: [*]u8,
    str_len: usize,
    delimiter_bytes: [*]u8,
    delimiter_len: usize
) callconv(.C) i64 {
    var count: i64 = 1;

    if (delimiter_len == 0 or str_len <= delimiter_len) {
        return count;
    }

    const delimiter = delimiter_bytes[0..delimiter_len];
    const end_index: usize = str_len - delimiter_len;
    var str_index: usize = 0;

    while (str_index <= end_index) {
        const candidate = str_bytes_ptrs[str_index .. str_index + delimiter_len];

        if (std.mem.eql(u8, candidate, delimiter)) {
            count += 1;
            str_index += delimiter_len;
        } else {
            str_index += 1;
        }
    }

    return count;
}
test "countSegments: long delimiter" {
    // Str.split "str" "delimiter" == [ "str" ]
    // 1 segment
    var input: [3]u8 = "str".*;
    var separator: [9]u8 = "delimiter".*;

    const found = countSegments(&input, 3, &separator, 9);

    expectEqual(found, 1);
}
test "countSegments: delimiter at start" {
    // Str.split "hello there" "hello" == [ "", " there" ]
    // 2 segments
    var input: [11]u8 = "hello there".*;
    var separator: [5]u8 = "hello".*;

    const found = countSegments(&input, 11, &separator, 5);

    expectEqual(found, 2);
}
test "countSegments: delimiter interspered" {
    // Str.split "a!b!c" "!" == [ "a", "b", "c" ]
    // 3 segments
    var input: [5]u8 = "a!b!c".*;
    var separator: [1]u8 = "!".*;

    const found = countSegments(&input, 5, &separator, 1);

    expectEqual(found, 3);
}
// Str.countGraphemeClusters
const grapheme = @import("helpers/grapheme.zig");
/// Counts the grapheme clusters in the `bytes_len` bytes starting at
/// `bytes_ptr`.
///
/// The bytes are decoded as UTF-8 and every adjacent pair of code points is
/// handed to `grapheme.isGraphemeBreak`; each reported break starts a new
/// cluster. Invalid UTF-8 is treated as unreachable.
pub fn countGraphemeClusters(bytes_ptr: [*]u8, bytes_len: usize) callconv(.C) usize {
    const view = unicode.Utf8View.init(bytes_ptr[0..bytes_len]) catch unreachable;
    var codepoints = view.iterator();

    // A non-empty string has one more grapheme than it has breaks,
    // so it starts at one even when no break is ever found
    var total: usize = if (bytes_len != 0) 1 else 0;
    var break_state: ?grapheme.BoundClass = null;
    var previous: ?u21 = null;

    while (codepoints.nextCodepoint()) |current| {
        if (previous) |prior| {
            if (grapheme.isGraphemeBreak(prior, current, &break_state)) {
                total += 1;
                // The break state is cleared whenever a break is reported
                break_state = null;
            }
        }
        previous = current;
    }

    return total;
}
test "countGraphemeClusters: empty string" {
    var text = "".*;
    const clusters = countGraphemeClusters(&text, text.len);
    expectEqual(clusters, 0);
}
test "countGraphemeClusters: ascii characters" {
    var text = "abcd".*;
    const clusters = countGraphemeClusters(&text, text.len);
    expectEqual(clusters, 4);
}
test "countGraphemeClusters: utf8 characters" {
    var text = "ãxā".*;
    const clusters = countGraphemeClusters(&text, text.len);
    expectEqual(clusters, 3);
}
test "countGraphemeClusters: emojis" {
    var text = "🤔🤔🤔".*;
    const clusters = countGraphemeClusters(&text, text.len);
    expectEqual(clusters, 3);
}
test "countGraphemeClusters: emojis and ut8 characters" {
    var text = "🤔å🤔¥🤔ç".*;
    const clusters = countGraphemeClusters(&text, text.len);
    expectEqual(clusters, 6);
}
test "countGraphemeClusters: emojis, ut8, and ascii characters" {
    var text = "6🤔å🤔e¥🤔çpp".*;
    const clusters = countGraphemeClusters(&text, text.len);
    expectEqual(clusters, 10);
}

View File

@ -17,11 +17,11 @@ pub fn get_bytes() -> Vec<u8> {
buffer
}
pub const MATH_ASIN: &str = "roc_builtins.math.asin";
pub const MATH_ACOS: &str = "roc_builtins.math.acos";
pub const MATH_ATAN: &str = "roc_builtins.math.atan";
pub const MATH_IS_FINITE: &str = "roc_builtins.math.is_finite";
pub const MATH_POW_INT: &str = "roc_builtins.math.pow_int";
pub const NUM_ASIN: &str = "roc_builtins.num.asin";
pub const NUM_ACOS: &str = "roc_builtins.num.acos";
pub const NUM_ATAN: &str = "roc_builtins.num.atan";
pub const NUM_IS_FINITE: &str = "roc_builtins.num.is_finite";
pub const NUM_POW_INT: &str = "roc_builtins.num.pow_int";
pub const STR_COUNT_SEGEMENTS: &str = "roc_builtins.str.count_segements";
pub const STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";

View File

@ -1,7 +1,7 @@
use inkwell::types::BasicTypeEnum;
use roc_module::low_level::LowLevel;
fn call_bitcode_fn<'a, 'ctx, 'env>(
pub fn call_bitcode_fn<'a, 'ctx, 'env>(
op: LowLevel,
env: &Env<'a, 'ctx, 'env>,
args: &[BasicValueEnum<'ctx>],

View File

@ -3033,7 +3033,7 @@ fn build_int_binop<'a, 'ctx, 'env>(
NumPowInt,
env,
&[lhs.into(), rhs.into()],
&bitcode::MATH_POW_INT,
&bitcode::NUM_POW_INT,
),
_ => {
unreachable!("Unrecognized int binary operation: {:?}", op);
@ -3041,7 +3041,7 @@ fn build_int_binop<'a, 'ctx, 'env>(
}
}
fn call_bitcode_fn<'a, 'ctx, 'env>(
pub fn call_bitcode_fn<'a, 'ctx, 'env>(
op: LowLevel,
env: &Env<'a, 'ctx, 'env>,
args: &[BasicValueEnum<'ctx>],
@ -3082,7 +3082,7 @@ fn build_float_binop<'a, 'ctx, 'env>(
let result = bd.build_float_add(lhs, rhs, "add_float");
let is_finite =
call_bitcode_fn(NumIsFinite, env, &[result.into()], &bitcode::MATH_IS_FINITE)
call_bitcode_fn(NumIsFinite, env, &[result.into()], &bitcode::NUM_IS_FINITE)
.into_int_value();
let then_block = context.append_basic_block(parent, "then_block");
@ -3104,7 +3104,7 @@ fn build_float_binop<'a, 'ctx, 'env>(
let result = bd.build_float_add(lhs, rhs, "add_float");
let is_finite =
call_bitcode_fn(NumIsFinite, env, &[result.into()], &bitcode::MATH_IS_FINITE)
call_bitcode_fn(NumIsFinite, env, &[result.into()], &bitcode::NUM_IS_FINITE)
.into_int_value();
let is_infinite = bd.build_not(is_finite, "negate");
@ -3234,10 +3234,10 @@ fn build_float_unary_op<'a, 'ctx, 'env>(
env.context.i64_type(),
"num_floor",
),
NumIsFinite => call_bitcode_fn(NumIsFinite, env, &[arg.into()], &bitcode::MATH_IS_FINITE),
NumAtan => call_bitcode_fn(NumAtan, env, &[arg.into()], &bitcode::MATH_ATAN),
NumAcos => call_bitcode_fn(NumAcos, env, &[arg.into()], &bitcode::MATH_ACOS),
NumAsin => call_bitcode_fn(NumAsin, env, &[arg.into()], &bitcode::MATH_ASIN),
NumIsFinite => call_bitcode_fn(NumIsFinite, env, &[arg.into()], &bitcode::NUM_IS_FINITE),
NumAtan => call_bitcode_fn(NumAtan, env, &[arg.into()], &bitcode::NUM_ATAN),
NumAcos => call_bitcode_fn(NumAcos, env, &[arg.into()], &bitcode::NUM_ACOS),
NumAsin => call_bitcode_fn(NumAsin, env, &[arg.into()], &bitcode::NUM_ASIN),
_ => {
unreachable!("Unrecognized int unary operation: {:?}", op);
}

View File

@ -1,4 +1,4 @@
use crate::llvm::build::{ptr_from_symbol, Env, InPlace, Scope};
use crate::llvm::build::{ptr_from_symbol, Env, InPlace, Scope, call_bitcode_fn};
use crate::llvm::build_list::{
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
};
@ -29,19 +29,19 @@ pub fn str_concat<'a, 'ctx, 'env>(
let second_str_ptr = ptr_from_symbol(scope, second_str_symbol);
let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
let str_wrapper_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
env,
parent,
*second_str_ptr,
str_wrapper_type,
ret_type,
|second_str_ptr, second_str_len, second_str_smallness| {
load_str(
env,
parent,
*first_str_ptr,
str_wrapper_type,
ret_type,
|first_str_ptr, first_str_len, first_str_smallness| {
// first_str_len > 0
// We do this check to avoid allocating memory. If the first input
@ -74,7 +74,7 @@ pub fn str_concat<'a, 'ctx, 'env>(
second_str_length_comparison,
if_second_str_is_nonempty,
if_second_str_is_empty,
str_wrapper_type,
ret_type,
)
};
@ -604,13 +604,13 @@ pub fn str_count_graphemes<'a, 'ctx, 'env>(
let ctx = env.context;
let sym_str_ptr = ptr_from_symbol(scope, str_symbol);
let str_wrapper_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
let ret_type = BasicTypeEnum::IntType(ctx.i64_type());
load_str(
env,
parent,
*sym_str_ptr,
str_wrapper_type,
ret_type,
|str_ptr, str_len, _str_smallness| {
call_bitcode_fn(
LowLevel::StrCountGraphemes,
@ -624,24 +624,3 @@ pub fn str_count_graphemes<'a, 'ctx, 'env>(
},
)
}
// Duplicated from build.rs for now; once it's all working I'll delete this and import it from a
// common place.
fn call_bitcode_fn<'a, 'ctx, 'env>(
op: LowLevel,
env: &Env<'a, 'ctx, 'env>,
args: &[BasicValueEnum<'ctx>],
fn_name: &str,
) -> BasicValueEnum<'ctx> {
let fn_val = env
.module
.get_function(fn_name)
.unwrap_or_else(|| panic!("Unrecognized builtin function: {:?} - if you're working on the Roc compiler, do you need to rebuild the bitcode? See compiler/builtins/bitcode/README.md", fn_name));
let call = env.builder.build_call(fn_val, args, "call_builtin");
call.set_call_convention(fn_val.get_call_conventions());
call.try_as_basic_value()
.left()
.unwrap_or_else(|| panic!("LLVM error: Invalid call for low-level op {:?}", op))
}

View File

@ -204,7 +204,12 @@ mod gen_str {
}
#[test]
fn str_count_graphemes() {
assert_evals_to!(r#"Str.countGraphemes "6🤔å🤔e¥🤔çpp""#, 10, usize);
fn str_count_graphemes_small_str() {
assert_evals_to!(r#"Str.countGraphemes "å🤔""#, 2, usize);
}
#[test]
fn str_count_graphemes_big_str() {
assert_evals_to!(r#"Str.countGraphemes "6🤔å🤔e¥🤔çppkd🙃1jdal🦯asdfa∆ltråø˚waia8918.,🏅jjc""#, 45, usize);
}
}

View File

@ -506,12 +506,11 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
// - arguments that we may want to update destructively must be Owned
// - other refcounted arguments are Borrowed
match op {
ListLen | StrIsEmpty => arena.alloc_slice_copy(&[borrowed]),
ListLen | StrIsEmpty | StrCountGraphemes => arena.alloc_slice_copy(&[borrowed]),
ListSet => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
ListSetInPlace => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]),
ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]),
ListConcat | StrConcat => arena.alloc_slice_copy(&[owned, borrowed]),
StrCountGraphemes => arena.alloc_slice_copy(&[borrowed]),
ListSingle => arena.alloc_slice_copy(&[irrelevant]),
ListRepeat => arena.alloc_slice_copy(&[irrelevant, irrelevant]),