mirror of
https://github.com/roc-lang/roc.git
synced 2024-09-22 08:17:40 +03:00
Merge pull request #6216 from roc-lang/dict-ankerl-unordered-dense
Swap Dict implementation to ankerl dense unordered
This commit is contained in:
commit
f6bff3a86e
@ -277,4 +277,33 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
===========================================================
|
||||
|
||||
* ankerl::unordered_dense - https://github.com/martinus/unordered_dense
|
||||
|
||||
A rather direct port of the source into Roc is currently the implementation for our Dict type.
|
||||
Source code is in crates/compiler/builtins/roc/Dict.roc
|
||||
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2022 Martin Leitner-Ankerl
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
@ -59,6 +59,8 @@ fn generateLlvmIrFile(
|
||||
const obj = b.addObject(.{ .name = object_name, .root_source_file = main_path, .optimize = mode, .target = target, .use_llvm = true });
|
||||
obj.strip = true;
|
||||
obj.disable_stack_probing = true;
|
||||
if (target.cpu_arch != .wasm32)
|
||||
obj.bundle_compiler_rt = true;
|
||||
|
||||
// Generating the bin seems required to get zig to generate the llvm ir.
|
||||
_ = obj.getEmittedBin();
|
||||
@ -91,6 +93,8 @@ fn generateObjectFile(
|
||||
obj.link_function_sections = true;
|
||||
obj.force_pic = true;
|
||||
obj.disable_stack_probing = true;
|
||||
if (target.cpu_arch != .wasm32)
|
||||
obj.bundle_compiler_rt = true;
|
||||
|
||||
const obj_file = obj.getEmittedBin();
|
||||
|
||||
@ -112,7 +116,7 @@ fn makeLinux32Target() CrossTarget {
|
||||
|
||||
target.cpu_arch = std.Target.Cpu.Arch.x86;
|
||||
target.os_tag = std.Target.Os.Tag.linux;
|
||||
target.abi = std.Target.Abi.musl;
|
||||
target.abi = std.Target.Abi.none;
|
||||
|
||||
return target;
|
||||
}
|
||||
@ -122,7 +126,7 @@ fn makeLinuxAarch64Target() CrossTarget {
|
||||
|
||||
target.cpu_arch = std.Target.Cpu.Arch.aarch64;
|
||||
target.os_tag = std.Target.Os.Tag.linux;
|
||||
target.abi = std.Target.Abi.musl;
|
||||
target.abi = std.Target.Abi.none;
|
||||
|
||||
return target;
|
||||
}
|
||||
@ -132,7 +136,7 @@ fn makeLinuxX64Target() CrossTarget {
|
||||
|
||||
target.cpu_arch = std.Target.Cpu.Arch.x86_64;
|
||||
target.os_tag = std.Target.Os.Tag.linux;
|
||||
target.abi = std.Target.Abi.musl;
|
||||
target.abi = std.Target.Abi.none;
|
||||
|
||||
return target;
|
||||
}
|
||||
@ -142,7 +146,7 @@ fn makeWindows64Target() CrossTarget {
|
||||
|
||||
target.cpu_arch = std.Target.Cpu.Arch.x86_64;
|
||||
target.os_tag = std.Target.Os.Tag.windows;
|
||||
target.abi = std.Target.Abi.gnu;
|
||||
target.abi = std.Target.Abi.none;
|
||||
|
||||
return target;
|
||||
}
|
||||
|
@ -1,478 +0,0 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const math = std.math;
|
||||
|
||||
// Eventually, we need to statically ingest compiler-rt and get it working with the surgical linker, then these should not be needed anymore.
|
||||
// Until then, we are manually ingesting used parts of compiler-rt here.
|
||||
//
|
||||
// Taken from
|
||||
// https://github.com/ziglang/zig/tree/4976b58ab16069f8d3267b69ed030f29685c1abe/lib/compiler_rt/
|
||||
// Thank you Zig Contributors!
|
||||
|
||||
// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
|
||||
// calling convention of @Vector(2, u64), rather than what's standard.
|
||||
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;
|
||||
|
||||
const v2u64 = @Vector(2, u64);
|
||||
|
||||
// Export it as weak in case it is already linked in by something else.
|
||||
// Publish the 128-bit helpers under their libcall symbol names.
// The exports are skipped entirely when `want_windows_v2u64_abi` is true.
comptime {
    if (!want_windows_v2u64_abi) {
        // Multiplication with overflow and logical shift.
        @export(__muloti4, .{ .linkage = .Weak, .name = "__muloti4" });
        @export(__lshrti3, .{ .linkage = .Weak, .name = "__lshrti3" });

        // Signed and unsigned division / remainder.
        @export(__divti3, .{ .linkage = .Weak, .name = "__divti3" });
        @export(__modti3, .{ .linkage = .Weak, .name = "__modti3" });
        @export(__umodti3, .{ .linkage = .Weak, .name = "__umodti3" });
        @export(__udivti3, .{ .linkage = .Weak, .name = "__udivti3" });

        // Float to 128-bit integer conversions.
        @export(__fixdfti, .{ .linkage = .Weak, .name = "__fixdfti" });
        @export(__fixsfti, .{ .linkage = .Weak, .name = "__fixsfti" });
        @export(__fixunsdfti, .{ .linkage = .Weak, .name = "__fixunsdfti" });
        @export(__fixunssfti, .{ .linkage = .Weak, .name = "__fixunssfti" });
    }
}
|
||||
|
||||
/// Multiplies two i128 values, reporting overflow through `overflow`.
/// Picks the widening implementation only when a value twice as wide as i128
/// fits in `usize`; otherwise uses the division-based one.
pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
    const widening_fits = 2 * @bitSizeOf(i128) <= @bitSizeOf(usize);
    return if (widening_fits)
        muloXi4_genericFast(i128, a, b, overflow)
    else
        muloXi4_genericSmall(i128, a, b, overflow);
}
|
||||
|
||||
/// Signed 128-bit division; forwards to `div`.
pub fn __divti3(a: i128, b: i128) callconv(.C) i128 {
    const quotient: i128 = div(a, b);
    return quotient;
}
|
||||
|
||||
/// Signed 128-bit division taking and returning `@Vector(2, u64)`.
/// The vector bits are reinterpreted as i128 before calling `div`.
fn __divti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    const lhs: i128 = @bitCast(a);
    const rhs: i128 = @bitCast(b);
    const quotient: i128 = div(lhs, rhs);
    return @bitCast(quotient);
}
|
||||
|
||||
/// Signed division built on the unsigned `udivmod`: divide the magnitudes,
/// then apply the combined sign to the quotient.
inline fn div(a: i128, b: i128) i128 {
    // Arithmetic shift by 127 gives -1 for a negative operand and 0 otherwise.
    const sign_a = a >> (128 - 1);
    const sign_b = b >> (128 - 1);

    // (x ^ s) -% s negates x when s == -1 and leaves it alone when s == 0.
    const abs_a: u128 = @bitCast((a ^ sign_a) -% sign_a);
    const abs_b: u128 = @bitCast((b ^ sign_b) -% sign_b);

    const magnitude: u128 = udivmod(u128, abs_a, abs_b, null);
    const sign = sign_a ^ sign_b;
    const signed_magnitude: i128 = @bitCast(magnitude);
    return (signed_magnitude ^ sign) -% sign;
}
|
||||
|
||||
/// Unsigned 128-bit division; forwards to `udivmod` without requesting the remainder.
pub fn __udivti3(a: u128, b: u128) callconv(.C) u128 {
    const quotient: u128 = udivmod(u128, a, b, null);
    return quotient;
}
|
||||
|
||||
/// Unsigned 128-bit division taking and returning `@Vector(2, u64)`.
fn __udivti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    const lhs: u128 = @bitCast(a);
    const rhs: u128 = @bitCast(b);
    const quotient: u128 = udivmod(u128, lhs, rhs, null);
    return @bitCast(quotient);
}
|
||||
|
||||
/// Unsigned 128-bit remainder; the quotient computed by `udivmod` is discarded.
pub fn __umodti3(a: u128, b: u128) callconv(.C) u128 {
    var remainder: u128 = undefined;
    _ = udivmod(u128, a, b, &remainder);
    return remainder;
}
|
||||
|
||||
/// Unsigned 128-bit remainder taking and returning `@Vector(2, u64)`.
fn __umodti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    const lhs: u128 = @bitCast(a);
    const rhs: u128 = @bitCast(b);
    var remainder: u128 = undefined;
    _ = udivmod(u128, lhs, rhs, &remainder);
    return @bitCast(remainder);
}
|
||||
|
||||
/// Signed 128-bit remainder; forwards to `mod`.
pub fn __modti3(a: i128, b: i128) callconv(.C) i128 {
    const remainder: i128 = mod(a, b);
    return remainder;
}
|
||||
|
||||
/// Signed 128-bit remainder taking and returning `@Vector(2, u64)`.
fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
    const lhs: i128 = @bitCast(a);
    const rhs: i128 = @bitCast(b);
    const remainder: i128 = mod(lhs, rhs);
    return @bitCast(remainder);
}
|
||||
|
||||
/// Signed remainder built on the unsigned `udivmod`: take the remainder of
/// the magnitudes, then give it the sign of the dividend `a`.
inline fn mod(a: i128, b: i128) i128 {
    // -1 when the operand is negative, 0 otherwise.
    const sign_a = a >> (128 - 1);
    const sign_b = b >> (128 - 1);

    // Negate when the sign mask is -1.
    const abs_a: u128 = @bitCast((a ^ sign_a) -% sign_a);
    const abs_b: u128 = @bitCast((b ^ sign_b) -% sign_b);

    var remainder: u128 = undefined;
    _ = udivmod(u128, abs_a, abs_b, &remainder);

    const signed_remainder: i128 = @bitCast(remainder);
    return (signed_remainder ^ sign_a) -% sign_a;
}
|
||||
|
||||
/// Converts an f64 to i128 through `floatToInt`.
pub fn __fixdfti(a: f64) callconv(.C) i128 {
    const converted: i128 = floatToInt(i128, a);
    return converted;
}
|
||||
|
||||
/// Converts an f64 to i128 and returns its bits as `@Vector(2, u64)`.
fn __fixdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
    const converted: i128 = floatToInt(i128, a);
    return @bitCast(converted);
}
|
||||
|
||||
/// Converts an f32 to i128 through `floatToInt`.
pub fn __fixsfti(a: f32) callconv(.C) i128 {
    const converted: i128 = floatToInt(i128, a);
    return converted;
}
|
||||
|
||||
/// Converts an f32 to i128 and returns its bits as `@Vector(2, u64)`.
fn __fixsfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
    const converted: i128 = floatToInt(i128, a);
    return @bitCast(converted);
}
|
||||
|
||||
/// Converts an f64 to u128 through `floatToInt`.
pub fn __fixunsdfti(a: f64) callconv(.C) u128 {
    const converted: u128 = floatToInt(u128, a);
    return converted;
}
|
||||
|
||||
/// Converts an f64 to u128 and returns its bits as `@Vector(2, u64)`.
fn __fixunsdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
    const converted: u128 = floatToInt(u128, a);
    return @bitCast(converted);
}
|
||||
|
||||
/// Converts an f32 to u128 through `floatToInt`.
pub fn __fixunssfti(a: f32) callconv(.C) u128 {
    const converted: u128 = floatToInt(u128, a);
    return converted;
}
|
||||
|
||||
/// Converts an f32 to u128 and returns its bits as `@Vector(2, u64)`.
fn __fixunssfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
    const converted: u128 = floatToInt(u128, a);
    return @bitCast(converted);
}
|
||||
// mulo - multiplication overflow
|
||||
// * return a*%b.
|
||||
// * return if a*b overflows => 1 else => 0
|
||||
// - muloXi4_genericSmall as default
|
||||
// - muloXi4_genericFast for 2*bitsize <= usize
|
||||
|
||||
/// Wrapping multiplication with overflow detection that needs no integer
/// type wider than `ST`.
///
/// Returns `a *% b`. `overflow.*` is set to 1 when the exact product does not
/// fit in `ST` and to 0 otherwise.
inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    overflow.* = 0;
    const min = math.minInt(ST);
    // The product is never reassigned, so it is bound as a constant.
    const res: ST = a *% b;
    // Hacker's Delight section Overflow subsection Multiplication
    // case a=-2^{31}, b=-1 problem, because
    // on some machines a*b = -2^{31} with overflow
    // Then -2^{31}/-1 overflows and any result is possible.
    // => check with a<0 and b=-2^{31}
    //
    // The first clause must come first: it short-circuits before the
    // division check can be evaluated for that operand combination.
    if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b))
        overflow.* = 1;
    return res;
}
|
||||
|
||||
/// Wrapping multiplication with overflow detection that widens both operands
/// to a type twice as wide as `ST` and range-checks the exact product.
///
/// Supported `ST`: i32, i64 and i128. Any other type is rejected at compile
/// time with a message naming the type.
/// Returns the product truncated to `ST`. `overflow.*` is set to 1 when the
/// exact product lies outside the range of `ST` and to 0 otherwise.
inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
    overflow.* = 0;
    const EST = switch (ST) {
        i32 => i64,
        i64 => i128,
        i128 => i256,
        else => @compileError("muloXi4_genericFast does not support " ++ @typeName(ST)),
    };
    const min = math.minInt(ST);
    const max = math.maxInt(ST);
    // Cannot overflow: EST has twice the bits of ST.
    const res: EST = @as(EST, a) * @as(EST, b);
    //invariant: -2^{bitwidth(EST)} < res < 2^{bitwidth(EST)-1}
    if (res < min or max < res)
        overflow.* = 1;
    return @as(ST, @truncate(res));
}
|
||||
|
||||
// Byte order of the target, and the array indices of the low and high halves
// when a double-width integer is viewed as two single-width words.
const native_endian = builtin.cpu.arch.endian();
const low = if (native_endian == .Big) 1 else 0;
const high = 1 - low;
|
||||
|
||||
/// Unsigned division of `a` by `b`, where `DoubleInt` is an unsigned integer
/// type that is processed as two half-width words.
///
/// Returns the quotient. When `maybe_rem` is non-null the remainder is also
/// stored through it. Several special cases return early; everything else
/// falls through to a bit-by-bit shift-and-subtract loop.
pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt {
    // @setRuntimeSafety(builtin.is_test);

    const double_int_bits = @typeInfo(DoubleInt).Int.bits;
    const single_int_bits = @divExact(double_int_bits, 2);
    const SingleInt = std.meta.Int(.unsigned, single_int_bits);
    const SignedDoubleInt = std.meta.Int(.signed, double_int_bits);
    const Log2SingleInt = std.math.Log2Int(SingleInt);

    // Numerator and denominator split into words, indexed by `low` / `high`.
    const num: [2]SingleInt = @bitCast(a);
    const den: [2]SingleInt = @bitCast(b);
    var quo: [2]SingleInt = undefined;
    var rmd: [2]SingleInt = undefined;
    var shift: c_uint = undefined;

    // Special cases. In the diagrams below X is unknown and K != 0.
    if (num[high] == 0) {
        if (den[high] == 0) {
            // 0 X
            // ---
            // 0 X
            if (maybe_rem) |out| out.* = num[low] % den[low];
            return num[low] / den[low];
        }
        // 0 X
        // ---
        // K X
        if (maybe_rem) |out| out.* = num[low];
        return 0;
    }

    // From here on num[high] != 0.
    if (den[low] == 0) {
        if (den[high] == 0) {
            // K X
            // ---
            // 0 0
            if (maybe_rem) |out| out.* = num[high] % den[low];
            return num[high] / den[low];
        }
        // den[high] != 0
        if (num[low] == 0) {
            // K 0
            // ---
            // K 0
            if (maybe_rem) |out| {
                rmd[high] = num[high] % den[high];
                rmd[low] = 0;
                out.* = @as(DoubleInt, @bitCast(rmd));
            }
            return num[high] / den[high];
        }
        // K K
        // ---
        // K 0
        if ((den[high] & (den[high] - 1)) == 0) {
            // the divisor is a power of 2
            if (maybe_rem) |out| {
                rmd[low] = num[low];
                rmd[high] = num[high] & (den[high] - 1);
                out.* = @as(DoubleInt, @bitCast(rmd));
            }
            return num[high] >> @as(Log2SingleInt, @intCast(@ctz(den[high])));
        }
        // K K
        // ---
        // K 0
        shift = @as(c_uint, @bitCast(@as(c_int, @clz(den[high])) - @as(c_int, @clz(num[high]))));
        // 0 <= shift <= single_int_bits - 2 or shift large
        if (shift > single_int_bits - 2) {
            if (maybe_rem) |out| out.* = a;
            return 0;
        }
        shift += 1;
        // 1 <= shift <= single_int_bits - 1
        // quo.all = a << (double_int_bits - shift);
        quo[low] = 0;
        quo[high] = num[low] << @as(Log2SingleInt, @intCast(single_int_bits - shift));
        // rmd.all = a >> shift;
        rmd[high] = num[high] >> @as(Log2SingleInt, @intCast(shift));
        rmd[low] = (num[high] << @as(Log2SingleInt, @intCast(single_int_bits - shift))) | (num[low] >> @as(Log2SingleInt, @intCast(shift)));
    } else {
        // den[low] != 0
        if (den[high] == 0) {
            // K X
            // ---
            // 0 K
            if ((den[low] & (den[low] - 1)) == 0) {
                // the divisor is a power of 2
                if (maybe_rem) |out| out.* = num[low] & (den[low] - 1);
                if (den[low] == 1) return a;
                shift = @ctz(den[low]);
                quo[high] = num[high] >> @as(Log2SingleInt, @intCast(shift));
                quo[low] = (num[high] << @as(Log2SingleInt, @intCast(single_int_bits - shift))) | (num[low] >> @as(Log2SingleInt, @intCast(shift)));
                return @as(DoubleInt, @bitCast(quo));
            }
            // K X
            // ---
            // 0 K
            shift = 1 + single_int_bits + @as(c_uint, @clz(den[low])) - @as(c_uint, @clz(num[high]));
            // 2 <= shift <= double_int_bits - 1
            // quo.all = a << (double_int_bits - shift);
            // rmd.all = a >> shift;
            if (shift == single_int_bits) {
                quo[low] = 0;
                quo[high] = num[low];
                rmd[high] = 0;
                rmd[low] = num[high];
            } else if (shift < single_int_bits) {
                // 2 <= shift <= single_int_bits - 1
                quo[low] = 0;
                quo[high] = num[low] << @as(Log2SingleInt, @intCast(single_int_bits - shift));
                rmd[high] = num[high] >> @as(Log2SingleInt, @intCast(shift));
                rmd[low] = (num[high] << @as(Log2SingleInt, @intCast(single_int_bits - shift))) | (num[low] >> @as(Log2SingleInt, @intCast(shift)));
            } else {
                // single_int_bits + 1 <= shift <= double_int_bits - 1
                quo[low] = num[low] << @as(Log2SingleInt, @intCast(double_int_bits - shift));
                quo[high] = (num[high] << @as(Log2SingleInt, @intCast(double_int_bits - shift))) | (num[low] >> @as(Log2SingleInt, @intCast(shift - single_int_bits)));
                rmd[high] = 0;
                rmd[low] = num[high] >> @as(Log2SingleInt, @intCast(shift - single_int_bits));
            }
        } else {
            // K X
            // ---
            // K K
            shift = @as(c_uint, @bitCast(@as(c_int, @clz(den[high])) - @as(c_int, @clz(num[high]))));
            // 0 <= shift <= single_int_bits - 1 or shift large
            if (shift > single_int_bits - 1) {
                if (maybe_rem) |out| out.* = a;
                return 0;
            }
            shift += 1;
            // 1 <= shift <= single_int_bits
            // quo.all = a << (double_int_bits - shift);
            // rmd.all = a >> shift;
            quo[low] = 0;
            if (shift == single_int_bits) {
                quo[high] = num[low];
                rmd[high] = 0;
                rmd[low] = num[high];
            } else {
                rmd[high] = num[high] >> @as(Log2SingleInt, @intCast(shift));
                rmd[low] = (num[high] << @as(Log2SingleInt, @intCast(single_int_bits - shift))) | (num[low] >> @as(Log2SingleInt, @intCast(shift)));
                quo[high] = num[low] << @as(Log2SingleInt, @intCast(single_int_bits - shift));
            }
        }
    }

    // Not a special case.
    // quo and rmd are initialized with:
    // quo.all = a << (double_int_bits - shift);
    // rmd.all = a >> shift;
    // 1 <= shift <= double_int_bits - 1
    var carry_bit: u32 = 0;
    var rmd_all: DoubleInt = undefined;
    while (shift > 0) : (shift -= 1) {
        // rmd:quo = ((rmd:quo) << 1) | carry_bit
        rmd[high] = (rmd[high] << 1) | (rmd[low] >> (single_int_bits - 1));
        rmd[low] = (rmd[low] << 1) | (quo[high] >> (single_int_bits - 1));
        quo[high] = (quo[high] << 1) | (quo[low] >> (single_int_bits - 1));
        quo[low] = (quo[low] << 1) | carry_bit;

        // Branch-free form of:
        // carry_bit = 0;
        // if (rmd.all >= b)
        // {
        //     rmd.all -= b;
        //     carry_bit = 1;
        // }
        rmd_all = @as(DoubleInt, @bitCast(rmd));
        const mask: SignedDoubleInt = @as(SignedDoubleInt, @bitCast(b -% rmd_all -% 1)) >> (double_int_bits - 1);
        carry_bit = @as(u32, @intCast(mask & 1));
        rmd_all -= b & @as(DoubleInt, @bitCast(mask));
        rmd = @as([2]SingleInt, @bitCast(rmd_all));
    }

    if (maybe_rem) |out| out.* = rmd_all;
    return (@as(DoubleInt, @bitCast(quo)) << 1) | carry_bit;
}
|
||||
|
||||
/// Converts the float `a` to the integer type `I`.
///
/// Values whose unbiased exponent is negative convert to 0. Values too large
/// for `I` saturate to its maximum or minimum, and a negative input converts
/// to 0 for an unsigned `I`. Everything else is produced by shifting the
/// significand, with the sign applied for a signed `I`.
pub inline fn floatToInt(comptime I: type, a: anytype) I {
    const Log2Int = math.Log2Int;
    const Int = @import("std").meta.Int;

    const F = @TypeOf(a);
    const float_bits = @typeInfo(F).Float.bits;
    const int_bits = @typeInfo(I).Int.bits;
    const is_signed = @typeInfo(I).Int.signedness == .signed;
    const Bits = Int(.unsigned, float_bits);
    const sig_bits = math.floatMantissaBits(F);
    const exp_bits = math.floatExponentBits(F);
    const fractional_bits = floatFractionalBits(F);

    // const implicit_bit = if (F != f80) (@as(Bits, 1) << sig_bits) else 0;
    const implicit_bit = @as(Bits, 1) << sig_bits;
    const max_exp = (1 << (exp_bits - 1));
    const exp_bias = max_exp - 1;
    const sig_mask = (@as(Bits, 1) << sig_bits) - 1;

    // Split the input into sign, exponent and significand.
    const raw: Bits = @as(Bits, @bitCast(a));
    const negative = (raw >> (float_bits - 1)) != 0;
    const exponent = @as(i32, @intCast((raw << 1) >> (sig_bits + 1))) - exp_bias;
    const significand: Bits = (raw & sig_mask) | implicit_bit;

    // A negative exponent rounds to zero.
    if (exponent < 0) return 0;

    // Saturate when the value does not fit in I.
    switch (@typeInfo(I).Int.signedness) {
        .unsigned => {
            if (negative) return 0;
            if (@as(c_uint, @intCast(exponent)) >= @min(int_bits, max_exp)) return math.maxInt(I);
        },
        .signed => if (@as(c_uint, @intCast(exponent)) >= @min(int_bits - 1, max_exp)) {
            return if (negative) math.minInt(I) else math.maxInt(I);
        },
    }

    // Shift right while the exponent is below the fractional width,
    // otherwise shift left.
    const magnitude: I = if (exponent < fractional_bits)
        @as(I, @intCast(significand >> @as(Log2Int(Bits), @intCast(fractional_bits - exponent))))
    else
        @as(I, @intCast(significand)) << @as(Log2Int(I), @intCast(exponent - fractional_bits));

    // Two's complement negation for negative signed results.
    if (is_signed and negative)
        return ~magnitude +% 1;
    return magnitude;
}
|
||||
|
||||
/// Returns how many fractional mantissa bits the floating point type `T` has.
/// `T` must be a float type; unknown bit widths are a compile error.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
    comptime std.debug.assert(@typeInfo(T) == .Float);

    // standard IEEE floats have an implicit 0.m or 1.m integer part
    // f80 is special and has an explicitly stored bit in the MSB
    // this function corresponds to `MANT_DIG - 1' from C
    const float_width = @typeInfo(T).Float.bits;
    return switch (float_width) {
        16 => 10,
        32 => 23,
        64 => 52,
        80 => 63,
        128 => 112,
        else => @compileError("unknown floating point type " ++ @typeName(T)),
    };
}
|
||||
|
||||
/// Logical right shift of a 128-bit value by `b` bits; forwards to `lshrXi3`.
pub fn __lshrti3(a: i128, b: i32) callconv(.C) i128 {
    const shifted: i128 = lshrXi3(i128, a, b);
    return shifted;
}
|
||||
|
||||
// Logical shift right: zeros are shifted in from the left.
// Precondition: 0 <= b < T.bit_count
// The value is handled as two unsigned halves so each shift stays within a
// half-width word.
inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
    const Halves = HalveInt(T, false);
    const ShiftT = std.math.Log2Int(Halves.HalfT);

    // A zero shift leaves the value untouched.
    if (b == 0) return a;

    const src = Halves{ .all = a };
    var dst: Halves = undefined;

    if (b >= Halves.bits) {
        // Only bits of the high half survive, and they land in the low half.
        dst.s.high = 0;
        dst.s.low = src.s.high >> @as(ShiftT, @intCast(b - Halves.bits));
    } else {
        const amount = @as(ShiftT, @intCast(b));
        const spill = @as(ShiftT, @intCast(Halves.bits - b));
        dst.s.high = src.s.high >> amount;
        // Bits leaving the high half enter the top of the low half.
        dst.s.low = (src.s.high << spill) | (src.s.low >> amount);
    }

    return dst.all;
}
|
||||
|
||||
/// Returns an extern union that exposes the bits of `T` either as a whole
/// (`all`) or as two equally sized halves (`s.low` and `s.high`).
/// The halves are signed when `signed_half` is true and unsigned otherwise.
fn HalveInt(comptime T: type, comptime signed_half: bool) type {
    return extern union {
        pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
        pub const HalfTU = std.meta.Int(.unsigned, bits);
        pub const HalfTS = std.meta.Int(.signed, bits);
        pub const HalfT = if (signed_half) HalfTS else HalfTU;

        all: T,
        // Field order follows the target byte order so `low` overlays the
        // least significant half of `all`.
        s: switch (native_endian) {
            .Little => extern struct { low: HalfT, high: HalfT },
            .Big => extern struct { high: HalfT, low: HalfT },
        },
    };
}
|
@ -1,87 +0,0 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const arch = builtin.cpu.arch;
|
||||
const musl = @import("libc/musl.zig");
|
||||
const folly = @import("libc/folly.zig");
|
||||
const cpuid = @import("libc/cpuid.zig");
|
||||
|
||||
comptime {
    // TODO: remove this workaround.
    // Our wasm llvm pipeline always links in memcpy.
    // As such, our impl will conflict.
    //
    // We don't need memcpy for tests either, because the tests are built with -lc.
    const export_memcpy = !builtin.is_test and arch != .wasm32;
    if (export_memcpy) {
        @export(memcpy, .{ .linkage = .Strong, .name = "memcpy" });
    }
}
|
||||
|
||||
const Memcpy = *const fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C) [*]u8;
|
||||
|
||||
/// Function that `memcpy` forwards to on x86_64.
/// Only x86_64 has an initializer; evaluating this declaration for any other
/// architecture is a compile error that names the problem.
pub var memcpy_target: Memcpy = switch (arch) {
    .x86_64 => dispatch_memcpy,
    else => @compileError("memcpy_target is only available on x86_64"),
};
|
||||
|
||||
/// C ABI `memcpy` entry point.
/// Non-Windows x86_64 targets go through `memcpy_target`; Windows and every
/// other architecture use the musl implementation.
pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    // x86_64 has a special optimized memcpy that can use avx2.
    const use_dispatch = builtin.os.tag != .windows and arch == .x86_64;
    return if (use_dispatch)
        memcpy_target(dest, src, len)
    else
        musl.memcpy(dest, src, len);
}
|
||||
|
||||
/// Cached outcome of the CPU feature probe performed by `dispatch_memcpy`.
const MemcpyDecision = enum {
    /// No probe has run yet.
    uninitialized,
    /// Use the folly memcpy built with prefetchw.
    folly_prefetchw,
    /// Use the folly memcpy built with prefetcht0.
    folly_prefetcht0,
    /// Use the musl memcpy.
    musl,
};
|
||||
|
||||
var memcpy_decision: MemcpyDecision = .uninitialized;
|
||||
|
||||
/// Picks a memcpy implementation from CPU features and forwards the call.
/// The choice is stored in `memcpy_decision` on the first call and reused on
/// later calls.
fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    switch (arch) {
        .x86_64 => {
            // TODO: Switch this to overwrite the memcpy_target pointer once the surgical linker can support it.
            // Then dispatch will just happen on the first call instead of every call.
            // if (cpuid.supports_avx2()) {
            //     if (cpuid.supports_prefetchw()) {
            //         memcpy_target = folly.memcpy_prefetchw;
            //     } else {
            //         memcpy_target = folly.memcpy_prefetcht0;
            //     }
            // } else {
            //     memcpy_target = musl.memcpy;
            // }
            // return memcpy_target(dest, src, len);
            switch (memcpy_decision) {
                .folly_prefetchw => return folly.memcpy_prefetchw(dest, src, len),
                .folly_prefetcht0 => return folly.memcpy_prefetcht0(dest, src, len),
                .musl => return musl.memcpy(dest, src, len),
                .uninitialized => {
                    // Without AVX2 fall back to musl; with it, prefer the
                    // prefetchw variant when that instruction is reported.
                    memcpy_decision = if (!cpuid.supports_avx2())
                        .musl
                    else if (cpuid.supports_prefetchw())
                        .folly_prefetchw
                    else
                        .folly_prefetcht0;
                    // Re-enter now that a decision is recorded.
                    return dispatch_memcpy(dest, src, len);
                },
            }
        },
        else => unreachable,
    }
}
|
@ -1,7 +0,0 @@
|
||||
const builtin = @import("builtin");
|
||||
const os = builtin.os;
|
||||
|
||||
/// Prefix placed in front of assembly symbol names: "_" on macOS, empty elsewhere.
pub const function_prefix = if (os.tag == .macos) "_" else "";
|
@ -1,53 +0,0 @@
|
||||
// Check if AVX2 is supported.
|
||||
// Returns 1 if AVX2 is supported, 0 otherwise.
|
||||
.global {[function_prefix]s}supports_avx2;
|
||||
{[function_prefix]s}supports_avx2:
|
||||
// Save the EBX register.
|
||||
push %rbx
|
||||
|
||||
// Call the CPUID instruction with the EAX register set to 7 and ECX set to 0.
|
||||
// This will get the CPUID information for the current CPU.
|
||||
mov $7, %eax
|
||||
mov $0, %ecx
|
||||
cpuid
|
||||
|
||||
// The AVX2 feature flag is located in the EBX register at bit 5.
|
||||
bt $5, %ebx
|
||||
jc .avx2_supported
|
||||
|
||||
// AVX2 is not supported.
|
||||
pop %rbx
|
||||
mov $0, %eax
|
||||
ret
|
||||
|
||||
.avx2_supported:
|
||||
pop %rbx
|
||||
mov $1, %eax
|
||||
ret
|
||||
|
||||
// Check if prefetchw is supported.
|
||||
// Returns 1 if the prefetchw instruction is supported, 0 otherwise.
|
||||
.global {[function_prefix]s}supports_prefetchw;
|
||||
{[function_prefix]s}supports_prefetchw:
|
||||
// Save the EBX register.
|
||||
push %rbx
|
||||
|
||||
// Call the CPUID instruction with the EAX register set to 0x80000001 and ECX set to 0.
|
||||
// This will get the CPUID information for the current CPU.
|
||||
mov $0x80000001, %eax
|
||||
mov $0, %ecx
|
||||
cpuid
|
||||
|
||||
// The prefetchw feature flag is located in the ECX register at bit 8.
|
||||
bt $8, %ecx
|
||||
jc .prefetchw_supported
|
||||
|
||||
// prefetchw is not supported.
|
||||
pop %rbx
|
||||
mov $0, %eax
|
||||
ret
|
||||
|
||||
.prefetchw_supported:
|
||||
pop %rbx
|
||||
mov $1, %eax
|
||||
ret
|
@ -1,18 +0,0 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const arch = builtin.cpu.arch;
|
||||
const function_prefix = @import("assembly_util.zig").function_prefix;
|
||||
|
||||
// I couldn't manage to define this in a PIE friendly way with inline assembly.
|
||||
// Instead, I am defining it as global assembly functions.
|
||||
// Emits the contents of cpuid.S as global assembly, substituting the
// platform symbol prefix into the template. Only x86_64 is supported; any
// other architecture is rejected with an explicit compile error.
comptime {
    switch (arch) {
        .x86_64 => {
            asm (std.fmt.comptimePrint(@embedFile("cpuid.S"), .{ .function_prefix = function_prefix }));
        },
        else => @compileError("cpuid.zig only provides CPUID probes for x86_64"),
    }
}
|
||||
|
||||
pub extern fn supports_avx2() bool;
|
||||
pub extern fn supports_prefetchw() bool;
|
@ -1,2 +0,0 @@
|
||||
pub const memcpy_prefetchw = @import("folly/memcpy.zig").__folly_memcpy_prefetchw;
|
||||
pub const memcpy_prefetcht0 = @import("folly/memcpy.zig").__folly_memcpy_prefetcht0;
|
@ -1,437 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/*
|
||||
* __folly_memcpy: An optimized memcpy implementation that uses prefetch and
|
||||
* AVX2 instructions.
|
||||
*
|
||||
* This implementation of memcpy acts as a memmove: while overlapping copies
|
||||
* are undefined in memcpy, in some implementations they're the same function and
|
||||
* legacy programs rely on this behavior.
|
||||
*
|
||||
* This implementation uses prefetch to avoid dtlb misses. This can
|
||||
* substantially reduce dtlb store misses in cases where the destination
|
||||
* location is absent from L1 cache and where the copy size is small enough
|
||||
* that the hardware prefetcher doesn't have a large impact.
|
||||
*
|
||||
* The number of branches is limited by the use of overlapping loads & stores.
|
||||
* This helps with copies where the source and destination cache lines are already
|
||||
* present in L1 because there are fewer instructions to execute and fewer
|
||||
* branches to potentially mispredict.
|
||||
* e.g. to copy the last 4 <= n <= 7 bytes: copy the first & last 4 bytes (overlapped):
|
||||
* movl (%rsi), %r8d
|
||||
* movl -4(%rsi,%rdx), %r9d
|
||||
* movl %r8d, (%rdi)
|
||||
* movl %r9d, -4(%rdi,%rdx)
|
||||
*
|
||||
*
|
||||
* For sizes up to 256 all source data is first read into registers and then written:
|
||||
* - n <= 16: overlapping movs
|
||||
* - n <= 32: overlapping unaligned 16-byte SSE XMM load/stores
|
||||
* - n <= 256: overlapping unaligned 32-byte AVX YMM load/stores
|
||||
*
|
||||
* Large copies (> 256 bytes) use unaligned loads + aligned stores.
|
||||
* This is observed to always be faster than rep movsb, so the rep movsb
|
||||
* instruction is not used.
|
||||
* - The head & tail may be unaligned => they're always written using unaligned stores.
|
||||
*
|
||||
* If the copy size is humongous (> 32 KiB) and the source and destination are both
|
||||
* aligned, this memcpy will use non-temporal operations (AVX2). This can have
|
||||
* a substantial speedup for copies where data is absent from L1, but it
|
||||
* is significantly slower if the source and destination data were already
|
||||
* in L1. The use of non-temporal operations also has the effect that after
|
||||
* the copy is complete, the data will be moved out of L1, even if the data was
|
||||
* present before the copy started.
|
||||
*
|
||||
* For n > 256 and overlapping src & dst buffers (memmove):
|
||||
* - use unaligned loads + aligned stores, but not non-temporal stores
|
||||
* - for dst < src forward copy in 128 byte batches:
|
||||
* - unaligned load the first 32 bytes & last 4 x 32 bytes
|
||||
* - forward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
|
||||
* - unaligned store the first 32 bytes & last 4 x 32 bytes
|
||||
* - for dst > src backward copy in 128 byte batches:
|
||||
* - unaligned load the first 4 x 32 bytes & last 32 bytes
|
||||
* - backward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
|
||||
* - unaligned store the first 4 x 32 bytes & last 32 bytes
|
||||
*
|
||||
* @author Logan Evans <lpe@fb.com>
|
||||
*/
|
||||
|
||||
|
||||
// .type {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, @function not supported by windows
|
||||
{[function_prefix]s}__folly_memcpy_short_{[prefetch]s}:
|
||||
.cfi_startproc
|
||||
|
||||
.L_GE1_LE7_{[prefetch]s}:
|
||||
cmp $1, %rdx
|
||||
je .L_EQ1_{[prefetch]s}
|
||||
|
||||
cmp $4, %rdx
|
||||
jae .L_GE4_LE7_{[prefetch]s}
|
||||
|
||||
.L_GE2_LE3_{[prefetch]s}:
|
||||
movw (%rsi), %r8w
|
||||
movw -2(%rsi,%rdx), %r9w
|
||||
movw %r8w, (%rdi)
|
||||
movw %r9w, -2(%rdi,%rdx)
|
||||
ret
|
||||
|
||||
.balign 2
|
||||
.L_EQ1_{[prefetch]s}:
|
||||
movb (%rsi), %r8b
|
||||
movb %r8b, (%rdi)
|
||||
ret
|
||||
|
||||
// Aligning the target of a jump to an even address has a measurable
|
||||
// speedup in microbenchmarks.
|
||||
.balign 2
|
||||
.L_GE4_LE7_{[prefetch]s}:
|
||||
movl (%rsi), %r8d
|
||||
movl -4(%rsi,%rdx), %r9d
|
||||
movl %r8d, (%rdi)
|
||||
movl %r9d, -4(%rdi,%rdx)
|
||||
ret
|
||||
|
||||
.cfi_endproc
|
||||
// .size {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_short_{[prefetch]s} not supported by windows
|
||||
|
||||
// memcpy is an alternative entrypoint into the function named __folly_memcpy.
|
||||
// The compiler is able to call memcpy since the name is global while
|
||||
// stacktraces will show __folly_memcpy since that is the name of the function.
|
||||
// This is intended to aid in debugging by making it obvious which version of
|
||||
// memcpy is being used.
|
||||
.balign 64
|
||||
.globl {[function_prefix]s}__folly_memcpy_{[prefetch]s}
|
||||
// .type {[function_prefix]s}__folly_memcpy_{[prefetch]s}, @function not supported by windows
|
||||
|
||||
{[function_prefix]s}__folly_memcpy_{[prefetch]s}:
|
||||
.cfi_startproc
|
||||
|
||||
mov %rdi, %rax // return: $rdi
|
||||
|
||||
test %rdx, %rdx
|
||||
je .L_EQ0_{[prefetch]s}
|
||||
|
||||
{[prefetch]s} (%rdi)
|
||||
{[prefetch]s} -1(%rdi,%rdx)
|
||||
|
||||
cmp $8, %rdx
|
||||
jb .L_GE1_LE7_{[prefetch]s}
|
||||
|
||||
.L_GE8_{[prefetch]s}:
|
||||
cmp $32, %rdx
|
||||
ja .L_GE33_{[prefetch]s}
|
||||
|
||||
.L_GE8_LE32_{[prefetch]s}:
|
||||
cmp $16, %rdx
|
||||
ja .L_GE17_LE32_{[prefetch]s}
|
||||
|
||||
.L_GE8_LE16_{[prefetch]s}:
|
||||
mov (%rsi), %r8
|
||||
mov -8(%rsi,%rdx), %r9
|
||||
mov %r8, (%rdi)
|
||||
mov %r9, -8(%rdi,%rdx)
|
||||
.L_EQ0_{[prefetch]s}:
|
||||
ret
|
||||
|
||||
.balign 2
|
||||
.L_GE17_LE32_{[prefetch]s}:
|
||||
movdqu (%rsi), %xmm0
|
||||
movdqu -16(%rsi,%rdx), %xmm1
|
||||
movdqu %xmm0, (%rdi)
|
||||
movdqu %xmm1, -16(%rdi,%rdx)
|
||||
ret
|
||||
|
||||
.balign 2
|
||||
.L_GE193_LE256_{[prefetch]s}:
|
||||
vmovdqu %ymm3, 96(%rdi)
|
||||
vmovdqu %ymm4, -128(%rdi,%rdx)
|
||||
|
||||
.L_GE129_LE192_{[prefetch]s}:
|
||||
vmovdqu %ymm2, 64(%rdi)
|
||||
vmovdqu %ymm5, -96(%rdi,%rdx)
|
||||
|
||||
.L_GE65_LE128_{[prefetch]s}:
|
||||
vmovdqu %ymm1, 32(%rdi)
|
||||
vmovdqu %ymm6, -64(%rdi,%rdx)
|
||||
|
||||
.L_GE33_LE64_{[prefetch]s}:
|
||||
vmovdqu %ymm0, (%rdi)
|
||||
vmovdqu %ymm7, -32(%rdi,%rdx)
|
||||
|
||||
vzeroupper
|
||||
ret
|
||||
|
||||
.balign 2
|
||||
.L_GE33_{[prefetch]s}:
|
||||
vmovdqu (%rsi), %ymm0
|
||||
vmovdqu -32(%rsi,%rdx), %ymm7
|
||||
|
||||
cmp $64, %rdx
|
||||
jbe .L_GE33_LE64_{[prefetch]s}
|
||||
|
||||
{[prefetch]s} 64(%rdi)
|
||||
|
||||
vmovdqu 32(%rsi), %ymm1
|
||||
vmovdqu -64(%rsi,%rdx), %ymm6
|
||||
|
||||
cmp $128, %rdx
|
||||
jbe .L_GE65_LE128_{[prefetch]s}
|
||||
|
||||
{[prefetch]s} 128(%rdi)
|
||||
|
||||
vmovdqu 64(%rsi), %ymm2
|
||||
vmovdqu -96(%rsi,%rdx), %ymm5
|
||||
|
||||
cmp $192, %rdx
|
||||
jbe .L_GE129_LE192_{[prefetch]s}
|
||||
|
||||
{[prefetch]s} 192(%rdi)
|
||||
|
||||
vmovdqu 96(%rsi), %ymm3
|
||||
vmovdqu -128(%rsi,%rdx), %ymm4
|
||||
|
||||
cmp $256, %rdx
|
||||
jbe .L_GE193_LE256_{[prefetch]s}
|
||||
|
||||
.L_GE257_{[prefetch]s}:
|
||||
{[prefetch]s} 256(%rdi)
|
||||
|
||||
// Check if there is an overlap. If there is an overlap then the caller
|
||||
// has a bug since this is undefined behavior. However, for legacy
|
||||
// reasons this behavior is expected by some callers.
|
||||
//
|
||||
// All copies through 256 bytes will operate as a memmove since for
|
||||
// those sizes all reads are performed before any writes.
|
||||
//
|
||||
// This check uses the idea that there is an overlap if
|
||||
// (%rdi < (%rsi + %rdx)) && (%rsi < (%rdi + %rdx)),
|
||||
// or equivalently, there is no overlap if
|
||||
// ((%rsi + %rdx) <= %rdi) || ((%rdi + %rdx) <= %rsi).
|
||||
//
|
||||
// %r9 will be used after .L_ALIGNED_DST_LOOP to calculate how many
|
||||
// bytes remain to be copied.
|
||||
|
||||
// (%rsi + %rdx <= %rdi) => no overlap
|
||||
lea (%rsi,%rdx), %r9
|
||||
cmp %rdi, %r9
|
||||
jbe .L_NO_OVERLAP_{[prefetch]s}
|
||||
|
||||
// (%rdi + %rdx <= %rsi) => no overlap
|
||||
lea (%rdi,%rdx), %r8
|
||||
cmp %rsi, %r8
|
||||
// If no info is available in branch predictor's cache, Intel CPUs assume
|
||||
// forward jumps are not taken. Use a forward jump as overlapping buffers
|
||||
// are unlikely.
|
||||
ja .L_OVERLAP_{[prefetch]s}
|
||||
|
||||
.balign 2
|
||||
.L_NO_OVERLAP_{[prefetch]s}:
|
||||
vmovdqu %ymm0, (%rdi)
|
||||
vmovdqu %ymm1, 32(%rdi)
|
||||
vmovdqu %ymm2, 64(%rdi)
|
||||
vmovdqu %ymm3, 96(%rdi)
|
||||
|
||||
// Align %rdi to a 32 byte boundary.
|
||||
// %rcx = 128 - 31 & %rdi
|
||||
mov $128, %rcx
|
||||
and $31, %rdi
|
||||
sub %rdi, %rcx
|
||||
|
||||
lea (%rsi,%rcx), %rsi
|
||||
lea (%rax,%rcx), %rdi
|
||||
sub %rcx, %rdx
|
||||
|
||||
// %r8 is the end condition for the loop.
|
||||
lea -128(%rsi,%rdx), %r8
|
||||
|
||||
// This threshold is half of L1 cache on a Skylake machine, which means that
|
||||
// potentially all of L1 will be populated by this copy once it is executed
|
||||
// (dst and src are cached for temporal copies).
|
||||
// NON_TEMPORAL_STORE_THRESHOLD = $32768
|
||||
// cmp NON_TEMPORAL_STORE_THRESHOLD, %rdx
|
||||
cmp $32768, %rdx
|
||||
jae .L_NON_TEMPORAL_LOOP_{[prefetch]s}
|
||||
|
||||
.balign 2
|
||||
.L_ALIGNED_DST_LOOP_{[prefetch]s}:
|
||||
{[prefetch]s} 128(%rdi)
|
||||
{[prefetch]s} 192(%rdi)
|
||||
|
||||
vmovdqu (%rsi), %ymm0
|
||||
vmovdqu 32(%rsi), %ymm1
|
||||
vmovdqu 64(%rsi), %ymm2
|
||||
vmovdqu 96(%rsi), %ymm3
|
||||
add $128, %rsi
|
||||
|
||||
vmovdqa %ymm0, (%rdi)
|
||||
vmovdqa %ymm1, 32(%rdi)
|
||||
vmovdqa %ymm2, 64(%rdi)
|
||||
vmovdqa %ymm3, 96(%rdi)
|
||||
add $128, %rdi
|
||||
|
||||
cmp %r8, %rsi
|
||||
jb .L_ALIGNED_DST_LOOP_{[prefetch]s}
|
||||
|
||||
.L_ALIGNED_DST_LOOP_END_{[prefetch]s}:
|
||||
sub %rsi, %r9
|
||||
mov %r9, %rdx
|
||||
|
||||
vmovdqu %ymm4, -128(%rdi,%rdx)
|
||||
vmovdqu %ymm5, -96(%rdi,%rdx)
|
||||
vmovdqu %ymm6, -64(%rdi,%rdx)
|
||||
vmovdqu %ymm7, -32(%rdi,%rdx)
|
||||
|
||||
vzeroupper
|
||||
ret
|
||||
|
||||
.balign 2
|
||||
.L_NON_TEMPORAL_LOOP_{[prefetch]s}:
|
||||
testb $31, %sil
|
||||
jne .L_ALIGNED_DST_LOOP_{[prefetch]s}
|
||||
// This is prefetching the source data unlike ALIGNED_DST_LOOP which
|
||||
// prefetches the destination data. This choice is again informed by
|
||||
// benchmarks. With a non-temporal store the entirety of the cache line
|
||||
// is being written so the previous data can be discarded without being
|
||||
// fetched.
|
||||
prefetchnta 128(%rsi)
|
||||
prefetchnta 196(%rsi)
|
||||
|
||||
vmovntdqa (%rsi), %ymm0
|
||||
vmovntdqa 32(%rsi), %ymm1
|
||||
vmovntdqa 64(%rsi), %ymm2
|
||||
vmovntdqa 96(%rsi), %ymm3
|
||||
add $128, %rsi
|
||||
|
||||
vmovntdq %ymm0, (%rdi)
|
||||
vmovntdq %ymm1, 32(%rdi)
|
||||
vmovntdq %ymm2, 64(%rdi)
|
||||
vmovntdq %ymm3, 96(%rdi)
|
||||
add $128, %rdi
|
||||
|
||||
cmp %r8, %rsi
|
||||
jb .L_NON_TEMPORAL_LOOP_{[prefetch]s}
|
||||
|
||||
sfence
|
||||
jmp .L_ALIGNED_DST_LOOP_END_{[prefetch]s}
|
||||
|
||||
|
||||
.L_OVERLAP_{[prefetch]s}:
|
||||
.balign 2
|
||||
cmp %rdi, %rsi
|
||||
jb .L_OVERLAP_BWD_{[prefetch]s} // %rsi < %rdi => backward-copy
|
||||
je .L_RET_{[prefetch]s} // %rsi == %rdi => return, nothing to copy
|
||||
|
||||
// Source & destination buffers overlap. Forward copy.
|
||||
|
||||
vmovdqu (%rsi), %ymm8
|
||||
|
||||
// Align %rdi to a 32 byte boundary.
|
||||
// %rcx = 32 - 31 & %rdi
|
||||
mov $32, %rcx
|
||||
and $31, %rdi
|
||||
sub %rdi, %rcx
|
||||
|
||||
lea (%rsi,%rcx), %rsi
|
||||
lea (%rax,%rcx), %rdi
|
||||
sub %rcx, %rdx
|
||||
|
||||
// %r8 is the end condition for the loop.
|
||||
lea -128(%rsi,%rdx), %r8
|
||||
|
||||
|
||||
.L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}:
|
||||
{[prefetch]s} 128(%rdi)
|
||||
{[prefetch]s} 192(%rdi)
|
||||
|
||||
vmovdqu (%rsi), %ymm0
|
||||
vmovdqu 32(%rsi), %ymm1
|
||||
vmovdqu 64(%rsi), %ymm2
|
||||
vmovdqu 96(%rsi), %ymm3
|
||||
add $128, %rsi
|
||||
|
||||
vmovdqa %ymm0, (%rdi)
|
||||
vmovdqa %ymm1, 32(%rdi)
|
||||
vmovdqa %ymm2, 64(%rdi)
|
||||
vmovdqa %ymm3, 96(%rdi)
|
||||
add $128, %rdi
|
||||
|
||||
cmp %r8, %rsi
|
||||
jb .L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}
|
||||
|
||||
sub %rsi, %r9
|
||||
mov %r9, %rdx
|
||||
|
||||
vmovdqu %ymm4, -128(%rdi,%rdx)
|
||||
vmovdqu %ymm5, -96(%rdi,%rdx)
|
||||
vmovdqu %ymm6, -64(%rdi,%rdx)
|
||||
vmovdqu %ymm7, -32(%rdi,%rdx)
|
||||
vmovdqu %ymm8, (%rax) // %rax == the original (unaligned) %rdi
|
||||
|
||||
vzeroupper
|
||||
|
||||
.L_RET_{[prefetch]s}:
|
||||
ret
|
||||
|
||||
.L_OVERLAP_BWD_{[prefetch]s}:
|
||||
// Save last 32 bytes.
|
||||
vmovdqu -32(%rsi, %rdx), %ymm8
|
||||
lea -32(%rdi, %rdx), %r9
|
||||
|
||||
|
||||
// %r8 is the end condition for the loop.
|
||||
lea 128(%rsi), %r8
|
||||
|
||||
// Align %rdi+%rdx (destination end) to a 32 byte boundary.
|
||||
// %rcx = (%rdi + %rdx - 32) & 31
|
||||
mov %r9, %rcx
|
||||
and $31, %rcx
|
||||
// Set %rsi & %rdi to the end of the 32 byte aligned range.
|
||||
sub %rcx, %rdx
|
||||
add %rdx, %rsi
|
||||
add %rdx, %rdi
|
||||
|
||||
|
||||
.L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}:
|
||||
{[prefetch]s} -128(%rdi)
|
||||
{[prefetch]s} -192(%rdi)
|
||||
|
||||
vmovdqu -32(%rsi), %ymm4
|
||||
vmovdqu -64(%rsi), %ymm5
|
||||
vmovdqu -96(%rsi), %ymm6
|
||||
vmovdqu -128(%rsi), %ymm7
|
||||
sub $128, %rsi
|
||||
|
||||
vmovdqa %ymm4, -32(%rdi)
|
||||
vmovdqa %ymm5, -64(%rdi)
|
||||
vmovdqa %ymm6, -96(%rdi)
|
||||
vmovdqa %ymm7, -128(%rdi)
|
||||
sub $128, %rdi
|
||||
|
||||
cmp %r8, %rsi
|
||||
ja .L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}
|
||||
|
||||
vmovdqu %ymm0, (%rax) // %rax == the original unaligned %rdi
|
||||
vmovdqu %ymm1, 32(%rax)
|
||||
vmovdqu %ymm2, 64(%rax)
|
||||
vmovdqu %ymm3, 96(%rax)
|
||||
vmovdqu %ymm8, (%r9)
|
||||
|
||||
vzeroupper
|
||||
ret
|
||||
|
||||
.cfi_endproc
|
||||
// .size {[function_prefix]s}__folly_memcpy_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_{[prefetch]s} not supported by windows
|
@ -1,18 +0,0 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const arch = builtin.cpu.arch;
|
||||
const function_prefix = @import("../assembly_util.zig").function_prefix;
|
||||
|
||||
// Assemble the embedded `memcpy-x86_64.S` template at compile time.
// The template is formatted once per prefetch instruction name ("prefetchw",
// "prefetcht0"); each pass substitutes `{[prefetch]s}` and
// `{[function_prefix]s}`, so every pass emits its own set of suffixed symbols.
// Any architecture other than x86_64 reaches `unreachable` during comptime
// evaluation.
comptime {
|
||||
switch (arch) {
|
||||
.x86_64 => {
|
||||
inline for ([_][]const u8{ "prefetchw", "prefetcht0" }) |prefetch| {
|
||||
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .prefetch = prefetch, .function_prefix = function_prefix }));
|
||||
}
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
}
|
||||
|
||||
pub extern fn __folly_memcpy_prefetchw(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
|
||||
pub extern fn __folly_memcpy_prefetcht0(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
|
@ -1 +0,0 @@
|
||||
pub const memcpy = @import("musl/memcpy.zig").memcpy;
|
@ -1,193 +0,0 @@
|
||||
musl as a whole is licensed under the following standard MIT license:
|
||||
|
||||
----------------------------------------------------------------------
|
||||
Copyright © 2005-2020 Rich Felker, et al.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
----------------------------------------------------------------------
|
||||
|
||||
Authors/contributors include:
|
||||
|
||||
A. Wilcox
|
||||
Ada Worcester
|
||||
Alex Dowad
|
||||
Alex Suykov
|
||||
Alexander Monakov
|
||||
Andre McCurdy
|
||||
Andrew Kelley
|
||||
Anthony G. Basile
|
||||
Aric Belsito
|
||||
Arvid Picciani
|
||||
Bartosz Brachaczek
|
||||
Benjamin Peterson
|
||||
Bobby Bingham
|
||||
Boris Brezillon
|
||||
Brent Cook
|
||||
Chris Spiegel
|
||||
Clément Vasseur
|
||||
Daniel Micay
|
||||
Daniel Sabogal
|
||||
Daurnimator
|
||||
David Carlier
|
||||
David Edelsohn
|
||||
Denys Vlasenko
|
||||
Dmitry Ivanov
|
||||
Dmitry V. Levin
|
||||
Drew DeVault
|
||||
Emil Renner Berthing
|
||||
Fangrui Song
|
||||
Felix Fietkau
|
||||
Felix Janda
|
||||
Gianluca Anzolin
|
||||
Hauke Mehrtens
|
||||
He X
|
||||
Hiltjo Posthuma
|
||||
Isaac Dunham
|
||||
Jaydeep Patil
|
||||
Jens Gustedt
|
||||
Jeremy Huntwork
|
||||
Jo-Philipp Wich
|
||||
Joakim Sindholt
|
||||
John Spencer
|
||||
Julien Ramseier
|
||||
Justin Cormack
|
||||
Kaarle Ritvanen
|
||||
Khem Raj
|
||||
Kylie McClain
|
||||
Leah Neukirchen
|
||||
Luca Barbato
|
||||
Luka Perkov
|
||||
M Farkas-Dyck (Strake)
|
||||
Mahesh Bodapati
|
||||
Markus Wichmann
|
||||
Masanori Ogino
|
||||
Michael Clark
|
||||
Michael Forney
|
||||
Mikhail Kremnyov
|
||||
Natanael Copa
|
||||
Nicholas J. Kain
|
||||
orc
|
||||
Pascal Cuoq
|
||||
Patrick Oppenlander
|
||||
Petr Hosek
|
||||
Petr Skocik
|
||||
Pierre Carrier
|
||||
Reini Urban
|
||||
Rich Felker
|
||||
Richard Pennington
|
||||
Ryan Fairfax
|
||||
Samuel Holland
|
||||
Segev Finer
|
||||
Shiz
|
||||
sin
|
||||
Solar Designer
|
||||
Stefan Kristiansson
|
||||
Stefan O'Rear
|
||||
Szabolcs Nagy
|
||||
Timo Teräs
|
||||
Trutz Behn
|
||||
Valentin Ochs
|
||||
Will Dietz
|
||||
William Haddon
|
||||
William Pitcock
|
||||
|
||||
Portions of this software are derived from third-party works licensed
|
||||
under terms compatible with the above MIT license:
|
||||
|
||||
The TRE regular expression implementation (src/regex/reg* and
|
||||
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
|
||||
under a 2-clause BSD license (license text in the source files). The
|
||||
included version has been heavily modified by Rich Felker in 2012, in
|
||||
the interests of size, simplicity, and namespace cleanliness.
|
||||
|
||||
Much of the math library code (src/math/* and src/complex/*) is
|
||||
Copyright © 1993,2004 Sun Microsystems or
|
||||
Copyright © 2003-2011 David Schultz or
|
||||
Copyright © 2003-2009 Steven G. Kargl or
|
||||
Copyright © 2003-2009 Bruce D. Evans or
|
||||
Copyright © 2008 Stephen L. Moshier or
|
||||
Copyright © 2017-2018 Arm Limited
|
||||
and labelled as such in comments in the individual source files. All
|
||||
have been licensed under extremely permissive terms.
|
||||
|
||||
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
|
||||
The Android Open Source Project and is licensed under a two-clause BSD
|
||||
license. It was taken from Bionic libc, used on Android.
|
||||
|
||||
The AArch64 memcpy and memset code (src/string/aarch64/*) are
|
||||
Copyright © 1999-2019, Arm Limited.
|
||||
|
||||
The implementation of DES for crypt (src/crypt/crypt_des.c) is
|
||||
Copyright © 1994 David Burren. It is licensed under a BSD license.
|
||||
|
||||
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
|
||||
originally written by Solar Designer and placed into the public
|
||||
domain. The code also comes with a fallback permissive license for use
|
||||
in jurisdictions that may not recognize the public domain.
|
||||
|
||||
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
|
||||
Valentin Ochs and is licensed under an MIT-style license.
|
||||
|
||||
The x86_64 port was written by Nicholas J. Kain and is licensed under
|
||||
the standard MIT terms.
|
||||
|
||||
The mips and microblaze ports were originally written by Richard
|
||||
Pennington for use in the ellcc project. The original code was adapted
|
||||
by Rich Felker for build system and code conventions during upstream
|
||||
integration. It is licensed under the standard MIT terms.
|
||||
|
||||
The mips64 port was contributed by Imagination Technologies and is
|
||||
licensed under the standard MIT terms.
|
||||
|
||||
The powerpc port was also originally written by Richard Pennington,
|
||||
and later supplemented and integrated by John Spencer. It is licensed
|
||||
under the standard MIT terms.
|
||||
|
||||
All other files which have no copyright comments are original works
|
||||
produced specifically for use as part of this library, written either
|
||||
by Rich Felker, the main author of the library, or by one or more
|
||||
contibutors listed above. Details on authorship of individual files
|
||||
can be found in the git version control history of the project. The
|
||||
omission of copyright and license comments in each file is in the
|
||||
interest of source tree size.
|
||||
|
||||
In addition, permission is hereby granted for all public header files
|
||||
(include/* and arch/*/bits/*) and crt files intended to be linked into
|
||||
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
|
||||
the copyright notice and permission notice otherwise required by the
|
||||
license, and to use these files without any requirement of
|
||||
attribution. These files include substantial contributions from:
|
||||
|
||||
Bobby Bingham
|
||||
John Spencer
|
||||
Nicholas J. Kain
|
||||
Rich Felker
|
||||
Richard Pennington
|
||||
Stefan Kristiansson
|
||||
Szabolcs Nagy
|
||||
|
||||
all of whom have explicitly granted such permission.
|
||||
|
||||
This file previously contained text expressing a belief that most of
|
||||
the files covered by the above exception were sufficiently trivial not
|
||||
to be subject to copyright, resulting in confusion over whether it
|
||||
negated the permissions granted in the license. In the spirit of
|
||||
permissive licensing, and of not having licensing issues being an
|
||||
obstacle to adoption, that text has been removed.
|
@ -1,2 +0,0 @@
|
||||
This set of files all come from [musl libc](https://musl.libc.org/).
|
||||
Roc just directly uses a few of them instead of depending on musl libc fully.
|
@ -1,30 +0,0 @@
|
||||
.global {[function_prefix]s}musl_memcpy
|
||||
// Windows does not support the type directive.
|
||||
// .type {[function_prefix]s}musl_memcpy,@function
|
||||
{[function_prefix]s}musl_memcpy:
|
||||
push %esi
|
||||
push %edi
|
||||
mov 12(%esp),%edi
|
||||
mov 16(%esp),%esi
|
||||
mov 20(%esp),%ecx
|
||||
mov %edi,%eax
|
||||
cmp $4,%ecx
|
||||
jc 1f
|
||||
test $3,%edi
|
||||
jz 1f
|
||||
2: movsb
|
||||
dec %ecx
|
||||
test $3,%edi
|
||||
jnz 2b
|
||||
1: mov %ecx,%edx
|
||||
shr $2,%ecx
|
||||
rep
|
||||
movsl
|
||||
and $3,%edx
|
||||
jz 1f
|
||||
2: movsb
|
||||
dec %edx
|
||||
jnz 2b
|
||||
1: pop %edi
|
||||
pop %esi
|
||||
ret
|
@ -1,23 +0,0 @@
|
||||
.global {[function_prefix]s}musl_memcpy
|
||||
// Windows does not support the type directive.
|
||||
// .type {[function_prefix]s}musl_memcpy,@function
|
||||
{[function_prefix]s}musl_memcpy:
|
||||
mov %rdi,%rax
|
||||
cmp $8,%rdx
|
||||
jc 1f
|
||||
test $7,%edi
|
||||
jz 1f
|
||||
2: movsb
|
||||
dec %rdx
|
||||
test $7,%edi
|
||||
jnz 2b
|
||||
1: mov %rdx,%rcx
|
||||
shr $3,%rcx
|
||||
rep
|
||||
movsq
|
||||
and $7,%edx
|
||||
jz 1f
|
||||
2: movsb
|
||||
dec %edx
|
||||
jnz 2b
|
||||
1: ret
|
@ -1,223 +0,0 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const arch = builtin.cpu.arch;
|
||||
const function_prefix = @import("../assembly_util.zig").function_prefix;
|
||||
|
||||
// Assemble the architecture-specific musl memcpy at compile time.
// The matching `.S` file is embedded as a format template and
// `{[function_prefix]s}` is substituted before it is handed to `asm`.
// Architectures other than x86_64 and x86 emit no assembly here.
comptime {
|
||||
switch (arch) {
|
||||
.x86_64 => {
|
||||
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .function_prefix = function_prefix }));
|
||||
},
|
||||
.x86 => {
|
||||
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86.S"), .{ .function_prefix = function_prefix }));
|
||||
},
|
||||
// TODO: add assembly implementations for other platforms.
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
/// The memcpy implementation selected at compile time for the current target.
/// Windows always gets `fallback_memcpy`. On every other OS, x86_64 and x86
/// use `musl_memcpy`, and all remaining architectures use `fallback_memcpy`.
pub const memcpy =
|
||||
switch (builtin.os.tag) {
|
||||
.windows => fallback_memcpy,
|
||||
else => switch (arch) {
|
||||
.x86_64, .x86 => musl_memcpy,
|
||||
else => fallback_memcpy,
|
||||
},
|
||||
};
|
||||
|
||||
pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;
|
||||
|
||||
/// Portable memcpy used where no assembly implementation is selected.
///
/// Copies `len` bytes from `src` to `dest` and returns `dest`. The two
/// buffers are declared `noalias`, so they must not overlap.
///
/// Strategy:
///   1. Copy leading bytes until the source pointer is 4-byte aligned.
///   2. If the destination is then 4-byte aligned too, copy whole `u32` words.
///   3. Otherwise, for large copies, read aligned source words and stitch
///      them together with shifts so the stores are aligned as well.
///   4. Finish the remaining tail byte by byte.
///
/// Note: this is written to only support little endian targets.
/// To support big endian, `<<` and `>>` would need to be swapped.
pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
    var d = dest;
    var s = src;
    var n = len;

    // Bring `s` up to a 4-byte boundary. The number of bytes required is
    // `(4 - s % 4) % 4` (3, 2, 1 for a misalignment of 1, 2, 3), capped at `n`.
    switch (@min(n, (4 - (@intFromPtr(s) % 4)) % 4)) {
        1 => {
            d[0] = s[0];
            d += 1;
            s += 1;
            n -= 1;
        },
        2 => {
            d[0] = s[0];
            d[1] = s[1];
            d += 2;
            s += 2;
            n -= 2;
        },
        3 => {
            d[0] = s[0];
            d[1] = s[1];
            d[2] = s[2];
            d += 3;
            s += 3;
            n -= 3;
        },
        else => {},
    }

    // If the prologue consumed everything, `s` may still be misaligned;
    // return before any of the alignment casts below can observe it.
    if (n == 0) {
        return dest;
    }

    // From here on `s` is 4-byte aligned.
    if (@intFromPtr(d) % 4 == 0) {
        // Both pointers are aligned: copy word by word.
        var d4 = @as([*]align(4) u8, @alignCast(d));
        var s4 = @as([*]align(4) const u8, @alignCast(s));
        while (n >= 16) : (n -= 16) {
            const d_u32 = @as([*]u32, @ptrCast(d4));
            const s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];
            d_u32[2] = s_u32[2];
            d_u32[3] = s_u32[3];

            d4 += 16;
            s4 += 16;
        }
        // Fewer than 16 bytes remain; the low bits of `n` describe the tail.
        if (n & 8 != 0) {
            const d_u32 = @as([*]u32, @ptrCast(d4));
            const s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];
            d_u32[1] = s_u32[1];

            d4 += 8;
            s4 += 8;
        }
        if (n & 4 != 0) {
            const d_u32 = @as([*]u32, @ptrCast(d4));
            const s_u32 = @as([*]const u32, @ptrCast(s4));
            d_u32[0] = s_u32[0];

            d4 += 4;
            s4 += 4;
        }
        d = d4;
        s = s4;
        if (n & 2 != 0) {
            d[0] = s[0];
            d += 1;
            s += 1;
            d[0] = s[0];
            d += 1;
            s += 1;
        }
        if (n & 1 != 0) {
            d[0] = s[0];
        }
        return dest;
    }

    // `s` is aligned but `d` is not. For large copies, first copy the 3/2/1
    // bytes that align `d`, then build each destination word from two
    // neighbouring aligned source words (`w` and `x`) using shifts.
    if (n >= 32) {
        switch (@intFromPtr(d) % 4) {
            1 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 3;
                // `s` is now 3 past a word boundary, so `s + 1` is aligned.
                while (n >= 17) : (n -= 16) {
                    const d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    const s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 1)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 24) | (x << 8);
                    w = s_u32[1];
                    d_u32[1] = (x >> 24) | (w << 8);
                    x = s_u32[2];
                    d_u32[2] = (w >> 24) | (x << 8);
                    w = s_u32[3];
                    d_u32[3] = (x >> 24) | (w << 8);

                    d += 16;
                    s += 16;
                }
            },
            2 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 2;
                // `s` is now 2 past a word boundary, so `s + 2` is aligned.
                while (n >= 18) : (n -= 16) {
                    const d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    const s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 2)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 16) | (x << 16);
                    w = s_u32[1];
                    d_u32[1] = (x >> 16) | (w << 16);
                    x = s_u32[2];
                    d_u32[2] = (w >> 16) | (x << 16);
                    w = s_u32[3];
                    d_u32[3] = (x >> 16) | (w << 16);

                    d += 16;
                    s += 16;
                }
            },
            3 => {
                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
                d[0] = s[0];
                d += 1;
                s += 1;
                n -= 1;
                // `s` is now 1 past a word boundary, so `s + 3` is aligned.
                while (n >= 19) : (n -= 16) {
                    const d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
                    const s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 3)));
                    var x = s_u32[0];
                    d_u32[0] = (w >> 8) | (x << 24);
                    w = s_u32[1];
                    d_u32[1] = (x >> 8) | (w << 24);
                    x = s_u32[2];
                    d_u32[2] = (w >> 8) | (x << 24);
                    w = s_u32[3];
                    d_u32[3] = (x >> 8) | (w << 24);

                    d += 16;
                    s += 16;
                }
            },
            // `d % 4 == 0` was handled by the aligned branch above.
            else => unreachable,
        }
    }

    // Byte-wise tail: at most 31 bytes remain, described by the low bits of `n`.
    if (n & 16 != 0) {
        comptime var i = 0;
        inline while (i < 16) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 8 != 0) {
        comptime var i = 0;
        inline while (i < 8) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 4 != 0) {
        comptime var i = 0;
        inline while (i < 4) : (i += 1) {
            d[0] = s[0];
            d += 1;
            s += 1;
        }
    }
    if (n & 2 != 0) {
        d[0] = s[0];
        d += 1;
        s += 1;
        d[0] = s[0];
        d += 1;
        s += 1;
    }
    if (n & 1 != 0) {
        d[0] = s[0];
    }
    return dest;
}
|
@ -6,11 +6,6 @@ const expect = @import("expect.zig");
|
||||
const panic_utils = @import("panic.zig");
|
||||
const dbg_utils = @import("dbg.zig");
|
||||
|
||||
comptime {
|
||||
_ = @import("compiler_rt.zig");
|
||||
_ = @import("libc.zig");
|
||||
}
|
||||
|
||||
const ROC_BUILTINS = "roc_builtins";
|
||||
const NUM = "num";
|
||||
const STR = "str";
|
||||
@ -18,6 +13,13 @@ const STR = "str";
|
||||
// Dec Module
|
||||
const dec = @import("dec.zig");
|
||||
|
||||
var FLTUSED: i32 = 0;
|
||||
comptime {
|
||||
if (builtin.os.tag == .windows) {
|
||||
@export(FLTUSED, .{ .name = "_fltused", .linkage = .Weak });
|
||||
}
|
||||
}
|
||||
|
||||
comptime {
|
||||
exportDecFn(dec.absC, "abs");
|
||||
exportDecFn(dec.acosC, "acos");
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,8 @@ interface Set
|
||||
exposes [
|
||||
Set,
|
||||
empty,
|
||||
withCapacity,
|
||||
reserve,
|
||||
single,
|
||||
walk,
|
||||
walkUntil,
|
||||
@ -45,7 +47,7 @@ Set k := Dict.Dict k {} where k implements Hash & Eq
|
||||
},
|
||||
]
|
||||
|
||||
isEq : Set k, Set k -> Bool where k implements Hash & Eq
|
||||
isEq : Set k, Set k -> Bool
|
||||
isEq = \xs, ys ->
|
||||
if len xs != len ys then
|
||||
Bool.false
|
||||
@ -56,7 +58,7 @@ isEq = \xs, ys ->
|
||||
else
|
||||
Break Bool.false
|
||||
|
||||
hashSet : hasher, Set k -> hasher where k implements Hash & Eq, hasher implements Hasher
|
||||
hashSet : hasher, Set k -> hasher where hasher implements Hasher
|
||||
hashSet = \hasher, @Set inner -> Hash.hash hasher inner
|
||||
|
||||
toInspectorSet : Set k -> Inspector f where k implements Inspect & Hash & Eq, f implements InspectFormatter
|
||||
@ -74,13 +76,18 @@ toInspectorSet = \set ->
|
||||
empty : {} -> Set *
|
||||
empty = \{} -> @Set (Dict.empty {})
|
||||
|
||||
## Return a dictionary with space allocated for a number of entries. This
|
||||
## Return a set with space allocated for a number of entries. This
|
||||
## may provide a performance optimization if you know how many entries will be
|
||||
## inserted.
|
||||
withCapacity : Nat -> Set *
|
||||
withCapacity = \cap ->
|
||||
@Set (Dict.withCapacity cap)
|
||||
|
||||
# Enlarge the set for at least capacity additional elements
|
||||
reserve : Set k, Nat -> Set k
|
||||
reserve = \@Set dict, requested ->
|
||||
@Set (Dict.reserve dict requested)
|
||||
|
||||
## Creates a new `Set` with a single value.
|
||||
## ```
|
||||
## singleItemSet = Set.single "Apple"
|
||||
@ -88,7 +95,7 @@ withCapacity = \cap ->
|
||||
##
|
||||
## expect countValues == 1
|
||||
## ```
|
||||
single : k -> Set k where k implements Hash & Eq
|
||||
single : k -> Set k
|
||||
single = \key ->
|
||||
Dict.single key {} |> @Set
|
||||
|
||||
@ -104,7 +111,7 @@ single = \key ->
|
||||
##
|
||||
## expect countValues == 3
|
||||
## ```
|
||||
insert : Set k, k -> Set k where k implements Hash & Eq
|
||||
insert : Set k, k -> Set k
|
||||
insert = \@Set dict, key ->
|
||||
Dict.insert dict key {} |> @Set
|
||||
|
||||
@ -189,7 +196,7 @@ expect
|
||||
## expect has10 == Bool.false
|
||||
## expect has20 == Bool.true
|
||||
## ```
|
||||
remove : Set k, k -> Set k where k implements Hash & Eq
|
||||
remove : Set k, k -> Set k
|
||||
remove = \@Set dict, key ->
|
||||
Dict.remove dict key |> @Set
|
||||
|
||||
@ -208,7 +215,7 @@ remove = \@Set dict, key ->
|
||||
## expect hasApple == Bool.true
|
||||
## expect hasBanana == Bool.false
|
||||
## ```
|
||||
contains : Set k, k -> Bool where k implements Hash & Eq
|
||||
contains : Set k, k -> Bool
|
||||
contains = \@Set dict, key ->
|
||||
Dict.contains dict key
|
||||
|
||||
@ -221,7 +228,7 @@ contains = \@Set dict, key ->
|
||||
##
|
||||
## expect Set.toList numbers == values
|
||||
## ```
|
||||
toList : Set k -> List k where k implements Hash & Eq
|
||||
toList : Set k -> List k
|
||||
toList = \@Set dict ->
|
||||
Dict.keys dict
|
||||
|
||||
@ -235,7 +242,7 @@ toList = \@Set dict ->
|
||||
##
|
||||
## expect Set.fromList [Pear, Apple, Banana] == values
|
||||
## ```
|
||||
fromList : List k -> Set k where k implements Hash & Eq
|
||||
fromList : List k -> Set k
|
||||
fromList = \list ->
|
||||
list
|
||||
|> List.map \k -> (k, {})
|
||||
@ -252,7 +259,7 @@ fromList = \list ->
|
||||
##
|
||||
## expect Set.union set1 set2 == Set.fromList [Left, Right]
|
||||
## ```
|
||||
union : Set k, Set k -> Set k where k implements Hash & Eq
|
||||
union : Set k, Set k -> Set k
|
||||
union = \@Set dict1, @Set dict2 ->
|
||||
Dict.insertAll dict1 dict2 |> @Set
|
||||
|
||||
@ -265,7 +272,7 @@ union = \@Set dict1, @Set dict2 ->
|
||||
##
|
||||
## expect Set.intersection set1 set2 == Set.single Left
|
||||
## ```
|
||||
intersection : Set k, Set k -> Set k where k implements Hash & Eq
|
||||
intersection : Set k, Set k -> Set k
|
||||
intersection = \@Set dict1, @Set dict2 ->
|
||||
Dict.keepShared dict1 dict2 |> @Set
|
||||
|
||||
@ -279,7 +286,7 @@ intersection = \@Set dict1, @Set dict2 ->
|
||||
##
|
||||
## expect Set.difference first second == Set.fromList [Up, Down]
|
||||
## ```
|
||||
difference : Set k, Set k -> Set k where k implements Hash & Eq
|
||||
difference : Set k, Set k -> Set k
|
||||
difference = \@Set dict1, @Set dict2 ->
|
||||
Dict.removeAll dict1 dict2 |> @Set
|
||||
|
||||
@ -302,14 +309,14 @@ difference = \@Set dict1, @Set dict2 ->
|
||||
##
|
||||
## expect result == 2
|
||||
## ```
|
||||
walk : Set k, state, (state, k -> state) -> state where k implements Hash & Eq
|
||||
walk : Set k, state, (state, k -> state) -> state
|
||||
walk = \@Set dict, state, step ->
|
||||
Dict.walk dict state (\s, k, _ -> step s k)
|
||||
|
||||
## Convert each value in the set to something new, by calling a conversion
|
||||
## function on each of them which receives the old value. Then return a
|
||||
## new set containing the converted values.
|
||||
map : Set a, (a -> b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
|
||||
map : Set a, (a -> b) -> Set b
|
||||
map = \set, transform ->
|
||||
init = withCapacity (capacity set)
|
||||
|
||||
@ -321,7 +328,7 @@ map = \set, transform ->
|
||||
## (using [Set.union]) into one set.
|
||||
##
|
||||
## You may know a similar function named `concatMap` in other languages.
|
||||
joinMap : Set a, (a -> Set b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
|
||||
joinMap : Set a, (a -> Set b) -> Set b
|
||||
joinMap = \set, transform ->
|
||||
init = withCapacity (capacity set) # Might be a pessimization
|
||||
|
||||
@ -343,7 +350,7 @@ joinMap = \set, transform ->
|
||||
##
|
||||
## expect result == FoundTheAnswer
|
||||
## ```
|
||||
walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state where k implements Hash & Eq
|
||||
walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state
|
||||
walkUntil = \@Set dict, state, step ->
|
||||
Dict.walkUntil dict state (\s, k, _ -> step s k)
|
||||
|
||||
|
@ -1058,6 +1058,50 @@ pub fn module_from_builtins<'ctx>(
|
||||
let module = Module::parse_bitcode_from_buffer(&memory_buffer, ctx)
|
||||
.unwrap_or_else(|err| panic!("Unable to import builtins bitcode. LLVM error: {err:?}"));
|
||||
|
||||
// In my testing, this adds about 20ms extra to compilation.
|
||||
// Long term it would be best if we could do this on the zig side.
|
||||
// This change enables us to dce all the parts of compiler-rt we don't use.
|
||||
// That said, it would be better to dce them before roc app compilation time.
|
||||
// Anything not depended on by a `roc_builtin.` function could already be DCE'd theoretically.
|
||||
// That said, this workaround is good enough and fixes compilation times.
|
||||
|
||||
// Also, must_keep lists the functions we depend on that would normally be provided by libc.
|
||||
// They are magically linked to by llvm builtins, so we must specify that they can't be DCE'd.
|
||||
let must_keep = [
|
||||
"_fltused",
|
||||
"floorf",
|
||||
"memcpy",
|
||||
"memset",
|
||||
// Roc special functions
|
||||
"__roc_force_longjmp",
|
||||
"__roc_force_setjmp",
|
||||
"set_shared_buffer",
|
||||
];
|
||||
for func in module.get_functions() {
|
||||
let has_definition = func.count_basic_blocks() > 0;
|
||||
let name = func.get_name().to_string_lossy();
|
||||
if has_definition
|
||||
&& !name.starts_with("roc_builtins.")
|
||||
&& !must_keep.contains(&name.as_ref())
|
||||
{
|
||||
func.set_linkage(Linkage::Private);
|
||||
}
|
||||
}
|
||||
|
||||
// Note, running DCE here is faster than waiting until full app DCE.
|
||||
let mpm = PassManager::create(());
|
||||
mpm.add_global_dce_pass();
|
||||
mpm.run_on(&module);
|
||||
|
||||
// Now that the unused compiler-rt functions have been removed,
|
||||
// mark that the builtin functions are allowed to be DCE'd if they aren't used.
|
||||
for func in module.get_functions() {
|
||||
let name = func.get_name().to_string_lossy();
|
||||
if name.starts_with("roc_builtins.") {
|
||||
func.set_linkage(Linkage::Private);
|
||||
}
|
||||
}
|
||||
|
||||
// Add LLVM intrinsics.
|
||||
add_intrinsics(ctx, &module);
|
||||
|
||||
|
@ -183,7 +183,7 @@ impl LowLevelWrapperType {
|
||||
|
||||
/// We use a rust macro to ensure that every LowLevel gets handled
|
||||
macro_rules! map_symbol_to_lowlevel {
|
||||
($($lowlevel:ident <= $symbol:ident),* $(,)?) => {
|
||||
($($lowlevel:ident <= $($symbol:ident),+);* $(;)?) => {
|
||||
|
||||
fn for_symbol_help(symbol: Symbol) -> LowLevelWrapperType {
|
||||
use $crate::low_level::LowLevelWrapperType::*;
|
||||
@ -191,14 +191,14 @@ macro_rules! map_symbol_to_lowlevel {
|
||||
// expands to a big (but non-exhaustive) match on symbols and maps them to a lowlevel
|
||||
match symbol {
|
||||
$(
|
||||
Symbol::$symbol => CanBeReplacedBy(LowLevel::$lowlevel),
|
||||
$(Symbol::$symbol)|+ => CanBeReplacedBy(LowLevel::$lowlevel),
|
||||
)*
|
||||
|
||||
_ => NotALowLevelWrapper,
|
||||
}
|
||||
}
|
||||
|
||||
fn _enforce_exhaustiveness(lowlevel: LowLevel) -> Symbol {
|
||||
fn _enforce_exhaustiveness(lowlevel: LowLevel) -> &'static [Symbol] {
|
||||
// when adding a new lowlevel, this match will stop being exhaustive, and give a
|
||||
// compiler error. Most likely, you are adding a new lowlevel that maps directly to a
|
||||
// symbol. For instance, you want to have `List.foo` to stand for the `ListFoo`
|
||||
@ -209,7 +209,7 @@ macro_rules! map_symbol_to_lowlevel {
|
||||
// that it isn't and just see if that works.
|
||||
match lowlevel {
|
||||
$(
|
||||
LowLevel::$lowlevel => Symbol::$symbol,
|
||||
LowLevel::$lowlevel => &[$(Symbol::$symbol),+],
|
||||
)*
|
||||
|
||||
// these are higher-order lowlevels. these need the surrounding
|
||||
@ -259,107 +259,107 @@ macro_rules! map_symbol_to_lowlevel {
|
||||
// pattern of a symbol mapping directly to a lowlevel. In other words, most lowlevels (left) are generated
|
||||
// by only one specific symbol (right)
|
||||
map_symbol_to_lowlevel! {
|
||||
StrConcat <= STR_CONCAT,
|
||||
StrJoinWith <= STR_JOIN_WITH,
|
||||
StrIsEmpty <= STR_IS_EMPTY,
|
||||
StrStartsWith <= STR_STARTS_WITH,
|
||||
StrStartsWithScalar <= STR_STARTS_WITH_SCALAR,
|
||||
StrEndsWith <= STR_ENDS_WITH,
|
||||
StrSplit <= STR_SPLIT,
|
||||
StrCountGraphemes <= STR_COUNT_GRAPHEMES,
|
||||
StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES,
|
||||
StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL,
|
||||
StrToUtf8 <= STR_TO_UTF8,
|
||||
StrRepeat <= STR_REPEAT,
|
||||
StrTrim <= STR_TRIM,
|
||||
StrTrimStart <= STR_TRIM_START,
|
||||
StrTrimEnd <= STR_TRIM_END,
|
||||
StrToScalars <= STR_TO_SCALARS,
|
||||
StrGetUnsafe <= STR_GET_UNSAFE,
|
||||
StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE,
|
||||
StrReserve <= STR_RESERVE,
|
||||
StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE,
|
||||
StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE,
|
||||
StrToNum <= STR_TO_NUM,
|
||||
StrGetCapacity <= STR_CAPACITY,
|
||||
StrWithCapacity <= STR_WITH_CAPACITY,
|
||||
StrGraphemes <= STR_GRAPHEMES,
|
||||
StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY,
|
||||
ListLen <= LIST_LEN,
|
||||
ListGetCapacity <= LIST_CAPACITY,
|
||||
ListWithCapacity <= LIST_WITH_CAPACITY,
|
||||
ListReserve <= LIST_RESERVE,
|
||||
ListReleaseExcessCapacity <= LIST_RELEASE_EXCESS_CAPACITY,
|
||||
ListIsUnique <= LIST_IS_UNIQUE,
|
||||
ListAppendUnsafe <= LIST_APPEND_UNSAFE,
|
||||
ListPrepend <= LIST_PREPEND,
|
||||
ListGetUnsafe <= LIST_GET_UNSAFE,
|
||||
ListReplaceUnsafe <= LIST_REPLACE_UNSAFE,
|
||||
ListConcat <= LIST_CONCAT,
|
||||
ListSublist <= LIST_SUBLIST_LOWLEVEL,
|
||||
ListDropAt <= LIST_DROP_AT,
|
||||
ListSwap <= LIST_SWAP,
|
||||
NumAdd <= NUM_ADD,
|
||||
NumAddWrap <= NUM_ADD_WRAP,
|
||||
NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL,
|
||||
NumAddSaturated <= NUM_ADD_SATURATED,
|
||||
NumSub <= NUM_SUB,
|
||||
NumSubWrap <= NUM_SUB_WRAP,
|
||||
NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL,
|
||||
NumSubSaturated <= NUM_SUB_SATURATED,
|
||||
NumMul <= NUM_MUL,
|
||||
NumMulWrap <= NUM_MUL_WRAP,
|
||||
NumMulSaturated <= NUM_MUL_SATURATED,
|
||||
NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL,
|
||||
NumGt <= NUM_GT,
|
||||
NumGte <= NUM_GTE,
|
||||
NumLt <= NUM_LT,
|
||||
NumLte <= NUM_LTE,
|
||||
NumCompare <= NUM_COMPARE,
|
||||
NumDivFrac <= NUM_DIV_FRAC,
|
||||
NumDivCeilUnchecked <= NUM_DIV_CEIL,
|
||||
NumDivTruncUnchecked <= NUM_DIV_TRUNC,
|
||||
NumRemUnchecked <= NUM_REM,
|
||||
NumIsMultipleOf <= NUM_IS_MULTIPLE_OF,
|
||||
NumAbs <= NUM_ABS,
|
||||
NumNeg <= NUM_NEG,
|
||||
NumSin <= NUM_SIN,
|
||||
NumCos <= NUM_COS,
|
||||
NumTan <= NUM_TAN,
|
||||
NumSqrtUnchecked <= NUM_SQRT,
|
||||
NumLogUnchecked <= NUM_LOG,
|
||||
NumRound <= NUM_ROUND,
|
||||
NumToFrac <= NUM_TO_FRAC,
|
||||
NumIsNan <= NUM_IS_NAN,
|
||||
NumIsInfinite <= NUM_IS_INFINITE,
|
||||
NumIsFinite <= NUM_IS_FINITE,
|
||||
NumPow <= NUM_POW,
|
||||
NumCeiling <= NUM_CEILING,
|
||||
NumPowInt <= NUM_POW_INT,
|
||||
NumFloor <= NUM_FLOOR,
|
||||
NumAtan <= NUM_ATAN,
|
||||
NumAcos <= NUM_ACOS,
|
||||
NumAsin <= NUM_ASIN,
|
||||
NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL,
|
||||
NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL,
|
||||
NumBytesToU64 <= NUM_BYTES_TO_U64_LOWLEVEL,
|
||||
NumBytesToU128 <= NUM_BYTES_TO_U128_LOWLEVEL,
|
||||
NumBitwiseAnd <= NUM_BITWISE_AND,
|
||||
NumBitwiseXor <= NUM_BITWISE_XOR,
|
||||
NumBitwiseOr <= NUM_BITWISE_OR,
|
||||
NumShiftLeftBy <= NUM_SHIFT_LEFT,
|
||||
NumShiftRightBy <= NUM_SHIFT_RIGHT,
|
||||
NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL,
|
||||
NumToStr <= NUM_TO_STR,
|
||||
NumCountLeadingZeroBits <= NUM_COUNT_LEADING_ZERO_BITS,
|
||||
NumCountTrailingZeroBits <= NUM_COUNT_TRAILING_ZERO_BITS,
|
||||
NumCountOneBits <= NUM_COUNT_ONE_BITS,
|
||||
I128OfDec <= I128_OF_DEC,
|
||||
Eq <= BOOL_STRUCTURAL_EQ,
|
||||
NotEq <= BOOL_STRUCTURAL_NOT_EQ,
|
||||
And <= BOOL_AND,
|
||||
Or <= BOOL_OR,
|
||||
Not <= BOOL_NOT,
|
||||
Unreachable <= LIST_UNREACHABLE,
|
||||
DictPseudoSeed <= DICT_PSEUDO_SEED,
|
||||
StrConcat <= STR_CONCAT;
|
||||
StrJoinWith <= STR_JOIN_WITH;
|
||||
StrIsEmpty <= STR_IS_EMPTY;
|
||||
StrStartsWith <= STR_STARTS_WITH;
|
||||
StrStartsWithScalar <= STR_STARTS_WITH_SCALAR;
|
||||
StrEndsWith <= STR_ENDS_WITH;
|
||||
StrSplit <= STR_SPLIT;
|
||||
StrCountGraphemes <= STR_COUNT_GRAPHEMES;
|
||||
StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES;
|
||||
StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL;
|
||||
StrToUtf8 <= STR_TO_UTF8;
|
||||
StrRepeat <= STR_REPEAT;
|
||||
StrTrim <= STR_TRIM;
|
||||
StrTrimStart <= STR_TRIM_START;
|
||||
StrTrimEnd <= STR_TRIM_END;
|
||||
StrToScalars <= STR_TO_SCALARS;
|
||||
StrGetUnsafe <= STR_GET_UNSAFE;
|
||||
StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE;
|
||||
StrReserve <= STR_RESERVE;
|
||||
StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE;
|
||||
StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE;
|
||||
StrToNum <= STR_TO_NUM;
|
||||
StrGetCapacity <= STR_CAPACITY;
|
||||
StrWithCapacity <= STR_WITH_CAPACITY;
|
||||
StrGraphemes <= STR_GRAPHEMES;
|
||||
StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY;
|
||||
ListLen <= LIST_LEN;
|
||||
ListGetCapacity <= LIST_CAPACITY;
|
||||
ListWithCapacity <= LIST_WITH_CAPACITY;
|
||||
ListReserve <= LIST_RESERVE;
|
||||
ListReleaseExcessCapacity <= LIST_RELEASE_EXCESS_CAPACITY;
|
||||
ListIsUnique <= LIST_IS_UNIQUE;
|
||||
ListAppendUnsafe <= LIST_APPEND_UNSAFE;
|
||||
ListPrepend <= LIST_PREPEND;
|
||||
ListGetUnsafe <= LIST_GET_UNSAFE, DICT_LIST_GET_UNSAFE;
|
||||
ListReplaceUnsafe <= LIST_REPLACE_UNSAFE;
|
||||
ListConcat <= LIST_CONCAT;
|
||||
ListSublist <= LIST_SUBLIST_LOWLEVEL;
|
||||
ListDropAt <= LIST_DROP_AT;
|
||||
ListSwap <= LIST_SWAP;
|
||||
NumAdd <= NUM_ADD;
|
||||
NumAddWrap <= NUM_ADD_WRAP;
|
||||
NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL;
|
||||
NumAddSaturated <= NUM_ADD_SATURATED;
|
||||
NumSub <= NUM_SUB;
|
||||
NumSubWrap <= NUM_SUB_WRAP;
|
||||
NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL;
|
||||
NumSubSaturated <= NUM_SUB_SATURATED;
|
||||
NumMul <= NUM_MUL;
|
||||
NumMulWrap <= NUM_MUL_WRAP;
|
||||
NumMulSaturated <= NUM_MUL_SATURATED;
|
||||
NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL;
|
||||
NumGt <= NUM_GT;
|
||||
NumGte <= NUM_GTE;
|
||||
NumLt <= NUM_LT;
|
||||
NumLte <= NUM_LTE;
|
||||
NumCompare <= NUM_COMPARE;
|
||||
NumDivFrac <= NUM_DIV_FRAC;
|
||||
NumDivCeilUnchecked <= NUM_DIV_CEIL;
|
||||
NumDivTruncUnchecked <= NUM_DIV_TRUNC;
|
||||
NumRemUnchecked <= NUM_REM;
|
||||
NumIsMultipleOf <= NUM_IS_MULTIPLE_OF;
|
||||
NumAbs <= NUM_ABS;
|
||||
NumNeg <= NUM_NEG;
|
||||
NumSin <= NUM_SIN;
|
||||
NumCos <= NUM_COS;
|
||||
NumTan <= NUM_TAN;
|
||||
NumSqrtUnchecked <= NUM_SQRT;
|
||||
NumLogUnchecked <= NUM_LOG;
|
||||
NumRound <= NUM_ROUND;
|
||||
NumToFrac <= NUM_TO_FRAC;
|
||||
NumIsNan <= NUM_IS_NAN;
|
||||
NumIsInfinite <= NUM_IS_INFINITE;
|
||||
NumIsFinite <= NUM_IS_FINITE;
|
||||
NumPow <= NUM_POW;
|
||||
NumCeiling <= NUM_CEILING;
|
||||
NumPowInt <= NUM_POW_INT;
|
||||
NumFloor <= NUM_FLOOR;
|
||||
NumAtan <= NUM_ATAN;
|
||||
NumAcos <= NUM_ACOS;
|
||||
NumAsin <= NUM_ASIN;
|
||||
NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL;
|
||||
NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL;
|
||||
NumBytesToU64 <= NUM_BYTES_TO_U64_LOWLEVEL;
|
||||
NumBytesToU128 <= NUM_BYTES_TO_U128_LOWLEVEL;
|
||||
NumBitwiseAnd <= NUM_BITWISE_AND;
|
||||
NumBitwiseXor <= NUM_BITWISE_XOR;
|
||||
NumBitwiseOr <= NUM_BITWISE_OR;
|
||||
NumShiftLeftBy <= NUM_SHIFT_LEFT;
|
||||
NumShiftRightBy <= NUM_SHIFT_RIGHT;
|
||||
NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL;
|
||||
NumToStr <= NUM_TO_STR;
|
||||
NumCountLeadingZeroBits <= NUM_COUNT_LEADING_ZERO_BITS;
|
||||
NumCountTrailingZeroBits <= NUM_COUNT_TRAILING_ZERO_BITS;
|
||||
NumCountOneBits <= NUM_COUNT_ONE_BITS;
|
||||
I128OfDec <= I128_OF_DEC;
|
||||
Eq <= BOOL_STRUCTURAL_EQ;
|
||||
NotEq <= BOOL_STRUCTURAL_NOT_EQ;
|
||||
And <= BOOL_AND;
|
||||
Or <= BOOL_OR;
|
||||
Not <= BOOL_NOT;
|
||||
Unreachable <= LIST_UNREACHABLE;
|
||||
DictPseudoSeed <= DICT_PSEUDO_SEED;
|
||||
}
|
||||
|
@ -1486,6 +1486,7 @@ define_builtins! {
|
||||
26 DICT_JOINMAP: "joinMap"
|
||||
27 DICT_KEEP_IF: "keepIf"
|
||||
28 DICT_DROP_IF: "dropIf"
|
||||
29 DICT_RESERVE: "reserve"
|
||||
}
|
||||
9 SET: "Set" => {
|
||||
0 SET_SET: "Set" exposed_type=true // the Set.Set type alias
|
||||
@ -1510,6 +1511,8 @@ define_builtins! {
|
||||
19 SET_JOIN_MAP: "joinMap"
|
||||
20 SET_KEEP_IF: "keepIf"
|
||||
21 SET_DROP_IF: "dropIf"
|
||||
22 SET_WITH_CAPACITY: "withCapacity"
|
||||
23 SET_RESERVE: "reserve"
|
||||
}
|
||||
10 BOX: "Box" => {
|
||||
0 BOX_BOX_TYPE: "Box" exposed_apply_type=true // the Box.Box opaque type
|
||||
|
@ -1813,6 +1813,7 @@ fn ceiling() {
|
||||
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
|
||||
fn floor() {
|
||||
assert_evals_to!("Num.floor 1.9f64", 1, i64);
|
||||
assert_evals_to!("Num.floor -1.9f64", -2, i64);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1,46 +1,40 @@
|
||||
procedure Dict.1 (Dict.596):
|
||||
let Dict.606 : List {[], []} = Array [];
|
||||
let Dict.35 : List U64 = CallByName Dict.35;
|
||||
let Dict.34 : List I8 = CallByName Dict.34;
|
||||
let Dict.605 : {List {[], []}, List U64, List I8} = Struct {Dict.606, Dict.35, Dict.34};
|
||||
ret Dict.605;
|
||||
procedure Dict.1 (Dict.679):
|
||||
let Dict.688 : List {U32, U32} = Array [];
|
||||
let Dict.689 : List {[], []} = Array [];
|
||||
let Dict.690 : U64 = 0i64;
|
||||
let Dict.41 : Float32 = CallByName Dict.41;
|
||||
let Dict.42 : U8 = CallByName Dict.42;
|
||||
let Dict.687 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = Struct {Dict.688, Dict.689, Dict.690, Dict.41, Dict.42};
|
||||
ret Dict.687;
|
||||
|
||||
procedure Dict.34 ():
|
||||
let Dict.608 : I8 = CallByName Dict.46;
|
||||
let Dict.609 : I8 = CallByName Dict.46;
|
||||
let Dict.610 : I8 = CallByName Dict.46;
|
||||
let Dict.611 : I8 = CallByName Dict.46;
|
||||
let Dict.612 : I8 = CallByName Dict.46;
|
||||
let Dict.613 : I8 = CallByName Dict.46;
|
||||
let Dict.614 : I8 = CallByName Dict.46;
|
||||
let Dict.615 : I8 = CallByName Dict.46;
|
||||
let Dict.607 : List I8 = Array [Dict.608, Dict.609, Dict.610, Dict.611, Dict.612, Dict.613, Dict.614, Dict.615];
|
||||
ret Dict.607;
|
||||
|
||||
procedure Dict.35 ():
|
||||
let Dict.617 : List U64 = Array [0i64, 0i64, 0i64, 0i64, 0i64, 0i64, 0i64, 0i64];
|
||||
ret Dict.617;
|
||||
|
||||
procedure Dict.4 (Dict.603):
|
||||
let Dict.114 : List {[], []} = StructAtIndex 0 Dict.603;
|
||||
let #Derived_gen.1 : List U64 = StructAtIndex 1 Dict.603;
|
||||
dec #Derived_gen.1;
|
||||
let #Derived_gen.0 : List I8 = StructAtIndex 2 Dict.603;
|
||||
procedure Dict.4 (Dict.685):
|
||||
let Dict.138 : List {[], []} = StructAtIndex 1 Dict.685;
|
||||
let #Derived_gen.0 : List {U32, U32} = StructAtIndex 0 Dict.685;
|
||||
dec #Derived_gen.0;
|
||||
let Dict.604 : U64 = CallByName List.6 Dict.114;
|
||||
dec Dict.114;
|
||||
ret Dict.604;
|
||||
let Dict.686 : U64 = CallByName List.6 Dict.138;
|
||||
dec Dict.138;
|
||||
ret Dict.686;
|
||||
|
||||
procedure Dict.46 ():
|
||||
let Dict.616 : I8 = -128i64;
|
||||
ret Dict.616;
|
||||
procedure Dict.41 ():
|
||||
let Dict.694 : Float32 = 0.8f64;
|
||||
ret Dict.694;
|
||||
|
||||
procedure Dict.42 ():
|
||||
let Dict.692 : U8 = 64i64;
|
||||
let Dict.693 : U8 = 3i64;
|
||||
let Dict.691 : U8 = CallByName Num.20 Dict.692 Dict.693;
|
||||
ret Dict.691;
|
||||
|
||||
procedure List.6 (#Attr.2):
|
||||
let List.553 : U64 = lowlevel ListLen #Attr.2;
|
||||
ret List.553;
|
||||
|
||||
procedure Num.20 (#Attr.2, #Attr.3):
|
||||
let Num.291 : U8 = lowlevel NumSub #Attr.2 #Attr.3;
|
||||
ret Num.291;
|
||||
|
||||
procedure Test.0 ():
|
||||
let Test.3 : {} = Struct {};
|
||||
let Test.2 : {List {[], []}, List U64, List I8} = CallByName Dict.1 Test.3;
|
||||
let Test.2 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = CallByName Dict.1 Test.3;
|
||||
let Test.1 : U64 = CallByName Dict.4 Test.2;
|
||||
ret Test.1;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8,5 +8,5 @@ main =
|
||||
s2 = Set.empty {}
|
||||
|
||||
Bool.isEq s1 s1 && Bool.isEq s2 s2
|
||||
# ^^^^^^^^^ Set#Bool.isEq(22): Set Str, Set Str -[[Set.isEq(22)]]-> Bool
|
||||
# ^^^^^^^^^ Set#Bool.isEq(22): Set U8, Set U8 -[[Set.isEq(22)]]-> Bool
|
||||
# ^^^^^^^^^ Set#Bool.isEq(24): Set Str, Set Str -[[Set.isEq(24)]]-> Bool
|
||||
# ^^^^^^^^^ Set#Bool.isEq(24): Set U8, Set U8 -[[Set.isEq(24)]]-> Bool
|
||||
|
@ -3,22 +3,22 @@
|
||||
app "test" provides [main] to "./platform"
|
||||
|
||||
f = \{} ->
|
||||
#^{-1} <2826><117>{} -<120>[[f(1)]]-> <116>[Ok <2834>{}]<80>*
|
||||
#^{-1} <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
|
||||
when g {} is
|
||||
# ^ <2816><2834>{} -<2824>[[g(2)]]-> <72>[Ok <2834>{}]<102>*
|
||||
# ^ <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
|
||||
_ -> Ok {}
|
||||
|
||||
g = \{} ->
|
||||
#^{-1} <2816><2834>{} -<2824>[[g(2)]]-> <72>[Ok <2834>{}]<102>*
|
||||
#^{-1} <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
|
||||
when h {} is
|
||||
# ^ <2821><2834>{} -<2829>[[h(3)]]-> <94>[Ok <2834>{}]<124>*
|
||||
# ^ <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
|
||||
_ -> Ok {}
|
||||
|
||||
h = \{} ->
|
||||
#^{-1} <2821><2834>{} -<2829>[[h(3)]]-> <94>[Ok <2834>{}]<124>*
|
||||
#^{-1} <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
|
||||
when f {} is
|
||||
# ^ <2826><117>{} -<120>[[f(1)]]-> <116>[Ok <2834>{}]<80>*
|
||||
# ^ <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
|
||||
_ -> Ok {}
|
||||
|
||||
main = f {}
|
||||
# ^ <2836><133>{} -<136>[[f(1)]]-> <138>[Ok <2834>{}]<2835>w_a
|
||||
# ^ <2928><133>{} -<136>[[f(1)]]-> <138>[Ok <2926>{}]<2927>w_a
|
||||
|
@ -435,17 +435,87 @@ pub(crate) fn surgery_pe(executable_path: &Path, metadata_path: &Path, roc_app_b
|
||||
);
|
||||
} else {
|
||||
let is_ingested_compiler_rt = [
|
||||
"__muloti4",
|
||||
"__addtf3",
|
||||
"__ceilx",
|
||||
"__cmpdf2",
|
||||
"__cmphf2",
|
||||
"__cmpsf2",
|
||||
"__cmptf2",
|
||||
"__cmpxf2",
|
||||
"__cosx",
|
||||
"__divsf3",
|
||||
"__divtf3",
|
||||
"__divti3",
|
||||
"__udivti3",
|
||||
"__modti3",
|
||||
"__umodti3",
|
||||
"__exp2x",
|
||||
"__expx",
|
||||
"__extendhfsf2",
|
||||
"__fabsx",
|
||||
"__fixdfti",
|
||||
"__fixsfti",
|
||||
"__fixunsdfti",
|
||||
"__fixunssfti",
|
||||
"__floorx",
|
||||
"__fmax",
|
||||
"__fmaxx",
|
||||
"__fminx",
|
||||
"__fmodx",
|
||||
"__gedf2",
|
||||
"__gehf2",
|
||||
"__gesf2",
|
||||
"__getf2",
|
||||
"__gexf2",
|
||||
"__log10x",
|
||||
"__log2x",
|
||||
"__logx",
|
||||
"__lshrti3",
|
||||
"memcpy_decision",
|
||||
"__modti3",
|
||||
"__muloti4",
|
||||
"__multf3",
|
||||
"__roundx",
|
||||
"__sincosx",
|
||||
"__sinx",
|
||||
"__sqrtx",
|
||||
"__tanx",
|
||||
"__truncsfhf2",
|
||||
"__truncx",
|
||||
"__udivmoddi4",
|
||||
"__udivti3",
|
||||
"__umodti3",
|
||||
"ceilq",
|
||||
"cos",
|
||||
"cosf",
|
||||
"cosq",
|
||||
"exp",
|
||||
"exp2",
|
||||
"exp2q",
|
||||
"expf",
|
||||
"expq",
|
||||
"floor",
|
||||
"floorf",
|
||||
"floorq",
|
||||
"fmaq",
|
||||
"fmaxf",
|
||||
"fmaxl",
|
||||
"fmodf",
|
||||
"log10",
|
||||
"log10q",
|
||||
"log2",
|
||||
"log2q",
|
||||
"logq",
|
||||
"memcpy",
|
||||
"roundq",
|
||||
"sin",
|
||||
"sincos",
|
||||
"sincosf",
|
||||
"sincosq",
|
||||
"sinf",
|
||||
"sinq",
|
||||
"sqrt",
|
||||
"sqrtf",
|
||||
"sqrtq",
|
||||
"tan",
|
||||
"tanf",
|
||||
"tanq",
|
||||
]
|
||||
.contains(&name.as_str());
|
||||
if *address == 0 && !name.starts_with("roc") && !is_ingested_compiler_rt {
|
||||
|
Loading…
Reference in New Issue
Block a user