Merge pull request #6216 from roc-lang/dict-ankerl-unordered-dense

Swap Dict implementation to ankerl dense unordered
This commit is contained in:
Brendan Hansknecht 2023-12-09 22:17:32 -08:00 committed by GitHub
commit f6bff3a86e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 1675 additions and 3110 deletions
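For context, a minimal sketch of typical Dict usage — the API this PR reimplements. Example values are illustrative, and behavior is assumed unchanged by the swap:
```
fruitCounts =
    Dict.empty {}
    |> Dict.insert "apple" 3
    |> Dict.insert "banana" 5

expect Dict.get fruitCounts "apple" == Ok 3
expect Dict.len fruitCounts == 2
```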

View File

@ -277,4 +277,33 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
===========================================================
* ankerl::unordered_dense - https://github.com/martinus/unordered_dense
Our Dict type is currently implemented as a rather direct port of this source into Roc.
Source code is in crates/compiler/builtins/roc/Dict.roc
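As a rough illustration of the dense-layout idea behind ankerl::unordered_dense: entries are stored contiguously in one list, while a separate bucket array holds, per slot, a probe distance plus a hash fingerprint and an index into the dense list. The sketch below is hypothetical; the field and type names are illustrative, not necessarily the actual Dict.roc internals.
```
# Hypothetical sketch of the layout; see Dict.roc for the real definition.
Bucket : {
    # upper bits: distance from the ideal bucket; lower bits: hash fingerprint
    distAndFingerprint : U32,
    # index into the dense `data` list below
    dataIndex : U32,
}

DenseDict k v := {
    buckets : List Bucket,
    data : List (k, v),
}
```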
MIT License
Copyright (c) 2022 Martin Leitner-Ankerl
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -59,6 +59,8 @@ fn generateLlvmIrFile(
const obj = b.addObject(.{ .name = object_name, .root_source_file = main_path, .optimize = mode, .target = target, .use_llvm = true });
obj.strip = true;
obj.disable_stack_probing = true;
if (target.cpu_arch != .wasm32)
obj.bundle_compiler_rt = true;
// Generating the bin seems to be required to get Zig to generate the LLVM IR.
_ = obj.getEmittedBin();
@ -91,6 +93,8 @@ fn generateObjectFile(
obj.link_function_sections = true;
obj.force_pic = true;
obj.disable_stack_probing = true;
if (target.cpu_arch != .wasm32)
obj.bundle_compiler_rt = true;
const obj_file = obj.getEmittedBin();
@ -112,7 +116,7 @@ fn makeLinux32Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.x86;
target.os_tag = std.Target.Os.Tag.linux;
target.abi = std.Target.Abi.musl;
target.abi = std.Target.Abi.none;
return target;
}
@ -122,7 +126,7 @@ fn makeLinuxAarch64Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.aarch64;
target.os_tag = std.Target.Os.Tag.linux;
target.abi = std.Target.Abi.musl;
target.abi = std.Target.Abi.none;
return target;
}
@ -132,7 +136,7 @@ fn makeLinuxX64Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.x86_64;
target.os_tag = std.Target.Os.Tag.linux;
target.abi = std.Target.Abi.musl;
target.abi = std.Target.Abi.none;
return target;
}
@ -142,7 +146,7 @@ fn makeWindows64Target() CrossTarget {
target.cpu_arch = std.Target.Cpu.Arch.x86_64;
target.os_tag = std.Target.Os.Tag.windows;
target.abi = std.Target.Abi.gnu;
target.abi = std.Target.Abi.none;
return target;
}

View File

@ -1,478 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const math = std.math;
// Eventually, we need to statically ingest compiler-rt and get it working with the surgical linker; then these should no longer be needed.
// Until then, we are manually ingesting used parts of compiler-rt here.
//
// Taken from
// https://github.com/ziglang/zig/tree/4976b58ab16069f8d3267b69ed030f29685c1abe/lib/compiler_rt/
// Thank you Zig Contributors!
// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
// calling convention of @Vector(2, u64), rather than what's standard.
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;
const v2u64 = @Vector(2, u64);
// Export it as weak in case it is already linked in by something else.
comptime {
if (!want_windows_v2u64_abi) {
@export(__muloti4, .{ .name = "__muloti4", .linkage = .Weak });
@export(__lshrti3, .{ .name = "__lshrti3", .linkage = .Weak });
@export(__divti3, .{ .name = "__divti3", .linkage = .Weak });
@export(__modti3, .{ .name = "__modti3", .linkage = .Weak });
@export(__umodti3, .{ .name = "__umodti3", .linkage = .Weak });
@export(__udivti3, .{ .name = "__udivti3", .linkage = .Weak });
@export(__fixdfti, .{ .name = "__fixdfti", .linkage = .Weak });
@export(__fixsfti, .{ .name = "__fixsfti", .linkage = .Weak });
@export(__fixunsdfti, .{ .name = "__fixunsdfti", .linkage = .Weak });
@export(__fixunssfti, .{ .name = "__fixunssfti", .linkage = .Weak });
}
}
pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
if (2 * @bitSizeOf(i128) <= @bitSizeOf(usize)) {
return muloXi4_genericFast(i128, a, b, overflow);
} else {
return muloXi4_genericSmall(i128, a, b, overflow);
}
}
pub fn __divti3(a: i128, b: i128) callconv(.C) i128 {
return div(a, b);
}
fn __divti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(div(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
}
inline fn div(a: i128, b: i128) i128 {
const s_a = a >> (128 - 1);
const s_b = b >> (128 - 1);
const an = (a ^ s_a) -% s_a;
const bn = (b ^ s_b) -% s_b;
const r = udivmod(u128, @as(u128, @bitCast(an)), @as(u128, @bitCast(bn)), null);
const s = s_a ^ s_b;
return (@as(i128, @bitCast(r)) ^ s) -% s;
}
pub fn __udivti3(a: u128, b: u128) callconv(.C) u128 {
return udivmod(u128, a, b, null);
}
fn __udivti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), null)));
}
pub fn __umodti3(a: u128, b: u128) callconv(.C) u128 {
var r: u128 = undefined;
_ = udivmod(u128, a, b, &r);
return r;
}
fn __umodti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
var r: u128 = undefined;
_ = udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), &r);
return @as(v2u64, @bitCast(r));
}
pub fn __modti3(a: i128, b: i128) callconv(.C) i128 {
return mod(a, b);
}
fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(mod(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
}
inline fn mod(a: i128, b: i128) i128 {
const s_a = a >> (128 - 1); // s = a < 0 ? -1 : 0
const s_b = b >> (128 - 1); // s = b < 0 ? -1 : 0
const an = (a ^ s_a) -% s_a; // negate if s == -1
const bn = (b ^ s_b) -% s_b; // negate if s == -1
var r: u128 = undefined;
_ = udivmod(u128, @as(u128, @bitCast(an)), @as(u128, @bitCast(bn)), &r);
return (@as(i128, @bitCast(r)) ^ s_a) -% s_a; // negate if s == -1
}
pub fn __fixdfti(a: f64) callconv(.C) i128 {
return floatToInt(i128, a);
}
fn __fixdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(i128, a)));
}
pub fn __fixsfti(a: f32) callconv(.C) i128 {
return floatToInt(i128, a);
}
fn __fixsfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(i128, a)));
}
pub fn __fixunsdfti(a: f64) callconv(.C) u128 {
return floatToInt(u128, a);
}
fn __fixunsdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(u128, a)));
}
pub fn __fixunssfti(a: f32) callconv(.C) u128 {
return floatToInt(u128, a);
}
fn __fixunssfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
return @as(v2u64, @bitCast(floatToInt(u128, a)));
}
// mulo - multiplication overflow
// * return a*%b.
// * return if a*b overflows => 1 else => 0
// - muloXi4_genericSmall as default
// - muloXi4_genericFast for 2*bitsize <= usize
inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
overflow.* = 0;
const min = math.minInt(ST);
var res: ST = a *% b;
// Hacker's Delight section Overflow subsection Multiplication
// case a=-2^{31}, b=-1 problem, because
// on some machines a*b = -2^{31} with overflow
// Then -2^{31}/-1 overflows and any result is possible.
// => check with a<0 and b=-2^{31}
if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b))
overflow.* = 1;
return res;
}
inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
overflow.* = 0;
const EST = switch (ST) {
i32 => i64,
i64 => i128,
i128 => i256,
else => unreachable,
};
const min = math.minInt(ST);
const max = math.maxInt(ST);
var res: EST = @as(EST, a) * @as(EST, b);
//invariant: -2^{bitwidth(EST)} < res < 2^{bitwidth(EST)-1}
if (res < min or max < res)
overflow.* = 1;
return @as(ST, @truncate(res));
}
const native_endian = builtin.cpu.arch.endian();
const low = switch (native_endian) {
.Big => 1,
.Little => 0,
};
const high = 1 - low;
pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt {
// @setRuntimeSafety(builtin.is_test);
const double_int_bits = @typeInfo(DoubleInt).Int.bits;
const single_int_bits = @divExact(double_int_bits, 2);
const SingleInt = std.meta.Int(.unsigned, single_int_bits);
const SignedDoubleInt = std.meta.Int(.signed, double_int_bits);
const Log2SingleInt = std.math.Log2Int(SingleInt);
const n = @as([2]SingleInt, @bitCast(a));
const d = @as([2]SingleInt, @bitCast(b));
var q: [2]SingleInt = undefined;
var r: [2]SingleInt = undefined;
var sr: c_uint = undefined;
// special cases, X is unknown, K != 0
if (n[high] == 0) {
if (d[high] == 0) {
// 0 X
// ---
// 0 X
if (maybe_rem) |rem| {
rem.* = n[low] % d[low];
}
return n[low] / d[low];
}
// 0 X
// ---
// K X
if (maybe_rem) |rem| {
rem.* = n[low];
}
return 0;
}
// n[high] != 0
if (d[low] == 0) {
if (d[high] == 0) {
// K X
// ---
// 0 0
if (maybe_rem) |rem| {
rem.* = n[high] % d[low];
}
return n[high] / d[low];
}
// d[high] != 0
if (n[low] == 0) {
// K 0
// ---
// K 0
if (maybe_rem) |rem| {
r[high] = n[high] % d[high];
r[low] = 0;
rem.* = @as(DoubleInt, @bitCast(r));
}
return n[high] / d[high];
}
// K K
// ---
// K 0
if ((d[high] & (d[high] - 1)) == 0) {
// d is a power of 2
if (maybe_rem) |rem| {
r[low] = n[low];
r[high] = n[high] & (d[high] - 1);
rem.* = @as(DoubleInt, @bitCast(r));
}
return n[high] >> @as(Log2SingleInt, @intCast(@ctz(d[high])));
}
// K K
// ---
// K 0
sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
// 0 <= sr <= single_int_bits - 2 or sr large
if (sr > single_int_bits - 2) {
if (maybe_rem) |rem| {
rem.* = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= single_int_bits - 1
// q.all = a << (double_int_bits - sr);
q[low] = 0;
q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
// r.all = a >> sr;
r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
} else {
// d[low] != 0
if (d[high] == 0) {
// K X
// ---
// 0 K
if ((d[low] & (d[low] - 1)) == 0) {
// d is a power of 2
if (maybe_rem) |rem| {
rem.* = n[low] & (d[low] - 1);
}
if (d[low] == 1) {
return a;
}
sr = @ctz(d[low]);
q[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
q[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
return @as(DoubleInt, @bitCast(q));
}
// K X
// ---
// 0 K
sr = 1 + single_int_bits + @as(c_uint, @clz(d[low])) - @as(c_uint, @clz(n[high]));
// 2 <= sr <= double_int_bits - 1
// q.all = a << (double_int_bits - sr);
// r.all = a >> sr;
if (sr == single_int_bits) {
q[low] = 0;
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else if (sr < single_int_bits) {
// 2 <= sr <= single_int_bits - 1
q[low] = 0;
q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
} else {
// single_int_bits + 1 <= sr <= double_int_bits - 1
q[low] = n[low] << @as(Log2SingleInt, @intCast(double_int_bits - sr));
q[high] = (n[high] << @as(Log2SingleInt, @intCast(double_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr - single_int_bits)));
r[high] = 0;
r[low] = n[high] >> @as(Log2SingleInt, @intCast(sr - single_int_bits));
}
} else {
// K X
// ---
// K K
sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
// 0 <= sr <= single_int_bits - 1 or sr large
if (sr > single_int_bits - 1) {
if (maybe_rem) |rem| {
rem.* = a;
}
return 0;
}
sr += 1;
// 1 <= sr <= single_int_bits
// q.all = a << (double_int_bits - sr);
// r.all = a >> sr;
q[low] = 0;
if (sr == single_int_bits) {
q[high] = n[low];
r[high] = 0;
r[low] = n[high];
} else {
r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
}
}
}
// Not a special case
// q and r are initialized with:
// q.all = a << (double_int_bits - sr);
// r.all = a >> sr;
// 1 <= sr <= double_int_bits - 1
var carry: u32 = 0;
var r_all: DoubleInt = undefined;
while (sr > 0) : (sr -= 1) {
// r:q = ((r:q) << 1) | carry
r[high] = (r[high] << 1) | (r[low] >> (single_int_bits - 1));
r[low] = (r[low] << 1) | (q[high] >> (single_int_bits - 1));
q[high] = (q[high] << 1) | (q[low] >> (single_int_bits - 1));
q[low] = (q[low] << 1) | carry;
// carry = 0;
// if (r.all >= b)
// {
// r.all -= b;
// carry = 1;
// }
r_all = @as(DoubleInt, @bitCast(r));
const s: SignedDoubleInt = @as(SignedDoubleInt, @bitCast(b -% r_all -% 1)) >> (double_int_bits - 1);
carry = @as(u32, @intCast(s & 1));
r_all -= b & @as(DoubleInt, @bitCast(s));
r = @as([2]SingleInt, @bitCast(r_all));
}
const q_all = (@as(DoubleInt, @bitCast(q)) << 1) | carry;
if (maybe_rem) |rem| {
rem.* = r_all;
}
return q_all;
}
pub inline fn floatToInt(comptime I: type, a: anytype) I {
const Log2Int = math.Log2Int;
const Int = @import("std").meta.Int;
const F = @TypeOf(a);
const float_bits = @typeInfo(F).Float.bits;
const int_bits = @typeInfo(I).Int.bits;
const rep_t = Int(.unsigned, float_bits);
const sig_bits = math.floatMantissaBits(F);
const exp_bits = math.floatExponentBits(F);
const fractional_bits = floatFractionalBits(F);
// const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
const implicit_bit = @as(rep_t, 1) << sig_bits;
const max_exp = (1 << (exp_bits - 1));
const exp_bias = max_exp - 1;
const sig_mask = (@as(rep_t, 1) << sig_bits) - 1;
// Break a into sign, exponent, significand
const a_rep: rep_t = @as(rep_t, @bitCast(a));
const negative = (a_rep >> (float_bits - 1)) != 0;
const exponent = @as(i32, @intCast((a_rep << 1) >> (sig_bits + 1))) - exp_bias;
const significand: rep_t = (a_rep & sig_mask) | implicit_bit;
// If the exponent is negative, the result rounds to zero.
if (exponent < 0) return 0;
// If the value is too large for the integer type, saturate.
switch (@typeInfo(I).Int.signedness) {
.unsigned => {
if (negative) return 0;
if (@as(c_uint, @intCast(exponent)) >= @min(int_bits, max_exp)) return math.maxInt(I);
},
.signed => if (@as(c_uint, @intCast(exponent)) >= @min(int_bits - 1, max_exp)) {
return if (negative) math.minInt(I) else math.maxInt(I);
},
}
// If 0 <= exponent < sig_bits, right shift to get the result.
// Otherwise, shift left.
var result: I = undefined;
if (exponent < fractional_bits) {
result = @as(I, @intCast(significand >> @as(Log2Int(rep_t), @intCast(fractional_bits - exponent))));
} else {
result = @as(I, @intCast(significand)) << @as(Log2Int(I), @intCast(exponent - fractional_bits));
}
if ((@typeInfo(I).Int.signedness == .signed) and negative)
return ~result +% 1;
return result;
}
/// Returns the number of fractional bits in the mantissa of floating point type T.
pub inline fn floatFractionalBits(comptime T: type) comptime_int {
comptime std.debug.assert(@typeInfo(T) == .Float);
// standard IEEE floats have an implicit 0.m or 1.m integer part
// f80 is special and has an explicitly stored bit in the MSB
// this function corresponds to `MANT_DIG - 1' from C
return switch (@typeInfo(T).Float.bits) {
16 => 10,
32 => 23,
64 => 52,
80 => 63,
128 => 112,
else => @compileError("unknown floating point type " ++ @typeName(T)),
};
}
pub fn __lshrti3(a: i128, b: i32) callconv(.C) i128 {
return lshrXi3(i128, a, b);
}
// Logical shift right: shift in 0 from left to right
// Precondition: 0 <= b < T.bit_count
inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
const word_t = HalveInt(T, false);
const S = std.math.Log2Int(word_t.HalfT);
const input = word_t{ .all = a };
var output: word_t = undefined;
if (b >= word_t.bits) {
output.s.high = 0;
output.s.low = input.s.high >> @as(S, @intCast(b - word_t.bits));
} else if (b == 0) {
return a;
} else {
output.s.high = input.s.high >> @as(S, @intCast(b));
output.s.low = input.s.high << @as(S, @intCast(word_t.bits - b));
output.s.low |= input.s.low >> @as(S, @intCast(b));
}
return output.all;
}
/// Allows to access underlying bits as two equally sized lower and higher
/// signed or unsigned integers.
fn HalveInt(comptime T: type, comptime signed_half: bool) type {
return extern union {
pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
pub const HalfTU = std.meta.Int(.unsigned, bits);
pub const HalfTS = std.meta.Int(.signed, bits);
pub const HalfT = if (signed_half) HalfTS else HalfTU;
all: T,
s: if (native_endian == .Little)
extern struct { low: HalfT, high: HalfT }
else
extern struct { high: HalfT, low: HalfT },
};
}

View File

@ -1,87 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const musl = @import("libc/musl.zig");
const folly = @import("libc/folly.zig");
const cpuid = @import("libc/cpuid.zig");
comptime {
// TODO: remove this workaround.
// Our wasm LLVM pipeline always links in memcpy.
// As such, our impl will conflict.
if (builtin.is_test) {
// We don't need memcpy for tests because the tests are built with -lc
} else if (arch != .wasm32) {
@export(memcpy, .{ .name = "memcpy", .linkage = .Strong });
}
}
const Memcpy = *const fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C) [*]u8;
pub var memcpy_target: Memcpy = switch (arch) {
.x86_64 => dispatch_memcpy,
else => unreachable,
};
pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
switch (builtin.os.tag) {
.windows => {
return musl.memcpy(dest, src, len);
},
else => switch (arch) {
// x86_64 has a special optimized memcpy that can use avx2.
.x86_64 => {
return memcpy_target(dest, src, len);
},
else => {
return musl.memcpy(dest, src, len);
},
},
}
}
const MemcpyDecision = enum {
uninitialized,
folly_prefetchw,
folly_prefetcht0,
musl,
};
var memcpy_decision: MemcpyDecision = .uninitialized;
fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
switch (arch) {
.x86_64 => {
// TODO: Switch this to overwrite the memcpy_target pointer once the surgical linker can support it.
// Then dispatch will just happen on the first call instead of every call.
// if (cpuid.supports_avx2()) {
// if (cpuid.supports_prefetchw()) {
// memcpy_target = folly.memcpy_prefetchw;
// } else {
// memcpy_target = folly.memcpy_prefetcht0;
// }
// } else {
// memcpy_target = musl.memcpy;
// }
// return memcpy_target(dest, src, len);
switch (memcpy_decision) {
.uninitialized => {
if (cpuid.supports_avx2()) {
if (cpuid.supports_prefetchw()) {
memcpy_decision = .folly_prefetchw;
} else {
memcpy_decision = .folly_prefetcht0;
}
} else {
memcpy_decision = .musl;
}
return dispatch_memcpy(dest, src, len);
},
.folly_prefetchw => return folly.memcpy_prefetchw(dest, src, len),
.folly_prefetcht0 => return folly.memcpy_prefetcht0(dest, src, len),
.musl => return musl.memcpy(dest, src, len),
}
},
else => unreachable,
}
}

View File

@ -1,7 +0,0 @@
const builtin = @import("builtin");
const os = builtin.os;
pub const function_prefix = switch (os.tag) {
.macos => "_",
else => "",
};

View File

@ -1,53 +0,0 @@
// Check if AVX2 is supported.
// Returns 1 if AVX2 is supported, 0 otherwise.
.global {[function_prefix]s}supports_avx2;
{[function_prefix]s}supports_avx2:
// Save the EBX register.
push %rbx
// Call the CPUID instruction with the EAX register set to 7 and ECX set to 0.
// This will get the CPUID information for the current CPU.
mov $7, %eax
mov $0, %ecx
cpuid
// The AVX2 feature flag is located in the EBX register at bit 5.
bt $5, %ebx
jc .avx2_supported
// AVX2 is not supported.
pop %rbx
mov $0, %eax
ret
.avx2_supported:
pop %rbx
mov $1, %eax
ret
// Check if prefetchw is supported.
// Returns 1 if the prefetchw instruction is supported, 0 otherwise.
.global {[function_prefix]s}supports_prefetchw;
{[function_prefix]s}supports_prefetchw:
// Save the EBX register.
push %rbx
// Call the CPUID instruction with the EAX register set to 0x80000001 and ECX set to 0.
// This will get the CPUID information for the current CPU.
mov $0x80000001, %eax
mov $0, %ecx
cpuid
// The prefetchw feature flag is located in the ECX register at bit 8.
bt $8, %ecx
jc .prefetchw_supported
// prefetchw is not supported.
pop %rbx
mov $0, %eax
ret
.prefetchw_supported:
pop %rbx
mov $1, %eax
ret

View File

@ -1,18 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("assembly_util.zig").function_prefix;
// I couldn't manage to define this in a PIE-friendly way with inline assembly.
// Instead, I am defining these as global assembly functions.
comptime {
switch (arch) {
.x86_64 => {
asm (std.fmt.comptimePrint(@embedFile("cpuid.S"), .{ .function_prefix = function_prefix }));
},
else => unreachable,
}
}
pub extern fn supports_avx2() bool;
pub extern fn supports_prefetchw() bool;

View File

@ -1,2 +0,0 @@
pub const memcpy_prefetchw = @import("folly/memcpy.zig").__folly_memcpy_prefetchw;
pub const memcpy_prefetcht0 = @import("folly/memcpy.zig").__folly_memcpy_prefetcht0;

View File

@ -1,437 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* __folly_memcpy: An optimized memcpy implementation that uses prefetch and
* AVX2 instructions.
*
 * This implementation of memcpy acts as a memmove: while overlapping copies
 * are undefined in memcpy, in some implementations they're the same function,
 * and legacy programs rely on this behavior.
*
* This implementation uses prefetch to avoid dtlb misses. This can
* substantially reduce dtlb store misses in cases where the destination
* location is absent from L1 cache and where the copy size is small enough
* that the hardware prefetcher doesn't have a large impact.
*
* The number of branches is limited by the use of overlapping loads & stores.
* This helps with copies where the source and destination cache lines are already
* present in L1 because there are fewer instructions to execute and fewer
* branches to potentially mispredict.
* e.g. to copy the last 4 <= n <= 7 bytes: copy the first & last 4 bytes (overlapped):
* movl (%rsi), %r8d
* movl -4(%rsi,%rdx), %r9d
* movl %r8d, (%rdi)
* movl %r9d, -4(%rdi,%rdx)
*
*
* For sizes up to 256 all source data is first read into registers and then written:
* - n <= 16: overlapping movs
* - n <= 32: overlapping unaligned 16-byte SSE XMM load/stores
* - n <= 256: overlapping unaligned 32-byte AVX YMM load/stores
*
* Large copies (> 256 bytes) use unaligned loads + aligned stores.
* This is observed to always be faster than rep movsb, so the rep movsb
* instruction is not used.
* - The head & tail may be unaligned => they're always written using unaligned stores.
*
* If the copy size is humongous (> 32 KiB) and the source and destination are both
* aligned, this memcpy will use non-temporal operations (AVX2). This can have
* a substantial speedup for copies where data is absent from L1, but it
* is significantly slower if the source and destination data were already
* in L1. The use of non-temporal operations also has the effect that after
* the copy is complete, the data will be moved out of L1, even if the data was
* present before the copy started.
*
* For n > 256 and overlapping src & dst buffers (memmove):
* - use unaligned loads + aligned stores, but not non-temporal stores
* - for dst < src forward copy in 128 byte batches:
* - unaligned load the first 32 bytes & last 4 x 32 bytes
* - forward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
* - unaligned store the first 32 bytes & last 4 x 32 bytes
* - for dst > src backward copy in 128 byte batches:
* - unaligned load the first 4 x 32 bytes & last 32 bytes
* - backward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
* - unaligned store the first 4 x 32 bytes & last 32 bytes
*
* @author Logan Evans <lpe@fb.com>
*/
// .type {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, @function not supported by windows
{[function_prefix]s}__folly_memcpy_short_{[prefetch]s}:
.cfi_startproc
.L_GE1_LE7_{[prefetch]s}:
cmp $1, %rdx
je .L_EQ1_{[prefetch]s}
cmp $4, %rdx
jae .L_GE4_LE7_{[prefetch]s}
.L_GE2_LE3_{[prefetch]s}:
movw (%rsi), %r8w
movw -2(%rsi,%rdx), %r9w
movw %r8w, (%rdi)
movw %r9w, -2(%rdi,%rdx)
ret
.balign 2
.L_EQ1_{[prefetch]s}:
movb (%rsi), %r8b
movb %r8b, (%rdi)
ret
// Aligning the target of a jump to an even address has a measurable
// speedup in microbenchmarks.
.balign 2
.L_GE4_LE7_{[prefetch]s}:
movl (%rsi), %r8d
movl -4(%rsi,%rdx), %r9d
movl %r8d, (%rdi)
movl %r9d, -4(%rdi,%rdx)
ret
.cfi_endproc
// .size {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_short_{[prefetch]s} not supported by windows
// memcpy is an alternative entrypoint into the function named __folly_memcpy.
// The compiler is able to call memcpy since the name is global while
// stacktraces will show __folly_memcpy since that is the name of the function.
// This is intended to aid in debugging by making it obvious which version of
// memcpy is being used.
.balign 64
.globl {[function_prefix]s}__folly_memcpy_{[prefetch]s}
// .type {[function_prefix]s}__folly_memcpy_{[prefetch]s}, @function not supported by windows
{[function_prefix]s}__folly_memcpy_{[prefetch]s}:
.cfi_startproc
mov %rdi, %rax // return: $rdi
test %rdx, %rdx
je .L_EQ0_{[prefetch]s}
{[prefetch]s} (%rdi)
{[prefetch]s} -1(%rdi,%rdx)
cmp $8, %rdx
jb .L_GE1_LE7_{[prefetch]s}
.L_GE8_{[prefetch]s}:
cmp $32, %rdx
ja .L_GE33_{[prefetch]s}
.L_GE8_LE32_{[prefetch]s}:
cmp $16, %rdx
ja .L_GE17_LE32_{[prefetch]s}
.L_GE8_LE16_{[prefetch]s}:
mov (%rsi), %r8
mov -8(%rsi,%rdx), %r9
mov %r8, (%rdi)
mov %r9, -8(%rdi,%rdx)
.L_EQ0_{[prefetch]s}:
ret
.balign 2
.L_GE17_LE32_{[prefetch]s}:
movdqu (%rsi), %xmm0
movdqu -16(%rsi,%rdx), %xmm1
movdqu %xmm0, (%rdi)
movdqu %xmm1, -16(%rdi,%rdx)
ret
.balign 2
.L_GE193_LE256_{[prefetch]s}:
vmovdqu %ymm3, 96(%rdi)
vmovdqu %ymm4, -128(%rdi,%rdx)
.L_GE129_LE192_{[prefetch]s}:
vmovdqu %ymm2, 64(%rdi)
vmovdqu %ymm5, -96(%rdi,%rdx)
.L_GE65_LE128_{[prefetch]s}:
vmovdqu %ymm1, 32(%rdi)
vmovdqu %ymm6, -64(%rdi,%rdx)
.L_GE33_LE64_{[prefetch]s}:
vmovdqu %ymm0, (%rdi)
vmovdqu %ymm7, -32(%rdi,%rdx)
vzeroupper
ret
.balign 2
.L_GE33_{[prefetch]s}:
vmovdqu (%rsi), %ymm0
vmovdqu -32(%rsi,%rdx), %ymm7
cmp $64, %rdx
jbe .L_GE33_LE64_{[prefetch]s}
{[prefetch]s} 64(%rdi)
vmovdqu 32(%rsi), %ymm1
vmovdqu -64(%rsi,%rdx), %ymm6
cmp $128, %rdx
jbe .L_GE65_LE128_{[prefetch]s}
{[prefetch]s} 128(%rdi)
vmovdqu 64(%rsi), %ymm2
vmovdqu -96(%rsi,%rdx), %ymm5
cmp $192, %rdx
jbe .L_GE129_LE192_{[prefetch]s}
{[prefetch]s} 192(%rdi)
vmovdqu 96(%rsi), %ymm3
vmovdqu -128(%rsi,%rdx), %ymm4
cmp $256, %rdx
jbe .L_GE193_LE256_{[prefetch]s}
.L_GE257_{[prefetch]s}:
{[prefetch]s} 256(%rdi)
// Check if there is an overlap. If there is an overlap then the caller
// has a bug since this is undefined behavior. However, for legacy
// reasons this behavior is expected by some callers.
//
// All copies through 256 bytes will operate as a memmove since for
// those sizes all reads are performed before any writes.
//
// This check uses the idea that there is an overlap if
// (%rdi < (%rsi + %rdx)) && (%rsi < (%rdi + %rdx)),
// or equivalently, there is no overlap if
// ((%rsi + %rdx) <= %rdi) || ((%rdi + %rdx) <= %rsi).
//
// %r9 will be used after .L_ALIGNED_DST_LOOP to calculate how many
// bytes remain to be copied.
// (%rsi + %rdx <= %rdi) => no overlap
lea (%rsi,%rdx), %r9
cmp %rdi, %r9
jbe .L_NO_OVERLAP_{[prefetch]s}
// (%rdi + %rdx <= %rsi) => no overlap
lea (%rdi,%rdx), %r8
cmp %rsi, %r8
// If no info is available in branch predictor's cache, Intel CPUs assume
// forward jumps are not taken. Use a forward jump as overlapping buffers
// are unlikely.
ja .L_OVERLAP_{[prefetch]s}
.balign 2
.L_NO_OVERLAP_{[prefetch]s}:
vmovdqu %ymm0, (%rdi)
vmovdqu %ymm1, 32(%rdi)
vmovdqu %ymm2, 64(%rdi)
vmovdqu %ymm3, 96(%rdi)
// Align %rdi to a 32 byte boundary.
// %rcx = 128 - 31 & %rdi
mov $128, %rcx
and $31, %rdi
sub %rdi, %rcx
lea (%rsi,%rcx), %rsi
lea (%rax,%rcx), %rdi
sub %rcx, %rdx
// %r8 is the end condition for the loop.
lea -128(%rsi,%rdx), %r8
// This threshold is half of L1 cache on a Skylake machine, which means that
// potentially all of L1 will be populated by this copy once it is executed
// (dst and src are cached for temporal copies).
// NON_TEMPORAL_STORE_THRESHOLD = $32768
// cmp NON_TEMPORAL_STORE_THRESHOLD, %rdx
cmp $32768, %rdx
jae .L_NON_TEMPORAL_LOOP_{[prefetch]s}
.balign 2
.L_ALIGNED_DST_LOOP_{[prefetch]s}:
{[prefetch]s} 128(%rdi)
{[prefetch]s} 192(%rdi)
vmovdqu (%rsi), %ymm0
vmovdqu 32(%rsi), %ymm1
vmovdqu 64(%rsi), %ymm2
vmovdqu 96(%rsi), %ymm3
add $128, %rsi
vmovdqa %ymm0, (%rdi)
vmovdqa %ymm1, 32(%rdi)
vmovdqa %ymm2, 64(%rdi)
vmovdqa %ymm3, 96(%rdi)
add $128, %rdi
cmp %r8, %rsi
jb .L_ALIGNED_DST_LOOP_{[prefetch]s}
.L_ALIGNED_DST_LOOP_END_{[prefetch]s}:
sub %rsi, %r9
mov %r9, %rdx
vmovdqu %ymm4, -128(%rdi,%rdx)
vmovdqu %ymm5, -96(%rdi,%rdx)
vmovdqu %ymm6, -64(%rdi,%rdx)
vmovdqu %ymm7, -32(%rdi,%rdx)
vzeroupper
ret
.balign 2
.L_NON_TEMPORAL_LOOP_{[prefetch]s}:
testb $31, %sil
jne .L_ALIGNED_DST_LOOP_{[prefetch]s}
// This is prefetching the source data unlike ALIGNED_DST_LOOP which
// prefetches the destination data. This choice is again informed by
// benchmarks. With a non-temporal store the entirety of the cache line
// is being written so the previous data can be discarded without being
// fetched.
prefetchnta 128(%rsi)
prefetchnta 196(%rsi)
vmovntdqa (%rsi), %ymm0
vmovntdqa 32(%rsi), %ymm1
vmovntdqa 64(%rsi), %ymm2
vmovntdqa 96(%rsi), %ymm3
add $128, %rsi
vmovntdq %ymm0, (%rdi)
vmovntdq %ymm1, 32(%rdi)
vmovntdq %ymm2, 64(%rdi)
vmovntdq %ymm3, 96(%rdi)
add $128, %rdi
cmp %r8, %rsi
jb .L_NON_TEMPORAL_LOOP_{[prefetch]s}
sfence
jmp .L_ALIGNED_DST_LOOP_END_{[prefetch]s}
.L_OVERLAP_{[prefetch]s}:
.balign 2
cmp %rdi, %rsi
jb .L_OVERLAP_BWD_{[prefetch]s} // %rsi < %rdi => backward-copy
je .L_RET_{[prefetch]s} // %rsi == %rdi => return, nothing to copy
// Source & destination buffers overlap. Forward copy.
vmovdqu (%rsi), %ymm8
// Align %rdi to a 32 byte boundary.
// %rcx = 32 - 31 & %rdi
mov $32, %rcx
and $31, %rdi
sub %rdi, %rcx
lea (%rsi,%rcx), %rsi
lea (%rax,%rcx), %rdi
sub %rcx, %rdx
// %r8 is the end condition for the loop.
lea -128(%rsi,%rdx), %r8
.L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}:
{[prefetch]s} 128(%rdi)
{[prefetch]s} 192(%rdi)
vmovdqu (%rsi), %ymm0
vmovdqu 32(%rsi), %ymm1
vmovdqu 64(%rsi), %ymm2
vmovdqu 96(%rsi), %ymm3
add $128, %rsi
vmovdqa %ymm0, (%rdi)
vmovdqa %ymm1, 32(%rdi)
vmovdqa %ymm2, 64(%rdi)
vmovdqa %ymm3, 96(%rdi)
add $128, %rdi
cmp %r8, %rsi
jb .L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}
sub %rsi, %r9
mov %r9, %rdx
vmovdqu %ymm4, -128(%rdi,%rdx)
vmovdqu %ymm5, -96(%rdi,%rdx)
vmovdqu %ymm6, -64(%rdi,%rdx)
vmovdqu %ymm7, -32(%rdi,%rdx)
vmovdqu %ymm8, (%rax) // %rax == the original (unaligned) %rdi
vzeroupper
.L_RET_{[prefetch]s}:
ret
.L_OVERLAP_BWD_{[prefetch]s}:
// Save last 32 bytes.
vmovdqu -32(%rsi, %rdx), %ymm8
lea -32(%rdi, %rdx), %r9
// %r8 is the end condition for the loop.
lea 128(%rsi), %r8
// Align %rdi+%rdx (destination end) to a 32 byte boundary.
// %rcx = (%rdi + %rdx - 32) & 31
mov %r9, %rcx
and $31, %rcx
// Set %rsi & %rdi to the end of the 32 byte aligned range.
sub %rcx, %rdx
add %rdx, %rsi
add %rdx, %rdi
.L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}:
{[prefetch]s} -128(%rdi)
{[prefetch]s} -192(%rdi)
vmovdqu -32(%rsi), %ymm4
vmovdqu -64(%rsi), %ymm5
vmovdqu -96(%rsi), %ymm6
vmovdqu -128(%rsi), %ymm7
sub $128, %rsi
vmovdqa %ymm4, -32(%rdi)
vmovdqa %ymm5, -64(%rdi)
vmovdqa %ymm6, -96(%rdi)
vmovdqa %ymm7, -128(%rdi)
sub $128, %rdi
cmp %r8, %rsi
ja .L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}
vmovdqu %ymm0, (%rax) // %rax == the original unaligned %rdi
vmovdqu %ymm1, 32(%rax)
vmovdqu %ymm2, 64(%rax)
vmovdqu %ymm3, 96(%rax)
vmovdqu %ymm8, (%r9)
vzeroupper
ret
.cfi_endproc
// .size {[function_prefix]s}__folly_memcpy_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_{[prefetch]s} not supported by windows

View File

@ -1,18 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("../assembly_util.zig").function_prefix;
comptime {
switch (arch) {
.x86_64 => {
inline for ([_][]const u8{ "prefetchw", "prefetcht0" }) |prefetch| {
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .prefetch = prefetch, .function_prefix = function_prefix }));
}
},
else => unreachable,
}
}
pub extern fn __folly_memcpy_prefetchw(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
pub extern fn __folly_memcpy_prefetcht0(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;

View File

@ -1 +0,0 @@
pub const memcpy = @import("musl/memcpy.zig").memcpy;

View File

@ -1,193 +0,0 @@
musl as a whole is licensed under the following standard MIT license:
----------------------------------------------------------------------
Copyright © 2005-2020 Rich Felker, et al.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
Authors/contributors include:
A. Wilcox
Ada Worcester
Alex Dowad
Alex Suykov
Alexander Monakov
Andre McCurdy
Andrew Kelley
Anthony G. Basile
Aric Belsito
Arvid Picciani
Bartosz Brachaczek
Benjamin Peterson
Bobby Bingham
Boris Brezillon
Brent Cook
Chris Spiegel
Clément Vasseur
Daniel Micay
Daniel Sabogal
Daurnimator
David Carlier
David Edelsohn
Denys Vlasenko
Dmitry Ivanov
Dmitry V. Levin
Drew DeVault
Emil Renner Berthing
Fangrui Song
Felix Fietkau
Felix Janda
Gianluca Anzolin
Hauke Mehrtens
He X
Hiltjo Posthuma
Isaac Dunham
Jaydeep Patil
Jens Gustedt
Jeremy Huntwork
Jo-Philipp Wich
Joakim Sindholt
John Spencer
Julien Ramseier
Justin Cormack
Kaarle Ritvanen
Khem Raj
Kylie McClain
Leah Neukirchen
Luca Barbato
Luka Perkov
M Farkas-Dyck (Strake)
Mahesh Bodapati
Markus Wichmann
Masanori Ogino
Michael Clark
Michael Forney
Mikhail Kremnyov
Natanael Copa
Nicholas J. Kain
orc
Pascal Cuoq
Patrick Oppenlander
Petr Hosek
Petr Skocik
Pierre Carrier
Reini Urban
Rich Felker
Richard Pennington
Ryan Fairfax
Samuel Holland
Segev Finer
Shiz
sin
Solar Designer
Stefan Kristiansson
Stefan O'Rear
Szabolcs Nagy
Timo Teräs
Trutz Behn
Valentin Ochs
Will Dietz
William Haddon
William Pitcock
Portions of this software are derived from third-party works licensed
under terms compatible with the above MIT license:
The TRE regular expression implementation (src/regex/reg* and
src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
under a 2-clause BSD license (license text in the source files). The
included version has been heavily modified by Rich Felker in 2012, in
the interests of size, simplicity, and namespace cleanliness.
Much of the math library code (src/math/* and src/complex/*) is
Copyright © 1993,2004 Sun Microsystems or
Copyright © 2003-2011 David Schultz or
Copyright © 2003-2009 Steven G. Kargl or
Copyright © 2003-2009 Bruce D. Evans or
Copyright © 2008 Stephen L. Moshier or
Copyright © 2017-2018 Arm Limited
and labelled as such in comments in the individual source files. All
have been licensed under extremely permissive terms.
The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
The Android Open Source Project and is licensed under a two-clause BSD
license. It was taken from Bionic libc, used on Android.
The AArch64 memcpy and memset code (src/string/aarch64/*) are
Copyright © 1999-2019, Arm Limited.
The implementation of DES for crypt (src/crypt/crypt_des.c) is
Copyright © 1994 David Burren. It is licensed under a BSD license.
The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
originally written by Solar Designer and placed into the public
domain. The code also comes with a fallback permissive license for use
in jurisdictions that may not recognize the public domain.
The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
Valentin Ochs and is licensed under an MIT-style license.
The x86_64 port was written by Nicholas J. Kain and is licensed under
the standard MIT terms.
The mips and microblaze ports were originally written by Richard
Pennington for use in the ellcc project. The original code was adapted
by Rich Felker for build system and code conventions during upstream
integration. It is licensed under the standard MIT terms.
The mips64 port was contributed by Imagination Technologies and is
licensed under the standard MIT terms.
The powerpc port was also originally written by Richard Pennington,
and later supplemented and integrated by John Spencer. It is licensed
under the standard MIT terms.
All other files which have no copyright comments are original works
produced specifically for use as part of this library, written either
by Rich Felker, the main author of the library, or by one or more
contibutors listed above. Details on authorship of individual files
can be found in the git version control history of the project. The
omission of copyright and license comments in each file is in the
interest of source tree size.
In addition, permission is hereby granted for all public header files
(include/* and arch/*/bits/*) and crt files intended to be linked into
applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
the copyright notice and permission notice otherwise required by the
license, and to use these files without any requirement of
attribution. These files include substantial contributions from:
Bobby Bingham
John Spencer
Nicholas J. Kain
Rich Felker
Richard Pennington
Stefan Kristiansson
Szabolcs Nagy
all of whom have explicitly granted such permission.
This file previously contained text expressing a belief that most of
the files covered by the above exception were sufficiently trivial not
to be subject to copyright, resulting in confusion over whether it
negated the permissions granted in the license. In the spirit of
permissive licensing, and of not having licensing issues being an
obstacle to adoption, that text has been removed.

View File

@ -1,2 +0,0 @@
These files all come from [musl libc](https://musl.libc.org/).
Roc directly uses a few of them instead of depending on musl libc in full.

View File

@ -1,30 +0,0 @@
.global {[function_prefix]s}musl_memcpy
// Windows does not support the type directive.
// .type {[function_prefix]s}musl_memcpy,@function
{[function_prefix]s}musl_memcpy:
push %esi
push %edi
mov 12(%esp),%edi
mov 16(%esp),%esi
mov 20(%esp),%ecx
mov %edi,%eax
cmp $4,%ecx
jc 1f
test $3,%edi
jz 1f
2: movsb
dec %ecx
test $3,%edi
jnz 2b
1: mov %ecx,%edx
shr $2,%ecx
rep
movsl
and $3,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: pop %edi
pop %esi
ret

View File

@ -1,23 +0,0 @@
.global {[function_prefix]s}musl_memcpy
// Windows does not support the type directive.
// .type {[function_prefix]s}musl_memcpy,@function
{[function_prefix]s}musl_memcpy:
mov %rdi,%rax
cmp $8,%rdx
jc 1f
test $7,%edi
jz 1f
2: movsb
dec %rdx
test $7,%edi
jnz 2b
1: mov %rdx,%rcx
shr $3,%rcx
rep
movsq
and $7,%edx
jz 1f
2: movsb
dec %edx
jnz 2b
1: ret

View File

@ -1,223 +0,0 @@
const std = @import("std");
const builtin = @import("builtin");
const arch = builtin.cpu.arch;
const function_prefix = @import("../assembly_util.zig").function_prefix;
comptime {
switch (arch) {
.x86_64 => {
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .function_prefix = function_prefix }));
},
.x86 => {
asm (std.fmt.comptimePrint(@embedFile("memcpy-x86.S"), .{ .function_prefix = function_prefix }));
},
// TODO: add assembly implementations for other platforms.
else => {},
}
}
pub const memcpy =
switch (builtin.os.tag) {
.windows => fallback_memcpy,
else => switch (arch) {
.x86_64, .x86 => musl_memcpy,
else => fallback_memcpy,
},
};
pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;
// Note: this is written to only support little-endian targets.
// To support big endian, `<<` and `>>` would need to be swapped.
pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
var d = dest;
var s = src;
var n = len;
switch (@min(n, @intFromPtr(s) % 4)) {
1 => {
d[0] = s[0];
d += 1;
s += 1;
n -= 1;
},
2 => {
d[0] = s[0];
d[1] = s[1];
d += 2;
s += 2;
n -= 2;
},
3 => {
d[0] = s[0];
d[1] = s[1];
d[2] = s[2];
d += 3;
s += 3;
n -= 3;
},
else => {},
}
if (@intFromPtr(d) % 4 == 0) {
var d4 = @as([*]align(4) u8, @alignCast(d));
var s4 = @as([*]align(4) const u8, @alignCast(s));
while (n >= 16) : (n -= 16) {
var d_u32 = @as([*]u32, @ptrCast(d4));
var s_u32 = @as([*]const u32, @ptrCast(s4));
d_u32[0] = s_u32[0];
d_u32[1] = s_u32[1];
d_u32[2] = s_u32[2];
d_u32[3] = s_u32[3];
d4 += 16;
s4 += 16;
}
if (n & 8 != 0) {
var d_u32 = @as([*]u32, @ptrCast(d4));
var s_u32 = @as([*]const u32, @ptrCast(s4));
d_u32[0] = s_u32[0];
d_u32[1] = s_u32[1];
d4 += 8;
s4 += 8;
}
if (n & 4 != 0) {
var d_u32 = @as([*]u32, @ptrCast(d4));
var s_u32 = @as([*]const u32, @ptrCast(s4));
d_u32[0] = s_u32[0];
d4 += 4;
s4 += 4;
}
d = d4;
s = s4;
if (n & 2 != 0) {
d[0] = s[0];
d += 1;
s += 1;
d[0] = s[0];
d += 1;
s += 1;
}
if (n & 1 != 0) {
d[0] = s[0];
}
return dest;
}
if (n >= 32) {
switch (@intFromPtr(d) % 4) {
1 => {
var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
d[0] = s[0];
d += 1;
s += 1;
d[0] = s[0];
d += 1;
s += 1;
d[0] = s[0];
d += 1;
s += 1;
n -= 3;
while (n >= 17) : (n -= 16) {
var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 1)));
var x = s_u32[0];
d_u32[0] = (w >> 24) | (x << 8);
w = s_u32[1];
d_u32[1] = (x >> 24) | (w << 8);
x = s_u32[2];
d_u32[2] = (w >> 24) | (x << 8);
w = s_u32[3];
d_u32[3] = (x >> 24) | (w << 8);
d += 16;
s += 16;
}
},
2 => {
var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
d[0] = s[0];
d += 1;
s += 1;
d[0] = s[0];
d += 1;
s += 1;
n -= 2;
while (n >= 18) : (n -= 16) {
var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 2)));
var x = s_u32[0];
d_u32[0] = (w >> 16) | (x << 16);
w = s_u32[1];
d_u32[1] = (x >> 16) | (w << 16);
x = s_u32[2];
d_u32[2] = (w >> 16) | (x << 16);
w = s_u32[3];
d_u32[3] = (x >> 16) | (w << 16);
d += 16;
s += 16;
}
},
3 => {
var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
d[0] = s[0];
d += 1;
s += 1;
n -= 1;
while (n >= 19) : (n -= 16) {
var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 3)));
var x = s_u32[0];
d_u32[0] = (w >> 8) | (x << 24);
w = s_u32[1];
d_u32[1] = (x >> 8) | (w << 24);
x = s_u32[2];
d_u32[2] = (w >> 8) | (x << 24);
w = s_u32[3];
d_u32[3] = (x >> 8) | (w << 24);
d += 16;
s += 16;
}
},
else => unreachable,
}
}
if (n & 16 != 0) {
comptime var i = 0;
inline while (i < 16) : (i += 1) {
d[0] = s[0];
d += 1;
s += 1;
}
}
if (n & 8 != 0) {
comptime var i = 0;
inline while (i < 8) : (i += 1) {
d[0] = s[0];
d += 1;
s += 1;
}
}
if (n & 4 != 0) {
comptime var i = 0;
inline while (i < 4) : (i += 1) {
d[0] = s[0];
d += 1;
s += 1;
}
}
if (n & 2 != 0) {
d[0] = s[0];
d += 1;
s += 1;
d[0] = s[0];
d += 1;
s += 1;
}
if (n & 1 != 0) {
d[0] = s[0];
}
return dest;
}

View File

@ -6,11 +6,6 @@ const expect = @import("expect.zig");
const panic_utils = @import("panic.zig");
const dbg_utils = @import("dbg.zig");
comptime {
_ = @import("compiler_rt.zig");
_ = @import("libc.zig");
}
const ROC_BUILTINS = "roc_builtins";
const NUM = "num";
const STR = "str";
@ -18,6 +13,13 @@ const STR = "str";
// Dec Module
const dec = @import("dec.zig");
var FLTUSED: i32 = 0;
comptime {
if (builtin.os.tag == .windows) {
@export(FLTUSED, .{ .name = "_fltused", .linkage = .Weak });
}
}
comptime {
exportDecFn(dec.absC, "abs");
exportDecFn(dec.acosC, "acos");

File diff suppressed because it is too large

View File

@ -2,6 +2,8 @@ interface Set
exposes [
Set,
empty,
withCapacity,
reserve,
single,
walk,
walkUntil,
@ -45,7 +47,7 @@ Set k := Dict.Dict k {} where k implements Hash & Eq
},
]
isEq : Set k, Set k -> Bool where k implements Hash & Eq
isEq : Set k, Set k -> Bool
isEq = \xs, ys ->
if len xs != len ys then
Bool.false
@ -56,7 +58,7 @@ isEq = \xs, ys ->
else
Break Bool.false
hashSet : hasher, Set k -> hasher where k implements Hash & Eq, hasher implements Hasher
hashSet : hasher, Set k -> hasher where hasher implements Hasher
hashSet = \hasher, @Set inner -> Hash.hash hasher inner
toInspectorSet : Set k -> Inspector f where k implements Inspect & Hash & Eq, f implements InspectFormatter
@ -74,13 +76,18 @@ toInspectorSet = \set ->
empty : {} -> Set *
empty = \{} -> @Set (Dict.empty {})
## Return a dictionary with space allocated for a number of entries. This
## Return a set with space allocated for a number of entries. This
## may provide a performance optimization if you know how many entries will be
## inserted.
withCapacity : Nat -> Set *
withCapacity = \cap ->
@Set (Dict.withCapacity cap)
## Enlarge the set to hold at least `requested` additional elements.
reserve : Set k, Nat -> Set k
reserve = \@Set dict, requested ->
@Set (Dict.reserve dict requested)
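A hypothetical usage sketch (values illustrative, not from this PR): reserving capacity up front avoids repeated rehashing when many inserts follow.
```
expect
    base = Set.fromList [1, 2, 3]
    bigger = Set.reserve base 100
    Set.len bigger == 3
```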
## Creates a new `Set` with a single value.
## ```
## singleItemSet = Set.single "Apple"
@ -88,7 +95,7 @@ withCapacity = \cap ->
##
## expect countValues == 1
## ```
single : k -> Set k where k implements Hash & Eq
single : k -> Set k
single = \key ->
Dict.single key {} |> @Set
@ -104,7 +111,7 @@ single = \key ->
##
## expect countValues == 3
## ```
insert : Set k, k -> Set k where k implements Hash & Eq
insert : Set k, k -> Set k
insert = \@Set dict, key ->
Dict.insert dict key {} |> @Set
@ -189,7 +196,7 @@ expect
## expect has10 == Bool.false
## expect has20 == Bool.true
## ```
remove : Set k, k -> Set k where k implements Hash & Eq
remove : Set k, k -> Set k
remove = \@Set dict, key ->
Dict.remove dict key |> @Set
@ -208,7 +215,7 @@ remove = \@Set dict, key ->
## expect hasApple == Bool.true
## expect hasBanana == Bool.false
## ```
contains : Set k, k -> Bool where k implements Hash & Eq
contains : Set k, k -> Bool
contains = \@Set dict, key ->
Dict.contains dict key
@ -221,7 +228,7 @@ contains = \@Set dict, key ->
##
## expect Set.toList numbers == values
## ```
toList : Set k -> List k where k implements Hash & Eq
toList : Set k -> List k
toList = \@Set dict ->
Dict.keys dict
@ -235,7 +242,7 @@ toList = \@Set dict ->
##
## expect Set.fromList [Pear, Apple, Banana] == values
## ```
fromList : List k -> Set k where k implements Hash & Eq
fromList : List k -> Set k
fromList = \list ->
list
|> List.map \k -> (k, {})
@ -252,7 +259,7 @@ fromList = \list ->
##
## expect Set.union set1 set2 == Set.fromList [Left, Right]
## ```
union : Set k, Set k -> Set k where k implements Hash & Eq
union : Set k, Set k -> Set k
union = \@Set dict1, @Set dict2 ->
Dict.insertAll dict1 dict2 |> @Set
@ -265,7 +272,7 @@ union = \@Set dict1, @Set dict2 ->
##
## expect Set.intersection set1 set2 == Set.single Left
## ```
intersection : Set k, Set k -> Set k where k implements Hash & Eq
intersection : Set k, Set k -> Set k
intersection = \@Set dict1, @Set dict2 ->
Dict.keepShared dict1 dict2 |> @Set
@ -279,7 +286,7 @@ intersection = \@Set dict1, @Set dict2 ->
##
## expect Set.difference first second == Set.fromList [Up, Down]
## ```
difference : Set k, Set k -> Set k where k implements Hash & Eq
difference : Set k, Set k -> Set k
difference = \@Set dict1, @Set dict2 ->
Dict.removeAll dict1 dict2 |> @Set
@ -302,14 +309,14 @@ difference = \@Set dict1, @Set dict2 ->
##
## expect result == 2
## ```
walk : Set k, state, (state, k -> state) -> state where k implements Hash & Eq
walk : Set k, state, (state, k -> state) -> state
walk = \@Set dict, state, step ->
Dict.walk dict state (\s, k, _ -> step s k)
## Convert each value in the set to something new, by calling a conversion
## function on each of them which receives the old value. Then return a
## new set containing the converted values.
map : Set a, (a -> b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
map : Set a, (a -> b) -> Set b
map = \set, transform ->
init = withCapacity (capacity set)
@ -321,7 +328,7 @@ map = \set, transform ->
## (using [Set.union]) into one set.
##
## You may know a similar function named `concatMap` in other languages.
joinMap : Set a, (a -> Set b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
joinMap : Set a, (a -> Set b) -> Set b
joinMap = \set, transform ->
init = withCapacity (capacity set) # Might be a pessimization
@ -343,7 +350,7 @@ joinMap = \set, transform ->
##
## expect result == FoundTheAnswer
## ```
walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state where k implements Hash & Eq
walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state
walkUntil = \@Set dict, state, step ->
Dict.walkUntil dict state (\s, k, _ -> step s k)

View File

@ -1058,6 +1058,50 @@ pub fn module_from_builtins<'ctx>(
let module = Module::parse_bitcode_from_buffer(&memory_buffer, ctx)
.unwrap_or_else(|err| panic!("Unable to import builtins bitcode. LLVM error: {err:?}"));
// In my testing, this adds about 20ms extra to compilation.
// Long term, it would be best if we could do this on the Zig side.
// This change enables us to DCE all the parts of compiler-rt we don't use.
// That said, it would be better to DCE them before roc app compilation time.
// Theoretically, anything not depended on by a `roc_builtins.` function could already be DCE'd.
// Still, this workaround is good enough and fixes compilation times.
// Also, must_keep lists the functions we depend on that would normally be provided by libc.
// They are magically linked to by LLVM builtins, so we must specify that they can't be DCE'd.
let must_keep = [
"_fltused",
"floorf",
"memcpy",
"memset",
// Roc special functions
"__roc_force_longjmp",
"__roc_force_setjmp",
"set_shared_buffer",
];
for func in module.get_functions() {
let has_definition = func.count_basic_blocks() > 0;
let name = func.get_name().to_string_lossy();
if has_definition
&& !name.starts_with("roc_builtins.")
&& !must_keep.contains(&name.as_ref())
{
func.set_linkage(Linkage::Private);
}
}
// Note: running DCE here is faster than waiting until full app DCE.
let mpm = PassManager::create(());
mpm.add_global_dce_pass();
mpm.run_on(&module);
// Now that the unused compiler-rt functions have been removed,
// mark that the builtin functions are allowed to be DCE'd if they aren't used.
for func in module.get_functions() {
let name = func.get_name().to_string_lossy();
if name.starts_with("roc_builtins.") {
func.set_linkage(Linkage::Private);
}
}
// Add LLVM intrinsics.
add_intrinsics(ctx, &module);

View File

@ -183,7 +183,7 @@ impl LowLevelWrapperType {
/// We use a Rust macro to ensure that every LowLevel gets handled
macro_rules! map_symbol_to_lowlevel {
($($lowlevel:ident <= $symbol:ident),* $(,)?) => {
($($lowlevel:ident <= $($symbol:ident),+);* $(;)?) => {
fn for_symbol_help(symbol: Symbol) -> LowLevelWrapperType {
use $crate::low_level::LowLevelWrapperType::*;
@ -191,14 +191,14 @@ macro_rules! map_symbol_to_lowlevel {
// expands to a big (but non-exhaustive) match on symbols and maps them to a lowlevel
match symbol {
$(
Symbol::$symbol => CanBeReplacedBy(LowLevel::$lowlevel),
$(Symbol::$symbol)|+ => CanBeReplacedBy(LowLevel::$lowlevel),
)*
_ => NotALowLevelWrapper,
}
}
fn _enforce_exhaustiveness(lowlevel: LowLevel) -> Symbol {
fn _enforce_exhaustiveness(lowlevel: LowLevel) -> &'static [Symbol] {
// when adding a new lowlevel, this match will stop being exhaustive, and give a
// compiler error. Most likely, you are adding a new lowlevel that maps directly to a
// symbol. For instance, you want to have `List.foo` to stand for the `ListFoo`
@ -209,7 +209,7 @@ macro_rules! map_symbol_to_lowlevel {
// that it isn't and just see if that works.
match lowlevel {
$(
LowLevel::$lowlevel => Symbol::$symbol,
LowLevel::$lowlevel => &[$(Symbol::$symbol),+],
)*
// these are higher-order lowlevels. these need the surrounding
@ -259,107 +259,107 @@ macro_rules! map_symbol_to_lowlevel {
// pattern of a symbol mapping directly to a lowlevel. In other words, most lowlevels (left) are generated
// by only one specific symbol (right)
map_symbol_to_lowlevel! {
StrConcat <= STR_CONCAT,
StrJoinWith <= STR_JOIN_WITH,
StrIsEmpty <= STR_IS_EMPTY,
StrStartsWith <= STR_STARTS_WITH,
StrStartsWithScalar <= STR_STARTS_WITH_SCALAR,
StrEndsWith <= STR_ENDS_WITH,
StrSplit <= STR_SPLIT,
StrCountGraphemes <= STR_COUNT_GRAPHEMES,
StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES,
StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL,
StrToUtf8 <= STR_TO_UTF8,
StrRepeat <= STR_REPEAT,
StrTrim <= STR_TRIM,
StrTrimStart <= STR_TRIM_START,
StrTrimEnd <= STR_TRIM_END,
StrToScalars <= STR_TO_SCALARS,
StrGetUnsafe <= STR_GET_UNSAFE,
StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE,
StrReserve <= STR_RESERVE,
StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE,
StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE,
StrToNum <= STR_TO_NUM,
StrGetCapacity <= STR_CAPACITY,
StrWithCapacity <= STR_WITH_CAPACITY,
StrGraphemes <= STR_GRAPHEMES,
StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY,
ListLen <= LIST_LEN,
ListGetCapacity <= LIST_CAPACITY,
ListWithCapacity <= LIST_WITH_CAPACITY,
ListReserve <= LIST_RESERVE,
ListReleaseExcessCapacity <= LIST_RELEASE_EXCESS_CAPACITY,
ListIsUnique <= LIST_IS_UNIQUE,
ListAppendUnsafe <= LIST_APPEND_UNSAFE,
ListPrepend <= LIST_PREPEND,
ListGetUnsafe <= LIST_GET_UNSAFE,
ListReplaceUnsafe <= LIST_REPLACE_UNSAFE,
ListConcat <= LIST_CONCAT,
ListSublist <= LIST_SUBLIST_LOWLEVEL,
ListDropAt <= LIST_DROP_AT,
ListSwap <= LIST_SWAP,
NumAdd <= NUM_ADD,
NumAddWrap <= NUM_ADD_WRAP,
NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL,
NumAddSaturated <= NUM_ADD_SATURATED,
NumSub <= NUM_SUB,
NumSubWrap <= NUM_SUB_WRAP,
NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL,
NumSubSaturated <= NUM_SUB_SATURATED,
NumMul <= NUM_MUL,
NumMulWrap <= NUM_MUL_WRAP,
NumMulSaturated <= NUM_MUL_SATURATED,
NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL,
NumGt <= NUM_GT,
NumGte <= NUM_GTE,
NumLt <= NUM_LT,
NumLte <= NUM_LTE,
NumCompare <= NUM_COMPARE,
NumDivFrac <= NUM_DIV_FRAC,
NumDivCeilUnchecked <= NUM_DIV_CEIL,
NumDivTruncUnchecked <= NUM_DIV_TRUNC,
NumRemUnchecked <= NUM_REM,
NumIsMultipleOf <= NUM_IS_MULTIPLE_OF,
NumAbs <= NUM_ABS,
NumNeg <= NUM_NEG,
NumSin <= NUM_SIN,
NumCos <= NUM_COS,
NumTan <= NUM_TAN,
NumSqrtUnchecked <= NUM_SQRT,
NumLogUnchecked <= NUM_LOG,
NumRound <= NUM_ROUND,
NumToFrac <= NUM_TO_FRAC,
NumIsNan <= NUM_IS_NAN,
NumIsInfinite <= NUM_IS_INFINITE,
NumIsFinite <= NUM_IS_FINITE,
NumPow <= NUM_POW,
NumCeiling <= NUM_CEILING,
NumPowInt <= NUM_POW_INT,
NumFloor <= NUM_FLOOR,
NumAtan <= NUM_ATAN,
NumAcos <= NUM_ACOS,
NumAsin <= NUM_ASIN,
NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL,
NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL,
NumBytesToU64 <= NUM_BYTES_TO_U64_LOWLEVEL,
NumBytesToU128 <= NUM_BYTES_TO_U128_LOWLEVEL,
NumBitwiseAnd <= NUM_BITWISE_AND,
NumBitwiseXor <= NUM_BITWISE_XOR,
NumBitwiseOr <= NUM_BITWISE_OR,
NumShiftLeftBy <= NUM_SHIFT_LEFT,
NumShiftRightBy <= NUM_SHIFT_RIGHT,
NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL,
NumToStr <= NUM_TO_STR,
NumCountLeadingZeroBits <= NUM_COUNT_LEADING_ZERO_BITS,
NumCountTrailingZeroBits <= NUM_COUNT_TRAILING_ZERO_BITS,
NumCountOneBits <= NUM_COUNT_ONE_BITS,
I128OfDec <= I128_OF_DEC,
Eq <= BOOL_STRUCTURAL_EQ,
NotEq <= BOOL_STRUCTURAL_NOT_EQ,
And <= BOOL_AND,
Or <= BOOL_OR,
Not <= BOOL_NOT,
Unreachable <= LIST_UNREACHABLE,
DictPseudoSeed <= DICT_PSEUDO_SEED,
StrConcat <= STR_CONCAT;
StrJoinWith <= STR_JOIN_WITH;
StrIsEmpty <= STR_IS_EMPTY;
StrStartsWith <= STR_STARTS_WITH;
StrStartsWithScalar <= STR_STARTS_WITH_SCALAR;
StrEndsWith <= STR_ENDS_WITH;
StrSplit <= STR_SPLIT;
StrCountGraphemes <= STR_COUNT_GRAPHEMES;
StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES;
StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL;
StrToUtf8 <= STR_TO_UTF8;
StrRepeat <= STR_REPEAT;
StrTrim <= STR_TRIM;
StrTrimStart <= STR_TRIM_START;
StrTrimEnd <= STR_TRIM_END;
StrToScalars <= STR_TO_SCALARS;
StrGetUnsafe <= STR_GET_UNSAFE;
StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE;
StrReserve <= STR_RESERVE;
StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE;
StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE;
StrToNum <= STR_TO_NUM;
StrGetCapacity <= STR_CAPACITY;
StrWithCapacity <= STR_WITH_CAPACITY;
StrGraphemes <= STR_GRAPHEMES;
StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY;
ListLen <= LIST_LEN;
ListGetCapacity <= LIST_CAPACITY;
ListWithCapacity <= LIST_WITH_CAPACITY;
ListReserve <= LIST_RESERVE;
ListReleaseExcessCapacity <= LIST_RELEASE_EXCESS_CAPACITY;
ListIsUnique <= LIST_IS_UNIQUE;
ListAppendUnsafe <= LIST_APPEND_UNSAFE;
ListPrepend <= LIST_PREPEND;
ListGetUnsafe <= LIST_GET_UNSAFE, DICT_LIST_GET_UNSAFE;
ListReplaceUnsafe <= LIST_REPLACE_UNSAFE;
ListConcat <= LIST_CONCAT;
ListSublist <= LIST_SUBLIST_LOWLEVEL;
ListDropAt <= LIST_DROP_AT;
ListSwap <= LIST_SWAP;
NumAdd <= NUM_ADD;
NumAddWrap <= NUM_ADD_WRAP;
NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL;
NumAddSaturated <= NUM_ADD_SATURATED;
NumSub <= NUM_SUB;
NumSubWrap <= NUM_SUB_WRAP;
NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL;
NumSubSaturated <= NUM_SUB_SATURATED;
NumMul <= NUM_MUL;
NumMulWrap <= NUM_MUL_WRAP;
NumMulSaturated <= NUM_MUL_SATURATED;
NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL;
NumGt <= NUM_GT;
NumGte <= NUM_GTE;
NumLt <= NUM_LT;
NumLte <= NUM_LTE;
NumCompare <= NUM_COMPARE;
NumDivFrac <= NUM_DIV_FRAC;
NumDivCeilUnchecked <= NUM_DIV_CEIL;
NumDivTruncUnchecked <= NUM_DIV_TRUNC;
NumRemUnchecked <= NUM_REM;
NumIsMultipleOf <= NUM_IS_MULTIPLE_OF;
NumAbs <= NUM_ABS;
NumNeg <= NUM_NEG;
NumSin <= NUM_SIN;
NumCos <= NUM_COS;
NumTan <= NUM_TAN;
NumSqrtUnchecked <= NUM_SQRT;
NumLogUnchecked <= NUM_LOG;
NumRound <= NUM_ROUND;
NumToFrac <= NUM_TO_FRAC;
NumIsNan <= NUM_IS_NAN;
NumIsInfinite <= NUM_IS_INFINITE;
NumIsFinite <= NUM_IS_FINITE;
NumPow <= NUM_POW;
NumCeiling <= NUM_CEILING;
NumPowInt <= NUM_POW_INT;
NumFloor <= NUM_FLOOR;
NumAtan <= NUM_ATAN;
NumAcos <= NUM_ACOS;
NumAsin <= NUM_ASIN;
NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL;
NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL;
NumBytesToU64 <= NUM_BYTES_TO_U64_LOWLEVEL;
NumBytesToU128 <= NUM_BYTES_TO_U128_LOWLEVEL;
NumBitwiseAnd <= NUM_BITWISE_AND;
NumBitwiseXor <= NUM_BITWISE_XOR;
NumBitwiseOr <= NUM_BITWISE_OR;
NumShiftLeftBy <= NUM_SHIFT_LEFT;
NumShiftRightBy <= NUM_SHIFT_RIGHT;
NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL;
NumToStr <= NUM_TO_STR;
NumCountLeadingZeroBits <= NUM_COUNT_LEADING_ZERO_BITS;
NumCountTrailingZeroBits <= NUM_COUNT_TRAILING_ZERO_BITS;
NumCountOneBits <= NUM_COUNT_ONE_BITS;
I128OfDec <= I128_OF_DEC;
Eq <= BOOL_STRUCTURAL_EQ;
NotEq <= BOOL_STRUCTURAL_NOT_EQ;
And <= BOOL_AND;
Or <= BOOL_OR;
Not <= BOOL_NOT;
Unreachable <= LIST_UNREACHABLE;
DictPseudoSeed <= DICT_PSEUDO_SEED;
}
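
The reshaped macro above is the heart of this change: the mapping arrow now accepts one or more symbols per lowlevel (comma-separated, with `;` between entries), expands them into an or-pattern on the symbol side, and returns a `&'static [Symbol]` slice on the exhaustiveness side, which is what lets both LIST_GET_UNSAFE and DICT_LIST_GET_UNSAFE lower to ListGetUnsafe. A minimal, self-contained sketch of the same Rust pattern (the `Low`/`Sym` enums and function names are hypothetical stand-ins, not the compiler's real types):

#[derive(Debug, PartialEq)]
enum Low { StrConcat, ListGetUnsafe }
#[derive(Debug, PartialEq)]
enum Sym { StrConcat, ListGetUnsafe, DictListGetUnsafe, Other }

macro_rules! map_sym_to_low {
    ($($low:ident <= $($sym:ident),+);* $(;)?) => {
        fn for_sym(sym: &Sym) -> Option<Low> {
            match sym {
                // `$(Sym::$sym)|+` expands to an or-pattern: Sym::A | Sym::B | ...
                $( $(Sym::$sym)|+ => Some(Low::$low), )*
                _ => None,
            }
        }
        // Returning a slice keeps this direction exhaustive over `Low`
        // even when several symbols share one lowlevel.
        fn for_low(low: &Low) -> &'static [Sym] {
            match low {
                $( Low::$low => &[$(Sym::$sym),+], )*
            }
        }
    };
}

map_sym_to_low! {
    StrConcat <= StrConcat;
    ListGetUnsafe <= ListGetUnsafe, DictListGetUnsafe;
}

fn main() {
    assert_eq!(for_sym(&Sym::DictListGetUnsafe), Some(Low::ListGetUnsafe));
    assert_eq!(for_sym(&Sym::Other), None);
    assert_eq!(for_low(&Low::ListGetUnsafe).len(), 2);
}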

View File

@ -1486,6 +1486,7 @@ define_builtins! {
26 DICT_JOINMAP: "joinMap"
27 DICT_KEEP_IF: "keepIf"
28 DICT_DROP_IF: "dropIf"
29 DICT_RESERVE: "reserve"
}
9 SET: "Set" => {
0 SET_SET: "Set" exposed_type=true // the Set.Set type alias
@ -1510,6 +1511,8 @@ define_builtins! {
19 SET_JOIN_MAP: "joinMap"
20 SET_KEEP_IF: "keepIf"
21 SET_DROP_IF: "dropIf"
22 SET_WITH_CAPACITY: "withCapacity"
23 SET_RESERVE: "reserve"
}
10 BOX: "Box" => {
0 BOX_BOX_TYPE: "Box" exposed_apply_type=true // the Box.Box opaque type

View File

@ -1813,6 +1813,7 @@ fn ceiling() {
#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
fn floor() {
assert_evals_to!("Num.floor 1.9f64", 1, i64);
assert_evals_to!("Num.floor -1.9f64", -2, i64);
}
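
The added assertion pins down the negative case: floor rounds toward negative infinity rather than toward zero, so Num.floor -1.9f64 must give -2, not -1. The same distinction in Rust terms:

fn main() {
    // floor rounds toward negative infinity, unlike trunc (toward zero)
    assert_eq!((-1.9f64).floor(), -2.0);
    assert_eq!((-1.9f64).trunc(), -1.0);
}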
#[test]

View File

@ -1,46 +1,40 @@
procedure Dict.1 (Dict.596):
let Dict.606 : List {[], []} = Array [];
let Dict.35 : List U64 = CallByName Dict.35;
let Dict.34 : List I8 = CallByName Dict.34;
let Dict.605 : {List {[], []}, List U64, List I8} = Struct {Dict.606, Dict.35, Dict.34};
ret Dict.605;
procedure Dict.1 (Dict.679):
let Dict.688 : List {U32, U32} = Array [];
let Dict.689 : List {[], []} = Array [];
let Dict.690 : U64 = 0i64;
let Dict.41 : Float32 = CallByName Dict.41;
let Dict.42 : U8 = CallByName Dict.42;
let Dict.687 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = Struct {Dict.688, Dict.689, Dict.690, Dict.41, Dict.42};
ret Dict.687;
procedure Dict.34 ():
let Dict.608 : I8 = CallByName Dict.46;
let Dict.609 : I8 = CallByName Dict.46;
let Dict.610 : I8 = CallByName Dict.46;
let Dict.611 : I8 = CallByName Dict.46;
let Dict.612 : I8 = CallByName Dict.46;
let Dict.613 : I8 = CallByName Dict.46;
let Dict.614 : I8 = CallByName Dict.46;
let Dict.615 : I8 = CallByName Dict.46;
let Dict.607 : List I8 = Array [Dict.608, Dict.609, Dict.610, Dict.611, Dict.612, Dict.613, Dict.614, Dict.615];
ret Dict.607;
procedure Dict.35 ():
let Dict.617 : List U64 = Array [0i64, 0i64, 0i64, 0i64, 0i64, 0i64, 0i64, 0i64];
ret Dict.617;
procedure Dict.4 (Dict.603):
let Dict.114 : List {[], []} = StructAtIndex 0 Dict.603;
let #Derived_gen.1 : List U64 = StructAtIndex 1 Dict.603;
dec #Derived_gen.1;
let #Derived_gen.0 : List I8 = StructAtIndex 2 Dict.603;
procedure Dict.4 (Dict.685):
let Dict.138 : List {[], []} = StructAtIndex 1 Dict.685;
let #Derived_gen.0 : List {U32, U32} = StructAtIndex 0 Dict.685;
dec #Derived_gen.0;
let Dict.604 : U64 = CallByName List.6 Dict.114;
dec Dict.114;
ret Dict.604;
let Dict.686 : U64 = CallByName List.6 Dict.138;
dec Dict.138;
ret Dict.686;
procedure Dict.46 ():
let Dict.616 : I8 = -128i64;
ret Dict.616;
procedure Dict.41 ():
let Dict.694 : Float32 = 0.8f64;
ret Dict.694;
procedure Dict.42 ():
let Dict.692 : U8 = 64i64;
let Dict.693 : U8 = 3i64;
let Dict.691 : U8 = CallByName Num.20 Dict.692 Dict.693;
ret Dict.691;
procedure List.6 (#Attr.2):
let List.553 : U64 = lowlevel ListLen #Attr.2;
ret List.553;
procedure Num.20 (#Attr.2, #Attr.3):
let Num.291 : U8 = lowlevel NumSub #Attr.2 #Attr.3;
ret Num.291;
procedure Test.0 ():
let Test.3 : {} = Struct {};
let Test.2 : {List {[], []}, List U64, List I8} = CallByName Dict.1 Test.3;
let Test.2 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = CallByName Dict.1 Test.3;
let Test.1 : U64 = CallByName Dict.4 Test.2;
ret Test.1;
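
These mono dumps show the new Dict layout directly: the old {data, metadata-U64s, metadata-I8s} triple becomes {List {U32, U32}, List {[], []}, U64, Float32, U8}, i.e. a bucket list, the dense data list, a U64 that is 0 for the empty dict (presumably the max bucket capacity, since Dict.4 derives the length from the data list instead), a max load factor of 0.8 (Dict.41), and a shift count computed as 64 - 3 (Dict.42). Assuming the port keeps ankerl::unordered_dense's scheme, each bucket packs a probe distance plus an 8-bit hash fingerprint into one U32 next to a U32 index into the dense data; a rough Rust sketch of that bucket math (all names here are illustrative, not from the Roc source):

// Illustrative sketch of the ankerl-style bucket metadata.
#[allow(dead_code)]
struct Bucket {
    dist_and_fingerprint: u32, // upper 24 bits: probe distance + 1 (0 = empty); low 8 bits: fingerprint
    data_index: u32,           // index of the key/value pair in the dense data list
}

const DIST_INC: u32 = 1 << 8;       // one probe step in the packed field
const FINGERPRINT_MASK: u32 = 0xFF; // low byte of the hash

// With shifts = 64 - 3 there are 2^3 = 8 initial buckets.
fn bucket_index(hash: u64, shifts: u8) -> usize {
    (hash >> shifts) as usize
}

fn dist_and_fingerprint(hash: u64) -> u32 {
    DIST_INC | (hash as u32 & FINGERPRINT_MASK)
}

fn main() {
    let hash = 0x0123_4567_89ab_cdefu64;
    assert!(bucket_index(hash, 61) < 8);
    assert_eq!(dist_and_fingerprint(hash) & FINGERPRINT_MASK, 0xef);
}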

File diff suppressed because it is too large

View File

@ -8,5 +8,5 @@ main =
s2 = Set.empty {}
Bool.isEq s1 s1 && Bool.isEq s2 s2
# ^^^^^^^^^ Set#Bool.isEq(22): Set Str, Set Str -[[Set.isEq(22)]]-> Bool
# ^^^^^^^^^ Set#Bool.isEq(22): Set U8, Set U8 -[[Set.isEq(22)]]-> Bool
# ^^^^^^^^^ Set#Bool.isEq(24): Set Str, Set Str -[[Set.isEq(24)]]-> Bool
# ^^^^^^^^^ Set#Bool.isEq(24): Set U8, Set U8 -[[Set.isEq(24)]]-> Bool

View File

@ -3,22 +3,22 @@
app "test" provides [main] to "./platform"
f = \{} ->
#^{-1} <2826><117>{} -<120>[[f(1)]]-> <116>[Ok <2834>{}]<80>*
#^{-1} <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
when g {} is
# ^ <2816><2834>{} -<2824>[[g(2)]]-> <72>[Ok <2834>{}]<102>*
# ^ <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
_ -> Ok {}
g = \{} ->
#^{-1} <2816><2834>{} -<2824>[[g(2)]]-> <72>[Ok <2834>{}]<102>*
#^{-1} <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
when h {} is
# ^ <2821><2834>{} -<2829>[[h(3)]]-> <94>[Ok <2834>{}]<124>*
# ^ <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
_ -> Ok {}
h = \{} ->
#^{-1} <2821><2834>{} -<2829>[[h(3)]]-> <94>[Ok <2834>{}]<124>*
#^{-1} <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
when f {} is
# ^ <2826><117>{} -<120>[[f(1)]]-> <116>[Ok <2834>{}]<80>*
# ^ <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
_ -> Ok {}
main = f {}
# ^ <2836><133>{} -<136>[[f(1)]]-> <138>[Ok <2834>{}]<2835>w_a
# ^ <2928><133>{} -<136>[[f(1)]]-> <138>[Ok <2926>{}]<2927>w_a

View File

@ -435,17 +435,87 @@ pub(crate) fn surgery_pe(executable_path: &Path, metadata_path: &Path, roc_app_b
);
} else {
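// These names are compiler-rt / libm routines. The builtins object now
// bundles compiler-rt (bundle_compiler_rt in the Zig build script), so a
// zero-address reference to one of these is expected here and is skipped
// rather than surgically patched.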
let is_ingested_compiler_rt = [
"__muloti4",
"__addtf3",
"__ceilx",
"__cmpdf2",
"__cmphf2",
"__cmpsf2",
"__cmptf2",
"__cmpxf2",
"__cosx",
"__divsf3",
"__divtf3",
"__divti3",
"__udivti3",
"__modti3",
"__umodti3",
"__exp2x",
"__expx",
"__extendhfsf2",
"__fabsx",
"__fixdfti",
"__fixsfti",
"__fixunsdfti",
"__fixunssfti",
"__floorx",
"__fmax",
"__fmaxx",
"__fminx",
"__fmodx",
"__gedf2",
"__gehf2",
"__gesf2",
"__getf2",
"__gexf2",
"__log10x",
"__log2x",
"__logx",
"__lshrti3",
"memcpy_decision",
"__modti3",
"__muloti4",
"__multf3",
"__roundx",
"__sincosx",
"__sinx",
"__sqrtx",
"__tanx",
"__truncsfhf2",
"__truncx",
"__udivmoddi4",
"__udivti3",
"__umodti3",
"ceilq",
"cos",
"cosf",
"cosq",
"exp",
"exp2",
"exp2q",
"expf",
"expq",
"floor",
"floorf",
"floorq",
"fmaq",
"fmaxf",
"fmaxl",
"fmodf",
"log10",
"log10q",
"log2",
"log2q",
"logq",
"memcpy",
"roundq",
"sin",
"sincos",
"sincosf",
"sincosq",
"sinf",
"sinq",
"sqrt",
"sqrtf",
"sqrtq",
"tan",
"tanf",
"tanq",
]
.contains(&name.as_str());
if *address == 0 && !name.starts_with("roc") && !is_ingested_compiler_rt {