Merge pull request #6216 from roc-lang/dict-ankerl-unordered-dense

Swap Dict implementation to ankerl dense unordered
2024-09-22 08:17:40 +03:00 · 2023-12-09 22:17:32 -08:00 · 2023-12-09 22:17:32 -08:00 · f6bff3a86e
commit f6bff3a86e
parent eadd0e82ce 1ad9933b38
28 changed files with 1675 additions and 3110 deletions
--- a/31
+++ b/31
@@ -277,4 +277,33 @@ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
 FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
 COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
 IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+===========================================================
+
+* ankerl::unordered_dense - https://github.com/martinus/unordered_dense
+
+A rather direct port of the source into Roc is currently the implementation for our Dict type.
+Source code is in crates/compiler/builtins/roc/Dict.roc
+
+MIT License
+
+Copyright (c) 2022 Martin Leitner-Ankerl
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/crates/compiler/builtins/bitcode/build.zig
+++ b/crates/compiler/builtins/bitcode/build.zig
@@ -59,6 +59,8 @@ fn generateLlvmIrFile(
    const obj = b.addObject(.{ .name = object_name, .root_source_file = main_path, .optimize = mode, .target = target, .use_llvm = true });
    obj.strip = true;
    obj.disable_stack_probing = true;
+    if (target.cpu_arch != .wasm32)
+        obj.bundle_compiler_rt = true;

    // Generating the bin seems required to get zig to generate the llvm ir.
    _ = obj.getEmittedBin();
@@ -91,6 +93,8 @@ fn generateObjectFile(
    obj.link_function_sections = true;
    obj.force_pic = true;
    obj.disable_stack_probing = true;
+    if (target.cpu_arch != .wasm32)
+        obj.bundle_compiler_rt = true;

    const obj_file = obj.getEmittedBin();

@@ -112,7 +116,7 @@ fn makeLinux32Target() CrossTarget {

    target.cpu_arch = std.Target.Cpu.Arch.x86;
    target.os_tag = std.Target.Os.Tag.linux;
-    target.abi = std.Target.Abi.musl;
+    target.abi = std.Target.Abi.none;

    return target;
 }
@@ -122,7 +126,7 @@ fn makeLinuxAarch64Target() CrossTarget {

    target.cpu_arch = std.Target.Cpu.Arch.aarch64;
    target.os_tag = std.Target.Os.Tag.linux;
-    target.abi = std.Target.Abi.musl;
+    target.abi = std.Target.Abi.none;

    return target;
 }
@@ -132,7 +136,7 @@ fn makeLinuxX64Target() CrossTarget {

    target.cpu_arch = std.Target.Cpu.Arch.x86_64;
    target.os_tag = std.Target.Os.Tag.linux;
-    target.abi = std.Target.Abi.musl;
+    target.abi = std.Target.Abi.none;

    return target;
 }
@@ -142,7 +146,7 @@ fn makeWindows64Target() CrossTarget {

    target.cpu_arch = std.Target.Cpu.Arch.x86_64;
    target.os_tag = std.Target.Os.Tag.windows;
-    target.abi = std.Target.Abi.gnu;
+    target.abi = std.Target.Abi.none;

    return target;
 }
--- a/crates/compiler/builtins/bitcode/src/compiler_rt.zig
+++ b/crates/compiler/builtins/bitcode/src/compiler_rt.zig
@@ -1,478 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const math = std.math;
-
-// Eventually, we need to statically ingest compiler-rt and get it working with the surgical linker, then these should not be needed anymore.
-// Until then, we are manually ingesting used parts of compiler-rt here.
-//
-// Taken from
-// https://github.com/ziglang/zig/tree/4976b58ab16069f8d3267b69ed030f29685c1abe/lib/compiler_rt/
-// Thank you Zig Contributors!
-
-// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
-// calling convention of @Vector(2, u64), rather than what's standard.
-pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;
-
-const v2u64 = @Vector(2, u64);
-
-// Export it as weak incase it is already linked in by something else.
-comptime {
-    if (!want_windows_v2u64_abi) {
-        @export(__muloti4, .{ .name = "__muloti4", .linkage = .Weak });
-        @export(__lshrti3, .{ .name = "__lshrti3", .linkage = .Weak });
-        @export(__divti3, .{ .name = "__divti3", .linkage = .Weak });
-        @export(__modti3, .{ .name = "__modti3", .linkage = .Weak });
-        @export(__umodti3, .{ .name = "__umodti3", .linkage = .Weak });
-        @export(__udivti3, .{ .name = "__udivti3", .linkage = .Weak });
-        @export(__fixdfti, .{ .name = "__fixdfti", .linkage = .Weak });
-        @export(__fixsfti, .{ .name = "__fixsfti", .linkage = .Weak });
-        @export(__fixunsdfti, .{ .name = "__fixunsdfti", .linkage = .Weak });
-        @export(__fixunssfti, .{ .name = "__fixunssfti", .linkage = .Weak });
-    }
-}
-
-pub fn __muloti4(a: i128, b: i128, overflow: *c_int) callconv(.C) i128 {
-    if (2 * @bitSizeOf(i128) <= @bitSizeOf(usize)) {
-        return muloXi4_genericFast(i128, a, b, overflow);
-    } else {
-        return muloXi4_genericSmall(i128, a, b, overflow);
-    }
-}
-
-pub fn __divti3(a: i128, b: i128) callconv(.C) i128 {
-    return div(a, b);
-}
-
-fn __divti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(div(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
-}
-
-inline fn div(a: i128, b: i128) i128 {
-    const s_a = a >> (128 - 1);
-    const s_b = b >> (128 - 1);
-
-    const an = (a ^ s_a) -% s_a;
-    const bn = (b ^ s_b) -% s_b;
-
-    const r = udivmod(u128, @as(u128, @bitCast(an)), @as(u128, @bitCast(bn)), null);
-    const s = s_a ^ s_b;
-    return (@as(i128, @bitCast(r)) ^ s) -% s;
-}
-
-pub fn __udivti3(a: u128, b: u128) callconv(.C) u128 {
-    return udivmod(u128, a, b, null);
-}
-
-fn __udivti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), null)));
-}
-
-pub fn __umodti3(a: u128, b: u128) callconv(.C) u128 {
-    var r: u128 = undefined;
-    _ = udivmod(u128, a, b, &r);
-    return r;
-}
-
-fn __umodti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
-    var r: u128 = undefined;
-    _ = udivmod(u128, @as(u128, @bitCast(a)), @as(u128, @bitCast(b)), &r);
-    return @as(v2u64, @bitCast(r));
-}
-
-pub fn __modti3(a: i128, b: i128) callconv(.C) i128 {
-    return mod(a, b);
-}
-
-fn __modti3_windows_x86_64(a: v2u64, b: v2u64) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(mod(@as(i128, @bitCast(a)), @as(i128, @bitCast(b)))));
-}
-
-inline fn mod(a: i128, b: i128) i128 {
-    const s_a = a >> (128 - 1); // s = a < 0 ? -1 : 0
-    const s_b = b >> (128 - 1); // s = b < 0 ? -1 : 0
-
-    const an = (a ^ s_a) -% s_a; // negate if s == -1
-    const bn = (b ^ s_b) -% s_b; // negate if s == -1
-
-    var r: u128 = undefined;
-    _ = udivmod(u128, @as(u128, @bitCast(an)), @as(u128, @bitCast(bn)), &r);
-    return (@as(i128, @bitCast(r)) ^ s_a) -% s_a; // negate if s == -1
-}
-
-pub fn __fixdfti(a: f64) callconv(.C) i128 {
-    return floatToInt(i128, a);
-}
-
-fn __fixdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(floatToInt(i128, a)));
-}
-
-pub fn __fixsfti(a: f32) callconv(.C) i128 {
-    return floatToInt(i128, a);
-}
-
-fn __fixsfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(floatToInt(i128, a)));
-}
-
-pub fn __fixunsdfti(a: f64) callconv(.C) u128 {
-    return floatToInt(u128, a);
-}
-
-fn __fixunsdfti_windows_x86_64(a: f64) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(floatToInt(u128, a)));
-}
-
-pub fn __fixunssfti(a: f32) callconv(.C) u128 {
-    return floatToInt(u128, a);
-}
-
-fn __fixunssfti_windows_x86_64(a: f32) callconv(.C) v2u64 {
-    return @as(v2u64, @bitCast(floatToInt(u128, a)));
-}
-// mulo - multiplication overflow
-// * return a*%b.
-// * return if a*b overflows => 1 else => 0
-// - muloXi4_genericSmall as default
-// - muloXi4_genericFast for 2*bitsize <= usize
-
-inline fn muloXi4_genericSmall(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
-    overflow.* = 0;
-    const min = math.minInt(ST);
-    var res: ST = a *% b;
-    // Hacker's Delight section Overflow subsection Multiplication
-    // case a=-2^{31}, b=-1 problem, because
-    // on some machines a*b = -2^{31} with overflow
-    // Then -2^{31}/-1 overflows and any result is possible.
-    // => check with a<0 and b=-2^{31}
-    if ((a < 0 and b == min) or (a != 0 and @divTrunc(res, a) != b))
-        overflow.* = 1;
-    return res;
-}
-
-inline fn muloXi4_genericFast(comptime ST: type, a: ST, b: ST, overflow: *c_int) ST {
-    overflow.* = 0;
-    const EST = switch (ST) {
-        i32 => i64,
-        i64 => i128,
-        i128 => i256,
-        else => unreachable,
-    };
-    const min = math.minInt(ST);
-    const max = math.maxInt(ST);
-    var res: EST = @as(EST, a) * @as(EST, b);
-    //invariant: -2^{bitwidth(EST)} < res < 2^{bitwidth(EST)-1}
-    if (res < min or max < res)
-        overflow.* = 1;
-    return @as(ST, @truncate(res));
-}
-
-const native_endian = builtin.cpu.arch.endian();
-const low = switch (native_endian) {
-    .Big => 1,
-    .Little => 0,
-};
-const high = 1 - low;
-
-pub fn udivmod(comptime DoubleInt: type, a: DoubleInt, b: DoubleInt, maybe_rem: ?*DoubleInt) DoubleInt {
-    // @setRuntimeSafety(builtin.is_test);
-
-    const double_int_bits = @typeInfo(DoubleInt).Int.bits;
-    const single_int_bits = @divExact(double_int_bits, 2);
-    const SingleInt = std.meta.Int(.unsigned, single_int_bits);
-    const SignedDoubleInt = std.meta.Int(.signed, double_int_bits);
-    const Log2SingleInt = std.math.Log2Int(SingleInt);
-
-    const n = @as([2]SingleInt, @bitCast(a));
-    const d = @as([2]SingleInt, @bitCast(b));
-    var q: [2]SingleInt = undefined;
-    var r: [2]SingleInt = undefined;
-    var sr: c_uint = undefined;
-    // special cases, X is unknown, K != 0
-    if (n[high] == 0) {
-        if (d[high] == 0) {
-            // 0 X
-            // ---
-            // 0 X
-            if (maybe_rem) |rem| {
-                rem.* = n[low] % d[low];
-            }
-            return n[low] / d[low];
-        }
-        // 0 X
-        // ---
-        // K X
-        if (maybe_rem) |rem| {
-            rem.* = n[low];
-        }
-        return 0;
-    }
-    // n[high] != 0
-    if (d[low] == 0) {
-        if (d[high] == 0) {
-            // K X
-            // ---
-            // 0 0
-            if (maybe_rem) |rem| {
-                rem.* = n[high] % d[low];
-            }
-            return n[high] / d[low];
-        }
-        // d[high] != 0
-        if (n[low] == 0) {
-            // K 0
-            // ---
-            // K 0
-            if (maybe_rem) |rem| {
-                r[high] = n[high] % d[high];
-                r[low] = 0;
-                rem.* = @as(DoubleInt, @bitCast(r));
-            }
-            return n[high] / d[high];
-        }
-        // K K
-        // ---
-        // K 0
-        if ((d[high] & (d[high] - 1)) == 0) {
-            // d is a power of 2
-            if (maybe_rem) |rem| {
-                r[low] = n[low];
-                r[high] = n[high] & (d[high] - 1);
-                rem.* = @as(DoubleInt, @bitCast(r));
-            }
-            return n[high] >> @as(Log2SingleInt, @intCast(@ctz(d[high])));
-        }
-        // K K
-        // ---
-        // K 0
-        sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
-        // 0 <= sr <= single_int_bits - 2 or sr large
-        if (sr > single_int_bits - 2) {
-            if (maybe_rem) |rem| {
-                rem.* = a;
-            }
-            return 0;
-        }
-        sr += 1;
-        // 1 <= sr <= single_int_bits - 1
-        // q.all = a << (double_int_bits - sr);
-        q[low] = 0;
-        q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
-        // r.all = a >> sr;
-        r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
-        r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
-    } else {
-        // d[low] != 0
-        if (d[high] == 0) {
-            // K X
-            // ---
-            // 0 K
-            if ((d[low] & (d[low] - 1)) == 0) {
-                // d is a power of 2
-                if (maybe_rem) |rem| {
-                    rem.* = n[low] & (d[low] - 1);
-                }
-                if (d[low] == 1) {
-                    return a;
-                }
-                sr = @ctz(d[low]);
-                q[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
-                q[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
-                return @as(DoubleInt, @bitCast(q));
-            }
-            // K X
-            // ---
-            // 0 K
-            sr = 1 + single_int_bits + @as(c_uint, @clz(d[low])) - @as(c_uint, @clz(n[high]));
-            // 2 <= sr <= double_int_bits - 1
-            // q.all = a << (double_int_bits - sr);
-            // r.all = a >> sr;
-            if (sr == single_int_bits) {
-                q[low] = 0;
-                q[high] = n[low];
-                r[high] = 0;
-                r[low] = n[high];
-            } else if (sr < single_int_bits) {
-                // 2 <= sr <= single_int_bits - 1
-                q[low] = 0;
-                q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
-                r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
-                r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
-            } else {
-                // single_int_bits + 1 <= sr <= double_int_bits - 1
-                q[low] = n[low] << @as(Log2SingleInt, @intCast(double_int_bits - sr));
-                q[high] = (n[high] << @as(Log2SingleInt, @intCast(double_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr - single_int_bits)));
-                r[high] = 0;
-                r[low] = n[high] >> @as(Log2SingleInt, @intCast(sr - single_int_bits));
-            }
-        } else {
-            // K X
-            // ---
-            // K K
-            sr = @as(c_uint, @bitCast(@as(c_int, @clz(d[high])) - @as(c_int, @clz(n[high]))));
-            // 0 <= sr <= single_int_bits - 1 or sr large
-            if (sr > single_int_bits - 1) {
-                if (maybe_rem) |rem| {
-                    rem.* = a;
-                }
-                return 0;
-            }
-            sr += 1;
-            // 1 <= sr <= single_int_bits
-            // q.all = a << (double_int_bits - sr);
-            // r.all = a >> sr;
-            q[low] = 0;
-            if (sr == single_int_bits) {
-                q[high] = n[low];
-                r[high] = 0;
-                r[low] = n[high];
-            } else {
-                r[high] = n[high] >> @as(Log2SingleInt, @intCast(sr));
-                r[low] = (n[high] << @as(Log2SingleInt, @intCast(single_int_bits - sr))) | (n[low] >> @as(Log2SingleInt, @intCast(sr)));
-                q[high] = n[low] << @as(Log2SingleInt, @intCast(single_int_bits - sr));
-            }
-        }
-    }
-    // Not a special case
-    // q and r are initialized with:
-    // q.all = a << (double_int_bits - sr);
-    // r.all = a >> sr;
-    // 1 <= sr <= double_int_bits - 1
-    var carry: u32 = 0;
-    var r_all: DoubleInt = undefined;
-    while (sr > 0) : (sr -= 1) {
-        // r:q = ((r:q)  << 1) | carry
-        r[high] = (r[high] << 1) | (r[low] >> (single_int_bits - 1));
-        r[low] = (r[low] << 1) | (q[high] >> (single_int_bits - 1));
-        q[high] = (q[high] << 1) | (q[low] >> (single_int_bits - 1));
-        q[low] = (q[low] << 1) | carry;
-        // carry = 0;
-        // if (r.all >= b)
-        // {
-        //     r.all -= b;
-        //      carry = 1;
-        // }
-        r_all = @as(DoubleInt, @bitCast(r));
-        const s: SignedDoubleInt = @as(SignedDoubleInt, @bitCast(b -% r_all -% 1)) >> (double_int_bits - 1);
-        carry = @as(u32, @intCast(s & 1));
-        r_all -= b & @as(DoubleInt, @bitCast(s));
-        r = @as([2]SingleInt, @bitCast(r_all));
-    }
-    const q_all = (@as(DoubleInt, @bitCast(q)) << 1) | carry;
-    if (maybe_rem) |rem| {
-        rem.* = r_all;
-    }
-    return q_all;
-}
-
-pub inline fn floatToInt(comptime I: type, a: anytype) I {
-    const Log2Int = math.Log2Int;
-    const Int = @import("std").meta.Int;
-    const F = @TypeOf(a);
-    const float_bits = @typeInfo(F).Float.bits;
-    const int_bits = @typeInfo(I).Int.bits;
-    const rep_t = Int(.unsigned, float_bits);
-    const sig_bits = math.floatMantissaBits(F);
-    const exp_bits = math.floatExponentBits(F);
-    const fractional_bits = floatFractionalBits(F);
-
-    // const implicit_bit = if (F != f80) (@as(rep_t, 1) << sig_bits) else 0;
-    const implicit_bit = @as(rep_t, 1) << sig_bits;
-    const max_exp = (1 << (exp_bits - 1));
-    const exp_bias = max_exp - 1;
-    const sig_mask = (@as(rep_t, 1) << sig_bits) - 1;
-
-    // Break a into sign, exponent, significand
-    const a_rep: rep_t = @as(rep_t, @bitCast(a));
-    const negative = (a_rep >> (float_bits - 1)) != 0;
-    const exponent = @as(i32, @intCast((a_rep << 1) >> (sig_bits + 1))) - exp_bias;
-    const significand: rep_t = (a_rep & sig_mask) | implicit_bit;
-
-    // If the exponent is negative, the result rounds to zero.
-    if (exponent < 0) return 0;
-
-    // If the value is too large for the integer type, saturate.
-    switch (@typeInfo(I).Int.signedness) {
-        .unsigned => {
-            if (negative) return 0;
-            if (@as(c_uint, @intCast(exponent)) >= @min(int_bits, max_exp)) return math.maxInt(I);
-        },
-        .signed => if (@as(c_uint, @intCast(exponent)) >= @min(int_bits - 1, max_exp)) {
-            return if (negative) math.minInt(I) else math.maxInt(I);
-        },
-    }
-
-    // If 0 <= exponent < sig_bits, right shift to get the result.
-    // Otherwise, shift left.
-    var result: I = undefined;
-    if (exponent < fractional_bits) {
-        result = @as(I, @intCast(significand >> @as(Log2Int(rep_t), @intCast(fractional_bits - exponent))));
-    } else {
-        result = @as(I, @intCast(significand)) << @as(Log2Int(I), @intCast(exponent - fractional_bits));
-    }
-
-    if ((@typeInfo(I).Int.signedness == .signed) and negative)
-        return ~result +% 1;
-    return result;
-}
-
-/// Returns the number of fractional bits in the mantissa of floating point type T.
-pub inline fn floatFractionalBits(comptime T: type) comptime_int {
-    comptime std.debug.assert(@typeInfo(T) == .Float);
-
-    // standard IEEE floats have an implicit 0.m or 1.m integer part
-    // f80 is special and has an explicitly stored bit in the MSB
-    // this function corresponds to `MANT_DIG - 1' from C
-    return switch (@typeInfo(T).Float.bits) {
-        16 => 10,
-        32 => 23,
-        64 => 52,
-        80 => 63,
-        128 => 112,
-        else => @compileError("unknown floating point type " ++ @typeName(T)),
-    };
-}
-
-pub fn __lshrti3(a: i128, b: i32) callconv(.C) i128 {
-    return lshrXi3(i128, a, b);
-}
-
-// Logical shift right: shift in 0 from left to right
-// Precondition: 0 <= b < T.bit_count
-inline fn lshrXi3(comptime T: type, a: T, b: i32) T {
-    const word_t = HalveInt(T, false);
-    const S = std.math.Log2Int(word_t.HalfT);
-
-    const input = word_t{ .all = a };
-    var output: word_t = undefined;
-
-    if (b >= word_t.bits) {
-        output.s.high = 0;
-        output.s.low = input.s.high >> @as(S, @intCast(b - word_t.bits));
-    } else if (b == 0) {
-        return a;
-    } else {
-        output.s.high = input.s.high >> @as(S, @intCast(b));
-        output.s.low = input.s.high << @as(S, @intCast(word_t.bits - b));
-        output.s.low |= input.s.low >> @as(S, @intCast(b));
-    }
-
-    return output.all;
-}
-
-/// Allows to access underlying bits as two equally sized lower and higher
-/// signed or unsigned integers.
-fn HalveInt(comptime T: type, comptime signed_half: bool) type {
-    return extern union {
-        pub const bits = @divExact(@typeInfo(T).Int.bits, 2);
-        pub const HalfTU = std.meta.Int(.unsigned, bits);
-        pub const HalfTS = std.meta.Int(.signed, bits);
-        pub const HalfT = if (signed_half) HalfTS else HalfTU;
-
-        all: T,
-        s: if (native_endian == .Little)
-            extern struct { low: HalfT, high: HalfT }
-        else
-            extern struct { high: HalfT, low: HalfT },
-    };
-}
--- a/crates/compiler/builtins/bitcode/src/libc.zig
+++ b/crates/compiler/builtins/bitcode/src/libc.zig
@@ -1,87 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const arch = builtin.cpu.arch;
-const musl = @import("libc/musl.zig");
-const folly = @import("libc/folly.zig");
-const cpuid = @import("libc/cpuid.zig");
-
-comptime {
-    // TODO: remove this workaround.
-    // Our wasm llvm pipeline always links in memcpy.
-    // As such, our impl will conflict.
-    if (builtin.is_test) {
-        // We don't need memcpy for tests because the tests are built with -lc
-    } else if (arch != .wasm32) {
-        @export(memcpy, .{ .name = "memcpy", .linkage = .Strong });
-    }
-}
-
-const Memcpy = *const fn (noalias [*]u8, noalias [*]const u8, len: usize) callconv(.C) [*]u8;
-
-pub var memcpy_target: Memcpy = switch (arch) {
-    .x86_64 => dispatch_memcpy,
-    else => unreachable,
-};
-
-pub fn memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
-    switch (builtin.os.tag) {
-        .windows => {
-            return musl.memcpy(dest, src, len);
-        },
-        else => switch (arch) {
-            // x86_64 has a special optimized memcpy that can use avx2.
-            .x86_64 => {
-                return memcpy_target(dest, src, len);
-            },
-            else => {
-                return musl.memcpy(dest, src, len);
-            },
-        },
-    }
-}
-
-const MemcpyDecision = enum {
-    uninitialized,
-    folly_prefetchw,
-    folly_prefetcht0,
-    musl,
-};
-
-var memcpy_decision: MemcpyDecision = .uninitialized;
-
-fn dispatch_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
-    switch (arch) {
-        .x86_64 => {
-            // TODO: Switch this to overwrite the memcpy_target pointer once the surgical linker can support it.
-            // Then dispatch will just happen on the first call instead of every call.
-            // if (cpuid.supports_avx2()) {
-            //     if (cpuid.supports_prefetchw()) {
-            //         memcpy_target = folly.memcpy_prefetchw;
-            //     } else {
-            //         memcpy_target = folly.memcpy_prefetcht0;
-            //     }
-            // } else {
-            //     memcpy_target = musl.memcpy;
-            // }
-            // return memcpy_target(dest, src, len);
-            switch (memcpy_decision) {
-                .uninitialized => {
-                    if (cpuid.supports_avx2()) {
-                        if (cpuid.supports_prefetchw()) {
-                            memcpy_decision = .folly_prefetchw;
-                        } else {
-                            memcpy_decision = .folly_prefetcht0;
-                        }
-                    } else {
-                        memcpy_decision = .musl;
-                    }
-                    return dispatch_memcpy(dest, src, len);
-                },
-                .folly_prefetchw => return folly.memcpy_prefetchw(dest, src, len),
-                .folly_prefetcht0 => return folly.memcpy_prefetcht0(dest, src, len),
-                .musl => return musl.memcpy(dest, src, len),
-            }
-        },
-        else => unreachable,
-    }
-}
--- a/crates/compiler/builtins/bitcode/src/libc/assembly_util.zig
+++ b/crates/compiler/builtins/bitcode/src/libc/assembly_util.zig
@@ -1,7 +0,0 @@
-const builtin = @import("builtin");
-const os = builtin.os;
-
-pub const function_prefix = switch (os.tag) {
-    .macos => "_",
-    else => "",
-};
--- a/crates/compiler/builtins/bitcode/src/libc/cpuid.S
+++ b/crates/compiler/builtins/bitcode/src/libc/cpuid.S
@@ -1,53 +0,0 @@
-// Check if AVX2 is supported.
-// Returns 1 if AVX2 is supported, 0 otherwise.
-.global {[function_prefix]s}supports_avx2;
-{[function_prefix]s}supports_avx2:
-    // Save the EBX register.
-    push %rbx
-
-    // Call the CPUID instruction with the EAX register set to 7 and ECX set to 0.
-    // This will get the CPUID information for the current CPU.
-    mov $7, %eax
-    mov $0, %ecx
-    cpuid
-
-    // The AVX2 feature flag is located in the EBX register at bit 5.
-    bt $5, %ebx
-    jc .avx2_supported
-
-    // AVX2 is not supported.
-    pop %rbx
-    mov $0, %eax
-    ret
-
-    .avx2_supported:
-    pop %rbx
-    mov $1, %eax
-    ret
-
- // Check if prefetchw is supported.
- // Returns 1 if the prefetchw instruction is supported, 0 otherwise.
-.global {[function_prefix]s}supports_prefetchw;
-{[function_prefix]s}supports_prefetchw:
-    // Save the EBX register.
-    push %rbx
-
-    // Call the CPUID instruction with the EAX register set to 0x80000001 and ECX set to 0.
-    // This will get the CPUID information for the current CPU.
-    mov $0x80000001, %eax
-    mov $0, %ecx
-    cpuid
-
-    // The prefetchw feature flag is located in the ECX register at bit 8.
-    bt $8, %ecx
-    jc .prefetchw_supported
-
-    // AVX2 is not supported.
-    pop %rbx
-    mov $0, %eax
-    ret
-
-    .prefetchw_supported:
-    pop %rbx
-    mov $1, %eax
-    ret
--- a/crates/compiler/builtins/bitcode/src/libc/cpuid.zig
+++ b/crates/compiler/builtins/bitcode/src/libc/cpuid.zig
@@ -1,18 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const arch = builtin.cpu.arch;
-const function_prefix = @import("assembly_util.zig").function_prefix;
-
-// I couldn't manage to define this in a PIE friendly way with inline assembly.
-// Instead, I am defining it as global assembly functions.
-comptime {
-    switch (arch) {
-        .x86_64 => {
-            asm (std.fmt.comptimePrint(@embedFile("cpuid.S"), .{ .function_prefix = function_prefix }));
-        },
-        else => unreachable,
-    }
-}
-
-pub extern fn supports_avx2() bool;
-pub extern fn supports_prefetchw() bool;
--- a/crates/compiler/builtins/bitcode/src/libc/folly.zig
+++ b/crates/compiler/builtins/bitcode/src/libc/folly.zig
@@ -1,2 +0,0 @@
-pub const memcpy_prefetchw = @import("folly/memcpy.zig").__folly_memcpy_prefetchw;
-pub const memcpy_prefetcht0 = @import("folly/memcpy.zig").__folly_memcpy_prefetcht0;
--- a/crates/compiler/builtins/bitcode/src/libc/folly/memcpy-x86_64.S
+++ b/crates/compiler/builtins/bitcode/src/libc/folly/memcpy-x86_64.S
@ -1,437 +0,0 @@
-/*
- * Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * __folly_memcpy: An optimized memcpy implementation that uses prefetch and
- * AVX2 instructions.
- *
- * This implementation of memcpy acts as a memmove: while overlapping copies
- * are undefined in memcpy, in some implementations they're the same function and
- * legacy programs rely on this behavior.
- *
- * This implementation uses prefetch to avoid dtlb misses. This can
- * substantially reduce dtlb store misses in cases where the destination
- * location is absent from L1 cache and where the copy size is small enough
- * that the hardware prefetcher doesn't have a large impact.
- *
- * The number of branches is limited by the use of overlapping loads & stores.
- * This helps with copies where the source and destination cache lines are already
- * present in L1 because there are fewer instructions to execute and fewer
- * branches to potentially mispredict.
- *   e.g. to copy the last 4 <= n <= 7 bytes: copy the first & last 4 bytes (overlapped):
- *      movl        (%rsi), %r8d
- *      movl        -4(%rsi,%rdx), %r9d
- *      movl        %r8d, (%rdi)
- *      movl        %r9d, -4(%rdi,%rdx)
- *
- *
- * For sizes up to 256 all source data is first read into registers and then written:
- * - n <=  16: overlapping movs
- * - n <=  32: overlapping unaligned 16-byte SSE XMM load/stores
- * - n <= 256: overlapping unaligned 32-byte AVX YMM load/stores
- *
- * Large copies (> 256 bytes) use unaligned loads + aligned stores.
- * This is observed to always be faster than rep movsb, so the rep movsb
- * instruction is not used.
- * - The head & tail may be unaligned => they're always written using unaligned stores.
- *
- * If the copy size is humongous (> 32 KiB) and the source and destination are both
- * aligned, this memcpy will use non-temporal operations (AVX2). This can have
- * a substantial speedup for copies where data is absent from L1, but it
- * is significantly slower if the source and destination data were already
- * in L1. The use of non-temporal operations also has the effect that after
- * the copy is complete, the data will be moved out of L1, even if the data was
- * present before the copy started.
- *
- * For n > 256 and overlapping src & dst buffers (memmove):
- * - use unaligned loads + aligned stores, but not non-temporal stores
- * - for dst < src forward copy in 128 byte batches:
- *   - unaligned load the first 32 bytes & last 4 x 32 bytes
- *   - forward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
- *   - unaligned store the first 32 bytes & last 4 x 32 bytes
- * - for dst > src backward copy in 128 byte batches:
- *   - unaligned load the first 4 x 32 bytes & last 32 bytes
- *   - backward copy (unaligned load + aligned stores) 4 x 32 bytes at a time
- *   - unaligned store the first 4 x 32 bytes & last 32 bytes
- *
- * @author Logan Evans <lpe@fb.com>
- */
-
-
-        // .type       {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, @function not supported by windows
-{[function_prefix]s}__folly_memcpy_short_{[prefetch]s}:
-        .cfi_startproc
-
-.L_GE1_LE7_{[prefetch]s}:
-        cmp         $1, %rdx
-        je          .L_EQ1_{[prefetch]s}
-
-        cmp         $4, %rdx
-        jae         .L_GE4_LE7_{[prefetch]s}
-
-.L_GE2_LE3_{[prefetch]s}:
-        movw        (%rsi), %r8w
-        movw        -2(%rsi,%rdx), %r9w
-        movw        %r8w, (%rdi)
-        movw        %r9w, -2(%rdi,%rdx)
-        ret
-
-        .balign      2
-.L_EQ1_{[prefetch]s}:
-        movb        (%rsi), %r8b
-        movb        %r8b, (%rdi)
-        ret
-
-        // Aligning the target of a jump to an even address has a measurable
-        // speedup in microbenchmarks.
-        .balign      2
-.L_GE4_LE7_{[prefetch]s}:
-        movl        (%rsi), %r8d
-        movl        -4(%rsi,%rdx), %r9d
-        movl        %r8d, (%rdi)
-        movl        %r9d, -4(%rdi,%rdx)
-        ret
-
-        .cfi_endproc
-        // .size       {[function_prefix]s}__folly_memcpy_short_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_short_{[prefetch]s} not supported by windows
-
-// memcpy is an alternative entrypoint into the function named __folly_memcpy.
-// The compiler is able to call memcpy since the name is global while
-// stacktraces will show __folly_memcpy since that is the name of the function.
-// This is intended to aid in debugging by making it obvious which version of
-// memcpy is being used.
-        .balign      64
-        .globl      {[function_prefix]s}__folly_memcpy_{[prefetch]s}
-        // .type       {[function_prefix]s}__folly_memcpy_{[prefetch]s}, @function not supported by windows
-
-{[function_prefix]s}__folly_memcpy_{[prefetch]s}:
-        .cfi_startproc
-
-        mov         %rdi, %rax    // return: $rdi
-
-        test        %rdx, %rdx
-        je          .L_EQ0_{[prefetch]s}
-
-        {[prefetch]s}    (%rdi)
-        {[prefetch]s}    -1(%rdi,%rdx)
-
-        cmp         $8, %rdx
-        jb          .L_GE1_LE7_{[prefetch]s}
-
-.L_GE8_{[prefetch]s}:
-        cmp         $32, %rdx
-        ja          .L_GE33_{[prefetch]s}
-
-.L_GE8_LE32_{[prefetch]s}:
-        cmp         $16, %rdx
-        ja          .L_GE17_LE32_{[prefetch]s}
-
-.L_GE8_LE16_{[prefetch]s}:
-        mov         (%rsi), %r8
-        mov         -8(%rsi,%rdx), %r9
-        mov         %r8, (%rdi)
-        mov         %r9, -8(%rdi,%rdx)
-.L_EQ0_{[prefetch]s}:
-        ret
-
-        .balign      2
-.L_GE17_LE32_{[prefetch]s}:
-        movdqu      (%rsi), %xmm0
-        movdqu      -16(%rsi,%rdx), %xmm1
-        movdqu      %xmm0, (%rdi)
-        movdqu      %xmm1, -16(%rdi,%rdx)
-        ret
-
-        .balign      2
-.L_GE193_LE256_{[prefetch]s}:
-        vmovdqu     %ymm3, 96(%rdi)
-        vmovdqu     %ymm4, -128(%rdi,%rdx)
-
-.L_GE129_LE192_{[prefetch]s}:
-        vmovdqu     %ymm2, 64(%rdi)
-        vmovdqu     %ymm5, -96(%rdi,%rdx)
-
-.L_GE65_LE128_{[prefetch]s}:
-        vmovdqu     %ymm1, 32(%rdi)
-        vmovdqu     %ymm6, -64(%rdi,%rdx)
-
-.L_GE33_LE64_{[prefetch]s}:
-        vmovdqu     %ymm0, (%rdi)
-        vmovdqu     %ymm7, -32(%rdi,%rdx)
-
-        vzeroupper
-        ret
-
-        .balign      2
-.L_GE33_{[prefetch]s}:
-        vmovdqu     (%rsi), %ymm0
-        vmovdqu     -32(%rsi,%rdx), %ymm7
-
-        cmp         $64, %rdx
-        jbe         .L_GE33_LE64_{[prefetch]s}
-
-        {[prefetch]s}    64(%rdi)
-
-        vmovdqu     32(%rsi), %ymm1
-        vmovdqu     -64(%rsi,%rdx), %ymm6
-
-        cmp         $128, %rdx
-        jbe         .L_GE65_LE128_{[prefetch]s}
-
-        {[prefetch]s}    128(%rdi)
-
-        vmovdqu     64(%rsi), %ymm2
-        vmovdqu     -96(%rsi,%rdx), %ymm5
-
-        cmp         $192, %rdx
-        jbe         .L_GE129_LE192_{[prefetch]s}
-
-        {[prefetch]s}    192(%rdi)
-
-        vmovdqu     96(%rsi), %ymm3
-        vmovdqu     -128(%rsi,%rdx), %ymm4
-
-        cmp         $256, %rdx
-        jbe         .L_GE193_LE256_{[prefetch]s}
-
-.L_GE257_{[prefetch]s}:
-        {[prefetch]s}    256(%rdi)
-
-        // Check if there is an overlap. If there is an overlap then the caller
-        // has a bug since this is undefined behavior. However, for legacy
-        // reasons this behavior is expected by some callers.
-        //
-        // All copies through 256 bytes will operate as a memmove since for
-        // those sizes all reads are performed before any writes.
-        //
-        // This check uses the idea that there is an overlap if
-        // (%rdi < (%rsi + %rdx)) && (%rsi < (%rdi + %rdx)),
-        // or equivalently, there is no overlap if
-        // ((%rsi + %rdx) <= %rdi) || ((%rdi + %rdx) <= %rsi).
-        //
-        // %r9 will be used after .L_ALIGNED_DST_LOOP to calculate how many
-        // bytes remain to be copied.
-
-        // (%rsi + %rdx <= %rdi) => no overlap
-        lea         (%rsi,%rdx), %r9
-        cmp         %rdi, %r9
-        jbe         .L_NO_OVERLAP_{[prefetch]s}
-
-        // (%rdi + %rdx <= %rsi) => no overlap
-        lea         (%rdi,%rdx), %r8
-        cmp         %rsi, %r8
-        // If no info is available in branch predictor's cache, Intel CPUs assume
-        // forward jumps are not taken. Use a forward jump as overlapping buffers
-        // are unlikely.
-        ja          .L_OVERLAP_{[prefetch]s}
-
-        .balign      2
-.L_NO_OVERLAP_{[prefetch]s}:
-        vmovdqu     %ymm0, (%rdi)
-        vmovdqu     %ymm1, 32(%rdi)
-        vmovdqu     %ymm2, 64(%rdi)
-        vmovdqu     %ymm3, 96(%rdi)
-
-        // Align %rdi to a 32 byte boundary.
-        // %rcx = 128 - (%rdi & 31)
-        mov         $128, %rcx
-        and         $31, %rdi
-        sub         %rdi, %rcx
-
-        lea         (%rsi,%rcx), %rsi
-        lea         (%rax,%rcx), %rdi
-        sub         %rcx, %rdx
-
-        // %r8 is the end condition for the loop.
-        lea         -128(%rsi,%rdx), %r8
-
-		// This threshold is half of L1 cache on a Skylake machine, which means that
-		// potentially all of L1 will be populated by this copy once it is executed
-		// (dst and src are cached for temporal copies).
-		// NON_TEMPORAL_STORE_THRESHOLD = $32768
-        // cmp         NON_TEMPORAL_STORE_THRESHOLD, %rdx
-        cmp         $32768, %rdx
-        jae         .L_NON_TEMPORAL_LOOP_{[prefetch]s}
-
-        .balign      2
-.L_ALIGNED_DST_LOOP_{[prefetch]s}:
-        {[prefetch]s}    128(%rdi)
-        {[prefetch]s}    192(%rdi)
-
-        vmovdqu     (%rsi), %ymm0
-        vmovdqu     32(%rsi), %ymm1
-        vmovdqu     64(%rsi), %ymm2
-        vmovdqu     96(%rsi), %ymm3
-        add         $128, %rsi
-
-        vmovdqa     %ymm0, (%rdi)
-        vmovdqa     %ymm1, 32(%rdi)
-        vmovdqa     %ymm2, 64(%rdi)
-        vmovdqa     %ymm3, 96(%rdi)
-        add         $128, %rdi
-
-        cmp         %r8, %rsi
-        jb          .L_ALIGNED_DST_LOOP_{[prefetch]s}
-
-.L_ALIGNED_DST_LOOP_END_{[prefetch]s}:
-        sub         %rsi, %r9
-        mov         %r9, %rdx
-
-        vmovdqu     %ymm4, -128(%rdi,%rdx)
-        vmovdqu     %ymm5, -96(%rdi,%rdx)
-        vmovdqu     %ymm6, -64(%rdi,%rdx)
-        vmovdqu     %ymm7, -32(%rdi,%rdx)
-
-        vzeroupper
-        ret
-
-        .balign      2
-.L_NON_TEMPORAL_LOOP_{[prefetch]s}:
-        testb       $31, %sil
-        jne         .L_ALIGNED_DST_LOOP_{[prefetch]s}
-        // This is prefetching the source data unlike ALIGNED_DST_LOOP which
-        // prefetches the destination data. This choice is again informed by
-        // benchmarks. With a non-temporal store the entirety of the cache line
-        // is being written so the previous data can be discarded without being
-        // fetched.
-        prefetchnta 128(%rsi)
-        prefetchnta 196(%rsi)
-
-        vmovntdqa   (%rsi), %ymm0
-        vmovntdqa   32(%rsi), %ymm1
-        vmovntdqa   64(%rsi), %ymm2
-        vmovntdqa   96(%rsi), %ymm3
-        add         $128, %rsi
-
-        vmovntdq    %ymm0, (%rdi)
-        vmovntdq    %ymm1, 32(%rdi)
-        vmovntdq    %ymm2, 64(%rdi)
-        vmovntdq    %ymm3, 96(%rdi)
-        add         $128, %rdi
-
-        cmp         %r8, %rsi
-        jb          .L_NON_TEMPORAL_LOOP_{[prefetch]s}
-
-        sfence
-        jmp         .L_ALIGNED_DST_LOOP_END_{[prefetch]s}
-
-
-.L_OVERLAP_{[prefetch]s}:
-        .balign      2
-        cmp         %rdi, %rsi
-        jb          .L_OVERLAP_BWD_{[prefetch]s}  // %rsi  < %rdi => backward-copy
-        je          .L_RET_{[prefetch]s}          // %rsi == %rdi => return, nothing to copy
-
-        // Source & destination buffers overlap. Forward copy.
-
-        vmovdqu     (%rsi), %ymm8
-
-        // Align %rdi to a 32 byte boundary.
-        // %rcx = 32 - (%rdi & 31)
-        mov         $32, %rcx
-        and         $31, %rdi
-        sub         %rdi, %rcx
-
-        lea         (%rsi,%rcx), %rsi
-        lea         (%rax,%rcx), %rdi
-        sub         %rcx, %rdx
-
-        // %r8 is the end condition for the loop.
-        lea         -128(%rsi,%rdx), %r8
-
-
-.L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}:
-        {[prefetch]s}    128(%rdi)
-        {[prefetch]s}    192(%rdi)
-
-        vmovdqu       (%rsi), %ymm0
-        vmovdqu     32(%rsi), %ymm1
-        vmovdqu     64(%rsi), %ymm2
-        vmovdqu     96(%rsi), %ymm3
-        add         $128, %rsi
-
-        vmovdqa     %ymm0,   (%rdi)
-        vmovdqa     %ymm1, 32(%rdi)
-        vmovdqa     %ymm2, 64(%rdi)
-        vmovdqa     %ymm3, 96(%rdi)
-        add         $128, %rdi
-
-        cmp         %r8, %rsi
-        jb          .L_OVERLAP_FWD_ALIGNED_DST_LOOP_{[prefetch]s}
-
-        sub         %rsi, %r9
-        mov         %r9, %rdx
-
-        vmovdqu     %ymm4, -128(%rdi,%rdx)
-        vmovdqu     %ymm5,  -96(%rdi,%rdx)
-        vmovdqu     %ymm6,  -64(%rdi,%rdx)
-        vmovdqu     %ymm7,  -32(%rdi,%rdx)
-        vmovdqu     %ymm8, (%rax)  // %rax == the original (unaligned) %rdi
-
-        vzeroupper
-
-.L_RET_{[prefetch]s}:
-        ret
-
-.L_OVERLAP_BWD_{[prefetch]s}:
-        // Save last 32 bytes.
-        vmovdqu     -32(%rsi, %rdx), %ymm8
-        lea         -32(%rdi, %rdx), %r9
-
-
-        // %r8 is the end condition for the loop.
-        lea         128(%rsi), %r8
-
-        // Align %rdi+%rdx (destination end) to a 32 byte boundary.
-        // %rcx = (%rdi + %rdx - 32) & 31
-        mov         %r9, %rcx
-        and         $31, %rcx
-        // Set %rsi & %rdi to the end of the 32 byte aligned range.
-        sub         %rcx, %rdx
-        add         %rdx, %rsi
-        add         %rdx, %rdi
-
-
-.L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}:
-        {[prefetch]s}    -128(%rdi)
-        {[prefetch]s}    -192(%rdi)
-
-        vmovdqu      -32(%rsi), %ymm4
-        vmovdqu      -64(%rsi), %ymm5
-        vmovdqu      -96(%rsi), %ymm6
-        vmovdqu     -128(%rsi), %ymm7
-        sub         $128, %rsi
-
-        vmovdqa     %ymm4,  -32(%rdi)
-        vmovdqa     %ymm5,  -64(%rdi)
-        vmovdqa     %ymm6,  -96(%rdi)
-        vmovdqa     %ymm7, -128(%rdi)
-        sub         $128, %rdi
-
-        cmp         %r8, %rsi
-        ja          .L_OVERLAP_BWD_ALIGNED_DST_LOOP_{[prefetch]s}
-
-        vmovdqu     %ymm0,   (%rax)  // %rax == the original unaligned %rdi
-        vmovdqu     %ymm1, 32(%rax)
-        vmovdqu     %ymm2, 64(%rax)
-        vmovdqu     %ymm3, 96(%rax)
-        vmovdqu     %ymm8, (%r9)
-
-        vzeroupper
-	ret
-
-        .cfi_endproc
-        // .size       {[function_prefix]s}__folly_memcpy_{[prefetch]s}, .-{[function_prefix]s}__folly_memcpy_{[prefetch]s} not supported by windows
--- a/crates/compiler/builtins/bitcode/src/libc/folly/memcpy.zig
+++ b/crates/compiler/builtins/bitcode/src/libc/folly/memcpy.zig
@ -1,18 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const arch = builtin.cpu.arch;
-const function_prefix = @import("../assembly_util.zig").function_prefix;
-
-comptime {
-    switch (arch) {
-        .x86_64 => {
-            inline for ([_][]const u8{ "prefetchw", "prefetcht0" }) |prefetch| {
-                asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .prefetch = prefetch, .function_prefix = function_prefix }));
-            }
-        },
-        else => unreachable,
-    }
-}
-
-pub extern fn __folly_memcpy_prefetchw(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
-pub extern fn __folly_memcpy_prefetcht0(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.SysV) [*]u8;
--- a/crates/compiler/builtins/bitcode/src/libc/musl.zig
+++ b/crates/compiler/builtins/bitcode/src/libc/musl.zig
@ -1 +0,0 @@
-pub const memcpy = @import("musl/memcpy.zig").memcpy;
--- a/crates/compiler/builtins/bitcode/src/libc/musl/COPYRIGHT
+++ b/crates/compiler/builtins/bitcode/src/libc/musl/COPYRIGHT
@ -1,193 +0,0 @@
-musl as a whole is licensed under the following standard MIT license:
-
----------------------------------------------------------------------
-Copyright © 2005-2020 Rich Felker, et al.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
-
-Authors/contributors include:
-
-A. Wilcox
-Ada Worcester
-Alex Dowad
-Alex Suykov
-Alexander Monakov
-Andre McCurdy
-Andrew Kelley
-Anthony G. Basile
-Aric Belsito
-Arvid Picciani
-Bartosz Brachaczek
-Benjamin Peterson
-Bobby Bingham
-Boris Brezillon
-Brent Cook
-Chris Spiegel
-Clément Vasseur
-Daniel Micay
-Daniel Sabogal
-Daurnimator
-David Carlier
-David Edelsohn
-Denys Vlasenko
-Dmitry Ivanov
-Dmitry V. Levin
-Drew DeVault
-Emil Renner Berthing
-Fangrui Song
-Felix Fietkau
-Felix Janda
-Gianluca Anzolin
-Hauke Mehrtens
-He X
-Hiltjo Posthuma
-Isaac Dunham
-Jaydeep Patil
-Jens Gustedt
-Jeremy Huntwork
-Jo-Philipp Wich
-Joakim Sindholt
-John Spencer
-Julien Ramseier
-Justin Cormack
-Kaarle Ritvanen
-Khem Raj
-Kylie McClain
-Leah Neukirchen
-Luca Barbato
-Luka Perkov
-M Farkas-Dyck (Strake)
-Mahesh Bodapati
-Markus Wichmann
-Masanori Ogino
-Michael Clark
-Michael Forney
-Mikhail Kremnyov
-Natanael Copa
-Nicholas J. Kain
-orc
-Pascal Cuoq
-Patrick Oppenlander
-Petr Hosek
-Petr Skocik
-Pierre Carrier
-Reini Urban
-Rich Felker
-Richard Pennington
-Ryan Fairfax
-Samuel Holland
-Segev Finer
-Shiz
-sin
-Solar Designer
-Stefan Kristiansson
-Stefan O'Rear
-Szabolcs Nagy
-Timo Teräs
-Trutz Behn
-Valentin Ochs
-Will Dietz
-William Haddon
-William Pitcock
-
-Portions of this software are derived from third-party works licensed
-under terms compatible with the above MIT license:
-
-The TRE regular expression implementation (src/regex/reg* and
-src/regex/tre*) is Copyright © 2001-2008 Ville Laurikari and licensed
-under a 2-clause BSD license (license text in the source files). The
-included version has been heavily modified by Rich Felker in 2012, in
-the interests of size, simplicity, and namespace cleanliness.
-
-Much of the math library code (src/math/* and src/complex/*) is
-Copyright © 1993,2004 Sun Microsystems or
-Copyright © 2003-2011 David Schultz or
-Copyright © 2003-2009 Steven G. Kargl or
-Copyright © 2003-2009 Bruce D. Evans or
-Copyright © 2008 Stephen L. Moshier or
-Copyright © 2017-2018 Arm Limited
-and labelled as such in comments in the individual source files. All
-have been licensed under extremely permissive terms.
-
-The ARM memcpy code (src/string/arm/memcpy.S) is Copyright © 2008
-The Android Open Source Project and is licensed under a two-clause BSD
-license. It was taken from Bionic libc, used on Android.
-
-The AArch64 memcpy and memset code (src/string/aarch64/*) are
-Copyright © 1999-2019, Arm Limited.
-
-The implementation of DES for crypt (src/crypt/crypt_des.c) is
-Copyright © 1994 David Burren. It is licensed under a BSD license.
-
-The implementation of blowfish crypt (src/crypt/crypt_blowfish.c) was
-originally written by Solar Designer and placed into the public
-domain. The code also comes with a fallback permissive license for use
-in jurisdictions that may not recognize the public domain.
-
-The smoothsort implementation (src/stdlib/qsort.c) is Copyright © 2011
-Valentin Ochs and is licensed under an MIT-style license.
-
-The x86_64 port was written by Nicholas J. Kain and is licensed under
-the standard MIT terms.
-
-The mips and microblaze ports were originally written by Richard
-Pennington for use in the ellcc project. The original code was adapted
-by Rich Felker for build system and code conventions during upstream
-integration. It is licensed under the standard MIT terms.
-
-The mips64 port was contributed by Imagination Technologies and is
-licensed under the standard MIT terms.
-
-The powerpc port was also originally written by Richard Pennington,
-and later supplemented and integrated by John Spencer. It is licensed
-under the standard MIT terms.
-
-All other files which have no copyright comments are original works
-produced specifically for use as part of this library, written either
-by Rich Felker, the main author of the library, or by one or more
-contibutors listed above. Details on authorship of individual files
-can be found in the git version control history of the project. The
-omission of copyright and license comments in each file is in the
-interest of source tree size.
-
-In addition, permission is hereby granted for all public header files
-(include/* and arch/*/bits/*) and crt files intended to be linked into
-applications (crt/*, ldso/dlstart.c, and arch/*/crt_arch.h) to omit
-the copyright notice and permission notice otherwise required by the
-license, and to use these files without any requirement of
-attribution. These files include substantial contributions from:
-
-Bobby Bingham
-John Spencer
-Nicholas J. Kain
-Rich Felker
-Richard Pennington
-Stefan Kristiansson
-Szabolcs Nagy
-
-all of whom have explicitly granted such permission.
-
-This file previously contained text expressing a belief that most of
-the files covered by the above exception were sufficiently trivial not
-to be subject to copyright, resulting in confusion over whether it
-negated the permissions granted in the license. In the spirit of
-permissive licensing, and of not having licensing issues being an
-obstacle to adoption, that text has been removed.
--- a/crates/compiler/builtins/bitcode/src/libc/musl/README.md
+++ b/crates/compiler/builtins/bitcode/src/libc/musl/README.md
@ -1,2 +0,0 @@
-This set of files all come from [musl libc](https://musl.libc.org/).
-Roc just directly uses a few of them instead of depending on musl libc fully.
--- a/crates/compiler/builtins/bitcode/src/libc/musl/memcpy-x86.S
+++ b/crates/compiler/builtins/bitcode/src/libc/musl/memcpy-x86.S
@ -1,30 +0,0 @@
-.global {[function_prefix]s}musl_memcpy
-// Windows does not support the type directive.
-// .type {[function_prefix]s}musl_memcpy,@function
-{[function_prefix]s}musl_memcpy:
-	push %esi
-	push %edi
-	mov 12(%esp),%edi
-	mov 16(%esp),%esi
-	mov 20(%esp),%ecx
-	mov %edi,%eax
-	cmp $4,%ecx
-	jc 1f
-	test $3,%edi
-	jz 1f
-2:	movsb
-	dec %ecx
-	test $3,%edi
-	jnz 2b
-1:	mov %ecx,%edx
-	shr $2,%ecx
-	rep
-	movsl
-	and $3,%edx
-	jz 1f
-2:	movsb
-	dec %edx
-	jnz 2b
-1:	pop %edi
-	pop %esi
-	ret
--- a/crates/compiler/builtins/bitcode/src/libc/musl/memcpy-x86_64.S
+++ b/crates/compiler/builtins/bitcode/src/libc/musl/memcpy-x86_64.S
@ -1,23 +0,0 @@
-.global {[function_prefix]s}musl_memcpy
-// Windows does not support the type directive.
-// .type {[function_prefix]s}musl_memcpy,@function
-{[function_prefix]s}musl_memcpy:
-	mov %rdi,%rax
-	cmp $8,%rdx
-	jc 1f
-	test $7,%edi
-	jz 1f
-2:	movsb
-	dec %rdx
-	test $7,%edi
-	jnz 2b
-1:	mov %rdx,%rcx
-	shr $3,%rcx
-	rep
-	movsq
-	and $7,%edx
-	jz 1f
-2:	movsb
-	dec %edx
-	jnz 2b
-1:	ret
--- a/crates/compiler/builtins/bitcode/src/libc/musl/memcpy.zig
+++ b/crates/compiler/builtins/bitcode/src/libc/musl/memcpy.zig
@ -1,223 +0,0 @@
-const std = @import("std");
-const builtin = @import("builtin");
-const arch = builtin.cpu.arch;
-const function_prefix = @import("../assembly_util.zig").function_prefix;
-
-comptime {
-    switch (arch) {
-        .x86_64 => {
-            asm (std.fmt.comptimePrint(@embedFile("memcpy-x86_64.S"), .{ .function_prefix = function_prefix }));
-        },
-        .x86 => {
-            asm (std.fmt.comptimePrint(@embedFile("memcpy-x86.S"), .{ .function_prefix = function_prefix }));
-        },
-        // TODO: add assembly implementations for other platforms.
-        else => {},
-    }
-}
-
-pub const memcpy =
-    switch (builtin.os.tag) {
-    .windows => fallback_memcpy,
-    else => switch (arch) {
-        .x86_64, .x86 => musl_memcpy,
-        else => fallback_memcpy,
-    },
-};
-
-pub extern fn musl_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8;
-
-// Note: this is written to only support little endian targets.
-// To support big endian, `<<` and `>>` would need to be swapped.
-pub fn fallback_memcpy(noalias dest: [*]u8, noalias src: [*]const u8, len: usize) callconv(.C) [*]u8 {
-    var d = dest;
-    var s = src;
-    var n = len;
-    switch (@min(n, @intFromPtr(s) % 4)) {
-        1 => {
-            d[0] = s[0];
-            d += 1;
-            s += 1;
-            n -= 1;
-        },
-        2 => {
-            d[0] = s[0];
-            d[1] = s[1];
-            d += 2;
-            s += 2;
-            n -= 2;
-        },
-        3 => {
-            d[0] = s[0];
-            d[1] = s[1];
-            d[2] = s[2];
-            d += 3;
-            s += 3;
-            n -= 3;
-        },
-        else => {},
-    }
-
-    if (@intFromPtr(d) % 4 == 0) {
-        var d4 = @as([*]align(4) u8, @alignCast(d));
-        var s4 = @as([*]align(4) const u8, @alignCast(s));
-        while (n >= 16) : (n -= 16) {
-            var d_u32 = @as([*]u32, @ptrCast(d4));
-            var s_u32 = @as([*]const u32, @ptrCast(s4));
-            d_u32[0] = s_u32[0];
-            d_u32[1] = s_u32[1];
-            d_u32[2] = s_u32[2];
-            d_u32[3] = s_u32[3];
-
-            d4 += 16;
-            s4 += 16;
-        }
-        if (n & 8 != 0) {
-            var d_u32 = @as([*]u32, @ptrCast(d4));
-            var s_u32 = @as([*]const u32, @ptrCast(s4));
-            d_u32[0] = s_u32[0];
-            d_u32[1] = s_u32[1];
-
-            d4 += 8;
-            s4 += 8;
-        }
-        if (n & 4 != 0) {
-            var d_u32 = @as([*]u32, @ptrCast(d4));
-            var s_u32 = @as([*]const u32, @ptrCast(s4));
-            d_u32[0] = s_u32[0];
-
-            d4 += 4;
-            s4 += 4;
-        }
-        d = d4;
-        s = s4;
-        if (n & 2 != 0) {
-            d[0] = s[0];
-            d += 1;
-            s += 1;
-            d[0] = s[0];
-            d += 1;
-            s += 1;
-        }
-        if (n & 1 != 0) {
-            d[0] = s[0];
-        }
-        return dest;
-    }
-    if (n >= 32) {
-        switch (@intFromPtr(d) % 4) {
-            1 => {
-                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
-                d[0] = s[0];
-                d += 1;
-                s += 1;
-                d[0] = s[0];
-                d += 1;
-                s += 1;
-                d[0] = s[0];
-                d += 1;
-                s += 1;
-                n -= 3;
-                while (n >= 17) : (n -= 16) {
-                    var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
-                    var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 1)));
-                    var x = s_u32[0];
-                    d_u32[0] = (w >> 24) | (x << 8);
-                    w = s_u32[1];
-                    d_u32[1] = (x >> 24) | (w << 8);
-                    x = s_u32[2];
-                    d_u32[2] = (w >> 24) | (x << 8);
-                    w = s_u32[3];
-                    d_u32[3] = (x >> 24) | (w << 8);
-
-                    d += 16;
-                    s += 16;
-                }
-            },
-            2 => {
-                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
-                d[0] = s[0];
-                d += 1;
-                s += 1;
-                d[0] = s[0];
-                d += 1;
-                s += 1;
-                n -= 2;
-                while (n >= 18) : (n -= 16) {
-                    var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
-                    var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 2)));
-                    var x = s_u32[0];
-                    d_u32[0] = (w >> 16) | (x << 16);
-                    w = s_u32[1];
-                    d_u32[1] = (x >> 16) | (w << 16);
-                    x = s_u32[2];
-                    d_u32[2] = (w >> 16) | (x << 16);
-                    w = s_u32[3];
-                    d_u32[3] = (x >> 16) | (w << 16);
-
-                    d += 16;
-                    s += 16;
-                }
-            },
-            3 => {
-                var w = @as([*]const u32, @ptrCast(@alignCast(s)))[0];
-                d[0] = s[0];
-                d += 1;
-                s += 1;
-                n -= 1;
-                while (n >= 19) : (n -= 16) {
-                    var d_u32 = @as([*]u32, @ptrCast(@alignCast(d)));
-                    var s_u32 = @as([*]const u32, @ptrCast(@alignCast(s + 3)));
-                    var x = s_u32[0];
-                    d_u32[0] = (w >> 8) | (x << 24);
-                    w = s_u32[1];
-                    d_u32[1] = (x >> 8) | (w << 24);
-                    x = s_u32[2];
-                    d_u32[2] = (w >> 8) | (x << 24);
-                    w = s_u32[3];
-                    d_u32[3] = (x >> 8) | (w << 24);
-
-                    d += 16;
-                    s += 16;
-                }
-            },
-            else => unreachable,
-        }
-    }
-    if (n & 16 != 0) {
-        comptime var i = 0;
-        inline while (i < 16) : (i += 1) {
-            d[0] = s[0];
-            d += 1;
-            s += 1;
-        }
-    }
-    if (n & 8 != 0) {
-        comptime var i = 0;
-        inline while (i < 8) : (i += 1) {
-            d[0] = s[0];
-            d += 1;
-            s += 1;
-        }
-    }
-    if (n & 4 != 0) {
-        comptime var i = 0;
-        inline while (i < 4) : (i += 1) {
-            d[0] = s[0];
-            d += 1;
-            s += 1;
-        }
-    }
-    if (n & 2 != 0) {
-        d[0] = s[0];
-        d += 1;
-        s += 1;
-        d[0] = s[0];
-        d += 1;
-        s += 1;
-    }
-    if (n & 1 != 0) {
-        d[0] = s[0];
-    }
-    return dest;
-}
--- a/crates/compiler/builtins/bitcode/src/main.zig
+++ b/crates/compiler/builtins/bitcode/src/main.zig
@ -6,11 +6,6 @@ const expect = @import("expect.zig");
 const panic_utils = @import("panic.zig");
 const dbg_utils = @import("dbg.zig");

-comptime {
-    _ = @import("compiler_rt.zig");
-    _ = @import("libc.zig");
-}
-
 const ROC_BUILTINS = "roc_builtins";
 const NUM = "num";
 const STR = "str";
@ -18,6 +13,13 @@ const STR = "str";
 // Dec Module
 const dec = @import("dec.zig");

+var FLTUSED: i32 = 0;
+comptime {
+    if (builtin.os.tag == .windows) {
+        @export(FLTUSED, .{ .name = "_fltused", .linkage = .Weak });
+    }
+}
+
 comptime {
    exportDecFn(dec.absC, "abs");
    exportDecFn(dec.acosC, "acos");
--- a/crates/compiler/builtins/roc/Dict.roc
+++ b/crates/compiler/builtins/roc/Dict.roc
--- a/crates/compiler/builtins/roc/Set.roc
+++ b/crates/compiler/builtins/roc/Set.roc
@ -2,6 +2,8 @@ interface Set
    exposes [
        Set,
        empty,
+        withCapacity,
+        reserve,
        single,
        walk,
        walkUntil,
@ -45,7 +47,7 @@ Set k := Dict.Dict k {} where k implements Hash & Eq
        },
    ]

-isEq : Set k, Set k -> Bool where k implements Hash & Eq
+isEq : Set k, Set k -> Bool
 isEq = \xs, ys ->
    if len xs != len ys then
        Bool.false
@ -56,7 +58,7 @@ isEq = \xs, ys ->
            else
                Break Bool.false

-hashSet : hasher, Set k -> hasher where k implements Hash & Eq, hasher implements Hasher
+hashSet : hasher, Set k -> hasher where hasher implements Hasher
 hashSet = \hasher, @Set inner -> Hash.hash hasher inner

 toInspectorSet : Set k -> Inspector f where k implements Inspect & Hash & Eq, f implements InspectFormatter
@ -74,13 +76,18 @@ toInspectorSet = \set ->
 empty : {} -> Set *
 empty = \{} -> @Set (Dict.empty {})

-## Return a dictionary with space allocated for a number of entries. This
+## Return a set with space allocated for a number of entries. This
 ## may provide a performance optimization if you know how many entries will be
 ## inserted.
 withCapacity : Nat -> Set *
 withCapacity = \cap ->
    @Set (Dict.withCapacity cap)

+## Enlarge the set so it can hold at least `requested` additional elements.
+reserve : Set k, Nat -> Set k
+reserve = \@Set dict, requested ->
+    @Set (Dict.reserve dict requested)
+
 ## Creates a new `Set` with a single value.
 ## ```
 ## singleItemSet = Set.single "Apple"
@ -88,7 +95,7 @@ withCapacity = \cap ->
 ##
 ## expect countValues == 1
 ## ```
-single : k -> Set k where k implements Hash & Eq
+single : k -> Set k
 single = \key ->
    Dict.single key {} |> @Set

@ -104,7 +111,7 @@ single = \key ->
 ##
 ## expect countValues == 3
 ## ```
-insert : Set k, k -> Set k where k implements Hash & Eq
+insert : Set k, k -> Set k
 insert = \@Set dict, key ->
    Dict.insert dict key {} |> @Set

@ -189,7 +196,7 @@ expect
 ## expect has10 == Bool.false
 ## expect has20 == Bool.true
 ## ```
-remove : Set k, k -> Set k where k implements Hash & Eq
+remove : Set k, k -> Set k
 remove = \@Set dict, key ->
    Dict.remove dict key |> @Set

@ -208,7 +215,7 @@ remove = \@Set dict, key ->
 ## expect hasApple == Bool.true
 ## expect hasBanana == Bool.false
 ## ```
-contains : Set k, k -> Bool where k implements Hash & Eq
+contains : Set k, k -> Bool
 contains = \@Set dict, key ->
    Dict.contains dict key

@ -221,7 +228,7 @@ contains = \@Set dict, key ->
 ##
 ## expect Set.toList numbers == values
 ## ```
-toList : Set k -> List k where k implements Hash & Eq
+toList : Set k -> List k
 toList = \@Set dict ->
    Dict.keys dict

@ -235,7 +242,7 @@ toList = \@Set dict ->
 ##
 ## expect Set.fromList [Pear, Apple, Banana] == values
 ## ```
-fromList : List k -> Set k where k implements Hash & Eq
+fromList : List k -> Set k
 fromList = \list ->
    list
    |> List.map \k -> (k, {})
@ -252,7 +259,7 @@ fromList = \list ->
 ##
 ## expect Set.union set1 set2 == Set.fromList [Left, Right]
 ## ```
-union : Set k, Set k -> Set k where k implements Hash & Eq
+union : Set k, Set k -> Set k
 union = \@Set dict1, @Set dict2 ->
    Dict.insertAll dict1 dict2 |> @Set

@ -265,7 +272,7 @@ union = \@Set dict1, @Set dict2 ->
 ##
 ## expect Set.intersection set1 set2 == Set.single Left
 ## ```
-intersection : Set k, Set k -> Set k where k implements Hash & Eq
+intersection : Set k, Set k -> Set k
 intersection = \@Set dict1, @Set dict2 ->
    Dict.keepShared dict1 dict2 |> @Set

@ -279,7 +286,7 @@ intersection = \@Set dict1, @Set dict2 ->
 ##
 ## expect Set.difference first second == Set.fromList [Up, Down]
 ## ```
-difference : Set k, Set k -> Set k where k implements Hash & Eq
+difference : Set k, Set k -> Set k
 difference = \@Set dict1, @Set dict2 ->
    Dict.removeAll dict1 dict2 |> @Set

@ -302,14 +309,14 @@ difference = \@Set dict1, @Set dict2 ->
 ##
 ## expect result == 2
 ## ```
-walk : Set k, state, (state, k -> state) -> state where k implements Hash & Eq
+walk : Set k, state, (state, k -> state) -> state
 walk = \@Set dict, state, step ->
    Dict.walk dict state (\s, k, _ -> step s k)

 ## Convert each value in the set to something new, by calling a conversion
 ## function on each of them which receives the old value. Then return a
 ## new set containing the converted values.
-map : Set a, (a -> b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
+map : Set a, (a -> b) -> Set b
 map = \set, transform ->
    init = withCapacity (capacity set)

@ -321,7 +328,7 @@ map = \set, transform ->
 ## (using [Set.union]) into one set.
 ##
 ## You may know a similar function named `concatMap` in other languages.
-joinMap : Set a, (a -> Set b) -> Set b where a implements Hash & Eq, b implements Hash & Eq
+joinMap : Set a, (a -> Set b) -> Set b
 joinMap = \set, transform ->
    init = withCapacity (capacity set) # Might be a pessimization

@ -343,7 +350,7 @@ joinMap = \set, transform ->
 ##
 ## expect result == FoundTheAnswer
 ## ```
-walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state where k implements Hash & Eq
+walkUntil : Set k, state, (state, k -> [Continue state, Break state]) -> state
 walkUntil = \@Set dict, state, step ->
    Dict.walkUntil dict state (\s, k, _ -> step s k)

--- a/crates/compiler/gen_llvm/src/llvm/build.rs
+++ b/crates/compiler/gen_llvm/src/llvm/build.rs
@ -1058,6 +1058,50 @@ pub fn module_from_builtins<'ctx>(
    let module = Module::parse_bitcode_from_buffer(&memory_buffer, ctx)
        .unwrap_or_else(|err| panic!("Unable to import builtins bitcode. LLVM error: {err:?}"));

+    // In my testing, this adds about 20ms extra to compilation.
+    // Long term it would be best if we could do this on the zig side.
+    // This change enables us to dce all the parts of compiler-rt we don't use.
+    // That said, it would be better to DCE them before roc app compilation time.
+    // Anything not depended on by a `roc_builtins.` function could already be DCE'd theoretically.
+    // That said, this workaround is good enough and fixes compilation times.
+
+    // Also, must_keep lists the functions we depend on that would normally be provided by libc.
+    // They are magically linked to by llvm builtins, so we must specify that they can't be DCE'd.
+    let must_keep = [
+        "_fltused",
+        "floorf",
+        "memcpy",
+        "memset",
+        // Roc special functions
+        "__roc_force_longjmp",
+        "__roc_force_setjmp",
+        "set_shared_buffer",
+    ];
+    for func in module.get_functions() {
+        let has_definition = func.count_basic_blocks() > 0;
+        let name = func.get_name().to_string_lossy();
+        if has_definition
+            && !name.starts_with("roc_builtins.")
+            && !must_keep.contains(&name.as_ref())
+        {
+            func.set_linkage(Linkage::Private);
+        }
+    }
+
+    // Note, running DCE here is faster than waiting until full app DCE.
+    let mpm = PassManager::create(());
+    mpm.add_global_dce_pass();
+    mpm.run_on(&module);
+
+    // Now that the unused compiler-rt functions have been removed,
+    // mark that the builtin functions are allowed to be DCE'd if they aren't used.
+    for func in module.get_functions() {
+        let name = func.get_name().to_string_lossy();
+        if name.starts_with("roc_builtins.") {
+            func.set_linkage(Linkage::Private);
+        }
+    }
+
    // Add LLVM intrinsics.
    add_intrinsics(ctx, &module);

--- a/crates/compiler/module/src/low_level.rs
+++ b/crates/compiler/module/src/low_level.rs
@ -183,7 +183,7 @@ impl LowLevelWrapperType {

 /// We use a rust macro to ensure that every LowLevel gets handled
 macro_rules! map_symbol_to_lowlevel {
-    ($($lowlevel:ident <= $symbol:ident),* $(,)?) => {
+    ($($lowlevel:ident <= $($symbol:ident),+);* $(;)?) => {

        fn for_symbol_help(symbol: Symbol) -> LowLevelWrapperType {
            use $crate::low_level::LowLevelWrapperType::*;
@ -191,14 +191,14 @@ macro_rules! map_symbol_to_lowlevel {
            // expands to a big (but non-exhaustive) match on symbols and maps them to a lowlevel
            match symbol {
                $(
-                Symbol::$symbol => CanBeReplacedBy(LowLevel::$lowlevel),
+                $(Symbol::$symbol)|+ => CanBeReplacedBy(LowLevel::$lowlevel),
                )*

                _ => NotALowLevelWrapper,
            }
        }

-        fn _enforce_exhaustiveness(lowlevel: LowLevel) -> Symbol {
+        fn _enforce_exhaustiveness(lowlevel: LowLevel) -> &'static [Symbol] {
            // when adding a new lowlevel, this match will stop being exhaustive, and give a
            // compiler error. Most likely, you are adding a new lowlevel that maps directly to a
            // symbol. For instance, you want to have `List.foo` to stand for the `ListFoo`
@ -209,7 +209,7 @@ macro_rules! map_symbol_to_lowlevel {
            // that it isn't and just see if that works.
            match lowlevel {
                $(
-                LowLevel::$lowlevel => Symbol::$symbol,
+                LowLevel::$lowlevel => &[$(Symbol::$symbol),+],
                )*

                // these are higher-order lowlevels. these need the surrounding
@ -259,107 +259,107 @@ macro_rules! map_symbol_to_lowlevel {
 // pattern of a symbol mapping directly to a lowlevel. In other words, most lowlevels (left) are generated
 // by only one specific symbol (right)
 map_symbol_to_lowlevel! {
-    StrConcat <= STR_CONCAT,
-    StrJoinWith <= STR_JOIN_WITH,
-    StrIsEmpty <= STR_IS_EMPTY,
-    StrStartsWith <= STR_STARTS_WITH,
-    StrStartsWithScalar <= STR_STARTS_WITH_SCALAR,
-    StrEndsWith <= STR_ENDS_WITH,
-    StrSplit <= STR_SPLIT,
-    StrCountGraphemes <= STR_COUNT_GRAPHEMES,
-    StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES,
-    StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL,
-    StrToUtf8 <= STR_TO_UTF8,
-    StrRepeat <= STR_REPEAT,
-    StrTrim <= STR_TRIM,
-    StrTrimStart <= STR_TRIM_START,
-    StrTrimEnd <= STR_TRIM_END,
-    StrToScalars <= STR_TO_SCALARS,
-    StrGetUnsafe <= STR_GET_UNSAFE,
-    StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE,
-    StrReserve <= STR_RESERVE,
-    StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE,
-    StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE,
-    StrToNum <= STR_TO_NUM,
-    StrGetCapacity <= STR_CAPACITY,
-    StrWithCapacity <= STR_WITH_CAPACITY,
-    StrGraphemes <= STR_GRAPHEMES,
-    StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY,
-    ListLen <= LIST_LEN,
-    ListGetCapacity <= LIST_CAPACITY,
-    ListWithCapacity <= LIST_WITH_CAPACITY,
-    ListReserve <= LIST_RESERVE,
-    ListReleaseExcessCapacity <= LIST_RELEASE_EXCESS_CAPACITY,
-    ListIsUnique <= LIST_IS_UNIQUE,
-    ListAppendUnsafe <= LIST_APPEND_UNSAFE,
-    ListPrepend <= LIST_PREPEND,
-    ListGetUnsafe <= LIST_GET_UNSAFE,
-    ListReplaceUnsafe <= LIST_REPLACE_UNSAFE,
-    ListConcat <= LIST_CONCAT,
-    ListSublist <= LIST_SUBLIST_LOWLEVEL,
-    ListDropAt <= LIST_DROP_AT,
-    ListSwap <= LIST_SWAP,
-    NumAdd <= NUM_ADD,
-    NumAddWrap <= NUM_ADD_WRAP,
-    NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL,
-    NumAddSaturated <= NUM_ADD_SATURATED,
-    NumSub <= NUM_SUB,
-    NumSubWrap <= NUM_SUB_WRAP,
-    NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL,
-    NumSubSaturated <= NUM_SUB_SATURATED,
-    NumMul <= NUM_MUL,
-    NumMulWrap <= NUM_MUL_WRAP,
-    NumMulSaturated <= NUM_MUL_SATURATED,
-    NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL,
-    NumGt <= NUM_GT,
-    NumGte <= NUM_GTE,
-    NumLt <= NUM_LT,
-    NumLte <= NUM_LTE,
-    NumCompare <= NUM_COMPARE,
-    NumDivFrac <= NUM_DIV_FRAC,
-    NumDivCeilUnchecked <= NUM_DIV_CEIL,
-    NumDivTruncUnchecked <= NUM_DIV_TRUNC,
-    NumRemUnchecked <= NUM_REM,
-    NumIsMultipleOf <= NUM_IS_MULTIPLE_OF,
-    NumAbs <= NUM_ABS,
-    NumNeg <= NUM_NEG,
-    NumSin <= NUM_SIN,
-    NumCos <= NUM_COS,
-    NumTan <= NUM_TAN,
-    NumSqrtUnchecked <= NUM_SQRT,
-    NumLogUnchecked <= NUM_LOG,
-    NumRound <= NUM_ROUND,
-    NumToFrac <= NUM_TO_FRAC,
-    NumIsNan <= NUM_IS_NAN,
-    NumIsInfinite <= NUM_IS_INFINITE,
-    NumIsFinite <= NUM_IS_FINITE,
-    NumPow <= NUM_POW,
-    NumCeiling <= NUM_CEILING,
-    NumPowInt <= NUM_POW_INT,
-    NumFloor <= NUM_FLOOR,
-    NumAtan <= NUM_ATAN,
-    NumAcos <= NUM_ACOS,
-    NumAsin <= NUM_ASIN,
-    NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL,
-    NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL,
-    NumBytesToU64 <= NUM_BYTES_TO_U64_LOWLEVEL,
-    NumBytesToU128 <= NUM_BYTES_TO_U128_LOWLEVEL,
-    NumBitwiseAnd <= NUM_BITWISE_AND,
-    NumBitwiseXor <= NUM_BITWISE_XOR,
-    NumBitwiseOr <= NUM_BITWISE_OR,
-    NumShiftLeftBy <= NUM_SHIFT_LEFT,
-    NumShiftRightBy <= NUM_SHIFT_RIGHT,
-    NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL,
-    NumToStr <= NUM_TO_STR,
-    NumCountLeadingZeroBits <= NUM_COUNT_LEADING_ZERO_BITS,
-    NumCountTrailingZeroBits <= NUM_COUNT_TRAILING_ZERO_BITS,
-    NumCountOneBits <= NUM_COUNT_ONE_BITS,
-    I128OfDec <= I128_OF_DEC,
-    Eq <= BOOL_STRUCTURAL_EQ,
-    NotEq <= BOOL_STRUCTURAL_NOT_EQ,
-    And <= BOOL_AND,
-    Or <= BOOL_OR,
-    Not <= BOOL_NOT,
-    Unreachable <= LIST_UNREACHABLE,
-    DictPseudoSeed <= DICT_PSEUDO_SEED,
+    StrConcat <= STR_CONCAT;
+    StrJoinWith <= STR_JOIN_WITH;
+    StrIsEmpty <= STR_IS_EMPTY;
+    StrStartsWith <= STR_STARTS_WITH;
+    StrStartsWithScalar <= STR_STARTS_WITH_SCALAR;
+    StrEndsWith <= STR_ENDS_WITH;
+    StrSplit <= STR_SPLIT;
+    StrCountGraphemes <= STR_COUNT_GRAPHEMES;
+    StrCountUtf8Bytes <= STR_COUNT_UTF8_BYTES;
+    StrFromUtf8Range <= STR_FROM_UTF8_RANGE_LOWLEVEL;
+    StrToUtf8 <= STR_TO_UTF8;
+    StrRepeat <= STR_REPEAT;
+    StrTrim <= STR_TRIM;
+    StrTrimStart <= STR_TRIM_START;
+    StrTrimEnd <= STR_TRIM_END;
+    StrToScalars <= STR_TO_SCALARS;
+    StrGetUnsafe <= STR_GET_UNSAFE;
+    StrSubstringUnsafe <= STR_SUBSTRING_UNSAFE;
+    StrReserve <= STR_RESERVE;
+    StrAppendScalar <= STR_APPEND_SCALAR_UNSAFE;
+    StrGetScalarUnsafe <= STR_GET_SCALAR_UNSAFE;
+    StrToNum <= STR_TO_NUM;
+    StrGetCapacity <= STR_CAPACITY;
+    StrWithCapacity <= STR_WITH_CAPACITY;
+    StrGraphemes <= STR_GRAPHEMES;
+    StrReleaseExcessCapacity <= STR_RELEASE_EXCESS_CAPACITY;
+    ListLen <= LIST_LEN;
+    ListGetCapacity <= LIST_CAPACITY;
+    ListWithCapacity <= LIST_WITH_CAPACITY;
+    ListReserve <= LIST_RESERVE;
+    ListReleaseExcessCapacity <= LIST_RELEASE_EXCESS_CAPACITY;
+    ListIsUnique <= LIST_IS_UNIQUE;
+    ListAppendUnsafe <= LIST_APPEND_UNSAFE;
+    ListPrepend <= LIST_PREPEND;
+    ListGetUnsafe <= LIST_GET_UNSAFE, DICT_LIST_GET_UNSAFE;
+    ListReplaceUnsafe <= LIST_REPLACE_UNSAFE;
+    ListConcat <= LIST_CONCAT;
+    ListSublist <= LIST_SUBLIST_LOWLEVEL;
+    ListDropAt <= LIST_DROP_AT;
+    ListSwap <= LIST_SWAP;
+    NumAdd <= NUM_ADD;
+    NumAddWrap <= NUM_ADD_WRAP;
+    NumAddChecked <= NUM_ADD_CHECKED_LOWLEVEL;
+    NumAddSaturated <= NUM_ADD_SATURATED;
+    NumSub <= NUM_SUB;
+    NumSubWrap <= NUM_SUB_WRAP;
+    NumSubChecked <= NUM_SUB_CHECKED_LOWLEVEL;
+    NumSubSaturated <= NUM_SUB_SATURATED;
+    NumMul <= NUM_MUL;
+    NumMulWrap <= NUM_MUL_WRAP;
+    NumMulSaturated <= NUM_MUL_SATURATED;
+    NumMulChecked <= NUM_MUL_CHECKED_LOWLEVEL;
+    NumGt <= NUM_GT;
+    NumGte <= NUM_GTE;
+    NumLt <= NUM_LT;
+    NumLte <= NUM_LTE;
+    NumCompare <= NUM_COMPARE;
+    NumDivFrac <= NUM_DIV_FRAC;
+    NumDivCeilUnchecked <= NUM_DIV_CEIL;
+    NumDivTruncUnchecked <= NUM_DIV_TRUNC;
+    NumRemUnchecked <= NUM_REM;
+    NumIsMultipleOf <= NUM_IS_MULTIPLE_OF;
+    NumAbs <= NUM_ABS;
+    NumNeg <= NUM_NEG;
+    NumSin <= NUM_SIN;
+    NumCos <= NUM_COS;
+    NumTan <= NUM_TAN;
+    NumSqrtUnchecked <= NUM_SQRT;
+    NumLogUnchecked <= NUM_LOG;
+    NumRound <= NUM_ROUND;
+    NumToFrac <= NUM_TO_FRAC;
+    NumIsNan <= NUM_IS_NAN;
+    NumIsInfinite <= NUM_IS_INFINITE;
+    NumIsFinite <= NUM_IS_FINITE;
+    NumPow <= NUM_POW;
+    NumCeiling <= NUM_CEILING;
+    NumPowInt <= NUM_POW_INT;
+    NumFloor <= NUM_FLOOR;
+    NumAtan <= NUM_ATAN;
+    NumAcos <= NUM_ACOS;
+    NumAsin <= NUM_ASIN;
+    NumBytesToU16 <= NUM_BYTES_TO_U16_LOWLEVEL;
+    NumBytesToU32 <= NUM_BYTES_TO_U32_LOWLEVEL;
+    NumBytesToU64 <= NUM_BYTES_TO_U64_LOWLEVEL;
+    NumBytesToU128 <= NUM_BYTES_TO_U128_LOWLEVEL;
+    NumBitwiseAnd <= NUM_BITWISE_AND;
+    NumBitwiseXor <= NUM_BITWISE_XOR;
+    NumBitwiseOr <= NUM_BITWISE_OR;
+    NumShiftLeftBy <= NUM_SHIFT_LEFT;
+    NumShiftRightBy <= NUM_SHIFT_RIGHT;
+    NumShiftRightZfBy <= NUM_SHIFT_RIGHT_ZERO_FILL;
+    NumToStr <= NUM_TO_STR;
+    NumCountLeadingZeroBits <= NUM_COUNT_LEADING_ZERO_BITS;
+    NumCountTrailingZeroBits <= NUM_COUNT_TRAILING_ZERO_BITS;
+    NumCountOneBits <= NUM_COUNT_ONE_BITS;
+    I128OfDec <= I128_OF_DEC;
+    Eq <= BOOL_STRUCTURAL_EQ;
+    NotEq <= BOOL_STRUCTURAL_NOT_EQ;
+    And <= BOOL_AND;
+    Or <= BOOL_OR;
+    Not <= BOOL_NOT;
+    Unreachable <= LIST_UNREACHABLE;
+    DictPseudoSeed <= DICT_PSEUDO_SEED;
 }
--- a/crates/compiler/module/src/symbol.rs
+++ b/crates/compiler/module/src/symbol.rs
@ -1486,6 +1486,7 @@ define_builtins! {
        26 DICT_JOINMAP: "joinMap"
        27 DICT_KEEP_IF: "keepIf"
        28 DICT_DROP_IF: "dropIf"
+        29 DICT_RESERVE: "reserve"
    }
    9 SET: "Set" => {
        0 SET_SET: "Set" exposed_type=true // the Set.Set type alias
@ -1510,6 +1511,8 @@ define_builtins! {
        19 SET_JOIN_MAP: "joinMap"
        20 SET_KEEP_IF: "keepIf"
        21 SET_DROP_IF: "dropIf"
+        22 SET_WITH_CAPACITY: "withCapacity"
+        23 SET_RESERVE: "reserve"
    }
    10 BOX: "Box" => {
        0 BOX_BOX_TYPE: "Box" exposed_apply_type=true // the Box.Box opaque type
--- a/crates/compiler/test_gen/src/gen_num.rs
+++ b/crates/compiler/test_gen/src/gen_num.rs
@ -1813,6 +1813,7 @@ fn ceiling() {
 #[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))]
 fn floor() {
    assert_evals_to!("Num.floor 1.9f64", 1, i64);
+    assert_evals_to!("Num.floor -1.9f64", -2, i64);
 }

 #[test]
--- a/crates/compiler/test_mono/generated/dict.txt
+++ b/crates/compiler/test_mono/generated/dict.txt
@ -1,46 +1,40 @@
-procedure Dict.1 (Dict.596):
-    let Dict.606 : List {[], []} = Array [];
-    let Dict.35 : List U64 = CallByName Dict.35;
-    let Dict.34 : List I8 = CallByName Dict.34;
-    let Dict.605 : {List {[], []}, List U64, List I8} = Struct {Dict.606, Dict.35, Dict.34};
-    ret Dict.605;
+procedure Dict.1 (Dict.679):
+    let Dict.688 : List {U32, U32} = Array [];
+    let Dict.689 : List {[], []} = Array [];
+    let Dict.690 : U64 = 0i64;
+    let Dict.41 : Float32 = CallByName Dict.41;
+    let Dict.42 : U8 = CallByName Dict.42;
+    let Dict.687 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = Struct {Dict.688, Dict.689, Dict.690, Dict.41, Dict.42};
+    ret Dict.687;

-procedure Dict.34 ():
-    let Dict.608 : I8 = CallByName Dict.46;
-    let Dict.609 : I8 = CallByName Dict.46;
-    let Dict.610 : I8 = CallByName Dict.46;
-    let Dict.611 : I8 = CallByName Dict.46;
-    let Dict.612 : I8 = CallByName Dict.46;
-    let Dict.613 : I8 = CallByName Dict.46;
-    let Dict.614 : I8 = CallByName Dict.46;
-    let Dict.615 : I8 = CallByName Dict.46;
-    let Dict.607 : List I8 = Array [Dict.608, Dict.609, Dict.610, Dict.611, Dict.612, Dict.613, Dict.614, Dict.615];
-    ret Dict.607;
-
-procedure Dict.35 ():
-    let Dict.617 : List U64 = Array [0i64, 0i64, 0i64, 0i64, 0i64, 0i64, 0i64, 0i64];
-    ret Dict.617;
-
-procedure Dict.4 (Dict.603):
-    let Dict.114 : List {[], []} = StructAtIndex 0 Dict.603;
-    let #Derived_gen.1 : List U64 = StructAtIndex 1 Dict.603;
-    dec #Derived_gen.1;
-    let #Derived_gen.0 : List I8 = StructAtIndex 2 Dict.603;
+procedure Dict.4 (Dict.685):
+    let Dict.138 : List {[], []} = StructAtIndex 1 Dict.685;
+    let #Derived_gen.0 : List {U32, U32} = StructAtIndex 0 Dict.685;
    dec #Derived_gen.0;
-    let Dict.604 : U64 = CallByName List.6 Dict.114;
-    dec Dict.114;
-    ret Dict.604;
+    let Dict.686 : U64 = CallByName List.6 Dict.138;
+    dec Dict.138;
+    ret Dict.686;

-procedure Dict.46 ():
-    let Dict.616 : I8 = -128i64;
-    ret Dict.616;
+procedure Dict.41 ():
+    let Dict.694 : Float32 = 0.8f64;
+    ret Dict.694;
+
+procedure Dict.42 ():
+    let Dict.692 : U8 = 64i64;
+    let Dict.693 : U8 = 3i64;
+    let Dict.691 : U8 = CallByName Num.20 Dict.692 Dict.693;
+    ret Dict.691;

 procedure List.6 (#Attr.2):
    let List.553 : U64 = lowlevel ListLen #Attr.2;
    ret List.553;

+procedure Num.20 (#Attr.2, #Attr.3):
+    let Num.291 : U8 = lowlevel NumSub #Attr.2 #Attr.3;
+    ret Num.291;
+
 procedure Test.0 ():
    let Test.3 : {} = Struct {};
-    let Test.2 : {List {[], []}, List U64, List I8} = CallByName Dict.1 Test.3;
+    let Test.2 : {List {U32, U32}, List {[], []}, U64, Float32, U8} = CallByName Dict.1 Test.3;
    let Test.1 : U64 = CallByName Dict.4 Test.2;
    ret Test.1;
--- a/crates/compiler/test_mono/generated/inspect_derived_dict.txt
+++ b/crates/compiler/test_mono/generated/inspect_derived_dict.txt
--- a/crates/compiler/uitest/tests/ability/specialize/set_eq_issue_4761.txt
+++ b/crates/compiler/uitest/tests/ability/specialize/set_eq_issue_4761.txt
@ -8,5 +8,5 @@ main =
    s2 = Set.empty {}

    Bool.isEq s1 s1 && Bool.isEq s2 s2
-#                      ^^^^^^^^^ Set#Bool.isEq(22): Set Str, Set Str -[[Set.isEq(22)]]-> Bool
-#   ^^^^^^^^^ Set#Bool.isEq(22): Set U8, Set U8 -[[Set.isEq(22)]]-> Bool
+#                      ^^^^^^^^^ Set#Bool.isEq(24): Set Str, Set Str -[[Set.isEq(24)]]-> Bool
+#   ^^^^^^^^^ Set#Bool.isEq(24): Set U8, Set U8 -[[Set.isEq(24)]]-> Bool
--- a/crates/compiler/uitest/tests/recursion/generalization_among_large_recursive_group.txt
+++ b/crates/compiler/uitest/tests/recursion/generalization_among_large_recursive_group.txt
@ -3,22 +3,22 @@
 app "test" provides [main] to "./platform"

 f = \{} ->
-#^{-1} <2826><117>{} -<120>[[f(1)]]-> <116>[Ok <2834>{}]<80>*
+#^{-1} <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
    when g {} is
-#        ^ <2816><2834>{} -<2824>[[g(2)]]-> <72>[Ok <2834>{}]<102>*
+#        ^ <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
        _ -> Ok {}

 g = \{} ->
-#^{-1} <2816><2834>{} -<2824>[[g(2)]]-> <72>[Ok <2834>{}]<102>*
+#^{-1} <2908><2926>{} -<2916>[[g(2)]]-> <72>[Ok <2926>{}]<102>*
    when h {} is
-#        ^ <2821><2834>{} -<2829>[[h(3)]]-> <94>[Ok <2834>{}]<124>*
+#        ^ <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
        _ -> Ok {}

 h = \{} ->
-#^{-1} <2821><2834>{} -<2829>[[h(3)]]-> <94>[Ok <2834>{}]<124>*
+#^{-1} <2913><2926>{} -<2921>[[h(3)]]-> <94>[Ok <2926>{}]<124>*
    when f {} is
-#        ^ <2826><117>{} -<120>[[f(1)]]-> <116>[Ok <2834>{}]<80>*
+#        ^ <2918><117>{} -<120>[[f(1)]]-> <116>[Ok <2926>{}]<80>*
        _ -> Ok {}

 main = f {}
-#      ^ <2836><133>{} -<136>[[f(1)]]-> <138>[Ok <2834>{}]<2835>w_a
+#      ^ <2928><133>{} -<136>[[f(1)]]-> <138>[Ok <2926>{}]<2927>w_a
--- a/crates/linker/src/pe.rs
+++ b/crates/linker/src/pe.rs
@ -435,17 +435,87 @@ pub(crate) fn surgery_pe(executable_path: &Path, metadata_path: &Path, roc_app_b
                    );
                } else {
                    let is_ingested_compiler_rt = [
-                        "__muloti4",
+                        "__addtf3",
+                        "__ceilx",
+                        "__cmpdf2",
+                        "__cmphf2",
+                        "__cmpsf2",
+                        "__cmptf2",
+                        "__cmpxf2",
+                        "__cosx",
+                        "__divsf3",
+                        "__divtf3",
                        "__divti3",
-                        "__udivti3",
-                        "__modti3",
-                        "__umodti3",
+                        "__exp2x",
+                        "__expx",
+                        "__extendhfsf2",
+                        "__fabsx",
                        "__fixdfti",
                        "__fixsfti",
                        "__fixunsdfti",
                        "__fixunssfti",
+                        "__floorx",
+                        "__fmax",
+                        "__fmaxx",
+                        "__fminx",
+                        "__fmodx",
+                        "__gedf2",
+                        "__gehf2",
+                        "__gesf2",
+                        "__getf2",
+                        "__gexf2",
+                        "__log10x",
+                        "__log2x",
+                        "__logx",
                        "__lshrti3",
-                        "memcpy_decision",
+                        "__modti3",
+                        "__muloti4",
+                        "__multf3",
+                        "__roundx",
+                        "__sincosx",
+                        "__sinx",
+                        "__sqrtx",
+                        "__tanx",
+                        "__truncsfhf2",
+                        "__truncx",
+                        "__udivmoddi4",
+                        "__udivti3",
+                        "__umodti3",
+                        "ceilq",
+                        "cos",
+                        "cosf",
+                        "cosq",
+                        "exp",
+                        "exp2",
+                        "exp2q",
+                        "expf",
+                        "expq",
+                        "floor",
+                        "floorf",
+                        "floorq",
+                        "fmaq",
+                        "fmaxf",
+                        "fmaxl",
+                        "fmodf",
+                        "log10",
+                        "log10q",
+                        "log2",
+                        "log2q",
+                        "logq",
+                        "memcpy",
+                        "roundq",
+                        "sin",
+                        "sincos",
+                        "sincosf",
+                        "sincosq",
+                        "sinf",
+                        "sinq",
+                        "sqrt",
+                        "sqrtf",
+                        "sqrtq",
+                        "tan",
+                        "tanf",
+                        "tanq",
                    ]
                    .contains(&name.as_str());
                    if *address == 0 && !name.starts_with("roc") && !is_ingested_compiler_rt {
				`@ -1 +0,0 @@`
				`pub const memcpy = @import("musl/memcpy.zig").memcpy;`