Merge branch 'trunk' into editor-file-access

Commit 85e96a36d7 by Chadtech, 2020-12-04 00:26:12 -05:00 (committed by GitHub)
40 changed files with 9602 additions and 8552 deletions

.gitignore (vendored)

@ -1,4 +1,5 @@
target
zig-cache
.direnv
*.rs.bk
@ -7,3 +8,4 @@ vgcore.*
#editors
.idea/


@ -92,6 +92,8 @@ Now with nix installed you just need to run one command:
> This may not output anything for a little while. This is normal, hang in there. Also make sure you are in the roc project root.
> Also, if you're on NixOS you'll need to enable opengl at the system-wide level. You can do this in configuration.nix with `hardware.opengl.enable = true;`. If you don't do this, nix-shell will fail!
You should be in a shell with everything needed to build already installed. Next run:
`cargo run repl`


@ -189,6 +189,11 @@ fn jit_to_ast_help<'a>(
Content::Structure(FlatType::RecursiveTagUnion(_, _, _)) => {
todo!("print recursive tag unions in the REPL")
}
Content::Alias(_, _, actual) => {
let content = env.subs.get_without_compacting(*actual).content;
jit_to_ast_help(env, lib, main_fn_name, layout, &content)
}
other => unreachable!("Weird content for Union layout: {:?}", other),
},
Layout::RecursiveUnion(_) | Layout::RecursivePointer => {


@ -89,6 +89,25 @@ pub fn run_with_valgrind(args: &[&str]) -> (Out, String) {
cmd.arg("--tool=memcheck");
cmd.arg("--xml=yes");
// If you are having valgrind issues on MacOS, you may need to suppress some
// of the errors. Read more here: https://github.com/rtfeldman/roc/issues/746
if let Some(suppressions_file_os_str) = env::var_os("VALGRIND_SUPPRESSIONS") {
match suppressions_file_os_str.to_str() {
None => {
panic!("Could not determine suppression file location from OsStr");
}
Some(suppressions_file) => {
let mut buf = String::new();
buf.push_str("--suppressions=");
buf.push_str(suppressions_file);
cmd.arg(buf);
}
}
}
cmd.arg(format!("--xml-file={}", filepath));
for arg in args {
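The suppressions hook in this hunk is driven entirely by the `VALGRIND_SUPPRESSIONS` environment variable. A minimal sketch of how a caller might set it when invoking the test suite; the path and the `cargo test` invocation are illustrative, not from this diff:

```rust
use std::process::Command;

fn main() {
    // Hypothetical wrapper: export VALGRIND_SUPPRESSIONS so that
    // run_with_valgrind adds --suppressions=<path> to the valgrind command.
    let status = Command::new("cargo")
        .args(&["test", "--release"])
        .env("VALGRIND_SUPPRESSIONS", "/tmp/macos.supp")
        .status()
        .expect("failed to spawn cargo");
    assert!(status.success());
}
```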


@ -87,6 +87,11 @@ mod repl_eval {
expect_success("1.1 + 2", "3.1 : F64");
}
#[test]
fn num_rem() {
expect_success("299 % 10", "Ok 9 : Result Int [ DivByZero ]*");
}
#[test]
fn bool_in_record() {
expect_success("{ x: 1 == 1 }", "{ x: True } : { x : Bool }");


@ -54,3 +54,8 @@ maplit = "1.0.1"
indoc = "0.3.3"
quickcheck = "0.8"
quickcheck_macros = "0.8"
[features]
target-arm = []
target-aarch64 = []
target-webassembly = []


@ -42,26 +42,40 @@ pub fn link(
pub fn rebuild_host(host_input_path: &Path) {
let c_host_src = host_input_path.with_file_name("host.c");
let c_host_dest = host_input_path.with_file_name("c_host.o");
let zig_host_src = host_input_path.with_file_name("host.zig");
let rust_host_src = host_input_path.with_file_name("host.rs");
let rust_host_dest = host_input_path.with_file_name("rust_host.o");
let cargo_host_src = host_input_path.with_file_name("Cargo.toml");
let host_dest = host_input_path.with_file_name("host.o");
let env_path = env::var("PATH").unwrap_or_else(|_| "".to_string());
// Compile host.c
let output = Command::new("clang")
.env_clear()
.env("PATH", &env_path)
.args(&[
"-c",
c_host_src.to_str().unwrap(),
"-o",
c_host_dest.to_str().unwrap(),
])
.output()
.unwrap();
validate_output("host.c", "clang", output);
if zig_host_src.exists() {
// Compile host.zig
let output = Command::new("zig")
.env_clear()
.env("PATH", &env_path)
.args(&["build-obj", zig_host_src.to_str().unwrap()])
.output()
.unwrap();
validate_output("host.zig", "zig", output);
} else {
// Compile host.c
let output = Command::new("clang")
.env_clear()
.env("PATH", &env_path)
.args(&[
"-c",
c_host_src.to_str().unwrap(),
"-o",
c_host_dest.to_str().unwrap(),
])
.output()
.unwrap();
validate_output("host.c", "clang", output);
}
if cargo_host_src.exists() {
// Compile and link Cargo.toml, if it exists
@ -132,15 +146,15 @@ pub fn rebuild_host(host_input_path: &Path) {
.unwrap();
validate_output("rust_host.o", "rm", output);
} else {
// Clean up rust_host.o
} else if c_host_dest.exists() {
// Clean up c_host.o
let output = Command::new("mv")
.env_clear()
.args(&[c_host_dest, host_dest])
.output()
.unwrap();
validate_output("rust_host.o", "mv", output);
validate_output("c_host.o", "mv", output);
}
}


@ -41,7 +41,7 @@ pub fn arch_str(target: &Triple) -> &'static str {
"x86-64"
}
Architecture::Aarch64(_) => {
Architecture::Aarch64(_) if cfg!(feature = "target-aarch64") => {
Target::initialize_aarch64(&InitializationConfig::default());
"aarch64"
}
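For reference, a minimal sketch (not from this diff) of the pattern used above: `cfg!(feature = "...")` expands to a compile-time boolean, so the guarded match arm is only taken when the crate is built with the corresponding Cargo feature, e.g. `--features target-aarch64`.

```rust
fn backend_for(arch: &str) -> &'static str {
    match arch {
        "x86-64" => "x86-64 backend",
        // Only matches when the crate was built with the target-aarch64 feature.
        "aarch64" if cfg!(feature = "target-aarch64") => "aarch64 backend",
        _ => "unsupported architecture",
    }
}
```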


@ -38,7 +38,7 @@ pub fn build(b: *Builder) void {
const ir_to_bitcode = b.addSystemCommand(&[_][]const u8{
"llvm-as-10",
ir_out_file,
bitcode_path_arg
bitcode_path_arg,
});
const bicode = b.step("bc", "Build LLVM ir and convert to bitcode");


@ -1,6 +1,9 @@
#!/bin/bash
set -eux
set -euxo pipefail
# Test every zig
find src/*.zig -type f -exec zig test {} \;
find src/*.zig -type f -print0 | xargs -n 1 -0 zig test --library c
# Check the formatting of every zig file
find src/*.zig -type f -print0 | xargs -n 1 -0 zig fmt --check

File diff suppressed because it is too large.


@ -4,28 +4,36 @@ const testing = std.testing;
// Num Module
const num = @import("num.zig");
comptime { exportNumFn(num.atan, "atan"); }
comptime { exportNumFn(num.isFinite, "is_finite"); }
comptime { exportNumFn(num.powInt, "pow_int"); }
comptime { exportNumFn(num.acos, "acos"); }
comptime { exportNumFn(num.asin, "asin"); }
comptime {
exportNumFn(num.atan, "atan");
exportNumFn(num.isFinite, "is_finite");
exportNumFn(num.powInt, "pow_int");
exportNumFn(num.acos, "acos");
exportNumFn(num.asin, "asin");
}
// Str Module
const str = @import("str.zig");
comptime { exportStrFn(str.strSplitInPlace, "str_split_in_place"); }
comptime { exportStrFn(str.countSegments, "count_segments"); }
comptime { exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters"); }
comptime { exportStrFn(str.startsWith, "starts_with"); }
comptime {
exportStrFn(str.strSplitInPlace, "str_split_in_place");
exportStrFn(str.countSegments, "count_segments");
exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
exportStrFn(str.startsWith, "starts_with");
exportStrFn(str.endsWith, "ends_with");
exportStrFn(str.strConcat, "concat");
exportStrFn(str.strNumberOfBytes, "number_of_bytes");
exportStrFn(str.strFromInt, "from_int");
}
// Export helpers - Must be run inside a comptime
fn exportBuiltinFn(comptime fn_target: anytype, comptime fn_name: []const u8) void {
@export(fn_target, .{ .name = "roc_builtins." ++ fn_name, .linkage = .Strong });
fn exportBuiltinFn(comptime func: anytype, comptime funcName: []const u8) void {
@export(func, .{ .name = "roc_builtins." ++ funcName, .linkage = .Strong });
}
fn exportNumFn(comptime fn_target: anytype, comptime fn_name: []const u8) void {
exportBuiltinFn(fn_target, "num." ++ fn_name);
fn exportNumFn(comptime func: anytype, comptime funcName: []const u8) void {
exportBuiltinFn(func, "num." ++ funcName);
}
fn exportStrFn(comptime fn_target: anytype, comptime fn_name: []const u8) void {
exportBuiltinFn(fn_target, "str." ++ fn_name);
fn exportStrFn(comptime func: anytype, comptime funcName: []const u8) void {
exportBuiltinFn(func, "str." ++ funcName);
}
// Run all tests in imported modules

File diff suppressed because it is too large.


@ -24,6 +24,10 @@ pub const NUM_IS_FINITE: &str = "roc_builtins.num.is_finite";
pub const NUM_POW_INT: &str = "roc_builtins.num.pow_int";
pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
pub const STR_CONCAT: &str = "roc_builtins.str.concat";
pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
pub const STR_STARTS_WITH: &str = "roc_builtins.str.starts_with";
pub const STR_ENDS_WITH: &str = "roc_builtins.str.ends_with";
pub const STR_NUMBER_OF_BYTES: &str = "roc_builtins.str.number_of_bytes";
pub const STR_FROM_INT: &str = "roc_builtins.str.from_int";


@ -417,12 +417,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
top_level_function(vec![str_type(), str_type()], Box::new(bool_type())),
);
// endsWith : Str, Str -> Bool
add_type(
Symbol::STR_ENDS_WITH,
top_level_function(vec![str_type(), str_type()], Box::new(bool_type())),
);
// countGraphemes : Str -> Int
add_type(
Symbol::STR_COUNT_GRAPHEMES,
top_level_function(vec![str_type()], Box::new(int_type())),
);
// fromInt : Int -> Str
add_type(
Symbol::STR_FROM_INT,
top_level_function(vec![int_type()], Box::new(str_type())),
);
// List module
// get : List elem, Int -> Result elem [ OutOfBounds ]*


@ -1096,12 +1096,24 @@ pub fn types() -> MutMap<Symbol, (SolvedType, Region)> {
unique_function(vec![str_type(star1), str_type(star2)], bool_type(star3))
});
// Str.endsWith : Attr * Str, Attr * Str -> Attr * Bool
add_type(Symbol::STR_ENDS_WITH, {
let_tvars! { star1, star2, star3 };
unique_function(vec![str_type(star1), str_type(star2)], bool_type(star3))
});
// Str.countGraphemes : Attr * Str, -> Attr * Int
add_type(Symbol::STR_COUNT_GRAPHEMES, {
let_tvars! { star1, star2 };
unique_function(vec![str_type(star1)], int_type(star2))
});
// fromInt : Attr * Int -> Attr * Str
add_type(Symbol::STR_FROM_INT, {
let_tvars! { star1, star2 };
unique_function(vec![int_type(star1)], str_type(star2))
});
// Result module
// map : Attr * (Result (Attr a e))


@ -54,7 +54,9 @@ pub fn builtin_defs(var_store: &mut VarStore) -> MutMap<Symbol, Def> {
Symbol::STR_SPLIT => str_split,
Symbol::STR_IS_EMPTY => str_is_empty,
Symbol::STR_STARTS_WITH => str_starts_with,
Symbol::STR_ENDS_WITH => str_ends_with,
Symbol::STR_COUNT_GRAPHEMES => str_count_graphemes,
Symbol::STR_FROM_INT => str_from_int,
Symbol::LIST_LEN => list_len,
Symbol::LIST_GET => list_get,
Symbol::LIST_SET => list_set,
@ -989,6 +991,26 @@ fn str_starts_with(symbol: Symbol, var_store: &mut VarStore) -> Def {
)
}
/// Str.endsWith : Str, Str -> Bool
fn str_ends_with(symbol: Symbol, var_store: &mut VarStore) -> Def {
let str_var = var_store.fresh();
let bool_var = var_store.fresh();
let body = RunLowLevel {
op: LowLevel::StrEndsWith,
args: vec![(str_var, Var(Symbol::ARG_1)), (str_var, Var(Symbol::ARG_2))],
ret_var: bool_var,
};
defn(
symbol,
vec![(str_var, Symbol::ARG_1), (str_var, Symbol::ARG_2)],
var_store,
body,
bool_var,
)
}
/// Str.countGraphemes : Str -> Int
fn str_count_graphemes(symbol: Symbol, var_store: &mut VarStore) -> Def {
let str_var = var_store.fresh();
@ -1009,6 +1031,26 @@ fn str_count_graphemes(symbol: Symbol, var_store: &mut VarStore) -> Def {
)
}
/// Str.fromInt : Int -> Str
fn str_from_int(symbol: Symbol, var_store: &mut VarStore) -> Def {
let int_var = var_store.fresh();
let str_var = var_store.fresh();
let body = RunLowLevel {
op: LowLevel::StrFromInt,
args: vec![(int_var, Var(Symbol::ARG_1))],
ret_var: str_var,
};
defn(
symbol,
vec![(int_var, Symbol::ARG_1)],
var_store,
body,
str_var,
)
}
/// List.concat : List elem, List elem -> List elem
fn list_concat(symbol: Symbol, var_store: &mut VarStore) -> Def {
let list_var = var_store.fresh();


@ -4,7 +4,8 @@ use crate::llvm::build_list::{
list_reverse, list_set, list_single, list_sum, list_walk, list_walk_backwards,
};
use crate::llvm::build_str::{
str_concat, str_count_graphemes, str_len, str_split, str_starts_with, CHAR_LAYOUT,
str_concat, str_count_graphemes, str_ends_with, str_from_int, str_number_of_bytes, str_split,
str_starts_with, CHAR_LAYOUT,
};
use crate::llvm::compare::{build_eq, build_neq};
use crate::llvm::convert::{
@ -604,7 +605,9 @@ pub fn build_exp_expr<'a, 'ctx, 'env>(
match expr {
Literal(literal) => build_exp_literal(env, literal),
RunLowLevel(op, symbols) => run_low_level(env, scope, parent, layout, *op, symbols),
RunLowLevel(op, symbols) => {
run_low_level(env, layout_ids, scope, parent, layout, *op, symbols)
}
ForeignCall {
foreign_symbol,
@ -1165,12 +1168,10 @@ fn list_literal<'a, 'ctx, 'env>(
let builder = env.builder;
let len_u64 = elems.len() as u64;
let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64;
let ptr = {
let bytes_len = elem_bytes * len_u64;
let len_type = env.ptr_int();
let len = len_type.const_int(bytes_len, false);
let len = len_type.const_int(len_u64, false);
allocate_list(env, inplace, elem_layout, len)
@ -2383,6 +2384,7 @@ fn call_with_args<'a, 'ctx, 'env>(
}
#[derive(Copy, Clone)]
#[repr(u8)]
pub enum InPlace {
InPlace,
Clone,
@ -2409,6 +2411,7 @@ pub static COLD_CALL_CONV: u32 = 9;
fn run_low_level<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
layout_ids: &mut LayoutIds<'a>,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
layout: &Layout<'a>,
@ -2424,15 +2427,25 @@ fn run_low_level<'a, 'ctx, 'env>(
let inplace = get_inplace_from_layout(layout);
str_concat(env, inplace, scope, parent, args[0], args[1])
str_concat(env, inplace, scope, args[0], args[1])
}
StrStartsWith => {
// Str.startsWith : Str, Str -> Bool
debug_assert_eq!(args.len(), 2);
let inplace = get_inplace_from_layout(layout);
str_starts_with(env, scope, args[0], args[1])
}
StrEndsWith => {
// Str.endsWith : Str, Str -> Bool
debug_assert_eq!(args.len(), 2);
str_starts_with(env, inplace, scope, parent, args[0], args[1])
str_ends_with(env, scope, args[0], args[1])
}
StrFromInt => {
// Str.fromInt : Int -> Str
debug_assert_eq!(args.len(), 1);
str_from_int(env, scope, args[0])
}
StrSplit => {
// Str.split : Str, Str -> List Str
@ -2440,14 +2453,13 @@ fn run_low_level<'a, 'ctx, 'env>(
let inplace = get_inplace_from_layout(layout);
str_split(env, scope, parent, inplace, args[0], args[1])
str_split(env, scope, inplace, args[0], args[1])
}
StrIsEmpty => {
// Str.isEmpty : Str -> Bool
debug_assert_eq!(args.len(), 1);
let wrapper_ptr = ptr_from_symbol(scope, args[0]);
let len = str_len(env, parent, *wrapper_ptr);
let len = str_number_of_bytes(env, scope, args[0]);
let is_zero = env.builder.build_int_compare(
IntPredicate::EQ,
len,
@ -2460,7 +2472,7 @@ fn run_low_level<'a, 'ctx, 'env>(
// Str.countGraphemes : Str -> Int
debug_assert_eq!(args.len(), 1);
str_count_graphemes(env, scope, parent, args[0])
str_count_graphemes(env, scope, args[0])
}
ListLen => {
// List.len : List * -> Int
@ -2522,7 +2534,16 @@ fn run_low_level<'a, 'ctx, 'env>(
let inplace = get_inplace_from_layout(layout);
list_map(env, inplace, parent, func, func_layout, list, list_layout)
list_map(
env,
layout_ids,
inplace,
parent,
func,
func_layout,
list,
list_layout,
)
}
ListKeepIf => {
// List.keepIf : List elem, (elem -> Bool) -> List elem


@ -3,12 +3,13 @@ use crate::llvm::build::{
};
use crate::llvm::compare::build_eq;
use crate::llvm::convert::{basic_type_from_layout, collection, get_ptr_type};
use crate::llvm::refcounting::{decrement_refcount_layout, increment_refcount_layout};
use inkwell::builder::Builder;
use inkwell::context::Context;
use inkwell::types::{BasicTypeEnum, PointerType};
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
use inkwell::{AddressSpace, IntPredicate};
use roc_mono::layout::{Builtin, Layout, MemoryMode};
use roc_mono::layout::{Builtin, Layout, LayoutIds, MemoryMode};
/// List.single : a -> List a
pub fn list_single<'a, 'ctx, 'env>(
@ -1318,8 +1319,89 @@ pub fn list_keep_if_help<'a, 'ctx, 'env>(
}
/// List.map : List before, (before -> after) -> List after
macro_rules! list_map_help {
($env:expr, $layout_ids:expr, $inplace:expr, $parent:expr, $func:expr, $func_layout:expr, $list:expr, $list_layout:expr, $function_ptr:expr, $function_return_layout: expr, $closure_info:expr) => {{
let layout_ids = $layout_ids;
let inplace = $inplace;
let parent = $parent;
let func = $func;
let func_layout = $func_layout;
let list = $list;
let list_layout = $list_layout;
let function_ptr = $function_ptr;
let function_return_layout = $function_return_layout;
let closure_info : Option<(&Layout, BasicValueEnum)> = $closure_info;
let non_empty_fn = |elem_layout: &Layout<'a>,
len: IntValue<'ctx>,
list_wrapper: StructValue<'ctx>| {
let ctx = $env.context;
let builder = $env.builder;
let ret_list_ptr = allocate_list($env, inplace, function_return_layout, len);
let elem_type = basic_type_from_layout($env.arena, ctx, elem_layout, $env.ptr_bytes);
let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic);
let list_ptr = load_list_ptr(builder, list_wrapper, ptr_type);
let list_loop = |index, before_elem| {
increment_refcount_layout($env, parent, layout_ids, before_elem, elem_layout);
let arguments = match closure_info {
Some((closure_data_layout, closure_data)) => {
increment_refcount_layout( $env, parent, layout_ids, closure_data, closure_data_layout);
bumpalo::vec![in $env.arena; before_elem, closure_data]
}
None => bumpalo::vec![in $env.arena; before_elem],
};
let call_site_value = builder.build_call(function_ptr, &arguments, "map_func");
// set the calling convention explicitly for this call
call_site_value.set_call_convention(crate::llvm::build::FAST_CALL_CONV);
let after_elem = call_site_value
.try_as_basic_value()
.left()
.unwrap_or_else(|| panic!("LLVM error: Invalid call by pointer."));
// The pointer to the element in the mapped-over list
let after_elem_ptr = unsafe {
builder.build_in_bounds_gep(ret_list_ptr, &[index], "load_index_after_list")
};
// Mutate the new array in-place to change the element.
builder.build_store(after_elem_ptr, after_elem);
};
incrementing_elem_loop(builder, ctx, parent, list_ptr, len, "#index", list_loop);
let result = store_list($env, ret_list_ptr, len);
// decrement the input list and function (if it's a closure)
decrement_refcount_layout($env, parent, layout_ids, list, list_layout);
decrement_refcount_layout($env, parent, layout_ids, func, func_layout);
if let Some((closure_data_layout, closure_data)) = closure_info {
decrement_refcount_layout( $env, parent, layout_ids, closure_data, closure_data_layout);
}
result
};
if_list_is_not_empty($env, parent, non_empty_fn, list, list_layout, "List.map")
}};
}
/// List.map : List before, (before -> after) -> List after
#[allow(clippy::too_many_arguments)]
pub fn list_map<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
layout_ids: &mut LayoutIds<'a>,
inplace: InPlace,
parent: FunctionValue<'ctx>,
func: BasicValueEnum<'ctx>,
@ -1329,46 +1411,50 @@ pub fn list_map<'a, 'ctx, 'env>(
) -> BasicValueEnum<'ctx> {
match (func, func_layout) {
(BasicValueEnum::PointerValue(func_ptr), Layout::FunctionPointer(_, ret_elem_layout)) => {
let non_empty_fn = |elem_layout: &Layout<'a>,
len: IntValue<'ctx>,
list_wrapper: StructValue<'ctx>| {
let ctx = env.context;
let builder = env.builder;
list_map_help!(
env,
layout_ids,
inplace,
parent,
func,
func_layout,
list,
list_layout,
func_ptr,
ret_elem_layout,
None
)
}
(
BasicValueEnum::StructValue(ptr_and_data),
Layout::Closure(_, closure_layout, ret_elem_layout),
) => {
let builder = env.builder;
let ret_list_ptr = allocate_list(env, inplace, ret_elem_layout, len);
let func_ptr = builder
.build_extract_value(ptr_and_data, 0, "function_ptr")
.unwrap()
.into_pointer_value();
let elem_type = basic_type_from_layout(env.arena, ctx, elem_layout, env.ptr_bytes);
let ptr_type = get_ptr_type(&elem_type, AddressSpace::Generic);
let closure_data = builder
.build_extract_value(ptr_and_data, 1, "closure_data")
.unwrap();
let list_ptr = load_list_ptr(builder, list_wrapper, ptr_type);
let closure_data_layout = closure_layout.as_block_of_memory_layout();
let list_loop = |index, before_elem| {
let call_site_value =
builder.build_call(func_ptr, env.arena.alloc([before_elem]), "map_func");
// set the calling convention explicitly for this call
call_site_value.set_call_convention(crate::llvm::build::FAST_CALL_CONV);
let after_elem = call_site_value
.try_as_basic_value()
.left()
.unwrap_or_else(|| panic!("LLVM error: Invalid call by pointer."));
// The pointer to the element in the mapped-over list
let after_elem_ptr = unsafe {
builder.build_in_bounds_gep(ret_list_ptr, &[index], "load_index_after_list")
};
// Mutate the new array in-place to change the element.
builder.build_store(after_elem_ptr, after_elem);
};
incrementing_elem_loop(builder, ctx, parent, list_ptr, len, "#index", list_loop);
store_list(env, ret_list_ptr, len)
};
if_list_is_not_empty(env, parent, non_empty_fn, list, list_layout, "List.map")
list_map_help!(
env,
layout_ids,
inplace,
parent,
func,
func_layout,
list,
list_layout,
func_ptr,
ret_elem_layout,
Some((&closure_data_layout, closure_data))
)
}
_ => {
unreachable!(
@ -1989,7 +2075,6 @@ pub fn allocate_list<'a, 'ctx, 'env>(
let len_type = env.ptr_int();
let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64;
let bytes_per_element = len_type.const_int(elem_bytes, false);
let number_of_data_bytes = builder.build_int_mul(bytes_per_element, length, "data_length");
let rc1 = match inplace {


@ -1,93 +1,120 @@
use crate::llvm::build::{
call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope,
};
use crate::llvm::build_list::{
allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
};
use crate::llvm::build_list::{allocate_list, store_list};
use crate::llvm::convert::collection;
use inkwell::builder::Builder;
use inkwell::types::BasicTypeEnum;
use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
use inkwell::{AddressSpace, IntPredicate};
use inkwell::values::{BasicValueEnum, IntValue, StructValue};
use inkwell::AddressSpace;
use roc_builtins::bitcode;
use roc_module::symbol::Symbol;
use roc_mono::layout::{Builtin, Layout};
use super::build::load_symbol;
pub static CHAR_LAYOUT: Layout = Layout::Builtin(Builtin::Int8);
/// Str.split : Str, Str -> List Str
pub fn str_split<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
inplace: InPlace,
str_symbol: Symbol,
delimiter_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let builder = env.builder;
let ctx = env.context;
let str_ptr = ptr_from_symbol(scope, str_symbol);
let delimiter_ptr = ptr_from_symbol(scope, delimiter_symbol);
let str_i128 = str_symbol_to_i128(env, scope, str_symbol);
let delim_i128 = str_symbol_to_i128(env, scope, delimiter_symbol);
let str_wrapper_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
let segment_count = call_bitcode_fn(
env,
parent,
*str_ptr,
str_wrapper_type,
|str_bytes_ptr, str_len, _str_smallness| {
load_str(
env,
parent,
*delimiter_ptr,
str_wrapper_type,
|delimiter_bytes_ptr, delimiter_len, _delimiter_smallness| {
let segment_count = call_bitcode_fn(
env,
&[
BasicValueEnum::PointerValue(str_bytes_ptr),
BasicValueEnum::IntValue(str_len),
BasicValueEnum::PointerValue(delimiter_bytes_ptr),
BasicValueEnum::IntValue(delimiter_len),
],
&bitcode::STR_COUNT_SEGMENTS,
)
.into_int_value();
// a pointer to the elements
let ret_list_ptr =
allocate_list(env, inplace, &Layout::Builtin(Builtin::Str), segment_count);
// get the RocStr type defined by zig
let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
// convert `*mut { *mut u8, i64 }` to `*mut RocStr`
let ret_list_ptr_zig_rocstr = builder.build_bitcast(
ret_list_ptr,
roc_str_type.ptr_type(AddressSpace::Generic),
"convert_to_zig_rocstr",
);
call_void_bitcode_fn(
env,
&[
ret_list_ptr_zig_rocstr,
BasicValueEnum::IntValue(segment_count),
BasicValueEnum::PointerValue(str_bytes_ptr),
BasicValueEnum::IntValue(str_len),
BasicValueEnum::PointerValue(delimiter_bytes_ptr),
BasicValueEnum::IntValue(delimiter_len),
],
&bitcode::STR_STR_SPLIT_IN_PLACE,
);
store_list(env, ret_list_ptr, segment_count)
},
)
},
&[str_i128.into(), delim_i128.into()],
&bitcode::STR_COUNT_SEGMENTS,
)
.into_int_value();
// a pointer to the elements
let ret_list_ptr = allocate_list(env, inplace, &Layout::Builtin(Builtin::Str), segment_count);
// get the RocStr type defined by zig
let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
// convert `*mut { *mut u8, i64 }` to `*mut RocStr`
let ret_list_ptr_zig_rocstr = builder.build_bitcast(
ret_list_ptr,
roc_str_type.ptr_type(AddressSpace::Generic),
"convert_to_zig_rocstr",
);
call_void_bitcode_fn(
env,
&[
ret_list_ptr_zig_rocstr,
BasicValueEnum::IntValue(segment_count),
str_i128.into(),
delim_i128.into(),
],
&bitcode::STR_STR_SPLIT_IN_PLACE,
);
store_list(env, ret_list_ptr, segment_count)
}
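/// Load a Str (a two-word wrapper struct) out of its stack slot as a single
/// i128 value, so it can be passed by value to the Zig bitcode builtins.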
fn str_symbol_to_i128<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
symbol: Symbol,
) -> IntValue<'ctx> {
let str_ptr = ptr_from_symbol(scope, symbol);
let i128_ptr = env
.builder
.build_bitcast(
*str_ptr,
env.context.i128_type().ptr_type(AddressSpace::Generic),
"cast",
)
.into_pointer_value();
env.builder
.build_load(i128_ptr, "load_as_i128")
.into_int_value()
}
fn zig_str_to_struct<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
zig_str: StructValue<'ctx>,
) -> StructValue<'ctx> {
let builder = env.builder;
// get the RocStr type defined by zig
let zig_str_type = env.module.get_struct_type("str.RocStr").unwrap();
let ret_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes));
// a roundabout way of casting (LLVM does not accept a standard bitcast)
let allocation = builder.build_alloca(zig_str_type, "zig_result");
builder.build_store(allocation, zig_str);
let ptr3 = builder
.build_bitcast(
allocation,
env.context.i128_type().ptr_type(AddressSpace::Generic),
"cast",
)
.into_pointer_value();
let ptr4 = builder
.build_bitcast(
ptr3,
ret_type.into_struct_type().ptr_type(AddressSpace::Generic),
"cast",
)
.into_pointer_value();
builder.build_load(ptr4, "load").into_struct_value()
}
/// Str.concat : Str, Str -> Str
@ -95,622 +122,81 @@ pub fn str_concat<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
inplace: InPlace,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
first_str_symbol: Symbol,
second_str_symbol: Symbol,
str1_symbol: Symbol,
str2_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let builder = env.builder;
let ctx = env.context;
// swap the arguments; the second argument comes before the first in the output string
let str1_i128 = str_symbol_to_i128(env, scope, str1_symbol);
let str2_i128 = str_symbol_to_i128(env, scope, str2_symbol);
let second_str_ptr = ptr_from_symbol(scope, second_str_symbol);
let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
load_str(
let zig_result = call_bitcode_fn(
env,
parent,
*second_str_ptr,
ret_type,
|second_str_ptr, second_str_len, second_str_smallness| {
load_str(
env,
parent,
*first_str_ptr,
ret_type,
|first_str_ptr, first_str_len, first_str_smallness| {
// first_str_len > 0
// We do this check to avoid allocating memory. If the first input
// str is empty, then we can just return the second str cloned
let first_str_length_comparison = str_is_not_empty(env, first_str_len);
let if_first_str_is_empty = || {
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return an empty str
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_nonempty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
second_str_smallness,
second_str_len,
second_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
let if_second_str_is_empty = || empty_list(env);
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_nonempty,
if_second_str_is_empty,
ret_type,
)
};
let if_first_str_is_not_empty = || {
let if_second_str_is_empty = || {
let (new_wrapper, _) = clone_nonempty_str(
env,
inplace,
first_str_smallness,
first_str_len,
first_str_ptr,
);
BasicValueEnum::StructValue(new_wrapper)
};
// second_str_len > 0
// We do this check to avoid allocating memory. If the second input
// str is empty, then we can just return the first str cloned
let second_str_length_comparison = str_is_not_empty(env, second_str_len);
let if_second_str_is_not_empty = || {
let combined_str_len = builder.build_int_add(
first_str_len,
second_str_len,
"add_list_lengths",
);
// The combined string is big iff its length is
// greater than or equal to the size in memory
// of a small str (e.g. len >= 16 on 64-bit targets)
let is_big = env.builder.build_int_compare(
IntPredicate::UGE,
combined_str_len,
env.ptr_int().const_int(env.small_str_bytes() as u64, false),
"str_is_big",
);
let if_big = || {
let combined_str_ptr =
allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
store_list(env, combined_str_ptr, combined_str_len)
};
let if_small = || {
let combined_str_ptr = builder.build_array_alloca(
ctx.i8_type(),
ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
"alloca_small_str",
);
// TODO replace FIRST_LOOP with a memcpy!
// FIRST LOOP
let first_loop = |first_index, first_str_elem| {
// The pointer to the element in the combined list
let combined_str_elem_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_elem_ptr, first_str_elem);
};
let index_name = "#index";
let index_alloca = incrementing_elem_loop(
builder,
ctx,
parent,
first_str_ptr,
first_str_len,
index_name,
first_loop,
);
// Reset the index variable to 0
builder
.build_store(index_alloca, ctx.i64_type().const_int(0, false));
// TODO replace SECOND_LOOP with a memcpy!
// SECOND LOOP
let second_loop = |second_index, second_str_elem| {
// The pointer to the element in the combined str.
// Note that the pointer does not start at the index
// 0, it starts at the index of first_str_len. In that
// sense it is "offset".
let offset_combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[first_str_len],
"elem",
)
};
// The pointer to the char from the second str
// in the combined list
let combined_str_char_ptr = unsafe {
builder.build_in_bounds_gep(
offset_combined_str_char_ptr,
&[second_index],
"load_index_combined_list",
)
};
// Mutate the new array in-place to change the element.
builder.build_store(combined_str_char_ptr, second_str_elem);
};
incrementing_elem_loop(
builder,
ctx,
parent,
second_str_ptr,
second_str_len,
index_name,
second_loop,
);
let final_byte = builder.build_int_cast(
combined_str_len,
ctx.i8_type(),
"str_len_to_i8",
);
let final_byte = builder.build_or(
final_byte,
ctx.i8_type().const_int(0b1000_0000, false),
"str_len_set_discriminant",
);
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
combined_str_ptr,
&[ctx
.i8_type()
.const_int(env.small_str_bytes() as u64 - 1, false)],
"str_literal_final_byte",
)
};
builder.build_store(final_byte_ptr, final_byte);
builder.build_load(
builder
.build_bitcast(
combined_str_ptr,
collection(ctx, env.ptr_bytes)
.ptr_type(AddressSpace::Generic),
"cast_collection",
)
.into_pointer_value(),
"small_str_array",
)
};
// If the combined length fits in a small string,
// write into a small string!
build_basic_phi2(
env,
parent,
is_big,
// the result of a Str.concat is most likely big
if_big,
if_small,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
second_str_length_comparison,
if_second_str_is_not_empty,
if_second_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
};
build_basic_phi2(
env,
parent,
first_str_length_comparison,
if_first_str_is_not_empty,
if_first_str_is_empty,
BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
)
},
)
},
)
}
/// Obtain the string's length, cast from i8 to usize
fn str_len_from_final_byte<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
final_byte: IntValue<'ctx>,
) -> IntValue<'ctx> {
let builder = env.builder;
let ctx = env.context;
let bitmask = ctx.i8_type().const_int(0b0111_1111, false);
let len_i8 = builder.build_and(final_byte, bitmask, "small_str_length");
builder.build_int_cast(len_i8, env.ptr_int(), "len_as_usize")
}
/// Used by LowLevel::StrIsEmpty
pub fn str_len<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
parent: FunctionValue<'ctx>,
wrapper_ptr: PointerValue<'ctx>,
) -> IntValue<'ctx> {
let builder = env.builder;
let if_small = |final_byte| {
let len = str_len_from_final_byte(env, final_byte);
BasicValueEnum::IntValue(len)
};
let if_big = |_| {
let len = big_str_len(
builder,
builder
.build_load(wrapper_ptr, "big_str")
.into_struct_value(),
);
BasicValueEnum::IntValue(len)
};
if_small_str(
env,
parent,
wrapper_ptr,
if_small,
if_big,
BasicTypeEnum::IntType(env.ptr_int()),
)
.into_int_value()
}
fn load_str<'a, 'ctx, 'env, Callback>(
env: &Env<'a, 'ctx, 'env>,
parent: FunctionValue<'ctx>,
wrapper_ptr: PointerValue<'ctx>,
ret_type: BasicTypeEnum<'ctx>,
cb: Callback,
) -> BasicValueEnum<'ctx>
where
Callback: Fn(PointerValue<'ctx>, IntValue<'ctx>, Smallness) -> BasicValueEnum<'ctx>,
{
let builder = env.builder;
let if_small = |final_byte| {
cb(
cast_str_wrapper_to_array(env, wrapper_ptr),
str_len_from_final_byte(env, final_byte),
Smallness::Small,
)
};
let if_big = |wrapper_struct| {
let list_ptr = load_list_ptr(
builder,
wrapper_struct,
env.context.i8_type().ptr_type(AddressSpace::Generic),
);
cb(
list_ptr,
big_str_len(builder, wrapper_struct),
Smallness::Big,
)
};
if_small_str(env, parent, wrapper_ptr, if_small, if_big, ret_type)
}
#[derive(Debug, Copy, Clone)]
enum Smallness {
Small,
Big,
}
fn clone_nonempty_str<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
inplace: InPlace,
smallness: Smallness,
len: IntValue<'ctx>,
bytes_ptr: PointerValue<'ctx>,
) -> (StructValue<'ctx>, PointerValue<'ctx>) {
let builder = env.builder;
let ctx = env.context;
let ptr_bytes = env.ptr_bytes;
// Allocate space for the new str that we'll copy into.
match smallness {
Smallness::Small => {
let wrapper_struct_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr);
let wrapper_struct = builder.build_load(wrapper_struct_ptr, "str_wrapper");
let alloca = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone");
builder.build_store(alloca, wrapper_struct);
(wrapper_struct.into_struct_value(), alloca)
}
Smallness::Big => {
let clone_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len);
// TODO check if malloc returned null; if so, runtime error for OOM!
// Copy the bytes from the original array into the new
// one we just malloc'd.
builder
.build_memcpy(clone_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len)
.unwrap();
// Create a fresh wrapper struct for the newly populated array
let struct_type = collection(ctx, env.ptr_bytes);
let mut struct_val;
// Store the pointer
struct_val = builder
.build_insert_value(
struct_type.get_undef(),
clone_ptr,
Builtin::WRAPPER_PTR,
"insert_ptr",
)
.unwrap();
// Store the length
struct_val = builder
.build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len")
.unwrap();
let answer = builder
.build_bitcast(
struct_val.into_struct_value(),
collection(ctx, ptr_bytes),
"cast_collection",
)
.into_struct_value();
(answer, clone_ptr)
}
}
}
fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
bytes_ptr: PointerValue<'ctx>,
) -> PointerValue<'ctx> {
let struct_ptr_type = collection(env.context, env.ptr_bytes).ptr_type(AddressSpace::Generic);
env.builder
.build_bitcast(bytes_ptr, struct_ptr_type, "str_as_struct_ptr")
.into_pointer_value()
}
fn cast_str_wrapper_to_array<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
wrapper_ptr: PointerValue<'ctx>,
) -> PointerValue<'ctx> {
let array_ptr_type = env.context.i8_type().ptr_type(AddressSpace::Generic);
env.builder
.build_bitcast(wrapper_ptr, array_ptr_type, "str_as_array_ptr")
.into_pointer_value()
}
fn if_small_str<'a, 'ctx, 'env, IfSmallFn, IfBigFn>(
env: &Env<'a, 'ctx, 'env>,
parent: FunctionValue<'ctx>,
wrapper_ptr: PointerValue<'ctx>,
mut if_small: IfSmallFn,
mut if_big: IfBigFn,
ret_type: BasicTypeEnum<'ctx>,
) -> BasicValueEnum<'ctx>
where
IfSmallFn: FnMut(IntValue<'ctx>) -> BasicValueEnum<'ctx>,
IfBigFn: FnMut(StructValue<'ctx>) -> BasicValueEnum<'ctx>,
{
let builder = env.builder;
let ctx = env.context;
let byte_array_ptr = cast_str_wrapper_to_array(env, wrapper_ptr);
let final_byte_ptr = unsafe {
builder.build_in_bounds_gep(
byte_array_ptr,
&[ctx
&[
env.context
.i32_type()
.const_int(env.ptr_bytes as u64, false)
.into(),
env.context
.i8_type()
.const_int(env.small_str_bytes() as u64 - 1, false)],
"final_byte_ptr",
)
};
let final_byte = builder
.build_load(final_byte_ptr, "load_final_byte")
.into_int_value();
let bitmask = ctx.i8_type().const_int(0b1000_0000, false);
let is_small_i8 = builder.build_int_compare(
IntPredicate::NE,
ctx.i8_type().const_zero(),
builder.build_and(final_byte, bitmask, "is_small"),
"is_small_comparison",
);
let is_small = builder.build_int_cast(is_small_i8, ctx.bool_type(), "is_small_as_bool");
build_basic_phi2(
env,
parent,
is_small,
|| if_small(final_byte),
|| {
if_big(
builder
.build_load(wrapper_ptr, "load_wrapper_struct")
.into_struct_value(),
)
},
ret_type,
.const_int(inplace as u64, false)
.into(),
str1_i128.into(),
str2_i128.into(),
],
&bitcode::STR_CONCAT,
)
.into_struct_value();
zig_str_to_struct(env, zig_result).into()
}
fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>) -> IntValue<'ctx> {
builder
.build_extract_value(wrapper_struct, Builtin::WRAPPER_LEN, "big_str_len")
.unwrap()
.into_int_value()
}
pub fn str_number_of_bytes<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
str_symbol: Symbol,
) -> IntValue<'ctx> {
let str_i128 = str_symbol_to_i128(env, scope, str_symbol);
fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> {
env.builder.build_int_compare(
IntPredicate::UGT,
len,
env.ptr_int().const_zero(),
"str_len_is_nonzero",
)
// the builtin will always return a u64
let length =
call_bitcode_fn(env, &[str_i128.into()], &bitcode::STR_NUMBER_OF_BYTES).into_int_value();
// cast to the appropriate usize of the current build
env.builder
.build_int_cast(length, env.ptr_int(), "len_as_usize")
}
/// Str.startsWith : Str, Str -> Bool
pub fn str_starts_with<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
_inplace: InPlace,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
str_symbol: Symbol,
prefix_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let ctx = env.context;
let str_i128 = str_symbol_to_i128(env, scope, str_symbol);
let prefix_i128 = str_symbol_to_i128(env, scope, prefix_symbol);
let str_ptr = ptr_from_symbol(scope, str_symbol);
let prefix_ptr = ptr_from_symbol(scope, prefix_symbol);
let ret_type = BasicTypeEnum::IntType(ctx.bool_type());
load_str(
call_bitcode_fn(
env,
parent,
*str_ptr,
ret_type,
|str_bytes_ptr, str_len, _str_smallness| {
load_str(
env,
parent,
*prefix_ptr,
ret_type,
|prefix_bytes_ptr, prefix_len, _prefix_smallness| {
call_bitcode_fn(
env,
&[
BasicValueEnum::PointerValue(str_bytes_ptr),
BasicValueEnum::IntValue(str_len),
BasicValueEnum::PointerValue(prefix_bytes_ptr),
BasicValueEnum::IntValue(prefix_len),
],
&bitcode::STR_STARTS_WITH,
)
},
)
},
&[str_i128.into(), prefix_i128.into()],
&bitcode::STR_STARTS_WITH,
)
}
/// Str.endsWith : Str, Str -> Bool
pub fn str_ends_with<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
str_symbol: Symbol,
prefix_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let str_i128 = str_symbol_to_i128(env, scope, str_symbol);
let prefix_i128 = str_symbol_to_i128(env, scope, prefix_symbol);
call_bitcode_fn(
env,
&[str_i128.into(), prefix_i128.into()],
&bitcode::STR_ENDS_WITH,
)
}
@ -718,28 +204,26 @@ pub fn str_starts_with<'a, 'ctx, 'env>(
pub fn str_count_graphemes<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
parent: FunctionValue<'ctx>,
str_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let ctx = env.context;
let str_i128 = str_symbol_to_i128(env, scope, str_symbol);
let sym_str_ptr = ptr_from_symbol(scope, str_symbol);
let ret_type = BasicTypeEnum::IntType(ctx.i64_type());
load_str(
call_bitcode_fn(
env,
parent,
*sym_str_ptr,
ret_type,
|str_ptr, str_len, _str_smallness| {
call_bitcode_fn(
env,
&[
BasicValueEnum::PointerValue(str_ptr),
BasicValueEnum::IntValue(str_len),
],
&bitcode::STR_COUNT_GRAPEHEME_CLUSTERS,
)
},
&[str_i128.into()],
&bitcode::STR_COUNT_GRAPEHEME_CLUSTERS,
)
}
/// Str.fromInt : Int -> Str
pub fn str_from_int<'a, 'ctx, 'env>(
env: &Env<'a, 'ctx, 'env>,
scope: &Scope<'a, 'ctx>,
int_symbol: Symbol,
) -> BasicValueEnum<'ctx> {
let int = load_symbol(env, scope, &int_symbol);
let zig_result = call_bitcode_fn(env, &[int], &bitcode::STR_FROM_INT).into_struct_value();
zig_str_to_struct(env, zig_result).into()
}


@ -2,7 +2,7 @@ use crate::llvm::build::{
cast_basic_basic, cast_struct_struct, create_entry_block_alloca, set_name, Env, Scope,
FAST_CALL_CONV, LLVM_SADD_WITH_OVERFLOW_I64,
};
use crate::llvm::build_list::list_len;
use crate::llvm::build_list::{incrementing_elem_loop, list_len, load_list};
use crate::llvm::convert::{basic_type_from_layout, block_of_memory, ptr_int};
use bumpalo::collections::Vec;
use inkwell::context::Context;
@ -367,7 +367,6 @@ fn decrement_refcount_builtin<'a, 'ctx, 'env>(
List(memory_mode, element_layout) => {
let wrapper_struct = value.into_struct_value();
if element_layout.contains_refcounted() {
use crate::llvm::build_list::{incrementing_elem_loop, load_list};
use inkwell::types::BasicType;
let ptr_type =
@ -451,7 +450,6 @@ fn increment_refcount_builtin<'a, 'ctx, 'env>(
List(memory_mode, element_layout) => {
let wrapper_struct = value.into_struct_value();
if element_layout.contains_refcounted() {
use crate::llvm::build_list::{incrementing_elem_loop, load_list};
use inkwell::types::BasicType;
let ptr_type =


@ -557,6 +557,26 @@ mod gen_list {
);
}
#[test]
fn list_map_closure() {
assert_evals_to!(
indoc!(
r#"
pi : F64
pi = 3.14
single : List F64
single =
[ 0 ]
List.map single (\x -> x + pi)
"#
),
RocList::from_slice(&[3.14]),
RocList<f64>
);
}
#[test]
fn list_join_empty_list() {
assert_evals_to!("List.join []", RocList::from_slice(&[]), RocList<i64>);


@ -433,6 +433,13 @@ mod gen_str {
assert_evals_to!(r#"Str.startsWith "" "hello world""#, false, bool);
}
#[test]
fn str_ends_with() {
assert_evals_to!(r#"Str.endsWith "hello world" "world""#, true, bool);
assert_evals_to!(r#"Str.endsWith "nope" "hello world""#, false, bool);
assert_evals_to!(r#"Str.endsWith "" "hello world""#, false, bool);
}
#[test]
fn str_count_graphemes_small_str() {
assert_evals_to!(r#"Str.countGraphemes "å🤔""#, 2, usize);
@ -483,4 +490,29 @@ mod gen_str {
fn str_starts_with_false_small_str() {
assert_evals_to!(r#"Str.startsWith "1234" "23""#, false, bool);
}
#[test]
fn str_from_int() {
assert_evals_to!(
r#"Str.fromInt 1234"#,
roc_std::RocStr::from_slice("1234".as_bytes()),
roc_std::RocStr
);
assert_evals_to!(
r#"Str.fromInt 0"#,
roc_std::RocStr::from_slice("0".as_bytes()),
roc_std::RocStr
);
assert_evals_to!(
r#"Str.fromInt -1"#,
roc_std::RocStr::from_slice("-1".as_bytes()),
roc_std::RocStr
);
let max = format!("{}", i64::MAX);
assert_evals_to!(r#"Str.fromInt Num.maxInt"#, &max, &'static str);
let min = format!("{}", i64::MIN);
assert_evals_to!(r#"Str.fromInt Num.minInt"#, &min, &'static str);
}
}


@ -42,3 +42,6 @@ bumpalo = { version = "3.2", features = ["collections"] }
libc = "0.2"
tempfile = "3.1.0"
itertools = "0.9"
[features]
target-aarch64 = ["roc_build/target-aarch64"]


@ -0,0 +1,814 @@
use crate::generic64::{Assembler, CallConv, GPRegTrait};
use bumpalo::collections::Vec;
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
#[allow(dead_code)]
pub enum AArch64GPReg {
X0 = 0,
X1 = 1,
X2 = 2,
X3 = 3,
X4 = 4,
X5 = 5,
X6 = 6,
X7 = 7,
XR = 8,
X9 = 9,
X10 = 10,
X11 = 11,
X12 = 12,
X13 = 13,
X14 = 14,
X15 = 15,
IP0 = 16,
IP1 = 17,
PR = 18,
X19 = 19,
X20 = 20,
X21 = 21,
X22 = 22,
X23 = 23,
X24 = 24,
X25 = 25,
X26 = 26,
X27 = 27,
X28 = 28,
FP = 29,
LR = 30,
/// This can mean Zero or Stack Pointer depending on the context.
ZRSP = 31,
}
impl GPRegTrait for AArch64GPReg {}
pub struct AArch64Assembler {}
// AArch64Call may need to eventually be split by OS,
// but I think with how we use it, they may all be the same.
pub struct AArch64Call {}
const STACK_ALIGNMENT: u8 = 16;
impl CallConv<AArch64GPReg> for AArch64Call {
const GP_PARAM_REGS: &'static [AArch64GPReg] = &[
AArch64GPReg::X0,
AArch64GPReg::X1,
AArch64GPReg::X2,
AArch64GPReg::X3,
AArch64GPReg::X4,
AArch64GPReg::X5,
AArch64GPReg::X6,
AArch64GPReg::X7,
];
const GP_RETURN_REGS: &'static [AArch64GPReg] = Self::GP_PARAM_REGS;
const GP_DEFAULT_FREE_REGS: &'static [AArch64GPReg] = &[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Don't use frame pointer: AArch64GPReg::FP,
// Don't use indirect result location: AArch64GPReg::XR,
// Don't use platform register: AArch64GPReg::PR,
// Don't use link register: AArch64GPReg::LR,
// Don't use zero register/stack pointer: AArch64GPReg::ZRSP,
// Use callee saved regs last.
AArch64GPReg::X19,
AArch64GPReg::X20,
AArch64GPReg::X21,
AArch64GPReg::X22,
AArch64GPReg::X23,
AArch64GPReg::X24,
AArch64GPReg::X25,
AArch64GPReg::X26,
AArch64GPReg::X27,
AArch64GPReg::X28,
// Use caller saved regs first.
AArch64GPReg::X0,
AArch64GPReg::X1,
AArch64GPReg::X2,
AArch64GPReg::X3,
AArch64GPReg::X4,
AArch64GPReg::X5,
AArch64GPReg::X6,
AArch64GPReg::X7,
AArch64GPReg::X9,
AArch64GPReg::X10,
AArch64GPReg::X11,
AArch64GPReg::X12,
AArch64GPReg::X13,
AArch64GPReg::X14,
AArch64GPReg::X15,
AArch64GPReg::IP0,
AArch64GPReg::IP1,
];
const SHADOW_SPACE_SIZE: u8 = 0;
#[inline(always)]
fn callee_saved(reg: &AArch64GPReg) -> bool {
matches!(
reg,
AArch64GPReg::X19
| AArch64GPReg::X20
| AArch64GPReg::X21
| AArch64GPReg::X22
| AArch64GPReg::X23
| AArch64GPReg::X24
| AArch64GPReg::X25
| AArch64GPReg::X26
| AArch64GPReg::X27
| AArch64GPReg::X28
)
}
#[inline(always)]
fn setup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[AArch64GPReg],
requested_stack_size: i32,
) -> Result<i32, String> {
// full size is upcast to i64 to make sure we don't overflow here.
let mut full_size = 8 * saved_regs.len() as i64 + requested_stack_size as i64;
if !leaf_function {
full_size += 8;
}
let alignment = if full_size <= 0 {
0
} else {
full_size % STACK_ALIGNMENT as i64
};
let offset = if alignment == 0 {
0
} else {
STACK_ALIGNMENT - alignment as u8
};
if let Some(aligned_stack_size) =
requested_stack_size.checked_add(8 * saved_regs.len() as i32 + offset as i32)
{
if aligned_stack_size > 0 {
AArch64Assembler::sub_reg64_reg64_imm32(
buf,
AArch64GPReg::ZRSP,
AArch64GPReg::ZRSP,
aligned_stack_size,
);
// All the following stores could be optimized by using `STP` to store pairs.
let mut offset = aligned_stack_size;
if !leaf_function {
offset -= 8;
AArch64Assembler::mov_stack32_reg64(buf, offset, AArch64GPReg::LR);
offset -= 8;
AArch64Assembler::mov_stack32_reg64(buf, offset, AArch64GPReg::FP);
}
for reg in saved_regs {
offset -= 8;
AArch64Assembler::mov_stack32_reg64(buf, offset, *reg);
}
Ok(aligned_stack_size)
} else {
Ok(0)
}
} else {
Err("Ran out of stack space".to_string())
}
}
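The arithmetic above rounds the requested frame size (plus 8 bytes per saved register, plus space for LR and FP in non-leaf functions) up to the 16-byte alignment AArch64 requires. A standalone sketch of just the rounding step, with an illustrative helper name that is not part of this backend:

```rust
const STACK_ALIGNMENT: i64 = 16;

/// Round a frame size up to the next multiple of the stack alignment.
fn round_up_to_alignment(size: i64) -> i64 {
    let rem = size % STACK_ALIGNMENT;
    if rem == 0 {
        size
    } else {
        size + (STACK_ALIGNMENT - rem)
    }
}

fn main() {
    assert_eq!(round_up_to_alignment(0), 0);
    assert_eq!(round_up_to_alignment(24), 32);
    assert_eq!(round_up_to_alignment(32), 32);
}
```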
#[inline(always)]
fn cleanup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[AArch64GPReg],
aligned_stack_size: i32,
) -> Result<(), String> {
if aligned_stack_size > 0 {
// All the following loads could be optimized by using `LDP` to load pairs.
let mut offset = aligned_stack_size;
if !leaf_function {
offset -= 8;
AArch64Assembler::mov_reg64_stack32(buf, AArch64GPReg::LR, offset);
offset -= 8;
AArch64Assembler::mov_reg64_stack32(buf, AArch64GPReg::FP, offset);
}
for reg in saved_regs {
offset -= 8;
AArch64Assembler::mov_reg64_stack32(buf, *reg, offset);
}
AArch64Assembler::add_reg64_reg64_imm32(
buf,
AArch64GPReg::ZRSP,
AArch64GPReg::ZRSP,
aligned_stack_size,
);
}
Ok(())
}
}
impl Assembler<AArch64GPReg> for AArch64Assembler {
#[inline(always)]
fn abs_reg64_reg64<'a>(_buf: &mut Vec<'a, u8>, _dst: AArch64GPReg, _src: AArch64GPReg) {
unimplemented!("abs_reg64_reg64 is not yet implement for AArch64");
}
#[inline(always)]
fn add_reg64_reg64_imm32<'a>(
buf: &mut Vec<'a, u8>,
dst: AArch64GPReg,
src: AArch64GPReg,
imm32: i32,
) {
if imm32 < 0 {
unimplemented!("immediate addition with values less than 0 are not yet implemented");
} else if imm32 < 0xFFF {
add_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
} else {
unimplemented!(
"immediate additions with values greater than 12bits are not yet implemented"
);
}
}
#[inline(always)]
fn add_reg64_reg64_reg64<'a>(
buf: &mut Vec<'a, u8>,
dst: AArch64GPReg,
src1: AArch64GPReg,
src2: AArch64GPReg,
) {
add_reg64_reg64_reg64(buf, dst, src1, src2);
}
#[inline(always)]
fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm: i64) {
let mut remaining = imm as u64;
movz_reg64_imm16(buf, dst, remaining as u16, 0);
remaining >>= 16;
if remaining > 0 {
movk_reg64_imm16(buf, dst, remaining as u16, 1);
}
remaining >>= 16;
if remaining > 0 {
movk_reg64_imm16(buf, dst, remaining as u16, 2);
}
remaining >>= 16;
if remaining > 0 {
movk_reg64_imm16(buf, dst, remaining as u16, 3);
}
}
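mov_reg64_imm64 materializes a 64-bit constant with one MOVZ followed by up to three MOVKs, one 16-bit chunk per instruction. A small illustration of the chunking; the helper name is hypothetical and not part of this file:

```rust
/// Split a 64-bit immediate into the four 16-bit chunks consumed by
/// MOVZ/MOVK, ordered from least to most significant.
fn imm64_chunks(imm: u64) -> [u16; 4] {
    [
        imm as u16,
        (imm >> 16) as u16,
        (imm >> 32) as u16,
        (imm >> 48) as u16,
    ]
}

fn main() {
    assert_eq!(
        imm64_chunks(0x1234_5678_9ABC_DEF0),
        [0xDEF0, 0x9ABC, 0x5678, 0x1234]
    );
}
```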
#[inline(always)]
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
mov_reg64_reg64(buf, dst, src);
}
#[inline(always)]
fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, offset: i32) {
if offset < 0 {
unimplemented!("negative stack offsets are not yet implement for AArch64");
} else if offset < (0xFFF << 8) {
debug_assert!(offset % 8 == 0);
ldr_reg64_imm12(buf, dst, AArch64GPReg::ZRSP, (offset as u16) >> 3);
} else {
unimplemented!("stack offsets over 32k are not yet implement for AArch64");
}
}
#[inline(always)]
fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: AArch64GPReg) {
if offset < 0 {
unimplemented!("negative stack offsets are not yet implement for AArch64");
} else if offset < (0xFFF << 8) {
debug_assert!(offset % 8 == 0);
str_reg64_imm12(buf, src, AArch64GPReg::ZRSP, (offset as u16) >> 3);
} else {
unimplemented!("stack offsets over 32k are not yet implement for AArch64");
}
}
#[inline(always)]
fn sub_reg64_reg64_imm32<'a>(
buf: &mut Vec<'a, u8>,
dst: AArch64GPReg,
src: AArch64GPReg,
imm32: i32,
) {
if imm32 < 0 {
unimplemented!(
"immediate subtractions with values less than 0 are not yet implemented"
);
} else if imm32 < 0xFFF {
sub_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
} else {
unimplemented!(
"immediate subtractions with values greater than 12bits are not yet implemented"
);
}
}
#[inline(always)]
fn ret<'a>(buf: &mut Vec<'a, u8>) {
ret_reg64(buf, AArch64GPReg::LR)
}
}
impl AArch64Assembler {}
/// AArch64Instruction, maps all instructions to an enum.
/// Decoding the function should be cheap because we will always inline.
/// All of the operations should resolved by constants, leave just some bit manipulation.
/// Enums may not be complete since we will only add what we need.
#[derive(Debug)]
enum AArch64Instruction {
_Reserved,
_SVE,
DPImm(DPImmGroup),
Branch(BranchGroup),
LdStr(LdStrGroup),
DPReg(DPRegGroup),
_DPFloat,
}
#[derive(Debug)]
enum BranchGroup {
UnconditionBranchReg {
opc: u8,
op2: u8,
op3: u8,
reg_n: AArch64GPReg,
op4: u8,
},
}
#[derive(Debug)]
enum DPRegGroup {
AddSubShifted {
sf: bool,
subtract: bool,
set_flags: bool,
shift: u8,
reg_m: AArch64GPReg,
imm6: u8,
reg_n: AArch64GPReg,
reg_d: AArch64GPReg,
},
Logical {
sf: bool,
op: DPRegLogicalOp,
shift: u8,
reg_m: AArch64GPReg,
imm6: u8,
reg_n: AArch64GPReg,
reg_d: AArch64GPReg,
},
}
#[derive(Debug)]
enum DPImmGroup {
AddSubImm {
sf: bool,
subtract: bool,
set_flags: bool,
shift: bool,
imm12: u16,
reg_n: AArch64GPReg,
reg_d: AArch64GPReg,
},
MoveWide {
sf: bool,
opc: u8,
hw: u8,
imm16: u16,
reg_d: AArch64GPReg,
},
}
#[derive(Debug)]
enum LdStrGroup {
UnsignedImm {
size: u8,
v: bool,
opc: u8,
imm12: u16,
reg_n: AArch64GPReg,
reg_t: AArch64GPReg,
},
}
#[derive(Debug)]
#[allow(dead_code)]
enum DPRegLogicalOp {
AND,
BIC,
ORR,
ORN,
EOR,
EON,
ANDS,
BICS,
}
#[inline(always)]
fn build_instruction(inst: AArch64Instruction) -> [u8; 4] {
let mut out: u32 = 0;
match inst {
AArch64Instruction::Branch(branch) => {
out |= 0b101 << 26;
match branch {
BranchGroup::UnconditionBranchReg {
opc,
op2,
op3,
reg_n,
op4,
} => {
debug_assert!(opc <= 0b1111);
debug_assert!(op2 <= 0b11111);
debug_assert!(op3 <= 0b111111);
debug_assert!(op4 <= 0b1111);
out |= 0b1101011 << 25;
out |= (opc as u32) << 21;
out |= (op2 as u32) << 16;
out |= (op3 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= op4 as u32;
}
}
}
AArch64Instruction::DPImm(dpimm) => {
out |= 0b100 << 26;
match dpimm {
DPImmGroup::MoveWide {
sf,
opc,
hw,
imm16,
reg_d,
} => {
out |= (sf as u32) << 31;
out |= (opc as u32) << 29;
out |= 0b101 << 23;
out |= (hw as u32) << 21;
out |= (imm16 as u32) << 5;
out |= reg_d as u32;
}
DPImmGroup::AddSubImm {
sf,
subtract,
set_flags,
shift,
imm12,
reg_n,
reg_d,
} => {
debug_assert!(imm12 <= 0xFFF);
out |= (sf as u32) << 31;
out |= (subtract as u32) << 30;
out |= (set_flags as u32) << 29;
out |= 0b010 << 23;
out |= (shift as u32) << 22;
out |= (imm12 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_d as u32;
}
}
}
AArch64Instruction::DPReg(dpreg) => {
out |= 0b101 << 25;
match dpreg {
DPRegGroup::Logical {
sf,
op,
shift,
reg_m,
imm6,
reg_n,
reg_d,
} => {
debug_assert!(shift <= 0b11);
debug_assert!(imm6 <= 0b111111);
let (opc, n) = match op {
DPRegLogicalOp::AND => (0b00, 0),
DPRegLogicalOp::BIC => (0b00, 1),
DPRegLogicalOp::ORR => (0b01, 0),
DPRegLogicalOp::ORN => (0b01, 1),
DPRegLogicalOp::EOR => (0b10, 0),
DPRegLogicalOp::EON => (0b10, 1),
DPRegLogicalOp::ANDS => (0b11, 0),
DPRegLogicalOp::BICS => (0b11, 1),
};
out |= (sf as u32) << 31;
out |= opc << 29;
out |= (shift as u32) << 22;
out |= n << 21;
out |= (reg_m as u32) << 16;
out |= (imm6 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_d as u32;
}
DPRegGroup::AddSubShifted {
sf,
subtract,
set_flags,
shift,
reg_m,
imm6,
reg_n,
reg_d,
} => {
debug_assert!(shift <= 0b11);
debug_assert!(imm6 <= 0b111111);
out |= (sf as u32) << 31;
out |= (subtract as u32) << 30;
out |= (set_flags as u32) << 29;
out |= 0b1 << 24;
out |= (shift as u32) << 22;
out |= (reg_m as u32) << 16;
out |= (imm6 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_d as u32;
}
}
}
AArch64Instruction::LdStr(ldstr) => {
out |= 0b1 << 27;
match ldstr {
LdStrGroup::UnsignedImm {
size,
v,
opc,
imm12,
reg_n,
reg_t,
} => {
debug_assert!(size <= 0b11);
debug_assert!(imm12 <= 0xFFF);
out |= (size as u32) << 30;
out |= 0b11 << 28;
out |= (v as u32) << 26;
out |= 0b1 << 24;
out |= (opc as u32) << 22;
out |= (imm12 as u32) << 10;
out |= (reg_n as u32) << 5;
out |= reg_t as u32;
}
}
}
x => unimplemented!("The instruction, {:?}, has not been implemented yet", x),
}
out.to_le_bytes()
}
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
/// `ADD Xd, Xn, imm12` -> Add Xn and imm12 and place the result into Xd.
#[inline(always)]
fn add_reg64_reg64_imm12<'a>(
buf: &mut Vec<'a, u8>,
dst: AArch64GPReg,
src: AArch64GPReg,
imm12: u16,
) {
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::AddSubImm {
sf: true,
subtract: false,
set_flags: false,
shift: false,
imm12,
reg_n: src,
reg_d: dst,
},
)));
}
/// `ADD Xd, Xm, Xn` -> Add Xm and Xn and place the result into Xd.
#[inline(always)]
fn add_reg64_reg64_reg64<'a>(
buf: &mut Vec<'a, u8>,
dst: AArch64GPReg,
src1: AArch64GPReg,
src2: AArch64GPReg,
) {
buf.extend(&build_instruction(AArch64Instruction::DPReg(
DPRegGroup::AddSubShifted {
sf: true,
subtract: false,
set_flags: false,
shift: 0,
reg_m: src1,
imm6: 0,
reg_n: src2,
reg_d: dst,
},
)));
}
/// `LDR Xt, [Xn, #offset]` -> Load from Xn + Offset into Xt. ZRSP is SP.
/// Note: imm12 is the offset divided by 8.
#[inline(always)]
fn ldr_reg64_imm12<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, base: AArch64GPReg, imm12: u16) {
debug_assert!(imm12 <= 0xFFF);
buf.extend(&build_instruction(AArch64Instruction::LdStr(
LdStrGroup::UnsignedImm {
size: 0b11,
v: false,
opc: 0b01,
imm12,
reg_n: base,
reg_t: dst,
},
)));
}
/// `MOV Xd, Xm` -> Move Xm to Xd.
#[inline(always)]
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
// MOV is equivalent to `ORR Xd, XZR, Xm` on AArch64.
buf.extend(&build_instruction(AArch64Instruction::DPReg(
DPRegGroup::Logical {
sf: true,
op: DPRegLogicalOp::ORR,
shift: 0,
reg_m: src,
imm6: 0,
reg_n: AArch64GPReg::ZRSP,
reg_d: dst,
},
)));
}
/// `MOVK Xd, imm16` -> Keeps Xd and moves an optionally shifted imm16 to Xd.
#[inline(always)]
fn movk_reg64_imm16<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm16: u16, hw: u8) {
debug_assert!(hw <= 0b11);
// MOVK keeps the existing bits of Xd and inserts imm16 into the 16-bit chunk selected by hw.
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::MoveWide {
sf: true,
opc: 0b11,
hw,
imm16,
reg_d: dst,
},
)));
}
/// `MOVZ Xd, imm16` -> Zeros Xd and moves an optionally shifted imm16 to Xd.
#[inline(always)]
fn movz_reg64_imm16<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm16: u16, hw: u8) {
debug_assert!(hw <= 0b11);
// MOVZ zeroes Xd and then writes imm16 into the 16-bit chunk selected by hw.
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::MoveWide {
sf: true,
opc: 0b10,
hw,
imm16,
reg_d: dst,
},
)));
}
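// (Editorial sketch, not part of this commit: `hw` selects which 16-bit chunk the immediate
// lands in, i.e. a left shift of hw * 16, so a full 64-bit constant can be built from one MOVZ
// plus up to three MOVKs. The helper name below is hypothetical.)
fn mov_reg64_imm64_sketch<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm: u64) {
    movz_reg64_imm16(buf, dst, imm as u16, 0); // bits 0..16, zeroing the rest of Xd
    movk_reg64_imm16(buf, dst, (imm >> 16) as u16, 1); // bits 16..32
    movk_reg64_imm16(buf, dst, (imm >> 32) as u16, 2); // bits 32..48
    movk_reg64_imm16(buf, dst, (imm >> 48) as u16, 3); // bits 48..64
}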
/// `STR Xt, [Xn, #offset]` -> Store Xt to Xn + Offset. ZRSP is SP.
/// Note: imm12 is the offset divided by 8.
#[inline(always)]
fn str_reg64_imm12<'a>(buf: &mut Vec<'a, u8>, src: AArch64GPReg, base: AArch64GPReg, imm12: u16) {
debug_assert!(imm12 <= 0xFFF);
buf.extend(&build_instruction(AArch64Instruction::LdStr(
LdStrGroup::UnsignedImm {
size: 0b11,
v: false,
opc: 0b00,
imm12,
reg_n: base,
reg_t: src,
},
)));
}
/// `SUB Xd, Xn, imm12` -> Subtract imm12 from Xn and place the result into Xd.
#[inline(always)]
fn sub_reg64_reg64_imm12<'a>(
buf: &mut Vec<'a, u8>,
dst: AArch64GPReg,
src: AArch64GPReg,
imm12: u16,
) {
buf.extend(&build_instruction(AArch64Instruction::DPImm(
DPImmGroup::AddSubImm {
sf: true,
subtract: true,
set_flags: false,
shift: false,
imm12,
reg_n: src,
reg_d: dst,
},
)));
}
/// `RET Xn` -> Return to the address stored in Xn.
#[inline(always)]
fn ret_reg64<'a>(buf: &mut Vec<'a, u8>, xn: AArch64GPReg) {
buf.extend(&build_instruction(AArch64Instruction::Branch(
BranchGroup::UnconditionBranchReg {
opc: 0b0010,
op2: 0b11111,
op3: 0b000000,
reg_n: xn,
op4: 0b000,
},
)));
}
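// (Editorial sketch, not part of this commit: a minimal end-to-end use of the builders above.
// It assumes AArch64GPReg defines X0, the first return register; LR appears in the tests below.)
fn emit_return_42_sketch<'a>(buf: &mut Vec<'a, u8>) {
    movz_reg64_imm16(buf, AArch64GPReg::X0, 42, 0); // x0 = 42
    ret_reg64(buf, AArch64GPReg::LR); // branch to the return address held in lr
}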
#[cfg(test)]
mod tests {
use super::*;
const TEST_U16: u16 = 0x1234;
//const TEST_I32: i32 = 0x12345678;
//const TEST_I64: i64 = 0x12345678_9ABCDEF0;
#[test]
fn test_add_reg64_reg64_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
add_reg64_reg64_reg64(
&mut buf,
AArch64GPReg::X10,
AArch64GPReg::ZRSP,
AArch64GPReg::X21,
);
assert_eq!(&buf, &[0xAA, 0x02, 0x1F, 0x8B]);
}
#[test]
fn test_add_reg64_reg64_imm12() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
add_reg64_reg64_imm12(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21, 0x123);
assert_eq!(&buf, &[0xAA, 0x8E, 0x04, 0x91]);
}
#[test]
fn test_ldr_reg64_imm12() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
ldr_reg64_imm12(&mut buf, AArch64GPReg::X21, AArch64GPReg::ZRSP, 0x123);
assert_eq!(&buf, &[0xF5, 0x8F, 0x44, 0xF9]);
}
#[test]
fn test_mov_reg64_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
mov_reg64_reg64(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21);
assert_eq!(&buf, &[0xEA, 0x03, 0x15, 0xAA]);
}
#[test]
fn test_movk_reg64_imm16() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
movk_reg64_imm16(&mut buf, AArch64GPReg::X21, TEST_U16, 3);
assert_eq!(&buf, &[0x95, 0x46, 0xE2, 0xF2]);
}
#[test]
fn test_movz_reg64_imm16() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
movz_reg64_imm16(&mut buf, AArch64GPReg::X21, TEST_U16, 3);
assert_eq!(&buf, &[0x95, 0x46, 0xE2, 0xD2]);
}
#[test]
fn test_str_reg64_imm12() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
str_reg64_imm12(&mut buf, AArch64GPReg::X21, AArch64GPReg::ZRSP, 0x123);
assert_eq!(&buf, &[0xF5, 0x8F, 0x04, 0xF9]);
}
#[test]
fn test_sub_reg64_reg64_imm12() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
sub_reg64_reg64_imm12(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21, 0x123);
assert_eq!(&buf, &[0xAA, 0x8E, 0x04, 0xD1]);
}
#[test]
fn test_ret_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
ret_reg64(&mut buf, AArch64GPReg::LR);
assert_eq!(&buf, &[0xC0, 0x03, 0x5F, 0xD6]);
}
}

View File

@ -1,49 +1,61 @@
use crate::{Backend, Env, Relocation};
use bumpalo::collections::Vec;
use roc_collections::all::{ImSet, MutMap, MutSet};
use roc_collections::all::{MutMap, MutSet};
use roc_module::symbol::Symbol;
use roc_mono::ir::{Literal, Stmt};
use std::marker::PhantomData;
use target_lexicon::Triple;
pub mod aarch64;
pub mod x86_64;
pub trait CallConv<GPReg> {
fn gp_param_regs() -> &'static [GPReg];
fn gp_return_regs() -> &'static [GPReg];
fn gp_default_free_regs() -> &'static [GPReg];
pub trait CallConv<GPReg: GPRegTrait> {
const GP_PARAM_REGS: &'static [GPReg];
const GP_RETURN_REGS: &'static [GPReg];
const GP_DEFAULT_FREE_REGS: &'static [GPReg];
// A linear scan of an array may be faster than a set technically.
// That being said, fastest would likely be a trait based on calling convention/register.
fn caller_saved_regs() -> ImSet<GPReg>;
fn callee_saved_regs() -> ImSet<GPReg>;
const SHADOW_SPACE_SIZE: u8;
fn stack_pointer() -> GPReg;
fn frame_pointer() -> GPReg;
fn callee_saved(reg: &GPReg) -> bool;
#[inline(always)]
fn caller_saved_regs(reg: &GPReg) -> bool {
!Self::callee_saved(reg)
}
fn shadow_space_size() -> u8;
// It may be worth ignoring the red zone and keeping things simpler.
fn red_zone_size() -> u8;
fn setup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[GPReg],
requested_stack_size: i32,
) -> Result<i32, String>;
fn cleanup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[GPReg],
aligned_stack_size: i32,
) -> Result<(), String>;
}
pub trait Assembler<GPReg> {
fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
/// Assembler contains calls to the backend assembly generator.
/// These calls do not necessarily map directly to a single assembly instruction.
/// They are higher level in cases where an instruction would not be common to and shared between multiple architectures.
/// Thus, some backends will need to use multiple instructions to perform a single one of these calls.
/// Generally, I prefer explicit sources, as opposed to dst being one of the sources. Ex: `x = x + y` would be `add x, x, y` instead of `add x, y`.
/// dst should always come before sources.
pub trait Assembler<GPReg: GPRegTrait> {
fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
fn add_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32);
fn add_reg64_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
fn sub_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32);
fn ret<'a>(buf: &mut Vec<'a, u8>);
fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
}
#[derive(Clone, Debug, PartialEq)]
enum SymbolStorage<GPReg> {
enum SymbolStorage<GPReg: GPRegTrait> {
// These may need layout, but I am not sure.
// I think whenever a symbol would be used, we specify layout anyways.
GPRegeg(GPReg),
@ -69,7 +81,7 @@ pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallCo
literal_map: MutMap<Symbol, Literal<'a>>,
// This should probably be smarter than a vec.
// There are certain registers we should always use first. With pushing and poping, this could get mixed.
// There are certain registers we should always use first. With pushing and popping, this could get mixed.
gp_free_regs: Vec<'a, GPReg>,
// The last major thing we need is a way to decide what reg to free when all of them are full.
@ -109,7 +121,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
}
fn reset(&mut self) {
self.stack_size = -(CC::red_zone_size() as i32);
self.stack_size = 0;
self.leaf_function = true;
self.last_seen_map.clear();
self.free_map.clear();
@ -119,13 +131,12 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
self.gp_free_regs.clear();
self.gp_used_regs.clear();
self.gp_free_regs
.extend_from_slice(CC::gp_default_free_regs());
.extend_from_slice(CC::GP_DEFAULT_FREE_REGS);
}
fn set_not_leaf_function(&mut self) {
self.leaf_function = false;
// If this is not a leaf function, it can't use the shadow space.
self.stack_size = CC::shadow_space_size() as i32 - CC::red_zone_size() as i32;
self.stack_size = CC::SHADOW_SPACE_SIZE as i32;
}
fn literal_map(&mut self) -> &mut MutMap<Symbol, Literal<'a>> {
@ -147,38 +158,17 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String> {
let mut out = bumpalo::vec![in self.env.arena];
if !self.leaf_function {
// I believe that this will have to move away from push and to mov to be generic across backends.
ASM::push_register64bit(&mut out, CC::frame_pointer());
ASM::mov_register64bit_register64bit(
&mut out,
CC::frame_pointer(),
CC::stack_pointer(),
);
}
// Save data in all callee saved regs.
let mut pop_order = bumpalo::vec![in self.env.arena];
for reg in &self.used_callee_saved_regs {
ASM::push_register64bit(&mut out, *reg);
pop_order.push(*reg);
}
if self.stack_size > 0 {
ASM::sub_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
}
// Setup stack.
let mut used_regs = bumpalo::vec![in self.env.arena];
used_regs.extend(&self.used_callee_saved_regs);
let aligned_stack_size =
CC::setup_stack(&mut out, self.leaf_function, &used_regs, self.stack_size)?;
// Add function body.
out.extend(&self.buf);
if self.stack_size > 0 {
ASM::add_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
}
// Restore data in callee saved regs.
while let Some(reg) = pop_order.pop() {
ASM::pop_register64bit(&mut out, reg);
}
if !self.leaf_function {
ASM::pop_register64bit(&mut out, CC::frame_pointer());
}
// Cleanup stack.
CC::cleanup_stack(&mut out, self.leaf_function, &used_regs, aligned_stack_size)?;
ASM::ret(&mut out);
Ok((out.into_bump_slice(), &[]))
@ -187,9 +177,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> {
let dst_reg = self.claim_gp_reg(dst)?;
let src_reg = self.load_to_reg(src)?;
ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
ASM::neg_register64bit(&mut self.buf, dst_reg);
ASM::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
ASM::abs_reg64_reg64(&mut self.buf, dst_reg, src_reg);
Ok(())
}
@ -201,9 +189,8 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
) -> Result<(), String> {
let dst_reg = self.claim_gp_reg(dst)?;
let src1_reg = self.load_to_reg(src1)?;
ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg);
let src2_reg = self.load_to_reg(src2)?;
ASM::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg);
ASM::add_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
Ok(())
}
@ -212,7 +199,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
Literal::Int(x) => {
let reg = self.claim_gp_reg(sym)?;
let val = *x;
ASM::mov_register64bit_immediate64bit(&mut self.buf, reg, val);
ASM::mov_reg64_imm64(&mut self.buf, reg, val);
Ok(())
}
x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
@ -234,11 +221,11 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
fn return_symbol(&mut self, sym: &Symbol) -> Result<(), String> {
let val = self.symbols_map.get(sym);
match val {
Some(SymbolStorage::GPRegeg(reg)) if *reg == CC::gp_return_regs()[0] => Ok(()),
Some(SymbolStorage::GPRegeg(reg)) if *reg == CC::GP_RETURN_REGS[0] => Ok(()),
Some(SymbolStorage::GPRegeg(reg)) => {
// If it fits in a general purpose register, just copy it over to the return register.
// Technically this can be optimized to produce shorter instructions if it is less than 64 bits.
ASM::mov_register64bit_register64bit(&mut self.buf, CC::gp_return_regs()[0], *reg);
ASM::mov_reg64_reg64(&mut self.buf, CC::GP_RETURN_REGS[0], *reg);
Ok(())
}
Some(x) => Err(format!(
@ -258,7 +245,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
fn claim_gp_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
let reg = if !self.gp_free_regs.is_empty() {
let free_reg = self.gp_free_regs.pop().unwrap();
if CC::callee_saved_regs().contains(&free_reg) {
if CC::callee_saved(&free_reg) {
self.used_callee_saved_regs.insert(free_reg);
}
Ok(free_reg)
@ -291,7 +278,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
let reg = self.claim_gp_reg(sym)?;
self.symbols_map
.insert(*sym, SymbolStorage::StackAndGPRegeg(reg, offset));
ASM::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32);
ASM::mov_reg64_stack32(&mut self.buf, reg, offset as i32);
Ok(reg)
}
None => Err(format!("Unknown symbol: {}", sym)),
@ -302,19 +289,9 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
let val = self.symbols_map.remove(sym);
match val {
Some(SymbolStorage::GPRegeg(reg)) => {
let offset = self.stack_size;
self.stack_size += 8;
if let Some(size) = self.stack_size.checked_add(8) {
self.stack_size = size;
} else {
return Err(format!(
"Ran out of stack space while saving symbol: {}",
sym
));
}
ASM::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg);
self.symbols_map
.insert(*sym, SymbolStorage::Stack(offset as i32));
let offset = self.increase_stack_size(8)?;
ASM::mov_stack32_reg64(&mut self.buf, offset as i32, reg);
self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
Ok(())
}
Some(SymbolStorage::StackAndGPRegeg(_, offset)) => {
@ -328,4 +305,16 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
None => Err(format!("Unknown symbol: {}", sym)),
}
}
/// increase_stack_size increases the current stack size and returns the offset at which the newly reserved space begins.
fn increase_stack_size(&mut self, amount: i32) -> Result<i32, String> {
debug_assert!(amount > 0);
let offset = self.stack_size;
if let Some(new_size) = self.stack_size.checked_add(amount) {
self.stack_size = new_size;
Ok(offset)
} else {
Err("Ran out of stack space".to_string())
}
}
}

View File

@ -1,6 +1,5 @@
use crate::generic64::{Assembler, CallConv, GPRegTrait};
use bumpalo::collections::Vec;
use roc_collections::all::ImSet;
// Not sure exactly how I want to represent registers.
// If we want max speed, we would likely make them structs that impl the same trait to avoid ifs.
@ -26,10 +25,312 @@ pub enum X86_64GPReg {
impl GPRegTrait for X86_64GPReg {}
pub struct X86_64Assembler {}
pub struct X86_64WindowsFastcall {}
pub struct X86_64SystemV {}
const STACK_ALIGNMENT: u8 = 16;
impl CallConv<X86_64GPReg> for X86_64SystemV {
const GP_PARAM_REGS: &'static [X86_64GPReg] = &[
X86_64GPReg::RDI,
X86_64GPReg::RSI,
X86_64GPReg::RDX,
X86_64GPReg::RCX,
X86_64GPReg::R8,
X86_64GPReg::R9,
];
const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX, X86_64GPReg::RDX];
const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
];
const SHADOW_SPACE_SIZE: u8 = 0;
#[inline(always)]
fn callee_saved(reg: &X86_64GPReg) -> bool {
matches!(
reg,
X86_64GPReg::RBX
| X86_64GPReg::RBP
| X86_64GPReg::R12
| X86_64GPReg::R13
| X86_64GPReg::R14
| X86_64GPReg::R15
)
}
#[inline(always)]
fn setup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[X86_64GPReg],
requested_stack_size: i32,
) -> Result<i32, String> {
x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size)
}
#[inline(always)]
fn cleanup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[X86_64GPReg],
aligned_stack_size: i32,
) -> Result<(), String> {
x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size)
}
}
impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
const GP_PARAM_REGS: &'static [X86_64GPReg] = &[
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
];
const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX];
const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Don't use stack pointer: X86_64GPReg::RSP,
// Don't use frame pointer: X86_64GPReg::RBP,
// Use callee saved regs last.
X86_64GPReg::RBX,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
];
const SHADOW_SPACE_SIZE: u8 = 32;
#[inline(always)]
fn callee_saved(reg: &X86_64GPReg) -> bool {
matches!(
reg,
X86_64GPReg::RBX
| X86_64GPReg::RBP
| X86_64GPReg::RSI
| X86_64GPReg::RSP
| X86_64GPReg::RDI
| X86_64GPReg::R12
| X86_64GPReg::R13
| X86_64GPReg::R14
| X86_64GPReg::R15
)
}
#[inline(always)]
fn setup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[X86_64GPReg],
requested_stack_size: i32,
) -> Result<i32, String> {
x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size)
}
#[inline(always)]
fn cleanup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[X86_64GPReg],
aligned_stack_size: i32,
) -> Result<(), String> {
x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size)
}
}
#[inline(always)]
fn x86_64_generic_setup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[X86_64GPReg],
requested_stack_size: i32,
) -> Result<i32, String> {
if !leaf_function {
X86_64Assembler::push_reg64(buf, X86_64GPReg::RBP);
X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RBP, X86_64GPReg::RSP);
}
for reg in saved_regs {
X86_64Assembler::push_reg64(buf, *reg);
}
// full size is upcast to i64 to make sure we don't overflow here.
let full_size = 8 * saved_regs.len() as i64 + requested_stack_size as i64;
let alignment = if full_size <= 0 {
0
} else {
full_size % STACK_ALIGNMENT as i64
};
let offset = if alignment == 0 {
0
} else {
STACK_ALIGNMENT - alignment as u8
};
if let Some(aligned_stack_size) = requested_stack_size.checked_add(offset as i32) {
if aligned_stack_size > 0 {
X86_64Assembler::sub_reg64_reg64_imm32(
buf,
X86_64GPReg::RSP,
X86_64GPReg::RSP,
aligned_stack_size,
);
Ok(aligned_stack_size)
} else {
Ok(0)
}
} else {
Err("Ran out of stack space".to_string())
}
}
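// (Editorial worked example of the alignment arithmetic above, with illustrative numbers only.)
// With 2 saved registers and requested_stack_size = 24:
//   full_size = 8 * 2 + 24 = 40
//   alignment = 40 % 16 = 8
//   offset    = 16 - 8  = 8
//   aligned_stack_size = 24 + 8 = 32,
// so the register pushes plus the RSP subtraction total 48 bytes, a multiple of 16.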
#[inline(always)]
fn x86_64_generic_cleanup_stack<'a>(
buf: &mut Vec<'a, u8>,
leaf_function: bool,
saved_regs: &[X86_64GPReg],
aligned_stack_size: i32,
) -> Result<(), String> {
if aligned_stack_size > 0 {
X86_64Assembler::add_reg64_reg64_imm32(
buf,
X86_64GPReg::RSP,
X86_64GPReg::RSP,
aligned_stack_size,
);
}
for reg in saved_regs.iter().rev() {
X86_64Assembler::pop_reg64(buf, *reg);
}
if !leaf_function {
X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RSP, X86_64GPReg::RBP);
X86_64Assembler::pop_reg64(buf, X86_64GPReg::RBP);
}
Ok(())
}
impl Assembler<X86_64GPReg> for X86_64Assembler {
// These functions should map to the raw assembly functions below.
// In some cases, that means you can just call one of the raw assembly functions below directly.
#[inline(always)]
fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
mov_reg64_reg64(buf, dst, src);
neg_reg64(buf, dst);
cmovl_reg64_reg64(buf, dst, src);
}
#[inline(always)]
fn add_reg64_reg64_imm32<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src1: X86_64GPReg,
imm32: i32,
) {
if dst == src1 {
add_reg64_imm32(buf, dst, imm32);
} else {
mov_reg64_reg64(buf, dst, src1);
add_reg64_imm32(buf, dst, imm32);
}
}
#[inline(always)]
fn add_reg64_reg64_reg64<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src1: X86_64GPReg,
src2: X86_64GPReg,
) {
if dst == src1 {
add_reg64_reg64(buf, dst, src2);
} else if dst == src2 {
add_reg64_reg64(buf, dst, src1);
} else {
mov_reg64_reg64(buf, dst, src1);
add_reg64_reg64(buf, dst, src2);
}
}
#[inline(always)]
fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
mov_reg64_imm64(buf, dst, imm);
}
#[inline(always)]
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
mov_reg64_reg64(buf, dst, src);
}
#[inline(always)]
fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, offset: i32) {
mov_reg64_stack32(buf, dst, offset);
}
#[inline(always)]
fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: X86_64GPReg) {
mov_stack32_reg64(buf, offset, src);
}
#[inline(always)]
fn sub_reg64_reg64_imm32<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src1: X86_64GPReg,
imm32: i32,
) {
if dst == src1 {
sub_reg64_imm32(buf, dst, imm32);
} else {
mov_reg64_reg64(buf, dst, src1);
sub_reg64_imm32(buf, dst, imm32);
}
}
#[inline(always)]
fn ret<'a>(buf: &mut Vec<'a, u8>) {
ret(buf);
}
}
impl X86_64Assembler {
#[inline(always)]
fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
pop_reg64(buf, reg);
}
#[inline(always)]
fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
push_reg64(buf, reg);
}
}
const REX: u8 = 0x40;
const REX_W: u8 = REX + 0x8;
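// Editorial note: REX (0x40) is the x86-64 instruction prefix byte. Bit 3 (0x08) is REX.W for
// 64-bit operand size; add_reg_extension below sets bit 2 (REX.R, +4) so the ModRM reg field can
// reach R8..R15, and add_rm_extension sets bit 0 (REX.B, +1) for the r/m or opcode-embedded register.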
fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
#[inline(always)]
const fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 1
} else {
@ -37,11 +338,13 @@ fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
}
}
fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 {
#[inline(always)]
const fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 {
add_rm_extension(reg, byte)
}
fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
#[inline(always)]
const fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
if reg as u8 > 7 {
byte + 4
} else {
@ -49,316 +352,149 @@ fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
}
}
pub struct X86_64Assembler {}
pub struct X86_64WindowsFastcall {}
pub struct X86_64SystemV {}
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
impl CallConv<X86_64GPReg> for X86_64SystemV {
fn gp_param_regs() -> &'static [X86_64GPReg] {
&[
X86_64GPReg::RDI,
X86_64GPReg::RSI,
X86_64GPReg::RDX,
X86_64GPReg::RCX,
X86_64GPReg::R8,
X86_64GPReg::R9,
]
}
fn gp_return_regs() -> &'static [X86_64GPReg] {
&[X86_64GPReg::RAX, X86_64GPReg::RDX]
}
fn gp_default_free_regs() -> &'static [X86_64GPReg] {
&[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
]
}
fn caller_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::RSP,
X86_64GPReg::RSI,
X86_64GPReg::RDI,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
])
}
fn callee_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RBX,
X86_64GPReg::RBP,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
])
}
fn stack_pointer() -> X86_64GPReg {
X86_64GPReg::RSP
}
fn frame_pointer() -> X86_64GPReg {
X86_64GPReg::RBP
}
fn shadow_space_size() -> u8 {
0
}
fn red_zone_size() -> u8 {
128
/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits to r/m64.
#[inline(always)]
fn add_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `ADD r/m64,r64` -> Add r64 to r/m64.
#[inline(always)]
fn add_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
}
/// `CMOVL r64,r/m64` -> Move if less (SF≠ OF).
#[inline(always)]
fn cmovl_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
let rex = add_reg_extension(dst, REX_W);
let rex = add_rm_extension(src, rex);
let dst_mod = (dst as u8 % 8) << 3;
let src_mod = src as u8 % 8;
buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r/m64, imm32` -> Move imm32 sign extended to 64-bits to r/m64.
#[inline(always)]
fn mov_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `MOV r64, imm64` -> Move imm64 to r64.
#[inline(always)]
fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
mov_reg64_imm32(buf, dst, imm as i32)
} else {
let rex = add_opcode_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(10);
buf.extend(&[rex, 0xB8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
}
impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
fn gp_param_regs() -> &'static [X86_64GPReg] {
&[
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
]
}
fn gp_return_regs() -> &'static [X86_64GPReg] {
&[X86_64GPReg::RAX]
}
fn gp_default_free_regs() -> &'static [X86_64GPReg] {
&[
// The regs we want to use first should be at the end of this vec.
// We will use pop to get which reg to use next
// Use callee saved regs last.
X86_64GPReg::RBX,
// Don't use frame pointer: X86_64GPReg::RBP,
X86_64GPReg::RSI,
// Don't use stack pointer: X86_64GPReg::RSP,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
// Use caller saved regs first.
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
]
}
fn caller_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RAX,
X86_64GPReg::RCX,
X86_64GPReg::RDX,
X86_64GPReg::R8,
X86_64GPReg::R9,
X86_64GPReg::R10,
X86_64GPReg::R11,
])
}
fn callee_saved_regs() -> ImSet<X86_64GPReg> {
// TODO: stop using vec! here. I was just having trouble with some errors, but it shouldn't be needed.
ImSet::from(vec![
X86_64GPReg::RBX,
X86_64GPReg::RBP,
X86_64GPReg::RSI,
X86_64GPReg::RSP,
X86_64GPReg::RDI,
X86_64GPReg::R12,
X86_64GPReg::R13,
X86_64GPReg::R14,
X86_64GPReg::R15,
])
}
fn stack_pointer() -> X86_64GPReg {
X86_64GPReg::RSP
}
fn frame_pointer() -> X86_64GPReg {
X86_64GPReg::RBP
}
fn shadow_space_size() -> u8 {
32
}
fn red_zone_size() -> u8 {
0
/// `MOV r/m64,r64` -> Move r64 to r/m64.
#[inline(always)]
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r64,r/m64` -> Move r/m64 to r64.
#[inline(always)]
fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, offset: i32) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset, I didn't feel like figuring it out right now.
// Also, this may technically make for faster generation, since stack operations should be so common.
let rex = add_reg_extension(dst, REX_W);
let dst_mod = (dst as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
#[inline(always)]
fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: X86_64GPReg) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset, I didn't feel like figuring it out right now.
// Also, this may technically make for faster generation, since stack operations should be so common.
let rex = add_reg_extension(src, REX_W);
let src_mod = (src as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `NEG r/m64` -> Two's complement negate r/m64.
#[inline(always)]
fn neg_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let rex = add_rm_extension(reg, REX_W);
let reg_mod = reg as u8 % 8;
buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
}
/// `RET` -> Near return to calling procedure.
#[inline(always)]
fn ret<'a>(buf: &mut Vec<'a, u8>) {
buf.push(0xC3);
}
/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
#[inline(always)]
fn sub_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
#[inline(always)]
fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x58 + reg_mod]);
} else {
buf.push(0x58 + reg_mod);
}
}
impl Assembler<X86_64GPReg> for X86_64Assembler {
// Below here are the functions for all of the assembly instructions.
// Their names are based on the instruction and operators combined.
// You should call `buf.reserve()` if you push or extend more than once.
// Unit tests are added at the bottom of the file to ensure correct asm generation.
// Please keep these in alphanumeric order.
/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits to r/m64.
fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `ADD r/m64,r64` -> Add r64 to r/m64.
fn add_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
}
/// `CMOVL r64,r/m64` -> Move if less (SF≠ OF).
fn cmovl_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_reg_extension(dst, REX_W);
let rex = add_rm_extension(src, rex);
let dst_mod = (dst as u8 % 8) << 3;
let src_mod = src as u8 % 8;
buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r/m64, imm32` -> Move imm32 sign extended to 64-bits to r/m64.
fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `MOV r64, imm64` -> Move imm64 to r64.
fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
Self::mov_register64bit_immediate32bit(buf, dst, imm as i32)
} else {
let rex = add_opcode_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(10);
buf.extend(&[rex, 0xB8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
fn mov_register64bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
src: X86_64GPReg,
) {
let rex = add_rm_extension(dst, REX_W);
let rex = add_reg_extension(src, rex);
let dst_mod = dst as u8 % 8;
let src_mod = (src as u8 % 8) << 3;
buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
}
/// `MOV r64,r/m64` -> Move r/m64 to r64.
fn mov_register64bit_stackoffset32bit<'a>(
buf: &mut Vec<'a, u8>,
dst: X86_64GPReg,
offset: i32,
) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset, I didn't feel like figuring it out right now.
// Also, this may technically make for faster generation, since stack operations should be so common.
let rex = add_reg_extension(dst, REX_W);
let dst_mod = (dst as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `MOV r/m64,r64` -> Move r64 to r/m64.
fn mov_stackoffset32bit_register64bit<'a>(
buf: &mut Vec<'a, u8>,
offset: i32,
src: X86_64GPReg,
) {
// This can be optimized based on how many bytes the offset actually is.
// This function can probably be made to take any memory offset, I didn't feel like figuring it out right now.
// Also, this may technically make for faster generation, since stack operations should be so common.
let rex = add_reg_extension(src, REX_W);
let src_mod = (src as u8 % 8) << 3;
buf.reserve(8);
buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
buf.extend(&offset.to_le_bytes());
}
/// `NEG r/m64` -> Two's complement negate r/m64.
fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let rex = add_rm_extension(reg, REX_W);
let reg_mod = reg as u8 % 8;
buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
}
/// `RET` -> Near return to calling procedure.
fn ret<'a>(buf: &mut Vec<'a, u8>) {
buf.push(0xC3);
}
/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
// This can be optimized if the immediate is 1 byte.
let rex = add_rm_extension(dst, REX_W);
let dst_mod = dst as u8 % 8;
buf.reserve(7);
buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
buf.extend(&imm.to_le_bytes());
}
/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x58 + reg_mod]);
} else {
buf.push(0x58 + reg_mod);
}
}
/// `PUSH r64` -> Push r64 onto the stack.
fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x50 + reg_mod]);
} else {
buf.push(0x50 + reg_mod);
}
/// `PUSH r64` -> Push r64 onto the stack.
#[inline(always)]
fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
let reg_mod = reg as u8 % 8;
if reg as u8 > 7 {
let rex = add_opcode_extension(reg, REX);
buf.extend(&[rex, 0x50 + reg_mod]);
} else {
buf.push(0x50 + reg_mod);
}
}
@ -372,7 +508,7 @@ mod tests {
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
#[test]
fn test_add_register64bit_immediate32bit() {
fn test_add_reg64_imm32() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
@ -380,14 +516,14 @@ mod tests {
(X86_64GPReg::R15, [0x49, 0x81, 0xC7]),
] {
buf.clear();
X86_64Assembler::add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
add_reg64_imm32(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_add_register64bit_register64bit() {
fn test_add_reg64_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
@ -397,13 +533,13 @@ mod tests {
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x01, 0xFF]),
] {
buf.clear();
X86_64Assembler::add_register64bit_register64bit(&mut buf, *dst, *src);
add_reg64_reg64(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_cmovl_register64bit_register64bit() {
fn test_cmovl_reg64_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
@ -425,13 +561,13 @@ mod tests {
),
] {
buf.clear();
X86_64Assembler::cmovl_register64bit_register64bit(&mut buf, *dst, *src);
cmovl_reg64_reg64(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_immediate32bit() {
fn test_mov_reg64_imm32() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
@ -439,14 +575,14 @@ mod tests {
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
mov_reg64_imm32(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_immediate64bit() {
fn test_mov_reg64_imm64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
@ -454,7 +590,7 @@ mod tests {
(X86_64GPReg::R15, [0x49, 0xBF]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64);
mov_reg64_imm64(&mut buf, *dst, TEST_I64);
assert_eq!(expected, &buf[..2]);
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
}
@ -463,14 +599,14 @@ mod tests {
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64);
mov_reg64_imm64(&mut buf, *dst, TEST_I32 as i64);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_mov_register64bit_register64bit() {
fn test_mov_reg64_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, src), expected) in &[
@ -480,13 +616,13 @@ mod tests {
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x89, 0xFF]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_register64bit(&mut buf, *dst, *src);
mov_reg64_reg64(&mut buf, *dst, *src);
assert_eq!(expected, &buf[..]);
}
}
#[test]
fn test_mov_register64bit_stackoffset32bit() {
fn test_mov_reg64_stack32() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((dst, offset), expected) in &[
@ -494,14 +630,14 @@ mod tests {
((X86_64GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
] {
buf.clear();
X86_64Assembler::mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset);
mov_reg64_stack32(&mut buf, *dst, *offset);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_mov_stackoffset32bit_register64bit() {
fn test_mov_stack32_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for ((offset, src), expected) in &[
@ -509,14 +645,14 @@ mod tests {
((TEST_I32, X86_64GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]),
] {
buf.clear();
X86_64Assembler::mov_stackoffset32bit_register64bit(&mut buf, *offset, *src);
mov_stack32_reg64(&mut buf, *offset, *src);
assert_eq!(expected, &buf[..4]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
}
}
#[test]
fn test_neg_register64bit() {
fn test_neg_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (reg, expected) in &[
@ -524,7 +660,7 @@ mod tests {
(X86_64GPReg::R15, [0x49, 0xF7, 0xDF]),
] {
buf.clear();
X86_64Assembler::neg_register64bit(&mut buf, *reg);
neg_reg64(&mut buf, *reg);
assert_eq!(expected, &buf[..]);
}
}
@ -533,12 +669,12 @@ mod tests {
fn test_ret() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
X86_64Assembler::ret(&mut buf);
ret(&mut buf);
assert_eq!(&[0xC3], &buf[..]);
}
#[test]
fn test_sub_register64bit_immediate32bit() {
fn test_sub_reg64_imm32() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
@ -546,14 +682,14 @@ mod tests {
(X86_64GPReg::R15, [0x49, 0x81, 0xEF]),
] {
buf.clear();
X86_64Assembler::sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
sub_reg64_imm32(&mut buf, *dst, TEST_I32);
assert_eq!(expected, &buf[..3]);
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
}
}
#[test]
fn test_pop_register64bit() {
fn test_pop_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (dst, expected) in &[
@ -561,13 +697,13 @@ mod tests {
(X86_64GPReg::R15, vec![0x41, 0x5F]),
] {
buf.clear();
X86_64Assembler::pop_register64bit(&mut buf, *dst);
pop_reg64(&mut buf, *dst);
assert_eq!(&expected[..], &buf[..]);
}
}
#[test]
fn test_push_register64bit() {
fn test_push_reg64() {
let arena = bumpalo::Bump::new();
let mut buf = bumpalo::vec![in &arena];
for (src, expected) in &[
@ -575,7 +711,7 @@ mod tests {
(X86_64GPReg::R15, vec![0x41, 0x57]),
] {
buf.clear();
X86_64Assembler::push_register64bit(&mut buf, *src);
push_reg64(&mut buf, *src);
assert_eq!(&expected[..], &buf[..]);
}
}

View File

@ -1,4 +1,4 @@
use crate::generic64::{x86_64, Backend64Bit};
use crate::generic64::{aarch64, x86_64, Backend64Bit};
use crate::{Backend, Env, Relocation, INLINED_SYMBOLS};
use bumpalo::collections::Vec;
use object::write;
@ -22,7 +22,7 @@ pub fn build_module<'a>(
target: &Triple,
procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
) -> Result<Object, String> {
let (mut output, mut backend) = match target {
match target {
Triple {
architecture: TargetArch::X86_64,
binary_format: TargetBF::Elf,
@ -33,15 +33,42 @@ pub fn build_module<'a>(
x86_64::X86_64Assembler,
x86_64::X86_64SystemV,
> = Backend::new(env, target)?;
Ok((
Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little),
build_object(
env,
procedures,
backend,
))
Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little),
)
}
Triple {
architecture: TargetArch::Aarch64(_),
binary_format: TargetBF::Elf,
..
} => {
let backend: Backend64Bit<
aarch64::AArch64GPReg,
aarch64::AArch64Assembler,
aarch64::AArch64Call,
> = Backend::new(env, target)?;
build_object(
env,
procedures,
backend,
Object::new(BinaryFormat::Elf, Architecture::Aarch64, Endianness::Little),
)
}
x => Err(format! {
"the target, {:?}, is not yet implemented",
x}),
}?;
}
}
fn build_object<'a, B: Backend<'a>>(
env: &'a Env,
procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
mut backend: B,
mut output: Object,
) -> Result<Object, String> {
let text = output.section_id(StandardSection::Text);
let data_section = output.section_id(StandardSection::Data);
let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString);

View File

@ -9,7 +9,7 @@ extern crate libc;
#[macro_use]
mod helpers;
#[cfg(all(test, target_os = "linux", target_arch = "x86_64"))]
#[cfg(all(test, target_os = "linux", any(target_arch = "x86_64"/*, target_arch = "aarch64"*/)))]
mod gen_num {
//use roc_std::RocOrder;

View File

@ -6,8 +6,10 @@ pub enum LowLevel {
StrConcat,
StrIsEmpty,
StrStartsWith,
StrEndsWith,
StrSplit,
StrCountGraphemes,
StrFromInt,
ListLen,
ListGetUnsafe,
ListSet,

View File

@ -673,6 +673,8 @@ define_builtins! {
5 STR_SPLIT: "split"
6 STR_COUNT_GRAPHEMES: "countGraphemes"
7 STR_STARTS_WITH: "startsWith"
8 STR_ENDS_WITH: "endsWith"
9 STR_FROM_INT: "fromInt"
}
4 LIST: "List" => {
0 LIST_LIST: "List" imported // the List.List type alias

View File

@ -547,6 +547,7 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] {
| NumToFloat | Not | NumIsFinite | NumAtan | NumAcos | NumAsin => {
arena.alloc_slice_copy(&[irrelevant])
}
StrStartsWith => arena.alloc_slice_copy(&[owned, borrowed]),
StrStartsWith | StrEndsWith => arena.alloc_slice_copy(&[owned, borrowed]),
StrFromInt => arena.alloc_slice_copy(&[irrelevant]),
}
}

View File

@ -460,7 +460,7 @@ impl<'a> Layout<'a> {
pub fn is_refcounted(&self) -> bool {
match self {
Layout::Builtin(Builtin::List(_, _)) => true,
Layout::Builtin(Builtin::List(MemoryMode::Refcounted, _)) => true,
Layout::Builtin(Builtin::Str) => true,
Layout::RecursiveUnion(_) => true,
Layout::RecursivePointer => true,
@ -477,12 +477,12 @@ impl<'a> Layout<'a> {
match self {
Builtin(builtin) => builtin.is_refcounted(),
PhantomEmptyStruct => false,
Struct(fields) => fields.iter().any(|f| f.is_refcounted()),
Struct(fields) => fields.iter().any(|f| f.contains_refcounted()),
Union(fields) => fields
.iter()
.map(|ls| ls.iter())
.flatten()
.any(|f| f.is_refcounted()),
.any(|f| f.contains_refcounted()),
RecursiveUnion(_) => true,
Closure(_, closure_layout, _) => closure_layout.contains_refcounted(),
FunctionPointer(_, _) | RecursivePointer | Pointer(_) => false,

View File

@ -917,9 +917,13 @@ fn parse_def_signature<'a>(
fn loc_function_arg<'a>(min_indent: u16) -> impl Parser<'a, Located<Expr<'a>>> {
skip_first!(
// If this is a reserved keyword ("if", "then", "case", "when"), then
// it is not a function argument!
not(reserved_keyword()),
// If this is a reserved keyword ("if", "then", "case", "when"),
// followed by a blank space, then it is not a function argument!
//
// (The space is necessary because otherwise we'll get a false
// positive on function arguments beginning with keywords,
// e.g. `ifBlah` or `isSomething` will register as `if`/`is` keywords)
not(and!(reserved_keyword(), space1(min_indent))),
// Don't parse operators, because they have a higher precedence than function application.
// If we encounter one, we're done parsing function args!
move |arena, state| loc_parse_function_arg(min_indent, arena, state)

View File

@ -777,24 +777,25 @@ macro_rules! skip_first {
use $crate::parser::Fail;
let original_attempting = state.attempting;
let original_state = state.clone();
match $p1.parse(arena, state) {
Ok((_, state)) => match $p2.parse(arena, state) {
Ok((out2, state)) => Ok((out2, state)),
Err((fail, state)) => Err((
Err((fail, _)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
original_state,
)),
},
Err((fail, state)) => Err((
Err((fail, _)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
original_state,
)),
}
}
@ -810,24 +811,25 @@ macro_rules! skip_second {
use $crate::parser::Fail;
let original_attempting = state.attempting;
let original_state = state.clone();
match $p1.parse(arena, state) {
Ok((out1, state)) => match $p2.parse(arena, state) {
Ok((_, state)) => Ok((out1, state)),
Err((fail, state)) => Err((
Err((fail, _)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
original_state,
)),
},
Err((fail, state)) => Err((
Err((fail, _)) => Err((
Fail {
attempting: original_attempting,
..fail
},
state,
original_state,
)),
}
}

View File

@ -814,6 +814,71 @@ mod test_parse {
assert_eq!(Ok(expected), actual);
}
#[test]
fn var_when() {
// Regression test for identifiers beginning with keywords (if/then/else/when/is)
let arena = Bump::new();
let expected = Var {
module_name: "",
ident: "whenever",
};
let actual = parse_expr_with(&arena, "whenever");
assert_eq!(Ok(expected), actual);
}
#[test]
fn var_is() {
// Regression test for identifiers beginning with keywords (if/then/else/when/is)
let arena = Bump::new();
let expected = Var {
module_name: "",
ident: "isnt",
};
let actual = parse_expr_with(&arena, "isnt");
assert_eq!(Ok(expected), actual);
}
#[test]
fn var_if() {
// Regression test for identifiers beginning with keywords (if/then/else/when/is)
let arena = Bump::new();
let expected = Var {
module_name: "",
ident: "iffy",
};
let actual = parse_expr_with(&arena, "iffy");
assert_eq!(Ok(expected), actual);
}
#[test]
fn var_then() {
// Regression test for identifiers beginning with keywords (if/then/else/when/is)
let arena = Bump::new();
let expected = Var {
module_name: "",
ident: "thenever",
};
let actual = parse_expr_with(&arena, "thenever");
assert_eq!(Ok(expected), actual);
}
#[test]
fn var_else() {
// Regression test for identifiers beginning with keywords (if/then/else/when/is)
let arena = Bump::new();
let expected = Var {
module_name: "",
ident: "elsewhere",
};
let actual = parse_expr_with(&arena, "elsewhere");
assert_eq!(Ok(expected), actual);
}
#[test]
fn parenthetical_var() {
let arena = Bump::new();
@ -1511,6 +1576,32 @@ mod test_parse {
);
}
#[test]
fn if_def() {
let arena = Bump::new();
let newlines = bumpalo::vec![in &arena; Newline, Newline];
let def = Def::Body(
arena.alloc(Located::new(0, 0, 0, 4, Identifier("iffy"))),
arena.alloc(Located::new(0, 0, 5, 6, Num("5"))),
);
let loc_def = &*arena.alloc(Located::new(0, 0, 0, 6, def));
let defs = &[loc_def];
let ret = Expr::SpaceBefore(arena.alloc(Num("42")), newlines.into_bump_slice());
let loc_ret = Located::new(2, 2, 0, 2, ret);
let expected = Defs(defs, arena.alloc(loc_ret));
assert_parses_to(
indoc!(
r#"
iffy=5
42
"#
),
expected,
);
}
#[test]
fn one_spaced_def() {
let arena = Bump::new();
@ -2573,6 +2664,41 @@ mod test_parse {
assert_eq!(Ok(expected), actual);
}
#[test]
fn repro_keyword_bug() {
// Reproducing this bug requires a bizarre set of things to all be true:
//
// * Must be parsing a *module* def (nested expr defs don't repro this)
// * That top-level module def contains a def inside it
// * That inner def is defining a function
// * The name of the inner def begins with a keyword (`if`, `then`, `else`, `when`, `is`)
//
// If all of these are true, then lookups on that def get skipped over by the parser.
// If any one of the above is false, then everything works.
let arena = Bump::new();
let src = indoc!(
r#"
foo = \list ->
isTest = \_ -> 5
List.map list isTest
"#
);
let actual = module_defs()
.parse(&arena, State::new(src.as_bytes(), Attempting::Module))
.map(|tuple| tuple.0);
// It should occur twice in the debug output - once for the pattern,
// and then again for the lookup.
let occurrences = format!("{:?}", actual)
.split("isTest")
.collect::<std::vec::Vec<_>>()
.len()
- 1;
assert_eq!(occurrences, 2);
}
#[test]
fn standalone_module_defs() {
use roc_parse::ast::Def::*;

View File

@ -62,6 +62,16 @@ These are potentially inspirational resources for the editor's design.
* Excel and Google Sheets
* Not sure, maybe something they do well that we (code editors) could learn from
## Machine Learning Ideas
* Ability to record all changes to the abstract syntax tree, with user permission.
* I think it is possible to create powerful automatic error resolution by having a dataset of ASTs that contain a specific error, paired with the subsequent transformation that fixed it.
* GPT-3 can generate correct Python functions based on a comment describing the functionality, video [here](https://www.youtube.com/watch?v=utuz7wBGjKM). It's possible that training a model on ASTs may lead to better results than text-based models.
* Users with large private code bases could (re)train a publicly available error recovery model to get the benefits without having to share their code.
* It could be useful to a user who is creating a function to show them the most similar function (type signature, name, comment) from a public database plus their own private one. Say I was using a web framework and had just created a function that takes a multipart form as an argument; it would be great to have an example instantly available.
## General Thoughts/Ideas
Thoughts and ideas possibly taken from above inspirations or separate.
@ -76,6 +86,11 @@ Thoughts and ideas possibly taken from above inspirations or separate.
* Ability to show import connection within project visually
* This could be done by drawing connections between files or functions in the tree view. This would make it easier for people to get their bearings in new big projects.
* Connections could also be drawn between functions that call each other in the tree view. The connections could be animated to show the execution flow of the program.
* Ability to inline statements contained in called functions into the calling function for debugging.
* The value of expressions can be shown at the end of the line like in the [Inventing on Principle talk](https://youtu.be/8QiPFmIMxFc?t=1181)
* This would give a clear overview of the execution and should make it easy to pinpoint the line where the bug originates.
* That specific line can then be right-clicked to go to the actual function.
* Having to jump around between different functions and files is unnecessary and makes it difficult to see the forest for the trees.
* "Error mode" where the editor jumps you to the next error
* Similar in theory to diff tools that jump you to the next merge conflict
* dependency recommendation

0
editor/src/expr.rs Normal file
View File

View File

@ -4,7 +4,7 @@ app "effect-example" imports [ Effect ] provides [ main ] to "./platform"
main : Effect.Effect {} as Fx
main =
when if 1 == 1 then True 3 else False 3.14 is
True 3 -> Effect.putLine "Yay"
True n -> Effect.putLine (Str.fromInt n)
_ -> Effect.putLine "Yay"
# main : Effect.Effect {} as Fx

View File

@ -15,36 +15,48 @@ in with {
}) { };
isMacOS = currentOS == "darwin";
isLinux = currentOS == "linux";
isAarch64 = currentArch == "aarch64";
};
with (pkgs);
let
darwin-frameworks = if isMacOS then
with pkgs.darwin.apple_sdk.frameworks; [
AppKit
CoreFoundation
CoreServices
CoreVideo
Foundation
Metal
Security
]
else
[ ];
darwin-inputs =
if isMacOS then
with pkgs.darwin.apple_sdk.frameworks; [
AppKit
CoreFoundation
CoreServices
CoreVideo
Foundation
Metal
Security
]
else
[ ];
linux-only = if !isMacOS then [
vulkan-headers
vulkan-loader
vulkan-tools
vulkan-validation-layers
xorg.libX11
xorg.libXcursor
xorg.libXrandr
xorg.libXi
] else
[ ];
linux-inputs =
if isLinux then
[
vulkan-headers
vulkan-loader
vulkan-tools
vulkan-validation-layers
xorg.libX11
xorg.libXcursor
xorg.libXrandr
xorg.libXi
]
else
[ ];
nixos-env =
if isLinux && builtins.pathExists /etc/nixos/configuration.nix then
{ XDG_DATA_DIRS = "/run/opengl-driver/share:$XDG_DATA_DIRS";
}
else
{ };
llvmPkgs = pkgs.llvmPackages_10;
zig = import ./nix/zig.nix { inherit pkgs isMacOS isAarch64; };
@ -77,17 +89,18 @@ let
ccls
];
in mkShell {
buildInputs = inputs ++ darwin-frameworks ++ linux-only;
LLVM_SYS_100_PREFIX = "${llvmPkgs.llvm}";
in mkShell (nixos-env // {
buildInputs = inputs ++ darwin-inputs ++ linux-inputs;
# Additional Env vars
LLVM_SYS_100_PREFIX = "${llvmPkgs.llvm}";
APPEND_LIBRARY_PATH = stdenv.lib.makeLibraryPath
([ pkg-config llvmPkgs.libcxx llvmPkgs.libcxxabi libunwind ] ++ linux-only);
([ pkg-config llvmPkgs.libcxx llvmPkgs.libcxxabi libunwind ] ++ linux-inputs);
LD_LIBRARY_PATH = "$LD_LIBRARY_PATH:$APPEND_LIBRARY_PATH";
# Aliases don't work cross shell, so we do this
shellHook = ''
export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:$APPEND_LIBRARY_PATH"
export PATH="$PATH:$PWD/nix/bin"
'';
}
})