diff --git a/compiler/build/Cargo.toml b/compiler/build/Cargo.toml index a2a0eea088..4b48dd2d26 100644 --- a/compiler/build/Cargo.toml +++ b/compiler/build/Cargo.toml @@ -54,3 +54,8 @@ maplit = "1.0.1" indoc = "0.3.3" quickcheck = "0.8" quickcheck_macros = "0.8" + +[features] +target-arm = [] +target-aarch64 = [] +target-webassembly = [] diff --git a/compiler/builtins/bitcode/src/main.zig b/compiler/builtins/bitcode/src/main.zig index ed40fb62d2..500f482eb1 100644 --- a/compiler/builtins/bitcode/src/main.zig +++ b/compiler/builtins/bitcode/src/main.zig @@ -19,6 +19,7 @@ comptime { exportStrFn(str.countSegments, "count_segments"); exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters"); exportStrFn(str.startsWith, "starts_with"); + exportStrFn(str.strConcat, "concat"); } // Export helpers - Must be run inside a comptime diff --git a/compiler/builtins/bitcode/src/str.zig b/compiler/builtins/bitcode/src/str.zig index 0c9958622f..ddbb09b06e 100644 --- a/compiler/builtins/bitcode/src/str.zig +++ b/compiler/builtins/bitcode/src/str.zig @@ -122,6 +122,10 @@ const RocStr = extern struct { return if (self.is_small_str()) small_len else big_len; } + pub fn is_empty(self: RocStr) bool { + return self.len() == 0; + } + // Given a pointer to some bytes, write the first (len) bytes of this // RocStr's contents into it. // @@ -586,3 +590,145 @@ test "startsWith: 12345678912345678910 starts with 123456789123456789" { expect(startsWith(str_ptr, str_len, prefix_ptr, prefix_len)); } + +// Str.concat + +test "RocStr.concat: small concat small" { + const str1_len = 3; + var str1: [str1_len]u8 = "foo".*; + const str1_ptr: [*]u8 = &str1; + var roc_str1 = RocStr.init(str1_ptr, str1_len); + + const str2_len = 3; + var str2: [str2_len]u8 = "abc".*; + const str2_ptr: [*]u8 = &str2; + var roc_str2 = RocStr.init(str2_ptr, str2_len); + + const str3_len = 6; + var str3: [str3_len]u8 = "fooabc".*; + const str3_ptr: [*]u8 = &str3; + var roc_str3 = RocStr.init(str3_ptr, str3_len); + + const result = strConcat(8, InPlace.Clone, roc_str1, roc_str2); + + expect(roc_str3.eq(result)); + + roc_str1.drop(); + roc_str2.drop(); + roc_str3.drop(); + result.drop(); +} + +pub fn strConcat(ptr_size: u32, result_in_place: InPlace, arg1: RocStr, arg2: RocStr) callconv(.C) RocStr { + return switch (ptr_size) { + 4 => strConcatHelp(i32, result_in_place, arg1, arg2), + 8 => strConcatHelp(i64, result_in_place, arg1, arg2), + else => unreachable, + }; +} + +fn strConcatHelp(comptime T: type, result_in_place: InPlace, arg1: RocStr, arg2: RocStr) RocStr { + if (arg1.is_empty()) { + return cloneNonemptyStr(T, result_in_place, arg2); + } else if (arg2.is_empty()) { + return cloneNonemptyStr(T, result_in_place, arg1); + } else { + const combined_length = arg1.len() + arg2.len(); + + const small_str_bytes = 2 * @sizeOf(T); + const result_is_big = combined_length >= small_str_bytes; + + if (result_is_big) { + var result = allocate_str(T, result_in_place, combined_length); + + { + const old_if_small = &@bitCast([16]u8, arg1); + const old_if_big = @ptrCast([*]u8, arg1.str_bytes); + const old_bytes = if (arg1.is_small_str()) old_if_small else old_if_big; + + const new_bytes: [*]u8 = @ptrCast([*]u8, result.str_bytes); + + @memcpy(new_bytes, old_bytes, arg1.len()); + } + + { + const old_if_small = &@bitCast([16]u8, arg2); + const old_if_big = @ptrCast([*]u8, arg2.str_bytes); + const old_bytes = if (arg2.is_small_str()) old_if_small else old_if_big; + + const new_bytes = @ptrCast([*]u8, result.str_bytes) + arg1.len(); + + @memcpy(new_bytes, 
old_bytes, arg2.len()); + } + + return result; + } else { + var result = [16]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + // if the result is small, then for sure arg1 and arg2 are also small + + { + var old_bytes: [*]u8 = @ptrCast([*]u8, &@bitCast([16]u8, arg1)); + var new_bytes: [*]u8 = @ptrCast([*]u8, &result); + + @memcpy(new_bytes, old_bytes, arg1.len()); + } + + { + var old_bytes: [*]u8 = @ptrCast([*]u8, &@bitCast([16]u8, arg2)); + var new_bytes = @ptrCast([*]u8, &result) + arg1.len(); + + @memcpy(new_bytes, old_bytes, arg2.len()); + } + + const mask: u8 = 0b1000_0000; + const final_byte = @truncate(u8, combined_length) | mask; + + result[small_str_bytes - 1] = final_byte; + + return @bitCast(RocStr, result); + } + + return result; + } +} + +const InPlace = packed enum(u8) { + InPlace, + Clone, +}; + +fn cloneNonemptyStr(comptime T: type, in_place: InPlace, str: RocStr) RocStr { + if (str.is_small_str() or str.is_empty()) { + // just return the bytes + return str; + } else { + var new_str = allocate_str(T, in_place, str.str_len); + + var old_bytes: [*]u8 = @ptrCast([*]u8, str.str_bytes); + var new_bytes: [*]u8 = @ptrCast([*]u8, new_str.str_bytes); + + @memcpy(new_bytes, old_bytes, str.str_len); + + return new_str; + } +} + +fn allocate_str(comptime T: type, in_place: InPlace, number_of_chars: u64) RocStr { + const length = @sizeOf(T) + number_of_chars; + var new_bytes: [*]T = @ptrCast([*]T, @alignCast(@alignOf(T), malloc(length))); + + if (in_place == InPlace.InPlace) { + new_bytes[0] = @intCast(T, number_of_chars); + } else { + new_bytes[0] = std.math.minInt(T); + } + + var first_element = @ptrCast([*]align(@alignOf(T)) u8, new_bytes); + first_element += 8; + + return RocStr{ + .str_bytes = first_element, + .str_len = number_of_chars, + }; +} diff --git a/compiler/builtins/src/bitcode.rs b/compiler/builtins/src/bitcode.rs index aefb7bd186..2ad70214e6 100644 --- a/compiler/builtins/src/bitcode.rs +++ b/compiler/builtins/src/bitcode.rs @@ -24,6 +24,7 @@ pub const NUM_IS_FINITE: &str = "roc_builtins.num.is_finite"; pub const NUM_POW_INT: &str = "roc_builtins.num.pow_int"; pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments"; +pub const STR_CONCAT: &str = "roc_builtins.str.concat"; pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place"; pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters"; pub const STR_STARTS_WITH: &str = "roc_builtins.str.starts_with"; diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index cf88609054..b42461d47d 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -604,7 +604,9 @@ pub fn build_exp_expr<'a, 'ctx, 'env>( match expr { Literal(literal) => build_exp_literal(env, literal), - RunLowLevel(op, symbols) => run_low_level(env, scope, parent, layout, *op, symbols), + RunLowLevel(op, symbols) => { + run_low_level(env, layout_ids, scope, parent, layout, *op, symbols) + } ForeignCall { foreign_symbol, @@ -1165,12 +1167,10 @@ fn list_literal<'a, 'ctx, 'env>( let builder = env.builder; let len_u64 = elems.len() as u64; - let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; let ptr = { - let bytes_len = elem_bytes * len_u64; let len_type = env.ptr_int(); - let len = len_type.const_int(bytes_len, false); + let len = len_type.const_int(len_u64, false); allocate_list(env, inplace, elem_layout, len) @@ -2383,6 +2383,7 @@ fn call_with_args<'a, 'ctx, 'env>( } #[derive(Copy, Clone)] +#[repr(u8)] pub enum InPlace { 
InPlace, Clone, @@ -2409,6 +2410,7 @@ pub static COLD_CALL_CONV: u32 = 9; fn run_low_level<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, + layout_ids: &mut LayoutIds<'a>, scope: &Scope<'a, 'ctx>, parent: FunctionValue<'ctx>, layout: &Layout<'a>, @@ -2522,7 +2524,16 @@ fn run_low_level<'a, 'ctx, 'env>( let inplace = get_inplace_from_layout(layout); - list_map(env, inplace, parent, func, func_layout, list, list_layout) + list_map( + env, + layout_ids, + inplace, + parent, + func, + func_layout, + list, + list_layout, + ) } ListKeepIf => { // List.keepIf : List elem, (elem -> Bool) -> List elem diff --git a/compiler/gen/src/llvm/build_list.rs b/compiler/gen/src/llvm/build_list.rs index 904c3f0fe9..9c6c20bbb7 100644 --- a/compiler/gen/src/llvm/build_list.rs +++ b/compiler/gen/src/llvm/build_list.rs @@ -3,12 +3,13 @@ use crate::llvm::build::{ }; use crate::llvm::compare::build_eq; use crate::llvm::convert::{basic_type_from_layout, collection, get_ptr_type}; +use crate::llvm::refcounting::decrement_refcount_layout; use inkwell::builder::Builder; use inkwell::context::Context; use inkwell::types::{BasicTypeEnum, PointerType}; use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue}; use inkwell::{AddressSpace, IntPredicate}; -use roc_mono::layout::{Builtin, Layout, MemoryMode}; +use roc_mono::layout::{Builtin, Layout, LayoutIds, MemoryMode}; /// List.single : a -> List a pub fn list_single<'a, 'ctx, 'env>( @@ -1318,8 +1319,10 @@ pub fn list_keep_if_help<'a, 'ctx, 'env>( } /// List.map : List before, (before -> after) -> List after +#[allow(clippy::too_many_arguments)] pub fn list_map<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, + layout_ids: &mut LayoutIds<'a>, inplace: InPlace, parent: FunctionValue<'ctx>, func: BasicValueEnum<'ctx>, @@ -1365,7 +1368,11 @@ pub fn list_map<'a, 'ctx, 'env>( incrementing_elem_loop(builder, ctx, parent, list_ptr, len, "#index", list_loop); - store_list(env, ret_list_ptr, len) + let result = store_list(env, ret_list_ptr, len); + + decrement_refcount_layout(env, parent, layout_ids, list, list_layout); + + result }; if_list_is_not_empty(env, parent, non_empty_fn, list, list_layout, "List.map") @@ -2043,7 +2050,6 @@ pub fn allocate_list<'a, 'ctx, 'env>( let len_type = env.ptr_int(); let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64; let bytes_per_element = len_type.const_int(elem_bytes, false); - let number_of_data_bytes = builder.build_int_mul(bytes_per_element, length, "data_length"); let rc1 = match inplace { diff --git a/compiler/gen/src/llvm/build_str.rs b/compiler/gen/src/llvm/build_str.rs index 90dd22e58e..6b4a885331 100644 --- a/compiler/gen/src/llvm/build_str.rs +++ b/compiler/gen/src/llvm/build_str.rs @@ -1,9 +1,7 @@ use crate::llvm::build::{ call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope, }; -use crate::llvm::build_list::{ - allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list, -}; +use crate::llvm::build_list::{allocate_list, build_basic_phi2, load_list_ptr, store_list}; use crate::llvm::convert::collection; use inkwell::builder::Builder; use inkwell::types::BasicTypeEnum; @@ -90,333 +88,117 @@ pub fn str_split<'a, 'ctx, 'env>( ) } +/* +fn cast_to_zig_str( + env: &Env<'a, 'ctx, 'env>, + str_as_struct: StructValue<'ctx>, +) -> BasicValueEnum<'ctx> { + // get the RocStr type defined by zig + let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap(); + + // convert `{ *mut u8, i64 }` to `RocStr` + builder.build_bitcast(str_as_struct, 
roc_str_type, "convert_to_zig_rocstr"); +} + +fn cast_from_zig_str( + env: &Env<'a, 'ctx, 'env>, + str_as_struct: StructValue<'ctx>, +) -> BasicValueEnum<'ctx> { + let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)); + + // convert `RocStr` to `{ *mut u8, i64 }` + builder.build_bitcast(str_as_struct, ret_type, "convert_from_zig_rocstr"); +} +*/ + +fn str_symbol_to_i128<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + scope: &Scope<'a, 'ctx>, + symbol: Symbol, +) -> IntValue<'ctx> { + let str_ptr = ptr_from_symbol(scope, symbol); + + let i128_ptr = env + .builder + .build_bitcast( + *str_ptr, + env.context.i128_type().ptr_type(AddressSpace::Generic), + "cast", + ) + .into_pointer_value(); + + env.builder + .build_load(i128_ptr, "load_as_i128") + .into_int_value() +} + +fn zig_str_to_struct<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + zig_str: StructValue<'ctx>, +) -> StructValue<'ctx> { + let builder = env.builder; + + // get the RocStr type defined by zig + let zig_str_type = env.module.get_struct_type("str.RocStr").unwrap(); + + let ret_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes)); + + // a roundabout way of casting (LLVM does not accept a standard bitcast) + let allocation = builder.build_alloca(zig_str_type, "zig_result"); + + builder.build_store(allocation, zig_str); + + let ptr3 = builder + .build_bitcast( + allocation, + env.context.i128_type().ptr_type(AddressSpace::Generic), + "cast", + ) + .into_pointer_value(); + + let ptr4 = builder + .build_bitcast( + ptr3, + ret_type.into_struct_type().ptr_type(AddressSpace::Generic), + "cast", + ) + .into_pointer_value(); + + builder.build_load(ptr4, "load").into_struct_value() +} + /// Str.concat : Str, Str -> Str pub fn str_concat<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, inplace: InPlace, scope: &Scope<'a, 'ctx>, - parent: FunctionValue<'ctx>, - first_str_symbol: Symbol, - second_str_symbol: Symbol, + _parent: FunctionValue<'ctx>, + str1_symbol: Symbol, + str2_symbol: Symbol, ) -> BasicValueEnum<'ctx> { - let builder = env.builder; - let ctx = env.context; + // swap the arguments; second argument comes before the second in the output string + let str1_i128 = str_symbol_to_i128(env, scope, str1_symbol); + let str2_i128 = str_symbol_to_i128(env, scope, str2_symbol); - let second_str_ptr = ptr_from_symbol(scope, second_str_symbol); - let first_str_ptr = ptr_from_symbol(scope, first_str_symbol); - - let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)); - - load_str( + let zig_result = call_bitcode_fn( env, - parent, - *second_str_ptr, - ret_type, - |second_str_ptr, second_str_len, second_str_smallness| { - load_str( - env, - parent, - *first_str_ptr, - ret_type, - |first_str_ptr, first_str_len, first_str_smallness| { - // first_str_len > 0 - // We do this check to avoid allocating memory. If the first input - // str is empty, then we can just return the second str cloned - let first_str_length_comparison = str_is_not_empty(env, first_str_len); - - let if_first_str_is_empty = || { - // second_str_len > 0 - // We do this check to avoid allocating memory. 
If the second input - // str is empty, then we can just return an empty str - let second_str_length_comparison = str_is_not_empty(env, second_str_len); - - let if_second_str_is_nonempty = || { - let (new_wrapper, _) = clone_nonempty_str( - env, - inplace, - second_str_smallness, - second_str_len, - second_str_ptr, - ); - - BasicValueEnum::StructValue(new_wrapper) - }; - - let if_second_str_is_empty = || empty_list(env); - - build_basic_phi2( - env, - parent, - second_str_length_comparison, - if_second_str_is_nonempty, - if_second_str_is_empty, - ret_type, - ) - }; - - let if_first_str_is_not_empty = || { - let if_second_str_is_empty = || { - let (new_wrapper, _) = clone_nonempty_str( - env, - inplace, - first_str_smallness, - first_str_len, - first_str_ptr, - ); - - BasicValueEnum::StructValue(new_wrapper) - }; - - // second_str_len > 0 - // We do this check to avoid allocating memory. If the second input - // str is empty, then we can just return the first str cloned - let second_str_length_comparison = str_is_not_empty(env, second_str_len); - - let if_second_str_is_not_empty = || { - let combined_str_len = builder.build_int_add( - first_str_len, - second_str_len, - "add_list_lengths", - ); - - // The combined string is big iff its length is - // greater than or equal to the size in memory - // of a small str (e.g. len >= 16 on 64-bit targets) - let is_big = env.builder.build_int_compare( - IntPredicate::UGE, - combined_str_len, - env.ptr_int().const_int(env.small_str_bytes() as u64, false), - "str_is_big", - ); - - let if_big = || { - let combined_str_ptr = - allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len); - - // TODO replace FIRST_LOOP with a memcpy! - // FIRST LOOP - let first_loop = |first_index, first_str_elem| { - // The pointer to the element in the combined list - let combined_str_elem_ptr = unsafe { - builder.build_in_bounds_gep( - combined_str_ptr, - &[first_index], - "load_index_combined_list", - ) - }; - - // Mutate the new array in-place to change the element. - builder.build_store(combined_str_elem_ptr, first_str_elem); - }; - - let index_name = "#index"; - - let index_alloca = incrementing_elem_loop( - builder, - ctx, - parent, - first_str_ptr, - first_str_len, - index_name, - first_loop, - ); - - // Reset the index variable to 0 - builder - .build_store(index_alloca, ctx.i64_type().const_int(0, false)); - - // TODO replace SECOND_LOOP with a memcpy! - // SECOND LOOP - let second_loop = |second_index, second_str_elem| { - // The pointer to the element in the combined str. - // Note that the pointer does not start at the index - // 0, it starts at the index of first_str_len. In that - // sense it is "offset". - let offset_combined_str_char_ptr = unsafe { - builder.build_in_bounds_gep( - combined_str_ptr, - &[first_str_len], - "elem", - ) - }; - - // The pointer to the char from the second str - // in the combined list - let combined_str_char_ptr = unsafe { - builder.build_in_bounds_gep( - offset_combined_str_char_ptr, - &[second_index], - "load_index_combined_list", - ) - }; - - // Mutate the new array in-place to change the element. 
- builder.build_store(combined_str_char_ptr, second_str_elem); - }; - - incrementing_elem_loop( - builder, - ctx, - parent, - second_str_ptr, - second_str_len, - index_name, - second_loop, - ); - - store_list(env, combined_str_ptr, combined_str_len) - }; - - let if_small = || { - let combined_str_ptr = builder.build_array_alloca( - ctx.i8_type(), - ctx.i8_type().const_int(env.small_str_bytes() as u64, false), - "alloca_small_str", - ); - - // TODO replace FIRST_LOOP with a memcpy! - // FIRST LOOP - let first_loop = |first_index, first_str_elem| { - // The pointer to the element in the combined list - let combined_str_elem_ptr = unsafe { - builder.build_in_bounds_gep( - combined_str_ptr, - &[first_index], - "load_index_combined_list", - ) - }; - - // Mutate the new array in-place to change the element. - builder.build_store(combined_str_elem_ptr, first_str_elem); - }; - - let index_name = "#index"; - - let index_alloca = incrementing_elem_loop( - builder, - ctx, - parent, - first_str_ptr, - first_str_len, - index_name, - first_loop, - ); - - // Reset the index variable to 0 - builder - .build_store(index_alloca, ctx.i64_type().const_int(0, false)); - - // TODO replace SECOND_LOOP with a memcpy! - // SECOND LOOP - let second_loop = |second_index, second_str_elem| { - // The pointer to the element in the combined str. - // Note that the pointer does not start at the index - // 0, it starts at the index of first_str_len. In that - // sense it is "offset". - let offset_combined_str_char_ptr = unsafe { - builder.build_in_bounds_gep( - combined_str_ptr, - &[first_str_len], - "elem", - ) - }; - - // The pointer to the char from the second str - // in the combined list - let combined_str_char_ptr = unsafe { - builder.build_in_bounds_gep( - offset_combined_str_char_ptr, - &[second_index], - "load_index_combined_list", - ) - }; - - // Mutate the new array in-place to change the element. - builder.build_store(combined_str_char_ptr, second_str_elem); - }; - - incrementing_elem_loop( - builder, - ctx, - parent, - second_str_ptr, - second_str_len, - index_name, - second_loop, - ); - - let final_byte = builder.build_int_cast( - combined_str_len, - ctx.i8_type(), - "str_len_to_i8", - ); - - let final_byte = builder.build_or( - final_byte, - ctx.i8_type().const_int(0b1000_0000, false), - "str_len_set_discriminant", - ); - - let final_byte_ptr = unsafe { - builder.build_in_bounds_gep( - combined_str_ptr, - &[ctx - .i8_type() - .const_int(env.small_str_bytes() as u64 - 1, false)], - "str_literal_final_byte", - ) - }; - - builder.build_store(final_byte_ptr, final_byte); - - builder.build_load( - builder - .build_bitcast( - combined_str_ptr, - collection(ctx, env.ptr_bytes) - .ptr_type(AddressSpace::Generic), - "cast_collection", - ) - .into_pointer_value(), - "small_str_array", - ) - }; - - // If the combined length fits in a small string, - // write into a small string! 
- build_basic_phi2( - env, - parent, - is_big, - // the result of a Str.concat is most likely big - if_big, - if_small, - BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)), - ) - }; - - build_basic_phi2( - env, - parent, - second_str_length_comparison, - if_second_str_is_not_empty, - if_second_str_is_empty, - BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)), - ) - }; - - build_basic_phi2( - env, - parent, - first_str_length_comparison, - if_first_str_is_not_empty, - if_first_str_is_empty, - BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)), - ) - }, - ) - }, + &[ + env.context + .i32_type() + .const_int(env.ptr_bytes as u64, false) + .into(), + env.context + .i8_type() + .const_int(inplace as u64, false) + .into(), + str1_i128.into(), + str2_i128.into(), + ], + &bitcode::STR_CONCAT, ) + .into_struct_value(); + + zig_str_to_struct(env, zig_result).into() } /// Obtain the string's length, cast from i8 to usize @@ -511,82 +293,6 @@ enum Smallness { Big, } -fn clone_nonempty_str<'a, 'ctx, 'env>( - env: &Env<'a, 'ctx, 'env>, - inplace: InPlace, - smallness: Smallness, - len: IntValue<'ctx>, - bytes_ptr: PointerValue<'ctx>, -) -> (StructValue<'ctx>, PointerValue<'ctx>) { - let builder = env.builder; - let ctx = env.context; - let ptr_bytes = env.ptr_bytes; - - // Allocate space for the new str that we'll copy into. - match smallness { - Smallness::Small => { - let wrapper_struct_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr); - let wrapper_struct = builder.build_load(wrapper_struct_ptr, "str_wrapper"); - let alloca = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone"); - - builder.build_store(alloca, wrapper_struct); - - (wrapper_struct.into_struct_value(), alloca) - } - Smallness::Big => { - let clone_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len); - - // TODO check if malloc returned null; if so, runtime error for OOM! - - // Copy the bytes from the original array into the new - // one we just malloc'd. 
- builder - .build_memcpy(clone_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len) - .unwrap(); - - // Create a fresh wrapper struct for the newly populated array - let struct_type = collection(ctx, env.ptr_bytes); - let mut struct_val; - - // Store the pointer - struct_val = builder - .build_insert_value( - struct_type.get_undef(), - clone_ptr, - Builtin::WRAPPER_PTR, - "insert_ptr", - ) - .unwrap(); - - // Store the length - struct_val = builder - .build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len") - .unwrap(); - - let answer = builder - .build_bitcast( - struct_val.into_struct_value(), - collection(ctx, ptr_bytes), - "cast_collection", - ) - .into_struct_value(); - - (answer, clone_ptr) - } - } -} - -fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>( - env: &Env<'a, 'ctx, 'env>, - bytes_ptr: PointerValue<'ctx>, -) -> PointerValue<'ctx> { - let struct_ptr_type = collection(env.context, env.ptr_bytes).ptr_type(AddressSpace::Generic); - - env.builder - .build_bitcast(bytes_ptr, struct_ptr_type, "str_as_struct_ptr") - .into_pointer_value() -} - fn cast_str_wrapper_to_array<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, wrapper_ptr: PointerValue<'ctx>, @@ -661,6 +367,7 @@ fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>) .into_int_value() } +#[allow(dead_code)] fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> { env.builder.build_int_compare( IntPredicate::UGT, diff --git a/compiler/gen_dev/Cargo.toml b/compiler/gen_dev/Cargo.toml index e27ab63099..f7c41ebbe0 100644 --- a/compiler/gen_dev/Cargo.toml +++ b/compiler/gen_dev/Cargo.toml @@ -42,3 +42,6 @@ bumpalo = { version = "3.2", features = ["collections"] } libc = "0.2" tempfile = "3.1.0" itertools = "0.9" + +[features] +target-aarch64 = ["roc_build/target-aarch64"] diff --git a/compiler/gen_dev/src/generic64/aarch64.rs b/compiler/gen_dev/src/generic64/aarch64.rs new file mode 100644 index 0000000000..df58823d12 --- /dev/null +++ b/compiler/gen_dev/src/generic64/aarch64.rs @@ -0,0 +1,814 @@ +use crate::generic64::{Assembler, CallConv, GPRegTrait}; +use bumpalo::collections::Vec; + +#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)] +#[allow(dead_code)] +pub enum AArch64GPReg { + X0 = 0, + X1 = 1, + X2 = 2, + X3 = 3, + X4 = 4, + X5 = 5, + X6 = 6, + X7 = 7, + XR = 8, + X9 = 9, + X10 = 10, + X11 = 11, + X12 = 12, + X13 = 13, + X14 = 14, + X15 = 15, + IP0 = 16, + IP1 = 17, + PR = 18, + X19 = 19, + X20 = 20, + X21 = 21, + X22 = 22, + X23 = 23, + X24 = 24, + X25 = 25, + X26 = 26, + X27 = 27, + X28 = 28, + FP = 29, + LR = 30, + /// This can mean Zero or Stack Pointer depending on the context. + ZRSP = 31, +} + +impl GPRegTrait for AArch64GPReg {} + +pub struct AArch64Assembler {} + +// AArch64Call may need to eventually be split by OS, +// but I think with how we use it, they may all be the same. +pub struct AArch64Call {} + +const STACK_ALIGNMENT: u8 = 16; + +impl CallConv for AArch64Call { + const GP_PARAM_REGS: &'static [AArch64GPReg] = &[ + AArch64GPReg::X0, + AArch64GPReg::X1, + AArch64GPReg::X2, + AArch64GPReg::X3, + AArch64GPReg::X4, + AArch64GPReg::X5, + AArch64GPReg::X6, + AArch64GPReg::X7, + ]; + const GP_RETURN_REGS: &'static [AArch64GPReg] = Self::GP_PARAM_REGS; + const GP_DEFAULT_FREE_REGS: &'static [AArch64GPReg] = &[ + // The regs we want to use first should be at the end of this vec. 
+ // We will use pop to get which reg to use next + + // Don't use frame pointer: AArch64GPReg::FP, + // Don't user indirect result location: AArch64GPReg::XR, + // Don't use platform register: AArch64GPReg::PR, + // Don't use link register: AArch64GPReg::LR, + // Don't use zero register/stack pointer: AArch64GPReg::ZRSP, + + // Use callee saved regs last. + AArch64GPReg::X19, + AArch64GPReg::X20, + AArch64GPReg::X21, + AArch64GPReg::X22, + AArch64GPReg::X23, + AArch64GPReg::X24, + AArch64GPReg::X25, + AArch64GPReg::X26, + AArch64GPReg::X27, + AArch64GPReg::X28, + // Use caller saved regs first. + AArch64GPReg::X0, + AArch64GPReg::X1, + AArch64GPReg::X2, + AArch64GPReg::X3, + AArch64GPReg::X4, + AArch64GPReg::X5, + AArch64GPReg::X6, + AArch64GPReg::X7, + AArch64GPReg::X9, + AArch64GPReg::X10, + AArch64GPReg::X11, + AArch64GPReg::X12, + AArch64GPReg::X13, + AArch64GPReg::X14, + AArch64GPReg::X15, + AArch64GPReg::IP0, + AArch64GPReg::IP1, + ]; + + const SHADOW_SPACE_SIZE: u8 = 0; + + #[inline(always)] + fn callee_saved(reg: &AArch64GPReg) -> bool { + matches!( + reg, + AArch64GPReg::X19 + | AArch64GPReg::X20 + | AArch64GPReg::X21 + | AArch64GPReg::X22 + | AArch64GPReg::X23 + | AArch64GPReg::X24 + | AArch64GPReg::X25 + | AArch64GPReg::X26 + | AArch64GPReg::X27 + | AArch64GPReg::X28 + ) + } + + #[inline(always)] + fn setup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[AArch64GPReg], + requested_stack_size: i32, + ) -> Result { + // full size is upcast to i64 to make sure we don't overflow here. + let mut full_size = 8 * saved_regs.len() as i64 + requested_stack_size as i64; + if !leaf_function { + full_size += 8; + } + let alignment = if full_size <= 0 { + 0 + } else { + full_size % STACK_ALIGNMENT as i64 + }; + let offset = if alignment == 0 { + 0 + } else { + STACK_ALIGNMENT - alignment as u8 + }; + if let Some(aligned_stack_size) = + requested_stack_size.checked_add(8 * saved_regs.len() as i32 + offset as i32) + { + if aligned_stack_size > 0 { + AArch64Assembler::sub_reg64_reg64_imm32( + buf, + AArch64GPReg::ZRSP, + AArch64GPReg::ZRSP, + aligned_stack_size, + ); + + // All the following stores could be optimized by using `STP` to store pairs. + let mut offset = aligned_stack_size; + if !leaf_function { + offset -= 8; + AArch64Assembler::mov_stack32_reg64(buf, offset, AArch64GPReg::LR); + offset -= 8; + AArch64Assembler::mov_stack32_reg64(buf, offset, AArch64GPReg::FP); + } + for reg in saved_regs { + offset -= 8; + AArch64Assembler::mov_stack32_reg64(buf, offset, *reg); + } + Ok(aligned_stack_size) + } else { + Ok(0) + } + } else { + Err("Ran out of stack space".to_string()) + } + } + + #[inline(always)] + fn cleanup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[AArch64GPReg], + aligned_stack_size: i32, + ) -> Result<(), String> { + if aligned_stack_size > 0 { + // All the following stores could be optimized by using `STP` to store pairs. 
+            let mut offset = aligned_stack_size;
+            if !leaf_function {
+                offset -= 8;
+                AArch64Assembler::mov_reg64_stack32(buf, AArch64GPReg::LR, offset);
+                offset -= 8;
+                AArch64Assembler::mov_reg64_stack32(buf, AArch64GPReg::FP, offset);
+            }
+            for reg in saved_regs {
+                offset -= 8;
+                AArch64Assembler::mov_reg64_stack32(buf, *reg, offset);
+            }
+            AArch64Assembler::add_reg64_reg64_imm32(
+                buf,
+                AArch64GPReg::ZRSP,
+                AArch64GPReg::ZRSP,
+                aligned_stack_size,
+            );
+        }
+        Ok(())
+    }
+}
+
+impl Assembler for AArch64Assembler {
+    #[inline(always)]
+    fn abs_reg64_reg64<'a>(_buf: &mut Vec<'a, u8>, _dst: AArch64GPReg, _src: AArch64GPReg) {
+        unimplemented!("abs_reg64_reg64 is not yet implemented for AArch64");
+    }
+
+    #[inline(always)]
+    fn add_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: AArch64GPReg,
+        src: AArch64GPReg,
+        imm32: i32,
+    ) {
+        if imm32 < 0 {
+            unimplemented!("immediate additions with values less than 0 are not yet implemented");
+        } else if imm32 < 0xFFF {
+            add_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
+        } else {
+            unimplemented!(
+                "immediate additions with values greater than 12bits are not yet implemented"
+            );
+        }
+    }
+
+    #[inline(always)]
+    fn add_reg64_reg64_reg64<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: AArch64GPReg,
+        src1: AArch64GPReg,
+        src2: AArch64GPReg,
+    ) {
+        add_reg64_reg64_reg64(buf, dst, src1, src2);
+    }
+
+    #[inline(always)]
+    fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm: i64) {
+        let mut remaining = imm as u64;
+        movz_reg64_imm16(buf, dst, remaining as u16, 0);
+        remaining >>= 16;
+        if remaining > 0 {
+            movk_reg64_imm16(buf, dst, remaining as u16, 1);
+        }
+        remaining >>= 16;
+        if remaining > 0 {
+            movk_reg64_imm16(buf, dst, remaining as u16, 2);
+        }
+        remaining >>= 16;
+        if remaining > 0 {
+            movk_reg64_imm16(buf, dst, remaining as u16, 3);
+        }
+    }
+
+    #[inline(always)]
+    fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
+        mov_reg64_reg64(buf, dst, src);
+    }
+
+    #[inline(always)]
+    fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, offset: i32) {
+        if offset < 0 {
+            unimplemented!("negative stack offsets are not yet implemented for AArch64");
+        } else if offset < (0xFFF << 8) {
+            debug_assert!(offset % 8 == 0);
+            ldr_reg64_imm12(buf, dst, AArch64GPReg::ZRSP, (offset as u16) >> 3);
+        } else {
+            unimplemented!("stack offsets over 32k are not yet implemented for AArch64");
+        }
+    }
+
+    #[inline(always)]
+    fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: AArch64GPReg) {
+        if offset < 0 {
+            unimplemented!("negative stack offsets are not yet implemented for AArch64");
+        } else if offset < (0xFFF << 8) {
+            debug_assert!(offset % 8 == 0);
+            str_reg64_imm12(buf, src, AArch64GPReg::ZRSP, (offset as u16) >> 3);
+        } else {
+            unimplemented!("stack offsets over 32k are not yet implemented for AArch64");
+        }
+    }
+
+    #[inline(always)]
+    fn sub_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: AArch64GPReg,
+        src: AArch64GPReg,
+        imm32: i32,
+    ) {
+        if imm32 < 0 {
+            unimplemented!(
+                "immediate subtractions with values less than 0 are not yet implemented"
+            );
+        } else if imm32 < 0xFFF {
+            sub_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
+        } else {
+            unimplemented!(
+                "immediate subtractions with values greater than 12bits are not yet implemented"
+            );
+        }
+    }
+
+    #[inline(always)]
+    fn ret<'a>(buf: &mut Vec<'a, u8>) {
+        ret_reg64(buf, AArch64GPReg::LR)
+    }
+}
+
+impl AArch64Assembler {}
+
+/// AArch64Instruction maps all instructions to an enum.
+/// Decoding the function should be cheap because we will always inline. +/// All of the operations should resolved by constants, leave just some bit manipulation. +/// Enums may not be complete since we will only add what we need. +#[derive(Debug)] +enum AArch64Instruction { + _Reserved, + _SVE, + DPImm(DPImmGroup), + Branch(BranchGroup), + LdStr(LdStrGroup), + DPReg(DPRegGroup), + _DPFloat, +} + +#[derive(Debug)] +enum BranchGroup { + UnconditionBranchReg { + opc: u8, + op2: u8, + op3: u8, + reg_n: AArch64GPReg, + op4: u8, + }, +} + +#[derive(Debug)] +enum DPRegGroup { + AddSubShifted { + sf: bool, + subtract: bool, + set_flags: bool, + shift: u8, + reg_m: AArch64GPReg, + imm6: u8, + reg_n: AArch64GPReg, + reg_d: AArch64GPReg, + }, + Logical { + sf: bool, + op: DPRegLogicalOp, + shift: u8, + reg_m: AArch64GPReg, + imm6: u8, + reg_n: AArch64GPReg, + reg_d: AArch64GPReg, + }, +} + +#[derive(Debug)] +enum DPImmGroup { + AddSubImm { + sf: bool, + subtract: bool, + set_flags: bool, + shift: bool, + imm12: u16, + reg_n: AArch64GPReg, + reg_d: AArch64GPReg, + }, + MoveWide { + sf: bool, + opc: u8, + hw: u8, + imm16: u16, + reg_d: AArch64GPReg, + }, +} + +#[derive(Debug)] +enum LdStrGroup { + UnsignedImm { + size: u8, + v: bool, + opc: u8, + imm12: u16, + reg_n: AArch64GPReg, + reg_t: AArch64GPReg, + }, +} + +#[derive(Debug)] +#[allow(dead_code)] +enum DPRegLogicalOp { + AND, + BIC, + ORR, + ORN, + EOR, + EON, + ANDS, + BICS, +} + +#[inline(always)] +fn build_instruction(inst: AArch64Instruction) -> [u8; 4] { + let mut out: u32 = 0; + match inst { + AArch64Instruction::Branch(branch) => { + out |= 0b101 << 26; + match branch { + BranchGroup::UnconditionBranchReg { + opc, + op2, + op3, + reg_n, + op4, + } => { + debug_assert!(opc <= 0b1111); + debug_assert!(op2 <= 0b11111); + debug_assert!(op3 <= 0b111111); + debug_assert!(op4 <= 0b1111); + out |= 0b1101011 << 25; + out |= (opc as u32) << 21; + out |= (op2 as u32) << 16; + out |= (op3 as u32) << 10; + out |= (reg_n as u32) << 5; + out |= op4 as u32; + } + } + } + AArch64Instruction::DPImm(dpimm) => { + out |= 0b100 << 26; + match dpimm { + DPImmGroup::MoveWide { + sf, + opc, + hw, + imm16, + reg_d, + } => { + out |= (sf as u32) << 31; + out |= (opc as u32) << 29; + out |= 0b101 << 23; + out |= (hw as u32) << 21; + out |= (imm16 as u32) << 5; + out |= reg_d as u32; + } + DPImmGroup::AddSubImm { + sf, + subtract, + set_flags, + shift, + imm12, + reg_n, + reg_d, + } => { + debug_assert!(imm12 <= 0xFFF); + out |= (sf as u32) << 31; + out |= (subtract as u32) << 30; + out |= (set_flags as u32) << 29; + out |= 0b010 << 23; + out |= (shift as u32) << 22; + out |= (imm12 as u32) << 10; + out |= (reg_n as u32) << 5; + out |= reg_d as u32; + } + } + } + AArch64Instruction::DPReg(dpreg) => { + out |= 0b101 << 25; + match dpreg { + DPRegGroup::Logical { + sf, + op, + shift, + reg_m, + imm6, + reg_n, + reg_d, + } => { + debug_assert!(shift <= 0b11); + debug_assert!(imm6 <= 0b111111); + let (opc, n) = match op { + DPRegLogicalOp::AND => (0b00, 0), + DPRegLogicalOp::BIC => (0b00, 1), + DPRegLogicalOp::ORR => (0b01, 0), + DPRegLogicalOp::ORN => (0b01, 1), + DPRegLogicalOp::EOR => (0b10, 0), + DPRegLogicalOp::EON => (0b10, 1), + DPRegLogicalOp::ANDS => (0b11, 0), + DPRegLogicalOp::BICS => (0b11, 1), + }; + out |= (sf as u32) << 31; + out |= opc << 29; + out |= (shift as u32) << 22; + out |= n << 21; + out |= (reg_m as u32) << 16; + out |= (imm6 as u32) << 10; + out |= (reg_n as u32) << 5; + out |= reg_d as u32; + } + DPRegGroup::AddSubShifted { + sf, + subtract, 
+                set_flags,
+                shift,
+                reg_m,
+                imm6,
+                reg_n,
+                reg_d,
+            } => {
+                debug_assert!(shift <= 0b11);
+                debug_assert!(imm6 <= 0b111111);
+                out |= (sf as u32) << 31;
+                out |= (subtract as u32) << 30;
+                out |= (set_flags as u32) << 29;
+                out |= 0b1 << 24;
+                out |= (shift as u32) << 22;
+                out |= (reg_m as u32) << 16;
+                out |= (imm6 as u32) << 10;
+                out |= (reg_n as u32) << 5;
+                out |= reg_d as u32;
+            }
+        }
+    }
+    AArch64Instruction::LdStr(ldstr) => {
+        out |= 0b1 << 27;
+        match ldstr {
+            LdStrGroup::UnsignedImm {
+                size,
+                v,
+                opc,
+                imm12,
+                reg_n,
+                reg_t,
+            } => {
+                debug_assert!(size <= 0b11);
+                debug_assert!(imm12 <= 0xFFF);
+                out |= (size as u32) << 30;
+                out |= 0b11 << 28;
+                out |= (v as u32) << 26;
+                out |= 0b1 << 24;
+                out |= (opc as u32) << 22;
+                out |= (imm12 as u32) << 10;
+                out |= (reg_n as u32) << 5;
+                out |= reg_t as u32;
+            }
+        }
+    }
+    x => unimplemented!("The instruction, {:?}, has not been implemented yet", x),
+    }
+    out.to_le_bytes()
+}
+
+// Below here are the functions for all of the assembly instructions.
+// Their names are based on the instruction and operators combined.
+// You should call `buf.reserve()` if you push or extend more than once.
+// Unit tests are added at the bottom of the file to ensure correct asm generation.
+// Please keep these in alphanumeric order.
+
+/// `ADD Xd, Xn, imm12` -> Add Xn and imm12 and place the result into Xd.
+#[inline(always)]
+fn add_reg64_reg64_imm12<'a>(
+    buf: &mut Vec<'a, u8>,
+    dst: AArch64GPReg,
+    src: AArch64GPReg,
+    imm12: u16,
+) {
+    buf.extend(&build_instruction(AArch64Instruction::DPImm(
+        DPImmGroup::AddSubImm {
+            sf: true,
+            subtract: false,
+            set_flags: false,
+            shift: false,
+            imm12,
+            reg_n: src,
+            reg_d: dst,
+        },
+    )));
+}
+
+/// `ADD Xd, Xm, Xn` -> Add Xm and Xn and place the result into Xd.
+#[inline(always)]
+fn add_reg64_reg64_reg64<'a>(
+    buf: &mut Vec<'a, u8>,
+    dst: AArch64GPReg,
+    src1: AArch64GPReg,
+    src2: AArch64GPReg,
+) {
+    buf.extend(&build_instruction(AArch64Instruction::DPReg(
+        DPRegGroup::AddSubShifted {
+            sf: true,
+            subtract: false,
+            set_flags: false,
+            shift: 0,
+            reg_m: src1,
+            imm6: 0,
+            reg_n: src2,
+            reg_d: dst,
+        },
+    )));
+}
+
+/// `LDR Xt, [Xn, #offset]` -> Load Xn + Offset into Xt. ZRSP is SP.
+/// Note: imm12 is the offset divided by 8.
+#[inline(always)]
+fn ldr_reg64_imm12<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, base: AArch64GPReg, imm12: u16) {
+    debug_assert!(imm12 <= 0xFFF);
+    buf.extend(&build_instruction(AArch64Instruction::LdStr(
+        LdStrGroup::UnsignedImm {
+            size: 0b11,
+            v: false,
+            opc: 0b01,
+            imm12,
+            reg_n: base,
+            reg_t: dst,
+        },
+    )));
+}
+
+/// `MOV Xd, Xm` -> Move Xm to Xd.
+#[inline(always)]
+fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
+    // MOV is equivalent to `ORR Xd, XZR, XM` in AARCH64.
+    buf.extend(&build_instruction(AArch64Instruction::DPReg(
+        DPRegGroup::Logical {
+            sf: true,
+            op: DPRegLogicalOp::ORR,
+            shift: 0,
+            reg_m: src,
+            imm6: 0,
+            reg_n: AArch64GPReg::ZRSP,
+            reg_d: dst,
+        },
+    )));
+}
+
+/// `MOVK Xd, imm16` -> Keeps Xd and moves an optionally shifted imm16 to Xd.
+#[inline(always)]
+fn movk_reg64_imm16<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm16: u16, hw: u8) {
+    debug_assert!(hw <= 0b11);
+    // MOV is equivalent to `ORR Xd, XZR, XM` in AARCH64.
+    buf.extend(&build_instruction(AArch64Instruction::DPImm(
+        DPImmGroup::MoveWide {
+            sf: true,
+            opc: 0b11,
+            hw,
+            imm16,
+            reg_d: dst,
+        },
+    )));
+}
+
+/// `MOVZ Xd, imm16` -> Zeros Xd and moves an optionally shifted imm16 to Xd.
+#[inline(always)] +fn movz_reg64_imm16<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm16: u16, hw: u8) { + debug_assert!(hw <= 0b11); + // MOV is equvalent to `ORR Xd, XZR, XM` in AARCH64. + buf.extend(&build_instruction(AArch64Instruction::DPImm( + DPImmGroup::MoveWide { + sf: true, + opc: 0b10, + hw, + imm16, + reg_d: dst, + }, + ))); +} + +/// `STR Xt, [Xn, #offset]` -> Store Xt to Xn + Offset. ZRSP is SP. +/// Note: imm12 is the offest divided by 8. +#[inline(always)] +fn str_reg64_imm12<'a>(buf: &mut Vec<'a, u8>, src: AArch64GPReg, base: AArch64GPReg, imm12: u16) { + debug_assert!(imm12 <= 0xFFF); + buf.extend(&build_instruction(AArch64Instruction::LdStr( + LdStrGroup::UnsignedImm { + size: 0b11, + v: false, + opc: 0b00, + imm12, + reg_n: base, + reg_t: src, + }, + ))); +} + +/// `SUB Xd, Xn, imm12` -> Subtract Xn and imm12 and place the result into Xd. +#[inline(always)] +fn sub_reg64_reg64_imm12<'a>( + buf: &mut Vec<'a, u8>, + dst: AArch64GPReg, + src: AArch64GPReg, + imm12: u16, +) { + buf.extend(&build_instruction(AArch64Instruction::DPImm( + DPImmGroup::AddSubImm { + sf: true, + subtract: true, + set_flags: false, + shift: false, + imm12, + reg_n: src, + reg_d: dst, + }, + ))); +} + +/// `RET Xn` -> Return to the address stored in Xn. +#[inline(always)] +fn ret_reg64<'a>(buf: &mut Vec<'a, u8>, xn: AArch64GPReg) { + buf.extend(&build_instruction(AArch64Instruction::Branch( + BranchGroup::UnconditionBranchReg { + opc: 0b0010, + op2: 0b11111, + op3: 0b000000, + reg_n: xn, + op4: 0b000, + }, + ))); +} + +#[cfg(test)] +mod tests { + use super::*; + + const TEST_U16: u16 = 0x1234; + //const TEST_I32: i32 = 0x12345678; + //const TEST_I64: i64 = 0x12345678_9ABCDEF0; + + #[test] + fn test_add_reg64_reg64_reg64() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + add_reg64_reg64_reg64( + &mut buf, + AArch64GPReg::X10, + AArch64GPReg::ZRSP, + AArch64GPReg::X21, + ); + assert_eq!(&buf, &[0xAA, 0x02, 0x1F, 0x8B]); + } + + #[test] + fn test_add_reg64_reg64_imm12() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + add_reg64_reg64_imm12(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21, 0x123); + assert_eq!(&buf, &[0xAA, 0x8E, 0x04, 0x91]); + } + + #[test] + fn test_ldr_reg64_imm12() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + ldr_reg64_imm12(&mut buf, AArch64GPReg::X21, AArch64GPReg::ZRSP, 0x123); + assert_eq!(&buf, &[0xF5, 0x8F, 0x44, 0xF9]); + } + + #[test] + fn test_mov_reg64_reg64() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + mov_reg64_reg64(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21); + assert_eq!(&buf, &[0xEA, 0x03, 0x15, 0xAA]); + } + + #[test] + fn test_movk_reg64_imm16() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + movk_reg64_imm16(&mut buf, AArch64GPReg::X21, TEST_U16, 3); + assert_eq!(&buf, &[0x95, 0x46, 0xE2, 0xF2]); + } + + #[test] + fn test_movz_reg64_imm16() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + movz_reg64_imm16(&mut buf, AArch64GPReg::X21, TEST_U16, 3); + assert_eq!(&buf, &[0x95, 0x46, 0xE2, 0xD2]); + } + + #[test] + fn test_str_reg64_imm12() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + str_reg64_imm12(&mut buf, AArch64GPReg::X21, AArch64GPReg::ZRSP, 0x123); + assert_eq!(&buf, &[0xF5, 0x8F, 0x04, 0xF9]); + } + + #[test] + fn test_sub_reg64_reg64_imm12() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in 
&arena];
+        sub_reg64_reg64_imm12(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21, 0x123);
+        assert_eq!(&buf, &[0xAA, 0x8E, 0x04, 0xD1]);
+    }
+
+    #[test]
+    fn test_ret_reg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        ret_reg64(&mut buf, AArch64GPReg::LR);
+        assert_eq!(&buf, &[0xC0, 0x03, 0x5F, 0xD6]);
+    }
+}
diff --git a/compiler/gen_dev/src/generic64/mod.rs b/compiler/gen_dev/src/generic64/mod.rs
index 3d8c041776..fdc1519bb9 100644
--- a/compiler/gen_dev/src/generic64/mod.rs
+++ b/compiler/gen_dev/src/generic64/mod.rs
@@ -1,49 +1,61 @@
 use crate::{Backend, Env, Relocation};
 use bumpalo::collections::Vec;
-use roc_collections::all::{ImSet, MutMap, MutSet};
+use roc_collections::all::{MutMap, MutSet};
 use roc_module::symbol::Symbol;
 use roc_mono::ir::{Literal, Stmt};
 use std::marker::PhantomData;
 use target_lexicon::Triple;
+pub mod aarch64;
 pub mod x86_64;
-pub trait CallConv {
-    fn gp_param_regs() -> &'static [GPReg];
-    fn gp_return_regs() -> &'static [GPReg];
-    fn gp_default_free_regs() -> &'static [GPReg];
+pub trait CallConv {
+    const GP_PARAM_REGS: &'static [GPReg];
+    const GP_RETURN_REGS: &'static [GPReg];
+    const GP_DEFAULT_FREE_REGS: &'static [GPReg];
-    // A linear scan of an array may be faster than a set technically.
-    // That being said, fastest would likely be a trait based on calling convention/register.
-    fn caller_saved_regs() -> ImSet;
-    fn callee_saved_regs() -> ImSet;
+    const SHADOW_SPACE_SIZE: u8;
-    fn stack_pointer() -> GPReg;
-    fn frame_pointer() -> GPReg;
+    fn callee_saved(reg: &GPReg) -> bool;
+    #[inline(always)]
+    fn caller_saved_regs(reg: &GPReg) -> bool {
+        !Self::callee_saved(reg)
+    }
-    fn shadow_space_size() -> u8;
-    // It may be worth ignoring the red zone and keeping things simpler.
-    fn red_zone_size() -> u8;
+    fn setup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[GPReg],
+        requested_stack_size: i32,
+    ) -> Result;
+    fn cleanup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[GPReg],
+        aligned_stack_size: i32,
+    ) -> Result<(), String>;
 }
-pub trait Assembler {
-    fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
-    fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
-    fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
-    fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
-    fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
-    fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
-    fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
-    fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
-    fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+/// Assembler contains calls to the backend assembly generator.
+/// These calls do not necessarily map directly to a single assembly instruction.
+/// They are higher level in cases where an instruction would not be common and shared between multiple architectures.
+/// Thus, some backends will need to use multiple instructions to perform a single one of these calls.
+/// Generally, I prefer explicit sources, as opposed to dst being one of the sources. Ex: `x = x + y` would be `add x, x, y` instead of `add x, y`.
+/// dst should always come before sources.
+pub trait Assembler { + fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg); + fn add_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32); + fn add_reg64_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, src2: GPReg); + fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64); + fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg); + fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32); + fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg); + fn sub_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32); fn ret<'a>(buf: &mut Vec<'a, u8>); - fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32); - fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg); - fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg); } #[derive(Clone, Debug, PartialEq)] -enum SymbolStorage { +enum SymbolStorage { // These may need layout, but I am not sure. // I think whenever a symbol would be used, we specify layout anyways. GPRegeg(GPReg), @@ -69,7 +81,7 @@ pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallCo literal_map: MutMap>, // This should probably be smarter than a vec. - // There are certain registers we should always use first. With pushing and poping, this could get mixed. + // There are certain registers we should always use first. With pushing and popping, this could get mixed. gp_free_regs: Vec<'a, GPReg>, // The last major thing we need is a way to decide what reg to free when all of them are full. @@ -109,7 +121,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< } fn reset(&mut self) { - self.stack_size = -(CC::red_zone_size() as i32); + self.stack_size = 0; self.leaf_function = true; self.last_seen_map.clear(); self.free_map.clear(); @@ -119,13 +131,12 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< self.gp_free_regs.clear(); self.gp_used_regs.clear(); self.gp_free_regs - .extend_from_slice(CC::gp_default_free_regs()); + .extend_from_slice(CC::GP_DEFAULT_FREE_REGS); } fn set_not_leaf_function(&mut self) { self.leaf_function = false; - // If this is not a leaf function, it can't use the shadow space. - self.stack_size = CC::shadow_space_size() as i32 - CC::red_zone_size() as i32; + self.stack_size = CC::SHADOW_SPACE_SIZE as i32; } fn literal_map(&mut self) -> &mut MutMap> { @@ -147,38 +158,17 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String> { let mut out = bumpalo::vec![in self.env.arena]; - if !self.leaf_function { - // I believe that this will have to move away from push and to mov to be generic across backends. - ASM::push_register64bit(&mut out, CC::frame_pointer()); - ASM::mov_register64bit_register64bit( - &mut out, - CC::frame_pointer(), - CC::stack_pointer(), - ); - } - // Save data in all callee saved regs. - let mut pop_order = bumpalo::vec![in self.env.arena]; - for reg in &self.used_callee_saved_regs { - ASM::push_register64bit(&mut out, *reg); - pop_order.push(*reg); - } - if self.stack_size > 0 { - ASM::sub_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size); - } + // Setup stack. + let mut used_regs = bumpalo::vec![in self.env.arena]; + used_regs.extend(&self.used_callee_saved_regs); + let aligned_stack_size = + CC::setup_stack(&mut out, self.leaf_function, &used_regs, self.stack_size)?; // Add function body. 
out.extend(&self.buf); - if self.stack_size > 0 { - ASM::add_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size); - } - // Restore data in callee saved regs. - while let Some(reg) = pop_order.pop() { - ASM::pop_register64bit(&mut out, reg); - } - if !self.leaf_function { - ASM::pop_register64bit(&mut out, CC::frame_pointer()); - } + // Cleanup stack. + CC::cleanup_stack(&mut out, self.leaf_function, &used_regs, aligned_stack_size)?; ASM::ret(&mut out); Ok((out.into_bump_slice(), &[])) @@ -187,9 +177,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> { let dst_reg = self.claim_gp_reg(dst)?; let src_reg = self.load_to_reg(src)?; - ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg); - ASM::neg_register64bit(&mut self.buf, dst_reg); - ASM::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg); + ASM::abs_reg64_reg64(&mut self.buf, dst_reg, src_reg); Ok(()) } @@ -201,9 +189,8 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< ) -> Result<(), String> { let dst_reg = self.claim_gp_reg(dst)?; let src1_reg = self.load_to_reg(src1)?; - ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg); let src2_reg = self.load_to_reg(src2)?; - ASM::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg); + ASM::add_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg); Ok(()) } @@ -212,7 +199,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< Literal::Int(x) => { let reg = self.claim_gp_reg(sym)?; let val = *x; - ASM::mov_register64bit_immediate64bit(&mut self.buf, reg, val); + ASM::mov_reg64_imm64(&mut self.buf, reg, val); Ok(()) } x => Err(format!("loading literal, {:?}, is not yet implemented", x)), @@ -234,11 +221,11 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> Backend< fn return_symbol(&mut self, sym: &Symbol) -> Result<(), String> { let val = self.symbols_map.get(sym); match val { - Some(SymbolStorage::GPRegeg(reg)) if *reg == CC::gp_return_regs()[0] => Ok(()), + Some(SymbolStorage::GPRegeg(reg)) if *reg == CC::GP_RETURN_REGS[0] => Ok(()), Some(SymbolStorage::GPRegeg(reg)) => { // If it fits in a general purpose register, just copy it over to. // Technically this can be optimized to produce shorter instructions if less than 64bits. 
- ASM::mov_register64bit_register64bit(&mut self.buf, CC::gp_return_regs()[0], *reg); + ASM::mov_reg64_reg64(&mut self.buf, CC::GP_RETURN_REGS[0], *reg); Ok(()) } Some(x) => Err(format!( @@ -258,7 +245,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> fn claim_gp_reg(&mut self, sym: &Symbol) -> Result { let reg = if !self.gp_free_regs.is_empty() { let free_reg = self.gp_free_regs.pop().unwrap(); - if CC::callee_saved_regs().contains(&free_reg) { + if CC::callee_saved(&free_reg) { self.used_callee_saved_regs.insert(free_reg); } Ok(free_reg) @@ -291,7 +278,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> let reg = self.claim_gp_reg(sym)?; self.symbols_map .insert(*sym, SymbolStorage::StackAndGPRegeg(reg, offset)); - ASM::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32); + ASM::mov_reg64_stack32(&mut self.buf, reg, offset as i32); Ok(reg) } None => Err(format!("Unknown symbol: {}", sym)), @@ -302,19 +289,9 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> let val = self.symbols_map.remove(sym); match val { Some(SymbolStorage::GPRegeg(reg)) => { - let offset = self.stack_size; - self.stack_size += 8; - if let Some(size) = self.stack_size.checked_add(8) { - self.stack_size = size; - } else { - return Err(format!( - "Ran out of stack space while saving symbol: {}", - sym - )); - } - ASM::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg); - self.symbols_map - .insert(*sym, SymbolStorage::Stack(offset as i32)); + let offset = self.increase_stack_size(8)?; + ASM::mov_stack32_reg64(&mut self.buf, offset as i32, reg); + self.symbols_map.insert(*sym, SymbolStorage::Stack(offset)); Ok(()) } Some(SymbolStorage::StackAndGPRegeg(_, offset)) => { @@ -328,4 +305,16 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler, CC: CallConv> None => Err(format!("Unknown symbol: {}", sym)), } } + + /// increase_stack_size increase the current stack size and returns the offset of the stack. + fn increase_stack_size(&mut self, amount: i32) -> Result { + debug_assert!(amount > 0); + let offset = self.stack_size; + if let Some(new_size) = self.stack_size.checked_add(amount) { + self.stack_size = new_size; + Ok(offset) + } else { + Err("Ran out of stack space".to_string()) + } + } } diff --git a/compiler/gen_dev/src/generic64/x86_64.rs b/compiler/gen_dev/src/generic64/x86_64.rs index 97a97bc20d..98fa94afa6 100644 --- a/compiler/gen_dev/src/generic64/x86_64.rs +++ b/compiler/gen_dev/src/generic64/x86_64.rs @@ -1,6 +1,5 @@ use crate::generic64::{Assembler, CallConv, GPRegTrait}; use bumpalo::collections::Vec; -use roc_collections::all::ImSet; // Not sure exactly how I want to represent registers. // If we want max speed, we would likely make them structs that impl the same trait to avoid ifs. @@ -26,10 +25,312 @@ pub enum X86_64GPReg { impl GPRegTrait for X86_64GPReg {} +pub struct X86_64Assembler {} +pub struct X86_64WindowsFastcall {} +pub struct X86_64SystemV {} + +const STACK_ALIGNMENT: u8 = 16; + +impl CallConv for X86_64SystemV { + const GP_PARAM_REGS: &'static [X86_64GPReg] = &[ + X86_64GPReg::RDI, + X86_64GPReg::RSI, + X86_64GPReg::RDX, + X86_64GPReg::RCX, + X86_64GPReg::R8, + X86_64GPReg::R9, + ]; + const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX, X86_64GPReg::RDX]; + + const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[ + // The regs we want to use first should be at the end of this vec. + // We will use pop to get which reg to use next + // Use callee saved regs last. 
+ X86_64GPReg::RBX, + // Don't use frame pointer: X86_64GPReg::RBP, + X86_64GPReg::R12, + X86_64GPReg::R13, + X86_64GPReg::R14, + X86_64GPReg::R15, + // Use caller saved regs first. + X86_64GPReg::RAX, + X86_64GPReg::RCX, + X86_64GPReg::RDX, + // Don't use stack pionter: X86_64GPReg::RSP, + X86_64GPReg::RSI, + X86_64GPReg::RDI, + X86_64GPReg::R8, + X86_64GPReg::R9, + X86_64GPReg::R10, + X86_64GPReg::R11, + ]; + const SHADOW_SPACE_SIZE: u8 = 0; + + #[inline(always)] + fn callee_saved(reg: &X86_64GPReg) -> bool { + matches!( + reg, + X86_64GPReg::RBX + | X86_64GPReg::RBP + | X86_64GPReg::R12 + | X86_64GPReg::R13 + | X86_64GPReg::R14 + | X86_64GPReg::R15 + ) + } + + #[inline(always)] + fn setup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[X86_64GPReg], + requested_stack_size: i32, + ) -> Result { + x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size) + } + + #[inline(always)] + fn cleanup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[X86_64GPReg], + aligned_stack_size: i32, + ) -> Result<(), String> { + x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size) + } +} + +impl CallConv for X86_64WindowsFastcall { + const GP_PARAM_REGS: &'static [X86_64GPReg] = &[ + X86_64GPReg::RCX, + X86_64GPReg::RDX, + X86_64GPReg::R8, + X86_64GPReg::R9, + ]; + const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX]; + const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[ + // The regs we want to use first should be at the end of this vec. + // We will use pop to get which reg to use next + + // Don't use stack pionter: X86_64GPReg::RSP, + // Don't use frame pointer: X86_64GPReg::RBP, + + // Use callee saved regs last. + X86_64GPReg::RBX, + X86_64GPReg::RSI, + X86_64GPReg::RDI, + X86_64GPReg::R12, + X86_64GPReg::R13, + X86_64GPReg::R14, + X86_64GPReg::R15, + // Use caller saved regs first. + X86_64GPReg::RAX, + X86_64GPReg::RCX, + X86_64GPReg::RDX, + X86_64GPReg::R8, + X86_64GPReg::R9, + X86_64GPReg::R10, + X86_64GPReg::R11, + ]; + const SHADOW_SPACE_SIZE: u8 = 32; + + #[inline(always)] + fn callee_saved(reg: &X86_64GPReg) -> bool { + matches!( + reg, + X86_64GPReg::RBX + | X86_64GPReg::RBP + | X86_64GPReg::RSI + | X86_64GPReg::RSP + | X86_64GPReg::RDI + | X86_64GPReg::R12 + | X86_64GPReg::R13 + | X86_64GPReg::R14 + | X86_64GPReg::R15 + ) + } + + #[inline(always)] + fn setup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[X86_64GPReg], + requested_stack_size: i32, + ) -> Result { + x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size) + } + + #[inline(always)] + fn cleanup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[X86_64GPReg], + aligned_stack_size: i32, + ) -> Result<(), String> { + x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size) + } +} + +#[inline(always)] +fn x86_64_generic_setup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[X86_64GPReg], + requested_stack_size: i32, +) -> Result { + if !leaf_function { + X86_64Assembler::push_reg64(buf, X86_64GPReg::RBP); + X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RBP, X86_64GPReg::RSP); + } + for reg in saved_regs { + X86_64Assembler::push_reg64(buf, *reg); + } + + // full size is upcast to i64 to make sure we don't overflow here. 
+ let full_size = 8 * saved_regs.len() as i64 + requested_stack_size as i64; + let alignment = if full_size <= 0 { + 0 + } else { + full_size % STACK_ALIGNMENT as i64 + }; + let offset = if alignment == 0 { + 0 + } else { + STACK_ALIGNMENT - alignment as u8 + }; + if let Some(aligned_stack_size) = requested_stack_size.checked_add(offset as i32) { + if aligned_stack_size > 0 { + X86_64Assembler::sub_reg64_reg64_imm32( + buf, + X86_64GPReg::RSP, + X86_64GPReg::RSP, + aligned_stack_size, + ); + Ok(aligned_stack_size) + } else { + Ok(0) + } + } else { + Err("Ran out of stack space".to_string()) + } +} + +#[inline(always)] +fn x86_64_generic_cleanup_stack<'a>( + buf: &mut Vec<'a, u8>, + leaf_function: bool, + saved_regs: &[X86_64GPReg], + aligned_stack_size: i32, +) -> Result<(), String> { + if aligned_stack_size > 0 { + X86_64Assembler::add_reg64_reg64_imm32( + buf, + X86_64GPReg::RSP, + X86_64GPReg::RSP, + aligned_stack_size, + ); + } + for reg in saved_regs.iter().rev() { + X86_64Assembler::pop_reg64(buf, *reg); + } + if !leaf_function { + X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RSP, X86_64GPReg::RBP); + X86_64Assembler::pop_reg64(buf, X86_64GPReg::RBP); + } + Ok(()) +} + +impl Assembler for X86_64Assembler { + // These functions should map to the raw assembly functions below. + // In some cases, that means you can just directly call one of the direct assembly functions. + #[inline(always)] + fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) { + mov_reg64_reg64(buf, dst, src); + neg_reg64(buf, dst); + cmovl_reg64_reg64(buf, dst, src); + } + #[inline(always)] + fn add_reg64_reg64_imm32<'a>( + buf: &mut Vec<'a, u8>, + dst: X86_64GPReg, + src1: X86_64GPReg, + imm32: i32, + ) { + if dst == src1 { + add_reg64_imm32(buf, dst, imm32); + } else { + mov_reg64_reg64(buf, dst, src1); + add_reg64_imm32(buf, dst, imm32); + } + } + #[inline(always)] + fn add_reg64_reg64_reg64<'a>( + buf: &mut Vec<'a, u8>, + dst: X86_64GPReg, + src1: X86_64GPReg, + src2: X86_64GPReg, + ) { + if dst == src1 { + add_reg64_reg64(buf, dst, src2); + } else if dst == src2 { + add_reg64_reg64(buf, dst, src1); + } else { + mov_reg64_reg64(buf, dst, src1); + add_reg64_reg64(buf, dst, src2); + } + } + #[inline(always)] + fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) { + mov_reg64_imm64(buf, dst, imm); + } + #[inline(always)] + fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) { + mov_reg64_reg64(buf, dst, src); + } + #[inline(always)] + fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, offset: i32) { + mov_reg64_stack32(buf, dst, offset); + } + #[inline(always)] + fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: X86_64GPReg) { + mov_stack32_reg64(buf, offset, src); + } + #[inline(always)] + fn sub_reg64_reg64_imm32<'a>( + buf: &mut Vec<'a, u8>, + dst: X86_64GPReg, + src1: X86_64GPReg, + imm32: i32, + ) { + if dst == src1 { + sub_reg64_imm32(buf, dst, imm32); + } else { + mov_reg64_reg64(buf, dst, src1); + sub_reg64_imm32(buf, dst, imm32); + } + } + #[inline(always)] + fn ret<'a>(buf: &mut Vec<'a, u8>) { + ret(buf); + } +} + +impl X86_64Assembler { + #[inline(always)] + fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) { + pop_reg64(buf, reg); + } + + #[inline(always)] + fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) { + push_reg64(buf, reg); + } +} const REX: u8 = 0x40; const REX_W: u8 = REX + 0x8; -fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 { +#[inline(always)] +const 
fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 { if reg as u8 > 7 { byte + 1 } else { @@ -37,11 +338,13 @@ fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 { } } -fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 { +#[inline(always)] +const fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 { add_rm_extension(reg, byte) } -fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 { +#[inline(always)] +const fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 { if reg as u8 > 7 { byte + 4 } else { @@ -49,316 +352,149 @@ fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 { } } -pub struct X86_64Assembler {} -pub struct X86_64WindowsFastcall {} -pub struct X86_64SystemV {} +// Below here are the functions for all of the assembly instructions. +// Their names are based on the instruction and operators combined. +// You should call `buf.reserve()` if you push or extend more than once. +// Unit tests are added at the bottom of the file to ensure correct asm generation. +// Please keep these in alphanumeric order. -impl CallConv for X86_64SystemV { - fn gp_param_regs() -> &'static [X86_64GPReg] { - &[ - X86_64GPReg::RDI, - X86_64GPReg::RSI, - X86_64GPReg::RDX, - X86_64GPReg::RCX, - X86_64GPReg::R8, - X86_64GPReg::R9, - ] - } - fn gp_return_regs() -> &'static [X86_64GPReg] { - &[X86_64GPReg::RAX, X86_64GPReg::RDX] - } - fn gp_default_free_regs() -> &'static [X86_64GPReg] { - &[ - // The regs we want to use first should be at the end of this vec. - // We will use pop to get which reg to use next - // Use callee saved regs last. - X86_64GPReg::RBX, - // Don't use frame pointer: X86_64GPReg::RBP, - X86_64GPReg::R12, - X86_64GPReg::R13, - X86_64GPReg::R14, - X86_64GPReg::R15, - // Use caller saved regs first. - X86_64GPReg::RAX, - X86_64GPReg::RCX, - X86_64GPReg::RDX, - // Don't use stack pionter: X86_64GPReg::RSP, - X86_64GPReg::RSI, - X86_64GPReg::RDI, - X86_64GPReg::R8, - X86_64GPReg::R9, - X86_64GPReg::R10, - X86_64GPReg::R11, - ] - } - fn caller_saved_regs() -> ImSet { - // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed. - ImSet::from(vec![ - X86_64GPReg::RAX, - X86_64GPReg::RCX, - X86_64GPReg::RDX, - X86_64GPReg::RSP, - X86_64GPReg::RSI, - X86_64GPReg::RDI, - X86_64GPReg::R8, - X86_64GPReg::R9, - X86_64GPReg::R10, - X86_64GPReg::R11, - ]) - } - fn callee_saved_regs() -> ImSet { - // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed. - ImSet::from(vec![ - X86_64GPReg::RBX, - X86_64GPReg::RBP, - X86_64GPReg::R12, - X86_64GPReg::R13, - X86_64GPReg::R14, - X86_64GPReg::R15, - ]) - } - fn stack_pointer() -> X86_64GPReg { - X86_64GPReg::RSP - } - fn frame_pointer() -> X86_64GPReg { - X86_64GPReg::RBP - } - fn shadow_space_size() -> u8 { - 0 - } - fn red_zone_size() -> u8 { - 128 +/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits from r/m64. +#[inline(always)] +fn add_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) { + // This can be optimized if the immediate is 1 byte. + let rex = add_rm_extension(dst, REX_W); + let dst_mod = dst as u8 % 8; + buf.reserve(7); + buf.extend(&[rex, 0x81, 0xC0 + dst_mod]); + buf.extend(&imm.to_le_bytes()); +} + +/// `ADD r/m64,r64` -> Add r64 to r/m64. 
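As a concrete check of how the REX helpers above combine with a ModRM byte, here is a standalone sketch (not part of this patch) that recomputes the `ADD r/m64, imm32` encoding for R15 the same way `add_reg64_imm32` does; it reproduces the `[0x49, 0x81, 0xC7, ...]` bytes that the unit tests at the bottom of this file expect.

```rust
const REX: u8 = 0x40;
const REX_W: u8 = REX + 0x8;

fn main() {
    // X86_64GPReg::R15 has register number 15, so it needs the REX.B extension.
    let reg_num: u8 = 15;
    // add_rm_extension: regs 8..=15 add 1 to the REX byte (REX.B) -> 0x49.
    let rex = if reg_num > 7 { REX_W + 1 } else { REX_W };
    // ModRM: mod = 11 (register direct), reg = /0 (ADD), rm = reg_num % 8.
    let modrm = 0xC0 + (reg_num % 8);
    let imm: i32 = 0x1234_5678; // arbitrary immediate for the sketch

    let mut buf = vec![rex, 0x81, modrm];
    buf.extend(&imm.to_le_bytes());
    assert_eq!(&buf[..3], &[0x49, 0x81, 0xC7]);
    assert_eq!(buf.len(), 7); // matches the buf.reserve(7) above
}
```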
+#[inline(always)] +fn add_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) { + let rex = add_rm_extension(dst, REX_W); + let rex = add_reg_extension(src, rex); + let dst_mod = dst as u8 % 8; + let src_mod = (src as u8 % 8) << 3; + buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]); +} + +/// `CMOVL r64,r/m64` -> Move if less (SF≠ OF). +#[inline(always)] +fn cmovl_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) { + let rex = add_reg_extension(dst, REX_W); + let rex = add_rm_extension(src, rex); + let dst_mod = (dst as u8 % 8) << 3; + let src_mod = src as u8 % 8; + buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]); +} + +/// `MOV r/m64, imm32` -> Move imm32 sign extended to 64-bits to r/m64. +#[inline(always)] +fn mov_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) { + let rex = add_rm_extension(dst, REX_W); + let dst_mod = dst as u8 % 8; + buf.reserve(7); + buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]); + buf.extend(&imm.to_le_bytes()); +} + +/// `MOV r64, imm64` -> Move imm64 to r64. +#[inline(always)] +fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) { + if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 { + mov_reg64_imm32(buf, dst, imm as i32) + } else { + let rex = add_opcode_extension(dst, REX_W); + let dst_mod = dst as u8 % 8; + buf.reserve(10); + buf.extend(&[rex, 0xB8 + dst_mod]); + buf.extend(&imm.to_le_bytes()); } } -impl CallConv for X86_64WindowsFastcall { - fn gp_param_regs() -> &'static [X86_64GPReg] { - &[ - X86_64GPReg::RCX, - X86_64GPReg::RDX, - X86_64GPReg::R8, - X86_64GPReg::R9, - ] - } - fn gp_return_regs() -> &'static [X86_64GPReg] { - &[X86_64GPReg::RAX] - } - fn gp_default_free_regs() -> &'static [X86_64GPReg] { - &[ - // The regs we want to use first should be at the end of this vec. - // We will use pop to get which reg to use next - // Use callee saved regs last. - X86_64GPReg::RBX, - // Don't use frame pointer: X86_64GPReg::RBP, - X86_64GPReg::RSI, - // Don't use stack pionter: X86_64GPReg::RSP, - X86_64GPReg::RDI, - X86_64GPReg::R12, - X86_64GPReg::R13, - X86_64GPReg::R14, - X86_64GPReg::R15, - // Use caller saved regs first. - X86_64GPReg::RAX, - X86_64GPReg::RCX, - X86_64GPReg::RDX, - X86_64GPReg::R8, - X86_64GPReg::R9, - X86_64GPReg::R10, - X86_64GPReg::R11, - ] - } - fn caller_saved_regs() -> ImSet { - // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed. - ImSet::from(vec![ - X86_64GPReg::RAX, - X86_64GPReg::RCX, - X86_64GPReg::RDX, - X86_64GPReg::R8, - X86_64GPReg::R9, - X86_64GPReg::R10, - X86_64GPReg::R11, - ]) - } - fn callee_saved_regs() -> ImSet { - // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed. - ImSet::from(vec![ - X86_64GPReg::RBX, - X86_64GPReg::RBP, - X86_64GPReg::RSI, - X86_64GPReg::RSP, - X86_64GPReg::RDI, - X86_64GPReg::R12, - X86_64GPReg::R13, - X86_64GPReg::R14, - X86_64GPReg::R15, - ]) - } - fn stack_pointer() -> X86_64GPReg { - X86_64GPReg::RSP - } - fn frame_pointer() -> X86_64GPReg { - X86_64GPReg::RBP - } - fn shadow_space_size() -> u8 { - 32 - } - fn red_zone_size() -> u8 { - 0 +/// `MOV r/m64,r64` -> Move r64 to r/m64. 
+#[inline(always)]
+fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+ let rex = add_rm_extension(dst, REX_W);
+ let rex = add_reg_extension(src, rex);
+ let dst_mod = dst as u8 % 8;
+ let src_mod = (src as u8 % 8) << 3;
+ buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
+}
+
+/// `MOV r64,r/m64` -> Move r/m64 to r64.
+#[inline(always)]
+fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, offset: i32) {
+ // This can be optimized based on how many bytes the offset actually is.
+ // This function can probably be made to take any memory offset; I didn't feel like figuring that out right now.
+ // Also, this may technically be faster generation since stack operations should be so common.
+ let rex = add_reg_extension(dst, REX_W);
+ let dst_mod = (dst as u8 % 8) << 3;
+ buf.reserve(8);
+ buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
+ buf.extend(&offset.to_le_bytes());
+}
+
+/// `MOV r/m64,r64` -> Move r64 to r/m64.
+#[inline(always)]
+fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: X86_64GPReg) {
+ // This can be optimized based on how many bytes the offset actually is.
+ // This function can probably be made to take any memory offset; I didn't feel like figuring that out right now.
+ // Also, this may technically be faster generation since stack operations should be so common.
+ let rex = add_reg_extension(src, REX_W);
+ let src_mod = (src as u8 % 8) << 3;
+ buf.reserve(8);
+ buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
+ buf.extend(&offset.to_le_bytes());
+}
+
+/// `NEG r/m64` -> Two's complement negate r/m64.
+#[inline(always)]
+fn neg_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+ let rex = add_rm_extension(reg, REX_W);
+ let reg_mod = reg as u8 % 8;
+ buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
+}
+
+/// `RET` -> Near return to calling procedure.
+#[inline(always)]
+fn ret<'a>(buf: &mut Vec<'a, u8>) {
+ buf.push(0xC3);
+}
+
+/// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
+#[inline(always)]
+fn sub_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
+ // This can be optimized if the immediate is 1 byte.
+ let rex = add_rm_extension(dst, REX_W);
+ let dst_mod = dst as u8 % 8;
+ buf.reserve(7);
+ buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
+ buf.extend(&imm.to_le_bytes());
+}
+
+/// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
+#[inline(always)]
+fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+ let reg_mod = reg as u8 % 8;
+ if reg as u8 > 7 {
+ let rex = add_opcode_extension(reg, REX);
+ buf.extend(&[rex, 0x58 + reg_mod]);
+ } else {
+ buf.push(0x58 + reg_mod);
 }
 }
 
-impl Assembler for X86_64Assembler {
- // Below here are the functions for all of the assembly instructions.
- // Their names are based on the instruction and operators combined.
- // You should call `buf.reserve()` if you push or extend more than once.
- // Unit tests are added at the bottom of the file to ensure correct asm generation.
- // Please keep these in alphanumeric order.
-
- /// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits from r/m64.
- fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
- // This can be optimized if the immediate is 1 byte.
- let rex = add_rm_extension(dst, REX_W);
- let dst_mod = dst as u8 % 8;
- buf.reserve(7);
- buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
- buf.extend(&imm.to_le_bytes());
- }
-
- /// `ADD r/m64,r64` -> Add r64 to r/m64.
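The two stack moves above hard-code `0x84 + reg` and `0x24` after the opcode. Those are the ModRM and SIB bytes for a `[RSP + disp32]` operand; the little decoder below (illustrative only, not from this patch) spells out the fields of the `[0x48, 0x8B, 0x84, 0x24]` sequence that the `test_mov_reg64_stack32` case later in this file expects for RAX.

```rust
fn main() {
    // MOV RAX, [RSP + disp32] is emitted above as 0x48 0x8B 0x84 0x24 <disp32 LE>.
    let modrm: u8 = 0x84; // 0b10_000_100
    let sib: u8 = 0x24; // 0b00_100_100

    // ModRM: mod = 10 (32-bit displacement), reg = 000 (RAX), rm = 100 (SIB byte follows).
    assert_eq!(modrm >> 6, 0b10);
    assert_eq!((modrm >> 3) & 0b111, 0b000);
    assert_eq!(modrm & 0b111, 0b100);

    // SIB: scale = 00, index = 100 (no index), base = 100 (RSP).
    assert_eq!(sib >> 6, 0b00);
    assert_eq!((sib >> 3) & 0b111, 0b100);
    assert_eq!(sib & 0b111, 0b100);
}
```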
- fn add_register64bit_register64bit<'a>( - buf: &mut Vec<'a, u8>, - dst: X86_64GPReg, - src: X86_64GPReg, - ) { - let rex = add_rm_extension(dst, REX_W); - let rex = add_reg_extension(src, rex); - let dst_mod = dst as u8 % 8; - let src_mod = (src as u8 % 8) << 3; - buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]); - } - - /// `CMOVL r64,r/m64` -> Move if less (SF≠ OF). - fn cmovl_register64bit_register64bit<'a>( - buf: &mut Vec<'a, u8>, - dst: X86_64GPReg, - src: X86_64GPReg, - ) { - let rex = add_reg_extension(dst, REX_W); - let rex = add_rm_extension(src, rex); - let dst_mod = (dst as u8 % 8) << 3; - let src_mod = src as u8 % 8; - buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]); - } - - /// `MOV r/m64, imm32` -> Move imm32 sign extended to 64-bits to r/m64. - fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) { - let rex = add_rm_extension(dst, REX_W); - let dst_mod = dst as u8 % 8; - buf.reserve(7); - buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]); - buf.extend(&imm.to_le_bytes()); - } - - /// `MOV r64, imm64` -> Move imm64 to r64. - fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) { - if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 { - Self::mov_register64bit_immediate32bit(buf, dst, imm as i32) - } else { - let rex = add_opcode_extension(dst, REX_W); - let dst_mod = dst as u8 % 8; - buf.reserve(10); - buf.extend(&[rex, 0xB8 + dst_mod]); - buf.extend(&imm.to_le_bytes()); - } - } - - /// `MOV r/m64,r64` -> Move r64 to r/m64. - fn mov_register64bit_register64bit<'a>( - buf: &mut Vec<'a, u8>, - dst: X86_64GPReg, - src: X86_64GPReg, - ) { - let rex = add_rm_extension(dst, REX_W); - let rex = add_reg_extension(src, rex); - let dst_mod = dst as u8 % 8; - let src_mod = (src as u8 % 8) << 3; - buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]); - } - - /// `MOV r64,r/m64` -> Move r/m64 to r64. - fn mov_register64bit_stackoffset32bit<'a>( - buf: &mut Vec<'a, u8>, - dst: X86_64GPReg, - offset: i32, - ) { - // This can be optimized based on how many bytes the offset actually is. - // This function can probably be made to take any memory offset, I didn't feel like figuring it out rn. - // Also, this may technically be faster genration since stack operations should be so common. - let rex = add_reg_extension(dst, REX_W); - let dst_mod = (dst as u8 % 8) << 3; - buf.reserve(8); - buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]); - buf.extend(&offset.to_le_bytes()); - } - - /// `MOV r/m64,r64` -> Move r64 to r/m64. - fn mov_stackoffset32bit_register64bit<'a>( - buf: &mut Vec<'a, u8>, - offset: i32, - src: X86_64GPReg, - ) { - // This can be optimized based on how many bytes the offset actually is. - // This function can probably be made to take any memory offset, I didn't feel like figuring it out rn. - // Also, this may technically be faster genration since stack operations should be so common. - let rex = add_reg_extension(src, REX_W); - let src_mod = (src as u8 % 8) << 3; - buf.reserve(8); - buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]); - buf.extend(&offset.to_le_bytes()); - } - - /// `NEG r/m64` -> Two's complement negate r/m64. - fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) { - let rex = add_rm_extension(reg, REX_W); - let reg_mod = reg as u8 % 8; - buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]); - } - - /// `RET` -> Near return to calling procedure. - fn ret<'a>(buf: &mut Vec<'a, u8>) { - buf.push(0xC3); - } - - /// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64. 
- fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) { - // This can be optimized if the immediate is 1 byte. - let rex = add_rm_extension(dst, REX_W); - let dst_mod = dst as u8 % 8; - buf.reserve(7); - buf.extend(&[rex, 0x81, 0xE8 + dst_mod]); - buf.extend(&imm.to_le_bytes()); - } - - /// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size. - fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) { - let reg_mod = reg as u8 % 8; - if reg as u8 > 7 { - let rex = add_opcode_extension(reg, REX); - buf.extend(&[rex, 0x58 + reg_mod]); - } else { - buf.push(0x58 + reg_mod); - } - } - - /// `PUSH r64` -> Push r64, - fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) { - let reg_mod = reg as u8 % 8; - if reg as u8 > 7 { - let rex = add_opcode_extension(reg, REX); - buf.extend(&[rex, 0x50 + reg_mod]); - } else { - buf.push(0x50 + reg_mod); - } +/// `PUSH r64` -> Push r64, +#[inline(always)] +fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) { + let reg_mod = reg as u8 % 8; + if reg as u8 > 7 { + let rex = add_opcode_extension(reg, REX); + buf.extend(&[rex, 0x50 + reg_mod]); + } else { + buf.push(0x50 + reg_mod); } } @@ -372,7 +508,7 @@ mod tests { const TEST_I64: i64 = 0x12345678_9ABCDEF0; #[test] - fn test_add_register64bit_immediate32bit() { + fn test_add_reg64_imm32() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (dst, expected) in &[ @@ -380,14 +516,14 @@ mod tests { (X86_64GPReg::R15, [0x49, 0x81, 0xC7]), ] { buf.clear(); - X86_64Assembler::add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32); + add_reg64_imm32(&mut buf, *dst, TEST_I32); assert_eq!(expected, &buf[..3]); assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]); } } #[test] - fn test_add_register64bit_register64bit() { + fn test_add_reg64_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for ((dst, src), expected) in &[ @@ -397,13 +533,13 @@ mod tests { ((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x01, 0xFF]), ] { buf.clear(); - X86_64Assembler::add_register64bit_register64bit(&mut buf, *dst, *src); + add_reg64_reg64(&mut buf, *dst, *src); assert_eq!(expected, &buf[..]); } } #[test] - fn test_cmovl_register64bit_register64bit() { + fn test_cmovl_reg64_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for ((dst, src), expected) in &[ @@ -425,13 +561,13 @@ mod tests { ), ] { buf.clear(); - X86_64Assembler::cmovl_register64bit_register64bit(&mut buf, *dst, *src); + cmovl_reg64_reg64(&mut buf, *dst, *src); assert_eq!(expected, &buf[..]); } } #[test] - fn test_mov_register64bit_immediate32bit() { + fn test_mov_reg64_imm32() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (dst, expected) in &[ @@ -439,14 +575,14 @@ mod tests { (X86_64GPReg::R15, [0x49, 0xC7, 0xC7]), ] { buf.clear(); - X86_64Assembler::mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32); + mov_reg64_imm32(&mut buf, *dst, TEST_I32); assert_eq!(expected, &buf[..3]); assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]); } } #[test] - fn test_mov_register64bit_immediate64bit() { + fn test_mov_reg64_imm64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (dst, expected) in &[ @@ -454,7 +590,7 @@ mod tests { (X86_64GPReg::R15, [0x49, 0xBF]), ] { buf.clear(); - X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64); + mov_reg64_imm64(&mut buf, *dst, TEST_I64); 
assert_eq!(expected, &buf[..2]); assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]); } @@ -463,14 +599,14 @@ mod tests { (X86_64GPReg::R15, [0x49, 0xC7, 0xC7]), ] { buf.clear(); - X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64); + mov_reg64_imm64(&mut buf, *dst, TEST_I32 as i64); assert_eq!(expected, &buf[..3]); assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]); } } #[test] - fn test_mov_register64bit_register64bit() { + fn test_mov_reg64_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for ((dst, src), expected) in &[ @@ -480,13 +616,13 @@ mod tests { ((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x89, 0xFF]), ] { buf.clear(); - X86_64Assembler::mov_register64bit_register64bit(&mut buf, *dst, *src); + mov_reg64_reg64(&mut buf, *dst, *src); assert_eq!(expected, &buf[..]); } } #[test] - fn test_mov_register64bit_stackoffset32bit() { + fn test_mov_reg64_stack32() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for ((dst, offset), expected) in &[ @@ -494,14 +630,14 @@ mod tests { ((X86_64GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]), ] { buf.clear(); - X86_64Assembler::mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset); + mov_reg64_stack32(&mut buf, *dst, *offset); assert_eq!(expected, &buf[..4]); assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]); } } #[test] - fn test_mov_stackoffset32bit_register64bit() { + fn test_mov_stack32_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for ((offset, src), expected) in &[ @@ -509,14 +645,14 @@ mod tests { ((TEST_I32, X86_64GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]), ] { buf.clear(); - X86_64Assembler::mov_stackoffset32bit_register64bit(&mut buf, *offset, *src); + mov_stack32_reg64(&mut buf, *offset, *src); assert_eq!(expected, &buf[..4]); assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]); } } #[test] - fn test_neg_register64bit() { + fn test_neg_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (reg, expected) in &[ @@ -524,7 +660,7 @@ mod tests { (X86_64GPReg::R15, [0x49, 0xF7, 0xDF]), ] { buf.clear(); - X86_64Assembler::neg_register64bit(&mut buf, *reg); + neg_reg64(&mut buf, *reg); assert_eq!(expected, &buf[..]); } } @@ -533,12 +669,12 @@ mod tests { fn test_ret() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; - X86_64Assembler::ret(&mut buf); + ret(&mut buf); assert_eq!(&[0xC3], &buf[..]); } #[test] - fn test_sub_register64bit_immediate32bit() { + fn test_sub_reg64_imm32() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (dst, expected) in &[ @@ -546,14 +682,14 @@ mod tests { (X86_64GPReg::R15, [0x49, 0x81, 0xEF]), ] { buf.clear(); - X86_64Assembler::sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32); + sub_reg64_imm32(&mut buf, *dst, TEST_I32); assert_eq!(expected, &buf[..3]); assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]); } } #[test] - fn test_pop_register64bit() { + fn test_pop_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (dst, expected) in &[ @@ -561,13 +697,13 @@ mod tests { (X86_64GPReg::R15, vec![0x41, 0x5F]), ] { buf.clear(); - X86_64Assembler::pop_register64bit(&mut buf, *dst); + pop_reg64(&mut buf, *dst); assert_eq!(&expected[..], &buf[..]); } } #[test] - fn test_push_register64bit() { + fn test_push_reg64() { let arena = bumpalo::Bump::new(); let mut buf = bumpalo::vec![in &arena]; for (src, expected) in &[ @@ -575,7 +711,7 @@ mod tests { (X86_64GPReg::R15, vec![0x41, 
0x57]), ] { buf.clear(); - X86_64Assembler::push_register64bit(&mut buf, *src); + push_reg64(&mut buf, *src); assert_eq!(&expected[..], &buf[..]); } } diff --git a/compiler/gen_dev/src/object_builder.rs b/compiler/gen_dev/src/object_builder.rs index 6c16325d9c..588d206e23 100644 --- a/compiler/gen_dev/src/object_builder.rs +++ b/compiler/gen_dev/src/object_builder.rs @@ -1,4 +1,4 @@ -use crate::generic64::{x86_64, Backend64Bit}; +use crate::generic64::{aarch64, x86_64, Backend64Bit}; use crate::{Backend, Env, Relocation, INLINED_SYMBOLS}; use bumpalo::collections::Vec; use object::write; @@ -22,7 +22,7 @@ pub fn build_module<'a>( target: &Triple, procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>, ) -> Result { - let (mut output, mut backend) = match target { + match target { Triple { architecture: TargetArch::X86_64, binary_format: TargetBF::Elf, @@ -33,15 +33,42 @@ pub fn build_module<'a>( x86_64::X86_64Assembler, x86_64::X86_64SystemV, > = Backend::new(env, target)?; - Ok(( - Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little), + build_object( + env, + procedures, backend, - )) + Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little), + ) + } + Triple { + architecture: TargetArch::Aarch64(_), + binary_format: TargetBF::Elf, + .. + } => { + let backend: Backend64Bit< + aarch64::AArch64GPReg, + aarch64::AArch64Assembler, + aarch64::AArch64Call, + > = Backend::new(env, target)?; + build_object( + env, + procedures, + backend, + Object::new(BinaryFormat::Elf, Architecture::Aarch64, Endianness::Little), + ) } x => Err(format! { "the target, {:?}, is not yet implemented", x}), - }?; + } +} + +fn build_object<'a, B: Backend<'a>>( + env: &'a Env, + procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>, + mut backend: B, + mut output: Object, +) -> Result { let text = output.section_id(StandardSection::Text); let data_section = output.section_id(StandardSection::Data); let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString); diff --git a/compiler/gen_dev/tests/gen_num.rs b/compiler/gen_dev/tests/gen_num.rs index c2550052bc..7b73fe1d37 100644 --- a/compiler/gen_dev/tests/gen_num.rs +++ b/compiler/gen_dev/tests/gen_num.rs @@ -9,7 +9,7 @@ extern crate libc; #[macro_use] mod helpers; -#[cfg(all(test, target_os = "linux", target_arch = "x86_64"))] +#[cfg(all(test, target_os = "linux", any(target_arch = "x86_64"/*, target_arch = "aarch64"*/)))] mod gen_num { //use roc_std::RocOrder;
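One last detail from the new `Assembler` impl that deserves a note: `abs_reg64_reg64` lowers absolute value to the branchless `mov dst, src; neg dst; cmovl dst, src` sequence. A hedged Rust model of why that works is below; the `dst < 0` check stands in for CMOVL's SF != OF condition after the NEG, and the names are illustrative rather than taken from the patch.

```rust
// Model of the mov/neg/cmovl lowering used by abs_reg64_reg64; not from the patch.
fn branchless_abs(src: i64) -> i64 {
    let mut dst = src; // mov dst, src
    dst = dst.wrapping_neg(); // neg dst (sets SF/OF from the result)
    if dst < 0 {
        // cmovl dst, src: if the negation came out negative, src was positive,
        // so restore the original (already non-negative) value.
        dst = src;
    }
    dst
}

fn main() {
    assert_eq!(branchless_abs(7), 7);
    assert_eq!(branchless_abs(-7), 7);
    assert_eq!(branchless_abs(0), 0);
    // Like the hardware sequence, i64::MIN stays i64::MIN (negation overflows).
    assert_eq!(branchless_abs(i64::MIN), i64::MIN);
}
```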