Mirror of https://github.com/roc-lang/roc.git, synced 2024-11-11 16:51:53 +03:00

Commit 2d9fe11c64: Merge branch 'trunk' into zig-hosts
@@ -54,3 +54,8 @@ maplit = "1.0.1"
 indoc = "0.3.3"
 quickcheck = "0.8"
 quickcheck_macros = "0.8"
+
+[features]
+target-arm = []
+target-aarch64 = []
+target-webassembly = []
@@ -19,6 +19,7 @@ comptime {
     exportStrFn(str.countSegments, "count_segments");
     exportStrFn(str.countGraphemeClusters, "count_grapheme_clusters");
     exportStrFn(str.startsWith, "starts_with");
+    exportStrFn(str.strConcat, "concat");
 }
 
 // Export helpers - Must be run inside a comptime
@@ -122,6 +122,10 @@ const RocStr = extern struct {
         return if (self.is_small_str()) small_len else big_len;
     }
 
+    pub fn is_empty(self: RocStr) bool {
+        return self.len() == 0;
+    }
+
     // Given a pointer to some bytes, write the first (len) bytes of this
     // RocStr's contents into it.
     //
@@ -586,3 +590,145 @@ test "startsWith: 12345678912345678910 starts with 123456789123456789" {
 
     expect(startsWith(str_ptr, str_len, prefix_ptr, prefix_len));
 }
+
+// Str.concat
+
+test "RocStr.concat: small concat small" {
+    const str1_len = 3;
+    var str1: [str1_len]u8 = "foo".*;
+    const str1_ptr: [*]u8 = &str1;
+    var roc_str1 = RocStr.init(str1_ptr, str1_len);
+
+    const str2_len = 3;
+    var str2: [str2_len]u8 = "abc".*;
+    const str2_ptr: [*]u8 = &str2;
+    var roc_str2 = RocStr.init(str2_ptr, str2_len);
+
+    const str3_len = 6;
+    var str3: [str3_len]u8 = "fooabc".*;
+    const str3_ptr: [*]u8 = &str3;
+    var roc_str3 = RocStr.init(str3_ptr, str3_len);
+
+    const result = strConcat(8, InPlace.Clone, roc_str1, roc_str2);
+
+    expect(roc_str3.eq(result));
+
+    roc_str1.drop();
+    roc_str2.drop();
+    roc_str3.drop();
+    result.drop();
+}
+
+pub fn strConcat(ptr_size: u32, result_in_place: InPlace, arg1: RocStr, arg2: RocStr) callconv(.C) RocStr {
+    return switch (ptr_size) {
+        4 => strConcatHelp(i32, result_in_place, arg1, arg2),
+        8 => strConcatHelp(i64, result_in_place, arg1, arg2),
+        else => unreachable,
+    };
+}
+
+fn strConcatHelp(comptime T: type, result_in_place: InPlace, arg1: RocStr, arg2: RocStr) RocStr {
+    if (arg1.is_empty()) {
+        return cloneNonemptyStr(T, result_in_place, arg2);
+    } else if (arg2.is_empty()) {
+        return cloneNonemptyStr(T, result_in_place, arg1);
+    } else {
+        const combined_length = arg1.len() + arg2.len();
+
+        const small_str_bytes = 2 * @sizeOf(T);
+        const result_is_big = combined_length >= small_str_bytes;
+
+        if (result_is_big) {
+            var result = allocate_str(T, result_in_place, combined_length);
+
+            {
+                const old_if_small = &@bitCast([16]u8, arg1);
+                const old_if_big = @ptrCast([*]u8, arg1.str_bytes);
+                const old_bytes = if (arg1.is_small_str()) old_if_small else old_if_big;
+
+                const new_bytes: [*]u8 = @ptrCast([*]u8, result.str_bytes);
+
+                @memcpy(new_bytes, old_bytes, arg1.len());
+            }
+
+            {
+                const old_if_small = &@bitCast([16]u8, arg2);
+                const old_if_big = @ptrCast([*]u8, arg2.str_bytes);
+                const old_bytes = if (arg2.is_small_str()) old_if_small else old_if_big;
+
+                const new_bytes = @ptrCast([*]u8, result.str_bytes) + arg1.len();
+
+                @memcpy(new_bytes, old_bytes, arg2.len());
+            }
+
+            return result;
+        } else {
+            var result = [16]u8{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+            // if the result is small, then for sure arg1 and arg2 are also small
+
+            {
+                var old_bytes: [*]u8 = @ptrCast([*]u8, &@bitCast([16]u8, arg1));
+                var new_bytes: [*]u8 = @ptrCast([*]u8, &result);
+
+                @memcpy(new_bytes, old_bytes, arg1.len());
+            }
+
+            {
+                var old_bytes: [*]u8 = @ptrCast([*]u8, &@bitCast([16]u8, arg2));
+                var new_bytes = @ptrCast([*]u8, &result) + arg1.len();
+
+                @memcpy(new_bytes, old_bytes, arg2.len());
+            }
+
+            const mask: u8 = 0b1000_0000;
+            const final_byte = @truncate(u8, combined_length) | mask;
+
+            result[small_str_bytes - 1] = final_byte;
+
+            return @bitCast(RocStr, result);
+        }
+    }
+}
+
+const InPlace = packed enum(u8) {
+    InPlace,
+    Clone,
+};
+
+fn cloneNonemptyStr(comptime T: type, in_place: InPlace, str: RocStr) RocStr {
+    if (str.is_small_str() or str.is_empty()) {
+        // just return the bytes
+        return str;
+    } else {
+        var new_str = allocate_str(T, in_place, str.str_len);
+
+        var old_bytes: [*]u8 = @ptrCast([*]u8, str.str_bytes);
+        var new_bytes: [*]u8 = @ptrCast([*]u8, new_str.str_bytes);
+
+        @memcpy(new_bytes, old_bytes, str.str_len);
+
+        return new_str;
+    }
+}
+
+fn allocate_str(comptime T: type, in_place: InPlace, number_of_chars: u64) RocStr {
+    const length = @sizeOf(T) + number_of_chars;
+    var new_bytes: [*]T = @ptrCast([*]T, @alignCast(@alignOf(T), malloc(length)));
+
+    if (in_place == InPlace.InPlace) {
+        new_bytes[0] = @intCast(T, number_of_chars);
+    } else {
+        new_bytes[0] = std.math.minInt(T);
+    }
+
+    var first_element = @ptrCast([*]align(@alignOf(T)) u8, new_bytes);
+    first_element += 8;
+
+    return RocStr{
+        .str_bytes = first_element,
+        .str_len = number_of_chars,
+    };
+}
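The small-string trick in strConcatHelp above packs the length into the last byte of the 16-byte value, with the high bit set as the "small" discriminant. A minimal Rust sketch of that final-byte encoding (hypothetical helper names, not part of this commit):

// The last byte of a small string stores `len | 0b1000_0000`:
// the high bit marks "small string", the low 7 bits hold the length.
fn small_str_final_byte(len: u8) -> u8 {
    debug_assert!(len < 16); // must stay below small_str_bytes on 64-bit targets
    len | 0b1000_0000
}

fn small_str_len(final_byte: u8) -> u8 {
    final_byte & 0b0111_1111
}

fn main() {
    let b = small_str_final_byte(6); // "foo" ++ "abc" from the test above
    assert_eq!(b, 0b1000_0110);
    assert_eq!(small_str_len(b), 6);
}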
@@ -24,6 +24,7 @@ pub const NUM_IS_FINITE: &str = "roc_builtins.num.is_finite";
 pub const NUM_POW_INT: &str = "roc_builtins.num.pow_int";
 
 pub const STR_COUNT_SEGMENTS: &str = "roc_builtins.str.count_segments";
+pub const STR_CONCAT: &str = "roc_builtins.str.concat";
 pub const STR_STR_SPLIT_IN_PLACE: &str = "roc_builtins.str.str_split_in_place";
 pub const STR_COUNT_GRAPEHEME_CLUSTERS: &str = "roc_builtins.str.count_grapheme_clusters";
 pub const STR_STARTS_WITH: &str = "roc_builtins.str.starts_with";
@@ -604,7 +604,9 @@ pub fn build_exp_expr<'a, 'ctx, 'env>(
 
     match expr {
         Literal(literal) => build_exp_literal(env, literal),
-        RunLowLevel(op, symbols) => run_low_level(env, scope, parent, layout, *op, symbols),
+        RunLowLevel(op, symbols) => {
+            run_low_level(env, layout_ids, scope, parent, layout, *op, symbols)
+        }
 
         ForeignCall {
             foreign_symbol,
@@ -1165,12 +1167,10 @@ fn list_literal<'a, 'ctx, 'env>(
     let builder = env.builder;
 
     let len_u64 = elems.len() as u64;
-    let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64;
 
     let ptr = {
-        let bytes_len = elem_bytes * len_u64;
         let len_type = env.ptr_int();
-        let len = len_type.const_int(bytes_len, false);
+        let len = len_type.const_int(len_u64, false);
 
         allocate_list(env, inplace, elem_layout, len)
 
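This hunk appears to fix a double multiplication: judging from the allocate_list hunk further down, allocate_list already multiplies the length by the element size, so passing bytes_len (elem_bytes * len_u64) as the length over-allocated. A hedged Rust sketch of the bug shape (illustrative names only):

// What allocate_list does internally, simplified: count * element size.
fn data_bytes(length: u64, elem_bytes: u64) -> u64 {
    length * elem_bytes
}

fn main() {
    let (len_u64, elem_bytes) = (4u64, 8u64);
    // fixed call: pass the element count
    assert_eq!(data_bytes(len_u64, elem_bytes), 32);
    // old call: passed bytes as the count, multiplying by elem_bytes twice
    assert_eq!(data_bytes(len_u64 * elem_bytes, elem_bytes), 256);
}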
@@ -2383,6 +2383,7 @@ fn call_with_args<'a, 'ctx, 'env>(
 }
 
 #[derive(Copy, Clone)]
+#[repr(u8)]
 pub enum InPlace {
     InPlace,
     Clone,
@@ -2409,6 +2410,7 @@ pub static COLD_CALL_CONV: u32 = 9;
 
 fn run_low_level<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
+    layout_ids: &mut LayoutIds<'a>,
     scope: &Scope<'a, 'ctx>,
     parent: FunctionValue<'ctx>,
     layout: &Layout<'a>,
@@ -2522,7 +2524,16 @@ fn run_low_level<'a, 'ctx, 'env>(
 
             let inplace = get_inplace_from_layout(layout);
 
-            list_map(env, inplace, parent, func, func_layout, list, list_layout)
+            list_map(
+                env,
+                layout_ids,
+                inplace,
+                parent,
+                func,
+                func_layout,
+                list,
+                list_layout,
+            )
         }
         ListKeepIf => {
             // List.keepIf : List elem, (elem -> Bool) -> List elem
@@ -3,12 +3,13 @@ use crate::llvm::build::{
 };
 use crate::llvm::compare::build_eq;
 use crate::llvm::convert::{basic_type_from_layout, collection, get_ptr_type};
+use crate::llvm::refcounting::decrement_refcount_layout;
 use inkwell::builder::Builder;
 use inkwell::context::Context;
 use inkwell::types::{BasicTypeEnum, PointerType};
 use inkwell::values::{BasicValueEnum, FunctionValue, IntValue, PointerValue, StructValue};
 use inkwell::{AddressSpace, IntPredicate};
-use roc_mono::layout::{Builtin, Layout, MemoryMode};
+use roc_mono::layout::{Builtin, Layout, LayoutIds, MemoryMode};
 
 /// List.single : a -> List a
 pub fn list_single<'a, 'ctx, 'env>(
@@ -1318,8 +1319,10 @@ pub fn list_keep_if_help<'a, 'ctx, 'env>(
 }
 
 /// List.map : List before, (before -> after) -> List after
+#[allow(clippy::too_many_arguments)]
 pub fn list_map<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
+    layout_ids: &mut LayoutIds<'a>,
     inplace: InPlace,
     parent: FunctionValue<'ctx>,
     func: BasicValueEnum<'ctx>,
@@ -1365,7 +1368,11 @@ pub fn list_map<'a, 'ctx, 'env>(
 
         incrementing_elem_loop(builder, ctx, parent, list_ptr, len, "#index", list_loop);
 
-        store_list(env, ret_list_ptr, len)
+        let result = store_list(env, ret_list_ptr, len);
+
+        decrement_refcount_layout(env, parent, layout_ids, list, list_layout);
+
+        result
     };
 
     if_list_is_not_empty(env, parent, non_empty_fn, list, list_layout, "List.map")
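The change above makes List.map release its input: the mapped list is stored first, then the input list's refcount is decremented, and only then is the result returned. A minimal Rust sketch of that ordering (demo types; the real code emits LLVM IR):

struct RefCounted {
    count: usize,
}

fn map_list(input: &mut RefCounted, output: Vec<i64>) -> Vec<i64> {
    let result = output; // store_list(env, ret_list_ptr, len)
    input.count -= 1;    // decrement_refcount_layout(env, parent, layout_ids, list, list_layout)
    result               // the output is captured before the decrement
}

fn main() {
    let mut input = RefCounted { count: 1 };
    let out = map_list(&mut input, vec![2, 4, 6]);
    assert_eq!((input.count, out.len()), (0, 3));
}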
@@ -2043,7 +2050,6 @@ pub fn allocate_list<'a, 'ctx, 'env>(
     let len_type = env.ptr_int();
     let elem_bytes = elem_layout.stack_size(env.ptr_bytes) as u64;
     let bytes_per_element = len_type.const_int(elem_bytes, false);
-
     let number_of_data_bytes = builder.build_int_mul(bytes_per_element, length, "data_length");
 
     let rc1 = match inplace {
@@ -1,9 +1,7 @@
 use crate::llvm::build::{
     call_bitcode_fn, call_void_bitcode_fn, ptr_from_symbol, Env, InPlace, Scope,
 };
-use crate::llvm::build_list::{
-    allocate_list, build_basic_phi2, empty_list, incrementing_elem_loop, load_list_ptr, store_list,
-};
+use crate::llvm::build_list::{allocate_list, build_basic_phi2, load_list_ptr, store_list};
 use crate::llvm::convert::collection;
 use inkwell::builder::Builder;
 use inkwell::types::BasicTypeEnum;
@@ -90,333 +88,117 @@ pub fn str_split<'a, 'ctx, 'env>(
     )
 }
 
+/*
+fn cast_to_zig_str(
+    env: &Env<'a, 'ctx, 'env>,
+    str_as_struct: StructValue<'ctx>,
+) -> BasicValueEnum<'ctx> {
+    // get the RocStr type defined by zig
+    let roc_str_type = env.module.get_struct_type("str.RocStr").unwrap();
+
+    // convert `{ *mut u8, i64 }` to `RocStr`
+    builder.build_bitcast(str_as_struct, roc_str_type, "convert_to_zig_rocstr");
+}
+
+fn cast_from_zig_str(
+    env: &Env<'a, 'ctx, 'env>,
+    str_as_struct: StructValue<'ctx>,
+) -> BasicValueEnum<'ctx> {
+    let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
+
+    // convert `RocStr` to `{ *mut u8, i64 }`
+    builder.build_bitcast(str_as_struct, ret_type, "convert_from_zig_rocstr");
+}
+*/
+
+fn str_symbol_to_i128<'a, 'ctx, 'env>(
+    env: &Env<'a, 'ctx, 'env>,
+    scope: &Scope<'a, 'ctx>,
+    symbol: Symbol,
+) -> IntValue<'ctx> {
+    let str_ptr = ptr_from_symbol(scope, symbol);
+
+    let i128_ptr = env
+        .builder
+        .build_bitcast(
+            *str_ptr,
+            env.context.i128_type().ptr_type(AddressSpace::Generic),
+            "cast",
+        )
+        .into_pointer_value();
+
+    env.builder
+        .build_load(i128_ptr, "load_as_i128")
+        .into_int_value()
+}
+
+fn zig_str_to_struct<'a, 'ctx, 'env>(
+    env: &Env<'a, 'ctx, 'env>,
+    zig_str: StructValue<'ctx>,
+) -> StructValue<'ctx> {
+    let builder = env.builder;
+
+    // get the RocStr type defined by zig
+    let zig_str_type = env.module.get_struct_type("str.RocStr").unwrap();
+
+    let ret_type = BasicTypeEnum::StructType(collection(env.context, env.ptr_bytes));
+
+    // a roundabout way of casting (LLVM does not accept a standard bitcast)
+    let allocation = builder.build_alloca(zig_str_type, "zig_result");
+
+    builder.build_store(allocation, zig_str);
+
+    let ptr3 = builder
+        .build_bitcast(
+            allocation,
+            env.context.i128_type().ptr_type(AddressSpace::Generic),
+            "cast",
+        )
+        .into_pointer_value();
+
+    let ptr4 = builder
+        .build_bitcast(
+            ptr3,
+            ret_type.into_struct_type().ptr_type(AddressSpace::Generic),
+            "cast",
+        )
+        .into_pointer_value();
+
+    builder.build_load(ptr4, "load").into_struct_value()
+}
+
 /// Str.concat : Str, Str -> Str
 pub fn str_concat<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
     inplace: InPlace,
     scope: &Scope<'a, 'ctx>,
-    parent: FunctionValue<'ctx>,
-    first_str_symbol: Symbol,
-    second_str_symbol: Symbol,
+    _parent: FunctionValue<'ctx>,
+    str1_symbol: Symbol,
+    str2_symbol: Symbol,
 ) -> BasicValueEnum<'ctx> {
-    let builder = env.builder;
-    let ctx = env.context;
-
-    let second_str_ptr = ptr_from_symbol(scope, second_str_symbol);
-    let first_str_ptr = ptr_from_symbol(scope, first_str_symbol);
-
-    let ret_type = BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes));
-
-    load_str(
-        env,
-        parent,
-        *second_str_ptr,
-        ret_type,
-        |second_str_ptr, second_str_len, second_str_smallness| {
-            load_str(
-                env,
-                parent,
-                *first_str_ptr,
-                ret_type,
-                |first_str_ptr, first_str_len, first_str_smallness| {
-                    // first_str_len > 0
-                    // We do this check to avoid allocating memory. If the first input
-                    // str is empty, then we can just return the second str cloned
-                    let first_str_length_comparison = str_is_not_empty(env, first_str_len);
-
-                    let if_first_str_is_empty = || {
-                        // second_str_len > 0
-                        // We do this check to avoid allocating memory. If the second input
-                        // str is empty, then we can just return an empty str
-                        let second_str_length_comparison = str_is_not_empty(env, second_str_len);
-
-                        let if_second_str_is_nonempty = || {
-                            let (new_wrapper, _) = clone_nonempty_str(
-                                env,
-                                inplace,
-                                second_str_smallness,
-                                second_str_len,
-                                second_str_ptr,
-                            );
-
-                            BasicValueEnum::StructValue(new_wrapper)
-                        };
-
-                        let if_second_str_is_empty = || empty_list(env);
-
-                        build_basic_phi2(
-                            env,
-                            parent,
-                            second_str_length_comparison,
-                            if_second_str_is_nonempty,
-                            if_second_str_is_empty,
-                            ret_type,
-                        )
-                    };
-
-                    let if_first_str_is_not_empty = || {
-                        let if_second_str_is_empty = || {
-                            let (new_wrapper, _) = clone_nonempty_str(
-                                env,
-                                inplace,
-                                first_str_smallness,
-                                first_str_len,
-                                first_str_ptr,
-                            );
-
-                            BasicValueEnum::StructValue(new_wrapper)
-                        };
-
-                        // second_str_len > 0
-                        // We do this check to avoid allocating memory. If the second input
-                        // str is empty, then we can just return the first str cloned
-                        let second_str_length_comparison = str_is_not_empty(env, second_str_len);
-
-                        let if_second_str_is_not_empty = || {
-                            let combined_str_len = builder.build_int_add(
-                                first_str_len,
-                                second_str_len,
-                                "add_list_lengths",
-                            );
-
-                            // The combined string is big iff its length is
-                            // greater than or equal to the size in memory
-                            // of a small str (e.g. len >= 16 on 64-bit targets)
-                            let is_big = env.builder.build_int_compare(
-                                IntPredicate::UGE,
-                                combined_str_len,
-                                env.ptr_int().const_int(env.small_str_bytes() as u64, false),
-                                "str_is_big",
-                            );
-
-                            let if_big = || {
-                                let combined_str_ptr =
-                                    allocate_list(env, inplace, &CHAR_LAYOUT, combined_str_len);
-
-                                // TODO replace FIRST_LOOP with a memcpy!
-                                // FIRST LOOP
-                                let first_loop = |first_index, first_str_elem| {
-                                    // The pointer to the element in the combined list
-                                    let combined_str_elem_ptr = unsafe {
-                                        builder.build_in_bounds_gep(
-                                            combined_str_ptr,
-                                            &[first_index],
-                                            "load_index_combined_list",
-                                        )
-                                    };
-
-                                    // Mutate the new array in-place to change the element.
-                                    builder.build_store(combined_str_elem_ptr, first_str_elem);
-                                };
-
-                                let index_name = "#index";
-
-                                let index_alloca = incrementing_elem_loop(
-                                    builder,
-                                    ctx,
-                                    parent,
-                                    first_str_ptr,
-                                    first_str_len,
-                                    index_name,
-                                    first_loop,
-                                );
-
-                                // Reset the index variable to 0
-                                builder
-                                    .build_store(index_alloca, ctx.i64_type().const_int(0, false));
-
-                                // TODO replace SECOND_LOOP with a memcpy!
-                                // SECOND LOOP
-                                let second_loop = |second_index, second_str_elem| {
-                                    // The pointer to the element in the combined str.
-                                    // Note that the pointer does not start at the index
-                                    // 0, it starts at the index of first_str_len. In that
-                                    // sense it is "offset".
-                                    let offset_combined_str_char_ptr = unsafe {
-                                        builder.build_in_bounds_gep(
-                                            combined_str_ptr,
-                                            &[first_str_len],
-                                            "elem",
-                                        )
-                                    };
-
-                                    // The pointer to the char from the second str
-                                    // in the combined list
-                                    let combined_str_char_ptr = unsafe {
-                                        builder.build_in_bounds_gep(
-                                            offset_combined_str_char_ptr,
-                                            &[second_index],
-                                            "load_index_combined_list",
-                                        )
-                                    };
-
-                                    // Mutate the new array in-place to change the element.
-                                    builder.build_store(combined_str_char_ptr, second_str_elem);
-                                };
-
-                                incrementing_elem_loop(
-                                    builder,
-                                    ctx,
-                                    parent,
-                                    second_str_ptr,
-                                    second_str_len,
-                                    index_name,
-                                    second_loop,
-                                );
-
-                                store_list(env, combined_str_ptr, combined_str_len)
-                            };
-
-                            let if_small = || {
-                                let combined_str_ptr = builder.build_array_alloca(
-                                    ctx.i8_type(),
-                                    ctx.i8_type().const_int(env.small_str_bytes() as u64, false),
-                                    "alloca_small_str",
-                                );
-
-                                // TODO replace FIRST_LOOP with a memcpy!
-                                // FIRST LOOP
-                                let first_loop = |first_index, first_str_elem| {
-                                    // The pointer to the element in the combined list
-                                    let combined_str_elem_ptr = unsafe {
-                                        builder.build_in_bounds_gep(
-                                            combined_str_ptr,
-                                            &[first_index],
-                                            "load_index_combined_list",
-                                        )
-                                    };
-
-                                    // Mutate the new array in-place to change the element.
-                                    builder.build_store(combined_str_elem_ptr, first_str_elem);
-                                };
-
-                                let index_name = "#index";
-
-                                let index_alloca = incrementing_elem_loop(
-                                    builder,
-                                    ctx,
-                                    parent,
-                                    first_str_ptr,
-                                    first_str_len,
-                                    index_name,
-                                    first_loop,
-                                );
-
-                                // Reset the index variable to 0
-                                builder
-                                    .build_store(index_alloca, ctx.i64_type().const_int(0, false));
-
-                                // TODO replace SECOND_LOOP with a memcpy!
-                                // SECOND LOOP
-                                let second_loop = |second_index, second_str_elem| {
-                                    // The pointer to the element in the combined str.
-                                    // Note that the pointer does not start at the index
-                                    // 0, it starts at the index of first_str_len. In that
-                                    // sense it is "offset".
-                                    let offset_combined_str_char_ptr = unsafe {
-                                        builder.build_in_bounds_gep(
-                                            combined_str_ptr,
-                                            &[first_str_len],
-                                            "elem",
-                                        )
-                                    };
-
-                                    // The pointer to the char from the second str
-                                    // in the combined list
-                                    let combined_str_char_ptr = unsafe {
-                                        builder.build_in_bounds_gep(
-                                            offset_combined_str_char_ptr,
-                                            &[second_index],
-                                            "load_index_combined_list",
-                                        )
-                                    };
-
-                                    // Mutate the new array in-place to change the element.
-                                    builder.build_store(combined_str_char_ptr, second_str_elem);
-                                };
-
-                                incrementing_elem_loop(
-                                    builder,
-                                    ctx,
-                                    parent,
-                                    second_str_ptr,
-                                    second_str_len,
-                                    index_name,
-                                    second_loop,
-                                );
-
-                                let final_byte = builder.build_int_cast(
-                                    combined_str_len,
-                                    ctx.i8_type(),
-                                    "str_len_to_i8",
-                                );
-
-                                let final_byte = builder.build_or(
-                                    final_byte,
-                                    ctx.i8_type().const_int(0b1000_0000, false),
-                                    "str_len_set_discriminant",
-                                );
-
-                                let final_byte_ptr = unsafe {
-                                    builder.build_in_bounds_gep(
-                                        combined_str_ptr,
-                                        &[ctx
-                                            .i8_type()
-                                            .const_int(env.small_str_bytes() as u64 - 1, false)],
-                                        "str_literal_final_byte",
-                                    )
-                                };
-
-                                builder.build_store(final_byte_ptr, final_byte);
-
-                                builder.build_load(
-                                    builder
-                                        .build_bitcast(
-                                            combined_str_ptr,
-                                            collection(ctx, env.ptr_bytes)
-                                                .ptr_type(AddressSpace::Generic),
-                                            "cast_collection",
-                                        )
-                                        .into_pointer_value(),
-                                    "small_str_array",
-                                )
-                            };
-
-                            // If the combined length fits in a small string,
-                            // write into a small string!
-                            build_basic_phi2(
-                                env,
-                                parent,
-                                is_big,
-                                // the result of a Str.concat is most likely big
-                                if_big,
-                                if_small,
-                                BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
-                            )
-                        };
-
-                        build_basic_phi2(
-                            env,
-                            parent,
-                            second_str_length_comparison,
-                            if_second_str_is_not_empty,
-                            if_second_str_is_empty,
-                            BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
-                        )
-                    };
-
-                    build_basic_phi2(
-                        env,
-                        parent,
-                        first_str_length_comparison,
-                        if_first_str_is_not_empty,
-                        if_first_str_is_empty,
-                        BasicTypeEnum::StructType(collection(ctx, env.ptr_bytes)),
-                    )
-                },
-            )
-        },
-    )
+    // swap the arguments; second argument comes before the second in the output string
+    let str1_i128 = str_symbol_to_i128(env, scope, str1_symbol);
+    let str2_i128 = str_symbol_to_i128(env, scope, str2_symbol);
+
+    let zig_result = call_bitcode_fn(
+        env,
+        &[
+            env.context
+                .i32_type()
+                .const_int(env.ptr_bytes as u64, false)
+                .into(),
+            env.context
+                .i8_type()
+                .const_int(inplace as u64, false)
+                .into(),
+            str1_i128.into(),
+            str2_i128.into(),
+        ],
+        &bitcode::STR_CONCAT,
+    )
+    .into_struct_value();
+
+    zig_str_to_struct(env, zig_result).into()
 }
 
 /// Obtain the string's length, cast from i8 to usize
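str_symbol_to_i128 and zig_str_to_struct above exist because LLVM refuses a direct struct-to-struct bitcast, so the 16-byte string is round-tripped through i128 (and through a stack slot on the way back). A hedged Rust sketch of the same bit-level idea, assuming for illustration a little-endian { ptr, len } field order:

// Pack a { ptr, len } pair into one 128-bit value and back.
// Field order is an assumption here, not taken from the commit.
fn struct_to_i128(ptr_bits: u64, len: u64) -> u128 {
    (ptr_bits as u128) | ((len as u128) << 64)
}

fn i128_to_struct(v: u128) -> (u64, u64) {
    (v as u64, (v >> 64) as u64)
}

fn main() {
    let packed = struct_to_i128(0xDEAD_BEEF, 3);
    assert_eq!(i128_to_struct(packed), (0xDEAD_BEEF, 3));
}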
@@ -511,82 +293,6 @@ enum Smallness {
     Big,
 }
 
-fn clone_nonempty_str<'a, 'ctx, 'env>(
-    env: &Env<'a, 'ctx, 'env>,
-    inplace: InPlace,
-    smallness: Smallness,
-    len: IntValue<'ctx>,
-    bytes_ptr: PointerValue<'ctx>,
-) -> (StructValue<'ctx>, PointerValue<'ctx>) {
-    let builder = env.builder;
-    let ctx = env.context;
-    let ptr_bytes = env.ptr_bytes;
-
-    // Allocate space for the new str that we'll copy into.
-    match smallness {
-        Smallness::Small => {
-            let wrapper_struct_ptr = cast_str_bytes_to_wrapper(env, bytes_ptr);
-            let wrapper_struct = builder.build_load(wrapper_struct_ptr, "str_wrapper");
-            let alloca = builder.build_alloca(collection(ctx, ptr_bytes), "small_str_clone");
-
-            builder.build_store(alloca, wrapper_struct);
-
-            (wrapper_struct.into_struct_value(), alloca)
-        }
-        Smallness::Big => {
-            let clone_ptr = allocate_list(env, inplace, &CHAR_LAYOUT, len);
-
-            // TODO check if malloc returned null; if so, runtime error for OOM!
-
-            // Copy the bytes from the original array into the new
-            // one we just malloc'd.
-            builder
-                .build_memcpy(clone_ptr, ptr_bytes, bytes_ptr, ptr_bytes, len)
-                .unwrap();
-
-            // Create a fresh wrapper struct for the newly populated array
-            let struct_type = collection(ctx, env.ptr_bytes);
-            let mut struct_val;
-
-            // Store the pointer
-            struct_val = builder
-                .build_insert_value(
-                    struct_type.get_undef(),
-                    clone_ptr,
-                    Builtin::WRAPPER_PTR,
-                    "insert_ptr",
-                )
-                .unwrap();
-
-            // Store the length
-            struct_val = builder
-                .build_insert_value(struct_val, len, Builtin::WRAPPER_LEN, "insert_len")
-                .unwrap();
-
-            let answer = builder
-                .build_bitcast(
-                    struct_val.into_struct_value(),
-                    collection(ctx, ptr_bytes),
-                    "cast_collection",
-                )
-                .into_struct_value();
-
-            (answer, clone_ptr)
-        }
-    }
-}
-
-fn cast_str_bytes_to_wrapper<'a, 'ctx, 'env>(
-    env: &Env<'a, 'ctx, 'env>,
-    bytes_ptr: PointerValue<'ctx>,
-) -> PointerValue<'ctx> {
-    let struct_ptr_type = collection(env.context, env.ptr_bytes).ptr_type(AddressSpace::Generic);
-
-    env.builder
-        .build_bitcast(bytes_ptr, struct_ptr_type, "str_as_struct_ptr")
-        .into_pointer_value()
-}
-
 fn cast_str_wrapper_to_array<'a, 'ctx, 'env>(
     env: &Env<'a, 'ctx, 'env>,
     wrapper_ptr: PointerValue<'ctx>,
@@ -661,6 +367,7 @@ fn big_str_len<'ctx>(builder: &Builder<'ctx>, wrapper_struct: StructValue<'ctx>)
         .into_int_value()
 }
 
+#[allow(dead_code)]
 fn str_is_not_empty<'ctx>(env: &Env<'_, 'ctx, '_>, len: IntValue<'ctx>) -> IntValue<'ctx> {
     env.builder.build_int_compare(
         IntPredicate::UGT,
@@ -42,3 +42,6 @@ bumpalo = { version = "3.2", features = ["collections"] }
 libc = "0.2"
 tempfile = "3.1.0"
 itertools = "0.9"
+
+[features]
+target-aarch64 = ["roc_build/target-aarch64"]
814
compiler/gen_dev/src/generic64/aarch64.rs
Normal file
814
compiler/gen_dev/src/generic64/aarch64.rs
Normal file
@ -0,0 +1,814 @@
|
|||||||
|
use crate::generic64::{Assembler, CallConv, GPRegTrait};
|
||||||
|
use bumpalo::collections::Vec;
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub enum AArch64GPReg {
|
||||||
|
X0 = 0,
|
||||||
|
X1 = 1,
|
||||||
|
X2 = 2,
|
||||||
|
X3 = 3,
|
||||||
|
X4 = 4,
|
||||||
|
X5 = 5,
|
||||||
|
X6 = 6,
|
||||||
|
X7 = 7,
|
||||||
|
XR = 8,
|
||||||
|
X9 = 9,
|
||||||
|
X10 = 10,
|
||||||
|
X11 = 11,
|
||||||
|
X12 = 12,
|
||||||
|
X13 = 13,
|
||||||
|
X14 = 14,
|
||||||
|
X15 = 15,
|
||||||
|
IP0 = 16,
|
||||||
|
IP1 = 17,
|
||||||
|
PR = 18,
|
||||||
|
X19 = 19,
|
||||||
|
X20 = 20,
|
||||||
|
X21 = 21,
|
||||||
|
X22 = 22,
|
||||||
|
X23 = 23,
|
||||||
|
X24 = 24,
|
||||||
|
X25 = 25,
|
||||||
|
X26 = 26,
|
||||||
|
X27 = 27,
|
||||||
|
X28 = 28,
|
||||||
|
FP = 29,
|
||||||
|
LR = 30,
|
||||||
|
/// This can mean Zero or Stack Pointer depending on the context.
|
||||||
|
ZRSP = 31,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl GPRegTrait for AArch64GPReg {}
|
||||||
|
|
||||||
|
pub struct AArch64Assembler {}
|
||||||
|
|
||||||
|
// AArch64Call may need to eventually be split by OS,
|
||||||
|
// but I think with how we use it, they may all be the same.
|
||||||
|
pub struct AArch64Call {}
|
||||||
|
|
||||||
|
const STACK_ALIGNMENT: u8 = 16;
|
||||||
|
|
||||||
|
impl CallConv<AArch64GPReg> for AArch64Call {
|
||||||
|
const GP_PARAM_REGS: &'static [AArch64GPReg] = &[
|
||||||
|
AArch64GPReg::X0,
|
||||||
|
AArch64GPReg::X1,
|
||||||
|
AArch64GPReg::X2,
|
||||||
|
AArch64GPReg::X3,
|
||||||
|
AArch64GPReg::X4,
|
||||||
|
AArch64GPReg::X5,
|
||||||
|
AArch64GPReg::X6,
|
||||||
|
AArch64GPReg::X7,
|
||||||
|
];
|
||||||
|
const GP_RETURN_REGS: &'static [AArch64GPReg] = Self::GP_PARAM_REGS;
|
||||||
|
const GP_DEFAULT_FREE_REGS: &'static [AArch64GPReg] = &[
|
||||||
|
// The regs we want to use first should be at the end of this vec.
|
||||||
|
// We will use pop to get which reg to use next
|
||||||
|
|
||||||
|
// Don't use frame pointer: AArch64GPReg::FP,
|
||||||
|
// Don't user indirect result location: AArch64GPReg::XR,
|
||||||
|
// Don't use platform register: AArch64GPReg::PR,
|
||||||
|
// Don't use link register: AArch64GPReg::LR,
|
||||||
|
// Don't use zero register/stack pointer: AArch64GPReg::ZRSP,
|
||||||
|
|
||||||
|
// Use callee saved regs last.
|
||||||
|
AArch64GPReg::X19,
|
||||||
|
AArch64GPReg::X20,
|
||||||
|
AArch64GPReg::X21,
|
||||||
|
AArch64GPReg::X22,
|
||||||
|
AArch64GPReg::X23,
|
||||||
|
AArch64GPReg::X24,
|
||||||
|
AArch64GPReg::X25,
|
||||||
|
AArch64GPReg::X26,
|
||||||
|
AArch64GPReg::X27,
|
||||||
|
AArch64GPReg::X28,
|
||||||
|
// Use caller saved regs first.
|
||||||
|
AArch64GPReg::X0,
|
||||||
|
AArch64GPReg::X1,
|
||||||
|
AArch64GPReg::X2,
|
||||||
|
AArch64GPReg::X3,
|
||||||
|
AArch64GPReg::X4,
|
||||||
|
AArch64GPReg::X5,
|
||||||
|
AArch64GPReg::X6,
|
||||||
|
AArch64GPReg::X7,
|
||||||
|
AArch64GPReg::X9,
|
||||||
|
AArch64GPReg::X10,
|
||||||
|
AArch64GPReg::X11,
|
||||||
|
AArch64GPReg::X12,
|
||||||
|
AArch64GPReg::X13,
|
||||||
|
AArch64GPReg::X14,
|
||||||
|
AArch64GPReg::X15,
|
||||||
|
AArch64GPReg::IP0,
|
||||||
|
AArch64GPReg::IP1,
|
||||||
|
];
|
||||||
|
|
||||||
|
const SHADOW_SPACE_SIZE: u8 = 0;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn callee_saved(reg: &AArch64GPReg) -> bool {
|
||||||
|
matches!(
|
||||||
|
reg,
|
||||||
|
AArch64GPReg::X19
|
||||||
|
| AArch64GPReg::X20
|
||||||
|
| AArch64GPReg::X21
|
||||||
|
| AArch64GPReg::X22
|
||||||
|
| AArch64GPReg::X23
|
||||||
|
| AArch64GPReg::X24
|
||||||
|
| AArch64GPReg::X25
|
||||||
|
| AArch64GPReg::X26
|
||||||
|
| AArch64GPReg::X27
|
||||||
|
| AArch64GPReg::X28
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn setup_stack<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
leaf_function: bool,
|
||||||
|
saved_regs: &[AArch64GPReg],
|
||||||
|
requested_stack_size: i32,
|
||||||
|
) -> Result<i32, String> {
|
||||||
|
// full size is upcast to i64 to make sure we don't overflow here.
|
||||||
|
let mut full_size = 8 * saved_regs.len() as i64 + requested_stack_size as i64;
|
||||||
|
if !leaf_function {
|
||||||
|
full_size += 8;
|
||||||
|
}
|
||||||
|
let alignment = if full_size <= 0 {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
full_size % STACK_ALIGNMENT as i64
|
||||||
|
};
|
||||||
|
let offset = if alignment == 0 {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
STACK_ALIGNMENT - alignment as u8
|
||||||
|
};
|
||||||
|
if let Some(aligned_stack_size) =
|
||||||
|
requested_stack_size.checked_add(8 * saved_regs.len() as i32 + offset as i32)
|
||||||
|
{
|
||||||
|
if aligned_stack_size > 0 {
|
||||||
|
AArch64Assembler::sub_reg64_reg64_imm32(
|
||||||
|
buf,
|
||||||
|
AArch64GPReg::ZRSP,
|
||||||
|
AArch64GPReg::ZRSP,
|
||||||
|
aligned_stack_size,
|
||||||
|
);
|
||||||
|
|
||||||
|
// All the following stores could be optimized by using `STP` to store pairs.
|
||||||
|
let mut offset = aligned_stack_size;
|
||||||
|
if !leaf_function {
|
||||||
|
offset -= 8;
|
||||||
|
AArch64Assembler::mov_stack32_reg64(buf, offset, AArch64GPReg::LR);
|
||||||
|
offset -= 8;
|
||||||
|
AArch64Assembler::mov_stack32_reg64(buf, offset, AArch64GPReg::FP);
|
||||||
|
}
|
||||||
|
for reg in saved_regs {
|
||||||
|
offset -= 8;
|
||||||
|
AArch64Assembler::mov_stack32_reg64(buf, offset, *reg);
|
||||||
|
}
|
||||||
|
Ok(aligned_stack_size)
|
||||||
|
} else {
|
||||||
|
Ok(0)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Err("Ran out of stack space".to_string())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn cleanup_stack<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
leaf_function: bool,
|
||||||
|
saved_regs: &[AArch64GPReg],
|
||||||
|
aligned_stack_size: i32,
|
||||||
|
) -> Result<(), String> {
|
||||||
|
if aligned_stack_size > 0 {
|
||||||
|
// All the following stores could be optimized by using `STP` to store pairs.
|
||||||
|
let mut offset = aligned_stack_size;
|
||||||
|
if !leaf_function {
|
||||||
|
offset -= 8;
|
||||||
|
AArch64Assembler::mov_reg64_stack32(buf, AArch64GPReg::LR, offset);
|
||||||
|
offset -= 8;
|
||||||
|
AArch64Assembler::mov_reg64_stack32(buf, AArch64GPReg::FP, offset);
|
||||||
|
}
|
||||||
|
for reg in saved_regs {
|
||||||
|
offset -= 8;
|
||||||
|
AArch64Assembler::mov_reg64_stack32(buf, *reg, offset);
|
||||||
|
}
|
||||||
|
AArch64Assembler::add_reg64_reg64_imm32(
|
||||||
|
buf,
|
||||||
|
AArch64GPReg::ZRSP,
|
||||||
|
AArch64GPReg::ZRSP,
|
||||||
|
aligned_stack_size,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Assembler<AArch64GPReg> for AArch64Assembler {
|
||||||
|
#[inline(always)]
|
||||||
|
fn abs_reg64_reg64<'a>(_buf: &mut Vec<'a, u8>, _dst: AArch64GPReg, _src: AArch64GPReg) {
|
||||||
|
unimplemented!("abs_reg64_reg64 is not yet implement for AArch64");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn add_reg64_reg64_imm32<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
dst: AArch64GPReg,
|
||||||
|
src: AArch64GPReg,
|
||||||
|
imm32: i32,
|
||||||
|
) {
|
||||||
|
if imm32 < 0 {
|
||||||
|
unimplemented!("immediate addition with values less than 0 are not yet implemented");
|
||||||
|
} else if imm32 < 0xFFF {
|
||||||
|
add_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"immediate additions with values greater than 12bits are not yet implemented"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn add_reg64_reg64_reg64<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
dst: AArch64GPReg,
|
||||||
|
src1: AArch64GPReg,
|
||||||
|
src2: AArch64GPReg,
|
||||||
|
) {
|
||||||
|
add_reg64_reg64_reg64(buf, dst, src1, src2);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm: i64) {
|
||||||
|
let mut remaining = imm as u64;
|
||||||
|
movz_reg64_imm16(buf, dst, remaining as u16, 0);
|
||||||
|
remaining >>= 16;
|
||||||
|
if remaining > 0 {
|
||||||
|
movk_reg64_imm16(buf, dst, remaining as u16, 1);
|
||||||
|
}
|
||||||
|
remaining >>= 16;
|
||||||
|
if remaining > 0 {
|
||||||
|
movk_reg64_imm16(buf, dst, remaining as u16, 2);
|
||||||
|
}
|
||||||
|
remaining >>= 16;
|
||||||
|
if remaining > 0 {
|
||||||
|
movk_reg64_imm16(buf, dst, remaining as u16, 3);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
|
||||||
|
mov_reg64_reg64(buf, dst, src);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, offset: i32) {
|
||||||
|
if offset < 0 {
|
||||||
|
unimplemented!("negative stack offsets are not yet implement for AArch64");
|
||||||
|
} else if offset < (0xFFF << 8) {
|
||||||
|
debug_assert!(offset % 8 == 0);
|
||||||
|
ldr_reg64_imm12(buf, dst, AArch64GPReg::ZRSP, (offset as u16) >> 3);
|
||||||
|
} else {
|
||||||
|
unimplemented!("stack offsets over 32k are not yet implement for AArch64");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: AArch64GPReg) {
|
||||||
|
if offset < 0 {
|
||||||
|
unimplemented!("negative stack offsets are not yet implement for AArch64");
|
||||||
|
} else if offset < (0xFFF << 8) {
|
||||||
|
debug_assert!(offset % 8 == 0);
|
||||||
|
str_reg64_imm12(buf, src, AArch64GPReg::ZRSP, (offset as u16) >> 3);
|
||||||
|
} else {
|
||||||
|
unimplemented!("stack offsets over 32k are not yet implement for AArch64");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn sub_reg64_reg64_imm32<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
dst: AArch64GPReg,
|
||||||
|
src: AArch64GPReg,
|
||||||
|
imm32: i32,
|
||||||
|
) {
|
||||||
|
if imm32 < 0 {
|
||||||
|
unimplemented!(
|
||||||
|
"immediate subtractions with values less than 0 are not yet implemented"
|
||||||
|
);
|
||||||
|
} else if imm32 < 0xFFF {
|
||||||
|
sub_reg64_reg64_imm12(buf, dst, src, imm32 as u16);
|
||||||
|
} else {
|
||||||
|
unimplemented!(
|
||||||
|
"immediate subtractions with values greater than 12bits are not yet implemented"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn ret<'a>(buf: &mut Vec<'a, u8>) {
|
||||||
|
ret_reg64(buf, AArch64GPReg::LR)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AArch64Assembler {}
|
||||||
|
|
||||||
|
/// AArch64Instruction, maps all instructions to an enum.
|
||||||
|
/// Decoding the function should be cheap because we will always inline.
|
||||||
|
/// All of the operations should resolved by constants, leave just some bit manipulation.
|
||||||
|
/// Enums may not be complete since we will only add what we need.
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum AArch64Instruction {
|
||||||
|
_Reserved,
|
||||||
|
_SVE,
|
||||||
|
DPImm(DPImmGroup),
|
||||||
|
Branch(BranchGroup),
|
||||||
|
LdStr(LdStrGroup),
|
||||||
|
DPReg(DPRegGroup),
|
||||||
|
_DPFloat,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum BranchGroup {
|
||||||
|
UnconditionBranchReg {
|
||||||
|
opc: u8,
|
||||||
|
op2: u8,
|
||||||
|
op3: u8,
|
||||||
|
reg_n: AArch64GPReg,
|
||||||
|
op4: u8,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum DPRegGroup {
|
||||||
|
AddSubShifted {
|
||||||
|
sf: bool,
|
||||||
|
subtract: bool,
|
||||||
|
set_flags: bool,
|
||||||
|
shift: u8,
|
||||||
|
reg_m: AArch64GPReg,
|
||||||
|
imm6: u8,
|
||||||
|
reg_n: AArch64GPReg,
|
||||||
|
reg_d: AArch64GPReg,
|
||||||
|
},
|
||||||
|
Logical {
|
||||||
|
sf: bool,
|
||||||
|
op: DPRegLogicalOp,
|
||||||
|
shift: u8,
|
||||||
|
reg_m: AArch64GPReg,
|
||||||
|
imm6: u8,
|
||||||
|
reg_n: AArch64GPReg,
|
||||||
|
reg_d: AArch64GPReg,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum DPImmGroup {
|
||||||
|
AddSubImm {
|
||||||
|
sf: bool,
|
||||||
|
subtract: bool,
|
||||||
|
set_flags: bool,
|
||||||
|
shift: bool,
|
||||||
|
imm12: u16,
|
||||||
|
reg_n: AArch64GPReg,
|
||||||
|
reg_d: AArch64GPReg,
|
||||||
|
},
|
||||||
|
MoveWide {
|
||||||
|
sf: bool,
|
||||||
|
opc: u8,
|
||||||
|
hw: u8,
|
||||||
|
imm16: u16,
|
||||||
|
reg_d: AArch64GPReg,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum LdStrGroup {
|
||||||
|
UnsignedImm {
|
||||||
|
size: u8,
|
||||||
|
v: bool,
|
||||||
|
opc: u8,
|
||||||
|
imm12: u16,
|
||||||
|
reg_n: AArch64GPReg,
|
||||||
|
reg_t: AArch64GPReg,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
#[allow(dead_code)]
|
||||||
|
enum DPRegLogicalOp {
|
||||||
|
AND,
|
||||||
|
BIC,
|
||||||
|
ORR,
|
||||||
|
ORN,
|
||||||
|
EOR,
|
||||||
|
EON,
|
||||||
|
ANDS,
|
||||||
|
BICS,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn build_instruction(inst: AArch64Instruction) -> [u8; 4] {
|
||||||
|
let mut out: u32 = 0;
|
||||||
|
match inst {
|
||||||
|
AArch64Instruction::Branch(branch) => {
|
||||||
|
out |= 0b101 << 26;
|
||||||
|
match branch {
|
||||||
|
BranchGroup::UnconditionBranchReg {
|
||||||
|
opc,
|
||||||
|
op2,
|
||||||
|
op3,
|
||||||
|
reg_n,
|
||||||
|
op4,
|
||||||
|
} => {
|
||||||
|
debug_assert!(opc <= 0b1111);
|
||||||
|
debug_assert!(op2 <= 0b11111);
|
||||||
|
debug_assert!(op3 <= 0b111111);
|
||||||
|
debug_assert!(op4 <= 0b1111);
|
||||||
|
out |= 0b1101011 << 25;
|
||||||
|
out |= (opc as u32) << 21;
|
||||||
|
out |= (op2 as u32) << 16;
|
||||||
|
out |= (op3 as u32) << 10;
|
||||||
|
out |= (reg_n as u32) << 5;
|
||||||
|
out |= op4 as u32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AArch64Instruction::DPImm(dpimm) => {
|
||||||
|
out |= 0b100 << 26;
|
||||||
|
match dpimm {
|
||||||
|
DPImmGroup::MoveWide {
|
||||||
|
sf,
|
||||||
|
opc,
|
||||||
|
hw,
|
||||||
|
imm16,
|
||||||
|
reg_d,
|
||||||
|
} => {
|
||||||
|
out |= (sf as u32) << 31;
|
||||||
|
out |= (opc as u32) << 29;
|
||||||
|
out |= 0b101 << 23;
|
||||||
|
out |= (hw as u32) << 21;
|
||||||
|
out |= (imm16 as u32) << 5;
|
||||||
|
out |= reg_d as u32;
|
||||||
|
}
|
||||||
|
DPImmGroup::AddSubImm {
|
||||||
|
sf,
|
||||||
|
subtract,
|
||||||
|
set_flags,
|
||||||
|
shift,
|
||||||
|
imm12,
|
||||||
|
reg_n,
|
||||||
|
reg_d,
|
||||||
|
} => {
|
||||||
|
debug_assert!(imm12 <= 0xFFF);
|
||||||
|
out |= (sf as u32) << 31;
|
||||||
|
out |= (subtract as u32) << 30;
|
||||||
|
out |= (set_flags as u32) << 29;
|
||||||
|
out |= 0b010 << 23;
|
||||||
|
out |= (shift as u32) << 22;
|
||||||
|
out |= (imm12 as u32) << 10;
|
||||||
|
out |= (reg_n as u32) << 5;
|
||||||
|
out |= reg_d as u32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AArch64Instruction::DPReg(dpreg) => {
|
||||||
|
out |= 0b101 << 25;
|
||||||
|
match dpreg {
|
||||||
|
DPRegGroup::Logical {
|
||||||
|
sf,
|
||||||
|
op,
|
||||||
|
shift,
|
||||||
|
reg_m,
|
||||||
|
imm6,
|
||||||
|
reg_n,
|
||||||
|
reg_d,
|
||||||
|
} => {
|
||||||
|
debug_assert!(shift <= 0b11);
|
||||||
|
debug_assert!(imm6 <= 0b111111);
|
||||||
|
let (opc, n) = match op {
|
||||||
|
DPRegLogicalOp::AND => (0b00, 0),
|
||||||
|
DPRegLogicalOp::BIC => (0b00, 1),
|
||||||
|
DPRegLogicalOp::ORR => (0b01, 0),
|
||||||
|
DPRegLogicalOp::ORN => (0b01, 1),
|
||||||
|
DPRegLogicalOp::EOR => (0b10, 0),
|
||||||
|
DPRegLogicalOp::EON => (0b10, 1),
|
||||||
|
DPRegLogicalOp::ANDS => (0b11, 0),
|
||||||
|
DPRegLogicalOp::BICS => (0b11, 1),
|
||||||
|
};
|
||||||
|
out |= (sf as u32) << 31;
|
||||||
|
out |= opc << 29;
|
||||||
|
out |= (shift as u32) << 22;
|
||||||
|
out |= n << 21;
|
||||||
|
out |= (reg_m as u32) << 16;
|
||||||
|
out |= (imm6 as u32) << 10;
|
||||||
|
out |= (reg_n as u32) << 5;
|
||||||
|
out |= reg_d as u32;
|
||||||
|
}
|
||||||
|
DPRegGroup::AddSubShifted {
|
||||||
|
sf,
|
||||||
|
subtract,
|
||||||
|
set_flags,
|
||||||
|
shift,
|
||||||
|
reg_m,
|
||||||
|
imm6,
|
||||||
|
reg_n,
|
||||||
|
reg_d,
|
||||||
|
} => {
|
||||||
|
debug_assert!(shift <= 0b11);
|
||||||
|
debug_assert!(imm6 <= 0b111111);
|
||||||
|
out |= (sf as u32) << 31;
|
||||||
|
out |= (subtract as u32) << 30;
|
||||||
|
out |= (set_flags as u32) << 29;
|
||||||
|
out |= 0b1 << 24;
|
||||||
|
out |= (shift as u32) << 22;
|
||||||
|
out |= (reg_m as u32) << 16;
|
||||||
|
out |= (imm6 as u32) << 10;
|
||||||
|
out |= (reg_n as u32) << 5;
|
||||||
|
out |= reg_d as u32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AArch64Instruction::LdStr(ldstr) => {
|
||||||
|
out |= 0b1 << 27;
|
||||||
|
match ldstr {
|
||||||
|
LdStrGroup::UnsignedImm {
|
||||||
|
size,
|
||||||
|
v,
|
||||||
|
opc,
|
||||||
|
imm12,
|
||||||
|
reg_n,
|
||||||
|
reg_t,
|
||||||
|
} => {
|
||||||
|
debug_assert!(size <= 0b11);
|
||||||
|
debug_assert!(imm12 <= 0xFFF);
|
||||||
|
out |= (size as u32) << 30;
|
||||||
|
out |= 0b11 << 28;
|
||||||
|
out |= (v as u32) << 26;
|
||||||
|
out |= 0b1 << 24;
|
||||||
|
out |= (opc as u32) << 22;
|
||||||
|
out |= (imm12 as u32) << 10;
|
||||||
|
out |= (reg_n as u32) << 5;
|
||||||
|
out |= reg_t as u32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
x => unimplemented!("The instruction, {:?}, has not be implemented yet", x),
|
||||||
|
}
|
||||||
|
out.to_le_bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Below here are the functions for all of the assembly instructions.
|
||||||
|
// Their names are based on the instruction and operators combined.
|
||||||
|
// You should call `buf.reserve()` if you push or extend more than once.
|
||||||
|
// Unit tests are added at the bottom of the file to ensure correct asm generation.
|
||||||
|
// Please keep these in alphanumeric order.
|
||||||
|
|
||||||
|
/// `ADD Xd, Xn, imm12` -> Add Xn and imm12 and place the result into Xd.
|
||||||
|
#[inline(always)]
|
||||||
|
fn add_reg64_reg64_imm12<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
dst: AArch64GPReg,
|
||||||
|
src: AArch64GPReg,
|
||||||
|
imm12: u16,
|
||||||
|
) {
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::DPImm(
|
||||||
|
DPImmGroup::AddSubImm {
|
||||||
|
sf: true,
|
||||||
|
subtract: false,
|
||||||
|
set_flags: false,
|
||||||
|
shift: false,
|
||||||
|
imm12,
|
||||||
|
reg_n: src,
|
||||||
|
reg_d: dst,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `ADD Xd, Xm, Xn` -> Add Xm and Xn and place the result into Xd.
|
||||||
|
#[inline(always)]
|
||||||
|
fn add_reg64_reg64_reg64<'a>(
|
||||||
|
buf: &mut Vec<'a, u8>,
|
||||||
|
dst: AArch64GPReg,
|
||||||
|
src1: AArch64GPReg,
|
||||||
|
src2: AArch64GPReg,
|
||||||
|
) {
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::DPReg(
|
||||||
|
DPRegGroup::AddSubShifted {
|
||||||
|
sf: true,
|
||||||
|
subtract: false,
|
||||||
|
set_flags: false,
|
||||||
|
shift: 0,
|
||||||
|
reg_m: src1,
|
||||||
|
imm6: 0,
|
||||||
|
reg_n: src2,
|
||||||
|
reg_d: dst,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `LDR Xt, [Xn, #offset]` -> Load Xn + Offset Xt. ZRSP is SP.
|
||||||
|
/// Note: imm12 is the offest divided by 8.
|
||||||
|
#[inline(always)]
|
||||||
|
fn ldr_reg64_imm12<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, base: AArch64GPReg, imm12: u16) {
|
||||||
|
debug_assert!(imm12 <= 0xFFF);
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::LdStr(
|
||||||
|
LdStrGroup::UnsignedImm {
|
||||||
|
size: 0b11,
|
||||||
|
v: false,
|
||||||
|
opc: 0b01,
|
||||||
|
imm12,
|
||||||
|
reg_n: base,
|
||||||
|
reg_t: dst,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `MOV Xd, Xm` -> Move Xm to Xd.
|
||||||
|
#[inline(always)]
|
||||||
|
fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, src: AArch64GPReg) {
|
||||||
|
// MOV is equvalent to `ORR Xd, XZR, XM` in AARCH64.
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::DPReg(
|
||||||
|
DPRegGroup::Logical {
|
||||||
|
sf: true,
|
||||||
|
op: DPRegLogicalOp::ORR,
|
||||||
|
shift: 0,
|
||||||
|
reg_m: src,
|
||||||
|
imm6: 0,
|
||||||
|
reg_n: AArch64GPReg::ZRSP,
|
||||||
|
reg_d: dst,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `MOVK Xd, imm16` -> Keeps Xd and moves an optionally shifted imm16 to Xd.
|
||||||
|
#[inline(always)]
|
||||||
|
fn movk_reg64_imm16<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm16: u16, hw: u8) {
|
||||||
|
debug_assert!(hw <= 0b11);
|
||||||
|
// MOV is equvalent to `ORR Xd, XZR, XM` in AARCH64.
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::DPImm(
|
||||||
|
DPImmGroup::MoveWide {
|
||||||
|
sf: true,
|
||||||
|
opc: 0b11,
|
||||||
|
hw,
|
||||||
|
imm16,
|
||||||
|
reg_d: dst,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `MOVZ Xd, imm16` -> Zeros Xd and moves an optionally shifted imm16 to Xd.
|
||||||
|
#[inline(always)]
|
||||||
|
fn movz_reg64_imm16<'a>(buf: &mut Vec<'a, u8>, dst: AArch64GPReg, imm16: u16, hw: u8) {
|
||||||
|
debug_assert!(hw <= 0b11);
|
||||||
|
// MOV is equvalent to `ORR Xd, XZR, XM` in AARCH64.
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::DPImm(
|
||||||
|
DPImmGroup::MoveWide {
|
||||||
|
sf: true,
|
||||||
|
opc: 0b10,
|
||||||
|
hw,
|
||||||
|
imm16,
|
||||||
|
reg_d: dst,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// `STR Xt, [Xn, #offset]` -> Store Xt to Xn + Offset. ZRSP is SP.
|
||||||
|
/// Note: imm12 is the offest divided by 8.
|
||||||
|
#[inline(always)]
|
||||||
|
fn str_reg64_imm12<'a>(buf: &mut Vec<'a, u8>, src: AArch64GPReg, base: AArch64GPReg, imm12: u16) {
|
||||||
|
debug_assert!(imm12 <= 0xFFF);
|
||||||
|
buf.extend(&build_instruction(AArch64Instruction::LdStr(
|
||||||
|
LdStrGroup::UnsignedImm {
|
||||||
|
size: 0b11,
|
||||||
|
v: false,
|
||||||
|
opc: 0b00,
|
||||||
|
imm12,
|
||||||
|
reg_n: base,
|
||||||
|
reg_t: src,
|
||||||
|
},
|
||||||
|
)));
|
||||||
|
}
+
+/// `SUB Xd, Xn, imm12` -> Subtract imm12 from Xn and place the result into Xd.
+#[inline(always)]
+fn sub_reg64_reg64_imm12<'a>(
+    buf: &mut Vec<'a, u8>,
+    dst: AArch64GPReg,
+    src: AArch64GPReg,
+    imm12: u16,
+) {
+    buf.extend(&build_instruction(AArch64Instruction::DPImm(
+        DPImmGroup::AddSubImm {
+            sf: true,
+            subtract: true,
+            set_flags: false,
+            shift: false,
+            imm12,
+            reg_n: src,
+            reg_d: dst,
+        },
+    )));
+}
+
+/// `RET Xn` -> Return to the address stored in Xn.
+#[inline(always)]
+fn ret_reg64<'a>(buf: &mut Vec<'a, u8>, xn: AArch64GPReg) {
+    buf.extend(&build_instruction(AArch64Instruction::Branch(
+        BranchGroup::UnconditionBranchReg {
+            opc: 0b0010,
+            op2: 0b11111,
+            op3: 0b000000,
+            reg_n: xn,
+            op4: 0b000,
+        },
+    )));
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const TEST_U16: u16 = 0x1234;
+    //const TEST_I32: i32 = 0x12345678;
+    //const TEST_I64: i64 = 0x12345678_9ABCDEF0;
+
+    #[test]
+    fn test_add_reg64_reg64_reg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        add_reg64_reg64_reg64(
+            &mut buf,
+            AArch64GPReg::X10,
+            AArch64GPReg::ZRSP,
+            AArch64GPReg::X21,
+        );
+        assert_eq!(&buf, &[0xAA, 0x02, 0x1F, 0x8B]);
+    }
+
+    #[test]
+    fn test_add_reg64_reg64_imm12() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        add_reg64_reg64_imm12(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21, 0x123);
+        assert_eq!(&buf, &[0xAA, 0x8E, 0x04, 0x91]);
+    }
+
+    #[test]
+    fn test_ldr_reg64_imm12() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        ldr_reg64_imm12(&mut buf, AArch64GPReg::X21, AArch64GPReg::ZRSP, 0x123);
+        assert_eq!(&buf, &[0xF5, 0x8F, 0x44, 0xF9]);
+    }
+
+    #[test]
+    fn test_mov_reg64_reg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        mov_reg64_reg64(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21);
+        assert_eq!(&buf, &[0xEA, 0x03, 0x15, 0xAA]);
+    }
+
+    #[test]
+    fn test_movk_reg64_imm16() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        movk_reg64_imm16(&mut buf, AArch64GPReg::X21, TEST_U16, 3);
+        assert_eq!(&buf, &[0x95, 0x46, 0xE2, 0xF2]);
+    }
+
+    #[test]
+    fn test_movz_reg64_imm16() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        movz_reg64_imm16(&mut buf, AArch64GPReg::X21, TEST_U16, 3);
+        assert_eq!(&buf, &[0x95, 0x46, 0xE2, 0xD2]);
+    }
+
+    #[test]
+    fn test_str_reg64_imm12() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        str_reg64_imm12(&mut buf, AArch64GPReg::X21, AArch64GPReg::ZRSP, 0x123);
+        assert_eq!(&buf, &[0xF5, 0x8F, 0x04, 0xF9]);
+    }
+
+    #[test]
+    fn test_sub_reg64_reg64_imm12() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        sub_reg64_reg64_imm12(&mut buf, AArch64GPReg::X10, AArch64GPReg::X21, 0x123);
+        assert_eq!(&buf, &[0xAA, 0x8E, 0x04, 0xD1]);
+    }
+
+    #[test]
+    fn test_ret_reg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        ret_reg64(&mut buf, AArch64GPReg::LR);
+        assert_eq!(&buf, &[0xC0, 0x03, 0x5F, 0xD6]);
+    }
+}
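The expected byte arrays in these tests are the little-endian forms of 32-bit instruction words. As a worked check (not part of the commit), the ADD-immediate case can be rebuilt by hand from the ARM reference field layout:

    #[test]
    fn add_imm_word_by_hand() {
        // ADD (immediate, 64-bit): sf=1, op=0, S=0, opcode=100010, shift=0.
        let (imm12, rn, rd) = (0x123u32, 21u32, 10u32); // X21, X10
        let word = (1 << 31) | (0b100010 << 23) | (imm12 << 10) | (rn << 5) | rd;
        assert_eq!(word, 0x91048EAA); // little-endian bytes [0xAA, 0x8E, 0x04, 0x91]
    }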
@@ -1,49 +1,61 @@
 use crate::{Backend, Env, Relocation};
 use bumpalo::collections::Vec;
-use roc_collections::all::{ImSet, MutMap, MutSet};
+use roc_collections::all::{MutMap, MutSet};
 use roc_module::symbol::Symbol;
 use roc_mono::ir::{Literal, Stmt};
 use std::marker::PhantomData;
 use target_lexicon::Triple;

+pub mod aarch64;
 pub mod x86_64;

-pub trait CallConv<GPReg> {
-    fn gp_param_regs() -> &'static [GPReg];
-    fn gp_return_regs() -> &'static [GPReg];
-    fn gp_default_free_regs() -> &'static [GPReg];
+pub trait CallConv<GPReg: GPRegTrait> {
+    const GP_PARAM_REGS: &'static [GPReg];
+    const GP_RETURN_REGS: &'static [GPReg];
+    const GP_DEFAULT_FREE_REGS: &'static [GPReg];

-    // A linear scan of an array may be faster than a set technically.
-    // That being said, fastest would likely be a trait based on calling convention/register.
-    fn caller_saved_regs() -> ImSet<GPReg>;
-    fn callee_saved_regs() -> ImSet<GPReg>;
+    const SHADOW_SPACE_SIZE: u8;

-    fn stack_pointer() -> GPReg;
-    fn frame_pointer() -> GPReg;
+    fn callee_saved(reg: &GPReg) -> bool;
+    #[inline(always)]
+    fn caller_saved_regs(reg: &GPReg) -> bool {
+        !Self::callee_saved(reg)
+    }

-    fn shadow_space_size() -> u8;
-    // It may be worth ignoring the red zone and keeping things simpler.
-    fn red_zone_size() -> u8;
+    fn setup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[GPReg],
+        requested_stack_size: i32,
+    ) -> Result<i32, String>;
+    fn cleanup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[GPReg],
+        aligned_stack_size: i32,
+    ) -> Result<(), String>;
 }
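A new target only has to supply the register tables, the shadow-space constant, and the two stack hooks; caller_saved_regs falls out of the callee_saved default. A sketch of the shape of an implementation for a hypothetical target (FooGPReg and all bodies are placeholders, not part of the commit):

    struct FooCallConv {}
    impl CallConv<FooGPReg> for FooCallConv {
        const GP_PARAM_REGS: &'static [FooGPReg] = &[/* argument registers, in order */];
        const GP_RETURN_REGS: &'static [FooGPReg] = &[/* return registers */];
        const GP_DEFAULT_FREE_REGS: &'static [FooGPReg] = &[/* allocatable registers */];
        const SHADOW_SPACE_SIZE: u8 = 0;

        fn callee_saved(reg: &FooGPReg) -> bool {
            unimplemented!("per-ABI register classification")
        }
        fn setup_stack<'a>(
            buf: &mut Vec<'a, u8>,
            leaf_function: bool,
            saved_regs: &[FooGPReg],
            requested_stack_size: i32,
        ) -> Result<i32, String> {
            // Emit the prologue, then return the aligned size that must be
            // handed back to cleanup_stack when emitting the epilogue.
            unimplemented!()
        }
        fn cleanup_stack<'a>(
            buf: &mut Vec<'a, u8>,
            leaf_function: bool,
            saved_regs: &[FooGPReg],
            aligned_stack_size: i32,
        ) -> Result<(), String> {
            unimplemented!()
        }
    }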

-pub trait Assembler<GPReg> {
-    fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
-    fn add_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
-    fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
-    fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
-    fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
-    fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
-    fn mov_register64bit_stackoffset32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
-    fn mov_stackoffset32bit_register64bit<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
-    fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
+/// Assembler contains calls to the backend assembly generator.
+/// These calls do not necessarily map directly to a single assembly instruction.
+/// They are higher level in cases where an instruction would not be common and shared between multiple architectures.
+/// Thus, some backends will need to use multiple instructions to perform a single one of these calls.
+/// Generally, I prefer explicit sources, as opposed to dst being one of the sources. Ex: `x = x + y` would be `add x, x, y` instead of `add x, y`.
+/// dst should always come before sources.
+pub trait Assembler<GPReg: GPRegTrait> {
+    fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+    fn add_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32);
+    fn add_reg64_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
+    fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i64);
+    fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg);
+    fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, offset: i32);
+    fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: GPReg);
+    fn sub_reg64_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src1: GPReg, imm32: i32);
     fn ret<'a>(buf: &mut Vec<'a, u8>);
-    fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32);
-    fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
-    fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg);
 }
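The explicit-source convention means callers always spell out `dst = src1 + src2`; backends whose hardware form is two-operand fold in the extra move themselves. A usage sketch against this trait:

    // Emits dst = src1 + src2. On AArch64 this is one `add dst, src1, src2`;
    // the x86-64 implementation later in this commit lowers it to
    // `mov dst, src1; add dst, src2` unless dst already aliases a source.
    ASM::add_reg64_reg64_reg64(&mut buf, dst, src1, src2);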

 #[derive(Clone, Debug, PartialEq)]
-enum SymbolStorage<GPReg> {
+enum SymbolStorage<GPReg: GPRegTrait> {
     // These may need layout, but I am not sure.
     // I think whenever a symbol would be used, we specify layout anyways.
     GPRegeg(GPReg),
@@ -69,7 +81,7 @@ pub struct Backend64Bit<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallCo
     literal_map: MutMap<Symbol, Literal<'a>>,

     // This should probably be smarter than a vec.
-    // There are certain registers we should always use first. With pushing and poping, this could get mixed.
+    // There are certain registers we should always use first. With pushing and popping, this could get mixed.
     gp_free_regs: Vec<'a, GPReg>,

     // The last major thing we need is a way to decide what reg to free when all of them are full.
@@ -109,7 +121,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
     }

     fn reset(&mut self) {
-        self.stack_size = -(CC::red_zone_size() as i32);
+        self.stack_size = 0;
         self.leaf_function = true;
         self.last_seen_map.clear();
         self.free_map.clear();
@@ -119,13 +131,12 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
         self.gp_free_regs.clear();
         self.gp_used_regs.clear();
         self.gp_free_regs
-            .extend_from_slice(CC::gp_default_free_regs());
+            .extend_from_slice(CC::GP_DEFAULT_FREE_REGS);
     }

     fn set_not_leaf_function(&mut self) {
         self.leaf_function = false;
-        // If this is not a leaf function, it can't use the shadow space.
-        self.stack_size = CC::shadow_space_size() as i32 - CC::red_zone_size() as i32;
+        self.stack_size = CC::SHADOW_SPACE_SIZE as i32;
     }

     fn literal_map(&mut self) -> &mut MutMap<Symbol, Literal<'a>> {
|
|||||||
fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String> {
|
fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String> {
|
||||||
let mut out = bumpalo::vec![in self.env.arena];
|
let mut out = bumpalo::vec![in self.env.arena];
|
||||||
|
|
||||||
if !self.leaf_function {
|
// Setup stack.
|
||||||
// I believe that this will have to move away from push and to mov to be generic across backends.
|
let mut used_regs = bumpalo::vec![in self.env.arena];
|
||||||
ASM::push_register64bit(&mut out, CC::frame_pointer());
|
used_regs.extend(&self.used_callee_saved_regs);
|
||||||
ASM::mov_register64bit_register64bit(
|
let aligned_stack_size =
|
||||||
&mut out,
|
CC::setup_stack(&mut out, self.leaf_function, &used_regs, self.stack_size)?;
|
||||||
CC::frame_pointer(),
|
|
||||||
CC::stack_pointer(),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
// Save data in all callee saved regs.
|
|
||||||
let mut pop_order = bumpalo::vec![in self.env.arena];
|
|
||||||
for reg in &self.used_callee_saved_regs {
|
|
||||||
ASM::push_register64bit(&mut out, *reg);
|
|
||||||
pop_order.push(*reg);
|
|
||||||
}
|
|
||||||
if self.stack_size > 0 {
|
|
||||||
ASM::sub_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add function body.
|
// Add function body.
|
||||||
out.extend(&self.buf);
|
out.extend(&self.buf);
|
||||||
|
|
||||||
if self.stack_size > 0 {
|
// Cleanup stack.
|
||||||
ASM::add_register64bit_immediate32bit(&mut out, CC::stack_pointer(), self.stack_size);
|
CC::cleanup_stack(&mut out, self.leaf_function, &used_regs, aligned_stack_size)?;
|
||||||
}
|
|
||||||
// Restore data in callee saved regs.
|
|
||||||
while let Some(reg) = pop_order.pop() {
|
|
||||||
ASM::pop_register64bit(&mut out, reg);
|
|
||||||
}
|
|
||||||
if !self.leaf_function {
|
|
||||||
ASM::pop_register64bit(&mut out, CC::frame_pointer());
|
|
||||||
}
|
|
||||||
ASM::ret(&mut out);
|
ASM::ret(&mut out);
|
||||||
|
|
||||||
Ok((out.into_bump_slice(), &[]))
|
Ok((out.into_bump_slice(), &[]))
|
||||||
@@ -187,9 +177,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
     fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> {
         let dst_reg = self.claim_gp_reg(dst)?;
         let src_reg = self.load_to_reg(src)?;
-        ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
-        ASM::neg_register64bit(&mut self.buf, dst_reg);
-        ASM::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg);
+        ASM::abs_reg64_reg64(&mut self.buf, dst_reg, src_reg);
         Ok(())
     }

@@ -201,9 +189,8 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
     ) -> Result<(), String> {
         let dst_reg = self.claim_gp_reg(dst)?;
         let src1_reg = self.load_to_reg(src1)?;
-        ASM::mov_register64bit_register64bit(&mut self.buf, dst_reg, src1_reg);
         let src2_reg = self.load_to_reg(src2)?;
-        ASM::add_register64bit_register64bit(&mut self.buf, dst_reg, src2_reg);
+        ASM::add_reg64_reg64_reg64(&mut self.buf, dst_reg, src1_reg, src2_reg);
         Ok(())
     }

@@ -212,7 +199,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
             Literal::Int(x) => {
                 let reg = self.claim_gp_reg(sym)?;
                 let val = *x;
-                ASM::mov_register64bit_immediate64bit(&mut self.buf, reg, val);
+                ASM::mov_reg64_imm64(&mut self.buf, reg, val);
                 Ok(())
             }
             x => Err(format!("loading literal, {:?}, is not yet implemented", x)),
@@ -234,11 +221,11 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
     fn return_symbol(&mut self, sym: &Symbol) -> Result<(), String> {
         let val = self.symbols_map.get(sym);
         match val {
-            Some(SymbolStorage::GPRegeg(reg)) if *reg == CC::gp_return_regs()[0] => Ok(()),
+            Some(SymbolStorage::GPRegeg(reg)) if *reg == CC::GP_RETURN_REGS[0] => Ok(()),
             Some(SymbolStorage::GPRegeg(reg)) => {
                 // If it fits in a general purpose register, just copy it over.
                 // Technically this can be optimized to produce shorter instructions if less than 64bits.
-                ASM::mov_register64bit_register64bit(&mut self.buf, CC::gp_return_regs()[0], *reg);
+                ASM::mov_reg64_reg64(&mut self.buf, CC::GP_RETURN_REGS[0], *reg);
                 Ok(())
             }
             Some(x) => Err(format!(
@@ -258,7 +245,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
     fn claim_gp_reg(&mut self, sym: &Symbol) -> Result<GPReg, String> {
         let reg = if !self.gp_free_regs.is_empty() {
             let free_reg = self.gp_free_regs.pop().unwrap();
-            if CC::callee_saved_regs().contains(&free_reg) {
+            if CC::callee_saved(&free_reg) {
                 self.used_callee_saved_regs.insert(free_reg);
             }
             Ok(free_reg)
@@ -291,7 +278,7 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
                 let reg = self.claim_gp_reg(sym)?;
                 self.symbols_map
                     .insert(*sym, SymbolStorage::StackAndGPRegeg(reg, offset));
-                ASM::mov_register64bit_stackoffset32bit(&mut self.buf, reg, offset as i32);
+                ASM::mov_reg64_stack32(&mut self.buf, reg, offset as i32);
                 Ok(reg)
             }
             None => Err(format!("Unknown symbol: {}", sym)),
@@ -302,19 +289,9 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
         let val = self.symbols_map.remove(sym);
         match val {
             Some(SymbolStorage::GPRegeg(reg)) => {
-                let offset = self.stack_size;
-                self.stack_size += 8;
-                if let Some(size) = self.stack_size.checked_add(8) {
-                    self.stack_size = size;
-                } else {
-                    return Err(format!(
-                        "Ran out of stack space while saving symbol: {}",
-                        sym
-                    ));
-                }
-                ASM::mov_stackoffset32bit_register64bit(&mut self.buf, offset as i32, reg);
-                self.symbols_map
-                    .insert(*sym, SymbolStorage::Stack(offset as i32));
+                let offset = self.increase_stack_size(8)?;
+                ASM::mov_stack32_reg64(&mut self.buf, offset as i32, reg);
+                self.symbols_map.insert(*sym, SymbolStorage::Stack(offset));
                 Ok(())
             }
             Some(SymbolStorage::StackAndGPRegeg(_, offset)) => {
@@ -328,4 +305,16 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>>
             None => Err(format!("Unknown symbol: {}", sym)),
         }
     }
+
+    /// increase_stack_size increases the current stack size and returns the offset of the stack.
+    fn increase_stack_size(&mut self, amount: i32) -> Result<i32, String> {
+        debug_assert!(amount > 0);
+        let offset = self.stack_size;
+        if let Some(new_size) = self.stack_size.checked_add(amount) {
+            self.stack_size = new_size;
+            Ok(offset)
+        } else {
+            Err("Ran out of stack space".to_string())
+        }
+    }
 }
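Callers receive the pre-increment size back, which doubles as the base offset of the slot they just reserved; overflow of the i32 stack size surfaces as an Err instead of wrapping. A usage sketch mirroring the call site above:

    // Reserve an 8-byte spill slot for `reg`.
    let offset = self.increase_stack_size(8)?;
    ASM::mov_stack32_reg64(&mut self.buf, offset, reg);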
@@ -1,6 +1,5 @@
 use crate::generic64::{Assembler, CallConv, GPRegTrait};
 use bumpalo::collections::Vec;
-use roc_collections::all::ImSet;

 // Not sure exactly how I want to represent registers.
 // If we want max speed, we would likely make them structs that impl the same trait to avoid ifs.
@@ -26,10 +25,312 @@ pub enum X86_64GPReg {

 impl GPRegTrait for X86_64GPReg {}

+pub struct X86_64Assembler {}
+pub struct X86_64WindowsFastcall {}
+pub struct X86_64SystemV {}
+
+const STACK_ALIGNMENT: u8 = 16;
+
+impl CallConv<X86_64GPReg> for X86_64SystemV {
+    const GP_PARAM_REGS: &'static [X86_64GPReg] = &[
+        X86_64GPReg::RDI,
+        X86_64GPReg::RSI,
+        X86_64GPReg::RDX,
+        X86_64GPReg::RCX,
+        X86_64GPReg::R8,
+        X86_64GPReg::R9,
+    ];
+    const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX, X86_64GPReg::RDX];
+
+    const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[
+        // The regs we want to use first should be at the end of this vec.
+        // We will use pop to get which reg to use next
+        // Use callee saved regs last.
+        X86_64GPReg::RBX,
+        // Don't use frame pointer: X86_64GPReg::RBP,
+        X86_64GPReg::R12,
+        X86_64GPReg::R13,
+        X86_64GPReg::R14,
+        X86_64GPReg::R15,
+        // Use caller saved regs first.
+        X86_64GPReg::RAX,
+        X86_64GPReg::RCX,
+        X86_64GPReg::RDX,
+        // Don't use stack pointer: X86_64GPReg::RSP,
+        X86_64GPReg::RSI,
+        X86_64GPReg::RDI,
+        X86_64GPReg::R8,
+        X86_64GPReg::R9,
+        X86_64GPReg::R10,
+        X86_64GPReg::R11,
+    ];
+    const SHADOW_SPACE_SIZE: u8 = 0;
+
+    #[inline(always)]
+    fn callee_saved(reg: &X86_64GPReg) -> bool {
+        matches!(
+            reg,
+            X86_64GPReg::RBX
+                | X86_64GPReg::RBP
+                | X86_64GPReg::R12
+                | X86_64GPReg::R13
+                | X86_64GPReg::R14
+                | X86_64GPReg::R15
+        )
+    }
+
+    #[inline(always)]
+    fn setup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[X86_64GPReg],
+        requested_stack_size: i32,
+    ) -> Result<i32, String> {
+        x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size)
+    }
+
+    #[inline(always)]
+    fn cleanup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[X86_64GPReg],
+        aligned_stack_size: i32,
+    ) -> Result<(), String> {
+        x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size)
+    }
+}
+
+impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
+    const GP_PARAM_REGS: &'static [X86_64GPReg] = &[
+        X86_64GPReg::RCX,
+        X86_64GPReg::RDX,
+        X86_64GPReg::R8,
+        X86_64GPReg::R9,
+    ];
+    const GP_RETURN_REGS: &'static [X86_64GPReg] = &[X86_64GPReg::RAX];
+    const GP_DEFAULT_FREE_REGS: &'static [X86_64GPReg] = &[
+        // The regs we want to use first should be at the end of this vec.
+        // We will use pop to get which reg to use next
+
+        // Don't use stack pointer: X86_64GPReg::RSP,
+        // Don't use frame pointer: X86_64GPReg::RBP,
+
+        // Use callee saved regs last.
+        X86_64GPReg::RBX,
+        X86_64GPReg::RSI,
+        X86_64GPReg::RDI,
+        X86_64GPReg::R12,
+        X86_64GPReg::R13,
+        X86_64GPReg::R14,
+        X86_64GPReg::R15,
+        // Use caller saved regs first.
+        X86_64GPReg::RAX,
+        X86_64GPReg::RCX,
+        X86_64GPReg::RDX,
+        X86_64GPReg::R8,
+        X86_64GPReg::R9,
+        X86_64GPReg::R10,
+        X86_64GPReg::R11,
+    ];
+    const SHADOW_SPACE_SIZE: u8 = 32;
+
+    #[inline(always)]
+    fn callee_saved(reg: &X86_64GPReg) -> bool {
+        matches!(
+            reg,
+            X86_64GPReg::RBX
+                | X86_64GPReg::RBP
+                | X86_64GPReg::RSI
+                | X86_64GPReg::RSP
+                | X86_64GPReg::RDI
+                | X86_64GPReg::R12
+                | X86_64GPReg::R13
+                | X86_64GPReg::R14
+                | X86_64GPReg::R15
+        )
+    }
+
+    #[inline(always)]
+    fn setup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[X86_64GPReg],
+        requested_stack_size: i32,
+    ) -> Result<i32, String> {
+        x86_64_generic_setup_stack(buf, leaf_function, saved_regs, requested_stack_size)
+    }
+
+    #[inline(always)]
+    fn cleanup_stack<'a>(
+        buf: &mut Vec<'a, u8>,
+        leaf_function: bool,
+        saved_regs: &[X86_64GPReg],
+        aligned_stack_size: i32,
+    ) -> Result<(), String> {
+        x86_64_generic_cleanup_stack(buf, leaf_function, saved_regs, aligned_stack_size)
+    }
+}
+
+#[inline(always)]
+fn x86_64_generic_setup_stack<'a>(
+    buf: &mut Vec<'a, u8>,
+    leaf_function: bool,
+    saved_regs: &[X86_64GPReg],
+    requested_stack_size: i32,
+) -> Result<i32, String> {
+    if !leaf_function {
+        X86_64Assembler::push_reg64(buf, X86_64GPReg::RBP);
+        X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RBP, X86_64GPReg::RSP);
+    }
+    for reg in saved_regs {
+        X86_64Assembler::push_reg64(buf, *reg);
+    }
+
+    // full size is upcast to i64 to make sure we don't overflow here.
+    let full_size = 8 * saved_regs.len() as i64 + requested_stack_size as i64;
+    let alignment = if full_size <= 0 {
+        0
+    } else {
+        full_size % STACK_ALIGNMENT as i64
+    };
+    let offset = if alignment == 0 {
+        0
+    } else {
+        STACK_ALIGNMENT - alignment as u8
+    };
+    if let Some(aligned_stack_size) = requested_stack_size.checked_add(offset as i32) {
+        if aligned_stack_size > 0 {
+            X86_64Assembler::sub_reg64_reg64_imm32(
+                buf,
+                X86_64GPReg::RSP,
+                X86_64GPReg::RSP,
+                aligned_stack_size,
+            );
+            Ok(aligned_stack_size)
+        } else {
+            Ok(0)
+        }
+    } else {
+        Err("Ran out of stack space".to_string())
+    }
+}
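Concretely, with one saved register and a 20-byte request under the 16-byte STACK_ALIGNMENT above: full_size = 8 + 20 = 28, alignment = 28 % 16 = 12, so 4 bytes of padding are added and `sub rsp, 24` gets emitted. A worked check of that arithmetic (not part of the commit):

    #[test]
    fn alignment_math_by_hand() {
        let (saved_regs, requested): (i64, i32) = (1, 20);
        let full_size = 8 * saved_regs + requested as i64; // 28
        let alignment = full_size % 16; // 12
        let offset = if alignment == 0 { 0 } else { 16 - alignment }; // 4
        assert_eq!(requested + offset as i32, 24); // the returned aligned_stack_size
    }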
+
+#[inline(always)]
+fn x86_64_generic_cleanup_stack<'a>(
+    buf: &mut Vec<'a, u8>,
+    leaf_function: bool,
+    saved_regs: &[X86_64GPReg],
+    aligned_stack_size: i32,
+) -> Result<(), String> {
+    if aligned_stack_size > 0 {
+        X86_64Assembler::add_reg64_reg64_imm32(
+            buf,
+            X86_64GPReg::RSP,
+            X86_64GPReg::RSP,
+            aligned_stack_size,
+        );
+    }
+    for reg in saved_regs.iter().rev() {
+        X86_64Assembler::pop_reg64(buf, *reg);
+    }
+    if !leaf_function {
+        X86_64Assembler::mov_reg64_reg64(buf, X86_64GPReg::RSP, X86_64GPReg::RBP);
+        X86_64Assembler::pop_reg64(buf, X86_64GPReg::RBP);
+    }
+    Ok(())
+}
+
+impl Assembler<X86_64GPReg> for X86_64Assembler {
+    // These functions should map to the raw assembly functions below.
+    // In some cases, that means you can just directly call one of the direct assembly functions.
+    #[inline(always)]
+    fn abs_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+        mov_reg64_reg64(buf, dst, src);
+        neg_reg64(buf, dst);
+        cmovl_reg64_reg64(buf, dst, src);
+    }
+    #[inline(always)]
+    fn add_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: X86_64GPReg,
+        src1: X86_64GPReg,
+        imm32: i32,
+    ) {
+        if dst == src1 {
+            add_reg64_imm32(buf, dst, imm32);
+        } else {
+            mov_reg64_reg64(buf, dst, src1);
+            add_reg64_imm32(buf, dst, imm32);
+        }
+    }
+    #[inline(always)]
+    fn add_reg64_reg64_reg64<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: X86_64GPReg,
+        src1: X86_64GPReg,
+        src2: X86_64GPReg,
+    ) {
+        if dst == src1 {
+            add_reg64_reg64(buf, dst, src2);
+        } else if dst == src2 {
+            add_reg64_reg64(buf, dst, src1);
+        } else {
+            mov_reg64_reg64(buf, dst, src1);
+            add_reg64_reg64(buf, dst, src2);
+        }
+    }
+    #[inline(always)]
+    fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
+        mov_reg64_imm64(buf, dst, imm);
+    }
+    #[inline(always)]
+    fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+        mov_reg64_reg64(buf, dst, src);
+    }
+    #[inline(always)]
+    fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, offset: i32) {
+        mov_reg64_stack32(buf, dst, offset);
+    }
+    #[inline(always)]
+    fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: X86_64GPReg) {
+        mov_stack32_reg64(buf, offset, src);
+    }
+    #[inline(always)]
+    fn sub_reg64_reg64_imm32<'a>(
+        buf: &mut Vec<'a, u8>,
+        dst: X86_64GPReg,
+        src1: X86_64GPReg,
+        imm32: i32,
+    ) {
+        if dst == src1 {
+            sub_reg64_imm32(buf, dst, imm32);
+        } else {
+            mov_reg64_reg64(buf, dst, src1);
+            sub_reg64_imm32(buf, dst, imm32);
+        }
+    }
+    #[inline(always)]
+    fn ret<'a>(buf: &mut Vec<'a, u8>) {
+        ret(buf);
+    }
+}
+
+impl X86_64Assembler {
+    #[inline(always)]
+    fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+        pop_reg64(buf, reg);
+    }
+
+    #[inline(always)]
+    fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+        push_reg64(buf, reg);
+    }
+}
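abs_reg64_reg64 above is the classic branchless absolute value: copy, negate, then conditionally move the original back when the negation came out negative. The same semantics in plain Rust, as a sketch of what the mov/neg/cmovl triple computes:

    fn abs_semantics(src: i64) -> i64 {
        let mut dst = src;        // mov dst, src
        dst = dst.wrapping_neg(); // neg dst (sets SF from the result)
        if dst < 0 {
            dst = src;            // cmovl dst, src: src was positive, keep it
        }
        dst // note i64::MIN stays i64::MIN, matching the hardware sequence
    }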
 const REX: u8 = 0x40;
 const REX_W: u8 = REX + 0x8;

-fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
+#[inline(always)]
+const fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
     if reg as u8 > 7 {
         byte + 1
     } else {
@@ -37,11 +338,13 @@ fn add_rm_extension(reg: X86_64GPReg, byte: u8) -> u8 {
     }
 }

-fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 {
+#[inline(always)]
+const fn add_opcode_extension(reg: X86_64GPReg, byte: u8) -> u8 {
     add_rm_extension(reg, byte)
 }

-fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
+#[inline(always)]
+const fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
     if reg as u8 > 7 {
         byte + 4
     } else {
@@ -49,220 +352,58 @@ fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
     }
 }

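These extensions set the REX bits needed for registers numbered 8 through 15. For example, `mov r15, imm32` calls add_rm_extension(R15, REX_W): R15 is register number 15 (> 7), so 0x48 becomes 0x49, the first byte asserted for R15 in the tests at the bottom of the file. A quick check, assuming the enum discriminants follow the hardware register numbering:

    assert_eq!(add_rm_extension(X86_64GPReg::R15, REX_W), 0x49);
    assert_eq!(add_rm_extension(X86_64GPReg::RAX, REX_W), 0x48);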
-pub struct X86_64Assembler {}
-pub struct X86_64WindowsFastcall {}
-pub struct X86_64SystemV {}
+// Below here are the functions for all of the assembly instructions.
+// Their names are based on the instruction and operators combined.
+// You should call `buf.reserve()` if you push or extend more than once.
+// Unit tests are added at the bottom of the file to ensure correct asm generation.
+// Please keep these in alphanumeric order.

-impl CallConv<X86_64GPReg> for X86_64SystemV {
-    fn gp_param_regs() -> &'static [X86_64GPReg] {
-        &[
-            X86_64GPReg::RDI,
-            X86_64GPReg::RSI,
-            X86_64GPReg::RDX,
-            X86_64GPReg::RCX,
-            X86_64GPReg::R8,
-            X86_64GPReg::R9,
-        ]
-    }
-    fn gp_return_regs() -> &'static [X86_64GPReg] {
-        &[X86_64GPReg::RAX, X86_64GPReg::RDX]
-    }
-    fn gp_default_free_regs() -> &'static [X86_64GPReg] {
-        &[
-            // The regs we want to use first should be at the end of this vec.
-            // We will use pop to get which reg to use next
-            // Use callee saved regs last.
-            X86_64GPReg::RBX,
-            // Don't use frame pointer: X86_64GPReg::RBP,
-            X86_64GPReg::R12,
-            X86_64GPReg::R13,
-            X86_64GPReg::R14,
-            X86_64GPReg::R15,
-            // Use caller saved regs first.
-            X86_64GPReg::RAX,
-            X86_64GPReg::RCX,
-            X86_64GPReg::RDX,
-            // Don't use stack pionter: X86_64GPReg::RSP,
-            X86_64GPReg::RSI,
-            X86_64GPReg::RDI,
-            X86_64GPReg::R8,
-            X86_64GPReg::R9,
-            X86_64GPReg::R10,
-            X86_64GPReg::R11,
-        ]
-    }
-    fn caller_saved_regs() -> ImSet<X86_64GPReg> {
-        // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed.
-        ImSet::from(vec![
-            X86_64GPReg::RAX,
-            X86_64GPReg::RCX,
-            X86_64GPReg::RDX,
-            X86_64GPReg::RSP,
-            X86_64GPReg::RSI,
-            X86_64GPReg::RDI,
-            X86_64GPReg::R8,
-            X86_64GPReg::R9,
-            X86_64GPReg::R10,
-            X86_64GPReg::R11,
-        ])
-    }
-    fn callee_saved_regs() -> ImSet<X86_64GPReg> {
-        // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed.
-        ImSet::from(vec![
-            X86_64GPReg::RBX,
-            X86_64GPReg::RBP,
-            X86_64GPReg::R12,
-            X86_64GPReg::R13,
-            X86_64GPReg::R14,
-            X86_64GPReg::R15,
-        ])
-    }
-    fn stack_pointer() -> X86_64GPReg {
-        X86_64GPReg::RSP
-    }
-    fn frame_pointer() -> X86_64GPReg {
-        X86_64GPReg::RBP
-    }
-    fn shadow_space_size() -> u8 {
-        0
-    }
-    fn red_zone_size() -> u8 {
-        128
-    }
-}

-impl CallConv<X86_64GPReg> for X86_64WindowsFastcall {
-    fn gp_param_regs() -> &'static [X86_64GPReg] {
-        &[
-            X86_64GPReg::RCX,
-            X86_64GPReg::RDX,
-            X86_64GPReg::R8,
-            X86_64GPReg::R9,
-        ]
-    }
-    fn gp_return_regs() -> &'static [X86_64GPReg] {
-        &[X86_64GPReg::RAX]
-    }
-    fn gp_default_free_regs() -> &'static [X86_64GPReg] {
-        &[
-            // The regs we want to use first should be at the end of this vec.
-            // We will use pop to get which reg to use next
-            // Use callee saved regs last.
-            X86_64GPReg::RBX,
-            // Don't use frame pointer: X86_64GPReg::RBP,
-            X86_64GPReg::RSI,
-            // Don't use stack pionter: X86_64GPReg::RSP,
-            X86_64GPReg::RDI,
-            X86_64GPReg::R12,
-            X86_64GPReg::R13,
-            X86_64GPReg::R14,
-            X86_64GPReg::R15,
-            // Use caller saved regs first.
-            X86_64GPReg::RAX,
-            X86_64GPReg::RCX,
-            X86_64GPReg::RDX,
-            X86_64GPReg::R8,
-            X86_64GPReg::R9,
-            X86_64GPReg::R10,
-            X86_64GPReg::R11,
-        ]
-    }
-    fn caller_saved_regs() -> ImSet<X86_64GPReg> {
-        // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed.
-        ImSet::from(vec![
-            X86_64GPReg::RAX,
-            X86_64GPReg::RCX,
-            X86_64GPReg::RDX,
-            X86_64GPReg::R8,
-            X86_64GPReg::R9,
-            X86_64GPReg::R10,
-            X86_64GPReg::R11,
-        ])
-    }
-    fn callee_saved_regs() -> ImSet<X86_64GPReg> {
-        // TODO: stop using vec! here. I was just have trouble with some errors, but it shouldn't be needed.
-        ImSet::from(vec![
-            X86_64GPReg::RBX,
-            X86_64GPReg::RBP,
-            X86_64GPReg::RSI,
-            X86_64GPReg::RSP,
-            X86_64GPReg::RDI,
-            X86_64GPReg::R12,
-            X86_64GPReg::R13,
-            X86_64GPReg::R14,
-            X86_64GPReg::R15,
-        ])
-    }
-    fn stack_pointer() -> X86_64GPReg {
-        X86_64GPReg::RSP
-    }
-    fn frame_pointer() -> X86_64GPReg {
-        X86_64GPReg::RBP
-    }
-    fn shadow_space_size() -> u8 {
-        32
-    }
-    fn red_zone_size() -> u8 {
-        0
-    }
-}

-impl Assembler<X86_64GPReg> for X86_64Assembler {
-    // Below here are the functions for all of the assembly instructions.
-    // Their names are based on the instruction and operators combined.
-    // You should call `buf.reserve()` if you push or extend more than once.
-    // Unit tests are added at the bottom of the file to ensure correct asm generation.
-    // Please keep these in alphanumeric order.
-
-    /// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits from r/m64.
-    fn add_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
+/// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits from r/m64.
+#[inline(always)]
+fn add_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
     // This can be optimized if the immediate is 1 byte.
     let rex = add_rm_extension(dst, REX_W);
     let dst_mod = dst as u8 % 8;
     buf.reserve(7);
     buf.extend(&[rex, 0x81, 0xC0 + dst_mod]);
     buf.extend(&imm.to_le_bytes());
 }

 /// `ADD r/m64,r64` -> Add r64 to r/m64.
-fn add_register64bit_register64bit<'a>(
-    buf: &mut Vec<'a, u8>,
-    dst: X86_64GPReg,
-    src: X86_64GPReg,
-) {
+#[inline(always)]
+fn add_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
     let rex = add_rm_extension(dst, REX_W);
     let rex = add_reg_extension(src, rex);
     let dst_mod = dst as u8 % 8;
     let src_mod = (src as u8 % 8) << 3;
     buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
 }

 /// `CMOVL r64,r/m64` -> Move if less (SF≠ OF).
-fn cmovl_register64bit_register64bit<'a>(
-    buf: &mut Vec<'a, u8>,
-    dst: X86_64GPReg,
-    src: X86_64GPReg,
-) {
+#[inline(always)]
+fn cmovl_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
     let rex = add_reg_extension(dst, REX_W);
     let rex = add_rm_extension(src, rex);
     let dst_mod = (dst as u8 % 8) << 3;
     let src_mod = src as u8 % 8;
     buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]);
 }

 /// `MOV r/m64, imm32` -> Move imm32 sign extended to 64-bits to r/m64.
-fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
+#[inline(always)]
+fn mov_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
     let rex = add_rm_extension(dst, REX_W);
     let dst_mod = dst as u8 % 8;
     buf.reserve(7);
     buf.extend(&[rex, 0xC7, 0xC0 + dst_mod]);
     buf.extend(&imm.to_le_bytes());
 }

 /// `MOV r64, imm64` -> Move imm64 to r64.
-fn mov_register64bit_immediate64bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
+#[inline(always)]
+fn mov_reg64_imm64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i64) {
     if imm <= i32::MAX as i64 && imm >= i32::MIN as i64 {
-        Self::mov_register64bit_immediate32bit(buf, dst, imm as i32)
+        mov_reg64_imm32(buf, dst, imm as i32)
     } else {
         let rex = add_opcode_extension(dst, REX_W);
         let dst_mod = dst as u8 % 8;
@@ -270,27 +411,21 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
         buf.extend(&[rex, 0xB8 + dst_mod]);
         buf.extend(&imm.to_le_bytes());
     }
 }

 /// `MOV r/m64,r64` -> Move r64 to r/m64.
-fn mov_register64bit_register64bit<'a>(
-    buf: &mut Vec<'a, u8>,
-    dst: X86_64GPReg,
-    src: X86_64GPReg,
-) {
+#[inline(always)]
+fn mov_reg64_reg64<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
     let rex = add_rm_extension(dst, REX_W);
     let rex = add_reg_extension(src, rex);
     let dst_mod = dst as u8 % 8;
     let src_mod = (src as u8 % 8) << 3;
     buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
 }

 /// `MOV r64,r/m64` -> Move r/m64 to r64.
-fn mov_register64bit_stackoffset32bit<'a>(
-    buf: &mut Vec<'a, u8>,
-    dst: X86_64GPReg,
-    offset: i32,
-) {
+#[inline(always)]
+fn mov_reg64_stack32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, offset: i32) {
     // This can be optimized based on how many bytes the offset actually is.
     // This function can probably be made to take any memory offset, I didn't feel like figuring it out rn.
     // Also, this may technically be faster generation since stack operations should be so common.
@@ -299,14 +434,11 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
     buf.reserve(8);
     buf.extend(&[rex, 0x8B, 0x84 + dst_mod, 0x24]);
     buf.extend(&offset.to_le_bytes());
 }

 /// `MOV r/m64,r64` -> Move r64 to r/m64.
-fn mov_stackoffset32bit_register64bit<'a>(
-    buf: &mut Vec<'a, u8>,
-    offset: i32,
-    src: X86_64GPReg,
-) {
+#[inline(always)]
+fn mov_stack32_reg64<'a>(buf: &mut Vec<'a, u8>, offset: i32, src: X86_64GPReg) {
     // This can be optimized based on how many bytes the offset actually is.
     // This function can probably be made to take any memory offset, I didn't feel like figuring it out rn.
     // Also, this may technically be faster generation since stack operations should be so common.
@@ -315,32 +447,36 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
     buf.reserve(8);
     buf.extend(&[rex, 0x89, 0x84 + src_mod, 0x24]);
     buf.extend(&offset.to_le_bytes());
 }

 /// `NEG r/m64` -> Two's complement negate r/m64.
-fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+#[inline(always)]
+fn neg_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
     let rex = add_rm_extension(reg, REX_W);
     let reg_mod = reg as u8 % 8;
     buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
 }

 /// `RET` -> Near return to calling procedure.
-fn ret<'a>(buf: &mut Vec<'a, u8>) {
+#[inline(always)]
+fn ret<'a>(buf: &mut Vec<'a, u8>) {
     buf.push(0xC3);
 }

 /// `SUB r/m64, imm32` -> Subtract imm32 sign-extended to 64-bits from r/m64.
-fn sub_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
+#[inline(always)]
+fn sub_reg64_imm32<'a>(buf: &mut Vec<'a, u8>, dst: X86_64GPReg, imm: i32) {
     // This can be optimized if the immediate is 1 byte.
     let rex = add_rm_extension(dst, REX_W);
     let dst_mod = dst as u8 % 8;
     buf.reserve(7);
     buf.extend(&[rex, 0x81, 0xE8 + dst_mod]);
     buf.extend(&imm.to_le_bytes());
 }

 /// `POP r64` -> Pop top of stack into r64; increment stack pointer. Cannot encode 32-bit operand size.
-fn pop_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+#[inline(always)]
+fn pop_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
     let reg_mod = reg as u8 % 8;
     if reg as u8 > 7 {
         let rex = add_opcode_extension(reg, REX);
@@ -348,10 +484,11 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
     } else {
         buf.push(0x58 + reg_mod);
     }
 }

 /// `PUSH r64` -> Push r64,
-fn push_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
+#[inline(always)]
+fn push_reg64<'a>(buf: &mut Vec<'a, u8>, reg: X86_64GPReg) {
     let reg_mod = reg as u8 % 8;
     if reg as u8 > 7 {
         let rex = add_opcode_extension(reg, REX);
@@ -359,7 +496,6 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
     } else {
         buf.push(0x50 + reg_mod);
     }
-    }
 }

 // When writing tests, it is a good idea to test both a numbered and an unnumbered register.
@ -372,7 +508,7 @@ mod tests {
|
|||||||
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
|
const TEST_I64: i64 = 0x12345678_9ABCDEF0;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_add_register64bit_immediate32bit() {
|
fn test_add_reg64_imm32() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for (dst, expected) in &[
|
for (dst, expected) in &[
|
||||||
@ -380,14 +516,14 @@ mod tests {
|
|||||||
(X86_64GPReg::R15, [0x49, 0x81, 0xC7]),
|
(X86_64GPReg::R15, [0x49, 0x81, 0xC7]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::add_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
|
add_reg64_imm32(&mut buf, *dst, TEST_I32);
|
||||||
assert_eq!(expected, &buf[..3]);
|
assert_eq!(expected, &buf[..3]);
|
||||||
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
|
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_add_register64bit_register64bit() {
|
fn test_add_reg64_reg64() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for ((dst, src), expected) in &[
|
for ((dst, src), expected) in &[
|
||||||
@ -397,13 +533,13 @@ mod tests {
|
|||||||
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x01, 0xFF]),
|
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x01, 0xFF]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::add_register64bit_register64bit(&mut buf, *dst, *src);
|
add_reg64_reg64(&mut buf, *dst, *src);
|
||||||
assert_eq!(expected, &buf[..]);
|
assert_eq!(expected, &buf[..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_cmovl_register64bit_register64bit() {
|
fn test_cmovl_reg64_reg64() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for ((dst, src), expected) in &[
|
for ((dst, src), expected) in &[
|
||||||
@ -425,13 +561,13 @@ mod tests {
|
|||||||
),
|
),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::cmovl_register64bit_register64bit(&mut buf, *dst, *src);
|
cmovl_reg64_reg64(&mut buf, *dst, *src);
|
||||||
assert_eq!(expected, &buf[..]);
|
assert_eq!(expected, &buf[..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mov_register64bit_immediate32bit() {
|
fn test_mov_reg64_imm32() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for (dst, expected) in &[
|
for (dst, expected) in &[
|
||||||
@ -439,14 +575,14 @@ mod tests {
|
|||||||
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
|
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::mov_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
|
mov_reg64_imm32(&mut buf, *dst, TEST_I32);
|
||||||
assert_eq!(expected, &buf[..3]);
|
assert_eq!(expected, &buf[..3]);
|
||||||
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
|
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mov_register64bit_immediate64bit() {
|
fn test_mov_reg64_imm64() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for (dst, expected) in &[
|
for (dst, expected) in &[
|
||||||
@ -454,7 +590,7 @@ mod tests {
|
|||||||
(X86_64GPReg::R15, [0x49, 0xBF]),
|
(X86_64GPReg::R15, [0x49, 0xBF]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I64);
|
mov_reg64_imm64(&mut buf, *dst, TEST_I64);
|
||||||
assert_eq!(expected, &buf[..2]);
|
assert_eq!(expected, &buf[..2]);
|
||||||
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
|
assert_eq!(TEST_I64.to_le_bytes(), &buf[2..]);
|
||||||
}
|
}
|
||||||
@ -463,14 +599,14 @@ mod tests {
|
|||||||
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
|
(X86_64GPReg::R15, [0x49, 0xC7, 0xC7]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::mov_register64bit_immediate64bit(&mut buf, *dst, TEST_I32 as i64);
|
mov_reg64_imm64(&mut buf, *dst, TEST_I32 as i64);
|
||||||
assert_eq!(expected, &buf[..3]);
|
assert_eq!(expected, &buf[..3]);
|
||||||
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
|
assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mov_register64bit_register64bit() {
|
fn test_mov_reg64_reg64() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for ((dst, src), expected) in &[
|
for ((dst, src), expected) in &[
|
||||||
@ -480,13 +616,13 @@ mod tests {
|
|||||||
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x89, 0xFF]),
|
((X86_64GPReg::R15, X86_64GPReg::R15), [0x4D, 0x89, 0xFF]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::mov_register64bit_register64bit(&mut buf, *dst, *src);
|
mov_reg64_reg64(&mut buf, *dst, *src);
|
||||||
assert_eq!(expected, &buf[..]);
|
assert_eq!(expected, &buf[..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_mov_register64bit_stackoffset32bit() {
|
fn test_mov_reg64_stack32() {
|
||||||
let arena = bumpalo::Bump::new();
|
let arena = bumpalo::Bump::new();
|
||||||
let mut buf = bumpalo::vec![in &arena];
|
let mut buf = bumpalo::vec![in &arena];
|
||||||
for ((dst, offset), expected) in &[
|
for ((dst, offset), expected) in &[
|
||||||
@ -494,14 +630,14 @@ mod tests {
|
|||||||
((X86_64GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
|
((X86_64GPReg::R15, TEST_I32), [0x4C, 0x8B, 0xBC, 0x24]),
|
||||||
] {
|
] {
|
||||||
buf.clear();
|
buf.clear();
|
||||||
X86_64Assembler::mov_register64bit_stackoffset32bit(&mut buf, *dst, *offset);
|
mov_reg64_stack32(&mut buf, *dst, *offset);
|
||||||
assert_eq!(expected, &buf[..4]);
|
assert_eq!(expected, &buf[..4]);
|
||||||
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
|
assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||

     #[test]
-    fn test_mov_stackoffset32bit_register64bit() {
+    fn test_mov_stack32_reg64() {
         let arena = bumpalo::Bump::new();
         let mut buf = bumpalo::vec![in &arena];
         for ((offset, src), expected) in &[
@@ -509,14 +645,14 @@ mod tests {
             ((TEST_I32, X86_64GPReg::R15), [0x4C, 0x89, 0xBC, 0x24]),
         ] {
             buf.clear();
-            X86_64Assembler::mov_stackoffset32bit_register64bit(&mut buf, *offset, *src);
+            mov_stack32_reg64(&mut buf, *offset, *src);
             assert_eq!(expected, &buf[..4]);
             assert_eq!(TEST_I32.to_le_bytes(), &buf[4..]);
         }
     }

     #[test]
-    fn test_neg_register64bit() {
+    fn test_neg_reg64() {
         let arena = bumpalo::Bump::new();
         let mut buf = bumpalo::vec![in &arena];
         for (reg, expected) in &[
@@ -524,7 +660,7 @@ mod tests {
             (X86_64GPReg::R15, [0x49, 0xF7, 0xDF]),
         ] {
             buf.clear();
-            X86_64Assembler::neg_register64bit(&mut buf, *reg);
+            neg_reg64(&mut buf, *reg);
             assert_eq!(expected, &buf[..]);
         }
     }
@@ -533,12 +669,12 @@ mod tests {
     fn test_ret() {
         let arena = bumpalo::Bump::new();
         let mut buf = bumpalo::vec![in &arena];
-        X86_64Assembler::ret(&mut buf);
+        ret(&mut buf);
         assert_eq!(&[0xC3], &buf[..]);
     }

     #[test]
-    fn test_sub_register64bit_immediate32bit() {
+    fn test_sub_reg64_imm32() {
         let arena = bumpalo::Bump::new();
         let mut buf = bumpalo::vec![in &arena];
         for (dst, expected) in &[
@@ -546,14 +682,14 @@ mod tests {
             (X86_64GPReg::R15, [0x49, 0x81, 0xEF]),
         ] {
             buf.clear();
-            X86_64Assembler::sub_register64bit_immediate32bit(&mut buf, *dst, TEST_I32);
+            sub_reg64_imm32(&mut buf, *dst, TEST_I32);
             assert_eq!(expected, &buf[..3]);
             assert_eq!(TEST_I32.to_le_bytes(), &buf[3..]);
         }
     }

     #[test]
-    fn test_pop_register64bit() {
+    fn test_pop_reg64() {
         let arena = bumpalo::Bump::new();
         let mut buf = bumpalo::vec![in &arena];
         for (dst, expected) in &[
@@ -561,13 +697,13 @@ mod tests {
             (X86_64GPReg::R15, vec![0x41, 0x5F]),
         ] {
             buf.clear();
-            X86_64Assembler::pop_register64bit(&mut buf, *dst);
+            pop_reg64(&mut buf, *dst);
             assert_eq!(&expected[..], &buf[..]);
         }
     }

     #[test]
-    fn test_push_register64bit() {
+    fn test_push_reg64() {
         let arena = bumpalo::Bump::new();
         let mut buf = bumpalo::vec![in &arena];
         for (src, expected) in &[
@@ -575,7 +711,7 @@ mod tests {
             (X86_64GPReg::R15, vec![0x41, 0x57]),
         ] {
             buf.clear();
-            X86_64Assembler::push_register64bit(&mut buf, *src);
+            push_reg64(&mut buf, *src);
             assert_eq!(&expected[..], &buf[..]);
         }
     }
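push and pop use the short "opcode plus register" forms (0x50 + r and 0x58 + r), so the only prefix ever needed is a one-byte REX.B (0x41) when the register is R8-R15 — which is also why these two tests compare against `vec!` values of varying length instead of the fixed-size arrays used elsewhere. A hypothetical sketch:

    // Encode `push src` with the short form: optional REX.B, then 0x50 + reg.
    fn encode_push_reg64(src: u8) -> Vec<u8> {
        let mut out = Vec::new();
        if src > 7 {
            out.push(0x41); // REX.B: the register number needs a fourth bit
        }
        out.push(0x50 + (src & 0b111));
        out
    }

    // encode_push_reg64(15) == vec![0x41, 0x57] — the R15 row above; RAX would be just [0x50].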
@@ -1,4 +1,4 @@
-use crate::generic64::{x86_64, Backend64Bit};
+use crate::generic64::{aarch64, x86_64, Backend64Bit};
 use crate::{Backend, Env, Relocation, INLINED_SYMBOLS};
 use bumpalo::collections::Vec;
 use object::write;
@@ -22,7 +22,7 @@ pub fn build_module<'a>(
     target: &Triple,
     procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
 ) -> Result<Object, String> {
-    let (mut output, mut backend) = match target {
+    match target {
         Triple {
             architecture: TargetArch::X86_64,
             binary_format: TargetBF::Elf,
@@ -33,15 +33,42 @@ pub fn build_module<'a>(
                 x86_64::X86_64Assembler,
                 x86_64::X86_64SystemV,
             > = Backend::new(env, target)?;
-            Ok((
-                Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little),
+            build_object(
+                env,
+                procedures,
                 backend,
-            ))
+                Object::new(BinaryFormat::Elf, Architecture::X86_64, Endianness::Little),
+            )
+        }
+        Triple {
+            architecture: TargetArch::Aarch64(_),
+            binary_format: TargetBF::Elf,
+            ..
+        } => {
+            let backend: Backend64Bit<
+                aarch64::AArch64GPReg,
+                aarch64::AArch64Assembler,
+                aarch64::AArch64Call,
+            > = Backend::new(env, target)?;
+            build_object(
+                env,
+                procedures,
+                backend,
+                Object::new(BinaryFormat::Elf, Architecture::Aarch64, Endianness::Little),
+            )
         }
         x => Err(format! {
             "the target, {:?}, is not yet implemented",
             x}),
-    }?;
+    }
+}
+
+fn build_object<'a, B: Backend<'a>>(
+    env: &'a Env,
+    procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
+    mut backend: B,
+    mut output: Object,
+) -> Result<Object, String> {
     let text = output.section_id(StandardSection::Text);
     let data_section = output.section_id(StandardSection::Data);
     let comment = output.add_section(vec![], b"comment".to_vec(), SectionKind::OtherString);
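The refactor above separates target dispatch from object emission: build_module now only selects the concrete register set, assembler, and calling convention for the triple, and the new generic build_object performs the emission once for any `B: Backend<'a>`, so AArch64 support costs one extra match arm instead of a second copy of the emission loop. A rough sketch of driving it end to end — the string-to-Triple parse and the final `object.write()` call are assumptions about the target-lexicon and object crates, not code from this commit:

    use target_lexicon::Triple;

    // Hypothetical driver: pick a target triple, build the module,
    // then serialize the resulting object to raw ELF bytes.
    fn emit_elf<'a>(
        env: &'a Env,
        procedures: MutMap<(symbol::Symbol, Layout<'a>), Proc<'a>>,
    ) -> Result<std::vec::Vec<u8>, String> {
        let target: Triple = "x86_64-unknown-linux-gnu"
            .parse()
            .map_err(|e| format!("invalid triple: {:?}", e))?;
        let object = build_module(env, &target, procedures)?; // dispatches as shown above
        object.write().map_err(|e| format!("{:?}", e)) // assumed object-crate serializer
    }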
@@ -9,7 +9,7 @@ extern crate libc;
 #[macro_use]
 mod helpers;

-#[cfg(all(test, target_os = "linux", target_arch = "x86_64"))]
+#[cfg(all(test, target_os = "linux", any(target_arch = "x86_64"/*, target_arch = "aarch64"*/)))]
 mod gen_num {
     //use roc_std::RocOrder;

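The widened cfg gate above keeps these end-to-end tests compiled out on unsupported hosts while staging the next step: with the `any(...)` form in place, turning the suite on for AArch64 once that backend passes is just a matter of deleting the comment markers, i.e.

    #[cfg(all(test, target_os = "linux", any(target_arch = "x86_64", target_arch = "aarch64")))]
    mod gen_num { /* ... */ }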