From 13781a6f8fd12489a10dbfd7f9a86cbc3af5eebd Mon Sep 17 00:00:00 2001 From: Brendan Hansknecht Date: Fri, 20 Nov 2020 23:04:35 -0800 Subject: [PATCH] Add Num.abs as inlined function --- compiler/gen_dev/src/elf.rs | 13 ++- compiler/gen_dev/src/lib.rs | 69 +++++++++++++-- compiler/gen_dev/src/x86_64/asm.rs | 46 ++++++++++ compiler/gen_dev/src/x86_64/mod.rs | 130 +++++++++++++++++++++++------ compiler/gen_dev/tests/gen_num.rs | 55 ++++++------ 5 files changed, 248 insertions(+), 65 deletions(-) diff --git a/compiler/gen_dev/src/elf.rs b/compiler/gen_dev/src/elf.rs index bc46376d54..7f562dcff3 100644 --- a/compiler/gen_dev/src/elf.rs +++ b/compiler/gen_dev/src/elf.rs @@ -36,10 +36,15 @@ pub fn build_module<'a>( // Setup layout_ids for procedure calls. let mut layout_ids = roc_mono::layout::LayoutIds::default(); let mut procs = Vec::with_capacity_in(procedures.len(), env.arena); - for ((symbol, layout), proc) in procedures { + for ((sym, layout), proc) in procedures { + // This is temporary until we support passing args to functions. + if sym == symbol::Symbol::NUM_ABS { + continue; + } + let fn_name = layout_ids - .get(symbol, &layout) - .to_symbol_string(symbol, &env.interns); + .get(sym, &layout) + .to_symbol_string(sym, &env.interns); let proc_symbol = Symbol { name: fn_name.as_bytes().to_vec(), @@ -48,7 +53,7 @@ pub fn build_module<'a>( kind: SymbolKind::Text, // TODO: Depending on whether we are building a static or dynamic lib, this should change. // We should use Dynamic -> anyone, Linkage -> static link, Compilation -> this module only. 
- scope: if env.exposed_to_host.contains(&symbol) { + scope: if env.exposed_to_host.contains(&sym) { SymbolScope::Dynamic } else { SymbolScope::Linkage diff --git a/compiler/gen_dev/src/lib.rs b/compiler/gen_dev/src/lib.rs index 1db2b9a2c1..88fb66db8a 100644 --- a/compiler/gen_dev/src/lib.rs +++ b/compiler/gen_dev/src/lib.rs @@ -15,9 +15,10 @@ use bumpalo::Bump; use object::write::Object; use roc_collections::all::{MutMap, MutSet}; use roc_module::ident::TagName; +use roc_module::low_level::LowLevel; use roc_module::symbol::{Interns, Symbol}; use roc_mono::ir::{CallType, Expr, JoinPointId, Literal, Proc, Stmt}; -use roc_mono::layout::Layout; +use roc_mono::layout::{Builtin, Layout}; use target_lexicon::{BinaryFormat, Triple}; pub mod elf; @@ -68,9 +69,13 @@ where /// last_seen_map gets the map from symbol to when it is last seen in the function. fn last_seen_map(&mut self) -> &mut MutMap>; - /// set_symbol_to_lit sets a symbol to be equal to a literal. - /// When the symbol is used, the literal should be loaded. - fn set_symbol_to_lit(&mut self, sym: &Symbol, lit: &Literal<'a>); + /// load_literal sets a symbol to be equal to a literal. + fn load_literal( + &mut self, + sym: &Symbol, + lit: &Literal<'a>, + layout: &Layout<'a>, + ) -> Result<(), String>; /// free_symbol frees any registers or stack space used to hold a symbol. fn free_symbol(&mut self, sym: &Symbol); @@ -85,8 +90,6 @@ where fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String>; /// build_proc creates a procedure and outputs it to the wrapped object writer. - /// This will need to return the list of relocations because they have to be added differently based on file format. - /// Also, assembly will of course be generated by individual calls on backend like may setup_stack. fn build_proc(&mut self, proc: Proc<'a>) -> Result<(&'a [u8], &[Relocation]), String> { self.reset(); // TODO: let the backend know of all the arguments. 
@@ -119,22 +122,70 @@ where /// build_expr builds the expressions for the specified symbol. /// The builder must keep track of the symbol because it may be refered to later. - /// In many cases values can be lazy loaded, like literals. fn build_expr( &mut self, sym: &Symbol, expr: &Expr<'a>, - _layout: &Layout<'a>, + layout: &Layout<'a>, ) -> Result<(), String> { match expr { Expr::Literal(lit) => { - self.set_symbol_to_lit(sym, lit); + self.load_literal(sym, lit, layout); Ok(()) } + Expr::FunctionCall { + call_type: CallType::ByName(func_sym), + args, + .. + } => { + match *func_sym { + Symbol::NUM_ABS => { + // Instead of calling the function, just inline it. + self.build_expr(sym, &Expr::RunLowLevel(LowLevel::NumAbs, args), layout) + } + x => Err(format!("the function, {:?}, is not yet implemented", x)), + } + } + Expr::RunLowLevel(lowlevel, args) => { + self.build_run_low_level(sym, lowlevel, args, layout) + } x => Err(format!("the expression, {:?}, is not yet implemented", x)), } } + /// build_run_low_level builds the low level operation and outputs to the specified symbol. + /// The builder must keep track of the symbol because it may be referred to later. + fn build_run_low_level( + &mut self, + sym: &Symbol, + lowlevel: &LowLevel, + args: &'a [Symbol], + layout: &Layout<'a>, + ) -> Result<(), String> { + match lowlevel { + LowLevel::NumAbs => self.build_num_abs(sym, &args[0], layout), + x => Err(format!("low level, {:?}. is not yet implemented", x)), + } + } + + /// build_num_abs stores the absolute value of src into dst. + fn build_num_abs( + &mut self, + dst: &Symbol, + src: &Symbol, + layout: &Layout<'a>, + ) -> Result<(), String> { + // TODO: when this is expanded to floats, deal with typecasting here, and then call correct low level method. 
+ match layout { + Layout::Builtin(Builtin::Int64) => self.build_num_abs_i64(dst, src), + x => Err(format!("layout, {:?}, not implemented yet", x)), + } + } + + /// build_num_abs_i64 stores the absolute value of src into dst. + /// It only deals with inputs and outputs of i64 type. + fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String>; + /// maybe_free will check if the symbol is last seen in the current state. If so, it will free the symbol resources, like registers. fn maybe_free_symbol(&mut self, sym: &Symbol, stmt: &Stmt<'a>) { match self.last_seen_map().get(sym) { diff --git a/compiler/gen_dev/src/x86_64/asm.rs b/compiler/gen_dev/src/x86_64/asm.rs index 2bd788bcb0..39454233e1 100644 --- a/compiler/gen_dev/src/x86_64/asm.rs +++ b/compiler/gen_dev/src/x86_64/asm.rs @@ -51,6 +51,15 @@ fn add_reg_extension(reg: GPReg, byte: u8) -> u8 { // Unit tests are added at the bottom of the file to ensure correct asm generation. // Please keep these in alphanumeric order. +/// `CMOVL r64,r/m64` -> Move if less (SF≠ OF). +pub fn cmovl_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, src: GPReg) { + let rex = add_reg_extension(dst, REX_W); + let rex = add_rm_extension(src, rex); + let dst_mod = (dst as u8 % 8) << 3; + let src_mod = src as u8 % 8; + buf.extend(&[rex, 0x0F, 0x4C, 0xC0 + dst_mod + src_mod]); +} + /// `MOV r/m64, imm32` -> Move imm32 sign extended to 64-bits to r/m64. pub fn mov_register64bit_immediate32bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, imm: i32) { let rex = add_rm_extension(dst, REX_W); @@ -78,6 +87,13 @@ pub fn mov_register64bit_register64bit<'a>(buf: &mut Vec<'a, u8>, dst: GPReg, sr buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]); } +/// `NEG r/m64` -> Two's complement negate r/m64. 
+pub fn neg_register64bit<'a>(buf: &mut Vec<'a, u8>, reg: GPReg) { + let rex = add_rm_extension(reg, REX_W); + let reg_mod = reg as u8 % 8; + buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]); +} + /// `RET` -> Near return to calling procedure. pub fn ret_near<'a>(buf: &mut Vec<'a, u8>) { buf.push(0xC3); @@ -114,6 +130,22 @@ mod tests { const TEST_I32: i32 = 0x12345678; const TEST_I64: i64 = 0x12345678_9ABCDEF0; + #[test] + fn test_cmovl_register64bit_register64bit() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + for ((in1, in2), expected) in &[ + ((GPReg::RAX, GPReg::RAX), [0x48, 0x0F, 0x4C, 0xC0]), + ((GPReg::RAX, GPReg::R15), [0x49, 0x0F, 0x4C, 0xC7]), + ((GPReg::R15, GPReg::RAX), [0x4C, 0x0F, 0x4C, 0xF8]), + ((GPReg::R15, GPReg::R15), [0x4D, 0x0F, 0x4C, 0xFF]), + ] { + buf.clear(); + cmovl_register64bit_register64bit(&mut buf, *in1, *in2); + assert_eq!(expected, &buf[..]); + } + } + #[test] fn test_mov_register64bit_immediate32bit() { let arena = bumpalo::Bump::new(); @@ -157,6 +189,20 @@ mod tests { } } + #[test] + fn test_neg_register64bit() { + let arena = bumpalo::Bump::new(); + let mut buf = bumpalo::vec![in &arena]; + for (in1, expected) in &[ + (GPReg::RAX, [0x48, 0xF7, 0xD8]), + (GPReg::R15, [0x49, 0xF7, 0xDF]), + ] { + buf.clear(); + neg_register64bit(&mut buf, *in1); + assert_eq!(expected, &buf[..]); + } + } + #[test] fn test_ret_near() { let arena = bumpalo::Bump::new(); diff --git a/compiler/gen_dev/src/x86_64/mod.rs b/compiler/gen_dev/src/x86_64/mod.rs index ce4af28f97..2178a834ab 100644 --- a/compiler/gen_dev/src/x86_64/mod.rs +++ b/compiler/gen_dev/src/x86_64/mod.rs @@ -3,7 +3,7 @@ use bumpalo::collections::Vec; use roc_collections::all::{ImSet, MutMap}; use roc_module::symbol::Symbol; use roc_mono::ir::{Literal, Stmt}; -use roc_mono::layout::Layout; +use roc_mono::layout::{Builtin, Layout}; use target_lexicon::{CallingConvention, Triple}; mod asm; @@ -11,9 +11,9 @@ use asm::GPReg; #[derive(Clone, Debug, PartialEq)] enum 
SymbolStorage<'a> { - Literal(Literal<'a>), GPReg(GPReg, Layout<'a>), - Stack(u32, Layout<'a>), + Stack(u16, Layout<'a>), + StackAndGPReg(GPReg, u16, Layout<'a>), } pub struct X86_64Backend<'a> { @@ -188,35 +188,45 @@ impl<'a> Backend<'a> for X86_64Backend<'a> { &mut self.last_seen_map } - fn set_symbol_to_lit(&mut self, sym: &Symbol, lit: &Literal<'a>) { - self.symbols_map - .insert(*sym, SymbolStorage::Literal(lit.clone())); + fn load_literal( + &mut self, + sym: &Symbol, + lit: &Literal<'a>, + layout: &Layout<'a>, + ) -> Result<(), String> { + match lit { + Literal::Int(x) => { + let reg = self.claim_gp_reg()?; + let val = *x; + if val <= i32::MAX as i64 && val >= i32::MIN as i64 { + asm::mov_register64bit_immediate32bit(&mut self.buf, reg, val as i32); + } else { + asm::mov_register64bit_immediate64bit(&mut self.buf, reg, val); + } + self.gp_used_regs.push((reg, *sym)); + self.symbols_map + .insert(*sym, SymbolStorage::GPReg(reg, layout.clone())); + Ok(()) + } + x => Err(format!("loading literal, {:?}, is not yet implemented", x)), + } } fn free_symbol(&mut self, sym: &Symbol) { self.symbols_map.remove(sym); + for i in 0..self.gp_used_regs.len() { + let (reg, saved_sym) = self.gp_used_regs[i]; + if saved_sym == *sym { + self.gp_free_regs.push(reg); + self.gp_used_regs.remove(i); + break; + } + } } fn return_symbol(&mut self, sym: &Symbol) -> Result<(), String> { let val = self.symbols_map.get(sym); match val { - Some(SymbolStorage::Literal(Literal::Int(x))) => { - let val = *x; - if val <= i32::MAX as i64 && val >= i32::MIN as i64 { - asm::mov_register64bit_immediate32bit( - &mut self.buf, - self.gp_return_regs[0], - val as i32, - ); - } else { - asm::mov_register64bit_immediate64bit( - &mut self.buf, - self.gp_return_regs[0], - val, - ); - } - Ok(()) - } Some(SymbolStorage::GPReg(reg, _)) if *reg == self.gp_return_regs[0] => Ok(()), Some(SymbolStorage::GPReg(reg, _)) => { // If it fits in a general purpose register, just copy it over to. 
@@ -224,11 +234,28 @@ impl<'a> Backend<'a> for X86_64Backend<'a> { asm::mov_register64bit_register64bit(&mut self.buf, self.gp_return_regs[0], *reg); Ok(()) } - Some(x) => Err(format!("symbol, {:?}, is not yet implemented", x)), + Some(x) => Err(format!( + "returning symbol storage, {:?}, is not yet implemented", + x + )), None => Err(format!("Unknown return symbol: {}", sym)), } } + fn build_num_abs_i64(&mut self, dst: &Symbol, src: &Symbol) -> Result<(), String> { + let dst_reg = self.claim_gp_reg()?; + self.gp_used_regs.push((dst_reg, *dst)); + self.symbols_map.insert( + *dst, + SymbolStorage::GPReg(dst_reg, Layout::Builtin(Builtin::Int64)), + ); + let src_reg = self.load_to_reg(src)?; + asm::mov_register64bit_register64bit(&mut self.buf, dst_reg, src_reg); + asm::neg_register64bit(&mut self.buf, dst_reg); + asm::cmovl_register64bit_register64bit(&mut self.buf, dst_reg, src_reg); + Ok(()) + } + fn finalize(&mut self) -> Result<(&'a [u8], &[Relocation]), String> { // TODO: handle allocating and cleaning up data on the stack. let mut out = bumpalo::vec![in self.env.arena]; @@ -252,6 +279,59 @@ impl<'a> Backend<'a> for X86_64Backend<'a> { impl<'a> X86_64Backend<'a> { fn requires_stack_modification(&self) -> bool { !self.leaf_proc - || self.stack_size < self.shadow_space_size as u16 + self.red_zone_size as u16 + || self.stack_size > self.shadow_space_size as u16 + self.red_zone_size as u16 + } + + fn claim_gp_reg(&mut self) -> Result { + if self.gp_free_regs.len() > 0 { + // TODO: deal with callee saved registers. 
+ Ok(self.gp_free_regs.pop().unwrap()) + } else if self.gp_used_regs.len() > 0 { + let (reg, sym) = self.gp_used_regs.remove(0); + self.free_to_stack(&sym); + Ok(reg) + } else { + Err(format!("completely out of registers")) + } + } + + fn load_to_reg(&mut self, sym: &Symbol) -> Result { + let val = self.symbols_map.remove(sym); + match val { + Some(SymbolStorage::GPReg(reg, layout)) => { + self.symbols_map + .insert(*sym, SymbolStorage::GPReg(reg, layout)); + Ok(reg) + } + Some(SymbolStorage::StackAndGPReg(reg, offset, layout)) => { + self.symbols_map + .insert(*sym, SymbolStorage::StackAndGPReg(reg, offset, layout)); + Ok(reg) + } + Some(SymbolStorage::Stack(_offset, _layout)) => { + Err(format!("loading to the stack is not yet implemented")) + } + None => Err(format!("Unknown symbol: {}", sym)), + } + } + + fn free_to_stack(&mut self, sym: &Symbol) -> Result<(), String> { + let val = self.symbols_map.remove(sym); + match val { + Some(SymbolStorage::GPReg(_reg, _layout)) => { + Err(format!("pushing to the stack is not yet implemented")) + } + Some(SymbolStorage::StackAndGPReg(_, offset, layout)) => { + self.symbols_map + .insert(*sym, SymbolStorage::Stack(offset, layout)); + Ok(()) + } + Some(SymbolStorage::Stack(offset, layout)) => { + self.symbols_map + .insert(*sym, SymbolStorage::Stack(offset, layout)); + Ok(()) + } + None => Err(format!("Unknown symbol: {}", sym)), + } } } diff --git a/compiler/gen_dev/tests/gen_num.rs b/compiler/gen_dev/tests/gen_num.rs index aeb61b1769..8794cad3fb 100644 --- a/compiler/gen_dev/tests/gen_num.rs +++ b/compiler/gen_dev/tests/gen_num.rs @@ -1,6 +1,6 @@ #[macro_use] extern crate pretty_assertions; -//#[macro_use] +#[macro_use] extern crate indoc; extern crate bumpalo; @@ -9,7 +9,7 @@ extern crate libc; #[macro_use] mod helpers; -#[cfg(test)] +#[cfg(all(test, target_os = "linux", target_arch = "x86_64"))] mod gen_num { //use roc_std::RocOrder; @@ -25,6 +25,32 @@ mod gen_num { assert_evals_to!("0o17", 0o17, i64); 
assert_evals_to!("0x1000_0000_0000_0000", 0x1000_0000_0000_0000, i64); } + + // #[test] + // fn gen_add_i64() { + // assert_evals_to!( + // indoc!( + // r#" + // 1 + 2 + 3 + // "# + // ), + // 6, + // i64 + // ); + // } + + #[test] + fn i64_abs() { + assert_evals_to!("Num.abs -6", 6, i64); + assert_evals_to!("Num.abs 7", 7, i64); + assert_evals_to!("Num.abs 0", 0, i64); + assert_evals_to!("Num.abs -0", 0, i64); + assert_evals_to!("Num.abs -1", 1, i64); + assert_evals_to!("Num.abs 1", 1, i64); + assert_evals_to!("Num.abs 9_000_000_000_000", 9_000_000_000_000, i64); + assert_evals_to!("Num.abs -9_000_000_000_000", 9_000_000_000_000, i64); + } + /* #[test] fn f64_sqrt() { @@ -53,18 +79,6 @@ mod gen_num { assert_evals_to!("Num.abs 5.8", 5.8, f64); } - #[test] - fn i64_abs() { - //assert_evals_to!("Num.abs -6", 6, i64); - assert_evals_to!("Num.abs 7", 7, i64); - assert_evals_to!("Num.abs 0", 0, i64); - assert_evals_to!("Num.abs -0", 0, i64); - assert_evals_to!("Num.abs -1", 1, i64); - assert_evals_to!("Num.abs 1", 1, i64); - assert_evals_to!("Num.abs 9_000_000_000_000", 9_000_000_000_000, i64); - assert_evals_to!("Num.abs -9_000_000_000_000", 9_000_000_000_000, i64); - } - #[test] fn gen_if_fn() { assert_evals_to!( @@ -206,19 +220,6 @@ mod gen_num { ); } - #[test] - fn gen_add_i64() { - assert_evals_to!( - indoc!( - r#" - 1 + 2 + 3 - "# - ), - 6, - i64 - ); - } - #[test] fn gen_sub_f64() { assert_evals_to!(