Merge pull request #919 from rtfeldman/gen_dev/add_eq

2024-11-11 16:51:53 +03:00 · 2021-01-20 22:55:34 -05:00 · 2021-01-20 22:55:34 -05:00 · 5d59a32938
commit 5d59a32938
parent 55d2586316 3c5549072c
5 changed files with 156 additions and 18 deletions
--- a/compiler/gen_dev/src/generic64/aarch64.rs
+++ b/compiler/gen_dev/src/generic64/aarch64.rs
@ -317,6 +317,15 @@ impl Assembler<AArch64GPReg> for AArch64Assembler {
        unimplemented!("registers subtractions not implemented yet for AArch64");
    }

+    /// Placeholder for 64-bit register equality on AArch64.
+    /// Always panics via `unimplemented!`; none of the parameters are used.
+    #[inline(always)]
+    fn eq_reg64_reg64_reg64(
+        _buf: &mut Vec<'_, u8>,
+        _dst: AArch64GPReg,
+        _src1: AArch64GPReg,
+        _src2: AArch64GPReg,
+    ) {
+        unimplemented!("registers equality not implemented yet for AArch64");
+    }
    #[inline(always)]
    fn ret(buf: &mut Vec<'_, u8>) {
        ret_reg64(buf, AArch64GPReg::LR)
--- a/compiler/gen_dev/src/generic64/mod.rs
+++ b/compiler/gen_dev/src/generic64/mod.rs
@ -52,6 +52,7 @@ pub trait Assembler<GPReg: GPRegTrait> {
    fn mov_stack32_reg64(buf: &mut Vec<'_, u8>, offset: i32, src: GPReg);
    fn sub_reg64_reg64_imm32(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, imm32: i32);
    fn sub_reg64_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
+    fn eq_reg64_reg64_reg64(buf: &mut Vec<'_, u8>, dst: GPReg, src1: GPReg, src2: GPReg);
    fn ret(buf: &mut Vec<'_, u8>);
 }

@ -208,6 +209,14 @@ impl<'a, GPReg: GPRegTrait, ASM: Assembler<GPReg>, CC: CallConv<GPReg>> Backend<
        Ok(())
    }

+    /// Emits the code that evaluates `src1 == src2` and leaves the outcome in the
+    /// general-purpose register claimed for `dst`.
+    ///
+    /// Fails with the error of whichever register claim/load fails first.
+    fn build_eq_i64(&mut self, dst: &Symbol, src1: &Symbol, src2: &Symbol) -> Result<(), String> {
+        // Keep this order: the destination is claimed before either operand is loaded.
+        let out = self.claim_gp_reg(dst)?;
+        let lhs = self.load_to_reg(src1)?;
+        let rhs = self.load_to_reg(src2)?;
+
+        // The actual instruction selection is delegated to the target assembler.
+        ASM::eq_reg64_reg64_reg64(&mut self.buf, out, lhs, rhs);
+        Ok(())
+    }
+
    fn load_literal(&mut self, sym: &Symbol, lit: &Literal<'a>) -> Result<(), String> {
        match lit {
            Literal::Int(x) => {
--- a/compiler/gen_dev/src/generic64/x86_64.rs
+++ b/compiler/gen_dev/src/generic64/x86_64.rs
@ -323,6 +323,18 @@ impl Assembler<X86_64GPReg> for X86_64Assembler {
            sub_reg64_reg64(buf, dst, src2);
        }
    }
+
+    /// Stores the result of `src1 == src2` in `dst`.
+    /// Emits `CMP src1, src2` followed by the `sete_reg64` sequence for `dst`.
+    #[inline(always)]
+    fn eq_reg64_reg64_reg64(
+        buf: &mut Vec<'_, u8>,
+        dst: X86_64GPReg,
+        src1: X86_64GPReg,
+        src2: X86_64GPReg,
+    ) {
+        // The comparison only sets flags; `sete_reg64` turns ZF into a value in `dst`.
+        cmp_reg64_reg64(buf, src1, src2);
+        sete_reg64(buf, dst);
+    }
+
    #[inline(always)]
    fn ret(buf: &mut Vec<'_, u8>) {
        ret(buf);
@ -371,7 +383,6 @@ const fn add_reg_extension(reg: X86_64GPReg, byte: u8) -> u8 {
 // You should call `buf.reserve()` if you push or extend more than once.
 // Unit tests are added at the bottom of the file to ensure correct asm generation.
 // Please keep these in alphanumeric order.
-
 /// `ADD r/m64, imm32` -> Add imm32 sign-extended to 64-bits from r/m64.
 #[inline(always)]
 fn add_reg64_imm32(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, imm: i32) {
@ -383,24 +394,36 @@ fn add_reg64_imm32(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, imm: i32) {
    buf.extend(&imm.to_le_bytes());
 }

-/// `ADD r/m64,r64` -> Add r64 to r/m64.
-#[inline(always)]
-fn add_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+/// Encodes `op_code r/m64, r64` in register-direct form: a REX.W prefix (extended for
+/// r8-r15 operands), the opcode byte, and a ModR/M byte with `dst` as r/m and `src` as reg.
+/// Shared by the `*_reg64_reg64` helpers, which differ only in the opcode they pass.
+#[inline(always)]
+fn binop_reg64_reg64(op_code: u8, buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
    let rex = add_rm_extension(dst, REX_W);
    let rex = add_reg_extension(src, rex);
    let dst_mod = dst as u8 % 8;
    let src_mod = (src as u8 % 8) << 3;
-    buf.extend(&[rex, 0x01, 0xC0 + dst_mod + src_mod]);
+    buf.extend(&[rex, op_code, 0xC0 + dst_mod + src_mod]);
+}
+
+/// `ADD r/m64,r64` -> Add r64 to r/m64.
+#[inline(always)]
+fn add_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+    // 0x01 is the opcode byte handed to the shared register-register encoder.
+    binop_reg64_reg64(0x01, buf, dst, src);
 }

 /// `SUB r/m64,r64` -> Sub r64 to r/m64.
+/// That is, `src` is subtracted from `dst` and the result is left in `dst`.
 #[inline(always)]
 fn sub_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
-    let rex = add_rm_extension(dst, REX_W);
-    let rex = add_reg_extension(src, rex);
-    let dst_mod = dst as u8 % 8;
-    let src_mod = (src as u8 % 8) << 3;
-    buf.extend(&[rex, 0x29, 0xC0 + dst_mod + src_mod]);
+    // 0x29 is the opcode byte handed to the shared register-register encoder.
+    binop_reg64_reg64(0x29, buf, dst, src);
+}
+
+/// `CMP r/m64,r64` -> Compare r64 to r/m64.
+/// Only the status flags are updated; they are meant to be consumed by a following
+/// flag-reading instruction such as SETE.
+#[inline(always)]
+fn cmp_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+    // 0x39 is the opcode byte handed to the shared register-register encoder.
+    binop_reg64_reg64(0x39, buf, dst, src);
+}
+
+/// `XOR r/m64,r64` -> Xor r64 to r/m64.
+/// Note that XOR overwrites the status flags (ZF reflects the XOR result), so it must
+/// not be emitted between a flag-setting instruction and the instruction reading them.
+#[inline(always)]
+fn xor_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
+    // 0x31 is the opcode byte handed to the shared register-register encoder.
+    binop_reg64_reg64(0x31, buf, dst, src);
 }

 /// `CMOVL r64,r/m64` -> Move if less (SF≠ OF).
@ -440,11 +463,7 @@ fn mov_reg64_imm64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, imm: i64) {
 /// `MOV r/m64,r64` -> Move r64 to r/m64.
 #[inline(always)]
 fn mov_reg64_reg64(buf: &mut Vec<'_, u8>, dst: X86_64GPReg, src: X86_64GPReg) {
-    let rex = add_rm_extension(dst, REX_W);
-    let rex = add_reg_extension(src, rex);
-    let dst_mod = dst as u8 % 8;
-    let src_mod = (src as u8 % 8) << 3;
-    buf.extend(&[rex, 0x89, 0xC0 + dst_mod + src_mod]);
+    // MOV uses the same REX.W + opcode + ModR/M layout as the ALU helpers, so the
+    // shared encoder is reused with opcode 0x89.
+    binop_reg64_reg64(0x89, buf, dst, src);
 }

 /// `MOV r64,r/m64` -> Move r/m64 to r64.
@ -481,6 +500,28 @@ fn neg_reg64(buf: &mut Vec<'_, u8>, reg: X86_64GPReg) {
    buf.extend(&[rex, 0xF7, 0xD8 + reg_mod]);
 }

+/// `SETE r/m8` -> Set Byte on Condition - zero/equal (ZF=1)
+///
+/// Leaves 1 in `reg` if ZF is set and 0 otherwise. SETE only writes the low byte of the
+/// register, so it is followed by `AND r/m64, imm8` with 1 to clear the remaining bits.
+///
+/// The register must not be zeroed with `XOR reg, reg` before SETE: XOR sets ZF itself,
+/// which discards the result of the preceding comparison and makes SETE always produce 1.
+/// The status flags are clobbered by the trailing AND.
+#[inline(always)]
+fn sete_reg64(buf: &mut Vec<'_, u8>, reg: X86_64GPReg) {
+    // SETE needs 3 or 4 bytes, the AND needs 4 bytes
+    buf.reserve(8);
+
+    // SETE has to come first so that it still sees the caller's flags
+    let reg_mod = reg as u8 % 8;
+    use X86_64GPReg::*;
+    match reg {
+        // al, cl, dl and bl are addressable without any prefix
+        RAX | RCX | RDX | RBX => buf.extend(&[0x0F, 0x94, 0xC0 + reg_mod]),
+        // a bare REX prefix selects spl/bpl/sil/dil instead of ah/ch/dh/bh
+        RSP | RBP | RSI | RDI => buf.extend(&[REX, 0x0F, 0x94, 0xC0 + reg_mod]),
+        // REX.B selects r8b..r15b
+        R8 | R9 | R10 | R11 | R12 | R13 | R14 | R15 => {
+            buf.extend(&[REX + 1, 0x0F, 0x94, 0xC0 + reg_mod])
+        }
+    }
+
+    // `AND r/m64, imm8` (REX.W + 83 /4 ib) with 1 keeps only the bit SETE produced
+    let rex = add_rm_extension(reg, REX_W);
+    buf.extend(&[rex, 0x83, 0xE0 + reg_mod, 0x01]);
+}
+
 /// `RET` -> Near return to calling procedure.
 #[inline(always)]
 fn ret(buf: &mut Vec<'_, u8>) {
@ -562,6 +603,22 @@ mod tests {
        }
    }

+    #[test]
+    fn test_xor_reg64_reg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+        // (dst, src, expected encoding): a low and an extended register in each position
+        let cases = [
+            (X86_64GPReg::RAX, X86_64GPReg::RAX, [0x48, 0x31, 0xC0]),
+            (X86_64GPReg::RAX, X86_64GPReg::R15, [0x4C, 0x31, 0xF8]),
+            (X86_64GPReg::R15, X86_64GPReg::RAX, [0x49, 0x31, 0xC7]),
+            (X86_64GPReg::R15, X86_64GPReg::R15, [0x4D, 0x31, 0xFF]),
+        ];
+        for (dst, src, expected) in cases.iter() {
+            buf.clear();
+            xor_reg64_reg64(&mut buf, *dst, *src);
+            assert_eq!(expected, &buf[..]);
+        }
+    }
+
    #[test]
    fn test_cmovl_reg64_reg64() {
        let arena = bumpalo::Bump::new();
@ -689,6 +746,50 @@ mod tests {
        }
    }

+    #[test]
+    fn test_sete_reg64() {
+        let arena = bumpalo::Bump::new();
+        let mut buf = bumpalo::vec![in &arena];
+
+        // SETE has to be the first instruction emitted: it consumes the ZF left by the
+        // preceding comparison, and a leading `XOR reg, reg` would overwrite that flag.
+        // The upper bits of the register are cleared afterwards with `AND reg, 1`.
+
+        // 3-byte SETE (no REX prefix) followed by the 4-byte AND: 7 bytes
+        let (reg, expected) = (
+            X86_64GPReg::RAX,
+            [
+                0x0F, 0x94, 0xC0, // SETE al ; al is the lowest 8 bits of rax
+                0x48, 0x83, 0xE0, 0x01, // AND rax, 1
+            ],
+        );
+        buf.clear();
+        sete_reg64(&mut buf, reg);
+        assert_eq!(expected, &buf[..]);
+
+        // 4-byte SETE (REX prefix needed) followed by the 4-byte AND: 8 bytes
+        for (reg, expected) in &[
+            (
+                X86_64GPReg::RSP,
+                [
+                    // SETE spl ; spl is the lowest 8 bits of rsp
+                    0x40, 0x0F, 0x94, 0xC4,
+                    // AND rsp, 1
+                    0x48, 0x83, 0xE4, 0x01,
+                ],
+            ),
+            (
+                X86_64GPReg::R15,
+                [
+                    // SETE r15b ; r15b is the lowest 8 bits of r15
+                    0x41, 0x0F, 0x94, 0xC7,
+                    // AND r15, 1
+                    0x49, 0x83, 0xE7, 0x01,
+                ],
+            ),
+        ] {
+            buf.clear();
+            sete_reg64(&mut buf, *reg);
+            assert_eq!(expected, &buf[..]);
+        }
+    }
+
    #[test]
    fn test_ret() {
        let arena = bumpalo::Bump::new();
--- a/compiler/gen_dev/src/lib.rs
+++ b/compiler/gen_dev/src/lib.rs
@ -24,7 +24,12 @@ pub struct Env<'a> {
 }

 // INLINED_SYMBOLS is a set of all of the functions we automatically inline if seen.
-const INLINED_SYMBOLS: [Symbol; 3] = [Symbol::NUM_ABS, Symbol::NUM_ADD, Symbol::NUM_SUB];
+const INLINED_SYMBOLS: [Symbol; 4] = [
+    Symbol::NUM_ABS,
+    Symbol::NUM_ADD,
+    Symbol::NUM_SUB,
+    Symbol::BOOL_EQ,
+];

 // These relocations likely will need a length.
 // They may even need more definition, but this should be at least good enough for how we will use elf.
@ -134,6 +139,10 @@ where
                                // Instead of calling the function, just inline it.
                                self.build_run_low_level(sym, &LowLevel::NumSub, arguments, layout)
                            }
+                            Symbol::BOOL_EQ => {
+                                // Instead of calling the function, just inline it.
+                                self.build_run_low_level(sym, &LowLevel::Eq, arguments, layout)
+                            }
                            x => Err(format!("the function, {:?}, is not yet implemented", x)),
                        }
                    }
@ -185,6 +194,12 @@ where
                    x => Err(format!("layout, {:?}, not implemented yet", x)),
                }
            }
+            LowLevel::Eq => match layout {
+                Layout::Builtin(Builtin::Int1) => self.build_eq_i64(sym, &args[0], &args[1]),
+                // Should we panic?
+                x => Err(format!("wrong layout, {:?}, for LowLevel::Eq", x)),
+            },
+
            x => Err(format!("low level, {:?}. is not yet implemented", x)),
        }
    }
@ -211,6 +226,10 @@ where
        src2: &Symbol,
    ) -> Result<(), String>;

+    /// build_eq_i64 stores the result of `src1 == src2` into dst.
+    /// Both inputs are treated as i64 values.
+    /// NOTE(review): the `LowLevel::Eq` dispatch calls this for a `Builtin::Int1` layout,
+    /// so the output is presumably a boolean held in a 64-bit register rather than an
+    /// i64 value - confirm and tighten this description.
+    fn build_eq_i64(&mut self, dst: &Symbol, src1: &Symbol, src2: &Symbol) -> Result<(), String>;
+
    /// literal_map gets the map from symbol to literal, used for lazy loading and literal folding.
    fn literal_map(&mut self) -> &mut MutMap<Symbol, Literal<'a>>;

--- a/compiler/gen_dev/tests/gen_num.rs
+++ b/compiler/gen_dev/tests/gen_num.rs
@ -240,7 +240,7 @@ mod gen_num {
            f64
        );
    }
-
+    */
    #[test]
    fn gen_int_eq() {
        assert_evals_to!(
@ -253,7 +253,7 @@ mod gen_num {
            bool
        );
    }
-
+    /*
    #[test]
    fn gen_int_neq() {
        assert_evals_to!(