ZJIT: A64: Use MOVN for small negative immediates

Save a couple of instructions when loading a small negative constant into a
register. In fact, MOVN is specced to alias as `mov` in the official
disassembly.
Alan Wu 2025-08-01 16:50:10 -04:00
parent faa67506e5
commit f58fca7de0
3 changed files with 74 additions and 15 deletions
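
MOVN loads the bitwise NOT of a (possibly shifted) 16-bit immediate, so any 64-bit value whose complement fits in 16 bits can be materialized in a single instruction. A standalone sketch of that property, using hypothetical helper names rather than code from this commit:

    fn encodable_with_single_movn(value: u64) -> bool {
        // One movn suffices whenever the bitwise complement fits in imm16
        // (the shifted forms catch still more values; ignored here).
        u16::try_from(!value).is_ok()
    }

    fn movn_semantics(imm16: u16, hw: u8) -> u64 {
        // MOVN writes the complement of the shifted immediate.
        !((imm16 as u64) << (hw as u32 * 16))
    }

    fn main() {
        let value = -0x1800i64 as u64;
        assert!(encodable_with_single_movn(value));
        // movn x0, #0x17ff is what official disassembly renders as the
        // alias `mov x0, #-0x1800`.
        assert_eq!(movn_semantics((!value) as u16, 0), value);
    }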

View file

@@ -2,6 +2,9 @@ use super::super::arg::Sf;
 /// Which operation is being performed.
 enum Op {
+    /// A movn operation which inverts the immediate and zeroes out the other bits.
+    MOVN = 0b00,
+
     /// A movz operation which zeroes out the other bits.
     MOVZ = 0b10,
@@ -61,6 +64,12 @@ impl Mov {
         Self { rd, imm16, hw: hw.into(), op: Op::MOVK, sf: num_bits.into() }
     }
 
+    /// MOVN
+    /// <https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/MOVN--Move-wide-with-NOT->
+    pub fn movn(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
+        Self { rd, imm16, hw: hw.into(), op: Op::MOVN, sf: num_bits.into() }
+    }
+
     /// MOVZ
     /// <https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/MOVZ--Move-wide-with-zero-?lang=en>
     pub fn movz(rd: u8, imm16: u16, hw: u8, num_bits: u8) -> Self {
@@ -104,6 +113,34 @@ mod tests {
         assert_eq!(0xf2800f60, result);
     }
 
+    #[test]
+    fn test_movn_unshifted() {
+        let inst = Mov::movn(0, 123, 0, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92800f60, result);
+    }
+
+    #[test]
+    fn test_movn_shifted_16() {
+        let inst = Mov::movn(0, 123, 16, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92a00f60, result);
+    }
+
+    #[test]
+    fn test_movn_shifted_32() {
+        let inst = Mov::movn(0, 123, 32, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92c00f60, result);
+    }
+
+    #[test]
+    fn test_movn_shifted_48() {
+        let inst = Mov::movn(0, 123, 48, 64);
+        let result: u32 = inst.into();
+        assert_eq!(0x92e00f60, result);
+    }
+
     #[test]
     fn test_movk_shifted_16() {
         let inst = Mov::movk(0, 123, 16, 64);
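
The four expected words above differ only in the hw field. They can be re-derived from the Arm ARM's "Move wide (immediate)" field layout; what follows is an assumption-level rederivation for cross-checking, not the crate's actual encoder:

    // sf[31] | opc[30:29] | 100101[28:23] | hw[22:21] | imm16[20:5] | rd[4:0]
    fn move_wide(sf: u32, opc: u32, hw: u32, imm16: u16, rd: u32) -> u32 {
        (sf << 31) | (opc << 29) | (0b100101 << 23) | (hw << 21) | ((imm16 as u32) << 5) | rd
    }

    fn main() {
        // MOVN is opc = 0b00; the four tests step hw through 0..=3.
        assert_eq!(move_wide(1, 0b00, 0, 123, 0), 0x92800f60);
        assert_eq!(move_wide(1, 0b00, 1, 123, 0), 0x92a00f60);
        assert_eq!(move_wide(1, 0b00, 2, 123, 0), 0x92c00f60);
        assert_eq!(move_wide(1, 0b00, 3, 123, 0), 0x92e00f60);
        // MOVZ (opc = 0b10) and MOVK (opc = 0b11) share the layout.
        assert_eq!(move_wide(1, 0b10, 0, 123, 0), 0xd2800f60);
    }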

View file

@@ -716,6 +716,21 @@ pub fn movk(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
     cb.write_bytes(&bytes);
 }
 
+/// MOVN - load a register with the complement of a shifted then zero extended 16-bit immediate
+/// <https://developer.arm.com/documentation/ddi0602/2025-06/Base-Instructions/MOVN--Move-wide-with-NOT->
+pub fn movn(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
+    let bytes: [u8; 4] = match (rd, imm16) {
+        (A64Opnd::Reg(rd), A64Opnd::UImm(imm16)) => {
+            assert!(uimm_fits_bits(imm16, 16), "The immediate operand must be 16 bits or less.");
+
+            Mov::movn(rd.reg_no, imm16 as u16, shift, rd.num_bits).into()
+        },
+        _ => panic!("Invalid operand combination to movn instruction.")
+    };
+
+    cb.write_bytes(&bytes);
+}
+
 /// MOVZ - move a 16 bit immediate into a register, zero the other bits
 pub fn movz(cb: &mut CodeBlock, rd: A64Opnd, imm16: A64Opnd, shift: u8) {
     let bytes: [u8; 4] = match (rd, imm16) {
@@ -1543,6 +1558,11 @@ mod tests {
         check_bytes("600fa0f2", |cb| movk(cb, X0, A64Opnd::new_uimm(123), 16));
     }
 
+    #[test]
+    fn test_movn() {
+        check_bytes("600fa092", |cb| movn(cb, X0, A64Opnd::new_uimm(123), 16));
+    }
+
     #[test]
     fn test_movz() {
         check_bytes("600fa0d2", |cb| movz(cb, X0, A64Opnd::new_uimm(123), 16));

View file

@@ -140,6 +140,10 @@ fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
         // instruction, then we'll use that.
         movz(cb, rd, A64Opnd::new_uimm(current), 0);
         return 1;
+    } else if u16::try_from(!value).is_ok() {
+        // For small negative values, use a single movn
+        movn(cb, rd, A64Opnd::new_uimm(!value), 0);
+        return 1;
     } else if BitmaskImmediate::try_from(current).is_ok() {
         // Otherwise, if the immediate can be encoded
         // with the special bitmask immediate encoding,
@@ -1592,15 +1596,16 @@ mod tests {
         // Test values that exercise various types of immediates.
         // - 9 bit displacement for Load/Store
-        // - 12 bit shifted immediate
+        // - 12 bit ADD/SUB shifted immediate
+        // - 16 bit MOV family shifted immediates
         // - bit mask immediates
-        for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x1008, i32::MIN] {
+        for displacement in [i32::MAX, 0x10008, 0x1800, 0x208, -0x208, -0x1800, -0x10008, i32::MIN] {
             let mem = Opnd::mem(64, NATIVE_STACK_PTR, displacement);
             asm.lea_into(Opnd::Reg(X0_REG), mem);
         }
 
         asm.compile_with_num_regs(&mut cb, 0);
-        assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d100009dd2e0ffbff2e0ffdff2e0fffff2e063208b00ff9dd2e0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", "
+        assert_disasm!(cb, "e07b40b2e063208b000180d22000a0f2e063208b000083d2e063208be0230891e02308d1e0ff8292e063208b00ff9fd2c0ffbff2e0ffdff2e0fffff2e063208be08361b2e063208b", "
             0x0: orr x0, xzr, #0x7fffffff
             0x4: add x0, sp, x0
             0x8: mov x0, #8
@@ -1610,18 +1615,15 @@
             0x18: add x0, sp, x0
             0x1c: add x0, sp, #0x208
             0x20: sub x0, sp, #0x208
-            0x24: mov x0, #0xe800
-            0x28: movk x0, #0xffff, lsl #16
-            0x2c: movk x0, #0xffff, lsl #32
-            0x30: movk x0, #0xffff, lsl #48
-            0x34: add x0, sp, x0
-            0x38: mov x0, #0xeff8
-            0x3c: movk x0, #0xffff, lsl #16
-            0x40: movk x0, #0xffff, lsl #32
-            0x44: movk x0, #0xffff, lsl #48
-            0x48: add x0, sp, x0
-            0x4c: orr x0, xzr, #0xffffffff80000000
-            0x50: add x0, sp, x0
+            0x24: mov x0, #-0x1800
+            0x28: add x0, sp, x0
+            0x2c: mov x0, #0xfff8
+            0x30: movk x0, #0xfffe, lsl #16
+            0x34: movk x0, #0xffff, lsl #32
+            0x38: movk x0, #0xffff, lsl #48
+            0x3c: add x0, sp, x0
+            0x40: orr x0, xzr, #0xffffffff80000000
+            0x44: add x0, sp, x0
         ");
     }
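
In emit_load_value terms, -0x1800 previously took a movz plus three movk (the removed 0x24-0x34 sequence) and now takes a single movn, while -0x10008 still needs the full chain because its complement, 0x10007, is wider than 16 bits. A simplified count model covering only the arms visible in these hunks (an illustrative sketch, not the real function):

    fn insns_to_load(value: u64) -> usize {
        if u16::try_from(value).is_ok() {
            1 // single movz
        } else if u16::try_from(!value).is_ok() {
            1 // single movn: the new fast path for small negatives
        } else {
            4 // worst case here: one movz/movn plus up to three movk
        }
    }

    fn main() {
        assert_eq!(insns_to_load(8), 1);                   // movz x0, #8
        assert_eq!(insns_to_load(-0x1800i64 as u64), 1);   // was 4 before this commit
        assert_eq!(insns_to_load(-0x10008i64 as u64), 4);  // !value = 0x10007, too wide
    }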