ZJIT: Fix side-exit panicking when there are too many locals

Previously, ARM64 panicked in compiled_side_exits() when the memory
displacement grew large enough to exceed the 9-bit limit. Usually we split
this kind of memory operand, but compiled_side_exits() runs after
split().

Using scratch registers, implement `Insn::Store` on ARM64 so that it can
handle large displacements without split(). Do the same for x86, and
remove arch-specific code from compiled_side_exits(). We can now run
`TestKeywordArguments`.

Since `Insn::Store` no longer needs splitting, users enjoy lower
register pressure.

The downside is that using `Assembler::SCRATCH_REG` as a base register is
now sometimes an error, depending on whether `Insn::Store` also needs to
use the register. That seems like a fair trade-off since `SCRATCH_REG` is
rarely used, and we don't currently use it as a base register anywhere.
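
For illustration only (not part of the commit): a standalone Rust sketch of the lowering idea described above. It assumes the signed 9-bit STUR displacement range and uses a made-up lower_store() helper that returns assembly text; the real backend emits machine code through the assembler and its scratch registers rather than strings.

// Hypothetical, self-contained sketch: if the displacement fits STUR's
// signed 9-bit range, store directly; otherwise compute the address into a
// scratch register first and store with a zero displacement.
fn lower_store(base: &str, disp: i64, src: &str) -> Vec<String> {
    const SCRATCH: &str = "x17"; // stand-in for the backend's second scratch register
    if (-256..=255).contains(&disp) {
        vec![format!("stur {src}, [{base}, #{disp}]")]
    } else {
        // ADD/SUB immediates are unsigned, so pick the opcode from the sign.
        let (op, magnitude) = if disp < 0 { ("sub", -disp) } else { ("add", disp) };
        vec![
            format!("{op} {SCRATCH}, {base}, #{magnitude:#x}"),
            format!("stur {src}, [{SCRATCH}]"),
        ]
    }
}

fn main() {
    // Small displacement: encodable directly.
    println!("{:?}", lower_store("sp", -8, "x16"));
    // Large displacement (doesn't fit in 9 bits): goes through the scratch register.
    println!("{:?}", lower_store("sp", -0x305, "x16"));
}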
Author: Alan Wu
Date:   2025-08-01 14:54:52 -04:00
Parent: f58fca7de0
Commit: afac226478
4 changed files with 269 additions and 114 deletions


@@ -1 +0,0 @@
exclude(/test_/, 'Multiple tests make ZJIT panic')


@@ -195,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[
impl Assembler
{
/// Special scratch registers for intermediate processing.
/// This register is call-clobbered (so we don't have to save it before using it).
/// Avoid using if you can since this is used to lower [Insn] internally and
/// so conflicts are possible.
pub const SCRATCH_REG: Reg = X16_REG;
const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
const SCRATCH0_REG: Reg = Assembler::SCRATCH_REG;
const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
const SCRATCH1_REG: Reg = X17_REG;
/// Get the list of registers from which we will allocate on this platform
pub fn get_alloc_regs() -> Vec<Reg> {
@@ -652,31 +656,6 @@ impl Assembler
*opnd = split_load_operand(asm, *opnd);
asm.push_insn(insn);
},
Insn::Store { dest, src } => {
// The value being stored must be in a register, so if it's
// not already one we'll load it first.
let opnd1 = match src {
// If the first operand is zero, then we can just use
// the zero register.
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
// Otherwise we'll check if we need to load it first.
_ => split_load_operand(asm, *src)
};
match dest {
Opnd::Reg(_) => {
// Store does not support a register as a dest operand.
asm.mov(*dest, opnd1);
}
_ => {
// The displacement for the STUR instruction can't be more
// than 9 bits long. If it's longer, we need to load the
// memory address into a register first.
let opnd0 = split_memory_address(asm, *dest);
asm.store(opnd0, opnd1);
}
}
},
Insn::Mul { left, right, .. } => {
*left = split_load_operand(asm, *left);
*right = split_load_operand(asm, *right);
@@ -843,6 +822,42 @@ impl Assembler
}
}
/// Do the address calculation of `out_reg = base_reg + disp`
fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) {
let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no });
assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead.");
if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() {
// Use ADD/SUB if the displacement fits
add(cb, out, base_reg, A64Opnd::new_imm(disp.into()));
} else {
// Use add_extended() to interpret reg_no=31 as sp
// since the base register is never the zero register.
// Careful! Only the first two operands can refer to sp.
emit_load_value(cb, out, disp as u64);
add_extended(cb, out, base_reg, out);
};
}
/// Load a VALUE to a register and remember it for GC marking and reference updating
fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest: A64Opnd, value: VALUE) {
// We don't need to check if it's a special const
// here because we only allow these operands to hit
// this point if they're not a special const.
assert!(!value.special_const_p());
// This assumes only load instructions can contain
// references to GC'd Value operands. If the value
// being loaded is a heap object, we'll report that
// back out to the gc_offsets list.
ldr_literal(cb, dest, 2.into());
b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
gc_offsets.push(ptr_offset);
}
/// Emit a push instruction for the given operand by adding to the stack
/// pointer and then storing the given value.
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
@@ -1013,12 +1028,84 @@ impl Assembler
Insn::LShift { opnd, shift, out } => {
lsl(cb, out.into(), opnd.into(), shift.into());
},
store_insn @ Insn::Store { dest, src } => {
// With minor exceptions, as long as `dest` is a Mem, all forms of `src` are
// accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory
// base register to guarantee things will work.
let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else {
panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}");
};
// This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
// so we panic to get the author to change their code.
#[track_caller]
fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
assert_ne!(
backend_use.reg_no,
user_use,
"Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
);
}
// Split src into SCRATCH0 if necessary
let src_reg: A64Reg = match src {
Opnd::Reg(reg) => *reg,
// Use zero register when possible
Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG,
// Immediates
&Opnd::Imm(imm) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
emit_load_value(cb, Self::SCRATCH0, imm as u64);
Self::SCRATCH0_REG
}
&Opnd::UImm(imm) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
emit_load_value(cb, Self::SCRATCH0, imm);
Self::SCRATCH0_REG
}
&Opnd::Value(value) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value);
Self::SCRATCH0_REG
}
src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => {
// For mem-to-mem store, load the source into SCRATCH0
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
let src_mem = if mem_disp_fits_bits(src_disp) {
src_mem.into()
} else {
// Split the load address into SCRATCH0 first if necessary
assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG);
load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp);
A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0)
};
match src_num_bits {
64 | 32 => ldur(cb, Self::SCRATCH0, src_mem),
16 => ldurh(cb, Self::SCRATCH0, src_mem),
8 => ldurb(cb, Self::SCRATCH0, src_mem),
num_bits => panic!("unexpected num_bits: {num_bits}")
};
Self::SCRATCH0_REG
}
src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}")
};
let src = A64Opnd::Reg(src_reg);
// Split dest into SCRATCH1 if necessary.
let dest = if mem_disp_fits_bits(disp) {
dest.into()
} else {
assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG);
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG);
load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp);
A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0)
};
// This order may be surprising but it is correct. The way
// the Arm64 assembler works, the register that is going to
// be stored is first and the address is second. However in
// our IR we have the address first and the register second.
match dest_num_bits {
64 | 32 => stur(cb, src.into(), dest.into()),
16 => sturh(cb, src.into(), dest.into()),
num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest),
@@ -1045,21 +1132,7 @@ impl Assembler
};
},
Opnd::Value(value) => {
emit_load_gc_value(cb, &mut gc_offsets, out.into(), value);
},
Opnd::None => {
unreachable!("Attempted to load from None operand");
@@ -1097,20 +1170,7 @@ impl Assembler
let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else {
panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}");
};
load_effective_address(cb, out.into(), base_reg_no, disp);
}
Insn::LeaJumpTarget { out, target, .. } => {
if let Target::Label(label_idx) = target {
@@ -1627,6 +1687,64 @@ mod tests {
");
}
#[test]
fn test_store() {
let (mut asm, mut cb) = setup_asm();
// Large memory offsets in combinations of destination and source
let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305);
let small_mem = Opnd::mem(64, C_RET_OPND, 0);
asm.store(small_mem, large_mem);
asm.store(large_mem, small_mem);
asm.store(large_mem, large_mem);
asm.compile_with_num_regs(&mut cb, 0);
assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", "
0x0: sub x16, sp, #0x305
0x4: ldur x16, [x16]
0x8: stur x16, [x0]
0xc: ldur x16, [x0]
0x10: sub x17, sp, #0x305
0x14: stur x16, [x17]
0x18: sub x16, sp, #0x305
0x1c: ldur x16, [x16]
0x20: sub x17, sp, #0x305
0x24: stur x16, [x17]
");
}
#[test]
fn test_store_value_without_split() {
let (mut asm, mut cb) = setup_asm();
let imitation_heap_value = VALUE(0x1000);
assert!(imitation_heap_value.heap_object_p());
asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
// Side exit code is compiled without the split pass, so we directly call emit here to
// emulate that scenario.
let gc_offsets = asm.arm64_emit(&mut cb).unwrap();
assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
assert_disasm!(cb, "50000058030000140010000000000000b00200f8", "
0x0: ldr x16, #8
0x4: b #0x10
0x8: .byte 0x00, 0x10, 0x00, 0x00
0xc: .byte 0x00, 0x00, 0x00, 0x00
0x10: stur x16, [x21]
");
}
#[test]
#[should_panic]
fn test_store_unserviceable() {
let (mut asm, mut cb) = setup_asm();
// This would put the source into SCRATCH_REG, messing up the destination
asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into());
asm.compile_with_num_regs(&mut cb, 0);
}
/*
#[test]
fn test_emit_lea_label() {


@@ -1822,29 +1822,16 @@ impl Assembler
};
self.write_label(side_exit_label.clone());
// Load an operand that cannot be used as a source of Insn::Store
fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd {
if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) ||
(cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) {
asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd);
Opnd::Reg(Assembler::SCRATCH_REG)
} else {
opnd
}
}
// Restore the PC and the stack for regular side exits. We don't do this for
// side exits right after JIT-to-JIT calls, which restore them before the call.
if let Some(SideExitContext { pc, stack, locals }) = context {
asm_comment!(self, "write stack slots: {stack:?}");
for (idx, &opnd) in stack.iter().enumerate() {
let opnd = split_store_source(self, opnd);
self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd);
}
asm_comment!(self, "write locals: {locals:?}");
for (idx, &opnd) in locals.iter().enumerate() {
let opnd = split_store_source(self, opnd);
self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd);
}


@@ -96,8 +96,10 @@ pub const ALLOC_REGS: &'static [Reg] = &[
impl Assembler
{
/// Special scratch registers for intermediate processing.
/// This register is call-clobbered (so we don't have to save it before using it).
/// Avoid using if you can since this is used to lower [Insn] internally and
/// so conflicts are possible.
pub const SCRATCH_REG: Reg = R11_REG;
const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
@@ -293,38 +295,11 @@ impl Assembler
asm.push_insn(insn);
},
Insn::Mov { dest, src } => {
if let Opnd::Mem(_) = dest {
asm.store(*dest, *src);
} else {
asm.mov(*dest, *src);
}
},
Insn::Not { opnd, .. } => {
@@ -440,6 +415,14 @@ impl Assembler
}
}
fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest_reg: X86Opnd, value: VALUE) {
// Using movabs because mov might write value in 32 bits
movabs(cb, dest_reg, value.0 as _);
// The pointer immediate is encoded as the last part of the mov written out
let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
gc_offsets.push(ptr_offset);
}
// List of GC offsets
let mut gc_offsets: Vec<CodePtr> = Vec::new();
@@ -552,20 +535,71 @@ impl Assembler
shr(cb, opnd.into(), shift.into())
},
store_insn @ Insn::Store { dest, src } => {
let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else {
panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}");
};
// This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
// so we panic to get the author to change their code.
#[track_caller]
fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
assert_ne!(
backend_use.reg_no,
user_use,
"Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
);
}
let scratch = X86Opnd::Reg(Self::SCRATCH_REG);
let src = match src {
Opnd::Reg(_) => src.into(),
&Opnd::Mem(_) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
mov(cb, scratch, src.into());
scratch
}
&Opnd::Imm(imm) => {
// For 64 bit destinations, 32-bit values will be sign-extended
if num_bits == 64 && imm_num_bits(imm) > 32 {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
mov(cb, scratch, src.into());
scratch
} else if uimm_num_bits(imm as u64) <= num_bits {
// If the bit string is short enough for the destination, use the unsigned representation.
// Note that 64-bit and negative values are ruled out.
uimm_opnd(imm as u64)
} else {
src.into()
}
}
&Opnd::UImm(imm) => {
// For 64 bit destinations, 32-bit values will be sign-extended
if num_bits == 64 && imm_num_bits(imm as i64) > 32 {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
mov(cb, scratch, src.into());
scratch
} else {
src.into()
}
}
&Opnd::Value(value) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
emit_load_gc_value(cb, &mut gc_offsets, scratch, value);
scratch
}
src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_emit: {src:?}")
};
mov(cb, dest.into(), src);
}
// This assumes only load instructions can contain references to GC'd Value operands
Insn::Load { opnd, out } |
Insn::LoadInto { dest: out, opnd } => {
match opnd {
Opnd::Value(val) if val.heap_object_p() => {
emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val);
}
_ => mov(cb, out.into(), opnd.into())
}
@@ -1352,4 +1386,21 @@ mod tests {
0x29: pop rbp
"});
}
#[test]
fn test_store_value_without_split() {
let (mut asm, mut cb) = setup_asm();
let imitation_heap_value = VALUE(0x1000);
assert!(imitation_heap_value.heap_object_p());
asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
let gc_offsets = asm.x86_emit(&mut cb).unwrap();
assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
assert_disasm!(cb, "49bb00100000000000004c891b", "
0x0: movabs r11, 0x1000
0xa: mov qword ptr [rbx], r11
");
}
}