From afac22647852439e7b3563216afac7b9491eaa0b Mon Sep 17 00:00:00 2001
From: Alan Wu
Date: Fri, 1 Aug 2025 14:54:52 -0400
Subject: [PATCH] ZJIT: Fix side-exit panicking when there are too many locals

Previously, ARM64 panicked in compiled_side_exits() when the memory
displacement grew large enough to exceed the 9-bit limit. We usually
split these kinds of memory operands, but compiled_side_exits() runs
after split().

Using scratch registers, implement `Insn::Store` on ARM such that it
can handle large displacements without split(). Do this for x86 as
well, and remove the arch-specific code from compiled_side_exits().
We can now run `TestKeywordArguments`.

Since `Insn::Store` no longer needs splitting, users enjoy lower
register pressure. The downside is that using `Assembler::SCRATCH_REG`
as a base register is now sometimes an error, depending on whether
`Insn::Store` also needs that register. This seems like a fair
trade-off, since `SCRATCH_REG` is rarely used, and we don't currently
use it as a base register anywhere.
---
 test/.excludes-zjit/TestKeywordArguments.rb |   1 -
 zjit/src/backend/arm64/mod.rs               | 234 +++++++++++++++-----
 zjit/src/backend/lir.rs                     |  13 --
 zjit/src/backend/x86_64/mod.rs              | 135 +++++++----
 4 files changed, 269 insertions(+), 114 deletions(-)
 delete mode 100644 test/.excludes-zjit/TestKeywordArguments.rb

diff --git a/test/.excludes-zjit/TestKeywordArguments.rb b/test/.excludes-zjit/TestKeywordArguments.rb
deleted file mode 100644
index f52bdf6d30..0000000000
--- a/test/.excludes-zjit/TestKeywordArguments.rb
+++ /dev/null
@@ -1 +0,0 @@
-exclude(/test_/, 'Multiple tests make ZJIT panic')
diff --git a/zjit/src/backend/arm64/mod.rs b/zjit/src/backend/arm64/mod.rs
index 8ba7a33de5..2025e444ae 100644
--- a/zjit/src/backend/arm64/mod.rs
+++ b/zjit/src/backend/arm64/mod.rs
@@ -195,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[
 
 impl Assembler
 {
-    // Special scratch registers for intermediate processing.
-    // This register is caller-saved (so we don't have to save it before using it)
+    /// Special scratch registers for intermediate processing.
+    /// These registers are call-clobbered (so we don't have to save them before use).
+    /// Avoid using them if you can, since they are used to lower [Insn] internally
+    /// and so conflicts are possible.
    pub const SCRATCH_REG: Reg = X16_REG;
    const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
+    const SCRATCH0_REG: Reg = Assembler::SCRATCH_REG;
    const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
+    const SCRATCH1_REG: Reg = X17_REG;
 
     /// Get the list of registers from which we will allocate on this platform
     pub fn get_alloc_regs() -> Vec<Reg>
@@ -652,31 +656,6 @@ impl Assembler
                 *opnd = split_load_operand(asm, *opnd);
                 asm.push_insn(insn);
             },
-            Insn::Store { dest, src } => {
-                // The value being stored must be in a register, so if it's
-                // not already one we'll load it first.
-                let opnd1 = match src {
-                    // If the first operand is zero, then we can just use
-                    // the zero register.
-                    Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
-                    // Otherwise we'll check if we need to load it first.
-                    _ => split_load_operand(asm, *src)
-                };
-
-                match dest {
-                    Opnd::Reg(_) => {
-                        // Store does not support a register as a dest operand.
-                        asm.mov(*dest, opnd1);
-                    }
-                    _ => {
-                        // The displacement for the STUR instruction can't be more
-                        // than 9 bits long. If it's longer, we need to load the
-                        // memory address into a register first.
-                        let opnd0 = split_memory_address(asm, *dest);
-                        asm.store(opnd0, opnd1);
-                    }
-                }
-            },
             Insn::Mul { left, right, .. } => {
                 *left = split_load_operand(asm, *left);
                 *right = split_load_operand(asm, *right);
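The 9-bit limit the removed split code worked around is STUR's signed
displacement field. For reference, a standalone sketch of the
reachability check; disp_fits_stur() is a hypothetical name, the
backend's real check is mem_disp_fits_bits():

    // Signed 9-bit displacement: STUR reaches [-256, 255] around the base.
    fn disp_fits_stur(disp: i32) -> bool {
        (-256..=255).contains(&disp)
    }

    fn main() {
        assert!(disp_fits_stur(255));
        assert!(!disp_fits_stur(-0x305)); // the kind of offset large frames produce
    }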
@@ -843,6 +822,42 @@ impl Assembler
         }
     }
 
+    /// Do the address calculation of `out_reg = base_reg + disp`
+    fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) {
+        let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no });
+        assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead.");
+
+        if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() {
+            // Use ADD/SUB if the displacement fits
+            add(cb, out, base_reg, A64Opnd::new_imm(disp.into()));
+        } else {
+            // Use add_extended() to interpret reg_no=31 as sp,
+            // since the base register is never the zero register.
+            // Careful! Only the first two operands can refer to sp.
+            emit_load_value(cb, out, disp as u64);
+            add_extended(cb, out, base_reg, out);
+        };
+    }
+
+    /// Load a VALUE into a register and remember it for GC marking and reference updating
+    fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest: A64Opnd, value: VALUE) {
+        // We don't need to check if it's a special const
+        // here because we only allow these operands to hit
+        // this point if they're not a special const.
+        assert!(!value.special_const_p());
+
+        // This assumes only load instructions can contain
+        // references to GC'd Value operands. If the value
+        // being loaded is a heap object, we'll report that
+        // back out to the gc_offsets list.
+        ldr_literal(cb, dest, 2.into());
+        b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
+        cb.write_bytes(&value.as_u64().to_le_bytes());
+
+        let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
+        gc_offsets.push(ptr_offset);
+    }
+
     /// Emit a push instruction for the given operand by adding to the stack
     /// pointer and then storing the given value.
     fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
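As a companion to load_effective_address(), here is a self-contained
sketch of the encodability test that picks between its two paths,
assuming the usual A64 rule that ADD/SUB immediates are 12 bits,
optionally shifted left by 12 (the backend delegates this to
ShiftedImmediate::try_from):

    fn encodable_as_add_imm(disp: i32) -> bool {
        let abs = disp.unsigned_abs();
        abs < (1 << 12) || (abs & 0xfff == 0 && abs < (1 << 24))
    }

    fn main() {
        assert!(encodable_as_add_imm(-0x305));   // a single sub instruction
        assert!(!encodable_as_add_imm(0x12345)); // needs emit_load_value + add_extended
    }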
@@ -1013,12 +1028,84 @@ impl Assembler
             Insn::LShift { opnd, shift, out } => {
                 lsl(cb, out.into(), opnd.into(), shift.into());
             },
-            Insn::Store { dest, src } => {
+            store_insn @ Insn::Store { dest, src } => {
+                // With minor exceptions, as long as `dest` is a Mem, all forms of `src` are
+                // accepted. As a rule of thumb, avoid using Assembler::SCRATCH_REG as a memory
+                // base register to guarantee things will work.
+                let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else {
+                    panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}");
+                };
+
+                // This kind of tricky clobber can only happen with explicit use of SCRATCH_REG,
+                // so we panic to get the author to change their code.
+                #[track_caller]
+                fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
+                    assert_ne!(
+                        backend_use.reg_no,
+                        user_use,
+                        "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
+                    );
+                }
+
+                // Split src into SCRATCH0 if necessary
+                let src_reg: A64Reg = match src {
+                    Opnd::Reg(reg) => *reg,
+                    // Use the zero register when possible
+                    Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG,
+                    // Immediates
+                    &Opnd::Imm(imm) => {
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        emit_load_value(cb, Self::SCRATCH0, imm as u64);
+                        Self::SCRATCH0_REG
+                    }
+                    &Opnd::UImm(imm) => {
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        emit_load_value(cb, Self::SCRATCH0, imm);
+                        Self::SCRATCH0_REG
+                    }
+                    &Opnd::Value(value) => {
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value);
+                        Self::SCRATCH0_REG
+                    }
+                    src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => {
+                        // For a mem-to-mem store, load the source into SCRATCH0
+                        assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
+                        let src_mem = if mem_disp_fits_bits(src_disp) {
+                            src_mem.into()
+                        } else {
+                            // Split the load address into SCRATCH0 first if necessary
+                            assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG);
+                            load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp);
+                            A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0)
+                        };
+                        match src_num_bits {
+                            64 | 32 => ldur(cb, Self::SCRATCH0, src_mem),
+                            16 => ldurh(cb, Self::SCRATCH0, src_mem),
+                            8 => ldurb(cb, Self::SCRATCH0, src_mem),
+                            num_bits => panic!("unexpected num_bits: {num_bits}")
+                        };
+                        Self::SCRATCH0_REG
+                    }
+                    src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}")
+                };
+                let src = A64Opnd::Reg(src_reg);
+
+                // Split dest into SCRATCH1 if necessary.
+                let dest = if mem_disp_fits_bits(disp) {
+                    dest.into()
+                } else {
+                    assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG);
+                    assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG);
+                    load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp);
+                    A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0)
+                };
+
                 // This order may be surprising but it is correct. The way
                 // the Arm64 assembler works, the register that is going to
                 // be stored is first and the address is second. However in
                 // our IR we have the address first and the register second.
-                match dest.rm_num_bits() {
+                match dest_num_bits {
                     64 | 32 => stur(cb, src.into(), dest.into()),
                     16 => sturh(cb, src.into(), dest.into()),
                     num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest),
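The assert_no_clobber() calls above all encode one invariant. A minimal
standalone model of the hazard, with made-up types (the real check
compares Reg::reg_no values):

    const SCRATCH0: u8 = 16; // x16 on arm64

    fn lower_store(base_reg_no: u8, src_needs_scratch: bool) {
        if src_needs_scratch {
            assert_ne!(base_reg_no, SCRATCH0, "store would clobber its own base register");
        }
        // ...emit the actual instructions here...
    }

    fn main() {
        lower_store(0, true);      // fine: the base is x0
        // lower_store(16, true);  // panics, as test_store_unserviceable demonstrates
    }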
@@ -1045,21 +1132,7 @@ impl Assembler
                     };
                 },
                 Opnd::Value(value) => {
-                    // We dont need to check if it's a special const
-                    // here because we only allow these operands to hit
-                    // this point if they're not a special const.
-                    assert!(!value.special_const_p());
-
-                    // This assumes only load instructions can contain
-                    // references to GC'd Value operands. If the value
-                    // being loaded is a heap object, we'll report that
-                    // back out to the gc_offsets list.
-                    ldr_literal(cb, out.into(), 2.into());
-                    b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
-                    cb.write_bytes(&value.as_u64().to_le_bytes());
-
-                    let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
-                    gc_offsets.push(ptr_offset);
+                    emit_load_gc_value(cb, &mut gc_offsets, out.into(), value);
                 },
                 Opnd::None => {
                     unreachable!("Attempted to load from None operand");
@@ -1097,20 +1170,7 @@ impl Assembler
                 let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else {
                     panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}");
                 };
-                let out: A64Opnd = out.into();
-                let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no });
-                assert_ne!(31, out.unwrap_reg().reg_no, "Insn::Lea sp, [sp, #imm] not always encodable. Use add/sub instead.");
-
-                if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() {
-                    // Use ADD/SUB if the displacement fits
-                    add(cb, out, base_reg, A64Opnd::new_imm(disp.into()));
-                } else {
-                    // Use add_extended() to interpret reg_no=31 as sp
-                    // since the base register is never the zero register.
-                    // Careful! Only the first two operands can refer to sp.
-                    emit_load_value(cb, out, disp as u64);
-                    add_extended(cb, out, base_reg, out);
-                };
+                load_effective_address(cb, out.into(), base_reg_no, disp);
             }
             Insn::LeaJumpTarget { out, target, .. } => {
                 if let Target::Label(label_idx) = target {
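To make the scratch choreography concrete, this is the sequence the
mem-to-mem case of test_store (below) expects when both displacements
are out of LDUR/STUR range; plain strings here, not backend types:

    fn main() {
        let expected = [
            "sub x16, sp, #0x305", // SCRATCH0 <- address of the far source
            "ldur x16, [x16]",     // SCRATCH0 <- the source value itself
            "sub x17, sp, #0x305", // SCRATCH1 <- address of the far destination
            "stur x16, [x17]",     // the actual store
        ];
        assert_eq!(expected.len(), 4);
    }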
@@ -1627,6 +1687,64 @@ mod tests {
         ");
     }
 
+    #[test]
+    fn test_store() {
+        let (mut asm, mut cb) = setup_asm();
+
+        // Large memory offsets in combinations of destination and source
+        let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305);
+        let small_mem = Opnd::mem(64, C_RET_OPND, 0);
+        asm.store(small_mem, large_mem);
+        asm.store(large_mem, small_mem);
+        asm.store(large_mem, large_mem);
+
+        asm.compile_with_num_regs(&mut cb, 0);
+        assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", "
+            0x0: sub x16, sp, #0x305
+            0x4: ldur x16, [x16]
+            0x8: stur x16, [x0]
+            0xc: ldur x16, [x0]
+            0x10: sub x17, sp, #0x305
+            0x14: stur x16, [x17]
+            0x18: sub x16, sp, #0x305
+            0x1c: ldur x16, [x16]
+            0x20: sub x17, sp, #0x305
+            0x24: stur x16, [x17]
+        ");
+    }
+
+    #[test]
+    fn test_store_value_without_split() {
+        let (mut asm, mut cb) = setup_asm();
+
+        let imitation_heap_value = VALUE(0x1000);
+        assert!(imitation_heap_value.heap_object_p());
+        asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
+
+        // Side-exit code is compiled without the split pass, so we call emit directly
+        // here to emulate that scenario.
+        let gc_offsets = asm.arm64_emit(&mut cb).unwrap();
+        assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
+
+        assert_disasm!(cb, "50000058030000140010000000000000b00200f8", "
+            0x0: ldr x16, #8
+            0x4: b #0x10
+            0x8: .byte 0x00, 0x10, 0x00, 0x00
+            0xc: .byte 0x00, 0x00, 0x00, 0x00
+            0x10: stur x16, [x21]
+        ");
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_store_unserviceable() {
+        let (mut asm, mut cb) = setup_asm();
+        // This would put the source into SCRATCH_REG, messing up the destination
+        asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into());
+
+        asm.compile_with_num_regs(&mut cb, 0);
+    }
+
     /*
     #[test]
     fn test_emit_lea_label() {
diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs
index 36e783bd4e..b910052dae 100644
--- a/zjit/src/backend/lir.rs
+++ b/zjit/src/backend/lir.rs
@@ -1822,29 +1822,16 @@ impl Assembler
         };
         self.write_label(side_exit_label.clone());
 
-        // Load an operand that cannot be used as a source of Insn::Store
-        fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd {
-            if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) ||
-                (cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) {
-                asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd);
-                Opnd::Reg(Assembler::SCRATCH_REG)
-            } else {
-                opnd
-            }
-        }
-
         // Restore the PC and the stack for regular side exits. We don't do this for
         // side exits right after JIT-to-JIT calls, which restore them before the call.
         if let Some(SideExitContext { pc, stack, locals }) = context {
             asm_comment!(self, "write stack slots: {stack:?}");
             for (idx, &opnd) in stack.iter().enumerate() {
-                let opnd = split_store_source(self, opnd);
                 self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd);
             }
 
             asm_comment!(self, "write locals: {locals:?}");
             for (idx, &opnd) in locals.iter().enumerate() {
-                let opnd = split_store_source(self, opnd);
                 self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd);
             }
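For a sense of when these local writes used to trip the panic, a
back-of-envelope sketch; local_disp() is a made-up stand-in for the
real local_size_and_idx_to_ep_offset() arithmetic, assuming
VM_ENV_DATA_SIZE == 3 and 8-byte VALUEs:

    fn local_disp(local_size: i32, idx: i32) -> i32 {
        let ep_offset = local_size - idx + 2; // hypothetical layout, for illustration
        (-ep_offset - 1) * 8
    }

    fn main() {
        assert!(local_disp(29, 0) >= -256); // still within STUR range
        assert!(local_disp(30, 0) < -256);  // this is where ARM64 used to panic
    }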
diff --git a/zjit/src/backend/x86_64/mod.rs b/zjit/src/backend/x86_64/mod.rs
index 4543252573..d21c7ee09c 100644
--- a/zjit/src/backend/x86_64/mod.rs
+++ b/zjit/src/backend/x86_64/mod.rs
@@ -96,8 +96,10 @@ pub const ALLOC_REGS: &'static [Reg] = &[
 
 impl Assembler
 {
-    // A special scratch register for intermediate processing.
-    // This register is caller-saved (so we don't have to save it before using it)
+    /// A special scratch register for intermediate processing.
+    /// This register is call-clobbered (so we don't have to save it before using it).
+    /// Avoid using it if you can, since it is used to lower [Insn] internally and
+    /// so conflicts are possible.
     pub const SCRATCH_REG: Reg = R11_REG;
     const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
 
@@ -293,38 +295,11 @@ impl Assembler
                     asm.push_insn(insn);
                 },
-                Insn::Mov { dest, src } | Insn::Store { dest, src } => {
-                    match (&dest, &src) {
-                        (Opnd::Mem(_), Opnd::Mem(_)) => {
-                            // We load opnd1 because for mov, opnd0 is the output
-                            let opnd1 = asm.load(*src);
-                            asm.mov(*dest, opnd1);
-                        },
-                        (Opnd::Mem(Mem { num_bits, .. }), Opnd::UImm(value)) => {
-                            // For 64 bit destinations, 32-bit values will be sign-extended
-                            if *num_bits == 64 && imm_num_bits(*value as i64) > 32 {
-                                let opnd1 = asm.load(*src);
-                                asm.mov(*dest, opnd1);
-                            } else {
-                                asm.mov(*dest, *src);
-                            }
-                        },
-                        (Opnd::Mem(Mem { num_bits, .. }), Opnd::Imm(value)) => {
-                            // For 64 bit destinations, 32-bit values will be sign-extended
-                            if *num_bits == 64 && imm_num_bits(*value) > 32 {
-                                let opnd1 = asm.load(*src);
-                                asm.mov(*dest, opnd1);
-                            } else if uimm_num_bits(*value as u64) <= *num_bits {
-                                // If the bit string is short enough for the destination, use the unsigned representation.
-                                // Note that 64-bit and negative values are ruled out.
-                                asm.mov(*dest, Opnd::UImm(*value as u64));
-                            } else {
-                                asm.mov(*dest, *src);
-                            }
-                        },
-                        _ => {
-                            asm.mov(*dest, *src);
-                        }
+                Insn::Mov { dest, src } => {
+                    if let Opnd::Mem(_) = dest {
+                        asm.store(*dest, *src);
+                    } else {
+                        asm.mov(*dest, *src);
                     }
                 },
                 Insn::Not { opnd, .. } => {
@@ -440,6 +415,14 @@ impl Assembler
             }
         }
 
+        fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest_reg: X86Opnd, value: VALUE) {
+            // Using movabs because mov might write the value in 32 bits
+            movabs(cb, dest_reg, value.0 as _);
+            // The pointer immediate is encoded as the last part of the mov written out
+            let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
+            gc_offsets.push(ptr_offset);
+        }
+
         // List of GC offsets
         let mut gc_offsets: Vec<CodePtr> = Vec::new();
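The immediate paths in the Store arm below hinge on one x86-64 quirk:
mov to a 64-bit memory slot sign-extends its 32-bit immediate. A
simplified standalone version of the check (the backend asks
imm_num_bits()/uimm_num_bits() instead):

    fn storable_directly(value: u64) -> bool {
        i32::try_from(value as i64).is_ok() // representable as a sign-extended imm32?
    }

    fn main() {
        assert!(storable_directly(0x7fff_ffff));
        assert!(!storable_directly(0x8000_0000)); // would become 0xffff_ffff_8000_0000
    }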
@@ -552,20 +535,71 @@ impl Assembler
                     shr(cb, opnd.into(), shift.into())
                 },
-                Insn::Store { dest, src } => {
-                    mov(cb, dest.into(), src.into());
-                },
+                store_insn @ Insn::Store { dest, src } => {
+                    let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else {
+                        panic!("Unexpected Insn::Store destination in x86_emit: {dest:?}");
+                    };
+
+                    // This kind of tricky clobber can only happen with explicit use of SCRATCH_REG,
+                    // so we panic to get the author to change their code.
+                    #[track_caller]
+                    fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
+                        assert_ne!(
+                            backend_use.reg_no,
+                            user_use,
+                            "Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
+                        );
+                    }
+
+                    let scratch = X86Opnd::Reg(Self::SCRATCH_REG);
+                    let src = match src {
+                        Opnd::Reg(_) => src.into(),
+                        &Opnd::Mem(_) => {
+                            assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
+                            mov(cb, scratch, src.into());
+                            scratch
+                        }
+                        &Opnd::Imm(imm) => {
+                            // For 64-bit destinations, 32-bit values will be sign-extended
+                            if num_bits == 64 && imm_num_bits(imm) > 32 {
+                                assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
+                                mov(cb, scratch, src.into());
+                                scratch
+                            } else if uimm_num_bits(imm as u64) <= num_bits {
+                                // If the bit string is short enough for the destination, use the unsigned representation.
+                                // Note that 64-bit and negative values are ruled out.
+                                uimm_opnd(imm as u64)
+                            } else {
+                                src.into()
+                            }
+                        }
+                        &Opnd::UImm(imm) => {
+                            // For 64-bit destinations, 32-bit values will be sign-extended
+                            if num_bits == 64 && imm_num_bits(imm as i64) > 32 {
+                                assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
+                                mov(cb, scratch, src.into());
+                                scratch
+                            } else {
+                                src.into()
+                            }
+                        }
+                        &Opnd::Value(value) => {
+                            assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
+                            emit_load_gc_value(cb, &mut gc_offsets, scratch, value);
+                            scratch
+                        }
+                        src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_emit: {src:?}")
+                    };
+                    mov(cb, dest.into(), src);
+                }
 
                 // This assumes only load instructions can contain references to GC'd Value operands
                 Insn::Load { opnd, out } |
                 Insn::LoadInto { dest: out, opnd } => {
                     match opnd {
                         Opnd::Value(val) if val.heap_object_p() => {
-                            // Using movabs because mov might write value in 32 bits
-                            movabs(cb, out.into(), val.0 as _);
-                            // The pointer immediate is encoded as the last part of the mov written out
-                            let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
-                            gc_offsets.push(ptr_offset);
+                            emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val);
                         }
                         _ => mov(cb, out.into(), opnd.into())
                     }
@@ -1352,4 +1386,21 @@ mod tests {
             0x29: pop rbp
         "});
     }
+
+    #[test]
+    fn test_store_value_without_split() {
+        let (mut asm, mut cb) = setup_asm();
+
+        let imitation_heap_value = VALUE(0x1000);
+        assert!(imitation_heap_value.heap_object_p());
+        asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
+
+        let gc_offsets = asm.x86_emit(&mut cb).unwrap();
+        assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
+
+        assert_disasm!(cb, "49bb00100000000000004c891b", "
+            0x0: movabs r11, 0x1000
+            0xa: mov qword ptr [rbx], r11
+        ");
+    }
 }
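A closing note on the movabs path the x86 test above checks: here is a
minimal model, with made-up types, of the bookkeeping both backends now
share through emit_load_gc_value(), recording where the VALUE immediate
landed so the GC can later mark it and patch it if the object moves:

    struct CodeBuf { bytes: Vec<u8>, gc_offsets: Vec<usize> }

    impl CodeBuf {
        fn emit_load_gc_value(&mut self, value: u64) {
            self.bytes.extend_from_slice(&[0x49, 0xbb]); // movabs r11, imm64
            self.bytes.extend_from_slice(&value.to_le_bytes());
            self.gc_offsets.push(self.bytes.len() - 8); // start of the embedded imm64
        }
    }

    fn main() {
        let mut cb = CodeBuf { bytes: vec![], gc_offsets: vec![] };
        cb.emit_load_gc_value(0x1000);
        assert_eq!(cb.gc_offsets, vec![2]); // the VALUE lives at byte offset 2
    }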