ZJIT: Fix side-exit panicking when there are too many locals

Previously, ARM64 panicked in compiled_side_exits() when the memory
displacement grew large enough to exceed the 9-bit limit. Usually we split
this kind of memory operand, but compiled_side_exits() runs after
split().

Using scratch registers, implement `Insn::Store` on ARM64 so that it can
handle large displacements without split(). Do the same for x86, and
remove arch-specific code from compiled_side_exits(). We can now run
`TestKeywordArguments`.

Since `Insn::Store` no longer needs splitting, users enjoy lower
register pressure.

The downside is that using `Assembler::SCRATCH_REG` as a base register is
now sometimes an error, depending on whether `Insn::Store` also needs to
use the register. That seems like a fair trade-off since `SCRATCH_REG` is
rarely used, and we don't currently use it as a base register anywhere.
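
For illustration only (not part of the commit): a standalone Rust sketch of the lowering idea described above. It assumes the signed 9-bit STUR displacement range and uses a made-up lower_store() helper that returns assembly text; the real backend emits machine code through the assembler and its scratch registers rather than strings.

// Hypothetical, self-contained sketch: if the displacement fits STUR's
// signed 9-bit range, store directly; otherwise compute the address into a
// scratch register first and store with a zero displacement.
fn lower_store(base: &str, disp: i64, src: &str) -> Vec<String> {
    const SCRATCH: &str = "x17"; // stand-in for the backend's second scratch register
    if (-256..=255).contains(&disp) {
        vec![format!("stur {src}, [{base}, #{disp}]")]
    } else {
        // ADD/SUB immediates are unsigned, so pick the opcode from the sign.
        let (op, magnitude) = if disp < 0 { ("sub", -disp) } else { ("add", disp) };
        vec![
            format!("{op} {SCRATCH}, {base}, #{magnitude:#x}"),
            format!("stur {src}, [{SCRATCH}]"),
        ]
    }
}

fn main() {
    // Small displacement: encodable directly.
    println!("{:?}", lower_store("sp", -8, "x16"));
    // Large displacement (doesn't fit in 9 bits): goes through the scratch register.
    println!("{:?}", lower_store("sp", -0x305, "x16"));
}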
Author: Alan Wu
Date:   2025-08-01 14:54:52 -04:00
Parent: f58fca7de0
Commit: afac226478
4 changed files with 269 additions and 114 deletions


@@ -1 +0,0 @@
exclude(/test_/, 'Multiple tests make ZJIT panic')


@@ -195,11 +195,15 @@ pub const ALLOC_REGS: &'static [Reg] = &[
impl Assembler
{
/// Special scratch registers for intermediate processing.
/// This register is call-clobbered (so we don't have to save it before using it).
/// Avoid using if you can since this is used to lower [Insn] internally and
/// so conflicts are possible.
pub const SCRATCH_REG: Reg = X16_REG;
const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
const SCRATCH0_REG: Reg = Assembler::SCRATCH_REG;
const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);
const SCRATCH1_REG: Reg = X17_REG;
/// Get the list of registers from which we will allocate on this platform
pub fn get_alloc_regs() -> Vec<Reg> {
@@ -652,31 +656,6 @@ impl Assembler
*opnd = split_load_operand(asm, *opnd);
asm.push_insn(insn);
},
Insn::Store { dest, src } => {
// The value being stored must be in a register, so if it's
// not already one we'll load it first.
let opnd1 = match src {
// If the first operand is zero, then we can just use
// the zero register.
Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG),
// Otherwise we'll check if we need to load it first.
_ => split_load_operand(asm, *src)
};
match dest {
Opnd::Reg(_) => {
// Store does not support a register as a dest operand.
asm.mov(*dest, opnd1);
}
_ => {
// The displacement for the STUR instruction can't be more
// than 9 bits long. If it's longer, we need to load the
// memory address into a register first.
let opnd0 = split_memory_address(asm, *dest);
asm.store(opnd0, opnd1);
}
}
},
Insn::Mul { left, right, .. } => {
*left = split_load_operand(asm, *left);
*right = split_load_operand(asm, *right);
@@ -843,6 +822,42 @@ impl Assembler
}
}
/// Do the address calculation of `out_reg = base_reg + disp`
fn load_effective_address(cb: &mut CodeBlock, out: A64Opnd, base_reg_no: u8, disp: i32) {
let base_reg = A64Opnd::Reg(A64Reg { num_bits: 64, reg_no: base_reg_no });
assert_ne!(31, out.unwrap_reg().reg_no, "Lea sp, [sp, #imm] not always encodable. Use add/sub instead.");
if ShiftedImmediate::try_from(disp.unsigned_abs() as u64).is_ok() {
// Use ADD/SUB if the displacement fits
add(cb, out, base_reg, A64Opnd::new_imm(disp.into()));
} else {
// Use add_extended() to interpret reg_no=31 as sp
// since the base register is never the zero register.
// Careful! Only the first two operands can refer to sp.
emit_load_value(cb, out, disp as u64);
add_extended(cb, out, base_reg, out);
};
}
/// Load a VALUE to a register and remember it for GC marking and reference updating
fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest: A64Opnd, value: VALUE) {
// We don't need to check if it's a special const
// here because we only allow these operands to hit
// this point if they're not a special const.
assert!(!value.special_const_p());
// This assumes only load instructions can contain
// references to GC'd Value operands. If the value
// being loaded is a heap object, we'll report that
// back out to the gc_offsets list.
ldr_literal(cb, dest, 2.into());
b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32)));
cb.write_bytes(&value.as_u64().to_le_bytes());
let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
gc_offsets.push(ptr_offset);
}
/// Emit a push instruction for the given operand by adding to the stack
/// pointer and then storing the given value.
fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) {
@@ -1013,12 +1028,84 @@ impl Assembler
Insn::LShift { opnd, shift, out } => {
lsl(cb, out.into(), opnd.into(), shift.into());
},
store_insn @ Insn::Store { dest, src } => {
// With minor exceptions, as long as `dest` is a Mem, all forms of `src` are
// accepted. As a rule of thumb, avoid using Assembler::SCRATCH as a memory
// base register to guarantee things will work.
let &Opnd::Mem(Mem { num_bits: dest_num_bits, base: MemBase::Reg(base_reg_no), disp }) = dest else {
panic!("Unexpected Insn::Store destination in arm64_emit: {dest:?}");
};
// This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
// so we panic to get the author to change their code.
#[track_caller]
fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
assert_ne!(
backend_use.reg_no,
user_use,
"Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
);
}
// Split src into SCRATCH0 if necessary
let src_reg: A64Reg = match src {
Opnd::Reg(reg) => *reg,
// Use zero register when possible
Opnd::UImm(0) | Opnd::Imm(0) => XZR_REG,
// Immediates
&Opnd::Imm(imm) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
emit_load_value(cb, Self::SCRATCH0, imm as u64);
Self::SCRATCH0_REG
}
&Opnd::UImm(imm) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
emit_load_value(cb, Self::SCRATCH0, imm);
Self::SCRATCH0_REG
}
&Opnd::Value(value) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
emit_load_gc_value(cb, &mut gc_offsets, Self::SCRATCH0, value);
Self::SCRATCH0_REG
}
src_mem @ &Opnd::Mem(Mem { num_bits: src_num_bits, base: MemBase::Reg(src_base_reg_no), disp: src_disp }) => {
// For mem-to-mem store, load the source into SCRATCH0
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH0_REG);
let src_mem = if mem_disp_fits_bits(src_disp) {
src_mem.into()
} else {
// Split the load address into SCRATCH0 first if necessary
assert_no_clobber(store_insn, src_base_reg_no, Self::SCRATCH0_REG);
load_effective_address(cb, Self::SCRATCH0, src_base_reg_no, src_disp);
A64Opnd::new_mem(dest_num_bits, Self::SCRATCH0, 0)
};
match src_num_bits {
64 | 32 => ldur(cb, Self::SCRATCH0, src_mem),
16 => ldurh(cb, Self::SCRATCH0, src_mem),
8 => ldurb(cb, Self::SCRATCH0, src_mem),
num_bits => panic!("unexpected num_bits: {num_bits}")
};
Self::SCRATCH0_REG
}
src @ (Opnd::Mem(_) | Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during arm64_emit: {src:?}")
};
let src = A64Opnd::Reg(src_reg);
// Split dest into SCRATCH1 if necessary.
let dest = if mem_disp_fits_bits(disp) {
dest.into()
} else {
assert_no_clobber(store_insn, src_reg.reg_no, Self::SCRATCH1_REG);
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH1_REG);
load_effective_address(cb, Self::SCRATCH1, base_reg_no, disp);
A64Opnd::new_mem(dest_num_bits, Self::SCRATCH1, 0)
};
// This order may be surprising but it is correct. The way
// the Arm64 assembler works, the register that is going to
// be stored is first and the address is second. However in
// our IR we have the address first and the register second.
match dest_num_bits {
64 | 32 => stur(cb, src.into(), dest.into()),
16 => sturh(cb, src.into(), dest.into()),
num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest),
@@ -1045,21 +1132,7 @@ impl Assembler
};
},
Opnd::Value(value) => {
emit_load_gc_value(cb, &mut gc_offsets, out.into(), value);
},
Opnd::None => {
unreachable!("Attempted to load from None operand");
@@ -1097,20 +1170,7 @@ impl Assembler
let &Opnd::Mem(Mem { num_bits: _, base: MemBase::Reg(base_reg_no), disp }) = opnd else {
panic!("Unexpected Insn::Lea operand in arm64_emit: {opnd:?}");
};
load_effective_address(cb, out.into(), base_reg_no, disp);
}
Insn::LeaJumpTarget { out, target, .. } => {
if let Target::Label(label_idx) = target {
@@ -1627,6 +1687,64 @@ mod tests {
");
}
#[test]
fn test_store() {
let (mut asm, mut cb) = setup_asm();
// Large memory offsets in combinations of destination and source
let large_mem = Opnd::mem(64, NATIVE_STACK_PTR, -0x305);
let small_mem = Opnd::mem(64, C_RET_OPND, 0);
asm.store(small_mem, large_mem);
asm.store(large_mem, small_mem);
asm.store(large_mem, large_mem);
asm.compile_with_num_regs(&mut cb, 0);
assert_disasm!(cb, "f0170cd1100240f8100000f8100040f8f1170cd1300200f8f0170cd1100240f8f1170cd1300200f8", "
0x0: sub x16, sp, #0x305
0x4: ldur x16, [x16]
0x8: stur x16, [x0]
0xc: ldur x16, [x0]
0x10: sub x17, sp, #0x305
0x14: stur x16, [x17]
0x18: sub x16, sp, #0x305
0x1c: ldur x16, [x16]
0x20: sub x17, sp, #0x305
0x24: stur x16, [x17]
");
}
#[test]
fn test_store_value_without_split() {
let (mut asm, mut cb) = setup_asm();
let imitation_heap_value = VALUE(0x1000);
assert!(imitation_heap_value.heap_object_p());
asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
// Side exit code is compiled without the split pass, so we directly call emit here to
// emulate that scenario.
let gc_offsets = asm.arm64_emit(&mut cb).unwrap();
assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
assert_disasm!(cb, "50000058030000140010000000000000b00200f8", "
0x0: ldr x16, #8
0x4: b #0x10
0x8: .byte 0x00, 0x10, 0x00, 0x00
0xc: .byte 0x00, 0x00, 0x00, 0x00
0x10: stur x16, [x21]
");
}
#[test]
#[should_panic]
fn test_store_unserviceable() {
let (mut asm, mut cb) = setup_asm();
// This would put the source into SCRATCH_REG, messing up the destination
asm.store(Opnd::mem(64, Opnd::Reg(Assembler::SCRATCH_REG), 0), 0x83902.into());
asm.compile_with_num_regs(&mut cb, 0);
}
/*
#[test]
fn test_emit_lea_label() {


@@ -1822,29 +1822,16 @@ impl Assembler
};
self.write_label(side_exit_label.clone());
// Load an operand that cannot be used as a source of Insn::Store
fn split_store_source(asm: &mut Assembler, opnd: Opnd) -> Opnd {
if matches!(opnd, Opnd::Mem(_) | Opnd::Value(_)) ||
(cfg!(target_arch = "aarch64") && matches!(opnd, Opnd::UImm(_))) {
asm.load_into(Opnd::Reg(Assembler::SCRATCH_REG), opnd);
Opnd::Reg(Assembler::SCRATCH_REG)
} else {
opnd
}
}
// Restore the PC and the stack for regular side exits. We don't do this for
// side exits right after JIT-to-JIT calls, which restore them before the call.
if let Some(SideExitContext { pc, stack, locals }) = context {
asm_comment!(self, "write stack slots: {stack:?}");
for (idx, &opnd) in stack.iter().enumerate() {
let opnd = split_store_source(self, opnd);
self.store(Opnd::mem(64, SP, idx as i32 * SIZEOF_VALUE_I32), opnd);
}
asm_comment!(self, "write locals: {locals:?}");
for (idx, &opnd) in locals.iter().enumerate() {
let opnd = split_store_source(self, opnd);
self.store(Opnd::mem(64, SP, (-local_size_and_idx_to_ep_offset(locals.len(), idx) - 1) * SIZEOF_VALUE_I32), opnd);
}


@@ -96,8 +96,10 @@ pub const ALLOC_REGS: &'static [Reg] = &[
impl Assembler
{
/// Special scratch registers for intermediate processing.
/// This register is call-clobbered (so we don't have to save it before using it).
/// Avoid using if you can since this is used to lower [Insn] internally and
/// so conflicts are possible.
pub const SCRATCH_REG: Reg = R11_REG;
const SCRATCH0: X86Opnd = X86Opnd::Reg(Assembler::SCRATCH_REG);
@@ -293,38 +295,11 @@ impl Assembler
asm.push_insn(insn);
},
Insn::Mov { dest, src } => {
if let Opnd::Mem(_) = dest {
asm.store(*dest, *src);
} else {
asm.mov(*dest, *src);
}
},
Insn::Not { opnd, .. } => {
@@ -440,6 +415,14 @@ impl Assembler
}
}
fn emit_load_gc_value(cb: &mut CodeBlock, gc_offsets: &mut Vec<CodePtr>, dest_reg: X86Opnd, value: VALUE) {
// Using movabs because mov might write value in 32 bits
movabs(cb, dest_reg, value.0 as _);
// The pointer immediate is encoded as the last part of the mov written out
let ptr_offset = cb.get_write_ptr().sub_bytes(SIZEOF_VALUE);
gc_offsets.push(ptr_offset);
}
// List of GC offsets
let mut gc_offsets: Vec<CodePtr> = Vec::new();
@@ -552,20 +535,71 @@ impl Assembler
shr(cb, opnd.into(), shift.into())
},
store_insn @ Insn::Store { dest, src } => {
let &Opnd::Mem(Mem { num_bits, base: MemBase::Reg(base_reg_no), disp: _ }) = dest else {
panic!("Unexpected Insn::Store destination in x64_emit: {dest:?}");
};
// This kind of tricky clobber can only happen for explicit use of SCRATCH_REG,
// so we panic to get the author to change their code.
#[track_caller]
fn assert_no_clobber(store_insn: &Insn, user_use: u8, backend_use: Reg) {
assert_ne!(
backend_use.reg_no,
user_use,
"Emitting {store_insn:?} would clobber {user_use:?}, in conflict with its semantics"
);
}
let scratch = X86Opnd::Reg(Self::SCRATCH_REG);
let src = match src {
Opnd::Reg(_) => src.into(),
&Opnd::Mem(_) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
mov(cb, scratch, src.into());
scratch
}
&Opnd::Imm(imm) => {
// For 64 bit destinations, 32-bit values will be sign-extended
if num_bits == 64 && imm_num_bits(imm) > 32 {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
mov(cb, scratch, src.into());
scratch
} else if uimm_num_bits(imm as u64) <= num_bits {
// If the bit string is short enough for the destination, use the unsigned representation.
// Note that 64-bit and negative values are ruled out.
uimm_opnd(imm as u64)
} else {
src.into()
}
}
&Opnd::UImm(imm) => {
// For 64 bit destinations, 32-bit values will be sign-extended
if num_bits == 64 && imm_num_bits(imm as i64) > 32 {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
mov(cb, scratch, src.into());
scratch
} else {
src.into()
}
}
&Opnd::Value(value) => {
assert_no_clobber(store_insn, base_reg_no, Self::SCRATCH_REG);
emit_load_gc_value(cb, &mut gc_offsets, scratch, value);
scratch
}
src @ (Opnd::None | Opnd::VReg { .. }) => panic!("Unexpected source operand during x86_emit: {src:?}")
};
mov(cb, dest.into(), src);
}
// This assumes only load instructions can contain references to GC'd Value operands
Insn::Load { opnd, out } |
Insn::LoadInto { dest: out, opnd } => {
match opnd {
Opnd::Value(val) if val.heap_object_p() => {
emit_load_gc_value(cb, &mut gc_offsets, out.into(), *val);
}
_ => mov(cb, out.into(), opnd.into())
}
@@ -1352,4 +1386,21 @@ mod tests {
0x29: pop rbp
"});
}
#[test]
fn test_store_value_without_split() {
let (mut asm, mut cb) = setup_asm();
let imitation_heap_value = VALUE(0x1000);
assert!(imitation_heap_value.heap_object_p());
asm.store(Opnd::mem(VALUE_BITS, SP, 0), imitation_heap_value.into());
let gc_offsets = asm.x86_emit(&mut cb).unwrap();
assert_eq!(1, gc_offsets.len(), "VALUE source operand should be reported as gc offset");
assert_disasm!(cb, "49bb00100000000000004c891b", "
0x0: movabs r11, 0x1000
0xa: mov qword ptr [rbx], r11
");
}
}