ZJIT: Keep a frame pointer and use it for memory params

Previously, ZJIT miscompiled the following because of native SP
interference.

    def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
    a(0,0,0,0,0,0,0, :ok)

Commented problematic disassembly:

    ; call rb_ary_new_capa
    mov x0, #1
    mov x16, #0x1278
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16
    ; call rb_ary_push
    mov x1, x0
    str x1, [sp, #-0x10]! ; c_push() from alloc_regs()
    mov x0, x1            ; arg0, the array
    ldur x1, [sp]         ; meant to be arg1=n8, but sp just moved!
    mov x16, #0x3968
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16

Since the frame pointer stays constant in the body of the function,
static offsets based on it don't run the risk of being invalidated by SP
movements.
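
To make the distinction concrete, here is a minimal sketch using the
backend's own operand constructor (NATIVE_STACK_PTR and NATIVE_BASE_PTR
are the LIR names for the native stack and frame pointers, both visible
in the diff below):

    // The SP-relative slot silently moves whenever alloc_regs() pushes
    // a scratch value; the FP-relative slot keeps one static offset
    // for the whole function body.
    let sp_relative = Opnd::mem(64, NATIVE_STACK_PTR, 0); // fragile
    let fp_relative = Opnd::mem(64, NATIVE_BASE_PTR, -8); // stable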

Pass the registers to preserve through Insn::FrameSetup. This allows ARM
to use STP, leaving no padding gaps between EC, SP, and CFP.
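
A sketch of that lowering idea (a hypothetical, simplified rendition;
Reg, the emit helpers, and the register numbers are stand-ins, not the
actual backend API):

    // With the registers to preserve known up front, an arm64 lowering
    // can emit one STP per pair instead of one 16-byte-aligned push
    // per register, so EC, SP, and CFP pack with no padding gaps.
    type Reg = u8;
    fn emit_stp(a: Reg, b: Reg, slot: usize) {
        println!("stp x{a}, x{b}, [sp, #{}]", 16 * slot); // one store, two regs
    }
    fn emit_str(a: Reg, slot: usize) {
        println!("str x{a}, [sp, #{}]", 16 * slot); // odd leftover register
    }

    fn main() {
        let regs_to_preserve: &[Reg] = &[19, 20, 21]; // stand-ins for CFP, EC, SP
        for (slot, pair) in regs_to_preserve.chunks(2).enumerate() {
            match *pair {
                [a, b] => emit_stp(a, b, slot),
                [a] => emit_str(a, slot),
                _ => unreachable!(),
            }
        }
    }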

x86 now preserves and restores RBP since we use it as the frame pointer.
Since all arches now have a frame pointer, remove the offset-based SP
movement in the epilogue and restore registers using the frame pointer.
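
Sketched with the LIR helpers that appear in the diff below (the save
slot offsets and layout are illustrative assumptions), the epilogue
becomes:

    // Reload callee-saved registers from fixed frame-pointer offsets,
    // then rewind SP from the frame pointer rather than adding a
    // statically computed byte count.
    for (i, &reg) in regs_to_restore.iter().enumerate() {
        let slot = Opnd::mem(64, NATIVE_BASE_PTR, -8 * (i as i32 + 1));
        asm.load_into(Opnd::Reg(reg), slot);
    }
    asm.mov(NATIVE_STACK_PTR, NATIVE_BASE_PTR);
    // ...then pop the saved frame pointer and return.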
commit ff428b4dd0
parent 5ca71364ff
Author: Alan Wu
Date:   2025-07-25 22:09:51 -04:00

6 changed files with 220 additions and 111 deletions

@@ -8,7 +8,7 @@ use crate::invariants::{track_bop_assumption, track_cme_assumption};
 use crate::gc::{get_or_create_iseq_payload, append_gc_offsets};
 use crate::state::ZJITState;
 use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
-use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, SP};
+use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP};
 use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, CallInfo, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX};
 use crate::hir::{Const, FrameState, Function, Insn, InsnId};
 use crate::hir_type::{types::Fixnum, Type};
@@ -29,18 +29,18 @@ struct JITState {
     branch_iseqs: Vec<(Rc<Branch>, IseqPtr)>,
     /// The number of bytes allocated for basic block arguments spilled onto the C stack
-    c_stack_bytes: usize,
+    c_stack_slots: usize,
 }
 impl JITState {
     /// Create a new JITState instance
-    fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize, c_stack_bytes: usize) -> Self {
+    fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize, c_stack_slots: usize) -> Self {
         JITState {
             iseq,
             opnds: vec![None; num_insns],
             labels: vec![None; num_blocks],
             branch_iseqs: Vec::default(),
-            c_stack_bytes,
+            c_stack_slots,
         }
     }
@@ -128,7 +128,7 @@ fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> *const u8 {
             append_gc_offsets(iseq, &gc_offsets);
             // Compile an entry point to the JIT code
-            (gen_entry(cb, iseq, &function, start_ptr, jit.c_stack_bytes), jit.branch_iseqs)
+            (gen_entry(cb, iseq, &function, start_ptr), jit.branch_iseqs)
         },
         None => (None, vec![]),
     };
@@ -170,21 +170,18 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
 }
 /// Compile a JIT entry
-fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr, c_stack_bytes: usize) -> Option<CodePtr> {
+fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr) -> Option<CodePtr> {
     // Set up registers for CFP, EC, SP, and basic block arguments
     let mut asm = Assembler::new();
     gen_entry_prologue(&mut asm, iseq);
-    gen_entry_params(&mut asm, iseq, function.block(BlockId(0)), c_stack_bytes);
+    gen_entry_params(&mut asm, iseq, function.block(BlockId(0)));
     // Jump to the first block using a call instruction
     asm.ccall(function_ptr.raw_ptr(cb) as *const u8, vec![]);
-    // Restore registers for CFP, EC, and SP after use
-    asm_comment!(asm, "exit to the interpreter");
-    asm.cpop_into(SP);
-    asm.cpop_into(EC);
-    asm.cpop_into(CFP);
-    asm.frame_teardown();
+    asm_comment!(asm, "return to the interpreter");
+    asm.frame_teardown(lir::JIT_PRESERVED_REGS);
     asm.cret(C_RET_OPND);
     if get_option!(dump_lir) {
@@ -231,8 +228,8 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc<Branc
 /// Compile a function
 fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Option<(CodePtr, Vec<CodePtr>, JITState)> {
-    let c_stack_bytes = aligned_stack_bytes(max_num_params(function).saturating_sub(ALLOC_REGS.len()));
-    let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_bytes);
+    let c_stack_slots = max_num_params(function).saturating_sub(ALLOC_REGS.len());
+    let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_slots);
     let mut asm = Assembler::new();
     // Compile each basic block
@@ -245,16 +242,9 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio
         let label = jit.get_label(&mut asm, block_id);
         asm.write_label(label);
-        // Set up the frame at the first block
+        // Set up the frame at the first block. :bb0-prologue:
         if block_id == BlockId(0) {
-            asm.frame_setup();
-            // Bump the C stack pointer for basic block arguments
-            if jit.c_stack_bytes > 0 {
-                asm_comment!(asm, "bump C stack pointer");
-                let new_sp = asm.sub(NATIVE_STACK_PTR, jit.c_stack_bytes.into());
-                asm.mov(NATIVE_STACK_PTR, new_sp);
-            }
+            asm.frame_setup(&[], jit.c_stack_slots);
         }
         // Compile all parameters
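
From the two call sites in this diff, frame_setup(&[], jit.c_stack_slots)
here and frame_setup(lir::JIT_PRESERVED_REGS, 0) in the entry prologue,
the new signature is presumably along these lines (a reconstruction, not
the verbatim declaration):

    /// Reconstruction: save the `preserved` registers, establish the
    /// frame pointer, and reserve `slot_count` spill slots, handling
    /// 16-byte stack alignment internally (which is what lets the
    /// aligned_stack_bytes() helper below be deleted).
    pub fn frame_setup(&mut self, preserved: &'static [Opnd], slot_count: usize) {
        // parameter types guessed from the call sites;
        // lowered per architecture via Insn::FrameSetup
    }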
@@ -335,7 +325,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
             gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?,
         Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?,
         Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?,
-        Insn::Return { val } => return Some(gen_return(jit, asm, opnd!(val))?),
+        Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?),
         Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
         Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
         Insn::FixnumMult { left, right, state } => gen_fixnum_mult(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
@@ -569,12 +559,8 @@ fn gen_putspecialobject(asm: &mut Assembler, value_type: SpecialObjectType) -> O
 /// Compile an interpreter entry block to be inserted into an ISEQ
 fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
     asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0));
-    asm.frame_setup();
-    // Save the registers we'll use for CFP, EP, SP
-    asm.cpush(CFP);
-    asm.cpush(EC);
-    asm.cpush(SP);
+    asm.frame_setup(lir::JIT_PRESERVED_REGS, 0);
     // EC and CFP are passed as arguments
     asm.mov(EC, C_ARG_OPNDS[0]);
@@ -587,7 +573,7 @@
 }
 /// Assign method arguments to basic block arguments at JIT entry
-fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_stack_bytes: usize) {
+fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block) {
     let num_params = entry_block.params().len() - 1; // -1 to exclude self
     if num_params > 0 {
         asm_comment!(asm, "set method params: {num_params}");
@@ -616,8 +602,8 @@ fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_s
             // the HIR function    ────────────►   └────────────┘
             // is running
             match param {
-                Opnd::Mem(lir::Mem { base, disp, num_bits }) => {
-                    let param_slot = Opnd::Mem(lir::Mem { num_bits, base, disp: disp - c_stack_bytes as i32 - Assembler::frame_size() });
+                Opnd::Mem(lir::Mem { base: _, disp, num_bits }) => {
+                    let param_slot = Opnd::mem(num_bits, NATIVE_STACK_PTR, disp - Assembler::frame_size());
                     asm.mov(param_slot, local);
                 }
                 // Prepare for parallel move for locals in registers
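
The rewritten offset works because the frame pointer ends up a fixed
distance below the SP at entry; a quick arithmetic check (treating
frame_size() as exactly that distance, which is an assumption):

    fn main() {
        let entry_sp: i64 = 0x7fff_0000; // illustrative address
        let frame_size: i64 = 0x20;      // stand-in for frame_size()
        let disp: i64 = -8;              // param_opnd() displacement off FP
        let eventual_fp = entry_sp - frame_size;
        // The slot written through SP before the call is the same slot
        // the function body later reads through the frame pointer.
        assert_eq!(eventual_fp + disp, entry_sp + (disp - frame_size));
    }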
@@ -848,7 +834,7 @@ fn gen_send_without_block_direct(
     asm_comment!(asm, "side-exit if callee side-exits");
     asm.cmp(ret, Qundef.into());
     // Restore the C stack pointer on exit
-    asm.je(Target::SideExit { context: None, reason: CalleeSideExit, c_stack_bytes: jit.c_stack_bytes, label: None });
+    asm.je(Target::SideExit { context: None, reason: CalleeSideExit, label: None });
     asm_comment!(asm, "restore SP register for the caller");
     let new_sp = asm.sub(SP, sp_offset.into());
@@ -912,7 +898,7 @@ fn gen_new_range(
 }
 /// Compile code that exits from JIT code with a return value
-fn gen_return(jit: &JITState, asm: &mut Assembler, val: lir::Opnd) -> Option<()> {
+fn gen_return(asm: &mut Assembler, val: lir::Opnd) -> Option<()> {
     // Pop the current frame (ec->cfp++)
     // Note: the return PC is already in the previous CFP
     asm_comment!(asm, "pop stack frame");
@@ -924,16 +910,8 @@ fn gen_return(jit: &JITState, asm: &mut Assembler, val: lir::Opnd) -> Option<()>
     // we need to load the return value, which might be part of the frame.
     asm.load_into(C_RET_OPND, val);
-    // Restore the C stack pointer bumped for basic block arguments
-    if jit.c_stack_bytes > 0 {
-        asm_comment!(asm, "restore C stack pointer");
-        let new_sp = asm.add(NATIVE_STACK_PTR, jit.c_stack_bytes.into());
-        asm.mov(NATIVE_STACK_PTR, new_sp);
-    }
-    asm.frame_teardown();
     // Return from the function
+    asm.frame_teardown(&[]); // matching the setup in :bb0-prologue:
     asm.cret(C_RET_OPND);
     Some(())
 }
@@ -1140,7 +1118,7 @@ fn param_opnd(idx: usize) -> Opnd {
     if idx < ALLOC_REGS.len() {
         Opnd::Reg(ALLOC_REGS[idx])
     } else {
-        Opnd::mem(64, NATIVE_STACK_PTR, (idx - ALLOC_REGS.len()) as i32 * SIZEOF_VALUE_I32)
+        Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32)
     }
 }
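
A worked check of the new offset math (runnable sketch; 8-byte VALUE
slots, with an ALLOC_REGS length of 2 picked purely for illustration):

    fn spill_disp(idx: usize, num_alloc_regs: usize) -> i32 {
        // mirrors param_opnd() for parameters that overflow the registers
        (idx - num_alloc_regs + 1) as i32 * -8
    }

    fn main() {
        assert_eq!(spill_disp(2, 2), -8);  // first spilled param: [fp - 8]
        assert_eq!(spill_disp(3, 2), -16); // second: [fp - 16]
    }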
@@ -1196,7 +1174,6 @@ fn build_side_exit(jit: &mut JITState, state: &FrameState, reason: SideExitReaso
             locals,
         }),
         reason,
-        c_stack_bytes: jit.c_stack_bytes,
         label,
     };
     Some(target)
@@ -1225,26 +1202,6 @@ fn max_num_params(function: &Function) -> usize {
     }).max().unwrap_or(0)
 }
-/// Given the number of spill slots needed for a function, return the number of bytes
-/// the function needs to allocate on the stack for the stack frame.
-fn aligned_stack_bytes(num_slots: usize) -> usize {
-    // Both x86_64 and arm64 require the stack to be aligned to 16 bytes.
-    let num_slots = if cfg!(target_arch = "x86_64") && num_slots % 2 == 0 {
-        // On x86_64, since the call instruction bumps the stack pointer by 8 bytes on entry,
-        // we need to round up `num_slots` to an odd number.
-        num_slots + 1
-    } else if cfg!(target_arch = "aarch64") && num_slots % 2 == 1 {
-        // On arm64, the stack pointer is always aligned to 16 bytes, so we need to round up
-        // `num_slots` to an even number.
-        num_slots + 1
-    } else {
-        num_slots
-    };
-    const { assert!(SIZEOF_VALUE == 8, "aligned_stack_bytes() assumes SIZEOF_VALUE == 8"); }
-    num_slots * SIZEOF_VALUE
-}
 impl Assembler {
     /// Make a C call while marking the start and end positions of it
     fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec<Opnd>, branch: &Rc<Branch>) -> Opnd {
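
For reference, a worked check of the parity rule encoded by the removed
aligned_stack_bytes() above (a reader's sketch, not code from this
commit; only the arm64 branch is shown):

    fn bytes_needed(num_slots: usize) -> usize {
        // arm64: round an odd slot count up to even so SP stays 16-byte aligned
        (num_slots + num_slots % 2) * 8
    }

    fn main() {
        assert_eq!(bytes_needed(3), 32); // 3 slots round up to 4
        assert_eq!(bytes_needed(4), 32); // even counts are already aligned
    }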