Mirror of https://github.com/ruby/ruby.git, synced 2025-08-15 13:39:04 +02:00
ZJIT: Keep a frame pointer and use it for memory params
Previously, ZJIT miscompiled the following because of native SP interference.

    def a(n1,n2,n3,n4,n5,n6,n7,n8) = [n8]
    a(0,0,0,0,0,0,0, :ok)

Commented problematic disassembly:

    ; call rb_ary_new_capa
    mov x0, #1
    mov x16, #0x1278
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16
    ; call rb_ary_push
    mov x1, x0
    str x1, [sp, #-0x10]!  ; c_push() from alloc_regs()
    mov x0, x1             ; arg0, the array
    ldur x1, [sp]          ; meant to be arg1=n8, but sp just moved!
    mov x16, #0x3968
    movk x16, #0x4bc, lsl #16
    movk x16, #1, lsl #32
    blr x16

Since the frame pointer stays constant in the body of the function, static
offsets based on it don't run the risk of being invalidated by SP movements.

Pass the registers to preserve through Insn::FrameSetup. This allows ARM to
use STP and waste no gaps between EC, SP, and CFP. x86 now preserves and
restores RBP since we use it as the frame pointer.

Since all arches now have a frame pointer, remove offset based SP movement
in the epilogue and restore registers using the frame pointer.
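The core of the change is visible in param_opnd() in the diff below: spilled basic-block arguments move from SP-relative slots to frame-pointer-relative slots. Here is a minimal, self-contained sketch of that addressing difference (illustrative only: Slot, Base, and NUM_ALLOC_REGS are stand-ins for ZJIT's lir::Opnd::Mem, NATIVE_STACK_PTR/NATIVE_BASE_PTR, and ALLOC_REGS; offsets assume 8-byte VALUEs, as the diff does):

    // Illustrative sketch only; not the real ZJIT types or constants.
    const SIZEOF_VALUE: i32 = 8;
    const NUM_ALLOC_REGS: usize = 2; // assumed register count for illustration

    #[derive(Debug)]
    enum Base { NativeSp, NativeBp }

    #[derive(Debug)]
    struct Slot { base: Base, disp: i32 }

    // Before: the slot is addressed relative to the native SP. Any push that
    // happens after this operand is formed (e.g. a spill emitted by the
    // register allocator) moves SP and silently invalidates `disp`.
    fn param_slot_sp(idx: usize) -> Slot {
        Slot { base: Base::NativeSp, disp: (idx - NUM_ALLOC_REGS) as i32 * SIZEOF_VALUE }
    }

    // After: the slot is addressed relative to the frame pointer, which is
    // set once in the prologue and never moves, so `disp` stays valid no
    // matter how SP moves in the body.
    fn param_slot_fp(idx: usize) -> Slot {
        Slot { base: Base::NativeBp, disp: -(((idx - NUM_ALLOC_REGS) as i32 + 1) * SIZEOF_VALUE) }
    }

    fn main() {
        // Spilled arguments land just below the frame pointer: -8, -16, -24, ...
        for idx in NUM_ALLOC_REGS..NUM_ALLOC_REGS + 3 {
            println!("{:?}  vs  {:?}", param_slot_sp(idx), param_slot_fp(idx));
        }
    }

With the frame pointer as the base, a later spill such as the str x1, [sp, #-0x10]! in the disassembly above can no longer change the meaning of a previously computed memory operand.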
Parent: 5ca71364ff
Commit: ff428b4dd0
6 changed files with 220 additions and 111 deletions
@@ -8,7 +8,7 @@ use crate::invariants::{track_bop_assumption, track_cme_assumption};
 use crate::gc::{get_or_create_iseq_payload, append_gc_offsets};
 use crate::state::ZJITState;
 use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr};
-use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, SP};
+use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, SideExitContext, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP};
 use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, CallInfo, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX};
 use crate::hir::{Const, FrameState, Function, Insn, InsnId};
 use crate::hir_type::{types::Fixnum, Type};
@@ -29,18 +29,18 @@ struct JITState {
     branch_iseqs: Vec<(Rc<Branch>, IseqPtr)>,

     /// The number of bytes allocated for basic block arguments spilled onto the C stack
-    c_stack_bytes: usize,
+    c_stack_slots: usize,
 }

 impl JITState {
     /// Create a new JITState instance
-    fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize, c_stack_bytes: usize) -> Self {
+    fn new(iseq: IseqPtr, num_insns: usize, num_blocks: usize, c_stack_slots: usize) -> Self {
         JITState {
             iseq,
             opnds: vec![None; num_insns],
             labels: vec![None; num_blocks],
             branch_iseqs: Vec::default(),
-            c_stack_bytes,
+            c_stack_slots,
         }
     }

@@ -128,7 +128,7 @@ fn gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> *const u8 {
             append_gc_offsets(iseq, &gc_offsets);

             // Compile an entry point to the JIT code
-            (gen_entry(cb, iseq, &function, start_ptr, jit.c_stack_bytes), jit.branch_iseqs)
+            (gen_entry(cb, iseq, &function, start_ptr), jit.branch_iseqs)
         },
         None => (None, vec![]),
     };
@@ -170,21 +170,18 @@ fn register_with_perf(iseq_name: String, start_ptr: usize, code_size: usize) {
 }

 /// Compile a JIT entry
-fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr, c_stack_bytes: usize) -> Option<CodePtr> {
+fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_ptr: CodePtr) -> Option<CodePtr> {
     // Set up registers for CFP, EC, SP, and basic block arguments
     let mut asm = Assembler::new();
     gen_entry_prologue(&mut asm, iseq);
-    gen_entry_params(&mut asm, iseq, function.block(BlockId(0)), c_stack_bytes);
+    gen_entry_params(&mut asm, iseq, function.block(BlockId(0)));

     // Jump to the first block using a call instruction
     asm.ccall(function_ptr.raw_ptr(cb) as *const u8, vec![]);

     // Restore registers for CFP, EC, and SP after use
-    asm_comment!(asm, "exit to the interpreter");
-    asm.cpop_into(SP);
-    asm.cpop_into(EC);
-    asm.cpop_into(CFP);
-    asm.frame_teardown();
+    asm_comment!(asm, "return to the interpreter");
+    asm.frame_teardown(lir::JIT_PRESERVED_REGS);
     asm.cret(C_RET_OPND);

     if get_option!(dump_lir) {
@@ -231,8 +228,8 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc<Branc

 /// Compile a function
 fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Option<(CodePtr, Vec<CodePtr>, JITState)> {
-    let c_stack_bytes = aligned_stack_bytes(max_num_params(function).saturating_sub(ALLOC_REGS.len()));
-    let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_bytes);
+    let c_stack_slots = max_num_params(function).saturating_sub(ALLOC_REGS.len());
+    let mut jit = JITState::new(iseq, function.num_insns(), function.num_blocks(), c_stack_slots);
     let mut asm = Assembler::new();

     // Compile each basic block
@@ -245,16 +242,9 @@ fn gen_function(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function) -> Optio
         let label = jit.get_label(&mut asm, block_id);
         asm.write_label(label);

-        // Set up the frame at the first block
+        // Set up the frame at the first block. :bb0-prologue:
         if block_id == BlockId(0) {
-            asm.frame_setup();
-
-            // Bump the C stack pointer for basic block arguments
-            if jit.c_stack_bytes > 0 {
-                asm_comment!(asm, "bump C stack pointer");
-                let new_sp = asm.sub(NATIVE_STACK_PTR, jit.c_stack_bytes.into());
-                asm.mov(NATIVE_STACK_PTR, new_sp);
-            }
+            asm.frame_setup(&[], jit.c_stack_slots);
         }

         // Compile all parameters
@@ -335,7 +325,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
             gen_send_without_block(jit, asm, call_info, *cd, &function.frame_state(*state), opnd!(self_val), opnds!(args))?,
         Insn::SendWithoutBlockDirect { cme, iseq, self_val, args, state, .. } => gen_send_without_block_direct(cb, jit, asm, *cme, *iseq, opnd!(self_val), opnds!(args), &function.frame_state(*state))?,
         Insn::InvokeBuiltin { bf, args, state } => gen_invokebuiltin(asm, &function.frame_state(*state), bf, opnds!(args))?,
-        Insn::Return { val } => return Some(gen_return(jit, asm, opnd!(val))?),
+        Insn::Return { val } => return Some(gen_return(asm, opnd!(val))?),
         Insn::FixnumAdd { left, right, state } => gen_fixnum_add(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
         Insn::FixnumSub { left, right, state } => gen_fixnum_sub(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
         Insn::FixnumMult { left, right, state } => gen_fixnum_mult(jit, asm, opnd!(left), opnd!(right), &function.frame_state(*state))?,
@@ -569,12 +559,8 @@ fn gen_putspecialobject(asm: &mut Assembler, value_type: SpecialObjectType) -> O
 /// Compile an interpreter entry block to be inserted into an ISEQ
 fn gen_entry_prologue(asm: &mut Assembler, iseq: IseqPtr) {
     asm_comment!(asm, "ZJIT entry point: {}", iseq_get_location(iseq, 0));
-    asm.frame_setup();
-
     // Save the registers we'll use for CFP, EP, SP
-    asm.cpush(CFP);
-    asm.cpush(EC);
-    asm.cpush(SP);
+    asm.frame_setup(lir::JIT_PRESERVED_REGS, 0);

     // EC and CFP are passed as arguments
     asm.mov(EC, C_ARG_OPNDS[0]);
@@ -587,7 +573,7 @@
 }

 /// Assign method arguments to basic block arguments at JIT entry
-fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_stack_bytes: usize) {
+fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block) {
     let num_params = entry_block.params().len() - 1; // -1 to exclude self
     if num_params > 0 {
         asm_comment!(asm, "set method params: {num_params}");
@@ -616,8 +602,8 @@ fn gen_entry_params(asm: &mut Assembler, iseq: IseqPtr, entry_block: &Block, c_s
             // the HIR function ────────────► └────────────┘
             // is running
             match param {
-                Opnd::Mem(lir::Mem { base, disp, num_bits }) => {
-                    let param_slot = Opnd::Mem(lir::Mem { num_bits, base, disp: disp - c_stack_bytes as i32 - Assembler::frame_size() });
+                Opnd::Mem(lir::Mem { base: _, disp, num_bits }) => {
+                    let param_slot = Opnd::mem(num_bits, NATIVE_STACK_PTR, disp - Assembler::frame_size());
                     asm.mov(param_slot, local);
                 }
                 // Prepare for parallel move for locals in registers
@@ -848,7 +834,7 @@ fn gen_send_without_block_direct(
     asm_comment!(asm, "side-exit if callee side-exits");
     asm.cmp(ret, Qundef.into());
     // Restore the C stack pointer on exit
-    asm.je(Target::SideExit { context: None, reason: CalleeSideExit, c_stack_bytes: jit.c_stack_bytes, label: None });
+    asm.je(Target::SideExit { context: None, reason: CalleeSideExit, label: None });

     asm_comment!(asm, "restore SP register for the caller");
     let new_sp = asm.sub(SP, sp_offset.into());
@@ -912,7 +898,7 @@ fn gen_new_range(
 }

 /// Compile code that exits from JIT code with a return value
-fn gen_return(jit: &JITState, asm: &mut Assembler, val: lir::Opnd) -> Option<()> {
+fn gen_return(asm: &mut Assembler, val: lir::Opnd) -> Option<()> {
     // Pop the current frame (ec->cfp++)
     // Note: the return PC is already in the previous CFP
     asm_comment!(asm, "pop stack frame");
@@ -924,16 +910,8 @@ fn gen_return(jit: &JITState, asm: &mut Assembler, val: lir::Opnd) -> Option<()>
     // we need to load the return value, which might be part of the frame.
     asm.load_into(C_RET_OPND, val);

-    // Restore the C stack pointer bumped for basic block arguments
-    if jit.c_stack_bytes > 0 {
-        asm_comment!(asm, "restore C stack pointer");
-        let new_sp = asm.add(NATIVE_STACK_PTR, jit.c_stack_bytes.into());
-        asm.mov(NATIVE_STACK_PTR, new_sp);
-    }
-
-    asm.frame_teardown();
-
     // Return from the function
+    asm.frame_teardown(&[]); // matching the setup in :bb0-prologue:
     asm.cret(C_RET_OPND);
     Some(())
 }
@@ -1140,7 +1118,7 @@ fn param_opnd(idx: usize) -> Opnd {
     if idx < ALLOC_REGS.len() {
         Opnd::Reg(ALLOC_REGS[idx])
     } else {
-        Opnd::mem(64, NATIVE_STACK_PTR, (idx - ALLOC_REGS.len()) as i32 * SIZEOF_VALUE_I32)
+        Opnd::mem(64, NATIVE_BASE_PTR, (idx - ALLOC_REGS.len() + 1) as i32 * -SIZEOF_VALUE_I32)
     }
 }
@@ -1196,7 +1174,6 @@ fn build_side_exit(jit: &mut JITState, state: &FrameState, reason: SideExitReaso
             locals,
         }),
         reason,
-        c_stack_bytes: jit.c_stack_bytes,
        label,
    };
    Some(target)
@@ -1225,26 +1202,6 @@ fn max_num_params(function: &Function) -> usize {
     }).max().unwrap_or(0)
 }

-/// Given the number of spill slots needed for a function, return the number of bytes
-/// the function needs to allocate on the stack for the stack frame.
-fn aligned_stack_bytes(num_slots: usize) -> usize {
-    // Both x86_64 and arm64 require the stack to be aligned to 16 bytes.
-    let num_slots = if cfg!(target_arch = "x86_64") && num_slots % 2 == 0 {
-        // On x86_64, since the call instruction bumps the stack pointer by 8 bytes on entry,
-        // we need to round up `num_slots` to an odd number.
-        num_slots + 1
-    } else if cfg!(target_arch = "aarch64") && num_slots % 2 == 1 {
-        // On arm64, the stack pointer is always aligned to 16 bytes, so we need to round up
-        // `num_slots`` to an even number.
-        num_slots + 1
-    } else {
-        num_slots
-    };
-
-    const { assert!(SIZEOF_VALUE == 8, "aligned_stack_bytes() assumes SIZEOF_VALUE == 8"); }
-    num_slots * SIZEOF_VALUE
-}
-
 impl Assembler {
     /// Make a C call while marking the start and end positions of it
     fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec<Opnd>, branch: &Rc<Branch>) -> Opnd {