diff --git a/zjit/src/backend/lir.rs b/zjit/src/backend/lir.rs index a0065b574c..54bef9d925 100644 --- a/zjit/src/backend/lir.rs +++ b/zjit/src/backend/lir.rs @@ -15,6 +15,7 @@ pub use crate::backend::current::{ NATIVE_STACK_PTR, NATIVE_BASE_PTR, C_ARG_OPNDS, C_RET_REG, C_RET_OPND, }; +pub const SCRATCH_OPND: Opnd = Opnd::Reg(Assembler::SCRATCH_REG); pub static JIT_PRESERVED_REGS: &'static [Opnd] = &[CFP, SP, EC]; diff --git a/zjit/src/codegen.rs b/zjit/src/codegen.rs index 43fde7db7f..5260d41e99 100644 --- a/zjit/src/codegen.rs +++ b/zjit/src/codegen.rs @@ -9,7 +9,7 @@ use crate::gc::{append_gc_offsets, get_or_create_iseq_payload, get_or_create_ise use crate::state::ZJITState; use crate::stats::{counter_ptr, with_time_stat, Counter, Counter::compile_time_ns}; use crate::{asm::CodeBlock, cruby::*, options::debug, virtualmem::CodePtr}; -use crate::backend::lir::{self, asm_comment, asm_ccall, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_STACK_PTR, NATIVE_BASE_PTR, SP}; +use crate::backend::lir::{self, asm_ccall, asm_comment, Assembler, Opnd, Target, CFP, C_ARG_OPNDS, C_RET_OPND, EC, NATIVE_BASE_PTR, NATIVE_STACK_PTR, SCRATCH_OPND, SP}; use crate::hir::{iseq_to_hir, Block, BlockId, BranchEdge, Invariant, RangeType, SideExitReason, SideExitReason::*, SpecialObjectType, SELF_PARAM_IDX}; use crate::hir::{Const, FrameState, Function, Insn, InsnId}; use crate::hir_type::{types, Type}; @@ -26,8 +26,8 @@ struct JITState { /// Labels for each basic block indexed by the BlockId labels: Vec>, - /// Branches to an ISEQ that need to be compiled later - branch_iseqs: Vec<(Rc, IseqPtr)>, + /// ISEQ calls that need to be compiled later + iseq_calls: Vec>, /// The number of bytes allocated for basic block arguments spilled onto the C stack c_stack_slots: usize, @@ -40,7 +40,7 @@ impl JITState { iseq, opnds: vec![None; num_insns], labels: vec![None; num_blocks], - branch_iseqs: Vec::default(), + iseq_calls: Vec::default(), c_stack_slots, } } @@ -130,8 +130,8 @@ fn 
gen_iseq_entry_point_body(cb: &mut CodeBlock, iseq: IseqPtr) -> Option Option) -> Option<()> { +fn gen_iseq_call(cb: &mut CodeBlock, caller_iseq: IseqPtr, iseq_call: Rc) -> Option<()> { // Compile a function stub - let Some((stub_ptr, gc_offsets)) = gen_function_stub(cb, iseq, branch.clone()) else { + let Some(stub_ptr) = gen_function_stub(cb, iseq_call.clone()) else { // Failed to compile the stub. Bail out of compiling the caller ISEQ. debug!("Failed to compile iseq: could not compile stub: {} -> {}", - iseq_get_location(caller_iseq, 0), iseq_get_location(iseq, 0)); + iseq_get_location(caller_iseq, 0), iseq_get_location(iseq_call.iseq, 0)); return None; }; - append_gc_offsets(iseq, &gc_offsets); // Update the JIT-to-JIT call to call the stub let stub_addr = stub_ptr.raw_ptr(cb); - branch.regenerate(cb, |asm| { - asm_comment!(asm, "call function stub: {}", iseq_get_location(iseq, 0)); + iseq_call.regenerate(cb, |asm| { + asm_comment!(asm, "call function stub: {}", iseq_get_location(iseq_call.iseq, 0)); asm.ccall(stub_addr, vec![]); }); Some(()) @@ -210,7 +209,7 @@ fn gen_entry(cb: &mut CodeBlock, iseq: IseqPtr, function: &Function, function_pt } /// Compile an ISEQ into machine code -fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc, IseqPtr)>)> { +fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec>)> { // Return an existing pointer if it's already compiled let payload = get_or_create_iseq_payload(iseq); match payload.status { @@ -233,7 +232,7 @@ fn gen_iseq(cb: &mut CodeBlock, iseq: IseqPtr) -> Option<(CodePtr, Vec<(Rc *const u8 { + fn function_stub_hit(iseq_call_ptr: *const c_void, ec: EcPtr, sp: *mut VALUE) -> *const u8 { with_vm_lock(src_loc!(), || { /// gen_push_frame() doesn't set PC and SP, so we need to set them before exit fn set_pc_and_sp(iseq: IseqPtr, ec: EcPtr, sp: *mut VALUE) { @@ -1408,21 +1407,25 @@ c_callable! 
{ // TODO: Alan thinks the payload status part of this check can happen without the VM lock, since the whole // code path can be made read-only. But you still need the check as is while holding the VM lock in any case. let cb = ZJITState::get_code_block(); - let payload = get_or_create_iseq_payload(iseq); + let iseq_call = unsafe { Rc::from_raw(iseq_call_ptr as *const IseqCall) }; + let payload = get_or_create_iseq_payload(iseq_call.iseq); if cb.has_dropped_bytes() || payload.status == IseqStatus::CantCompile { + // We'll use this Rc again, so increment the ref count decremented by from_raw. + unsafe { Rc::increment_strong_count(iseq_call_ptr as *const IseqCall); } + // Exit to the interpreter - set_pc_and_sp(iseq, ec, sp); - return ZJITState::get_exit_code().raw_ptr(cb); + set_pc_and_sp(iseq_call.iseq, ec, sp); + return ZJITState::get_exit_trampoline().raw_ptr(cb); } // Otherwise, attempt to compile the ISEQ. We have to mark_all_executable() beyond this point. - let code_ptr = with_time_stat(compile_time_ns, || function_stub_hit_body(cb, iseq, branch_ptr)); + let code_ptr = with_time_stat(compile_time_ns, || function_stub_hit_body(cb, &iseq_call)); let code_ptr = if let Some(code_ptr) = code_ptr { code_ptr } else { // Exit to the interpreter - set_pc_and_sp(iseq, ec, sp); - ZJITState::get_exit_code() + set_pc_and_sp(iseq_call.iseq, ec, sp); + ZJITState::get_exit_trampoline() }; cb.mark_all_executable(); code_ptr.raw_ptr(cb) @@ -1431,23 +1434,22 @@ c_callable! 
{ } /// Compile an ISEQ for a function stub -fn function_stub_hit_body(cb: &mut CodeBlock, iseq: IseqPtr, branch_ptr: *const c_void) -> Option { +fn function_stub_hit_body(cb: &mut CodeBlock, iseq_call: &Rc) -> Option { // Compile the stubbed ISEQ - let Some((code_ptr, branch_iseqs)) = gen_iseq(cb, iseq) else { - debug!("Failed to compile iseq: gen_iseq failed: {}", iseq_get_location(iseq, 0)); + let Some((code_ptr, iseq_calls)) = gen_iseq(cb, iseq_call.iseq) else { + debug!("Failed to compile iseq: gen_iseq failed: {}", iseq_get_location(iseq_call.iseq, 0)); return None; }; // Stub callee ISEQs for JIT-to-JIT calls - for (branch, callee_iseq) in branch_iseqs.into_iter() { - gen_iseq_branch(cb, callee_iseq, iseq, branch)?; + for callee_iseq_call in iseq_calls.into_iter() { + gen_iseq_call(cb, iseq_call.iseq, callee_iseq_call)?; } // Update the stub to call the code pointer - let branch = unsafe { Rc::from_raw(branch_ptr as *const Branch) }; let code_addr = code_ptr.raw_ptr(cb); - branch.regenerate(cb, |asm| { - asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq, 0)); + iseq_call.regenerate(cb, |asm| { + asm_comment!(asm, "call compiled function: {}", iseq_get_location(iseq_call.iseq, 0)); asm.ccall(code_addr, vec![]); }); @@ -1455,10 +1457,25 @@ fn function_stub_hit_body(cb: &mut CodeBlock, iseq: IseqPtr, branch_ptr: *const } /// Compile a stub for an ISEQ called by SendWithoutBlockDirect -/// TODO: Consider creating a trampoline to share some of the code among function stubs -fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc) -> Option<(CodePtr, Vec)> { +fn gen_function_stub(cb: &mut CodeBlock, iseq_call: Rc) -> Option { let mut asm = Assembler::new(); - asm_comment!(asm, "Stub: {}", iseq_get_location(iseq, 0)); + asm_comment!(asm, "Stub: {}", iseq_get_location(iseq_call.iseq, 0)); + + // Call function_stub_hit using the shared trampoline. See `gen_function_stub_hit_trampoline`. 
+    // Use load_into instead of mov, which is split on arm64, to avoid clobbering ALLOC_REGS.
+    asm.load_into(SCRATCH_OPND, Opnd::const_ptr(Rc::into_raw(iseq_call).into()));
+    asm.jmp(ZJITState::get_function_stub_hit_trampoline().into());
+
+    asm.compile(cb).map(|(code_ptr, gc_offsets)| {
+        assert_eq!(gc_offsets.len(), 0);
+        code_ptr
+    })
+}
+
+/// Generate a trampoline that is used when a function stub is hit
+pub fn gen_function_stub_hit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
+    let mut asm = Assembler::new();
+    asm_comment!(asm, "function_stub_hit trampoline");
 
     // Maintain alignment for x86_64, and set up a frame for arm64 properly
     asm.frame_setup(&[], 0);
@@ -1470,14 +1487,8 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc<Branch>) -> O
     const { assert!(ALLOC_REGS.len() % 2 == 0, "x86_64 would need to push one more if we push an odd number of regs"); }
 
     // Compile the stubbed ISEQ
-    let branch_addr = Rc::into_raw(branch);
-    let jump_addr = asm_ccall!(asm, function_stub_hit,
-        Opnd::Value(iseq.into()),
-        Opnd::const_ptr(branch_addr as *const u8),
-        EC,
-        SP
-    );
-    asm.mov(Opnd::Reg(Assembler::SCRATCH_REG), jump_addr);
+    let jump_addr = asm_ccall!(asm, function_stub_hit, SCRATCH_OPND, EC, SP);
+    asm.mov(SCRATCH_OPND, jump_addr);
 
     asm_comment!(asm, "restore argument registers");
     for &reg in ALLOC_REGS.iter().rev() {
@@ -1487,16 +1498,20 @@ fn gen_function_stub(cb: &mut CodeBlock, iseq: IseqPtr, branch: Rc<Branch>) -> O
     // Discard the current frame since the JIT function will set it up again
     asm.frame_teardown(&[]);
 
-    // Jump to SCRATCH_REG so that cpop_all() doesn't clobber it
-    asm.jmp_opnd(Opnd::Reg(Assembler::SCRATCH_REG));
-    asm.compile(cb)
+    // Jump to SCRATCH_OPND so that cpop_into() doesn't clobber it
+    asm.jmp_opnd(SCRATCH_OPND);
+
+    asm.compile(cb).map(|(code_ptr, gc_offsets)| {
+        assert_eq!(gc_offsets.len(), 0);
+        code_ptr
+    })
 }
 
 /// Generate a trampoline that is used when a function exits without restoring PC and the stack
-pub fn gen_exit(cb: &mut CodeBlock) -> Option<CodePtr> {
+pub fn gen_exit_trampoline(cb: &mut CodeBlock) -> Option<CodePtr> {
     let mut asm = Assembler::new();
-    asm_comment!(asm, "exit from function stub");
+    asm_comment!(asm, "side-exit trampoline");
     asm.frame_teardown(&[]); // matching the setup in :bb0-prologue:
     asm.cret(Qundef.into());
@@ -1557,45 +1572,49 @@ fn aligned_stack_bytes(num_slots: usize) -> usize {
 }
 
 impl Assembler {
-    /// Make a C call while marking the start and end positions of it
-    fn ccall_with_branch(&mut self, fptr: *const u8, opnds: Vec<Opnd>, branch: &Rc<Branch>) -> Opnd {
+    /// Make a C call while marking the start and end positions for IseqCall
+    fn ccall_with_iseq_call(&mut self, fptr: *const u8, opnds: Vec<Opnd>, iseq_call: &Rc<IseqCall>) -> Opnd {
         // We need to create our own branch rc objects so that we can move the closure below
-        let start_branch = branch.clone();
-        let end_branch = branch.clone();
+        let start_iseq_call = iseq_call.clone();
+        let end_iseq_call = iseq_call.clone();
         self.ccall_with_pos_markers(
             fptr,
             opnds,
             move |code_ptr, _| {
-                start_branch.start_addr.set(Some(code_ptr));
+                start_iseq_call.start_addr.set(Some(code_ptr));
             },
             move |code_ptr, _| {
-                end_branch.end_addr.set(Some(code_ptr));
+                end_iseq_call.end_addr.set(Some(code_ptr));
             },
         )
     }
 }
 
-/// Store info about an outgoing branch in a code segment
+/// Store info about a JIT-to-JIT call
 #[derive(Debug)]
-struct Branch {
-    /// Position where the generated code starts
+struct IseqCall {
+    /// Callee ISEQ that start_addr jumps to
+    iseq: IseqPtr,
+
+    /// Position where the call instruction starts
     start_addr: Cell<Option<CodePtr>>,
 
-    /// Position where the generated code ends (exclusive)
+    /// Position where the call instruction ends (exclusive)
    end_addr: Cell<Option<CodePtr>>,
 }
 
-impl Branch {
-    /// Allocate a new branch
-    fn new() -> Rc<Branch> {
-        Rc::new(Branch {
+impl IseqCall {
+    /// Allocate a new IseqCall
+    fn new(iseq: IseqPtr) -> Rc<IseqCall> {
+        Rc::new(IseqCall {
+            iseq,
             start_addr: Cell::new(None),
             end_addr: Cell::new(None),
         })
     }
 
-    /// Regenerate a branch with a given callback
+    /// Regenerate an IseqCall with a given callback
     fn regenerate(&self, cb: &mut CodeBlock, callback: impl Fn(&mut Assembler)) {
         cb.with_write_ptr(self.start_addr.get().unwrap(), |cb| {
             let mut asm = Assembler::new();
diff --git a/zjit/src/state.rs b/zjit/src/state.rs
index f752f72980..194b02fc8d 100644
--- a/zjit/src/state.rs
+++ b/zjit/src/state.rs
@@ -1,4 +1,4 @@
-use crate::codegen::gen_exit;
+use crate::codegen::{gen_exit_trampoline, gen_function_stub_hit_trampoline};
 use crate::cruby::{self, rb_bug_panic_hook, rb_vm_insns_count, EcPtr, Qnil, VALUE};
 use crate::cruby_methods;
 use crate::invariants::Invariants;
@@ -34,7 +34,10 @@ pub struct ZJITState {
     method_annotations: cruby_methods::Annotations,
 
     /// Trampoline to side-exit without restoring PC or the stack
-    exit_code: CodePtr,
+    exit_trampoline: CodePtr,
+
+    /// Trampoline to call function_stub_hit
+    function_stub_hit_trampoline: CodePtr,
 }
 
 /// Private singleton instance of the codegen globals
@@ -83,7 +86,8 @@ impl ZJITState {
         #[cfg(test)]
         let mut cb = CodeBlock::new_dummy();
 
-        let exit_code = gen_exit(&mut cb).unwrap();
+        let exit_trampoline = gen_exit_trampoline(&mut cb).unwrap();
+        let function_stub_hit_trampoline = gen_function_stub_hit_trampoline(&mut cb).unwrap();
 
         // Initialize the codegen globals instance
         let zjit_state = ZJITState {
@@ -92,7 +96,8 @@ impl ZJITState {
             invariants: Invariants::default(),
             assert_compiles: false,
             method_annotations: cruby_methods::init(),
-            exit_code,
+            exit_trampoline,
+            function_stub_hit_trampoline,
         };
         unsafe { ZJIT_STATE = Some(zjit_state); }
     }
@@ -170,8 +175,13 @@ impl ZJITState {
     }
 
     /// Return a code pointer to the side-exit trampoline
-    pub fn get_exit_code() -> CodePtr {
-        ZJITState::get_instance().exit_code
+    pub fn get_exit_trampoline() -> CodePtr {
+        ZJITState::get_instance().exit_trampoline
+    }
+
+    /// Return a code pointer to the function stub hit trampoline
+    pub fn get_function_stub_hit_trampoline() -> CodePtr {
+        ZJITState::get_instance().function_stub_hit_trampoline
     }
 }