From ec773e15f472ae2fe655529ea646d8fb2a4f0919 Mon Sep 17 00:00:00 2001 From: Takashi Kokubun Date: Mon, 15 Jul 2024 07:56:57 -0700 Subject: [PATCH] YJIT: Local variable register allocation (#11157) * YJIT: Local variable register allocation * locals are not stack temps * Rename RegTemps to RegMappings * Rename RegMapping to RegOpnd * Rename local_size to num_locals * s/stack value/operand/ * Rename spill_temps() to spill_regs() * Clarify when num_locals becomes None * Mention that InsnOut uses different registers * Rename get_reg_mapping to get_reg_opnd * Resurrect --yjit-temp-regs capability * Use MAX_CTX_TEMPS and MAX_CTX_LOCALS --- yjit/src/backend/arm64/mod.rs | 6 +- yjit/src/backend/ir.rs | 246 ++++++++++++----------- yjit/src/backend/tests.rs | 15 +- yjit/src/backend/x86_64/mod.rs | 8 +- yjit/src/codegen.rs | 82 ++++---- yjit/src/core.rs | 353 ++++++++++++++++++++------------- yjit/src/invariants.rs | 2 +- yjit/src/utils.rs | 4 +- 8 files changed, 415 insertions(+), 301 deletions(-) diff --git a/yjit/src/backend/arm64/mod.rs b/yjit/src/backend/arm64/mod.rs index 0e620c5266..b695f8da96 100644 --- a/yjit/src/backend/arm64/mod.rs +++ b/yjit/src/backend/arm64/mod.rs @@ -381,7 +381,7 @@ impl Assembler } let live_ranges: Vec = take(&mut self.live_ranges); - let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let asm = &mut asm_local; let mut iterator = self.into_draining_iter(); @@ -1383,7 +1383,7 @@ mod tests { use crate::disasm::*; fn setup_asm() -> (Assembler, CodeBlock) { - (Assembler::new(), CodeBlock::new_dummy(1024)) + (Assembler::new(0), CodeBlock::new_dummy(1024)) } #[test] @@ -1682,7 +1682,7 @@ mod tests { #[test] fn test_bcond_straddling_code_pages() { const LANDING_PAGE: usize = 65; - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]); // Skip to near the end of the page. Room for two instructions. diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs index 867ef6f7df..599ecfabc9 100644 --- a/yjit/src/backend/ir.rs +++ b/yjit/src/backend/ir.rs @@ -2,11 +2,11 @@ use std::collections::HashMap; use std::fmt; use std::convert::From; use std::mem::take; -use crate::codegen::{gen_outlined_exit, gen_counted_exit}; -use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE}; +use crate::codegen::{gen_counted_exit, gen_outlined_exit}; +use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE, VM_ENV_DATA_SIZE}; use crate::virtualmem::CodePtr; use crate::asm::{CodeBlock, OutlinedCb}; -use crate::core::{Context, RegTemps, MAX_REG_TEMPS}; +use crate::core::{Context, RegMapping, RegOpnd, MAX_CTX_TEMPS}; use crate::options::*; use crate::stats::*; @@ -77,10 +77,12 @@ pub enum Opnd num_bits: u8, /// ctx.stack_size when this operand is made. Used with idx for Opnd::Reg. stack_size: u8, + /// The number of local variables in the current ISEQ. Used only for locals. + num_locals: Option, /// ctx.sp_offset when this operand is made. Used with idx for Opnd::Mem. sp_offset: i8, - /// ctx.reg_temps when this operand is read. Used for register allocation. - reg_temps: Option + /// ctx.reg_mapping when this operand is read. Used for register allocation. 
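/// (This field is `None` when the operand is created; Assembler::push_insn()
/// stamps in the mapping that is current when the operand is first read.)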
+ reg_mapping: Option }, // Low-level operands, for lowering @@ -172,7 +174,7 @@ impl Opnd Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))), Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })), Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }), - Opnd::Stack { idx, stack_size, sp_offset, reg_temps, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps }), + Opnd::Stack { idx, stack_size, num_locals, sp_offset, reg_mapping, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping }), _ => None, } } @@ -227,28 +229,26 @@ impl Opnd Self::match_num_bits_iter(opnds.iter()) } - /// Calculate Opnd::Stack's index from the stack bottom. - pub fn stack_idx(&self) -> u8 { - self.get_stack_idx().unwrap() + /// Convert Opnd::Stack into RegMapping + pub fn reg_opnd(&self) -> RegOpnd { + self.get_reg_opnd().unwrap() } - /// Calculate Opnd::Stack's index from the stack bottom if it's Opnd::Stack. - pub fn get_stack_idx(&self) -> Option { - match self { - Opnd::Stack { idx, stack_size, .. } => { - Some((*stack_size as isize - *idx as isize - 1) as u8) - }, - _ => None - } - } - - /// Get the index for stack temp registers. - pub fn reg_idx(&self) -> usize { - match self { - Opnd::Stack { .. } => { - self.stack_idx() as usize % get_option!(num_temp_regs) - }, - _ => unreachable!(), + /// Convert an operand into RegMapping if it's Opnd::Stack + pub fn get_reg_opnd(&self) -> Option { + match *self { + Opnd::Stack { idx, stack_size, num_locals, .. } => Some( + if let Some(num_locals) = num_locals { + let last_idx = stack_size as i32 + VM_ENV_DATA_SIZE as i32 - 1; + assert!(last_idx <= idx, "Local index {} must be >= last local index {}", idx, last_idx); + assert!(idx <= last_idx + num_locals as i32, "Local index {} must be < last local index {} + local size {}", idx, last_idx, num_locals); + RegOpnd::Local((last_idx + num_locals as i32 - idx) as u8) + } else { + assert!(idx < stack_size as i32); + RegOpnd::Stack((stack_size as i32 - idx - 1) as u8) + } + ), + _ => None, } } } @@ -974,7 +974,7 @@ pub struct SideExitContext { /// Context fields used by get_generic_ctx() pub stack_size: u8, pub sp_offset: i8, - pub reg_temps: RegTemps, + pub reg_mapping: RegMapping, pub is_return_landing: bool, pub is_deferred: bool, } @@ -986,7 +986,7 @@ impl SideExitContext { pc, stack_size: ctx.get_stack_size(), sp_offset: ctx.get_sp_offset(), - reg_temps: ctx.get_reg_temps(), + reg_mapping: ctx.get_reg_mapping(), is_return_landing: ctx.is_return_landing(), is_deferred: ctx.is_deferred(), }; @@ -1002,7 +1002,7 @@ impl SideExitContext { let mut ctx = Context::default(); ctx.set_stack_size(self.stack_size); ctx.set_sp_offset(self.sp_offset); - ctx.set_reg_temps(self.reg_temps); + ctx.set_reg_mapping(self.reg_mapping); if self.is_return_landing { ctx.set_as_return_landing(); } @@ -1031,6 +1031,13 @@ pub struct Assembler { /// Context for generating the current insn pub ctx: Context, + /// The current ISEQ's local table size. asm.local_opnd() uses this, and it's + /// sometimes hard to pass this value, e.g. asm.spill_temps() in asm.ccall(). + /// + /// `None` means we're not assembling for an ISEQ, or that the local size is + /// not relevant. 
+ pub(super) num_locals: Option, + /// Side exit caches for each SideExitContext pub(super) side_exits: HashMap, @@ -1046,16 +1053,31 @@ pub struct Assembler { impl Assembler { - pub fn new() -> Self { - Self::new_with_label_names(Vec::default(), HashMap::default()) + /// Create an Assembler for ISEQ-specific code. + /// It includes all inline code and some outlined code like side exits and stubs. + pub fn new(num_locals: u32) -> Self { + Self::new_with_label_names(Vec::default(), HashMap::default(), Some(num_locals)) } - pub fn new_with_label_names(label_names: Vec, side_exits: HashMap) -> Self { + /// Create an Assembler for outlined code that are not specific to any ISEQ, + /// e.g. trampolines that are shared globally. + pub fn new_without_iseq() -> Self { + Self::new_with_label_names(Vec::default(), HashMap::default(), None) + } + + /// Create an Assembler with parameters that are populated by another Assembler instance. + /// This API is used for copying an Assembler for the next compiler pass. + pub fn new_with_label_names( + label_names: Vec, + side_exits: HashMap, + num_locals: Option + ) -> Self { Self { insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY), label_names, ctx: Context::default(), + num_locals, side_exits, side_exit_pc: None, side_exit_stack_size: None, @@ -1064,11 +1086,16 @@ impl Assembler } /// Get the list of registers that can be used for stack temps. - pub fn get_temp_regs() -> &'static [Reg] { + pub fn get_temp_regs2() -> &'static [Reg] { let num_regs = get_option!(num_temp_regs); &TEMP_REGS[0..num_regs] } + /// Get the number of locals for the ISEQ being compiled + pub fn get_num_locals(&self) -> Option { + self.num_locals + } + /// Set a context for generating side exits pub fn set_side_exit_context(&mut self, pc: *mut VALUE, stack_size: u8) { self.side_exit_pc = Some(pc); @@ -1090,31 +1117,32 @@ impl Assembler let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { - match opnd { + match *opnd { // If we find any InsnOut from previous instructions, we're going to update // the live range of the previous instruction to point to this one. Opnd::InsnOut { idx, .. } => { - assert!(*idx < self.insns.len()); - self.live_ranges[*idx] = insn_idx; + assert!(idx < self.insns.len()); + self.live_ranges[idx] = insn_idx; } Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => { - assert!(*idx < self.insns.len()); - self.live_ranges[*idx] = insn_idx; + assert!(idx < self.insns.len()); + self.live_ranges[idx] = insn_idx; } - // Set current ctx.reg_temps to Opnd::Stack. - Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: None } => { + // Set current ctx.reg_mapping to Opnd::Stack. 
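// Operands are created with reg_mapping: None; this arm stamps in the mapping
// that is current when the instruction is pushed, so lowering can later choose
// a register or a memory access per operand. A minimal sketch of the lifecycle
// (`dst` is a stand-in operand, not from this patch):
//
//     let src = asm.stack_opnd(0); // reg_mapping: None at creation
//     asm.mov(dst, src);           // push_insn() records ctx.get_reg_mapping()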
+ Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: None } => { assert_eq!( self.ctx.get_stack_size() as i16 - self.ctx.get_sp_offset() as i16, - *stack_size as i16 - *sp_offset as i16, + stack_size as i16 - sp_offset as i16, "Opnd::Stack (stack_size: {}, sp_offset: {}) expects a different SP position from asm.ctx (stack_size: {}, sp_offset: {})", - *stack_size, *sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(), + stack_size, sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(), ); *opnd = Opnd::Stack { - idx: *idx, - num_bits: *num_bits, - stack_size: *stack_size, - sp_offset: *sp_offset, - reg_temps: Some(self.ctx.get_reg_temps()), + idx, + num_bits, + stack_size, + num_locals, + sp_offset, + reg_mapping: Some(self.ctx.get_reg_mapping()), }; } _ => {} @@ -1141,7 +1169,7 @@ impl Assembler // Get a cached side exit let side_exit = match self.side_exits.get(&side_exit_context) { None => { - let exit_code = gen_outlined_exit(side_exit_context.pc, &side_exit_context.get_ctx(), ocb)?; + let exit_code = gen_outlined_exit(side_exit_context.pc, self.num_locals.unwrap(), &side_exit_context.get_ctx(), ocb)?; self.side_exits.insert(*side_exit_context, exit_code); exit_code } @@ -1175,20 +1203,20 @@ impl Assembler } // Convert Opnd::Stack to Opnd::Reg - fn reg_opnd(opnd: &Opnd) -> Opnd { - let regs = Assembler::get_temp_regs(); + fn reg_opnd(opnd: &Opnd, reg_idx: usize) -> Opnd { + let regs = Assembler::get_temp_regs2(); if let Opnd::Stack { num_bits, .. } = *opnd { incr_counter!(temp_reg_opnd); - Opnd::Reg(regs[opnd.reg_idx()]).with_num_bits(num_bits).unwrap() + Opnd::Reg(regs[reg_idx]).with_num_bits(num_bits).unwrap() } else { unreachable!() } } match opnd { - Opnd::Stack { reg_temps, .. } => { - if opnd.stack_idx() < MAX_REG_TEMPS && reg_temps.unwrap().get(opnd.stack_idx()) { - reg_opnd(opnd) + Opnd::Stack { reg_mapping, .. } => { + if let Some(reg_idx) = reg_mapping.unwrap().get_reg(opnd.reg_opnd()) { + reg_opnd(opnd, reg_idx) } else { mem_opnd(opnd) } @@ -1198,18 +1226,11 @@ impl Assembler } /// Allocate a register to a stack temp if available. - pub fn alloc_temp_reg(&mut self, stack_idx: u8) { - if get_option!(num_temp_regs) == 0 { - return; - } - + pub fn alloc_reg(&mut self, mapping: RegOpnd) { // Allocate a register if there's no conflict. 
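// RegMapping::alloc_reg() (defined in core.rs) returns true only when it newly
// assigns a register, so the ctx update and its asm comment happen only on an
// actual change. Typical call-site shape, as in gen_setlocal_generic():
//
//     let local = asm.local_opnd(ep_offset);
//     asm.alloc_reg(local.reg_opnd()); // no-op if already mapped or no reg free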
- let mut reg_temps = self.ctx.get_reg_temps(); - if reg_temps.conflicts_with(stack_idx) { - assert!(!reg_temps.get(stack_idx)); - } else { - reg_temps.set(stack_idx, true); - self.set_reg_temps(reg_temps); + let mut reg_mapping = self.ctx.get_reg_mapping(); + if reg_mapping.alloc_reg(mapping) { + self.set_reg_mapping(reg_mapping); } } @@ -1220,47 +1241,58 @@ impl Assembler self.ctx.clear_local_types(); } - /// Spill all live stack temps from registers to the stack - pub fn spill_temps(&mut self) { + /// Spill all live registers to the stack + pub fn spill_regs(&mut self) { // Forget registers above the stack top - let mut reg_temps = self.ctx.get_reg_temps(); - for stack_idx in self.ctx.get_stack_size()..MAX_REG_TEMPS { - reg_temps.set(stack_idx, false); + let mut reg_mapping = self.ctx.get_reg_mapping(); + for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 { + reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)); } - self.set_reg_temps(reg_temps); + self.set_reg_mapping(reg_mapping); // Spill live stack temps - if self.ctx.get_reg_temps() != RegTemps::default() { - asm_comment!(self, "spill_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), RegTemps::default().as_u8()); - for stack_idx in 0..u8::min(MAX_REG_TEMPS, self.ctx.get_stack_size()) { - if self.ctx.get_reg_temps().get(stack_idx) { + if self.ctx.get_reg_mapping() != RegMapping::default() { + asm_comment!(self, "spill_temps: {:?} -> {:?}", self.ctx.get_reg_mapping(), RegMapping::default()); + + // Spill stack temps + for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) { + if reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)) { let idx = self.ctx.get_stack_size() - 1 - stack_idx; self.spill_temp(self.stack_opnd(idx.into())); - reg_temps.set(stack_idx, false); } } - self.ctx.set_reg_temps(reg_temps); + + // Spill locals + for local_idx in 0..MAX_CTX_TEMPS as u8 { + if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) { + let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1; + let ep_offset = first_local_ep_offset - local_idx as u32; + self.spill_temp(self.local_opnd(ep_offset)); + } + } + + self.ctx.set_reg_mapping(reg_mapping); } // Every stack temp should have been spilled - assert_eq!(self.ctx.get_reg_temps(), RegTemps::default()); + assert_eq!(self.ctx.get_reg_mapping(), RegMapping::default()); } /// Spill a stack temp from a register to the stack fn spill_temp(&mut self, opnd: Opnd) { - assert!(self.ctx.get_reg_temps().get(opnd.stack_idx())); + assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None); - // Use different RegTemps for dest and src operands - let reg_temps = self.ctx.get_reg_temps(); - let mut mem_temps = reg_temps; - mem_temps.set(opnd.stack_idx(), false); + // Use different RegMappings for dest and src operands + let reg_mapping = self.ctx.get_reg_mapping(); + let mut mem_mappings = reg_mapping; + mem_mappings.dealloc_reg(opnd.reg_opnd()); // Move the stack operand from a register to memory match opnd { - Opnd::Stack { idx, num_bits, stack_size, sp_offset, .. } => { + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, .. 
} => { self.mov( - Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(mem_temps) }, - Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(reg_temps) }, + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(mem_mappings) }, + Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(reg_mapping) }, ); } _ => unreachable!(), @@ -1269,20 +1301,10 @@ impl Assembler } /// Update which stack temps are in a register - pub fn set_reg_temps(&mut self, reg_temps: RegTemps) { - if self.ctx.get_reg_temps() != reg_temps { - asm_comment!(self, "reg_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), reg_temps.as_u8()); - self.ctx.set_reg_temps(reg_temps); - self.verify_reg_temps(); - } - } - - /// Assert there's no conflict in stack temp register allocation - fn verify_reg_temps(&self) { - for stack_idx in 0..MAX_REG_TEMPS { - if self.ctx.get_reg_temps().get(stack_idx) { - assert!(!self.ctx.get_reg_temps().conflicts_with(stack_idx)); - } + pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) { + if self.ctx.get_reg_mapping() != reg_mapping { + asm_comment!(self, "reg_mapping: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping); + self.ctx.set_reg_mapping(reg_mapping); } } @@ -1411,7 +1433,7 @@ impl Assembler let live_ranges: Vec = take(&mut self.live_ranges); // shifted_live_ranges is indexed by mapped indexes in insn operands. let mut shifted_live_ranges: Vec = live_ranges.clone(); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let mut iterator = self.into_draining_iter(); while let Some((index, mut insn)) = iterator.next_mapped() { @@ -1703,24 +1725,24 @@ impl Assembler { // Let vm_check_canary() assert this ccall's leafness if leaf_ccall is set let canary_opnd = self.set_stack_canary(&opnds); - let old_temps = self.ctx.get_reg_temps(); // with registers + let old_temps = self.ctx.get_reg_mapping(); // with registers // Spill stack temp registers since they are caller-saved registers. // Note that this doesn't spill stack temps that are already popped // but may still be used in the C arguments. - self.spill_temps(); - let new_temps = self.ctx.get_reg_temps(); // all spilled + self.spill_regs(); + let new_temps = self.ctx.get_reg_mapping(); // all spilled - // Temporarily manipulate RegTemps so that we can use registers + // Temporarily manipulate RegMappings so that we can use registers // to pass stack operands that are already spilled above. - self.ctx.set_reg_temps(old_temps); + self.ctx.set_reg_mapping(old_temps); // Call a C function let out = self.next_opnd_out(Opnd::match_num_bits(&opnds)); self.push_insn(Insn::CCall { fptr, opnds, out }); // Registers in old_temps may be clobbered by the above C call, - // so rollback the manipulated RegTemps to a spilled version. - self.ctx.set_reg_temps(new_temps); + // so rollback the manipulated RegMappings to a spilled version. + self.ctx.set_reg_mapping(new_temps); // Clear the canary after use if let Some(canary_opnd) = canary_opnd { @@ -1738,7 +1760,7 @@ impl Assembler { // If the slot is already used, which is a valid optimization to avoid spills, // give up the verification. 
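// The leafness canary now compares Option<RegOpnd> values instead of raw stack
// indexes, which matters because locals are also Opnd::Stack operands after
// this patch and the old stack_idx() computation would not apply to them.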
let canary_opnd = if cfg!(debug_assertions) && self.leaf_ccall && opnds.iter().all(|opnd| - opnd.get_stack_idx() != canary_opnd.get_stack_idx() + opnd.get_reg_opnd() != canary_opnd.get_reg_opnd() ) { asm_comment!(self, "set stack canary"); self.mov(canary_opnd, vm_stack_canary().into()); @@ -1767,9 +1789,9 @@ impl Assembler { pub fn cpop_all(&mut self) { self.push_insn(Insn::CPopAll); - // Re-enable ccall's RegTemps assertion disabled by cpush_all. + // Re-enable ccall's RegMappings assertion disabled by cpush_all. // cpush_all + cpop_all preserve all stack temp registers, so it's safe. - self.set_reg_temps(self.ctx.get_reg_temps()); + self.set_reg_mapping(self.ctx.get_reg_mapping()); } pub fn cpop_into(&mut self, opnd: Opnd) { @@ -1787,7 +1809,7 @@ impl Assembler { // Temps will be marked back as being in registers by cpop_all. // We assume that cpush_all + cpop_all are used for C functions in utils.rs // that don't require spill_temps for GC. - self.set_reg_temps(RegTemps::default()); + self.set_reg_mapping(RegMapping::default()); } pub fn cret(&mut self, opnd: Opnd) { diff --git a/yjit/src/backend/tests.rs b/yjit/src/backend/tests.rs index 01e87fe26c..ac2f35b3d9 100644 --- a/yjit/src/backend/tests.rs +++ b/yjit/src/backend/tests.rs @@ -1,19 +1,19 @@ #![cfg(test)] -use crate::asm::{CodeBlock}; +use crate::asm::CodeBlock; use crate::backend::ir::*; use crate::cruby::*; use crate::utils::c_callable; #[test] fn test_add() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let out = asm.add(SP, Opnd::UImm(1)); let _ = asm.add(out, Opnd::UImm(2)); } #[test] fn test_alloc_regs() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); // Get the first output that we're going to reuse later. let out1 = asm.add(EC, Opnd::UImm(1)); @@ -62,7 +62,7 @@ fn test_alloc_regs() { fn setup_asm() -> (Assembler, CodeBlock) { return ( - Assembler::new(), + Assembler::new(0), CodeBlock::new_dummy(1024) ); } @@ -194,7 +194,7 @@ fn test_c_call() #[test] fn test_alloc_ccall_regs() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let out1 = asm.ccall(0 as *const u8, vec![]); let out2 = asm.ccall(0 as *const u8, vec![out1]); asm.mov(EC, out2); @@ -283,8 +283,7 @@ fn test_bake_string() { #[test] fn test_draining_iterator() { - - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); let _ = asm.load(Opnd::None); asm.store(Opnd::None, Opnd::None); @@ -315,7 +314,7 @@ fn test_cmp_8_bit() { fn test_no_pos_marker_callback_when_compile_fails() { // When compilation fails (e.g. when out of memory), the code written out is malformed. // We don't want to invoke the pos_marker callbacks with positions of malformed code. - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); // Markers around code to exhaust memory limit let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called"); diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs index c717f76c98..c0d42e79e6 100644 --- a/yjit/src/backend/x86_64/mod.rs +++ b/yjit/src/backend/x86_64/mod.rs @@ -79,7 +79,7 @@ impl From<&Opnd> for X86Opnd { } } -/// List of registers that can be used for stack temps. +/// List of registers that can be used for stack temps and locals. 
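// The same five registers now back both stack temps and locals; core.rs
// mirrors this table size as MAX_MAPPED_REGS.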
pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG]; impl Assembler @@ -112,7 +112,7 @@ impl Assembler fn x86_split(mut self) -> Assembler { let live_ranges: Vec = take(&mut self.live_ranges); - let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits)); + let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let mut iterator = self.into_draining_iter(); while let Some((index, mut insn)) = iterator.next_unmapped() { @@ -895,14 +895,14 @@ impl Assembler #[cfg(test)] mod tests { - use crate::disasm::{assert_disasm}; + use crate::disasm::assert_disasm; #[cfg(feature = "disasm")] use crate::disasm::{unindent, disasm_addr_range}; use super::*; fn setup_asm() -> (Assembler, CodeBlock) { - (Assembler::new(), CodeBlock::new_dummy(1024)) + (Assembler::new(0), CodeBlock::new_dummy(1024)) } #[test] diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index 5e05adff3b..4abf58fea4 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -402,6 +402,11 @@ impl<'a> JITState<'a> { _ => false, } } + + /// Return the number of locals in the current ISEQ + pub fn num_locals(&self) -> u32 { + unsafe { get_iseq_body_local_table_size(self.iseq) } + } } /// Macro to call jit.perf_symbol_push() without evaluating arguments when @@ -646,7 +651,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { } // Verify stack operand types - let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u8); + let top_idx = cmp::min(ctx.get_stack_size(), MAX_CTX_TEMPS as u8); for i in 0..top_idx { let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i)); let learned_type = ctx.get_opnd_type(StackOpnd(i)); @@ -693,7 +698,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { // Verify local variable types let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) }; - let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES); + let top_idx: usize = cmp::min(local_table_size as usize, MAX_CTX_TEMPS); for i in 0..top_idx { let learned_type = ctx.get_local_type(i); let learned_type = relax_type_with_singleton_class_assumption(learned_type); @@ -717,7 +722,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) { // interpreter state. fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); gen_counter_incr(&mut asm, Counter::exit_from_branch_stub); @@ -748,7 +753,7 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { } // Spill stack temps before returning to the interpreter - asm.spill_temps(); + asm.spill_regs(); // Generate the code to exit to the interpreters // Write the adjusted SP back into the CFP @@ -804,11 +809,11 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) { /// moment, so there is one unique side exit for each context. Note that /// it's incorrect to jump to the side exit after any ctx stack push operations /// since they change the logic required for reconstructing interpreter state. 
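// gen_outlined_exit() now takes num_locals because gen_exit() calls
// asm.spill_regs(), and spilling a register-backed local unwraps
// asm.num_locals to compute the local's offset from EP.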
-pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> Option { +pub fn gen_outlined_exit(exit_pc: *mut VALUE, num_locals: u32, ctx: &Context, ocb: &mut OutlinedCb) -> Option { let mut cb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new(num_locals); asm.ctx = *ctx; - asm.set_reg_temps(ctx.get_reg_temps()); + asm.set_reg_mapping(ctx.get_reg_mapping()); gen_exit(exit_pc, &mut asm); @@ -826,7 +831,7 @@ pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut Outli None => return Some(side_exit), }; - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // Increment a counter gen_counter_incr(&mut asm, counter); @@ -876,7 +881,7 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> O jit.block_entry_exit = Some(entry_exit?); } else { let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) }; - jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, block_starting_context, jit.get_ocb())?); + jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, jit.num_locals(), block_starting_context, jit.get_ocb())?); } Some(()) @@ -885,7 +890,7 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> O // Landing code for when c_return tracing is enabled. See full_cfunc_return(). fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // This chunk of code expects REG_EC to be filled properly and // RAX to contain the return value of the C method. @@ -915,7 +920,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option { /// This is used by gen_leave() and gen_entry_prologue() fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // gen_leave() fully reconstructs interpreter state and leaves the // return value in C_RET_OPND before coming here. @@ -942,7 +947,7 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option { // the caller's stack, which is different from gen_stub_exit(). fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // gen_leave() leaves the return value in C_RET_OPND before coming here. 
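// Shared trampolines like this one use Assembler::new_without_iseq(): with
// num_locals set to None, asm.local_opnd() and local spilling (which unwrap
// it) cannot be used by accident in code that outlives any single ISEQ.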
let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND); @@ -1011,7 +1016,7 @@ pub fn gen_entry_prologue( ) -> Option { let code_ptr = cb.get_write_ptr(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); if get_option_ref!(dump_disasm).is_some() { asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0)); } else { @@ -1134,7 +1139,7 @@ fn end_block_with_jump( if jit.record_boundary_patch_point { jit.record_boundary_patch_point = false; let exit_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, continuation_insn_idx.into())}; - let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, jit.get_ocb()); + let exit_pos = gen_outlined_exit(exit_pc, jit.num_locals(), &reset_depth, jit.get_ocb()); record_global_inval_patch(asm, exit_pos?); } @@ -1180,7 +1185,7 @@ pub fn gen_single_block( jit.iseq = blockid.iseq; // Create a backend assembler instance - let mut asm = Assembler::new(); + let mut asm = Assembler::new(jit.num_locals()); asm.ctx = ctx; #[cfg(feature = "disasm")] @@ -1188,7 +1193,7 @@ pub fn gen_single_block( let blockid_idx = blockid.idx; let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() }; asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth); - asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8()); + asm_comment!(asm, "reg_mapping: {:?}", asm.ctx.get_reg_mapping()); } // Mark the start of an ISEQ for --yjit-perf @@ -1233,14 +1238,14 @@ pub fn gen_single_block( // stack_pop doesn't immediately deallocate a register for stack temps, // but it's safe to do so at this instruction boundary. - for stack_idx in asm.ctx.get_stack_size()..MAX_REG_TEMPS { - asm.ctx.dealloc_temp_reg(stack_idx); + for stack_idx in asm.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 { + asm.ctx.dealloc_reg(RegOpnd::Stack(stack_idx)); } // If previous instruction requested to record the boundary if jit.record_boundary_patch_point { // Generate an exit to this instruction and record it - let exit_pos = gen_outlined_exit(jit.pc, &asm.ctx, jit.get_ocb()).ok_or(())?; + let exit_pos = gen_outlined_exit(jit.pc, jit.num_locals(), &asm.ctx, jit.get_ocb()).ok_or(())?; record_global_inval_patch(&mut asm, exit_pos); jit.record_boundary_patch_point = false; } @@ -1803,7 +1808,7 @@ fn gen_splatkw( asm.mov(stack_ret, hash); asm.stack_push(block_type); // Leave block_opnd spilled by ccall as is - asm.ctx.dealloc_temp_reg(asm.ctx.get_stack_size() - 1); + asm.ctx.dealloc_reg(RegOpnd::Stack(asm.ctx.get_stack_size() - 1)); } Some(KeepCompiling) @@ -2278,7 +2283,7 @@ fn gen_getlocal_generic( ) -> Option { let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) { // Load the local using SP register - asm.ctx.ep_opnd(-(ep_offset as i32)) + asm.local_opnd(ep_offset) } else { // Load environment pointer EP (level 0) from CFP let ep_opnd = gen_get_ep(asm, level); @@ -2359,8 +2364,11 @@ fn gen_setlocal_generic( let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) { // Load flags and the local using SP register - let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32)); let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32); + let local_opnd = asm.local_opnd(ep_offset); + + // Allocate a register to the new local operand + asm.alloc_reg(local_opnd.reg_opnd()); (flags_opnd, local_opnd) } else { // Load flags and the local for the level @@ -3071,7 +3079,7 @@ fn gen_set_ivar( // If we know the stack value is an 
immediate, there's no need to // generate WB code. if !stack_type.is_imm() { - asm.spill_temps(); // for ccall (unconditionally spill them for RegTemps consistency) + asm.spill_regs(); // for ccall (unconditionally spill them for RegMappings consistency) let skip_wb = asm.new_label("skip_wb"); // If the value we're writing is an immediate, we don't need to WB asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into()); @@ -3516,7 +3524,7 @@ fn gen_equality_specialized( let ret = asm.new_label("ret"); // Spill for ccall. For safety, unconditionally spill temps before branching. - asm.spill_temps(); + asm.spill_regs(); // If they are equal by identity, return true asm.cmp(a_opnd, b_opnd); @@ -5482,7 +5490,7 @@ fn jit_rb_str_uplus( // We allocate when we dup the string jit_prepare_call_with_gc(jit, asm); - asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency. asm_comment!(asm, "Unary plus on string"); let recv_opnd = asm.stack_pop(1); @@ -5500,7 +5508,7 @@ fn jit_rb_str_uplus( asm.jz(ret_label); // Str is frozen - duplicate it - asm.spill_temps(); // for ccall + asm.spill_regs(); // for ccall let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]); asm.mov(stack_ret, ret_opnd); @@ -5782,7 +5790,7 @@ fn jit_rb_str_concat( // rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised // backtraces on this method since the interpreter does the same thing on opt_ltlt. jit_prepare_non_leaf_call(jit, asm); - asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency. let concat_arg = asm.stack_pop(1); let recv = asm.stack_pop(1); @@ -5815,7 +5823,7 @@ fn jit_rb_str_concat( // If encodings are different, use a slower encoding-aware concatenate asm.write_label(enc_mismatch); - asm.spill_temps(); // Ignore the register for the other local branch + asm.spill_regs(); // Ignore the register for the other local branch let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]); let stack_ret = asm.stack_push(Type::TString); asm.mov(stack_ret, ret_opnd); @@ -6321,7 +6329,7 @@ fn gen_push_frame( if frame.iseq.is_some() { // Spill stack temps to let the callee use them (must be done before changing the SP register) - asm.spill_temps(); + asm.spill_regs(); // Saving SP before calculating ep avoids a dependency on a register // However this must be done after referencing frame.recv, which may be SP-relative @@ -7502,7 +7510,7 @@ fn gen_send_iseq( }; // Store rest param to memory to avoid register shuffle as // we won't be reading it for the remainder of the block. 
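// dealloc_reg() releases whichever register currently backs the operand, so
// the store below writes straight to the VM stack slot. The rename is
// mechanical at call sites: dealloc_temp_reg(opnd.stack_idx()) becomes
// dealloc_reg(opnd.reg_opnd()).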
- asm.ctx.dealloc_temp_reg(rest_param.stack_idx()); + asm.ctx.dealloc_reg(rest_param.reg_opnd()); asm.store(rest_param, rest_param_array); } @@ -7601,7 +7609,7 @@ fn gen_send_iseq( // Write the CI in to the stack and ensure that it actually gets // flushed to memory let ci_opnd = asm.stack_opnd(-1); - asm.ctx.dealloc_temp_reg(ci_opnd.stack_idx()); + asm.ctx.dealloc_reg(ci_opnd.reg_opnd()); asm.mov(ci_opnd, VALUE(ci as usize).into()); } @@ -7714,7 +7722,7 @@ fn gen_send_iseq( // Pop arguments and receiver in return context and // mark it as a continuation of gen_leave() - let mut return_asm = Assembler::new(); + let mut return_asm = Assembler::new(jit.num_locals()); return_asm.ctx = asm.ctx; return_asm.stack_pop(sp_offset.try_into().unwrap()); return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above @@ -7967,7 +7975,7 @@ fn gen_iseq_kw_call( kwargs_order[kwrest_idx] = 0; } // Put kwrest straight into memory, since we might pop it later - asm.ctx.dealloc_temp_reg(stack_kwrest.stack_idx()); + asm.ctx.dealloc_reg(stack_kwrest.reg_opnd()); asm.mov(stack_kwrest, kwrest); if stack_kwrest_idx >= 0 { asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::map_to_stack(kwrest_type)); @@ -8065,7 +8073,7 @@ fn gen_iseq_kw_call( if let Some(kwrest_type) = kwrest_type { let kwrest = asm.stack_push(kwrest_type); // We put the kwrest parameter in memory earlier - asm.ctx.dealloc_temp_reg(kwrest.stack_idx()); + asm.ctx.dealloc_reg(kwrest.reg_opnd()); argc += 1; } @@ -9858,7 +9866,7 @@ fn gen_getblockparam( // Save the PC and SP because we might allocate jit_prepare_call_with_gc(jit, asm); - asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency. + asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency. // A mirror of the interpreter code. Checking for the case // where it's pushing rb_block_param_proxy. @@ -10466,7 +10474,7 @@ mod tests { return ( Context::default(), - Assembler::new(), + Assembler::new(0), cb, OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)), ); @@ -10534,7 +10542,7 @@ mod tests { assert_eq!(status, Some(KeepCompiling)); let mut default = Context::default(); - default.set_reg_temps(context.get_reg_temps()); + default.set_reg_mapping(context.get_reg_mapping()); assert_eq!(context.diff(&default), TypeDiff::Compatible(0)); } diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 699fcae840..f3ebdc0374 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -30,11 +30,11 @@ use YARVOpnd::*; use TempMappingKind::*; use crate::invariants::*; -// Maximum number of temp value types we keep track of -pub const MAX_TEMP_TYPES: usize = 8; +// Maximum number of temp value types or registers we keep track of +pub const MAX_CTX_TEMPS: usize = 8; -// Maximum number of local variable types we keep track of -const MAX_LOCAL_TYPES: usize = 8; +// Maximum number of local variable types or registers we keep track of +const MAX_CTX_LOCALS: usize = 8; /// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points /// to a YARV instruction or an instruction operand. @@ -411,43 +411,103 @@ impl From for YARVOpnd { } } -/// Maximum index of stack temps that could be in a register -pub const MAX_REG_TEMPS: u8 = 8; +/// Number of registers that can be used for stack temps or locals +pub const MAX_MAPPED_REGS: usize = 5; -/// Bitmap of which stack temps are in a register -#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] -pub struct RegTemps(u8); +/// A stack slot or a local variable. u8 represents the index of it (<= 8). 
+#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)] +pub enum RegOpnd { + Stack(u8), + Local(u8), +} -impl RegTemps { - pub fn get(&self, index: u8) -> bool { - assert!(index < MAX_REG_TEMPS); - (self.0 >> index) & 1 == 1 +/// RegMappings manages a set of registers used for stack temps and locals. +/// Each element of the array represents each of the registers. +/// If an element is Some, the stack temp or the local uses a register. +/// +/// Note that Opnd::InsnOut uses a separate set of registers at the moment. +#[derive(Copy, Clone, Default, Eq, Hash, PartialEq)] +pub struct RegMapping([Option; MAX_MAPPED_REGS]); + +impl RegMapping { + /// Return the index of the register for a given operand if allocated. + pub fn get_reg(&self, opnd: RegOpnd) -> Option { + self.0.iter().enumerate() + .find(|(_, ®_opnd)| reg_opnd == Some(opnd)) + .map(|(reg_idx, _)| reg_idx) } - pub fn set(&mut self, index: u8, value: bool) { - assert!(index < MAX_REG_TEMPS); - if value { - self.0 = self.0 | (1 << index); - } else { - self.0 = self.0 & !(1 << index); + /// Allocate a register for a given operand if available. + /// Return true if self is updated. + pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool { + // If a given opnd already has a register, skip allocation. + if self.get_reg(opnd).is_some() { + return false; } - } - pub fn as_u8(&self) -> u8 { - self.0 - } - - /// Return true if there's a register that conflicts with a given stack_idx. - pub fn conflicts_with(&self, stack_idx: u8) -> bool { - let mut other_idx = stack_idx as usize % get_option!(num_temp_regs); - while other_idx < MAX_REG_TEMPS as usize { - if stack_idx as usize != other_idx && self.get(other_idx as u8) { - return true; + // If the index is too large to encode with with 3 bits, give up. + match opnd { + RegOpnd::Stack(stack_idx) => if stack_idx >= MAX_CTX_TEMPS as u8 { + return false; } - other_idx += get_option!(num_temp_regs); + RegOpnd::Local(local_idx) => if local_idx >= MAX_CTX_LOCALS as u8 { + return false; + } + }; + + // Allocate a register if available. + if let Some(reg_idx) = self.find_unused_reg(opnd) { + self.0[reg_idx] = Some(opnd); + return true; } false } + + /// Deallocate a register for a given operand if in use. + /// Return true if self is updated. + pub fn dealloc_reg(&mut self, opnd: RegOpnd) -> bool { + for reg_opnd in self.0.iter_mut() { + if *reg_opnd == Some(opnd) { + *reg_opnd = None; + return true; + } + } + false + } + + /// Find an available register and return the index of it. + fn find_unused_reg(&self, opnd: RegOpnd) -> Option { + let num_regs = get_option!(num_temp_regs); + if num_regs == 0 { + return None; + } + assert!(num_regs <= MAX_MAPPED_REGS); + + // If the default index for the operand is available, use that to minimize + // discrepancies among Contexts. + let default_idx = match opnd { + RegOpnd::Stack(stack_idx) => stack_idx.as_usize() % num_regs, + RegOpnd::Local(local_idx) => num_regs - (local_idx.as_usize() % num_regs) - 1, + }; + if self.0[default_idx].is_none() { + return Some(default_idx); + } + + // If not, pick any other available register. Like default indexes, prefer + // lower indexes for Stack, and higher indexes for Local. 
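// Worked example, assuming five temp registers (the old test asserted
// num_temp_regs == 5): Stack(0..=4) get default indexes 0,1,2,3,4 while
// Local(0..=4) get 4,3,2,1,0, so temps fill the table from the low end and
// locals from the high end. The fallback scan below keeps the same
// orientation when a default slot is already taken.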
+ let mut index_temps = self.0.iter().enumerate(); + match opnd { + RegOpnd::Stack(_) => index_temps.find(|(_, reg_opnd)| reg_opnd.is_none()), + RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()), + }.map(|(index, _)| index) + } +} + +impl fmt::Debug for RegMapping { + /// Print `[None, ...]` instead of the default `RegMappings([None, ...])` + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{:?}", self.0) + } } /// Bits for chain_depth_return_landing_defer @@ -473,8 +533,8 @@ pub struct Context { // This represents how far the JIT's SP is from the "real" SP sp_offset: i8, - /// Bitmap of which stack temps are in a register - reg_temps: RegTemps, + /// Which stack temps or locals are in a register + reg_mapping: RegMapping, /// Fields packed into u8 /// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing]) @@ -786,7 +846,7 @@ mod bitvector_tests { let idx0 = ctx0.encode_into(&mut bits); let mut ctx1 = Context::default(); - ctx1.reg_temps = RegTemps(1); + ctx1.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]); let idx1 = ctx1.encode_into(&mut bits); // Make sure that we can encode two contexts successively @@ -797,10 +857,10 @@ mod bitvector_tests { } #[test] - fn regress_reg_temps() { + fn regress_reg_mapping() { let mut bits = BitVector::new(); let mut ctx = Context::default(); - ctx.reg_temps = RegTemps(1); + ctx.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]); ctx.encode_into(&mut bits); let b0 = bits.read_u1(&mut 0); @@ -973,9 +1033,24 @@ impl Context { bits.push_u8(self.sp_offset as u8); } - // Bitmap of which stack temps are in a register - let RegTemps(reg_temps) = self.reg_temps; - bits.push_u8(reg_temps); + // Which stack temps or locals are in a register + for &temp in self.reg_mapping.0.iter() { + if let Some(temp) = temp { + bits.push_u1(1); // Some + match temp { + RegOpnd::Stack(stack_idx) => { + bits.push_u1(0); // Stack + bits.push_u3(stack_idx); + } + RegOpnd::Local(local_idx) => { + bits.push_u1(1); // Local + bits.push_u3(local_idx); + } + } + } else { + bits.push_u1(0); // None + } + } // chain_depth_and_flags: u8, bits.push_u8(self.chain_depth_and_flags); @@ -987,7 +1062,7 @@ impl Context { } // Encode the local types if known - for local_idx in 0..MAX_LOCAL_TYPES { + for local_idx in 0..MAX_CTX_LOCALS { let t = self.get_local_type(local_idx); if t != Type::Unknown { bits.push_op(CtxOp::SetLocalType); @@ -997,7 +1072,7 @@ impl Context { } // Encode stack temps - for stack_idx in 0..MAX_TEMP_TYPES { + for stack_idx in 0..MAX_CTX_TEMPS { let mapping = self.get_temp_mapping(stack_idx); match mapping.get_kind() { @@ -1059,8 +1134,17 @@ impl Context { debug_assert!(!( (sp_offset_bits & 0x80) != 0 && ctx.sp_offset > 0 )); } - // Bitmap of which stack temps are in a register - ctx.reg_temps = RegTemps(bits.read_u8(&mut idx)); + // Which stack temps or locals are in a register + for index in 0..MAX_MAPPED_REGS { + if bits.read_u1(&mut idx) == 1 { // Some + let temp = if bits.read_u1(&mut idx) == 0 { // RegMapping::Stack + RegOpnd::Stack(bits.read_u3(&mut idx)) + } else { + RegOpnd::Local(bits.read_u3(&mut idx)) + }; + ctx.reg_mapping.0[index] = Some(temp); + } + } // chain_depth_and_flags: u8 ctx.chain_depth_and_flags = bits.read_u8(&mut idx); @@ -1412,7 +1496,7 @@ impl PendingBranch { target_idx: u32, target: BlockId, ctx: &Context, - ocb: &mut OutlinedCb, + jit: &mut JITState, ) -> Option { // If the block 
already exists if let Some(blockref) = find_block_version(target, ctx) { @@ -1430,7 +1514,7 @@ impl PendingBranch { // The branch struct is uninitialized right now but as a stable address. // We make sure the stub runs after the branch is initialized. let branch_struct_addr = self.uninit_branch.as_ptr() as usize; - let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx); + let stub_addr = gen_branch_stub(ctx, jit.iseq, jit.get_ocb(), branch_struct_addr, target_idx); if let Some(stub_addr) = stub_addr { // Fill the branch target with a stub @@ -2362,7 +2446,7 @@ impl Context { let mut generic_ctx = Context::default(); generic_ctx.stack_size = self.stack_size; generic_ctx.sp_offset = self.sp_offset; - generic_ctx.reg_temps = self.reg_temps; + generic_ctx.reg_mapping = self.reg_mapping; if self.is_return_landing() { generic_ctx.set_as_return_landing(); } @@ -2390,12 +2474,12 @@ impl Context { self.sp_offset = offset; } - pub fn get_reg_temps(&self) -> RegTemps { - self.reg_temps + pub fn get_reg_mapping(&self) -> RegMapping { + self.reg_mapping } - pub fn set_reg_temps(&mut self, reg_temps: RegTemps) { - self.reg_temps = reg_temps; + pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) { + self.reg_mapping = reg_mapping; } pub fn get_chain_depth(&self) -> u8 { @@ -2447,14 +2531,13 @@ impl Context { self.sp_opnd(-ep_offset + offset) } - /// Stop using a register for a given stack temp. + /// Stop using a register for a given stack temp or a local. /// This allows us to reuse the register for a value that we know is dead /// and will no longer be used (e.g. popped stack temp). - pub fn dealloc_temp_reg(&mut self, stack_idx: u8) { - if stack_idx < MAX_REG_TEMPS { - let mut reg_temps = self.get_reg_temps(); - reg_temps.set(stack_idx, false); - self.set_reg_temps(reg_temps); + pub fn dealloc_reg(&mut self, opnd: RegOpnd) { + let mut reg_mapping = self.get_reg_mapping(); + if reg_mapping.dealloc_reg(opnd) { + self.set_reg_mapping(reg_mapping); } } @@ -2467,7 +2550,7 @@ impl Context { let stack_idx: usize = (self.stack_size - 1 - idx).into(); // If outside of tracked range, do nothing - if stack_idx >= MAX_TEMP_TYPES { + if stack_idx >= MAX_CTX_TEMPS { return Type::Unknown; } @@ -2478,7 +2561,7 @@ impl Context { MapToStack => mapping.get_type(), MapToLocal => { let idx = mapping.get_local_idx(); - assert!((idx as usize) < MAX_LOCAL_TYPES); + assert!((idx as usize) < MAX_CTX_LOCALS); return self.get_local_type(idx.into()); } } @@ -2488,7 +2571,7 @@ impl Context { /// Get the currently tracked type for a local variable pub fn get_local_type(&self, local_idx: usize) -> Type { - if local_idx >= MAX_LOCAL_TYPES { + if local_idx >= MAX_CTX_LOCALS { return Type::Unknown } else { // Each type is stored in 4 bits @@ -2499,7 +2582,7 @@ impl Context { /// Get the current temp mapping for a given stack slot fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping { - assert!(temp_idx < MAX_TEMP_TYPES); + assert!(temp_idx < MAX_CTX_TEMPS); // Extract the temp mapping kind let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11; @@ -2527,7 +2610,7 @@ impl Context { /// Get the current temp mapping for a given stack slot fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) { - assert!(temp_idx < MAX_TEMP_TYPES); + assert!(temp_idx < MAX_CTX_TEMPS); // Extract the kind bits let mapping_kind = mapping.get_kind(); @@ -2583,7 +2666,7 @@ impl Context { let stack_idx = (self.stack_size - 1 - idx) as usize; // If outside of tracked range, do nothing - if stack_idx >= 
MAX_TEMP_TYPES { + if stack_idx >= MAX_CTX_TEMPS { return; } @@ -2598,7 +2681,7 @@ impl Context { } MapToLocal => { let idx = mapping.get_local_idx() as usize; - assert!(idx < MAX_LOCAL_TYPES); + assert!(idx < MAX_CTX_LOCALS); let mut new_type = self.get_local_type(idx); new_type.upgrade(opnd_type); self.set_local_type(idx, new_type); @@ -2625,7 +2708,7 @@ impl Context { assert!(idx < self.stack_size); let stack_idx = (self.stack_size - 1 - idx) as usize; - if stack_idx < MAX_TEMP_TYPES { + if stack_idx < MAX_CTX_TEMPS { self.get_temp_mapping(stack_idx) } else { // We can't know the source of this stack operand, so we assume it is @@ -2651,7 +2734,7 @@ impl Context { } // If outside of tracked range, do nothing - if stack_idx >= MAX_TEMP_TYPES { + if stack_idx >= MAX_CTX_TEMPS { return; } @@ -2667,12 +2750,12 @@ impl Context { return; } - if local_idx >= MAX_LOCAL_TYPES { + if local_idx >= MAX_CTX_LOCALS { return } // If any values on the stack map to this local we must detach them - for mapping_idx in 0..MAX_TEMP_TYPES { + for mapping_idx in 0..MAX_CTX_TEMPS { let mapping = self.get_temp_mapping(mapping_idx); let tm = match mapping.get_kind() { MapToStack => mapping, @@ -2704,7 +2787,7 @@ impl Context { // When clearing local types we must detach any stack mappings to those // locals. Even if local values may have changed, stack values will not. - for mapping_idx in 0..MAX_TEMP_TYPES { + for mapping_idx in 0..MAX_CTX_TEMPS { let mapping = self.get_temp_mapping(mapping_idx); if mapping.get_kind() == MapToLocal { let local_idx = mapping.get_local_idx() as usize; @@ -2758,7 +2841,7 @@ impl Context { return TypeDiff::Incompatible; } - if dst.reg_temps != src.reg_temps { + if dst.reg_mapping != src.reg_mapping { return TypeDiff::Incompatible; } @@ -2779,7 +2862,7 @@ impl Context { } // For each local type we track - for i in 0.. MAX_LOCAL_TYPES { + for i in 0.. 
MAX_CTX_LOCALS { let t_src = src.get_local_type(i); let t_dst = dst.get_local_type(i); diff += match t_src.diff(t_dst) { @@ -2845,24 +2928,23 @@ impl Assembler { let stack_size: usize = self.ctx.stack_size.into(); // Keep track of the type and mapping of the value - if stack_size < MAX_TEMP_TYPES { + if stack_size < MAX_CTX_TEMPS { self.ctx.set_temp_mapping(stack_size, mapping); if mapping.get_kind() == MapToLocal { let idx = mapping.get_local_idx(); - assert!((idx as usize) < MAX_LOCAL_TYPES); + assert!((idx as usize) < MAX_CTX_LOCALS); } } - // Allocate a register to the stack operand - if self.ctx.stack_size < MAX_REG_TEMPS { - self.alloc_temp_reg(self.ctx.stack_size); - } - self.ctx.stack_size += 1; self.ctx.sp_offset += 1; - return self.stack_opnd(0); + // Allocate a register to the new stack operand + let stack_opnd = self.stack_opnd(0); + self.alloc_reg(stack_opnd.reg_opnd()); + + stack_opnd } /// Push one new value on the temp stack @@ -2878,7 +2960,7 @@ impl Assembler { /// Push a local variable on the stack pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd { - if local_idx >= MAX_LOCAL_TYPES { + if local_idx >= MAX_CTX_LOCALS { return self.stack_push(Type::Unknown); } @@ -2896,7 +2978,7 @@ impl Assembler { for i in 0..n { let idx: usize = (self.ctx.stack_size as usize) - i - 1; - if idx < MAX_TEMP_TYPES { + if idx < MAX_CTX_TEMPS { self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown)); } } @@ -2914,8 +2996,8 @@ impl Assembler { let method_name_index = (self.ctx.stack_size as usize) - argc - 1; for i in method_name_index..(self.ctx.stack_size - 1) as usize { - if i < MAX_TEMP_TYPES { - let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES { + if i < MAX_CTX_TEMPS { + let next_arg_mapping = if i + 1 < MAX_CTX_TEMPS { self.ctx.get_temp_mapping(i + 1) } else { TempMapping::map_to_stack(Type::Unknown) @@ -2932,8 +3014,22 @@ impl Assembler { idx, num_bits: 64, stack_size: self.ctx.stack_size, + num_locals: None, // not needed for stack temps sp_offset: self.ctx.sp_offset, - reg_temps: None, // push_insn will set this + reg_mapping: None, // push_insn will set this + } + } + + /// Get an operand pointing to a local variable + pub fn local_opnd(&self, ep_offset: u32) -> Opnd { + let idx = self.ctx.stack_size as i32 + ep_offset as i32; + Opnd::Stack { + idx, + num_bits: 64, + stack_size: self.ctx.stack_size, + num_locals: Some(self.get_num_locals().unwrap()), // this must exist for locals + sp_offset: self.ctx.sp_offset, + reg_mapping: None, // push_insn will set this } } } @@ -3132,7 +3228,7 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option< // Change the entry's jump target from an entry stub to a next entry pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) { - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); asm_comment!(asm, "regenerate_entry"); // gen_entry_guard generates cmp + jne. We're rewriting only jne. @@ -3208,7 +3304,7 @@ fn entry_stub_hit_body( // Compile a new entry guard as a next entry let next_entry = cb.get_write_ptr(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?; asm.compile(cb, Some(ocb))?; @@ -3219,7 +3315,7 @@ fn entry_stub_hit_body( let blockref = match find_block_version(blockid, &ctx) { // If an existing block is found, generate a jump to the block. 
Some(blockref) => { - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); asm.jmp(unsafe { blockref.as_ref() }.start_addr.into()); asm.compile(cb, Some(ocb))?; Some(blockref) @@ -3247,7 +3343,7 @@ fn entry_stub_hit_body( pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); asm_comment!(asm, "entry stub hit"); asm.mov(C_ARG_OPNDS[0], entry_address.into()); @@ -3263,7 +3359,7 @@ pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // See gen_entry_guard for how it's used. asm_comment!(asm, "entry_stub_hit() trampoline"); @@ -3286,7 +3382,7 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) { let branch_terminates_block = branch.end_addr.get() == block.get_end_addr(); // Generate the branch - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); asm_comment!(asm, "regenerate_branch"); branch.gen_fn.call( &mut asm, @@ -3597,15 +3693,16 @@ fn delete_empty_defer_block(branch: &Branch, new_block: &Block, target_ctx: Cont /// A piece of code that redeems for more code; a thunk for code. fn gen_branch_stub( ctx: u32, + iseq: IseqPtr, ocb: &mut OutlinedCb, branch_struct_address: usize, target_idx: u32, ) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) }); asm.ctx = Context::decode(ctx); - asm.set_reg_temps(asm.ctx.reg_temps); + asm.set_reg_mapping(asm.ctx.reg_mapping); asm_comment!(asm, "branch stub hit"); if asm.ctx.is_return_landing() { @@ -3621,7 +3718,7 @@ fn gen_branch_stub( } // Spill temps to the VM stack as well for jit.peek_at_stack() - asm.spill_temps(); + asm.spill_regs(); // Set up the arguments unique to this stub for: // @@ -3641,7 +3738,7 @@ fn gen_branch_stub( pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option { let ocb = ocb.unwrap(); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); // For `branch_stub_hit(branch_ptr, target_idx, ec)`, // `branch_ptr` and `target_idx` is different for each stub, @@ -3678,7 +3775,7 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option { /// Return registers to be pushed and popped on branch_stub_hit. pub fn caller_saved_temp_regs() -> impl Iterator + DoubleEndedIterator { - let temp_regs = Assembler::get_temp_regs().iter(); + let temp_regs = Assembler::get_temp_regs2().iter(); let len = temp_regs.len(); // The return value gen_leave() leaves in C_RET_REG // needs to survive the branch_stub_hit() call. @@ -3752,12 +3849,11 @@ pub fn gen_branch( gen_fn: BranchGenFn, ) { let branch = new_pending_branch(jit, gen_fn); - let ocb = jit.get_ocb(); // Get the branch targets or stubs - let target0_addr = branch.set_target(0, target0, ctx0, ocb); + let target0_addr = branch.set_target(0, target0, ctx0, jit); let target1_addr = if let Some(ctx) = ctx1 { - let addr = branch.set_target(1, target1.unwrap(), ctx, ocb); + let addr = branch.set_target(1, target1.unwrap(), ctx, jit); if addr.is_none() { // target1 requested but we're out of memory. // Avoid unwrap() in gen_fn() @@ -3832,7 +3928,7 @@ pub fn defer_compilation( }; // Likely a stub since the context is marked as deferred(). 
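// set_target() now takes &mut JITState rather than &mut OutlinedCb:
// gen_branch_stub() needs the ISEQ to size the stub's Assembler via
// get_iseq_body_local_table_size(), since the stub spills registers
// (including locals) before calling branch_stub_hit().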
- let target0_address = branch.set_target(0, blockid, &next_ctx, jit.get_ocb()); + let target0_address = branch.set_target(0, blockid, &next_ctx, jit); // Pad the block if it has the potential to be invalidated. This must be // done before gen_fn() in case the jump is overwritten by a fallthrough. @@ -4016,7 +4112,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { let cur_dropped_bytes = cb.has_dropped_bytes(); cb.set_write_ptr(block_start); - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); asm.jmp(block_entry_exit.as_side_exit()); cb.set_dropped_bytes(false); asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code"); @@ -4053,7 +4149,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // Create a stub for this branch target - let stub_addr = gen_branch_stub(block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32); + let stub_addr = gen_branch_stub(block.ctx, block.iseq.get(), ocb, branchref.as_ptr() as usize, target_idx as u32); // In case we were unable to generate a stub (e.g. OOM). Use the block's // exit instead of a stub for the block. It's important that we @@ -4187,7 +4283,7 @@ mod tests { // and all local types in 32 bits assert_eq!(mem::size_of::(), 1); assert!(Type::BlockParamProxy as usize <= 0b1111); - assert!(MAX_LOCAL_TYPES * 4 <= 32); + assert!(MAX_CTX_LOCALS * 4 <= 32); } #[test] @@ -4199,7 +4295,7 @@ mod tests { fn local_types() { let mut ctx = Context::default(); - for i in 0..MAX_LOCAL_TYPES { + for i in 0..MAX_CTX_LOCALS { ctx.set_local_type(i, Type::Fixnum); assert_eq!(ctx.get_local_type(i), Type::Fixnum); ctx.set_local_type(i, Type::BlockParamProxy); @@ -4251,41 +4347,30 @@ mod tests { } #[test] - fn reg_temps() { - let mut reg_temps = RegTemps(0); + fn reg_mapping() { + let mut reg_mapping = RegMapping([None, None, None, None, None]); // 0 means every slot is not spilled - for stack_idx in 0..MAX_REG_TEMPS { - assert_eq!(reg_temps.get(stack_idx), false); + for stack_idx in 0..MAX_CTX_TEMPS as u8 { + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(stack_idx)), None); } - // Set 0, 2, 7 (RegTemps: 10100001) - reg_temps.set(0, true); - reg_temps.set(2, true); - reg_temps.set(3, true); - reg_temps.set(3, false); - reg_temps.set(7, true); + // Set 0, 2, 6 (RegMapping: [Some(0), Some(6), Some(2), None, None]) + reg_mapping.alloc_reg(RegOpnd::Stack(0)); + reg_mapping.alloc_reg(RegOpnd::Stack(2)); + reg_mapping.alloc_reg(RegOpnd::Stack(3)); + reg_mapping.dealloc_reg(RegOpnd::Stack(3)); + reg_mapping.alloc_reg(RegOpnd::Stack(6)); // Get 0..8 - assert_eq!(reg_temps.get(0), true); - assert_eq!(reg_temps.get(1), false); - assert_eq!(reg_temps.get(2), true); - assert_eq!(reg_temps.get(3), false); - assert_eq!(reg_temps.get(4), false); - assert_eq!(reg_temps.get(5), false); - assert_eq!(reg_temps.get(6), false); - assert_eq!(reg_temps.get(7), true); - - // Test conflicts - assert_eq!(5, get_option!(num_temp_regs)); - assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict - assert_eq!(reg_temps.conflicts_with(1), false); - assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7 - assert_eq!(reg_temps.conflicts_with(3), false); - assert_eq!(reg_temps.conflicts_with(4), false); - assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0 - assert_eq!(reg_temps.conflicts_with(6), false); - assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2 + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(0)), Some(0)); + 
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(1)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(2)), Some(2)); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(3)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(4)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(5)), None); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(6)), Some(1)); + assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(7)), None); } #[test] @@ -4294,7 +4379,7 @@ mod tests { assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0)); // Try pushing an operand and getting its type - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); asm.stack_push(Type::Fixnum); let top_type = asm.ctx.get_opnd_type(StackOpnd(0)); assert!(top_type == Type::Fixnum); @@ -4304,7 +4389,7 @@ mod tests { #[test] fn context_upgrade_local() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); asm.stack_push_local(0); asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil); assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0))); @@ -4338,7 +4423,7 @@ mod tests { #[test] fn shift_stack_for_send() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new(0); // Push values to simulate send(:name, arg) with 6 items already on-stack for _ in 0..6 { diff --git a/yjit/src/invariants.rs b/yjit/src/invariants.rs index 0a9a18520f..b1da603b27 100644 --- a/yjit/src/invariants.rs +++ b/yjit/src/invariants.rs @@ -683,7 +683,7 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() { cb.set_write_ptr(patch.inline_patch_pos); cb.set_dropped_bytes(false); cb.without_page_end_reserve(|cb| { - let mut asm = crate::backend::ir::Assembler::new(); + let mut asm = crate::backend::ir::Assembler::new_without_iseq(); asm.jmp(patch.outlined_target_pos.as_side_exit()); if asm.compile(cb, None).is_none() { panic!("Failed to apply patch at {:?}", patch.inline_patch_pos); diff --git a/yjit/src/utils.rs b/yjit/src/utils.rs index d76c519887..c4b5fbd2e7 100644 --- a/yjit/src/utils.rs +++ b/yjit/src/utils.rs @@ -273,7 +273,7 @@ mod tests { #[test] fn test_print_int() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); let mut cb = CodeBlock::new_dummy(1024); print_int(&mut asm, Opnd::Imm(42)); @@ -282,7 +282,7 @@ mod tests { #[test] fn test_print_str() { - let mut asm = Assembler::new(); + let mut asm = Assembler::new_without_iseq(); let mut cb = CodeBlock::new_dummy(1024); print_str(&mut asm, "Hello, world!");