YJIT: Local variable register allocation (#11157)

* YJIT: Local variable register allocation

* locals are not stack temps

* Rename RegTemps to RegMappings

* Rename RegMapping to RegOpnd

* Rename local_size to num_locals

* s/stack value/operand/

* Rename spill_temps() to spill_regs()

* Clarify when num_locals becomes None

* Mention that InsnOut uses different registers

* Rename get_reg_mapping to get_reg_opnd

* Resurrect --yjit-temp-regs capability

* Use MAX_CTX_TEMPS and MAX_CTX_LOCALS
Takashi Kokubun 2024-07-15 07:56:57 -07:00 committed by GitHub
parent 461a7b8316
commit ec773e15f4
8 changed files with 415 additions and 301 deletions
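
In short: the commit replaces the RegTemps bitmap (one bit per stack temp index) with a RegMapping array that records, per physical register, which stack temp or local it currently holds. A minimal sketch of the two shapes, summarizing the full definitions further down in this diff (MAX_MAPPED_REGS is 5 here):

// Before: one bit per stack temp slot.
pub struct RegTemps(u8);

// After: one entry per allocatable register, naming what it holds.
pub enum RegOpnd { Stack(u8), Local(u8) }
pub struct RegMapping([Option<RegOpnd>; 5]); // 5 == MAX_MAPPED_REGS

The real types below also carry the derives and the helper methods (get_reg, alloc_reg, dealloc_reg, find_unused_reg) that the rest of the diff uses.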


@ -381,7 +381,7 @@ impl Assembler
}
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals);
let asm = &mut asm_local;
let mut iterator = self.into_draining_iter();
@ -1383,7 +1383,7 @@ mod tests {
use crate::disasm::*;
fn setup_asm() -> (Assembler, CodeBlock) {
(Assembler::new(), CodeBlock::new_dummy(1024))
(Assembler::new(0), CodeBlock::new_dummy(1024))
}
#[test]
@ -1682,7 +1682,7 @@ mod tests {
#[test]
fn test_bcond_straddling_code_pages() {
const LANDING_PAGE: usize = 65;
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);
// Skip to near the end of the page. Room for two instructions.


@ -2,11 +2,11 @@ use std::collections::HashMap;
use std::fmt;
use std::convert::From;
use std::mem::take;
use crate::codegen::{gen_outlined_exit, gen_counted_exit};
use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE};
use crate::codegen::{gen_counted_exit, gen_outlined_exit};
use crate::cruby::{vm_stack_canary, SIZEOF_VALUE_I32, VALUE, VM_ENV_DATA_SIZE};
use crate::virtualmem::CodePtr;
use crate::asm::{CodeBlock, OutlinedCb};
use crate::core::{Context, RegTemps, MAX_REG_TEMPS};
use crate::core::{Context, RegMapping, RegOpnd, MAX_CTX_TEMPS};
use crate::options::*;
use crate::stats::*;
@ -77,10 +77,12 @@ pub enum Opnd
num_bits: u8,
/// ctx.stack_size when this operand is made. Used with idx for Opnd::Reg.
stack_size: u8,
/// The number of local variables in the current ISEQ. Used only for locals.
num_locals: Option<u32>,
/// ctx.sp_offset when this operand is made. Used with idx for Opnd::Mem.
sp_offset: i8,
/// ctx.reg_temps when this operand is read. Used for register allocation.
reg_temps: Option<RegTemps>
/// ctx.reg_mapping when this operand is read. Used for register allocation.
reg_mapping: Option<RegMapping>
},
// Low-level operands, for lowering
@ -172,7 +174,7 @@ impl Opnd
Opnd::Reg(reg) => Some(Opnd::Reg(reg.with_num_bits(num_bits))),
Opnd::Mem(Mem { base, disp, .. }) => Some(Opnd::Mem(Mem { base, disp, num_bits })),
Opnd::InsnOut { idx, .. } => Some(Opnd::InsnOut { idx, num_bits }),
Opnd::Stack { idx, stack_size, sp_offset, reg_temps, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps }),
Opnd::Stack { idx, stack_size, num_locals, sp_offset, reg_mapping, .. } => Some(Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping }),
_ => None,
}
}
@ -227,28 +229,26 @@ impl Opnd
Self::match_num_bits_iter(opnds.iter())
}
/// Calculate Opnd::Stack's index from the stack bottom.
pub fn stack_idx(&self) -> u8 {
self.get_stack_idx().unwrap()
/// Convert Opnd::Stack into RegMapping
pub fn reg_opnd(&self) -> RegOpnd {
self.get_reg_opnd().unwrap()
}
/// Calculate Opnd::Stack's index from the stack bottom if it's Opnd::Stack.
pub fn get_stack_idx(&self) -> Option<u8> {
match self {
Opnd::Stack { idx, stack_size, .. } => {
Some((*stack_size as isize - *idx as isize - 1) as u8)
},
_ => None
}
}
/// Get the index for stack temp registers.
pub fn reg_idx(&self) -> usize {
match self {
Opnd::Stack { .. } => {
self.stack_idx() as usize % get_option!(num_temp_regs)
},
_ => unreachable!(),
/// Convert an operand into RegMapping if it's Opnd::Stack
pub fn get_reg_opnd(&self) -> Option<RegOpnd> {
match *self {
Opnd::Stack { idx, stack_size, num_locals, .. } => Some(
if let Some(num_locals) = num_locals {
let last_idx = stack_size as i32 + VM_ENV_DATA_SIZE as i32 - 1;
assert!(last_idx <= idx, "Local index {} must be >= last local index {}", idx, last_idx);
assert!(idx <= last_idx + num_locals as i32, "Local index {} must be < last local index {} + local size {}", idx, last_idx, num_locals);
RegOpnd::Local((last_idx + num_locals as i32 - idx) as u8)
} else {
assert!(idx < stack_size as i32);
RegOpnd::Stack((stack_size as i32 - idx - 1) as u8)
}
),
_ => None,
}
}
}
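
To make the Local arm above concrete, here is a small standalone check of the index arithmetic, using illustrative numbers that are not taken from the diff (VM_ENV_DATA_SIZE assumed to be 3, four locals, stack depth 2):

fn main() {
    // Mirrors asm.local_opnd(ep_offset) building Opnd::Stack { idx, .. } (see the hunk adding local_opnd further down)
    let (stack_size, num_locals, vm_env_data_size): (i32, i32, i32) = (2, 4, 3);
    let local_idx = 1;                                              // second local in the ISEQ
    let ep_offset = num_locals + vm_env_data_size - 1 - local_idx;  // 5
    let idx = stack_size + ep_offset;                               // 7
    // Mirrors Opnd::get_reg_opnd() recovering the local index from idx
    let last_idx = stack_size + vm_env_data_size - 1;               // 4
    assert!(last_idx <= idx && idx <= last_idx + num_locals);
    assert_eq!(last_idx + num_locals - idx, local_idx);             // => RegOpnd::Local(1)
}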
@ -974,7 +974,7 @@ pub struct SideExitContext {
/// Context fields used by get_generic_ctx()
pub stack_size: u8,
pub sp_offset: i8,
pub reg_temps: RegTemps,
pub reg_mapping: RegMapping,
pub is_return_landing: bool,
pub is_deferred: bool,
}
@ -986,7 +986,7 @@ impl SideExitContext {
pc,
stack_size: ctx.get_stack_size(),
sp_offset: ctx.get_sp_offset(),
reg_temps: ctx.get_reg_temps(),
reg_mapping: ctx.get_reg_mapping(),
is_return_landing: ctx.is_return_landing(),
is_deferred: ctx.is_deferred(),
};
@ -1002,7 +1002,7 @@ impl SideExitContext {
let mut ctx = Context::default();
ctx.set_stack_size(self.stack_size);
ctx.set_sp_offset(self.sp_offset);
ctx.set_reg_temps(self.reg_temps);
ctx.set_reg_mapping(self.reg_mapping);
if self.is_return_landing {
ctx.set_as_return_landing();
}
@ -1031,6 +1031,13 @@ pub struct Assembler {
/// Context for generating the current insn
pub ctx: Context,
/// The current ISEQ's local table size. asm.local_opnd() uses this, and it's
/// sometimes hard to pass this value, e.g. asm.spill_temps() in asm.ccall().
///
/// `None` means we're not assembling for an ISEQ, or that the local size is
/// not relevant.
pub(super) num_locals: Option<u32>,
/// Side exit caches for each SideExitContext
pub(super) side_exits: HashMap<SideExitContext, CodePtr>,
@ -1046,16 +1053,31 @@ pub struct Assembler {
impl Assembler
{
pub fn new() -> Self {
Self::new_with_label_names(Vec::default(), HashMap::default())
/// Create an Assembler for ISEQ-specific code.
/// It includes all inline code and some outlined code like side exits and stubs.
pub fn new(num_locals: u32) -> Self {
Self::new_with_label_names(Vec::default(), HashMap::default(), Some(num_locals))
}
pub fn new_with_label_names(label_names: Vec<String>, side_exits: HashMap<SideExitContext, CodePtr>) -> Self {
/// Create an Assembler for outlined code that is not specific to any ISEQ,
/// e.g. trampolines that are shared globally.
pub fn new_without_iseq() -> Self {
Self::new_with_label_names(Vec::default(), HashMap::default(), None)
}
/// Create an Assembler with parameters that are populated by another Assembler instance.
/// This API is used for copying an Assembler for the next compiler pass.
pub fn new_with_label_names(
label_names: Vec<String>,
side_exits: HashMap<SideExitContext, CodePtr>,
num_locals: Option<u32>
) -> Self {
Self {
insns: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
live_ranges: Vec::with_capacity(ASSEMBLER_INSNS_CAPACITY),
label_names,
ctx: Context::default(),
num_locals,
side_exits,
side_exit_pc: None,
side_exit_stack_size: None,
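
The usage split is visible in the call sites later in this diff: ISEQ-specific code (gen_entry_prologue(), gen_single_block(), gen_branch_stub()) constructs the assembler with the local table size via Assembler::new(get_iseq_body_local_table_size(iseq)) or Assembler::new(jit.num_locals()), while globally shared code such as gen_leave_exit(), gen_entry_stub(), and the invalidation paths switches to Assembler::new_without_iseq().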
@ -1064,11 +1086,16 @@ impl Assembler
}
/// Get the list of registers that can be used for stack temps.
pub fn get_temp_regs() -> &'static [Reg] {
pub fn get_temp_regs2() -> &'static [Reg] {
let num_regs = get_option!(num_temp_regs);
&TEMP_REGS[0..num_regs]
}
/// Get the number of locals for the ISEQ being compiled
pub fn get_num_locals(&self) -> Option<u32> {
self.num_locals
}
/// Set a context for generating side exits
pub fn set_side_exit_context(&mut self, pc: *mut VALUE, stack_size: u8) {
self.side_exit_pc = Some(pc);
@ -1090,31 +1117,32 @@ impl Assembler
let mut opnd_iter = insn.opnd_iter_mut();
while let Some(opnd) = opnd_iter.next() {
match opnd {
match *opnd {
// If we find any InsnOut from previous instructions, we're going to update
// the live range of the previous instruction to point to this one.
Opnd::InsnOut { idx, .. } => {
assert!(*idx < self.insns.len());
self.live_ranges[*idx] = insn_idx;
assert!(idx < self.insns.len());
self.live_ranges[idx] = insn_idx;
}
Opnd::Mem(Mem { base: MemBase::InsnOut(idx), .. }) => {
assert!(*idx < self.insns.len());
self.live_ranges[*idx] = insn_idx;
assert!(idx < self.insns.len());
self.live_ranges[idx] = insn_idx;
}
// Set current ctx.reg_temps to Opnd::Stack.
Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: None } => {
// Set current ctx.reg_mapping to Opnd::Stack.
Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: None } => {
assert_eq!(
self.ctx.get_stack_size() as i16 - self.ctx.get_sp_offset() as i16,
*stack_size as i16 - *sp_offset as i16,
stack_size as i16 - sp_offset as i16,
"Opnd::Stack (stack_size: {}, sp_offset: {}) expects a different SP position from asm.ctx (stack_size: {}, sp_offset: {})",
*stack_size, *sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(),
stack_size, sp_offset, self.ctx.get_stack_size(), self.ctx.get_sp_offset(),
);
*opnd = Opnd::Stack {
idx: *idx,
num_bits: *num_bits,
stack_size: *stack_size,
sp_offset: *sp_offset,
reg_temps: Some(self.ctx.get_reg_temps()),
idx,
num_bits,
stack_size,
num_locals,
sp_offset,
reg_mapping: Some(self.ctx.get_reg_mapping()),
};
}
_ => {}
@ -1141,7 +1169,7 @@ impl Assembler
// Get a cached side exit
let side_exit = match self.side_exits.get(&side_exit_context) {
None => {
let exit_code = gen_outlined_exit(side_exit_context.pc, &side_exit_context.get_ctx(), ocb)?;
let exit_code = gen_outlined_exit(side_exit_context.pc, self.num_locals.unwrap(), &side_exit_context.get_ctx(), ocb)?;
self.side_exits.insert(*side_exit_context, exit_code);
exit_code
}
@ -1175,20 +1203,20 @@ impl Assembler
}
// Convert Opnd::Stack to Opnd::Reg
fn reg_opnd(opnd: &Opnd) -> Opnd {
let regs = Assembler::get_temp_regs();
fn reg_opnd(opnd: &Opnd, reg_idx: usize) -> Opnd {
let regs = Assembler::get_temp_regs2();
if let Opnd::Stack { num_bits, .. } = *opnd {
incr_counter!(temp_reg_opnd);
Opnd::Reg(regs[opnd.reg_idx()]).with_num_bits(num_bits).unwrap()
Opnd::Reg(regs[reg_idx]).with_num_bits(num_bits).unwrap()
} else {
unreachable!()
}
}
match opnd {
Opnd::Stack { reg_temps, .. } => {
if opnd.stack_idx() < MAX_REG_TEMPS && reg_temps.unwrap().get(opnd.stack_idx()) {
reg_opnd(opnd)
Opnd::Stack { reg_mapping, .. } => {
if let Some(reg_idx) = reg_mapping.unwrap().get_reg(opnd.reg_opnd()) {
reg_opnd(opnd, reg_idx)
} else {
mem_opnd(opnd)
}
@ -1198,18 +1226,11 @@ impl Assembler
}
/// Allocate a register to a stack temp if available.
pub fn alloc_temp_reg(&mut self, stack_idx: u8) {
if get_option!(num_temp_regs) == 0 {
return;
}
pub fn alloc_reg(&mut self, mapping: RegOpnd) {
// Allocate a register if there's no conflict.
let mut reg_temps = self.ctx.get_reg_temps();
if reg_temps.conflicts_with(stack_idx) {
assert!(!reg_temps.get(stack_idx));
} else {
reg_temps.set(stack_idx, true);
self.set_reg_temps(reg_temps);
let mut reg_mapping = self.ctx.get_reg_mapping();
if reg_mapping.alloc_reg(mapping) {
self.set_reg_mapping(reg_mapping);
}
}
@ -1220,47 +1241,58 @@ impl Assembler
self.ctx.clear_local_types();
}
/// Spill all live stack temps from registers to the stack
pub fn spill_temps(&mut self) {
/// Spill all live registers to the stack
pub fn spill_regs(&mut self) {
// Forget registers above the stack top
let mut reg_temps = self.ctx.get_reg_temps();
for stack_idx in self.ctx.get_stack_size()..MAX_REG_TEMPS {
reg_temps.set(stack_idx, false);
let mut reg_mapping = self.ctx.get_reg_mapping();
for stack_idx in self.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx));
}
self.set_reg_temps(reg_temps);
self.set_reg_mapping(reg_mapping);
// Spill live stack temps
if self.ctx.get_reg_temps() != RegTemps::default() {
asm_comment!(self, "spill_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), RegTemps::default().as_u8());
for stack_idx in 0..u8::min(MAX_REG_TEMPS, self.ctx.get_stack_size()) {
if self.ctx.get_reg_temps().get(stack_idx) {
if self.ctx.get_reg_mapping() != RegMapping::default() {
asm_comment!(self, "spill_temps: {:?} -> {:?}", self.ctx.get_reg_mapping(), RegMapping::default());
// Spill stack temps
for stack_idx in 0..u8::min(MAX_CTX_TEMPS as u8, self.ctx.get_stack_size()) {
if reg_mapping.dealloc_reg(RegOpnd::Stack(stack_idx)) {
let idx = self.ctx.get_stack_size() - 1 - stack_idx;
self.spill_temp(self.stack_opnd(idx.into()));
reg_temps.set(stack_idx, false);
}
}
self.ctx.set_reg_temps(reg_temps);
// Spill locals
for local_idx in 0..MAX_CTX_TEMPS as u8 {
if reg_mapping.dealloc_reg(RegOpnd::Local(local_idx)) {
let first_local_ep_offset = self.num_locals.unwrap() + VM_ENV_DATA_SIZE - 1;
let ep_offset = first_local_ep_offset - local_idx as u32;
self.spill_temp(self.local_opnd(ep_offset));
}
}
self.ctx.set_reg_mapping(reg_mapping);
}
// Every stack temp should have been spilled
assert_eq!(self.ctx.get_reg_temps(), RegTemps::default());
assert_eq!(self.ctx.get_reg_mapping(), RegMapping::default());
}
/// Spill a stack temp from a register to the stack
fn spill_temp(&mut self, opnd: Opnd) {
assert!(self.ctx.get_reg_temps().get(opnd.stack_idx()));
assert_ne!(self.ctx.get_reg_mapping().get_reg(opnd.reg_opnd()), None);
// Use different RegTemps for dest and src operands
let reg_temps = self.ctx.get_reg_temps();
let mut mem_temps = reg_temps;
mem_temps.set(opnd.stack_idx(), false);
// Use different RegMappings for dest and src operands
let reg_mapping = self.ctx.get_reg_mapping();
let mut mem_mappings = reg_mapping;
mem_mappings.dealloc_reg(opnd.reg_opnd());
// Move the stack operand from a register to memory
match opnd {
Opnd::Stack { idx, num_bits, stack_size, sp_offset, .. } => {
Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, .. } => {
self.mov(
Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(mem_temps) },
Opnd::Stack { idx, num_bits, stack_size, sp_offset, reg_temps: Some(reg_temps) },
Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(mem_mappings) },
Opnd::Stack { idx, num_bits, stack_size, num_locals, sp_offset, reg_mapping: Some(reg_mapping) },
);
}
_ => unreachable!(),
@ -1269,20 +1301,10 @@ impl Assembler
}
/// Update which stack temps are in a register
pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
if self.ctx.get_reg_temps() != reg_temps {
asm_comment!(self, "reg_temps: {:08b} -> {:08b}", self.ctx.get_reg_temps().as_u8(), reg_temps.as_u8());
self.ctx.set_reg_temps(reg_temps);
self.verify_reg_temps();
}
}
/// Assert there's no conflict in stack temp register allocation
fn verify_reg_temps(&self) {
for stack_idx in 0..MAX_REG_TEMPS {
if self.ctx.get_reg_temps().get(stack_idx) {
assert!(!self.ctx.get_reg_temps().conflicts_with(stack_idx));
}
pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) {
if self.ctx.get_reg_mapping() != reg_mapping {
asm_comment!(self, "reg_mapping: {:?} -> {:?}", self.ctx.get_reg_mapping(), reg_mapping);
self.ctx.set_reg_mapping(reg_mapping);
}
}
@ -1411,7 +1433,7 @@ impl Assembler
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
// shifted_live_ranges is indexed by mapped indexes in insn operands.
let mut shifted_live_ranges: Vec<usize> = live_ranges.clone();
let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals);
let mut iterator = self.into_draining_iter();
while let Some((index, mut insn)) = iterator.next_mapped() {
@ -1703,24 +1725,24 @@ impl Assembler {
// Let vm_check_canary() assert this ccall's leafness if leaf_ccall is set
let canary_opnd = self.set_stack_canary(&opnds);
let old_temps = self.ctx.get_reg_temps(); // with registers
let old_temps = self.ctx.get_reg_mapping(); // with registers
// Spill stack temp registers since they are caller-saved registers.
// Note that this doesn't spill stack temps that are already popped
// but may still be used in the C arguments.
self.spill_temps();
let new_temps = self.ctx.get_reg_temps(); // all spilled
self.spill_regs();
let new_temps = self.ctx.get_reg_mapping(); // all spilled
// Temporarily manipulate RegTemps so that we can use registers
// Temporarily manipulate RegMappings so that we can use registers
// to pass stack operands that are already spilled above.
self.ctx.set_reg_temps(old_temps);
self.ctx.set_reg_mapping(old_temps);
// Call a C function
let out = self.next_opnd_out(Opnd::match_num_bits(&opnds));
self.push_insn(Insn::CCall { fptr, opnds, out });
// Registers in old_temps may be clobbered by the above C call,
// so rollback the manipulated RegTemps to a spilled version.
self.ctx.set_reg_temps(new_temps);
// so rollback the manipulated RegMappings to a spilled version.
self.ctx.set_reg_mapping(new_temps);
// Clear the canary after use
if let Some(canary_opnd) = canary_opnd {
@ -1738,7 +1760,7 @@ impl Assembler {
// If the slot is already used, which is a valid optimization to avoid spills,
// give up the verification.
let canary_opnd = if cfg!(debug_assertions) && self.leaf_ccall && opnds.iter().all(|opnd|
opnd.get_stack_idx() != canary_opnd.get_stack_idx()
opnd.get_reg_opnd() != canary_opnd.get_reg_opnd()
) {
asm_comment!(self, "set stack canary");
self.mov(canary_opnd, vm_stack_canary().into());
@ -1767,9 +1789,9 @@ impl Assembler {
pub fn cpop_all(&mut self) {
self.push_insn(Insn::CPopAll);
// Re-enable ccall's RegTemps assertion disabled by cpush_all.
// Re-enable ccall's RegMappings assertion disabled by cpush_all.
// cpush_all + cpop_all preserve all stack temp registers, so it's safe.
self.set_reg_temps(self.ctx.get_reg_temps());
self.set_reg_mapping(self.ctx.get_reg_mapping());
}
pub fn cpop_into(&mut self, opnd: Opnd) {
@ -1787,7 +1809,7 @@ impl Assembler {
// Temps will be marked back as being in registers by cpop_all.
// We assume that cpush_all + cpop_all are used for C functions in utils.rs
// that don't require spill_temps for GC.
self.set_reg_temps(RegTemps::default());
self.set_reg_mapping(RegMapping::default());
}
pub fn cret(&mut self, opnd: Opnd) {


@ -1,19 +1,19 @@
#![cfg(test)]
use crate::asm::{CodeBlock};
use crate::asm::CodeBlock;
use crate::backend::ir::*;
use crate::cruby::*;
use crate::utils::c_callable;
#[test]
fn test_add() {
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
let out = asm.add(SP, Opnd::UImm(1));
let _ = asm.add(out, Opnd::UImm(2));
}
#[test]
fn test_alloc_regs() {
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
// Get the first output that we're going to reuse later.
let out1 = asm.add(EC, Opnd::UImm(1));
@ -62,7 +62,7 @@ fn test_alloc_regs() {
fn setup_asm() -> (Assembler, CodeBlock) {
return (
Assembler::new(),
Assembler::new(0),
CodeBlock::new_dummy(1024)
);
}
@ -194,7 +194,7 @@ fn test_c_call()
#[test]
fn test_alloc_ccall_regs() {
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
let out1 = asm.ccall(0 as *const u8, vec![]);
let out2 = asm.ccall(0 as *const u8, vec![out1]);
asm.mov(EC, out2);
@ -283,8 +283,7 @@ fn test_bake_string() {
#[test]
fn test_draining_iterator() {
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
let _ = asm.load(Opnd::None);
asm.store(Opnd::None, Opnd::None);
@ -315,7 +314,7 @@ fn test_cmp_8_bit() {
fn test_no_pos_marker_callback_when_compile_fails() {
// When compilation fails (e.g. when out of memory), the code written out is malformed.
// We don't want to invoke the pos_marker callbacks with positions of malformed code.
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
// Markers around code to exhaust memory limit
let fail_if_called = |_code_ptr, _cb: &_| panic!("pos_marker callback should not be called");


@ -79,7 +79,7 @@ impl From<&Opnd> for X86Opnd {
}
}
/// List of registers that can be used for stack temps.
/// List of registers that can be used for stack temps and locals.
pub static TEMP_REGS: [Reg; 5] = [RSI_REG, RDI_REG, R8_REG, R9_REG, R10_REG];
impl Assembler
@ -112,7 +112,7 @@ impl Assembler
fn x86_split(mut self) -> Assembler
{
let live_ranges: Vec<usize> = take(&mut self.live_ranges);
let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits));
let mut asm = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals);
let mut iterator = self.into_draining_iter();
while let Some((index, mut insn)) = iterator.next_unmapped() {
@ -895,14 +895,14 @@ impl Assembler
#[cfg(test)]
mod tests {
use crate::disasm::{assert_disasm};
use crate::disasm::assert_disasm;
#[cfg(feature = "disasm")]
use crate::disasm::{unindent, disasm_addr_range};
use super::*;
fn setup_asm() -> (Assembler, CodeBlock) {
(Assembler::new(), CodeBlock::new_dummy(1024))
(Assembler::new(0), CodeBlock::new_dummy(1024))
}
#[test]


@ -402,6 +402,11 @@ impl<'a> JITState<'a> {
_ => false,
}
}
/// Return the number of locals in the current ISEQ
pub fn num_locals(&self) -> u32 {
unsafe { get_iseq_body_local_table_size(self.iseq) }
}
}
/// Macro to call jit.perf_symbol_push() without evaluating arguments when
@ -646,7 +651,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
}
// Verify stack operand types
let top_idx = cmp::min(ctx.get_stack_size(), MAX_TEMP_TYPES as u8);
let top_idx = cmp::min(ctx.get_stack_size(), MAX_CTX_TEMPS as u8);
for i in 0..top_idx {
let learned_mapping = ctx.get_opnd_mapping(StackOpnd(i));
let learned_type = ctx.get_opnd_type(StackOpnd(i));
@ -693,7 +698,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
// Verify local variable types
let local_table_size = unsafe { get_iseq_body_local_table_size(jit.iseq) };
let top_idx: usize = cmp::min(local_table_size as usize, MAX_TEMP_TYPES);
let top_idx: usize = cmp::min(local_table_size as usize, MAX_CTX_TEMPS);
for i in 0..top_idx {
let learned_type = ctx.get_local_type(i);
let learned_type = relax_type_with_singleton_class_assumption(learned_type);
@ -717,7 +722,7 @@ fn verify_ctx(jit: &JITState, ctx: &Context) {
// interpreter state.
fn gen_stub_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
gen_counter_incr(&mut asm, Counter::exit_from_branch_stub);
@ -748,7 +753,7 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
}
// Spill stack temps before returning to the interpreter
asm.spill_temps();
asm.spill_regs();
// Generate the code to exit to the interpreters
// Write the adjusted SP back into the CFP
@ -804,11 +809,11 @@ fn gen_exit(exit_pc: *mut VALUE, asm: &mut Assembler) {
/// moment, so there is one unique side exit for each context. Note that
/// it's incorrect to jump to the side exit after any ctx stack push operations
/// since they change the logic required for reconstructing interpreter state.
pub fn gen_outlined_exit(exit_pc: *mut VALUE, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> {
pub fn gen_outlined_exit(exit_pc: *mut VALUE, num_locals: u32, ctx: &Context, ocb: &mut OutlinedCb) -> Option<CodePtr> {
let mut cb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new(num_locals);
asm.ctx = *ctx;
asm.set_reg_temps(ctx.get_reg_temps());
asm.set_reg_mapping(ctx.get_reg_mapping());
gen_exit(exit_pc, &mut asm);
@ -826,7 +831,7 @@ pub fn gen_counted_exit(exit_pc: *mut VALUE, side_exit: CodePtr, ocb: &mut Outli
None => return Some(side_exit),
};
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
// Increment a counter
gen_counter_incr(&mut asm, counter);
@ -876,7 +881,7 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> O
jit.block_entry_exit = Some(entry_exit?);
} else {
let block_entry_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, jit.starting_insn_idx.into()) };
jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, block_starting_context, jit.get_ocb())?);
jit.block_entry_exit = Some(gen_outlined_exit(block_entry_pc, jit.num_locals(), block_starting_context, jit.get_ocb())?);
}
Some(())
@ -885,7 +890,7 @@ pub fn jit_ensure_block_entry_exit(jit: &mut JITState, asm: &mut Assembler) -> O
// Landing code for when c_return tracing is enabled. See full_cfunc_return().
fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
// This chunk of code expects REG_EC to be filled properly and
// RAX to contain the return value of the C method.
@ -915,7 +920,7 @@ fn gen_full_cfunc_return(ocb: &mut OutlinedCb) -> Option<CodePtr> {
/// This is used by gen_leave() and gen_entry_prologue()
fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
// gen_leave() fully reconstructs interpreter state and leaves the
// return value in C_RET_OPND before coming here.
@ -942,7 +947,7 @@ fn gen_leave_exit(ocb: &mut OutlinedCb) -> Option<CodePtr> {
// the caller's stack, which is different from gen_stub_exit().
fn gen_leave_exception(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
// gen_leave() leaves the return value in C_RET_OPND before coming here.
let ruby_ret_val = asm.live_reg_opnd(C_RET_OPND);
@ -1011,7 +1016,7 @@ pub fn gen_entry_prologue(
) -> Option<CodePtr> {
let code_ptr = cb.get_write_ptr();
let mut asm = Assembler::new();
let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
if get_option_ref!(dump_disasm).is_some() {
asm_comment!(asm, "YJIT entry point: {}", iseq_get_location(iseq, 0));
} else {
@ -1134,7 +1139,7 @@ fn end_block_with_jump(
if jit.record_boundary_patch_point {
jit.record_boundary_patch_point = false;
let exit_pc = unsafe { rb_iseq_pc_at_idx(jit.iseq, continuation_insn_idx.into())};
let exit_pos = gen_outlined_exit(exit_pc, &reset_depth, jit.get_ocb());
let exit_pos = gen_outlined_exit(exit_pc, jit.num_locals(), &reset_depth, jit.get_ocb());
record_global_inval_patch(asm, exit_pos?);
}
@ -1180,7 +1185,7 @@ pub fn gen_single_block(
jit.iseq = blockid.iseq;
// Create a backend assembler instance
let mut asm = Assembler::new();
let mut asm = Assembler::new(jit.num_locals());
asm.ctx = ctx;
#[cfg(feature = "disasm")]
@ -1188,7 +1193,7 @@ pub fn gen_single_block(
let blockid_idx = blockid.idx;
let chain_depth = if asm.ctx.get_chain_depth() > 0 { format!("(chain_depth: {})", asm.ctx.get_chain_depth()) } else { "".to_string() };
asm_comment!(asm, "Block: {} {}", iseq_get_location(blockid.iseq, blockid_idx), chain_depth);
asm_comment!(asm, "reg_temps: {:08b}", asm.ctx.get_reg_temps().as_u8());
asm_comment!(asm, "reg_mapping: {:?}", asm.ctx.get_reg_mapping());
}
// Mark the start of an ISEQ for --yjit-perf
@ -1233,14 +1238,14 @@ pub fn gen_single_block(
// stack_pop doesn't immediately deallocate a register for stack temps,
// but it's safe to do so at this instruction boundary.
for stack_idx in asm.ctx.get_stack_size()..MAX_REG_TEMPS {
asm.ctx.dealloc_temp_reg(stack_idx);
for stack_idx in asm.ctx.get_stack_size()..MAX_CTX_TEMPS as u8 {
asm.ctx.dealloc_reg(RegOpnd::Stack(stack_idx));
}
// If previous instruction requested to record the boundary
if jit.record_boundary_patch_point {
// Generate an exit to this instruction and record it
let exit_pos = gen_outlined_exit(jit.pc, &asm.ctx, jit.get_ocb()).ok_or(())?;
let exit_pos = gen_outlined_exit(jit.pc, jit.num_locals(), &asm.ctx, jit.get_ocb()).ok_or(())?;
record_global_inval_patch(&mut asm, exit_pos);
jit.record_boundary_patch_point = false;
}
@ -1803,7 +1808,7 @@ fn gen_splatkw(
asm.mov(stack_ret, hash);
asm.stack_push(block_type);
// Leave block_opnd spilled by ccall as is
asm.ctx.dealloc_temp_reg(asm.ctx.get_stack_size() - 1);
asm.ctx.dealloc_reg(RegOpnd::Stack(asm.ctx.get_stack_size() - 1));
}
Some(KeepCompiling)
@ -2278,7 +2283,7 @@ fn gen_getlocal_generic(
) -> Option<CodegenStatus> {
let local_opnd = if level == 0 && jit.assume_no_ep_escape(asm) {
// Load the local using SP register
asm.ctx.ep_opnd(-(ep_offset as i32))
asm.local_opnd(ep_offset)
} else {
// Load environment pointer EP (level 0) from CFP
let ep_opnd = gen_get_ep(asm, level);
@ -2359,8 +2364,11 @@ fn gen_setlocal_generic(
let (flags_opnd, local_opnd) = if level == 0 && jit.assume_no_ep_escape(asm) {
// Load flags and the local using SP register
let local_opnd = asm.ctx.ep_opnd(-(ep_offset as i32));
let flags_opnd = asm.ctx.ep_opnd(VM_ENV_DATA_INDEX_FLAGS as i32);
let local_opnd = asm.local_opnd(ep_offset);
// Allocate a register to the new local operand
asm.alloc_reg(local_opnd.reg_opnd());
(flags_opnd, local_opnd)
} else {
// Load flags and the local for the level
@ -3071,7 +3079,7 @@ fn gen_set_ivar(
// If we know the stack value is an immediate, there's no need to
// generate WB code.
if !stack_type.is_imm() {
asm.spill_temps(); // for ccall (unconditionally spill them for RegTemps consistency)
asm.spill_regs(); // for ccall (unconditionally spill them for RegMappings consistency)
let skip_wb = asm.new_label("skip_wb");
// If the value we're writing is an immediate, we don't need to WB
asm.test(write_val, (RUBY_IMMEDIATE_MASK as u64).into());
@ -3516,7 +3524,7 @@ fn gen_equality_specialized(
let ret = asm.new_label("ret");
// Spill for ccall. For safety, unconditionally spill temps before branching.
asm.spill_temps();
asm.spill_regs();
// If they are equal by identity, return true
asm.cmp(a_opnd, b_opnd);
@ -5482,7 +5490,7 @@ fn jit_rb_str_uplus(
// We allocate when we dup the string
jit_prepare_call_with_gc(jit, asm);
asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency.
asm_comment!(asm, "Unary plus on string");
let recv_opnd = asm.stack_pop(1);
@ -5500,7 +5508,7 @@ fn jit_rb_str_uplus(
asm.jz(ret_label);
// Str is frozen - duplicate it
asm.spill_temps(); // for ccall
asm.spill_regs(); // for ccall
let ret_opnd = asm.ccall(rb_str_dup as *const u8, vec![recv_opnd]);
asm.mov(stack_ret, ret_opnd);
@ -5782,7 +5790,7 @@ fn jit_rb_str_concat(
// rb_str_buf_append may raise Encoding::CompatibilityError, but we accept compromised
// backtraces on this method since the interpreter does the same thing on opt_ltlt.
jit_prepare_non_leaf_call(jit, asm);
asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency.
let concat_arg = asm.stack_pop(1);
let recv = asm.stack_pop(1);
@ -5815,7 +5823,7 @@ fn jit_rb_str_concat(
// If encodings are different, use a slower encoding-aware concatenate
asm.write_label(enc_mismatch);
asm.spill_temps(); // Ignore the register for the other local branch
asm.spill_regs(); // Ignore the register for the other local branch
let ret_opnd = asm.ccall(rb_str_buf_append as *const u8, vec![recv, concat_arg]);
let stack_ret = asm.stack_push(Type::TString);
asm.mov(stack_ret, ret_opnd);
@ -6321,7 +6329,7 @@ fn gen_push_frame(
if frame.iseq.is_some() {
// Spill stack temps to let the callee use them (must be done before changing the SP register)
asm.spill_temps();
asm.spill_regs();
// Saving SP before calculating ep avoids a dependency on a register
// However this must be done after referencing frame.recv, which may be SP-relative
@ -7502,7 +7510,7 @@ fn gen_send_iseq(
};
// Store rest param to memory to avoid register shuffle as
// we won't be reading it for the remainder of the block.
asm.ctx.dealloc_temp_reg(rest_param.stack_idx());
asm.ctx.dealloc_reg(rest_param.reg_opnd());
asm.store(rest_param, rest_param_array);
}
@ -7601,7 +7609,7 @@ fn gen_send_iseq(
// Write the CI in to the stack and ensure that it actually gets
// flushed to memory
let ci_opnd = asm.stack_opnd(-1);
asm.ctx.dealloc_temp_reg(ci_opnd.stack_idx());
asm.ctx.dealloc_reg(ci_opnd.reg_opnd());
asm.mov(ci_opnd, VALUE(ci as usize).into());
}
@ -7714,7 +7722,7 @@ fn gen_send_iseq(
// Pop arguments and receiver in return context and
// mark it as a continuation of gen_leave()
let mut return_asm = Assembler::new();
let mut return_asm = Assembler::new(jit.num_locals());
return_asm.ctx = asm.ctx;
return_asm.stack_pop(sp_offset.try_into().unwrap());
return_asm.ctx.set_sp_offset(0); // We set SP on the caller's frame above
@ -7967,7 +7975,7 @@ fn gen_iseq_kw_call(
kwargs_order[kwrest_idx] = 0;
}
// Put kwrest straight into memory, since we might pop it later
asm.ctx.dealloc_temp_reg(stack_kwrest.stack_idx());
asm.ctx.dealloc_reg(stack_kwrest.reg_opnd());
asm.mov(stack_kwrest, kwrest);
if stack_kwrest_idx >= 0 {
asm.ctx.set_opnd_mapping(stack_kwrest.into(), TempMapping::map_to_stack(kwrest_type));
@ -8065,7 +8073,7 @@ fn gen_iseq_kw_call(
if let Some(kwrest_type) = kwrest_type {
let kwrest = asm.stack_push(kwrest_type);
// We put the kwrest parameter in memory earlier
asm.ctx.dealloc_temp_reg(kwrest.stack_idx());
asm.ctx.dealloc_reg(kwrest.reg_opnd());
argc += 1;
}
@ -9858,7 +9866,7 @@ fn gen_getblockparam(
// Save the PC and SP because we might allocate
jit_prepare_call_with_gc(jit, asm);
asm.spill_temps(); // For ccall. Unconditionally spill them for RegTemps consistency.
asm.spill_regs(); // For ccall. Unconditionally spill them for RegMappings consistency.
// A mirror of the interpreter code. Checking for the case
// where it's pushing rb_block_param_proxy.
@ -10466,7 +10474,7 @@ mod tests {
return (
Context::default(),
Assembler::new(),
Assembler::new(0),
cb,
OutlinedCb::wrap(CodeBlock::new_dummy(256 * 1024)),
);
@ -10534,7 +10542,7 @@ mod tests {
assert_eq!(status, Some(KeepCompiling));
let mut default = Context::default();
default.set_reg_temps(context.get_reg_temps());
default.set_reg_mapping(context.get_reg_mapping());
assert_eq!(context.diff(&default), TypeDiff::Compatible(0));
}


@ -30,11 +30,11 @@ use YARVOpnd::*;
use TempMappingKind::*;
use crate::invariants::*;
// Maximum number of temp value types we keep track of
pub const MAX_TEMP_TYPES: usize = 8;
// Maximum number of temp value types or registers we keep track of
pub const MAX_CTX_TEMPS: usize = 8;
// Maximum number of local variable types we keep track of
const MAX_LOCAL_TYPES: usize = 8;
// Maximum number of local variable types or registers we keep track of
const MAX_CTX_LOCALS: usize = 8;
/// An index into `ISEQ_BODY(iseq)->iseq_encoded`. Points
/// to a YARV instruction or an instruction operand.
@ -411,43 +411,103 @@ impl From<Opnd> for YARVOpnd {
}
}
/// Maximum index of stack temps that could be in a register
pub const MAX_REG_TEMPS: u8 = 8;
/// Number of registers that can be used for stack temps or locals
pub const MAX_MAPPED_REGS: usize = 5;
/// Bitmap of which stack temps are in a register
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
pub struct RegTemps(u8);
/// A stack slot or a local variable. u8 represents the index of it (<= 8).
#[derive(Copy, Clone, Eq, Hash, PartialEq, Debug)]
pub enum RegOpnd {
Stack(u8),
Local(u8),
}
impl RegTemps {
pub fn get(&self, index: u8) -> bool {
assert!(index < MAX_REG_TEMPS);
(self.0 >> index) & 1 == 1
/// RegMappings manages a set of registers used for stack temps and locals.
/// Each element of the array represents each of the registers.
/// If an element is Some, the stack temp or the local uses a register.
///
/// Note that Opnd::InsnOut uses a separate set of registers at the moment.
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq)]
pub struct RegMapping([Option<RegOpnd>; MAX_MAPPED_REGS]);
impl RegMapping {
/// Return the index of the register for a given operand if allocated.
pub fn get_reg(&self, opnd: RegOpnd) -> Option<usize> {
self.0.iter().enumerate()
.find(|(_, &reg_opnd)| reg_opnd == Some(opnd))
.map(|(reg_idx, _)| reg_idx)
}
pub fn set(&mut self, index: u8, value: bool) {
assert!(index < MAX_REG_TEMPS);
if value {
self.0 = self.0 | (1 << index);
} else {
self.0 = self.0 & !(1 << index);
/// Allocate a register for a given operand if available.
/// Return true if self is updated.
pub fn alloc_reg(&mut self, opnd: RegOpnd) -> bool {
// If a given opnd already has a register, skip allocation.
if self.get_reg(opnd).is_some() {
return false;
}
}
pub fn as_u8(&self) -> u8 {
self.0
}
/// Return true if there's a register that conflicts with a given stack_idx.
pub fn conflicts_with(&self, stack_idx: u8) -> bool {
let mut other_idx = stack_idx as usize % get_option!(num_temp_regs);
while other_idx < MAX_REG_TEMPS as usize {
if stack_idx as usize != other_idx && self.get(other_idx as u8) {
return true;
// If the index is too large to encode with 3 bits, give up.
match opnd {
RegOpnd::Stack(stack_idx) => if stack_idx >= MAX_CTX_TEMPS as u8 {
return false;
}
other_idx += get_option!(num_temp_regs);
RegOpnd::Local(local_idx) => if local_idx >= MAX_CTX_LOCALS as u8 {
return false;
}
};
// Allocate a register if available.
if let Some(reg_idx) = self.find_unused_reg(opnd) {
self.0[reg_idx] = Some(opnd);
return true;
}
false
}
/// Deallocate a register for a given operand if in use.
/// Return true if self is updated.
pub fn dealloc_reg(&mut self, opnd: RegOpnd) -> bool {
for reg_opnd in self.0.iter_mut() {
if *reg_opnd == Some(opnd) {
*reg_opnd = None;
return true;
}
}
false
}
/// Find an available register and return the index of it.
fn find_unused_reg(&self, opnd: RegOpnd) -> Option<usize> {
let num_regs = get_option!(num_temp_regs);
if num_regs == 0 {
return None;
}
assert!(num_regs <= MAX_MAPPED_REGS);
// If the default index for the operand is available, use that to minimize
// discrepancies among Contexts.
let default_idx = match opnd {
RegOpnd::Stack(stack_idx) => stack_idx.as_usize() % num_regs,
RegOpnd::Local(local_idx) => num_regs - (local_idx.as_usize() % num_regs) - 1,
};
if self.0[default_idx].is_none() {
return Some(default_idx);
}
// If not, pick any other available register. Like default indexes, prefer
// lower indexes for Stack, and higher indexes for Local.
let mut index_temps = self.0.iter().enumerate();
match opnd {
RegOpnd::Stack(_) => index_temps.find(|(_, reg_opnd)| reg_opnd.is_none()),
RegOpnd::Local(_) => index_temps.rev().find(|(_, reg_opnd)| reg_opnd.is_none()),
}.map(|(index, _)| index)
}
}
impl fmt::Debug for RegMapping {
/// Print `[None, ...]` instead of the default `RegMappings([None, ...])`
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
write!(fmt, "{:?}", self.0)
}
}
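
The default-index rule in find_unused_reg deserves a concrete illustration: stack temps prefer low register indexes and locals prefer high ones, so the two kinds collide as late as possible. A simplified standalone sketch of just that rule, assuming the five temp registers (num_regs == 5) used elsewhere in this diff:

// Simplified re-statement of find_unused_reg's preferred slot; not the real implementation.
fn default_reg_idx(is_local: bool, opnd_idx: usize, num_regs: usize) -> usize {
    if is_local {
        num_regs - (opnd_idx % num_regs) - 1 // RegOpnd::Local counts down from the top
    } else {
        opnd_idx % num_regs                  // RegOpnd::Stack counts up from the bottom
    }
}

fn main() {
    let num_regs = 5;
    assert_eq!(default_reg_idx(false, 0, num_regs), 0); // Stack(0) -> reg 0
    assert_eq!(default_reg_idx(false, 6, num_regs), 1); // Stack(6) -> reg 1 (matches the reg_mapping test below)
    assert_eq!(default_reg_idx(true, 0, num_regs), 4);  // Local(0) -> reg 4
    assert_eq!(default_reg_idx(true, 1, num_regs), 3);  // Local(1) -> reg 3
}

When the preferred slot is taken, allocation falls back to any free register, scanning upward for Stack and downward for Local, as in the find_unused_reg body above.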
/// Bits for chain_depth_return_landing_defer
@ -473,8 +533,8 @@ pub struct Context {
// This represents how far the JIT's SP is from the "real" SP
sp_offset: i8,
/// Bitmap of which stack temps are in a register
reg_temps: RegTemps,
/// Which stack temps or locals are in a register
reg_mapping: RegMapping,
/// Fields packed into u8
/// - 1st bit from the left: Whether this code is the target of a JIT-to-JIT Ruby return ([Self::is_return_landing])
@ -786,7 +846,7 @@ mod bitvector_tests {
let idx0 = ctx0.encode_into(&mut bits);
let mut ctx1 = Context::default();
ctx1.reg_temps = RegTemps(1);
ctx1.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]);
let idx1 = ctx1.encode_into(&mut bits);
// Make sure that we can encode two contexts successively
@ -797,10 +857,10 @@ mod bitvector_tests {
}
#[test]
fn regress_reg_temps() {
fn regress_reg_mapping() {
let mut bits = BitVector::new();
let mut ctx = Context::default();
ctx.reg_temps = RegTemps(1);
ctx.reg_mapping = RegMapping([Some(RegOpnd::Stack(0)), None, None, None, None]);
ctx.encode_into(&mut bits);
let b0 = bits.read_u1(&mut 0);
@ -973,9 +1033,24 @@ impl Context {
bits.push_u8(self.sp_offset as u8);
}
// Bitmap of which stack temps are in a register
let RegTemps(reg_temps) = self.reg_temps;
bits.push_u8(reg_temps);
// Which stack temps or locals are in a register
for &temp in self.reg_mapping.0.iter() {
if let Some(temp) = temp {
bits.push_u1(1); // Some
match temp {
RegOpnd::Stack(stack_idx) => {
bits.push_u1(0); // Stack
bits.push_u3(stack_idx);
}
RegOpnd::Local(local_idx) => {
bits.push_u1(1); // Local
bits.push_u3(local_idx);
}
}
} else {
bits.push_u1(0); // None
}
}
// chain_depth_and_flags: u8,
bits.push_u8(self.chain_depth_and_flags);
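
Worked example of the encoding above: each of the MAX_MAPPED_REGS slots contributes 1 bit for Some/None; a Some slot then adds 1 bit for Stack (0) vs Local (1) and 3 bits for the index. For instance, RegMapping([Some(Stack(2)), None, Some(Local(1)), None, None]) is written as 1 0 010, 0, 1 1 001, 0, 0 (13 bits), and Context::decode below reads the slots back in the same order.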
@ -987,7 +1062,7 @@ impl Context {
}
// Encode the local types if known
for local_idx in 0..MAX_LOCAL_TYPES {
for local_idx in 0..MAX_CTX_LOCALS {
let t = self.get_local_type(local_idx);
if t != Type::Unknown {
bits.push_op(CtxOp::SetLocalType);
@ -997,7 +1072,7 @@ impl Context {
}
// Encode stack temps
for stack_idx in 0..MAX_TEMP_TYPES {
for stack_idx in 0..MAX_CTX_TEMPS {
let mapping = self.get_temp_mapping(stack_idx);
match mapping.get_kind() {
@ -1059,8 +1134,17 @@ impl Context {
debug_assert!(!( (sp_offset_bits & 0x80) != 0 && ctx.sp_offset > 0 ));
}
// Bitmap of which stack temps are in a register
ctx.reg_temps = RegTemps(bits.read_u8(&mut idx));
// Which stack temps or locals are in a register
for index in 0..MAX_MAPPED_REGS {
if bits.read_u1(&mut idx) == 1 { // Some
let temp = if bits.read_u1(&mut idx) == 0 { // RegMapping::Stack
RegOpnd::Stack(bits.read_u3(&mut idx))
} else {
RegOpnd::Local(bits.read_u3(&mut idx))
};
ctx.reg_mapping.0[index] = Some(temp);
}
}
// chain_depth_and_flags: u8
ctx.chain_depth_and_flags = bits.read_u8(&mut idx);
@ -1412,7 +1496,7 @@ impl PendingBranch {
target_idx: u32,
target: BlockId,
ctx: &Context,
ocb: &mut OutlinedCb,
jit: &mut JITState,
) -> Option<CodePtr> {
// If the block already exists
if let Some(blockref) = find_block_version(target, ctx) {
@ -1430,7 +1514,7 @@ impl PendingBranch {
// The branch struct is uninitialized right now but as a stable address.
// We make sure the stub runs after the branch is initialized.
let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
let stub_addr = gen_branch_stub(ctx, ocb, branch_struct_addr, target_idx);
let stub_addr = gen_branch_stub(ctx, jit.iseq, jit.get_ocb(), branch_struct_addr, target_idx);
if let Some(stub_addr) = stub_addr {
// Fill the branch target with a stub
@ -2362,7 +2446,7 @@ impl Context {
let mut generic_ctx = Context::default();
generic_ctx.stack_size = self.stack_size;
generic_ctx.sp_offset = self.sp_offset;
generic_ctx.reg_temps = self.reg_temps;
generic_ctx.reg_mapping = self.reg_mapping;
if self.is_return_landing() {
generic_ctx.set_as_return_landing();
}
@ -2390,12 +2474,12 @@ impl Context {
self.sp_offset = offset;
}
pub fn get_reg_temps(&self) -> RegTemps {
self.reg_temps
pub fn get_reg_mapping(&self) -> RegMapping {
self.reg_mapping
}
pub fn set_reg_temps(&mut self, reg_temps: RegTemps) {
self.reg_temps = reg_temps;
pub fn set_reg_mapping(&mut self, reg_mapping: RegMapping) {
self.reg_mapping = reg_mapping;
}
pub fn get_chain_depth(&self) -> u8 {
@ -2447,14 +2531,13 @@ impl Context {
self.sp_opnd(-ep_offset + offset)
}
/// Stop using a register for a given stack temp.
/// Stop using a register for a given stack temp or a local.
/// This allows us to reuse the register for a value that we know is dead
/// and will no longer be used (e.g. popped stack temp).
pub fn dealloc_temp_reg(&mut self, stack_idx: u8) {
if stack_idx < MAX_REG_TEMPS {
let mut reg_temps = self.get_reg_temps();
reg_temps.set(stack_idx, false);
self.set_reg_temps(reg_temps);
pub fn dealloc_reg(&mut self, opnd: RegOpnd) {
let mut reg_mapping = self.get_reg_mapping();
if reg_mapping.dealloc_reg(opnd) {
self.set_reg_mapping(reg_mapping);
}
}
@ -2467,7 +2550,7 @@ impl Context {
let stack_idx: usize = (self.stack_size - 1 - idx).into();
// If outside of tracked range, do nothing
if stack_idx >= MAX_TEMP_TYPES {
if stack_idx >= MAX_CTX_TEMPS {
return Type::Unknown;
}
@ -2478,7 +2561,7 @@ impl Context {
MapToStack => mapping.get_type(),
MapToLocal => {
let idx = mapping.get_local_idx();
assert!((idx as usize) < MAX_LOCAL_TYPES);
assert!((idx as usize) < MAX_CTX_LOCALS);
return self.get_local_type(idx.into());
}
}
@ -2488,7 +2571,7 @@ impl Context {
/// Get the currently tracked type for a local variable
pub fn get_local_type(&self, local_idx: usize) -> Type {
if local_idx >= MAX_LOCAL_TYPES {
if local_idx >= MAX_CTX_LOCALS {
return Type::Unknown
} else {
// Each type is stored in 4 bits
@ -2499,7 +2582,7 @@ impl Context {
/// Get the current temp mapping for a given stack slot
fn get_temp_mapping(&self, temp_idx: usize) -> TempMapping {
assert!(temp_idx < MAX_TEMP_TYPES);
assert!(temp_idx < MAX_CTX_TEMPS);
// Extract the temp mapping kind
let kind_bits = (self.temp_mapping_kind >> (2 * temp_idx)) & 0b11;
@ -2527,7 +2610,7 @@ impl Context {
/// Get the current temp mapping for a given stack slot
fn set_temp_mapping(&mut self, temp_idx: usize, mapping: TempMapping) {
assert!(temp_idx < MAX_TEMP_TYPES);
assert!(temp_idx < MAX_CTX_TEMPS);
// Extract the kind bits
let mapping_kind = mapping.get_kind();
@ -2583,7 +2666,7 @@ impl Context {
let stack_idx = (self.stack_size - 1 - idx) as usize;
// If outside of tracked range, do nothing
if stack_idx >= MAX_TEMP_TYPES {
if stack_idx >= MAX_CTX_TEMPS {
return;
}
@ -2598,7 +2681,7 @@ impl Context {
}
MapToLocal => {
let idx = mapping.get_local_idx() as usize;
assert!(idx < MAX_LOCAL_TYPES);
assert!(idx < MAX_CTX_LOCALS);
let mut new_type = self.get_local_type(idx);
new_type.upgrade(opnd_type);
self.set_local_type(idx, new_type);
@ -2625,7 +2708,7 @@ impl Context {
assert!(idx < self.stack_size);
let stack_idx = (self.stack_size - 1 - idx) as usize;
if stack_idx < MAX_TEMP_TYPES {
if stack_idx < MAX_CTX_TEMPS {
self.get_temp_mapping(stack_idx)
} else {
// We can't know the source of this stack operand, so we assume it is
@ -2651,7 +2734,7 @@ impl Context {
}
// If outside of tracked range, do nothing
if stack_idx >= MAX_TEMP_TYPES {
if stack_idx >= MAX_CTX_TEMPS {
return;
}
@ -2667,12 +2750,12 @@ impl Context {
return;
}
if local_idx >= MAX_LOCAL_TYPES {
if local_idx >= MAX_CTX_LOCALS {
return
}
// If any values on the stack map to this local we must detach them
for mapping_idx in 0..MAX_TEMP_TYPES {
for mapping_idx in 0..MAX_CTX_TEMPS {
let mapping = self.get_temp_mapping(mapping_idx);
let tm = match mapping.get_kind() {
MapToStack => mapping,
@ -2704,7 +2787,7 @@ impl Context {
// When clearing local types we must detach any stack mappings to those
// locals. Even if local values may have changed, stack values will not.
for mapping_idx in 0..MAX_TEMP_TYPES {
for mapping_idx in 0..MAX_CTX_TEMPS {
let mapping = self.get_temp_mapping(mapping_idx);
if mapping.get_kind() == MapToLocal {
let local_idx = mapping.get_local_idx() as usize;
@ -2758,7 +2841,7 @@ impl Context {
return TypeDiff::Incompatible;
}
if dst.reg_temps != src.reg_temps {
if dst.reg_mapping != src.reg_mapping {
return TypeDiff::Incompatible;
}
@ -2779,7 +2862,7 @@ impl Context {
}
// For each local type we track
for i in 0.. MAX_LOCAL_TYPES {
for i in 0.. MAX_CTX_LOCALS {
let t_src = src.get_local_type(i);
let t_dst = dst.get_local_type(i);
diff += match t_src.diff(t_dst) {
@ -2845,24 +2928,23 @@ impl Assembler {
let stack_size: usize = self.ctx.stack_size.into();
// Keep track of the type and mapping of the value
if stack_size < MAX_TEMP_TYPES {
if stack_size < MAX_CTX_TEMPS {
self.ctx.set_temp_mapping(stack_size, mapping);
if mapping.get_kind() == MapToLocal {
let idx = mapping.get_local_idx();
assert!((idx as usize) < MAX_LOCAL_TYPES);
assert!((idx as usize) < MAX_CTX_LOCALS);
}
}
// Allocate a register to the stack operand
if self.ctx.stack_size < MAX_REG_TEMPS {
self.alloc_temp_reg(self.ctx.stack_size);
}
self.ctx.stack_size += 1;
self.ctx.sp_offset += 1;
return self.stack_opnd(0);
// Allocate a register to the new stack operand
let stack_opnd = self.stack_opnd(0);
self.alloc_reg(stack_opnd.reg_opnd());
stack_opnd
}
/// Push one new value on the temp stack
@ -2878,7 +2960,7 @@ impl Assembler {
/// Push a local variable on the stack
pub fn stack_push_local(&mut self, local_idx: usize) -> Opnd {
if local_idx >= MAX_LOCAL_TYPES {
if local_idx >= MAX_CTX_LOCALS {
return self.stack_push(Type::Unknown);
}
@ -2896,7 +2978,7 @@ impl Assembler {
for i in 0..n {
let idx: usize = (self.ctx.stack_size as usize) - i - 1;
if idx < MAX_TEMP_TYPES {
if idx < MAX_CTX_TEMPS {
self.ctx.set_temp_mapping(idx, TempMapping::map_to_stack(Type::Unknown));
}
}
@ -2914,8 +2996,8 @@ impl Assembler {
let method_name_index = (self.ctx.stack_size as usize) - argc - 1;
for i in method_name_index..(self.ctx.stack_size - 1) as usize {
if i < MAX_TEMP_TYPES {
let next_arg_mapping = if i + 1 < MAX_TEMP_TYPES {
if i < MAX_CTX_TEMPS {
let next_arg_mapping = if i + 1 < MAX_CTX_TEMPS {
self.ctx.get_temp_mapping(i + 1)
} else {
TempMapping::map_to_stack(Type::Unknown)
@ -2932,8 +3014,22 @@ impl Assembler {
idx,
num_bits: 64,
stack_size: self.ctx.stack_size,
num_locals: None, // not needed for stack temps
sp_offset: self.ctx.sp_offset,
reg_temps: None, // push_insn will set this
reg_mapping: None, // push_insn will set this
}
}
/// Get an operand pointing to a local variable
pub fn local_opnd(&self, ep_offset: u32) -> Opnd {
let idx = self.ctx.stack_size as i32 + ep_offset as i32;
Opnd::Stack {
idx,
num_bits: 64,
stack_size: self.ctx.stack_size,
num_locals: Some(self.get_num_locals().unwrap()), // this must exist for locals
sp_offset: self.ctx.sp_offset,
reg_mapping: None, // push_insn will set this
}
}
}
@ -3132,7 +3228,7 @@ pub fn gen_entry_point(iseq: IseqPtr, ec: EcPtr, jit_exception: bool) -> Option<
// Change the entry's jump target from an entry stub to a next entry
pub fn regenerate_entry(cb: &mut CodeBlock, entryref: &EntryRef, next_entry: CodePtr) {
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
asm_comment!(asm, "regenerate_entry");
// gen_entry_guard generates cmp + jne. We're rewriting only jne.
@ -3208,7 +3304,7 @@ fn entry_stub_hit_body(
// Compile a new entry guard as a next entry
let next_entry = cb.get_write_ptr();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
let pending_entry = gen_entry_chain_guard(&mut asm, ocb, iseq, insn_idx)?;
asm.compile(cb, Some(ocb))?;
@ -3219,7 +3315,7 @@ fn entry_stub_hit_body(
let blockref = match find_block_version(blockid, &ctx) {
// If an existing block is found, generate a jump to the block.
Some(blockref) => {
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
asm.jmp(unsafe { blockref.as_ref() }.start_addr.into());
asm.compile(cb, Some(ocb))?;
Some(blockref)
@ -3247,7 +3343,7 @@ fn entry_stub_hit_body(
pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
asm_comment!(asm, "entry stub hit");
asm.mov(C_ARG_OPNDS[0], entry_address.into());
@ -3263,7 +3359,7 @@ pub fn gen_entry_stub(entry_address: usize, ocb: &mut OutlinedCb) -> Option<Code
/// it's useful for Code GC to call entry_stub_hit from a globally shared code.
pub fn gen_entry_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
// See gen_entry_guard for how it's used.
asm_comment!(asm, "entry_stub_hit() trampoline");
@ -3286,7 +3382,7 @@ fn regenerate_branch(cb: &mut CodeBlock, branch: &Branch) {
let branch_terminates_block = branch.end_addr.get() == block.get_end_addr();
// Generate the branch
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
asm_comment!(asm, "regenerate_branch");
branch.gen_fn.call(
&mut asm,
@ -3597,15 +3693,16 @@ fn delete_empty_defer_block(branch: &Branch, new_block: &Block, target_ctx: Cont
/// A piece of code that redeems for more code; a thunk for code.
fn gen_branch_stub(
ctx: u32,
iseq: IseqPtr,
ocb: &mut OutlinedCb,
branch_struct_address: usize,
target_idx: u32,
) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new(unsafe { get_iseq_body_local_table_size(iseq) });
asm.ctx = Context::decode(ctx);
asm.set_reg_temps(asm.ctx.reg_temps);
asm.set_reg_mapping(asm.ctx.reg_mapping);
asm_comment!(asm, "branch stub hit");
if asm.ctx.is_return_landing() {
@ -3621,7 +3718,7 @@ fn gen_branch_stub(
}
// Spill temps to the VM stack as well for jit.peek_at_stack()
asm.spill_temps();
asm.spill_regs();
// Set up the arguments unique to this stub for:
//
@ -3641,7 +3738,7 @@ fn gen_branch_stub(
pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
// For `branch_stub_hit(branch_ptr, target_idx, ec)`,
// `branch_ptr` and `target_idx` is different for each stub,
@ -3678,7 +3775,7 @@ pub fn gen_branch_stub_hit_trampoline(ocb: &mut OutlinedCb) -> Option<CodePtr> {
/// Return registers to be pushed and popped on branch_stub_hit.
pub fn caller_saved_temp_regs() -> impl Iterator<Item = &'static Reg> + DoubleEndedIterator {
let temp_regs = Assembler::get_temp_regs().iter();
let temp_regs = Assembler::get_temp_regs2().iter();
let len = temp_regs.len();
// The return value gen_leave() leaves in C_RET_REG
// needs to survive the branch_stub_hit() call.
@ -3752,12 +3849,11 @@ pub fn gen_branch(
gen_fn: BranchGenFn,
) {
let branch = new_pending_branch(jit, gen_fn);
let ocb = jit.get_ocb();
// Get the branch targets or stubs
let target0_addr = branch.set_target(0, target0, ctx0, ocb);
let target0_addr = branch.set_target(0, target0, ctx0, jit);
let target1_addr = if let Some(ctx) = ctx1 {
let addr = branch.set_target(1, target1.unwrap(), ctx, ocb);
let addr = branch.set_target(1, target1.unwrap(), ctx, jit);
if addr.is_none() {
// target1 requested but we're out of memory.
// Avoid unwrap() in gen_fn()
@ -3832,7 +3928,7 @@ pub fn defer_compilation(
};
// Likely a stub since the context is marked as deferred().
let target0_address = branch.set_target(0, blockid, &next_ctx, jit.get_ocb());
let target0_address = branch.set_target(0, blockid, &next_ctx, jit);
// Pad the block if it has the potential to be invalidated. This must be
// done before gen_fn() in case the jump is overwritten by a fallthrough.
@ -4016,7 +4112,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
let cur_dropped_bytes = cb.has_dropped_bytes();
cb.set_write_ptr(block_start);
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
asm.jmp(block_entry_exit.as_side_exit());
cb.set_dropped_bytes(false);
asm.compile(&mut cb, Some(ocb)).expect("can rewrite existing code");
@ -4053,7 +4149,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
}
// Create a stub for this branch target
let stub_addr = gen_branch_stub(block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
let stub_addr = gen_branch_stub(block.ctx, block.iseq.get(), ocb, branchref.as_ptr() as usize, target_idx as u32);
// In case we were unable to generate a stub (e.g. OOM). Use the block's
// exit instead of a stub for the block. It's important that we
@ -4187,7 +4283,7 @@ mod tests {
// and all local types in 32 bits
assert_eq!(mem::size_of::<Type>(), 1);
assert!(Type::BlockParamProxy as usize <= 0b1111);
assert!(MAX_LOCAL_TYPES * 4 <= 32);
assert!(MAX_CTX_LOCALS * 4 <= 32);
}
#[test]
@ -4199,7 +4295,7 @@ mod tests {
fn local_types() {
let mut ctx = Context::default();
for i in 0..MAX_LOCAL_TYPES {
for i in 0..MAX_CTX_LOCALS {
ctx.set_local_type(i, Type::Fixnum);
assert_eq!(ctx.get_local_type(i), Type::Fixnum);
ctx.set_local_type(i, Type::BlockParamProxy);
@ -4251,41 +4347,30 @@ mod tests {
}
#[test]
fn reg_temps() {
let mut reg_temps = RegTemps(0);
fn reg_mapping() {
let mut reg_mapping = RegMapping([None, None, None, None, None]);
// 0 means every slot is not spilled
for stack_idx in 0..MAX_REG_TEMPS {
assert_eq!(reg_temps.get(stack_idx), false);
for stack_idx in 0..MAX_CTX_TEMPS as u8 {
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(stack_idx)), None);
}
// Set 0, 2, 7 (RegTemps: 10100001)
reg_temps.set(0, true);
reg_temps.set(2, true);
reg_temps.set(3, true);
reg_temps.set(3, false);
reg_temps.set(7, true);
// Set 0, 2, 6 (RegMapping: [Some(0), Some(6), Some(2), None, None])
reg_mapping.alloc_reg(RegOpnd::Stack(0));
reg_mapping.alloc_reg(RegOpnd::Stack(2));
reg_mapping.alloc_reg(RegOpnd::Stack(3));
reg_mapping.dealloc_reg(RegOpnd::Stack(3));
reg_mapping.alloc_reg(RegOpnd::Stack(6));
// Get 0..8
assert_eq!(reg_temps.get(0), true);
assert_eq!(reg_temps.get(1), false);
assert_eq!(reg_temps.get(2), true);
assert_eq!(reg_temps.get(3), false);
assert_eq!(reg_temps.get(4), false);
assert_eq!(reg_temps.get(5), false);
assert_eq!(reg_temps.get(6), false);
assert_eq!(reg_temps.get(7), true);
// Test conflicts
assert_eq!(5, get_option!(num_temp_regs));
assert_eq!(reg_temps.conflicts_with(0), false); // already set, but no conflict
assert_eq!(reg_temps.conflicts_with(1), false);
assert_eq!(reg_temps.conflicts_with(2), true); // already set, and conflicts with 7
assert_eq!(reg_temps.conflicts_with(3), false);
assert_eq!(reg_temps.conflicts_with(4), false);
assert_eq!(reg_temps.conflicts_with(5), true); // not set, and will conflict with 0
assert_eq!(reg_temps.conflicts_with(6), false);
assert_eq!(reg_temps.conflicts_with(7), true); // already set, and conflicts with 2
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(0)), Some(0));
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(1)), None);
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(2)), Some(2));
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(3)), None);
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(4)), None);
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(5)), None);
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(6)), Some(1));
assert_eq!(reg_mapping.get_reg(RegOpnd::Stack(7)), None);
}
#[test]
@ -4294,7 +4379,7 @@ mod tests {
assert_eq!(Context::default().diff(&Context::default()), TypeDiff::Compatible(0));
// Try pushing an operand and getting its type
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
asm.stack_push(Type::Fixnum);
let top_type = asm.ctx.get_opnd_type(StackOpnd(0));
assert!(top_type == Type::Fixnum);
@ -4304,7 +4389,7 @@ mod tests {
#[test]
fn context_upgrade_local() {
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
asm.stack_push_local(0);
asm.ctx.upgrade_opnd_type(StackOpnd(0), Type::Nil);
assert_eq!(Type::Nil, asm.ctx.get_opnd_type(StackOpnd(0)));
@ -4338,7 +4423,7 @@ mod tests {
#[test]
fn shift_stack_for_send() {
let mut asm = Assembler::new();
let mut asm = Assembler::new(0);
// Push values to simulate send(:name, arg) with 6 items already on-stack
for _ in 0..6 {


@ -683,7 +683,7 @@ pub extern "C" fn rb_yjit_tracing_invalidate_all() {
cb.set_write_ptr(patch.inline_patch_pos);
cb.set_dropped_bytes(false);
cb.without_page_end_reserve(|cb| {
let mut asm = crate::backend::ir::Assembler::new();
let mut asm = crate::backend::ir::Assembler::new_without_iseq();
asm.jmp(patch.outlined_target_pos.as_side_exit());
if asm.compile(cb, None).is_none() {
panic!("Failed to apply patch at {:?}", patch.inline_patch_pos);


@ -273,7 +273,7 @@ mod tests {
#[test]
fn test_print_int() {
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
let mut cb = CodeBlock::new_dummy(1024);
print_int(&mut asm, Opnd::Imm(42));
@ -282,7 +282,7 @@ mod tests {
#[test]
fn test_print_str() {
let mut asm = Assembler::new();
let mut asm = Assembler::new_without_iseq();
let mut cb = CodeBlock::new_dummy(1024);
print_str(&mut asm, "Hello, world!");