ZJIT: Profile type+shape distributions (#13901)

ZJIT uses the interpreter to take type profiles of the objects that pass through
the code. It stores a compressed record of that history for each of the opcodes
we select.

Before this change, we reused the HIR Type data structure, a shallow type
lattice, to store historical type information. That was quick for bring-up but
is quite lossy as profiles go: we get one bit per built-in type seen, and if we
also see a non-built-in type, we end up with BasicObject. Not very helpful. It
also gives us no notion of cardinality: how many of each type did we see?

This change brings with it a much more interesting slice of type history: a
histogram. A Distribution holds a record of the top N (where N is fixed at Ruby
compile-time) `(Class, ShapeId)` pairs and their counts. It also holds an
*other* count for when we see more than N distinct pairs.
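
To make the behavior concrete, here is a small usage sketch of the new
Distribution type (added below in zjit/src/distribution.rs). The module is
crate-private, so this assumes it runs inside the zjit crate, e.g. as a unit
test; the item type u32, N = 4, and the helper name histogram_sketch are
arbitrary choices for illustration, while the profiler itself uses
Distribution<ProfiledType, 4>.

    use crate::distribution::Distribution;

    fn histogram_sketch() {
        let mut dist = Distribution::<u32, 4>::new();
        // 10 is observed three times; 11..=14 once each. With only four buckets,
        // the fifth distinct item (14) spills into the internal `other` counter.
        for item in [10, 10, 10, 11, 12, 13, 14] {
            dist.observe(item);
        }
        // each_item yields only buckets that were actually hit, with the most
        // frequently observed item at the front.
        assert_eq!(dist.each_item().next(), Some(10));
    }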

Using this distribution, we can make more informed decisions about when to use
type information. We can determine whether a site is strictly monomorphic, very
nearly monomorphic, or something else. Maybe the call site is polymorphic, so we
should use a polymorphic inline cache. Exciting stuff.
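
As a sketch of how that classification could be consumed (again in-crate;
`dominant` and `classification_sketch` are hypothetical helpers, while the real
check lives in profiled_type_of_at in the HIR changes below):

    use crate::distribution::{Distribution, DistributionSummary};

    // Return the dominant item if the profile is trustworthy enough to specialize on.
    fn dominant<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize>(
        dist: &Distribution<T, N>,
    ) -> Option<T> {
        let summary = DistributionSummary::new(dist);
        if summary.is_monomorphic() || summary.is_skewed_polymorphic() {
            // bucket(0) is the most frequently observed item.
            Some(summary.bucket(0))
        } else {
            // Empty, evenly polymorphic, or megamorphic: don't trust the profile.
            None
        }
    }

    fn classification_sketch() {
        let mut dist = Distribution::<u32, 4>::new();
        for item in [10, 10, 10, 11] {
            dist.observe(item);
        }
        // 10 accounts for 75% of observations, meeting SKEW_THRESHOLD (0.75), so
        // the summary is SkewedPolymorphic and we would specialize on 10.
        assert_eq!(dominant(&dist), Some(10));
    }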

I also plumb this new distribution into the HIR part of the compilation
pipeline.
Max Bernstein 2025-08-05 13:56:04 -07:00 committed by GitHub
parent 52312d53ca
commit ef95e5ba3d
9 changed files with 482 additions and 72 deletions

zjit.c

@ -346,6 +346,17 @@ rb_zjit_shape_obj_too_complex_p(VALUE obj)
return rb_shape_obj_too_complex_p(obj);
}
enum {
RB_SPECIAL_CONST_SHAPE_ID = SPECIAL_CONST_SHAPE_ID,
RB_INVALID_SHAPE_ID = INVALID_SHAPE_ID,
};
bool
rb_zjit_singleton_class_p(VALUE klass)
{
return RCLASS_SINGLETON_P(klass);
}
// Primitives used by zjit.rb. Don't put other functions below, which wouldn't use them.
VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self);
VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self);

View file

@ -351,8 +351,11 @@ fn main() {
.allowlist_function("rb_optimized_call")
.allowlist_function("rb_zjit_icache_invalidate")
.allowlist_function("rb_zjit_print_exception")
.allowlist_function("rb_zjit_singleton_class_p")
.allowlist_type("robject_offsets")
.allowlist_type("rstring_offsets")
.allowlist_var("RB_SPECIAL_CONST_SHAPE_ID")
.allowlist_var("RB_INVALID_SHAPE_ID")
// From jit.c
.allowlist_function("rb_assert_holding_vm_lock")

View file

@ -265,6 +265,12 @@ pub struct ID(pub ::std::os::raw::c_ulong);
/// Pointer to an ISEQ
pub type IseqPtr = *const rb_iseq_t;
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub struct ShapeId(pub u32);
pub const SPECIAL_CONST_SHAPE_ID: ShapeId = ShapeId(RB_SPECIAL_CONST_SHAPE_ID);
pub const INVALID_SHAPE_ID: ShapeId = ShapeId(RB_INVALID_SHAPE_ID);
// Given an ISEQ pointer, convert PC to insn_idx
pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u16> {
let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) };
@ -487,8 +493,8 @@ impl VALUE {
unsafe { rb_zjit_shape_obj_too_complex_p(self) }
}
pub fn shape_id_of(self) -> u32 {
unsafe { rb_obj_shape_id(self) }
pub fn shape_id_of(self) -> ShapeId {
ShapeId(unsafe { rb_obj_shape_id(self) })
}
pub fn embedded_p(self) -> bool {

View file

@ -719,6 +719,9 @@ pub const DEFINED_REF: defined_type = 15;
pub const DEFINED_FUNC: defined_type = 16;
pub const DEFINED_CONST_FROM: defined_type = 17;
pub type defined_type = u32;
pub const RB_SPECIAL_CONST_SHAPE_ID: _bindgen_ty_38 = 33554432;
pub const RB_INVALID_SHAPE_ID: _bindgen_ty_38 = 4294967295;
pub type _bindgen_ty_38 = u32;
pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
unsafe extern "C" {
pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
@ -938,6 +941,7 @@ unsafe extern "C" {
pub fn rb_iseq_set_zjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
pub fn rb_zjit_print_exception();
pub fn rb_zjit_shape_obj_too_complex_p(obj: VALUE) -> bool;
pub fn rb_zjit_singleton_class_p(klass: VALUE) -> bool;
pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;

zjit/src/distribution.rs (new file)

@ -0,0 +1,266 @@
/// This implementation was inspired by the type feedback module from Google's S6, which was
/// written in C++ for use with Python. This is a new implementation in Rust created for use with
/// Ruby instead of Python.
#[derive(Debug, Clone)]
pub struct Distribution<T: Copy + PartialEq + Default, const N: usize> {
/// buckets and counts have the same length
/// buckets[0] is always the most common item
buckets: [T; N],
counts: [usize; N],
/// if there is no more room, increment the fallback
other: usize,
// TODO(max): Add count disparity, which can help determine when to reset the distribution
}
impl<T: Copy + PartialEq + Default, const N: usize> Distribution<T, N> {
pub fn new() -> Self {
Self { buckets: [Default::default(); N], counts: [0; N], other: 0 }
}
pub fn observe(&mut self, item: T) {
for (bucket, count) in self.buckets.iter_mut().zip(self.counts.iter_mut()) {
if *bucket == item || *count == 0 {
*bucket = item;
*count += 1;
// Keep the most frequent item at the front
self.bubble_up();
return;
}
}
self.other += 1;
}
/// Keep the highest counted bucket at index 0
fn bubble_up(&mut self) {
if N == 0 { return; }
let max_index = self.counts.into_iter().enumerate().max_by_key(|(_, val)| *val).unwrap().0;
if max_index != 0 {
self.counts.swap(0, max_index);
self.buckets.swap(0, max_index);
}
}
pub fn each_item(&self) -> impl Iterator<Item = T> + '_ {
self.buckets.iter().zip(self.counts.iter())
.filter_map(|(&bucket, &count)| if count > 0 { Some(bucket) } else { None })
}
pub fn each_item_mut(&mut self) -> impl Iterator<Item = &mut T> + '_ {
self.buckets.iter_mut().zip(self.counts.iter())
.filter_map(|(bucket, &count)| if count > 0 { Some(bucket) } else { None })
}
}
#[derive(PartialEq, Debug, Clone, Copy)]
enum DistributionKind {
/// No types seen
Empty,
/// One type seen
Monomorphic,
/// Between 2 and (fixed) N types seen
Polymorphic,
/// Polymorphic, but with a significant skew towards one type
SkewedPolymorphic,
/// More than N types seen with no clear winner
Megamorphic,
/// Megamorphic, but with a significant skew towards one type
SkewedMegamorphic,
}
#[derive(Debug)]
pub struct DistributionSummary<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> {
kind: DistributionKind,
buckets: [T; N],
// TODO(max): Determine if we need some notion of stability
}
const SKEW_THRESHOLD: f64 = 0.75;
impl<T: Copy + PartialEq + Default + std::fmt::Debug, const N: usize> DistributionSummary<T, N> {
pub fn new(dist: &Distribution<T, N>) -> Self {
#[cfg(debug_assertions)]
{
let first_count = dist.counts[0];
for &count in &dist.counts[1..] {
assert!(first_count >= count, "First count should be the largest");
}
}
let num_seen = dist.counts.iter().sum::<usize>() + dist.other;
let kind = if dist.other == 0 {
// Seen <= N types total
if dist.counts[0] == 0 {
DistributionKind::Empty
} else if dist.counts[1] == 0 {
DistributionKind::Monomorphic
} else if (dist.counts[0] as f64)/(num_seen as f64) >= SKEW_THRESHOLD {
DistributionKind::SkewedPolymorphic
} else {
DistributionKind::Polymorphic
}
} else {
// Seen > N types total; considered megamorphic
if (dist.counts[0] as f64)/(num_seen as f64) >= SKEW_THRESHOLD {
DistributionKind::SkewedMegamorphic
} else {
DistributionKind::Megamorphic
}
};
Self { kind, buckets: dist.buckets.clone() }
}
pub fn is_monomorphic(&self) -> bool {
self.kind == DistributionKind::Monomorphic
}
pub fn is_skewed_polymorphic(&self) -> bool {
self.kind == DistributionKind::SkewedPolymorphic
}
pub fn is_skewed_megamorphic(&self) -> bool {
self.kind == DistributionKind::SkewedMegamorphic
}
pub fn bucket(&self, idx: usize) -> T {
assert!(idx < N, "index {idx} out of bounds for buckets[{N}]");
self.buckets[idx]
}
}
#[cfg(test)]
mod distribution_tests {
use super::*;
#[test]
fn start_empty() {
let dist = Distribution::<usize, 4>::new();
assert_eq!(dist.other, 0);
assert!(dist.counts.iter().all(|&b| b == 0));
}
#[test]
fn observe_adds_record() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
assert_eq!(dist.buckets[0], 10);
assert_eq!(dist.counts[0], 1);
assert_eq!(dist.other, 0);
}
#[test]
fn observe_increments_record() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(10);
assert_eq!(dist.buckets[0], 10);
assert_eq!(dist.counts[0], 2);
assert_eq!(dist.other, 0);
}
#[test]
fn observe_two() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(10);
dist.observe(11);
dist.observe(11);
dist.observe(11);
assert_eq!(dist.buckets[0], 11);
assert_eq!(dist.counts[0], 3);
assert_eq!(dist.buckets[1], 10);
assert_eq!(dist.counts[1], 2);
assert_eq!(dist.other, 0);
}
#[test]
fn observe_with_max_increments_other() {
let mut dist = Distribution::<usize, 0>::new();
dist.observe(10);
assert!(dist.buckets.is_empty());
assert!(dist.counts.is_empty());
assert_eq!(dist.other, 1);
}
#[test]
fn empty_distribution_returns_empty_summary() {
let dist = Distribution::<usize, 4>::new();
let summary = DistributionSummary::new(&dist);
assert_eq!(summary.kind, DistributionKind::Empty);
}
#[test]
fn monomorphic_distribution_returns_monomorphic_summary() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(10);
let summary = DistributionSummary::new(&dist);
assert_eq!(summary.kind, DistributionKind::Monomorphic);
assert_eq!(summary.buckets[0], 10);
}
#[test]
fn polymorphic_distribution_returns_polymorphic_summary() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(11);
dist.observe(11);
let summary = DistributionSummary::new(&dist);
assert_eq!(summary.kind, DistributionKind::Polymorphic);
assert_eq!(summary.buckets[0], 11);
assert_eq!(summary.buckets[1], 10);
}
#[test]
fn skewed_polymorphic_distribution_returns_skewed_polymorphic_summary() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(11);
dist.observe(11);
dist.observe(11);
let summary = DistributionSummary::new(&dist);
assert_eq!(summary.kind, DistributionKind::SkewedPolymorphic);
assert_eq!(summary.buckets[0], 11);
assert_eq!(summary.buckets[1], 10);
}
#[test]
fn megamorphic_distribution_returns_megamorphic_summary() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(11);
dist.observe(12);
dist.observe(13);
dist.observe(14);
dist.observe(11);
let summary = DistributionSummary::new(&dist);
assert_eq!(summary.kind, DistributionKind::Megamorphic);
assert_eq!(summary.buckets[0], 11);
}
#[test]
fn skewed_megamorphic_distribution_returns_skewed_megamorphic_summary() {
let mut dist = Distribution::<usize, 4>::new();
dist.observe(10);
dist.observe(11);
dist.observe(11);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(12);
dist.observe(13);
dist.observe(14);
let summary = DistributionSummary::new(&dist);
assert_eq!(summary.kind, DistributionKind::SkewedMegamorphic);
assert_eq!(summary.buckets[0], 12);
}
}

View file

@ -11,6 +11,7 @@ use std::{
};
use crate::hir_type::{Type, types};
use crate::bitset::BitSet;
use crate::profile::{TypeDistributionSummary, ProfiledType};
/// An index of an [`Insn`] in a [`Function`]. This is a popular
/// type since this effectively acts as a pointer to an [`Insn`].
@ -1357,19 +1358,23 @@ impl Function {
/// Return the interpreter-profiled type of the HIR instruction at the given ISEQ instruction
/// index, if it is known. This historical type record is not a guarantee and must be checked
/// with a GuardType or similar.
fn profiled_type_of_at(&self, insn: InsnId, iseq_insn_idx: usize) -> Option<Type> {
fn profiled_type_of_at(&self, insn: InsnId, iseq_insn_idx: usize) -> Option<ProfiledType> {
let Some(ref profiles) = self.profiles else { return None };
let Some(entries) = profiles.types.get(&iseq_insn_idx) else { return None };
for &(entry_insn, entry_type) in entries {
if self.union_find.borrow().find_const(entry_insn) == self.union_find.borrow().find_const(insn) {
return Some(entry_type);
for (entry_insn, entry_type_summary) in entries {
if self.union_find.borrow().find_const(*entry_insn) == self.union_find.borrow().find_const(insn) {
if entry_type_summary.is_monomorphic() || entry_type_summary.is_skewed_polymorphic() {
return Some(entry_type_summary.bucket(0));
} else {
return None;
}
}
}
None
}
fn likely_is_fixnum(&self, val: InsnId, profiled_type: Type) -> bool {
return self.is_a(val, types::Fixnum) || profiled_type.is_subtype(types::Fixnum);
fn likely_is_fixnum(&self, val: InsnId, profiled_type: ProfiledType) -> bool {
return self.is_a(val, types::Fixnum) || profiled_type.is_fixnum();
}
fn coerce_to_fixnum(&mut self, block: BlockId, val: InsnId, state: InsnId) -> InsnId {
@ -1380,8 +1385,8 @@ impl Function {
fn arguments_likely_fixnums(&mut self, left: InsnId, right: InsnId, state: InsnId) -> bool {
let frame_state = self.frame_state(state);
let iseq_insn_idx = frame_state.insn_idx as usize;
let left_profiled_type = self.profiled_type_of_at(left, iseq_insn_idx).unwrap_or(types::BasicObject);
let right_profiled_type = self.profiled_type_of_at(right, iseq_insn_idx).unwrap_or(types::BasicObject);
let left_profiled_type = self.profiled_type_of_at(left, iseq_insn_idx).unwrap_or(ProfiledType::empty());
let right_profiled_type = self.profiled_type_of_at(right, iseq_insn_idx).unwrap_or(ProfiledType::empty());
self.likely_is_fixnum(left, left_profiled_type) && self.likely_is_fixnum(right, right_profiled_type)
}
@ -1510,15 +1515,16 @@ impl Function {
self.try_rewrite_aref(block, insn_id, self_val, args[0], state),
Insn::SendWithoutBlock { mut self_val, cd, args, state } => {
let frame_state = self.frame_state(state);
let (klass, guard_equal_to) = if let Some(klass) = self.type_of(self_val).runtime_exact_ruby_class() {
let (klass, profiled_type) = if let Some(klass) = self.type_of(self_val).runtime_exact_ruby_class() {
// If we know the class statically, use it to fold the lookup at compile-time.
(klass, None)
} else {
// If we know that self is top-self from profile information, guard and use it to fold the lookup at compile-time.
match self.profiled_type_of_at(self_val, frame_state.insn_idx) {
Some(self_type) if self_type.is_top_self() => (self_type.exact_ruby_class().unwrap(), self_type.ruby_object()),
_ => { self.push_insn_id(block, insn_id); continue; }
}
// If we know that self is reasonably monomorphic from profile information, guard and use it to fold the lookup at compile-time.
// TODO(max): Figure out how to handle top self?
let Some(recv_type) = self.profiled_type_of_at(self_val, frame_state.insn_idx) else {
self.push_insn_id(block, insn_id); continue;
};
(recv_type.class(), Some(recv_type))
};
let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
let mid = unsafe { vm_ci_mid(ci) };
@ -1542,8 +1548,8 @@ impl Function {
self.push_insn_id(block, insn_id); continue;
}
self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state });
if let Some(expected) = guard_equal_to {
self_val = self.push_insn(block, Insn::GuardBitEquals { val: self_val, expected, state });
if let Some(profiled_type) = profiled_type {
self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: Type::from_profiled_type(profiled_type), state });
}
let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, cd, cme, iseq, args, state });
self.make_equal_to(insn_id, send_direct);
@ -1611,17 +1617,12 @@ impl Function {
let method_id = unsafe { rb_vm_ci_mid(call_info) };
// If we have info about the class of the receiver
//
// TODO(alan): there was a seemingly a miscomp here if you swap with
// `inexact_ruby_class`. Theoretically it can call a method too general
// for the receiver. Confirm and add a test.
let (recv_class, guard_type) = if let Some(klass) = self_type.runtime_exact_ruby_class() {
(klass, None)
let (recv_class, profiled_type) = if let Some(class) = self_type.runtime_exact_ruby_class() {
(class, None)
} else {
let iseq_insn_idx = fun.frame_state(state).insn_idx;
let Some(recv_type) = fun.profiled_type_of_at(self_val, iseq_insn_idx) else { return Err(()) };
let Some(recv_class) = recv_type.runtime_exact_ruby_class() else { return Err(()) };
(recv_class, Some(recv_type.unspecialized()))
(recv_type.class(), Some(recv_type))
};
// Do method lookup
@ -1661,9 +1662,9 @@ impl Function {
if ci_flags & VM_CALL_ARGS_SIMPLE != 0 {
// Commit to the replacement. Put PatchPoint.
fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state });
if let Some(guard_type) = guard_type {
if let Some(profiled_type) = profiled_type {
// Guard receiver class
self_val = fun.push_insn(block, Insn::GuardType { val: self_val, guard_type, state });
self_val = fun.push_insn(block, Insn::GuardType { val: self_val, guard_type: Type::from_profiled_type(profiled_type), state });
}
let cfun = unsafe { get_mct_func(cfunc) }.cast();
let mut cfunc_args = vec![self_val];
@ -2506,7 +2507,7 @@ struct ProfileOracle {
/// instruction index. At a given ISEQ instruction, the interpreter has profiled the stack
/// operands to a given ISEQ instruction, and this list of pairs of (InsnId, Type) map that
/// profiling information into HIR instructions.
types: HashMap<usize, Vec<(InsnId, Type)>>,
types: HashMap<usize, Vec<(InsnId, TypeDistributionSummary)>>,
}
impl ProfileOracle {
@ -2521,9 +2522,9 @@ impl ProfileOracle {
let entry = self.types.entry(iseq_insn_idx).or_insert_with(|| vec![]);
// operand_types is always going to be <= stack size (otherwise it would have an underflow
// at run-time) so use that to drive iteration.
for (idx, &insn_type) in operand_types.iter().rev().enumerate() {
for (idx, insn_type_distribution) in operand_types.iter().rev().enumerate() {
let insn = state.stack_topn(idx).expect("Unexpected stack underflow in profiling");
entry.push((insn, insn_type))
entry.push((insn, TypeDistributionSummary::new(insn_type_distribution)))
}
}
}
@ -5548,8 +5549,8 @@ mod opt_tests {
fn test@<compiled>:5:
bb0(v0:BasicObject):
PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010)
v6:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040)
v6:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1038)
Return v7
"#]]);
}
@ -5588,8 +5589,8 @@ mod opt_tests {
fn test@<compiled>:6:
bb0(v0:BasicObject):
PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010)
v6:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040)
v6:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1038)
Return v7
"#]]);
}
@ -5607,8 +5608,8 @@ mod opt_tests {
bb0(v0:BasicObject):
v2:Fixnum[3] = Const Value(3)
PatchPoint MethodRedefined(Object@0x1000, Integer@0x1008, cme:0x1010)
v7:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v8:BasicObject = SendWithoutBlockDirect v7, :Integer (0x1040), v2
v7:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v8:BasicObject = SendWithoutBlockDirect v7, :Integer (0x1038), v2
Return v8
"#]]);
}
@ -5629,8 +5630,8 @@ mod opt_tests {
v2:Fixnum[1] = Const Value(1)
v3:Fixnum[2] = Const Value(2)
PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010)
v8:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v2, v3
v8:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1038), v2, v3
Return v9
"#]]);
}
@ -5652,11 +5653,11 @@ mod opt_tests {
fn test@<compiled>:7:
bb0(v0:BasicObject):
PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010)
v8:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040)
PatchPoint MethodRedefined(Object@0x1000, bar@0x1048, cme:0x1050)
v11:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v12:BasicObject = SendWithoutBlockDirect v11, :bar (0x1040)
v8:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1038)
PatchPoint MethodRedefined(Object@0x1000, bar@0x1040, cme:0x1048)
v11:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v12:BasicObject = SendWithoutBlockDirect v11, :bar (0x1038)
Return v12
"#]]);
}
@ -6438,6 +6439,31 @@ mod opt_tests {
"#]]);
}
#[test]
fn test_send_direct_to_instance_method() {
eval("
class C
def foo
3
end
end
def test(c) = c.foo
c = C.new
test c
test c
");
assert_optimized_method_hir("test", expect![[r#"
fn test@<compiled>:8:
bb0(v0:BasicObject, v1:BasicObject):
PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010)
v7:BasicObject[class_exact:C] = GuardType v1, BasicObject[class_exact:C]
v8:BasicObject = SendWithoutBlockDirect v7, :foo (0x1038)
Return v8
"#]]);
}
#[test]
fn dont_specialize_call_to_iseq_with_opt() {
eval("
@ -7385,8 +7411,8 @@ mod opt_tests {
fn test@<compiled>:3:
bb0(v0:BasicObject):
PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010)
v6:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038)
v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040)
v6:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)]
v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1038)
Return v7
"#]]);
}

View file

@ -1,12 +1,13 @@
#![allow(non_upper_case_globals)]
use crate::cruby::{Qfalse, Qnil, Qtrue, VALUE, RUBY_T_ARRAY, RUBY_T_STRING, RUBY_T_HASH, RUBY_T_CLASS, RUBY_T_MODULE};
use crate::cruby::{rb_cInteger, rb_cFloat, rb_cArray, rb_cHash, rb_cString, rb_cSymbol, rb_cObject, rb_cTrueClass, rb_cFalseClass, rb_cNilClass, rb_cRange, rb_cSet, rb_cRegexp, rb_cClass, rb_cModule};
use crate::cruby::{rb_cInteger, rb_cFloat, rb_cArray, rb_cHash, rb_cString, rb_cSymbol, rb_cObject, rb_cTrueClass, rb_cFalseClass, rb_cNilClass, rb_cRange, rb_cSet, rb_cRegexp, rb_cClass, rb_cModule, rb_zjit_singleton_class_p};
use crate::cruby::ClassRelationship;
use crate::cruby::get_class_name;
use crate::cruby::ruby_sym_to_rust_string;
use crate::cruby::rb_mRubyVMFrozenCore;
use crate::cruby::rb_obj_class;
use crate::hir::PtrPrintMap;
use crate::profile::ProfiledType;
#[derive(Copy, Clone, Debug, PartialEq)]
/// Specialization of the type. If we know additional information about the object, we put it here.
@ -74,8 +75,14 @@ fn write_spec(f: &mut std::fmt::Formatter, printer: &TypePrinter) -> std::fmt::R
Specialization::Object(val) if val == unsafe { rb_mRubyVMFrozenCore } => write!(f, "[VMFrozenCore]"),
Specialization::Object(val) if ty.is_subtype(types::Symbol) => write!(f, "[:{}]", ruby_sym_to_rust_string(val)),
Specialization::Object(val) => write!(f, "[{}]", val.print(printer.ptr_map)),
// TODO(max): Ensure singleton classes never have Type specialization
Specialization::Type(val) if unsafe { rb_zjit_singleton_class_p(val) } =>
write!(f, "[class*:{}@{}]", get_class_name(val), val.print(printer.ptr_map)),
Specialization::Type(val) => write!(f, "[class:{}]", get_class_name(val)),
Specialization::TypeExact(val) => write!(f, "[class_exact:{}]", get_class_name(val)),
Specialization::TypeExact(val) if unsafe { rb_zjit_singleton_class_p(val) } =>
write!(f, "[class_exact*:{}@{}]", get_class_name(val), val.print(printer.ptr_map)),
Specialization::TypeExact(val) =>
write!(f, "[class_exact:{}]", get_class_name(val)),
Specialization::Int(val) if ty.is_subtype(types::CBool) => write!(f, "[{}]", val != 0),
Specialization::Int(val) if ty.is_subtype(types::CInt8) => write!(f, "[{}]", (val as i64) >> 56),
Specialization::Int(val) if ty.is_subtype(types::CInt16) => write!(f, "[{}]", (val as i64) >> 48),
@ -231,6 +238,20 @@ impl Type {
}
}
pub fn from_profiled_type(val: ProfiledType) -> Type {
if val.is_fixnum() { types::Fixnum }
else if val.is_flonum() { types::Flonum }
else if val.is_static_symbol() { types::StaticSymbol }
else if val.is_nil() { types::NilClass }
else if val.is_true() { types::TrueClass }
else if val.is_false() { types::FalseClass }
else if val.class() == unsafe { rb_cString } { types::StringExact }
else {
// TODO(max): Add more cases for inferring type bits from built-in types
Type { bits: bits::BasicObject, spec: Specialization::TypeExact(val.class()) }
}
}
/// Private. Only for creating type globals.
const fn from_bits(bits: u64) -> Type {
Type {
@ -274,12 +295,6 @@ impl Type {
self.is_subtype(types::NilClass) || self.is_subtype(types::FalseClass)
}
/// Top self is the Ruby global object, where top-level method definitions go. Return true if
/// this Type has a Ruby object specialization that is the top-level self.
pub fn is_top_self(&self) -> bool {
self.ruby_object() == Some(unsafe { crate::cruby::rb_vm_top_self() })
}
/// Return the object specialization, if any.
pub fn ruby_object(&self) -> Option<VALUE> {
match self.spec {

View file

@ -6,6 +6,7 @@
pub use std;
mod state;
mod distribution;
mod cruby;
mod cruby_methods;
mod hir;

View file

@ -1,7 +1,8 @@
// We use the YARV bytecode constants which have a CRuby-style name
#![allow(non_upper_case_globals)]
use crate::{cruby::*, gc::get_or_create_iseq_payload, hir_type::{types::{Empty}, Type}, options::get_option};
use crate::{cruby::*, gc::get_or_create_iseq_payload, options::get_option};
use crate::distribution::{Distribution, DistributionSummary};
/// Ephemeral state for profiling runtime information
struct Profiler {
@ -79,25 +80,100 @@ fn profile_insn(profiler: &mut Profiler, bare_opcode: ruby_vminsn_type) {
}
}
const DISTRIBUTION_SIZE: usize = 4;
pub type TypeDistribution = Distribution<ProfiledType, DISTRIBUTION_SIZE>;
pub type TypeDistributionSummary = DistributionSummary<ProfiledType, DISTRIBUTION_SIZE>;
/// Profile the Type of top-`n` stack operands
fn profile_operands(profiler: &mut Profiler, profile: &mut IseqProfile, n: usize) {
let types = &mut profile.opnd_types[profiler.insn_idx];
if types.len() <= n {
types.resize(n, Empty);
if types.is_empty() {
types.resize(n, TypeDistribution::new());
}
for i in 0..n {
let opnd_type = Type::from_value(profiler.peek_at_stack((n - i - 1) as isize));
types[i] = types[i].union(opnd_type);
if let Some(object) = types[i].gc_object() {
unsafe { rb_gc_writebarrier(profiler.iseq.into(), object) };
let obj = profiler.peek_at_stack((n - i - 1) as isize);
// TODO(max): Handle GC-hidden classes like Array, Hash, etc and make them look normal or
// drop them or something
let ty = ProfiledType::new(obj.class_of(), obj.shape_id_of());
unsafe { rb_gc_writebarrier(profiler.iseq.into(), ty.class()) };
types[i].observe(ty);
}
}
/// opt_send_without_block/opt_plus/... should store:
/// * the class of the receiver, so we can do method lookup
/// * the shape of the receiver, so we can optimize ivar lookup
/// With those two pieces of information, we can also determine when an object is an immediate:
/// * Integer + SPECIAL_CONST_SHAPE_ID == Fixnum
/// * Float + SPECIAL_CONST_SHAPE_ID == Flonum
/// * Symbol + SPECIAL_CONST_SHAPE_ID == StaticSymbol
/// * NilClass == Nil
/// * TrueClass == True
/// * FalseClass == False
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ProfiledType {
class: VALUE,
shape: ShapeId,
}
impl Default for ProfiledType {
fn default() -> Self {
Self::empty()
}
}
impl ProfiledType {
fn new(class: VALUE, shape: ShapeId) -> Self {
Self { class, shape }
}
pub fn empty() -> Self {
Self { class: VALUE(0), shape: INVALID_SHAPE_ID }
}
pub fn is_empty(&self) -> bool {
self.class == VALUE(0)
}
pub fn class(&self) -> VALUE {
self.class
}
pub fn shape(&self) -> ShapeId {
self.shape
}
pub fn is_fixnum(&self) -> bool {
self.class == unsafe { rb_cInteger } && self.shape == SPECIAL_CONST_SHAPE_ID
}
pub fn is_flonum(&self) -> bool {
self.class == unsafe { rb_cFloat } && self.shape == SPECIAL_CONST_SHAPE_ID
}
pub fn is_static_symbol(&self) -> bool {
self.class == unsafe { rb_cSymbol } && self.shape == SPECIAL_CONST_SHAPE_ID
}
pub fn is_nil(&self) -> bool {
self.class == unsafe { rb_cNilClass } && self.shape == SPECIAL_CONST_SHAPE_ID
}
pub fn is_true(&self) -> bool {
self.class == unsafe { rb_cTrueClass } && self.shape == SPECIAL_CONST_SHAPE_ID
}
pub fn is_false(&self) -> bool {
self.class == unsafe { rb_cFalseClass } && self.shape == SPECIAL_CONST_SHAPE_ID
}
}
#[derive(Debug)]
pub struct IseqProfile {
/// Type information of YARV instruction operands, indexed by the instruction index
opnd_types: Vec<Vec<Type>>,
opnd_types: Vec<Vec<TypeDistribution>>,
/// Number of profiled executions for each YARV instruction, indexed by the instruction index
num_profiles: Vec<u8>,
@ -112,16 +188,17 @@ impl IseqProfile {
}
/// Get profiled operand types for a given instruction index
pub fn get_operand_types(&self, insn_idx: usize) -> Option<&[Type]> {
pub fn get_operand_types(&self, insn_idx: usize) -> Option<&[TypeDistribution]> {
self.opnd_types.get(insn_idx).map(|v| &**v)
}
/// Run a given callback with every object in IseqProfile
pub fn each_object(&self, callback: impl Fn(VALUE)) {
for types in &self.opnd_types {
for opnd_type in types {
if let Some(object) = opnd_type.gc_object() {
callback(object);
for operands in &self.opnd_types {
for distribution in operands {
for profiled_type in distribution.each_item() {
// If the type is a GC object, call the callback
callback(profiled_type.class);
}
}
}
@ -129,10 +206,11 @@ impl IseqProfile {
/// Run a given callback with a mutable reference to every object in IseqProfile
pub fn each_object_mut(&mut self, callback: impl Fn(&mut VALUE)) {
for types in self.opnd_types.iter_mut() {
for opnd_type in types.iter_mut() {
if let Some(object) = opnd_type.gc_object_mut() {
callback(object);
for operands in &mut self.opnd_types {
for distribution in operands {
for ref mut profiled_type in distribution.each_item_mut() {
// If the type is a GC object, call the callback
callback(&mut profiled_type.class);
}
}
}