From ef95e5ba3de65d42fe0e1d41519dcf05db11a4e8 Mon Sep 17 00:00:00 2001 From: Max Bernstein Date: Tue, 5 Aug 2025 13:56:04 -0700 Subject: [PATCH] ZJIT: Profile type+shape distributions (#13901) ZJIT uses the interpreter to take type profiles of what objects pass through the code. It stores a compressed record of the history per opcode for the opcodes we select. Before this change, we re-used the HIR Type data-structure, a shallow type lattice, to store historical type information. This was quick for bringup but is quite lossy as profiles go: we get one bit per built-in type seen, and if we see a non-built-in type in addition, we end up with BasicObject. Not very helpful. Additionally, it does not give us any notion of cardinality: how many of each type did we see? This change brings with it a much more interesting slice of type history: a histogram. A Distribution holds a record of the top-N (where N is fixed at Ruby compile-time) `(Class, ShapeId)` pairs and their counts. It also holds an *other* count in case we see more than N pairs. Using this distribution, we can make more informed decisions about when we should use type information. We can determine if we are strictly monomorphic, very nearly monomorphic, or something else. Maybe the call-site is polymorphic, so we should have a polymorphic inline cache. Exciting stuff. I also plumb this new distribution into the HIR part of the compilation pipeline. --- zjit.c | 11 ++ zjit/bindgen/src/main.rs | 3 + zjit/src/cruby.rs | 10 +- zjit/src/cruby_bindings.inc.rs | 4 + zjit/src/distribution.rs | 266 +++++++++++++++++++++++++++++++++ zjit/src/hir.rs | 114 ++++++++------ zjit/src/hir_type/mod.rs | 31 +++- zjit/src/lib.rs | 1 + zjit/src/profile.rs | 114 +++++++++++--- 9 files changed, 482 insertions(+), 72 deletions(-) create mode 100644 zjit/src/distribution.rs diff --git a/zjit.c b/zjit.c index abe7422540..09ab128ae3 100644 --- a/zjit.c +++ b/zjit.c @@ -346,6 +346,17 @@ rb_zjit_shape_obj_too_complex_p(VALUE obj) return rb_shape_obj_too_complex_p(obj); } +enum { + RB_SPECIAL_CONST_SHAPE_ID = SPECIAL_CONST_SHAPE_ID, + RB_INVALID_SHAPE_ID = INVALID_SHAPE_ID, +}; + +bool +rb_zjit_singleton_class_p(VALUE klass) +{ + return RCLASS_SINGLETON_P(klass); +} + // Primitives used by zjit.rb. Don't put other functions below, which wouldn't use them. 
 VALUE rb_zjit_assert_compiles(rb_execution_context_t *ec, VALUE self);
 VALUE rb_zjit_stats(rb_execution_context_t *ec, VALUE self);
diff --git a/zjit/bindgen/src/main.rs b/zjit/bindgen/src/main.rs
index f67d8e91d3..77299c2657 100644
--- a/zjit/bindgen/src/main.rs
+++ b/zjit/bindgen/src/main.rs
@@ -351,8 +351,11 @@ fn main() {
         .allowlist_function("rb_optimized_call")
         .allowlist_function("rb_zjit_icache_invalidate")
         .allowlist_function("rb_zjit_print_exception")
+        .allowlist_function("rb_zjit_singleton_class_p")
         .allowlist_type("robject_offsets")
         .allowlist_type("rstring_offsets")
+        .allowlist_var("RB_SPECIAL_CONST_SHAPE_ID")
+        .allowlist_var("RB_INVALID_SHAPE_ID")
 
         // From jit.c
         .allowlist_function("rb_assert_holding_vm_lock")
diff --git a/zjit/src/cruby.rs b/zjit/src/cruby.rs
index afa3ddfb49..095a2988f8 100644
--- a/zjit/src/cruby.rs
+++ b/zjit/src/cruby.rs
@@ -265,6 +265,12 @@ pub struct ID(pub ::std::os::raw::c_ulong);
 /// Pointer to an ISEQ
 pub type IseqPtr = *const rb_iseq_t;
 
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub struct ShapeId(pub u32);
+
+pub const SPECIAL_CONST_SHAPE_ID: ShapeId = ShapeId(RB_SPECIAL_CONST_SHAPE_ID);
+pub const INVALID_SHAPE_ID: ShapeId = ShapeId(RB_INVALID_SHAPE_ID);
+
 // Given an ISEQ pointer, convert PC to insn_idx
 pub fn iseq_pc_to_insn_idx(iseq: IseqPtr, pc: *mut VALUE) -> Option<u32> {
     let pc_zero = unsafe { rb_iseq_pc_at_idx(iseq, 0) };
@@ -487,8 +493,8 @@ impl VALUE {
         unsafe { rb_zjit_shape_obj_too_complex_p(self) }
     }
 
-    pub fn shape_id_of(self) -> u32 {
-        unsafe { rb_obj_shape_id(self) }
+    pub fn shape_id_of(self) -> ShapeId {
+        ShapeId(unsafe { rb_obj_shape_id(self) })
     }
 
     pub fn embedded_p(self) -> bool {
diff --git a/zjit/src/cruby_bindings.inc.rs b/zjit/src/cruby_bindings.inc.rs
index 7fe1a0406a..5c939fabe7 100644
--- a/zjit/src/cruby_bindings.inc.rs
+++ b/zjit/src/cruby_bindings.inc.rs
@@ -719,6 +719,9 @@ pub const DEFINED_REF: defined_type = 15;
 pub const DEFINED_FUNC: defined_type = 16;
 pub const DEFINED_CONST_FROM: defined_type = 17;
 pub type defined_type = u32;
+pub const RB_SPECIAL_CONST_SHAPE_ID: _bindgen_ty_38 = 33554432;
+pub const RB_INVALID_SHAPE_ID: _bindgen_ty_38 = 4294967295;
+pub type _bindgen_ty_38 = u32;
 pub type rb_iseq_param_keyword_struct = rb_iseq_constant_body__bindgen_ty_1_rb_iseq_param_keyword;
 unsafe extern "C" {
     pub fn ruby_xfree(ptr: *mut ::std::os::raw::c_void);
@@ -938,6 +941,7 @@ unsafe extern "C" {
     pub fn rb_iseq_set_zjit_payload(iseq: *const rb_iseq_t, payload: *mut ::std::os::raw::c_void);
     pub fn rb_zjit_print_exception();
     pub fn rb_zjit_shape_obj_too_complex_p(obj: VALUE) -> bool;
+    pub fn rb_zjit_singleton_class_p(klass: VALUE) -> bool;
     pub fn rb_iseq_encoded_size(iseq: *const rb_iseq_t) -> ::std::os::raw::c_uint;
     pub fn rb_iseq_pc_at_idx(iseq: *const rb_iseq_t, insn_idx: u32) -> *mut VALUE;
     pub fn rb_iseq_opcode_at_pc(iseq: *const rb_iseq_t, pc: *const VALUE) -> ::std::os::raw::c_int;
diff --git a/zjit/src/distribution.rs b/zjit/src/distribution.rs
new file mode 100644
index 0000000000..5927ffa5c9
--- /dev/null
+++ b/zjit/src/distribution.rs
@@ -0,0 +1,266 @@
+/// This implementation was inspired by the type feedback module from Google's S6, which was
+/// written in C++ for use with Python. This is a new implementation in Rust created for use with
+/// Ruby instead of Python.
+
+#[derive(Debug, Clone)]
+pub struct Distribution<T: Copy + Default + PartialEq, const N: usize> {
+    /// buckets and counts have the same length
+    /// buckets[0] is always the most common item
+    buckets: [T; N],
+    counts: [usize; N],
+    /// if there is no more room, increment the fallback
+    other: usize,
+    // TODO(max): Add count disparity, which can help determine when to reset the distribution
+}
+
+impl<T: Copy + Default + PartialEq, const N: usize> Distribution<T, N> {
+    pub fn new() -> Self {
+        Self { buckets: [Default::default(); N], counts: [0; N], other: 0 }
+    }
+
+    pub fn observe(&mut self, item: T) {
+        for (bucket, count) in self.buckets.iter_mut().zip(self.counts.iter_mut()) {
+            if *bucket == item || *count == 0 {
+                *bucket = item;
+                *count += 1;
+                // Keep the most frequent item at the front
+                self.bubble_up();
+                return;
+            }
+        }
+        self.other += 1;
+    }
+
+    /// Keep the highest counted bucket at index 0
+    fn bubble_up(&mut self) {
+        if N == 0 { return; }
+        let max_index = self.counts.into_iter().enumerate().max_by_key(|(_, val)| *val).unwrap().0;
+        if max_index != 0 {
+            self.counts.swap(0, max_index);
+            self.buckets.swap(0, max_index);
+        }
+    }
+
+    pub fn each_item(&self) -> impl Iterator<Item = T> + '_ {
+        self.buckets.iter().zip(self.counts.iter())
+            .filter_map(|(&bucket, &count)| if count > 0 { Some(bucket) } else { None })
+    }
+
+    pub fn each_item_mut(&mut self) -> impl Iterator<Item = &mut T> + '_ {
+        self.buckets.iter_mut().zip(self.counts.iter())
+            .filter_map(|(bucket, &count)| if count > 0 { Some(bucket) } else { None })
+    }
+}
+
+#[derive(PartialEq, Debug, Clone, Copy)]
+enum DistributionKind {
+    /// No types seen
+    Empty,
+    /// One type seen
+    Monomorphic,
+    /// Between 2 and (fixed) N types seen
+    Polymorphic,
+    /// Polymorphic, but with a significant skew towards one type
+    SkewedPolymorphic,
+    /// More than N types seen with no clear winner
+    Megamorphic,
+    /// Megamorphic, but with a significant skew towards one type
+    SkewedMegamorphic,
+}
+
+#[derive(Debug)]
+pub struct DistributionSummary<T: Copy + Default + PartialEq, const N: usize> {
+    kind: DistributionKind,
+    buckets: [T; N],
+    // TODO(max): Determine if we need some notion of stability
+}
+
+const SKEW_THRESHOLD: f64 = 0.75;
+
+impl<T: Copy + Default + PartialEq, const N: usize> DistributionSummary<T, N> {
+    pub fn new(dist: &Distribution<T, N>) -> Self {
+        #[cfg(debug_assertions)]
+        {
+            let first_count = dist.counts[0];
+            for &count in &dist.counts[1..] {
+                assert!(first_count >= count, "First count should be the largest");
+            }
+        }
+        let num_seen = dist.counts.iter().sum::<usize>() + dist.other;
+        let kind = if dist.other == 0 {
+            // Seen <= N types total
+            if dist.counts[0] == 0 {
+                DistributionKind::Empty
+            } else if dist.counts[1] == 0 {
+                DistributionKind::Monomorphic
+            } else if (dist.counts[0] as f64)/(num_seen as f64) >= SKEW_THRESHOLD {
+                DistributionKind::SkewedPolymorphic
+            } else {
+                DistributionKind::Polymorphic
+            }
+        } else {
+            // Seen > N types total; considered megamorphic
+            if (dist.counts[0] as f64)/(num_seen as f64) >= SKEW_THRESHOLD {
+                DistributionKind::SkewedMegamorphic
+            } else {
+                DistributionKind::Megamorphic
+            }
+        };
+        Self { kind, buckets: dist.buckets.clone() }
+    }
+
+    pub fn is_monomorphic(&self) -> bool {
+        self.kind == DistributionKind::Monomorphic
+    }
+
+    pub fn is_skewed_polymorphic(&self) -> bool {
+        self.kind == DistributionKind::SkewedPolymorphic
+    }
+
+    pub fn is_skewed_megamorphic(&self) -> bool {
+        self.kind == DistributionKind::SkewedMegamorphic
+    }
+
+    pub fn bucket(&self, idx: usize) -> T {
+        assert!(idx < N, "index {idx} out of bounds for buckets[{N}]");
+        self.buckets[idx]
+    }
+}
+
+#[cfg(test)]
+mod distribution_tests {
+    use super::*;
+
+    #[test]
+    fn start_empty() {
+        let dist = Distribution::<u32, 4>::new();
+        assert_eq!(dist.other, 0);
+        assert!(dist.counts.iter().all(|&b| b == 0));
+    }
+
+    #[test]
+    fn observe_adds_record() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        assert_eq!(dist.buckets[0], 10);
+        assert_eq!(dist.counts[0], 1);
+        assert_eq!(dist.other, 0);
+    }
+
+    #[test]
+    fn observe_increments_record() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(10);
+        assert_eq!(dist.buckets[0], 10);
+        assert_eq!(dist.counts[0], 2);
+        assert_eq!(dist.other, 0);
+    }
+
+    #[test]
+    fn observe_two() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(10);
+        dist.observe(11);
+        dist.observe(11);
+        dist.observe(11);
+        assert_eq!(dist.buckets[0], 11);
+        assert_eq!(dist.counts[0], 3);
+        assert_eq!(dist.buckets[1], 10);
+        assert_eq!(dist.counts[1], 2);
+        assert_eq!(dist.other, 0);
+    }
+
+    #[test]
+    fn observe_with_max_increments_other() {
+        let mut dist = Distribution::<u32, 0>::new();
+        dist.observe(10);
+        assert!(dist.buckets.is_empty());
+        assert!(dist.counts.is_empty());
+        assert_eq!(dist.other, 1);
+    }
+
+    #[test]
+    fn empty_distribution_returns_empty_summary() {
+        let dist = Distribution::<u32, 4>::new();
+        let summary = DistributionSummary::new(&dist);
+        assert_eq!(summary.kind, DistributionKind::Empty);
+    }
+
+    #[test]
+    fn monomorphic_distribution_returns_monomorphic_summary() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(10);
+        let summary = DistributionSummary::new(&dist);
+        assert_eq!(summary.kind, DistributionKind::Monomorphic);
+        assert_eq!(summary.buckets[0], 10);
+    }
+
+    #[test]
+    fn polymorphic_distribution_returns_polymorphic_summary() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(11);
+        dist.observe(11);
+        let summary = DistributionSummary::new(&dist);
+        assert_eq!(summary.kind, DistributionKind::Polymorphic);
+        assert_eq!(summary.buckets[0], 11);
+        assert_eq!(summary.buckets[1], 10);
+    }
+
+    #[test]
+    fn skewed_polymorphic_distribution_returns_skewed_polymorphic_summary() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(11);
+        dist.observe(11);
+        dist.observe(11);
+        let summary = DistributionSummary::new(&dist);
+        assert_eq!(summary.kind, DistributionKind::SkewedPolymorphic);
+        assert_eq!(summary.buckets[0], 11);
+        assert_eq!(summary.buckets[1], 10);
+    }
+
+    #[test]
+    fn megamorphic_distribution_returns_megamorphic_summary() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(11);
+        dist.observe(12);
+        dist.observe(13);
+        dist.observe(14);
+        dist.observe(11);
+        let summary = DistributionSummary::new(&dist);
+        assert_eq!(summary.kind, DistributionKind::Megamorphic);
+        assert_eq!(summary.buckets[0], 11);
+    }
+
+    #[test]
+    fn skewed_megamorphic_distribution_returns_skewed_megamorphic_summary() {
+        let mut dist = Distribution::<u32, 4>::new();
+        dist.observe(10);
+        dist.observe(11);
+        dist.observe(11);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(12);
+        dist.observe(13);
+        dist.observe(14);
+        let summary = DistributionSummary::new(&dist);
+        assert_eq!(summary.kind, DistributionKind::SkewedMegamorphic);
+        assert_eq!(summary.buckets[0], 12);
+    }
+}
diff --git a/zjit/src/hir.rs b/zjit/src/hir.rs
index 976580c85b..203be0661e 100644
--- a/zjit/src/hir.rs
+++ b/zjit/src/hir.rs
@@ -11,6 +11,7 @@ use std::{
 };
 use crate::hir_type::{Type, types};
 use crate::bitset::BitSet;
+use crate::profile::{TypeDistributionSummary, ProfiledType};
 
 /// An index of an [`Insn`] in a [`Function`]. This is a popular
 /// type since this effectively acts as a pointer to an [`Insn`].
@@ -1357,19 +1358,23 @@ impl Function {
     /// Return the interpreter-profiled type of the HIR instruction at the given ISEQ instruction
     /// index, if it is known. This historical type record is not a guarantee and must be checked
     /// with a GuardType or similar.
-    fn profiled_type_of_at(&self, insn: InsnId, iseq_insn_idx: usize) -> Option<Type> {
+    fn profiled_type_of_at(&self, insn: InsnId, iseq_insn_idx: usize) -> Option<ProfiledType> {
         let Some(ref profiles) = self.profiles else { return None };
         let Some(entries) = profiles.types.get(&iseq_insn_idx) else { return None };
-        for &(entry_insn, entry_type) in entries {
-            if self.union_find.borrow().find_const(entry_insn) == self.union_find.borrow().find_const(insn) {
-                return Some(entry_type);
+        for (entry_insn, entry_type_summary) in entries {
+            if self.union_find.borrow().find_const(*entry_insn) == self.union_find.borrow().find_const(insn) {
+                if entry_type_summary.is_monomorphic() || entry_type_summary.is_skewed_polymorphic() {
+                    return Some(entry_type_summary.bucket(0));
+                } else {
+                    return None;
+                }
             }
         }
         None
     }
 
-    fn likely_is_fixnum(&self, val: InsnId, profiled_type: Type) -> bool {
-        return self.is_a(val, types::Fixnum) || profiled_type.is_subtype(types::Fixnum);
+    fn likely_is_fixnum(&self, val: InsnId, profiled_type: ProfiledType) -> bool {
+        return self.is_a(val, types::Fixnum) || profiled_type.is_fixnum();
     }
 
     fn coerce_to_fixnum(&mut self, block: BlockId, val: InsnId, state: InsnId) -> InsnId {
@@ -1380,8 +1385,8 @@ impl Function {
     fn arguments_likely_fixnums(&mut self, left: InsnId, right: InsnId, state: InsnId) -> bool {
         let frame_state = self.frame_state(state);
         let iseq_insn_idx = frame_state.insn_idx as usize;
-        let left_profiled_type = self.profiled_type_of_at(left, iseq_insn_idx).unwrap_or(types::BasicObject);
-        let right_profiled_type = self.profiled_type_of_at(right, iseq_insn_idx).unwrap_or(types::BasicObject);
+        let left_profiled_type = self.profiled_type_of_at(left, iseq_insn_idx).unwrap_or(ProfiledType::empty());
+        let right_profiled_type = self.profiled_type_of_at(right, iseq_insn_idx).unwrap_or(ProfiledType::empty());
         self.likely_is_fixnum(left, left_profiled_type) && self.likely_is_fixnum(right, right_profiled_type)
     }
 
@@ -1510,15 +1515,16 @@ impl Function {
                     self.try_rewrite_aref(block, insn_id, self_val, args[0], state),
                 Insn::SendWithoutBlock { mut self_val, cd, args, state } => {
                     let frame_state = self.frame_state(state);
-                    let (klass, guard_equal_to) = if let Some(klass) = self.type_of(self_val).runtime_exact_ruby_class() {
+                    let (klass, profiled_type) = if let Some(klass) = self.type_of(self_val).runtime_exact_ruby_class() {
                         // If we know the class statically, use it to fold the lookup at compile-time.
                         (klass, None)
                     } else {
-                        // If we know that self is top-self from profile information, guard and use it to fold the lookup at compile-time.
-                        match self.profiled_type_of_at(self_val, frame_state.insn_idx) {
-                            Some(self_type) if self_type.is_top_self() => (self_type.exact_ruby_class().unwrap(), self_type.ruby_object()),
-                            _ => { self.push_insn_id(block, insn_id); continue; }
-                        }
+                        // If we know that self is reasonably monomorphic from profile information, guard and use it to fold the lookup at compile-time.
+                        // TODO(max): Figure out how to handle top self?
+                        let Some(recv_type) = self.profiled_type_of_at(self_val, frame_state.insn_idx) else {
+                            self.push_insn_id(block, insn_id); continue;
+                        };
+                        (recv_type.class(), Some(recv_type))
                     };
                     let ci = unsafe { get_call_data_ci(cd) }; // info about the call site
                     let mid = unsafe { vm_ci_mid(ci) };
@@ -1542,8 +1548,8 @@ impl Function {
                         self.push_insn_id(block, insn_id); continue;
                     }
                     self.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass, method: mid, cme }, state });
-                    if let Some(expected) = guard_equal_to {
-                        self_val = self.push_insn(block, Insn::GuardBitEquals { val: self_val, expected, state });
+                    if let Some(profiled_type) = profiled_type {
+                        self_val = self.push_insn(block, Insn::GuardType { val: self_val, guard_type: Type::from_profiled_type(profiled_type), state });
                     }
                     let send_direct = self.push_insn(block, Insn::SendWithoutBlockDirect { self_val, cd, cme, iseq, args, state });
                     self.make_equal_to(insn_id, send_direct);
@@ -1611,17 +1617,12 @@
     let method_id = unsafe { rb_vm_ci_mid(call_info) };
 
     // If we have info about the class of the receiver
-    //
-    // TODO(alan): there was a seemingly a miscomp here if you swap with
-    // `inexact_ruby_class`. Theoretically it can call a method too general
-    // for the receiver. Confirm and add a test.
-    let (recv_class, guard_type) = if let Some(klass) = self_type.runtime_exact_ruby_class() {
-        (klass, None)
+    let (recv_class, profiled_type) = if let Some(class) = self_type.runtime_exact_ruby_class() {
+        (class, None)
     } else {
         let iseq_insn_idx = fun.frame_state(state).insn_idx;
         let Some(recv_type) = fun.profiled_type_of_at(self_val, iseq_insn_idx) else { return Err(()) };
-        let Some(recv_class) = recv_type.runtime_exact_ruby_class() else { return Err(()) };
-        (recv_class, Some(recv_type.unspecialized()))
+        (recv_type.class(), Some(recv_type))
     };
 
     // Do method lookup
@@ -1661,9 +1662,9 @@
     if ci_flags & VM_CALL_ARGS_SIMPLE != 0 {
         // Commit to the replacement. Put PatchPoint.
         fun.push_insn(block, Insn::PatchPoint { invariant: Invariant::MethodRedefined { klass: recv_class, method: method_id, cme: method }, state });
-        if let Some(guard_type) = guard_type {
+        if let Some(profiled_type) = profiled_type {
             // Guard receiver class
-            self_val = fun.push_insn(block, Insn::GuardType { val: self_val, guard_type, state });
+            self_val = fun.push_insn(block, Insn::GuardType { val: self_val, guard_type: Type::from_profiled_type(profiled_type), state });
         }
         let cfun = unsafe { get_mct_func(cfunc) }.cast();
         let mut cfunc_args = vec![self_val];
@@ -2506,7 +2507,7 @@ struct ProfileOracle {
     /// instruction index. At a given ISEQ instruction, the interpreter has profiled the stack
     /// operands to a given ISEQ instruction, and this list of pairs of (InsnId, Type) map that
     /// profiling information into HIR instructions.
-    types: HashMap<usize, Vec<(InsnId, Type)>>,
+    types: HashMap<usize, Vec<(InsnId, TypeDistributionSummary)>>,
 }
 
 impl ProfileOracle {
@@ -2521,9 +2522,9 @@ impl ProfileOracle {
         let entry = self.types.entry(iseq_insn_idx).or_insert_with(|| vec![]);
         // operand_types is always going to be <= stack size (otherwise it would have an underflow
         // at run-time) so use that to drive iteration.
- for (idx, &insn_type) in operand_types.iter().rev().enumerate() { + for (idx, insn_type_distribution) in operand_types.iter().rev().enumerate() { let insn = state.stack_topn(idx).expect("Unexpected stack underflow in profiling"); - entry.push((insn, insn_type)) + entry.push((insn, TypeDistributionSummary::new(insn_type_distribution))) } } } @@ -5548,8 +5549,8 @@ mod opt_tests { fn test@:5: bb0(v0:BasicObject): PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v6:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + v6:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1038) Return v7 "#]]); } @@ -5588,8 +5589,8 @@ mod opt_tests { fn test@:6: bb0(v0:BasicObject): PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v6:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + v6:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1038) Return v7 "#]]); } @@ -5607,8 +5608,8 @@ mod opt_tests { bb0(v0:BasicObject): v2:Fixnum[3] = Const Value(3) PatchPoint MethodRedefined(Object@0x1000, Integer@0x1008, cme:0x1010) - v7:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v8:BasicObject = SendWithoutBlockDirect v7, :Integer (0x1040), v2 + v7:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v8:BasicObject = SendWithoutBlockDirect v7, :Integer (0x1038), v2 Return v8 "#]]); } @@ -5629,8 +5630,8 @@ mod opt_tests { v2:Fixnum[1] = Const Value(1) v3:Fixnum[2] = Const Value(2) PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v8:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040), v2, v3 + v8:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1038), v2, v3 Return v9 "#]]); } @@ -5652,11 +5653,11 @@ mod opt_tests { fn test@:7: bb0(v0:BasicObject): PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v8:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1040) - PatchPoint MethodRedefined(Object@0x1000, bar@0x1048, cme:0x1050) - v11:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v12:BasicObject = SendWithoutBlockDirect v11, :bar (0x1040) + v8:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v9:BasicObject = SendWithoutBlockDirect v8, :foo (0x1038) + PatchPoint MethodRedefined(Object@0x1000, bar@0x1040, cme:0x1048) + v11:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v12:BasicObject = SendWithoutBlockDirect v11, :bar (0x1038) Return v12 "#]]); } @@ -6438,6 +6439,31 @@ mod opt_tests { "#]]); } + #[test] + fn test_send_direct_to_instance_method() { + eval(" + class C + def foo + 3 + end + end + + def test(c) = c.foo + c = C.new + test c + test c + "); + + assert_optimized_method_hir("test", expect![[r#" + fn test@:8: + bb0(v0:BasicObject, v1:BasicObject): + PatchPoint MethodRedefined(C@0x1000, foo@0x1008, cme:0x1010) + 
v7:BasicObject[class_exact:C] = GuardType v1, BasicObject[class_exact:C] + v8:BasicObject = SendWithoutBlockDirect v7, :foo (0x1038) + Return v8 + "#]]); + } + #[test] fn dont_specialize_call_to_iseq_with_opt() { eval(" @@ -7385,8 +7411,8 @@ mod opt_tests { fn test@:3: bb0(v0:BasicObject): PatchPoint MethodRedefined(Object@0x1000, foo@0x1008, cme:0x1010) - v6:BasicObject[VALUE(0x1038)] = GuardBitEquals v0, VALUE(0x1038) - v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1040) + v6:BasicObject[class_exact*:Object@VALUE(0x1000)] = GuardType v0, BasicObject[class_exact*:Object@VALUE(0x1000)] + v7:BasicObject = SendWithoutBlockDirect v6, :foo (0x1038) Return v7 "#]]); } diff --git a/zjit/src/hir_type/mod.rs b/zjit/src/hir_type/mod.rs index 9ad0bdc649..84679c419d 100644 --- a/zjit/src/hir_type/mod.rs +++ b/zjit/src/hir_type/mod.rs @@ -1,12 +1,13 @@ #![allow(non_upper_case_globals)] use crate::cruby::{Qfalse, Qnil, Qtrue, VALUE, RUBY_T_ARRAY, RUBY_T_STRING, RUBY_T_HASH, RUBY_T_CLASS, RUBY_T_MODULE}; -use crate::cruby::{rb_cInteger, rb_cFloat, rb_cArray, rb_cHash, rb_cString, rb_cSymbol, rb_cObject, rb_cTrueClass, rb_cFalseClass, rb_cNilClass, rb_cRange, rb_cSet, rb_cRegexp, rb_cClass, rb_cModule}; +use crate::cruby::{rb_cInteger, rb_cFloat, rb_cArray, rb_cHash, rb_cString, rb_cSymbol, rb_cObject, rb_cTrueClass, rb_cFalseClass, rb_cNilClass, rb_cRange, rb_cSet, rb_cRegexp, rb_cClass, rb_cModule, rb_zjit_singleton_class_p}; use crate::cruby::ClassRelationship; use crate::cruby::get_class_name; use crate::cruby::ruby_sym_to_rust_string; use crate::cruby::rb_mRubyVMFrozenCore; use crate::cruby::rb_obj_class; use crate::hir::PtrPrintMap; +use crate::profile::ProfiledType; #[derive(Copy, Clone, Debug, PartialEq)] /// Specialization of the type. If we know additional information about the object, we put it here. 
@@ -74,8 +75,14 @@ fn write_spec(f: &mut std::fmt::Formatter, printer: &TypePrinter) -> std::fmt::R Specialization::Object(val) if val == unsafe { rb_mRubyVMFrozenCore } => write!(f, "[VMFrozenCore]"), Specialization::Object(val) if ty.is_subtype(types::Symbol) => write!(f, "[:{}]", ruby_sym_to_rust_string(val)), Specialization::Object(val) => write!(f, "[{}]", val.print(printer.ptr_map)), + // TODO(max): Ensure singleton classes never have Type specialization + Specialization::Type(val) if unsafe { rb_zjit_singleton_class_p(val) } => + write!(f, "[class*:{}@{}]", get_class_name(val), val.print(printer.ptr_map)), Specialization::Type(val) => write!(f, "[class:{}]", get_class_name(val)), - Specialization::TypeExact(val) => write!(f, "[class_exact:{}]", get_class_name(val)), + Specialization::TypeExact(val) if unsafe { rb_zjit_singleton_class_p(val) } => + write!(f, "[class_exact*:{}@{}]", get_class_name(val), val.print(printer.ptr_map)), + Specialization::TypeExact(val) => + write!(f, "[class_exact:{}]", get_class_name(val)), Specialization::Int(val) if ty.is_subtype(types::CBool) => write!(f, "[{}]", val != 0), Specialization::Int(val) if ty.is_subtype(types::CInt8) => write!(f, "[{}]", (val as i64) >> 56), Specialization::Int(val) if ty.is_subtype(types::CInt16) => write!(f, "[{}]", (val as i64) >> 48), @@ -231,6 +238,20 @@ impl Type { } } + pub fn from_profiled_type(val: ProfiledType) -> Type { + if val.is_fixnum() { types::Fixnum } + else if val.is_flonum() { types::Flonum } + else if val.is_static_symbol() { types::StaticSymbol } + else if val.is_nil() { types::NilClass } + else if val.is_true() { types::TrueClass } + else if val.is_false() { types::FalseClass } + else if val.class() == unsafe { rb_cString } { types::StringExact } + else { + // TODO(max): Add more cases for inferring type bits from built-in types + Type { bits: bits::BasicObject, spec: Specialization::TypeExact(val.class()) } + } + } + /// Private. Only for creating type globals. const fn from_bits(bits: u64) -> Type { Type { @@ -274,12 +295,6 @@ impl Type { self.is_subtype(types::NilClass) || self.is_subtype(types::FalseClass) } - /// Top self is the Ruby global object, where top-level method definitions go. Return true if - /// this Type has a Ruby object specialization that is the top-level self. - pub fn is_top_self(&self) -> bool { - self.ruby_object() == Some(unsafe { crate::cruby::rb_vm_top_self() }) - } - /// Return the object specialization, if any. 
     pub fn ruby_object(&self) -> Option<VALUE> {
         match self.spec {
diff --git a/zjit/src/lib.rs b/zjit/src/lib.rs
index d5ca2b74ba..b36bf6515e 100644
--- a/zjit/src/lib.rs
+++ b/zjit/src/lib.rs
@@ -6,6 +6,7 @@ pub use std;
 
 mod state;
+mod distribution;
 mod cruby;
 mod cruby_methods;
 mod hir;
diff --git a/zjit/src/profile.rs b/zjit/src/profile.rs
index 7db8e44c7a..a99229604b 100644
--- a/zjit/src/profile.rs
+++ b/zjit/src/profile.rs
@@ -1,7 +1,8 @@
 // We use the YARV bytecode constants which have a CRuby-style name
 #![allow(non_upper_case_globals)]
 
-use crate::{cruby::*, gc::get_or_create_iseq_payload, hir_type::{types::{Empty}, Type}, options::get_option};
+use crate::{cruby::*, gc::get_or_create_iseq_payload, options::get_option};
+use crate::distribution::{Distribution, DistributionSummary};
 
 /// Ephemeral state for profiling runtime information
 struct Profiler {
@@ -79,25 +80,100 @@ fn profile_insn(profiler: &mut Profiler, bare_opcode: ruby_vminsn_type) {
     }
 }
 
+const DISTRIBUTION_SIZE: usize = 4;
+
+pub type TypeDistribution = Distribution<ProfiledType, DISTRIBUTION_SIZE>;
+
+pub type TypeDistributionSummary = DistributionSummary<ProfiledType, DISTRIBUTION_SIZE>;
+
 /// Profile the Type of top-`n` stack operands
 fn profile_operands(profiler: &mut Profiler, profile: &mut IseqProfile, n: usize) {
     let types = &mut profile.opnd_types[profiler.insn_idx];
-    if types.len() <= n {
-        types.resize(n, Empty);
+    if types.is_empty() {
+        types.resize(n, TypeDistribution::new());
     }
     for i in 0..n {
-        let opnd_type = Type::from_value(profiler.peek_at_stack((n - i - 1) as isize));
-        types[i] = types[i].union(opnd_type);
-        if let Some(object) = types[i].gc_object() {
-            unsafe { rb_gc_writebarrier(profiler.iseq.into(), object) };
-        }
+        let obj = profiler.peek_at_stack((n - i - 1) as isize);
+        // TODO(max): Handle GC-hidden classes like Array, Hash, etc and make them look normal or
+        // drop them or something
+        let ty = ProfiledType::new(obj.class_of(), obj.shape_id_of());
+        unsafe { rb_gc_writebarrier(profiler.iseq.into(), ty.class()) };
+        types[i].observe(ty);
+    }
+}
+
+/// opt_send_without_block/opt_plus/... should store:
+/// * the class of the receiver, so we can do method lookup
+/// * the shape of the receiver, so we can optimize ivar lookup
+/// With those two pieces of information, we can also determine when an object is an immediate:
+/// * Integer + SPECIAL_CONST_SHAPE_ID == Fixnum
+/// * Float + SPECIAL_CONST_SHAPE_ID == Flonum
+/// * Symbol + SPECIAL_CONST_SHAPE_ID == StaticSymbol
+/// * NilClass == Nil
+/// * TrueClass == True
+/// * FalseClass == False
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct ProfiledType {
+    class: VALUE,
+    shape: ShapeId,
+}
+
+impl Default for ProfiledType {
+    fn default() -> Self {
+        Self::empty()
+    }
+}
+
+impl ProfiledType {
+    fn new(class: VALUE, shape: ShapeId) -> Self {
+        Self { class, shape }
+    }
+
+    pub fn empty() -> Self {
+        Self { class: VALUE(0), shape: INVALID_SHAPE_ID }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.class == VALUE(0)
+    }
+
+    pub fn class(&self) -> VALUE {
+        self.class
+    }
+
+    pub fn shape(&self) -> ShapeId {
+        self.shape
+    }
+
+    pub fn is_fixnum(&self) -> bool {
+        self.class == unsafe { rb_cInteger } && self.shape == SPECIAL_CONST_SHAPE_ID
+    }
+
+    pub fn is_flonum(&self) -> bool {
+        self.class == unsafe { rb_cFloat } && self.shape == SPECIAL_CONST_SHAPE_ID
+    }
+
+    pub fn is_static_symbol(&self) -> bool {
+        self.class == unsafe { rb_cSymbol } && self.shape == SPECIAL_CONST_SHAPE_ID
+    }
+
+    pub fn is_nil(&self) -> bool {
+        self.class == unsafe { rb_cNilClass } && self.shape == SPECIAL_CONST_SHAPE_ID
+    }
+
+    pub fn is_true(&self) -> bool {
+        self.class == unsafe { rb_cTrueClass } && self.shape == SPECIAL_CONST_SHAPE_ID
+    }
+
+    pub fn is_false(&self) -> bool {
+        self.class == unsafe { rb_cFalseClass } && self.shape == SPECIAL_CONST_SHAPE_ID
+    }
 }
 
 #[derive(Debug)]
 pub struct IseqProfile {
     /// Type information of YARV instruction operands, indexed by the instruction index
-    opnd_types: Vec<Vec<Type>>,
+    opnd_types: Vec<Vec<TypeDistribution>>,
 
     /// Number of profiled executions for each YARV instruction, indexed by the instruction index
     num_profiles: Vec<u8>,
@@ -112,16 +188,17 @@ impl IseqProfile {
     }
 
     /// Get profiled operand types for a given instruction index
-    pub fn get_operand_types(&self, insn_idx: usize) -> Option<&[Type]> {
+    pub fn get_operand_types(&self, insn_idx: usize) -> Option<&[TypeDistribution]> {
         self.opnd_types.get(insn_idx).map(|v| &**v)
     }
 
     /// Run a given callback with every object in IseqProfile
     pub fn each_object(&self, callback: impl Fn(VALUE)) {
-        for types in &self.opnd_types {
-            for opnd_type in types {
-                if let Some(object) = opnd_type.gc_object() {
-                    callback(object);
+        for operands in &self.opnd_types {
+            for distribution in operands {
+                for profiled_type in distribution.each_item() {
+                    // If the type is a GC object, call the callback
+                    callback(profiled_type.class);
                 }
             }
         }
    }
 
     /// Run a given callback with a mutable reference to every object in IseqProfile
     pub fn each_object_mut(&mut self, callback: impl Fn(&mut VALUE)) {
-        for types in self.opnd_types.iter_mut() {
-            for opnd_type in types.iter_mut() {
-                if let Some(object) = opnd_type.gc_object_mut() {
-                    callback(object);
+        for operands in &mut self.opnd_types {
+            for distribution in operands {
+                for ref mut profiled_type in distribution.each_item_mut() {
+                    // If the type is a GC object, call the callback
+                    callback(&mut profiled_type.class);
                 }
             }
         }
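
Illustrative aside (not part of the patch): the consumer-side logic is easiest to see in a small sketch written in the same style as the tests in zjit/src/distribution.rs. The bucket values below are arbitrary stand-ins for profiled (Class, ShapeId) pairs, N = 4 mirrors DISTRIBUTION_SIZE, and the hypothetical test name is mine; the final check mirrors the new gate in profiled_type_of_at, which only specializes when the summary is monomorphic or heavily skewed and then reads bucket 0.

    // A sketch, assuming it lives inside `mod distribution_tests` so `Distribution`,
    // `DistributionSummary`, and SKEW_THRESHOLD (0.75) are in scope via `use super::*`.
    #[test]
    fn skewed_profile_still_exposes_dominant_bucket() {
        let mut dist = Distribution::<u32, 4>::new();
        // Three observations of one "class" and one of another: the dominant bucket
        // holds 3/4 >= SKEW_THRESHOLD of all observations, so the summary is
        // skewed-polymorphic rather than monomorphic.
        dist.observe(10);
        dist.observe(10);
        dist.observe(10);
        dist.observe(11);
        let summary = DistributionSummary::new(&dist);
        // Mirror of the gate in profiled_type_of_at: specialize only when the profile
        // is monomorphic or heavily skewed, then guard on the dominant bucket.
        let should_specialize = summary.is_monomorphic() || summary.is_skewed_polymorphic();
        assert!(should_specialize);
        assert_eq!(summary.bucket(0), 10);
    }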