ZJIT: Fix ObjToString rewrite (#14196)

ZJIT: Fix ObjToString rewrite

Currently, the rewrite for `ObjToString` always replaces it with a
`SendWithoutBlock(to_s)` instruction when the receiver is not a
string literal. This is incorrect because it calls `to_s` on the
receiver even if it's already a string.

This change fixes it by:
- Avoiding the `SendWithoutBlock(to_s)` rewrite
- Implement codegen for `ObjToString`
This commit is contained in:
Stan Lo 2025-08-13 21:03:26 +01:00 committed by GitHub
parent ff622978d0
commit 2b16f27a35
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 98 additions and 6 deletions

View file

@ -1343,6 +1343,71 @@ class TestZJIT < Test::Unit::TestCase
}, call_threshold: 2 }, call_threshold: 2
end end
def test_objtostring_calls_to_s_on_non_strings
assert_compiles '["foo", "foo"]', %q{
results = []
class Foo
def to_s
"foo"
end
end
def test(str)
"#{str}"
end
results << test(Foo.new)
results << test(Foo.new)
results
}
end
def test_objtostring_rewrite_does_not_call_to_s_on_strings
assert_compiles '["foo", "foo"]', %q{
results = []
class String
def to_s
"bad"
end
end
def test(foo)
"#{foo}"
end
results << test("foo")
results << test("foo")
results
}
end
def test_objtostring_rewrite_does_not_call_to_s_on_string_subclasses
assert_compiles '["foo", "foo"]', %q{
results = []
class StringSubclass < String
def to_s
"bad"
end
end
foo = StringSubclass.new("foo")
def test(str)
"#{str}"
end
results << test(foo)
results << test(foo)
results
}
end
def test_string_bytesize_with_guard def test_string_bytesize_with_guard
assert_compiles '5', %q{ assert_compiles '5', %q{
def test(str) def test(str)

View file

@ -6209,6 +6209,14 @@ vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
return Qundef; return Qundef;
} }
// ZJIT implementation is using the C function
// and needs to call a non-static function
VALUE
rb_vm_objtostring(const rb_iseq_t *iseq, VALUE recv, CALL_DATA cd)
{
return vm_objtostring(iseq, recv, cd);
}
static VALUE static VALUE
vm_opt_ary_freeze(VALUE ary, int bop, ID id) vm_opt_ary_freeze(VALUE ary, int bop, ID id)
{ {

View file

@ -378,6 +378,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?, Insn::AnyToString { val, str, state } => gen_anytostring(asm, opnd!(val), opnd!(str), &function.frame_state(*state))?,
Insn::Defined { op_type, obj, pushval, v, state } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v), &function.frame_state(*state))?, Insn::Defined { op_type, obj, pushval, v, state } => gen_defined(jit, asm, *op_type, *obj, *pushval, opnd!(v), &function.frame_state(*state))?,
&Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)), &Insn::IncrCounter(counter) => return Some(gen_incr_counter(asm, counter)),
Insn::ObjToString { val, cd, state, .. } => gen_objtostring(jit, asm, opnd!(val), *cd, &function.frame_state(*state))?,
Insn::ArrayExtend { .. } Insn::ArrayExtend { .. }
| Insn::ArrayMax { .. } | Insn::ArrayMax { .. }
| Insn::ArrayPush { .. } | Insn::ArrayPush { .. }
@ -386,7 +387,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
| Insn::FixnumMod { .. } | Insn::FixnumMod { .. }
| Insn::HashDup { .. } | Insn::HashDup { .. }
| Insn::NewHash { .. } | Insn::NewHash { .. }
| Insn::ObjToString { .. }
| Insn::Send { .. } | Insn::Send { .. }
| Insn::StringIntern { .. } | Insn::StringIntern { .. }
| Insn::Throw { .. } | Insn::Throw { .. }
@ -445,6 +445,24 @@ fn gen_get_ep(asm: &mut Assembler, level: u32) -> Opnd {
ep_opnd ep_opnd
} }
fn gen_objtostring(jit: &mut JITState, asm: &mut Assembler, val: Opnd, cd: *const rb_call_data, state: &FrameState) -> Option<Opnd> {
gen_prepare_non_leaf_call(jit, asm, state)?;
let iseq_opnd = Opnd::Value(jit.iseq.into());
// TODO: Specialize for immediate types
// Call rb_vm_objtostring(iseq, recv, cd)
let ret = asm_ccall!(asm, rb_vm_objtostring, iseq_opnd, val, (cd as usize).into());
// TODO: Call `to_s` on the receiver if rb_vm_objtostring returns Qundef
// Need to replicate what CALL_SIMPLE_METHOD does
asm_comment!(asm, "side-exit if rb_vm_objtostring returns Qundef");
asm.cmp(ret, Qundef.into());
asm.je(side_exit(jit, state, ObjToStringFallback)?);
Some(ret)
}
fn gen_defined(jit: &JITState, asm: &mut Assembler, op_type: usize, obj: VALUE, pushval: VALUE, tested_value: Opnd, state: &FrameState) -> Option<Opnd> { fn gen_defined(jit: &JITState, asm: &mut Assembler, op_type: usize, obj: VALUE, pushval: VALUE, tested_value: Opnd, state: &FrameState) -> Option<Opnd> {
match op_type as defined_type { match op_type as defined_type {
DEFINED_YIELD => { DEFINED_YIELD => {

View file

@ -159,6 +159,7 @@ unsafe extern "C" {
pub fn rb_vm_stack_canary() -> VALUE; pub fn rb_vm_stack_canary() -> VALUE;
pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int); pub fn rb_vm_push_cfunc_frame(cme: *const rb_callable_method_entry_t, recv_idx: c_int);
pub fn rb_obj_class(klass: VALUE) -> VALUE; pub fn rb_obj_class(klass: VALUE) -> VALUE;
pub fn rb_vm_objtostring(iseq: IseqPtr, recv: VALUE, cd: *const rb_call_data) -> VALUE;
} }
// Renames // Renames

View file

@ -414,6 +414,7 @@ pub enum SideExitReason {
GuardBitEquals(VALUE), GuardBitEquals(VALUE),
PatchPoint(Invariant), PatchPoint(Invariant),
CalleeSideExit, CalleeSideExit,
ObjToStringFallback,
} }
impl std::fmt::Display for SideExitReason { impl std::fmt::Display for SideExitReason {
@ -1615,13 +1616,12 @@ impl Function {
self.insn_types[replacement.0] = self.infer_type(replacement); self.insn_types[replacement.0] = self.infer_type(replacement);
self.make_equal_to(insn_id, replacement); self.make_equal_to(insn_id, replacement);
} }
Insn::ObjToString { val, cd, state, .. } => { Insn::ObjToString { val, .. } => {
if self.is_a(val, types::String) { if self.is_a(val, types::String) {
// behaves differently from `SendWithoutBlock` with `mid:to_s` because ObjToString should not have a patch point for String to_s being redefined // behaves differently from `SendWithoutBlock` with `mid:to_s` because ObjToString should not have a patch point for String to_s being redefined
self.make_equal_to(insn_id, val); self.make_equal_to(insn_id, val);
} else { } else {
let replacement = self.push_insn(block, Insn::SendWithoutBlock { self_val: val, cd, args: vec![], state }); self.push_insn_id(block, insn_id);
self.make_equal_to(insn_id, replacement)
} }
} }
Insn::AnyToString { str, .. } => { Insn::AnyToString { str, .. } => {
@ -7241,8 +7241,8 @@ mod opt_tests {
bb0(v0:BasicObject): bb0(v0:BasicObject):
v2:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000)) v2:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
v3:Fixnum[1] = Const Value(1) v3:Fixnum[1] = Const Value(1)
v11:BasicObject = SendWithoutBlock v3, :to_s v5:BasicObject = ObjToString v3
v7:String = AnyToString v3, str: v11 v7:String = AnyToString v3, str: v5
v9:StringExact = StringConcat v2, v7 v9:StringExact = StringConcat v2, v7
Return v9 Return v9
"#]]); "#]]);