ZJIT: Implement StringIntern codegen (#14207)

* ZJIT: Add test and implement display for StringIntern HIR

Co-authored-by: Emily Samp <emily.samp@shopify.com>

* ZJIT: Implement StringIntern codegen

Co-authored-by: Emily Samp <emily.samp@shopify.com>

* ZJIT: Fix StringIntern's return type

---------

Co-authored-by: Emily Samp <emily.samp@shopify.com>
This commit is contained in:
Stan Lo 2025-08-13 21:04:01 +01:00 committed by GitHub
parent 2b16f27a35
commit 549a326f86
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 45 additions and 7 deletions

View file

@ -72,6 +72,16 @@ class TestZJIT < Test::Unit::TestCase
}, insns: [:setglobal]
end
# Exercises an interpolated symbol literal (:"foo#{123}") and expects the
# snippet's result to be :foo123.
# `insns: [:intern]` names the YARV instruction this test targets; presumably
# assert_compiles verifies the compiled method contains it (helper not shown
# here -- confirm against its definition).
def test_string_intern
assert_compiles ':foo123', %q{
def test
:"foo#{123}"
end
test
}, insns: [:intern]
end
def test_setglobal_with_trace_var_exception
assert_compiles '"rescued"', %q{
def test

View file

@ -337,6 +337,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
Insn::ArrayDup { val, state } => gen_array_dup(asm, opnd!(val), &function.frame_state(*state)),
Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)),
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state))?,
Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state))?,
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment
Insn::Jump(branch) => return gen_jump(jit, asm, branch),
@ -388,7 +389,6 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
| Insn::HashDup { .. }
| Insn::NewHash { .. }
| Insn::Send { .. }
| Insn::StringIntern { .. }
| Insn::Throw { .. }
| Insn::ToArray { .. }
| Insn::ToNewArray { .. }
@ -609,6 +609,13 @@ fn gen_getglobal(asm: &mut Assembler, id: ID) -> Opnd {
asm_ccall!(asm, rb_gvar_get, id.0.into())
}
/// Emit a C call to `rb_str_intern` on `val` and return the operand that
/// holds the call's result.
///
/// `state` is passed to `gen_prepare_call_with_gc` before the call is emitted.
/// This function always yields `Some`.
fn gen_intern(asm: &mut Assembler, val: Opnd, state: &FrameState) -> Option<Opnd> {
    // Must precede the ccall; the helper's name indicates the callee may
    // trigger GC (NOTE(review): confirm against the helper's definition).
    gen_prepare_call_with_gc(asm, state);

    let interned = asm_ccall!(asm, rb_str_intern, val);
    Some(interned)
}
/// Set global variables
fn gen_setglobal(jit: &mut JITState, asm: &mut Assembler, id: ID, val: Opnd, state: &FrameState) -> Option<()> {
// When trace_var is used, setting a global variable can cause exceptions

View file

@ -445,7 +445,7 @@ pub enum Insn {
Param { idx: usize },
StringCopy { val: InsnId, chilled: bool, state: InsnId },
StringIntern { val: InsnId },
StringIntern { val: InsnId, state: InsnId },
StringConcat { strings: Vec<InsnId>, state: InsnId },
/// Put special object (VMCORE, CBASE, etc.) based on value_type
@ -779,6 +779,7 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
Insn::ArrayExtend { left, right, .. } => write!(f, "ArrayExtend {left}, {right}"),
Insn::ArrayPush { array, val, .. } => write!(f, "ArrayPush {array}, {val}"),
Insn::ObjToString { val, .. } => { write!(f, "ObjToString {val}") },
Insn::StringIntern { val, .. } => { write!(f, "StringIntern {val}") },
Insn::AnyToString { val, str, .. } => { write!(f, "AnyToString {val}, str: {str}") },
Insn::SideExit { reason, .. } => write!(f, "SideExit {reason}"),
Insn::PutSpecialObject { value_type } => write!(f, "PutSpecialObject {value_type}"),
@ -802,7 +803,6 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
write!(f, ", {val}")
}
Insn::IncrCounter(counter) => write!(f, "IncrCounter {counter:?}"),
insn => { write!(f, "{insn:?}") }
}
}
}
@ -1148,7 +1148,7 @@ impl Function {
&Return { val } => Return { val: find!(val) },
&Throw { throw_state, val } => Throw { throw_state, val: find!(val) },
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
&StringIntern { val } => StringIntern { val: find!(val) },
&StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
&Test { val } => Test { val: find!(val) },
&IsNil { val } => IsNil { val: find!(val) },
@ -1272,7 +1272,7 @@ impl Function {
Insn::IsNil { val } if !self.type_of(*val).could_be(types::NilClass) => Type::from_cbool(false),
Insn::IsNil { .. } => types::CBool,
Insn::StringCopy { .. } => types::StringExact,
Insn::StringIntern { .. } => types::StringExact,
Insn::StringIntern { .. } => types::Symbol,
Insn::StringConcat { .. } => types::StringExact,
Insn::NewArray { .. } => types::ArrayExact,
Insn::ArrayDup { .. } => types::ArrayExact,
@ -1906,7 +1906,6 @@ impl Function {
worklist.extend(strings);
worklist.push_back(state);
}
| &Insn::StringIntern { val }
| &Insn::Return { val }
| &Insn::Throw { val, .. }
| &Insn::Defined { v: val, .. }
@ -1915,6 +1914,7 @@ impl Function {
| &Insn::IsNil { val } =>
worklist.push_back(val),
&Insn::SetGlobal { val, state, .. }
| &Insn::StringIntern { val, state }
| &Insn::StringCopy { val, state, .. }
| &Insn::GuardType { val, state, .. }
| &Insn::GuardBitEquals { val, state, .. }
@ -2815,7 +2815,8 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
YARVINSN_putself => { state.stack_push(self_param); }
YARVINSN_intern => {
let val = state.stack_pop()?;
let insn_id = fun.push_insn(block, Insn::StringIntern { val });
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
let insn_id = fun.push_insn(block, Insn::StringIntern { val, state: exit_id });
state.stack_push(insn_id);
}
YARVINSN_concatstrings => {
@ -4496,6 +4497,26 @@ mod tests {
"#]]);
}
// Snapshot test for the HIR of a method that returns an interpolated symbol
// literal. The expected output shows the concatenated string (`StringConcat`)
// feeding `StringIntern`, whose result is typed `Symbol` and then returned.
#[test]
fn test_intern_interpolated_symbol() {
// Define the Ruby method under test.
eval(r#"
def test
:"foo#{123}"
end
"#);
// Compare the printed HIR for `test` against the snapshot below.
// NOTE(review): passing YARVINSN_intern presumably also asserts that the
// method's bytecode contains the `intern` opcode -- confirm in the helper.
assert_method_hir_with_opcode("test", YARVINSN_intern, expect![[r#"
fn test@<compiled>:3:
bb0(v0:BasicObject):
v2:StringExact[VALUE(0x1000)] = Const Value(VALUE(0x1000))
v3:Fixnum[123] = Const Value(123)
v5:BasicObject = ObjToString v3
v7:String = AnyToString v3, str: v5
v9:StringExact = StringConcat v2, v7
v11:Symbol = StringIntern v9
Return v11
"#]]);
}
#[test]
fn different_objects_get_addresses() {
eval("def test = unknown_method([0], [1], '2', '2')");