mirror of
https://github.com/ruby/ruby.git
synced 2025-08-15 13:39:04 +02:00
ZJIT: Compile toregexp
`toregexp` is fairly similar to `concatstrings`, so this commit extracts a helper for pushing and popping operands on the native stack. There's probably an opportunity to move some of this into lir (e.g. Alan suggested a push_many that could use STP on ARM to push 2 at a time), but I might save that for another day.
This commit is contained in:
parent
c9346a166c
commit
a58a4a6ca7
7 changed files with 163 additions and 30 deletions
|
@ -25,4 +25,9 @@ int rb_match_count(VALUE match);
|
|||
VALUE rb_reg_new_ary(VALUE ary, int options);
|
||||
VALUE rb_reg_last_defined(VALUE match);
|
||||
|
||||
#define ARG_REG_OPTION_MASK \
|
||||
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
|
||||
#define ARG_ENCODING_FIXED 16
|
||||
#define ARG_ENCODING_NONE 32
|
||||
|
||||
#endif /* INTERNAL_RE_H */
|
||||
|
|
5
re.c
5
re.c
|
@ -290,11 +290,6 @@ rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
|
|||
|
||||
#define KCODE_FIXED FL_USER4
|
||||
|
||||
#define ARG_REG_OPTION_MASK \
|
||||
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
|
||||
#define ARG_ENCODING_FIXED 16
|
||||
#define ARG_ENCODING_NONE 32
|
||||
|
||||
static int
|
||||
char_to_option(int c)
|
||||
{
|
||||
|
|
|
@ -1621,6 +1621,14 @@ class TestZJIT < Test::Unit::TestCase
|
|||
}, insns: [:concatstrings]
|
||||
end
|
||||
|
||||
def test_regexp_interpolation
|
||||
assert_compiles '/123/', %q{
|
||||
def test = /#{1}#{2}#{3}/
|
||||
|
||||
test
|
||||
}, insns: [:toregexp]
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Assert that every method call in `test_script` can be compiled by ZJIT
|
||||
|
|
|
@ -259,6 +259,13 @@ fn main() {
|
|||
|
||||
// From internal/re.h
|
||||
.allowlist_function("rb_reg_new_ary")
|
||||
.allowlist_var("ARG_ENCODING_FIXED")
|
||||
.allowlist_var("ARG_ENCODING_NONE")
|
||||
|
||||
// From include/ruby/onigmo.h
|
||||
.allowlist_var("ONIG_OPTION_IGNORECASE")
|
||||
.allowlist_var("ONIG_OPTION_EXTEND")
|
||||
.allowlist_var("ONIG_OPTION_MULTILINE")
|
||||
|
||||
// `ruby_value_type` is a C enum and this stops it from
|
||||
// prefixing all the members with the name of the type
|
||||
|
|
|
@ -338,6 +338,7 @@ fn gen_insn(cb: &mut CodeBlock, jit: &mut JITState, asm: &mut Assembler, functio
|
|||
Insn::StringCopy { val, chilled, state } => gen_string_copy(asm, opnd!(val), *chilled, &function.frame_state(*state)),
|
||||
Insn::StringConcat { strings, state } => gen_string_concat(jit, asm, opnds!(strings), &function.frame_state(*state))?,
|
||||
Insn::StringIntern { val, state } => gen_intern(asm, opnd!(val), &function.frame_state(*state))?,
|
||||
Insn::ToRegexp { opt, values, state } => gen_toregexp(jit, asm, *opt, opnds!(values), &function.frame_state(*state))?,
|
||||
Insn::Param { idx } => unreachable!("block.insns should not have Insn::Param({idx})"),
|
||||
Insn::Snapshot { .. } => return Some(()), // we don't need to do anything for this instruction at the moment
|
||||
Insn::Jump(branch) => return gen_jump(jit, asm, branch),
|
||||
|
@ -1508,6 +1509,52 @@ pub fn gen_stub_exit(cb: &mut CodeBlock) -> Option<CodePtr> {
|
|||
})
|
||||
}
|
||||
|
||||
fn gen_push_opnds(jit: &mut JITState, asm: &mut Assembler, opnds: &[Opnd]) -> lir::Opnd {
|
||||
let n = opnds.len();
|
||||
|
||||
// Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR
|
||||
// At this point, frame_setup(&[], jit.c_stack_slots) has been called,
|
||||
// which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack
|
||||
let frame_size = aligned_stack_bytes(jit.c_stack_slots);
|
||||
let allocation_size = aligned_stack_bytes(n);
|
||||
|
||||
asm_comment!(asm, "allocate {} bytes on C stack for {} values", allocation_size, n);
|
||||
asm.sub_into(NATIVE_STACK_PTR, allocation_size.into());
|
||||
|
||||
// Calculate the total offset from NATIVE_BASE_PTR to our buffer
|
||||
let total_offset_from_base = (frame_size + allocation_size) as i32;
|
||||
|
||||
for (idx, &opnd) in opnds.iter().enumerate() {
|
||||
let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32);
|
||||
asm.mov(
|
||||
Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset),
|
||||
opnd
|
||||
);
|
||||
}
|
||||
|
||||
asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base))
|
||||
}
|
||||
|
||||
fn gen_pop_opnds(asm: &mut Assembler, opnds: &[Opnd]) {
|
||||
asm_comment!(asm, "restore C stack pointer");
|
||||
let allocation_size = aligned_stack_bytes(opnds.len());
|
||||
asm.add_into(NATIVE_STACK_PTR, allocation_size.into());
|
||||
}
|
||||
|
||||
fn gen_toregexp(jit: &mut JITState, asm: &mut Assembler, opt: usize, values: Vec<Opnd>, state: &FrameState) -> Option<lir::Opnd> {
|
||||
gen_prepare_non_leaf_call(jit, asm, state)?;
|
||||
|
||||
let first_opnd_ptr = gen_push_opnds(jit, asm, &values);
|
||||
|
||||
let ary = asm_ccall!(asm, rb_ary_tmp_new_from_values, Opnd::Imm(0), values.len().into(), first_opnd_ptr);
|
||||
let val = asm_ccall!(asm, rb_reg_new_ary, ary, opt.into());
|
||||
asm_ccall!(asm, rb_ary_clear, ary);
|
||||
|
||||
gen_pop_opnds(asm, &values);
|
||||
|
||||
Some(val)
|
||||
}
|
||||
|
||||
fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>, state: &FrameState) -> Option<Opnd> {
|
||||
let n = strings.len();
|
||||
|
||||
|
@ -1519,32 +1566,9 @@ fn gen_string_concat(jit: &mut JITState, asm: &mut Assembler, strings: Vec<Opnd>
|
|||
|
||||
gen_prepare_non_leaf_call(jit, asm, state)?;
|
||||
|
||||
// Calculate the compile-time NATIVE_STACK_PTR offset from NATIVE_BASE_PTR
|
||||
// At this point, frame_setup(&[], jit.c_stack_slots) has been called,
|
||||
// which allocated aligned_stack_bytes(jit.c_stack_slots) on the stack
|
||||
let frame_size = aligned_stack_bytes(jit.c_stack_slots);
|
||||
let allocation_size = aligned_stack_bytes(n);
|
||||
|
||||
asm_comment!(asm, "allocate {} bytes on C stack for {} strings", allocation_size, n);
|
||||
asm.sub_into(NATIVE_STACK_PTR, allocation_size.into());
|
||||
|
||||
// Calculate the total offset from NATIVE_BASE_PTR to our buffer
|
||||
let total_offset_from_base = (frame_size + allocation_size) as i32;
|
||||
|
||||
for (idx, &string_opnd) in strings.iter().enumerate() {
|
||||
let slot_offset = -total_offset_from_base + (idx as i32 * SIZEOF_VALUE_I32);
|
||||
asm.mov(
|
||||
Opnd::mem(VALUE_BITS, NATIVE_BASE_PTR, slot_offset),
|
||||
string_opnd
|
||||
);
|
||||
}
|
||||
|
||||
let first_string_ptr = asm.lea(Opnd::mem(64, NATIVE_BASE_PTR, -total_offset_from_base));
|
||||
|
||||
let first_string_ptr = gen_push_opnds(jit, asm, &strings);
|
||||
let result = asm_ccall!(asm, rb_str_concat_literals, n.into(), first_string_ptr);
|
||||
|
||||
asm_comment!(asm, "restore C stack pointer");
|
||||
asm.add_into(NATIVE_STACK_PTR, allocation_size.into());
|
||||
gen_pop_opnds(asm, &strings);
|
||||
|
||||
Some(result)
|
||||
}
|
||||
|
|
5
zjit/src/cruby_bindings.inc.rs
generated
5
zjit/src/cruby_bindings.inc.rs
generated
|
@ -30,6 +30,11 @@ impl<T> ::std::fmt::Debug for __IncompleteArrayField<T> {
|
|||
fmt.write_str("__IncompleteArrayField")
|
||||
}
|
||||
}
|
||||
pub const ONIG_OPTION_IGNORECASE: u32 = 1;
|
||||
pub const ONIG_OPTION_EXTEND: u32 = 2;
|
||||
pub const ONIG_OPTION_MULTILINE: u32 = 4;
|
||||
pub const ARG_ENCODING_FIXED: u32 = 16;
|
||||
pub const ARG_ENCODING_NONE: u32 = 32;
|
||||
pub const INTEGER_REDEFINED_OP_FLAG: u32 = 1;
|
||||
pub const FLOAT_REDEFINED_OP_FLAG: u32 = 2;
|
||||
pub const STRING_REDEFINED_OP_FLAG: u32 = 4;
|
||||
|
|
|
@ -448,6 +448,9 @@ pub enum Insn {
|
|||
StringIntern { val: InsnId, state: InsnId },
|
||||
StringConcat { strings: Vec<InsnId>, state: InsnId },
|
||||
|
||||
/// Combine count stack values into a regexp
|
||||
ToRegexp { opt: usize, values: Vec<InsnId>, state: InsnId },
|
||||
|
||||
/// Put special object (VMCORE, CBASE, etc.) based on value_type
|
||||
PutSpecialObject { value_type: SpecialObjectType },
|
||||
|
||||
|
@ -641,6 +644,14 @@ pub struct InsnPrinter<'a> {
|
|||
ptr_map: &'a PtrPrintMap,
|
||||
}
|
||||
|
||||
static REGEXP_FLAGS: &[(u32, &str)] = &[
|
||||
(ONIG_OPTION_MULTILINE, "MULTILINE"),
|
||||
(ONIG_OPTION_IGNORECASE, "IGNORECASE"),
|
||||
(ONIG_OPTION_EXTEND, "EXTENDED"),
|
||||
(ARG_ENCODING_FIXED, "FIXEDENCODING"),
|
||||
(ARG_ENCODING_NONE, "NOENCODING"),
|
||||
];
|
||||
|
||||
impl<'a> std::fmt::Display for InsnPrinter<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
match &self.inner {
|
||||
|
@ -689,6 +700,28 @@ impl<'a> std::fmt::Display for InsnPrinter<'a> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
Insn::ToRegexp { values, opt, .. } => {
|
||||
write!(f, "ToRegexp")?;
|
||||
let mut prefix = " ";
|
||||
for value in values {
|
||||
write!(f, "{prefix}{value}")?;
|
||||
prefix = ", ";
|
||||
}
|
||||
|
||||
let opt = *opt as u32;
|
||||
if opt != 0 {
|
||||
write!(f, ", ")?;
|
||||
let mut sep = "";
|
||||
for (flag, name) in REGEXP_FLAGS {
|
||||
if opt & flag != 0 {
|
||||
write!(f, "{sep}{name}")?;
|
||||
sep = "|";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Insn::Test { val } => { write!(f, "Test {val}") }
|
||||
Insn::IsNil { val } => { write!(f, "IsNil {val}") }
|
||||
Insn::Jump(target) => { write!(f, "Jump {target}") }
|
||||
|
@ -1150,6 +1183,7 @@ impl Function {
|
|||
&StringCopy { val, chilled, state } => StringCopy { val: find!(val), chilled, state },
|
||||
&StringIntern { val, state } => StringIntern { val: find!(val), state: find!(state) },
|
||||
&StringConcat { ref strings, state } => StringConcat { strings: find_vec!(strings), state: find!(state) },
|
||||
&ToRegexp { opt, ref values, state } => ToRegexp { opt, values: find_vec!(values), state },
|
||||
&Test { val } => Test { val: find!(val) },
|
||||
&IsNil { val } => IsNil { val: find!(val) },
|
||||
&Jump(ref target) => Jump(find_branch_edge!(target)),
|
||||
|
@ -1274,6 +1308,7 @@ impl Function {
|
|||
Insn::StringCopy { .. } => types::StringExact,
|
||||
Insn::StringIntern { .. } => types::Symbol,
|
||||
Insn::StringConcat { .. } => types::StringExact,
|
||||
Insn::ToRegexp { .. } => types::RegexpExact,
|
||||
Insn::NewArray { .. } => types::ArrayExact,
|
||||
Insn::ArrayDup { .. } => types::ArrayExact,
|
||||
Insn::NewHash { .. } => types::HashExact,
|
||||
|
@ -1906,6 +1941,10 @@ impl Function {
|
|||
worklist.extend(strings);
|
||||
worklist.push_back(state);
|
||||
}
|
||||
&Insn::ToRegexp { ref values, state, .. } => {
|
||||
worklist.extend(values);
|
||||
worklist.push_back(state);
|
||||
}
|
||||
| &Insn::Return { val }
|
||||
| &Insn::Throw { val, .. }
|
||||
| &Insn::Defined { v: val, .. }
|
||||
|
@ -2826,6 +2865,15 @@ pub fn iseq_to_hir(iseq: *const rb_iseq_t) -> Result<Function, ParseError> {
|
|||
let insn_id = fun.push_insn(block, Insn::StringConcat { strings, state: exit_id });
|
||||
state.stack_push(insn_id);
|
||||
}
|
||||
YARVINSN_toregexp => {
|
||||
// First arg contains the options (multiline, extended, ignorecase) used to create the regexp
|
||||
let opt = get_arg(pc, 0).as_usize();
|
||||
let count = get_arg(pc, 1).as_usize();
|
||||
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
|
||||
let values = state.stack_pop_n(count)?;
|
||||
let insn_id = fun.push_insn(block, Insn::ToRegexp { opt, values, state: exit_id });
|
||||
state.stack_push(insn_id);
|
||||
}
|
||||
YARVINSN_newarray => {
|
||||
let count = get_arg(pc, 0).as_usize();
|
||||
let exit_id = fun.push_insn(block, Insn::Snapshot { state: exit_state });
|
||||
|
@ -5299,6 +5347,47 @@ mod tests {
|
|||
"#]]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toregexp() {
|
||||
eval(r##"
|
||||
def test = /#{1}#{2}#{3}/
|
||||
"##);
|
||||
assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
|
||||
fn test@<compiled>:2:
|
||||
bb0(v0:BasicObject):
|
||||
v2:Fixnum[1] = Const Value(1)
|
||||
v4:BasicObject = ObjToString v2
|
||||
v6:String = AnyToString v2, str: v4
|
||||
v7:Fixnum[2] = Const Value(2)
|
||||
v9:BasicObject = ObjToString v7
|
||||
v11:String = AnyToString v7, str: v9
|
||||
v12:Fixnum[3] = Const Value(3)
|
||||
v14:BasicObject = ObjToString v12
|
||||
v16:String = AnyToString v12, str: v14
|
||||
v18:RegexpExact = ToRegexp v6, v11, v16
|
||||
Return v18
|
||||
"#]]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_toregexp_with_options() {
|
||||
eval(r##"
|
||||
def test = /#{1}#{2}/mixn
|
||||
"##);
|
||||
assert_method_hir_with_opcode("test", YARVINSN_toregexp, expect![[r#"
|
||||
fn test@<compiled>:2:
|
||||
bb0(v0:BasicObject):
|
||||
v2:Fixnum[1] = Const Value(1)
|
||||
v4:BasicObject = ObjToString v2
|
||||
v6:String = AnyToString v2, str: v4
|
||||
v7:Fixnum[2] = Const Value(2)
|
||||
v9:BasicObject = ObjToString v7
|
||||
v11:String = AnyToString v7, str: v9
|
||||
v13:RegexpExact = ToRegexp v6, v11, MULTILINE|IGNORECASE|EXTENDED|NOENCODING
|
||||
Return v13
|
||||
"#]]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn throw() {
|
||||
eval("
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue