mirror of https://github.com/ruby/ruby.git, synced 2025-09-23 20:44:00 +02:00

Avoid generating opt_send with cfunc cc with JIT

only for opt_nil_p and opt_not. While vm_method_cfunc_is is used for opt_eq too, many of opt_eq's fast paths don't call it. So if the cc is populated, opt_eq should generate opt_send regardless of whether the method is a cfunc or not. And again, opt_neq isn't relevant due to the difference in operands. So opt_nil_p and opt_not are the only variants that use vm_method_cfunc_is this way.

```
$ benchmark-driver -v --rbenv 'before2 --jit::ruby --jit;before --jit;after --jit' benchmark/mjit_opt_cc_insns.yml --repeat-count=4
before2 --jit: ruby 2.8.0dev (2020-06-22T08:37:37Z master 3238641750) +JIT [x86_64-linux]
before --jit: ruby 2.8.0dev (2020-06-23T01:01:24Z master 9ce2066209) +JIT [x86_64-linux]
after --jit: ruby 2.8.0dev (2020-06-23T06:58:37Z master 17e9df3157) +JIT [x86_64-linux] last_commit=Avoid generating opt_send with cfunc cc with JIT
Calculating -------------------------------------
                     before2 --jit  before --jit  after --jit
    mjit_nil?(1)           54.204M       75.536M      75.031M i/s - 40.000M times in 0.737947s 0.529548s 0.533110s
     mjit_not(1)           53.822M       70.921M      71.920M i/s - 40.000M times in 0.743195s 0.564007s 0.556171s
 mjit_eq(1, nil)            7.367M        6.496M       7.331M i/s -  8.000M times in 1.085882s 1.231470s 1.091327s

Comparison:
    mjit_nil?(1)
     before --jit:  75536059.3 i/s
      after --jit:  75031409.4 i/s - 1.01x  slower
    before2 --jit:  54204431.6 i/s - 1.39x  slower

     mjit_not(1)
      after --jit:  71920324.1 i/s
     before --jit:  70921063.1 i/s - 1.01x  slower
    before2 --jit:  53821697.6 i/s - 1.34x  slower

 mjit_eq(1, nil)
    before2 --jit:   7367280.0 i/s
      after --jit:   7330527.4 i/s - 1.01x  slower
     before --jit:   6496302.8 i/s - 1.13x  slower
```
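For context, a minimal sketch of the pattern the message describes: opt_nil_p and opt_not ask a vm_method_cfunc_is-style check whether the receiver's method is still the default cfunc, answer inline if so, and otherwise return Qundef so a plain send gets generated. Every name below (method_cfunc_is_default, opt_not_sketch, value_t) is an illustrative stand-in, not the actual ruby/ruby source.

```
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for Ruby's Qfalse/Qtrue/Qundef result values. */
typedef enum { Q_FALSE, Q_TRUE, Q_UNDEF } value_t;

/* Assumed stand-in for vm_method_cfunc_is: does the receiver's method
 * still resolve to the expected default C function? */
static bool method_cfunc_is_default(bool redefined) { return !redefined; }

/* opt_not-style fast path: if `!` is still the default cfunc, negate inline;
 * Q_UNDEF means "cache says otherwise, fall back to a normal send". */
static value_t opt_not_sketch(bool truthy_recv, bool not_redefined)
{
    if (method_cfunc_is_default(not_redefined)) {
        return truthy_recv ? Q_FALSE : Q_TRUE;  /* inline fast path */
    }
    return Q_UNDEF;  /* fall back: generate a plain opt_send */
}

int main(void)
{
    printf("%d\n", opt_not_sketch(true, false));  /* 0 (Q_FALSE): inlined */
    printf("%d\n", opt_not_sketch(true, true));   /* 2 (Q_UNDEF): opt_send */
    return 0;
}
```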
112 lines
6.5 KiB
Text
% # -*- C -*-
% # Copyright (c) 2018 Takashi Kokubun. All rights reserved.
% #
% # This file is a part of the programming language Ruby. Permission is hereby
% # granted, to either redistribute and/or modify this file, provided that the
% # conditions mentioned in the file COPYING are met. Consult the file for
% # details.
%
% # Optimized case of send / opt_send_without_block instructions.
{
% # compiler: Prepare operands which may be used by `insn.call_attribute`
% insn.opes.each_with_index do |ope, i|
    MAYBE_UNUSED(<%= ope.fetch(:decl) %>) = (<%= ope.fetch(:type) %>)operands[<%= i %>];
% end
% # compiler: Use captured cc to avoid race condition
    const struct rb_callcache *captured_cc = captured_cc_entries(status)[call_data_index(cd, body)];
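% # Note: the cc entries were captured before compilation, so a method
% # (re)definition racing with the MJIT worker cannot change what this unit
% # inlines; a stale cache is instead caught by the runtime guard emitted below.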
%
% # compiler: Inline send insn where some supported fastpath is used.
    const rb_iseq_t *iseq = NULL;
    const CALL_INFO ci = cd->ci;
    int kw_splat = IS_ARGS_KW_SPLAT(ci) > 0;
    extern bool rb_splat_or_kwargs_p(const struct rb_callinfo *restrict ci);
    if (!status->compile_info->disable_send_cache && has_valid_method_type(captured_cc) && (
% # `CC_SET_FASTPATH(cd->cc, vm_call_cfunc_with_frame, ...)` in `vm_call_cfunc`
            (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_CFUNC
             && !rb_splat_or_kwargs_p(ci) && !kw_splat)
% # `CC_SET_FASTPATH(cc, vm_call_iseq_setup_func(...), vm_call_iseq_optimizable_p(...))` in `vm_callee_setup_arg`,
% # and support only non-VM_CALL_TAILCALL path inside it
            || (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_ISEQ
                && fastpath_applied_iseq_p(ci, captured_cc, iseq = def_iseq_ptr(vm_cc_cme(captured_cc)->def))
                && !(vm_ci_flag(ci) & VM_CALL_TAILCALL))
       )) {
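% # Note: the two arms above mirror the interpreter's own fastpath conditions
% # (see the `CC_SET_FASTPATH` call sites referenced in the comments), so the
% # JIT only inlines calls the interpreter itself would have dispatched fast.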
        int sp_inc = (int)sp_inc_of_sendish(ci);
        fprintf(f, "{\n");

% # JIT: Check that the captured call cache is still valid (otherwise dispatch would require vm_search_method). This allows inlining some of the following things.
        bool opt_class_of = !maybe_special_const_class_p(captured_cc->klass); // If true, use RBASIC_CLASS instead of CLASS_OF to reduce code size
        fprintf(f, "    const struct rb_callcache *cc = (const struct rb_callcache *)0x%"PRIxVALUE";\n", (VALUE)captured_cc);
        fprintf(f, "    const rb_callable_method_entry_t *cc_cme = (const rb_callable_method_entry_t *)0x%"PRIxVALUE";\n", (VALUE)vm_cc_cme(captured_cc));
        fprintf(f, "    const VALUE recv = stack[%d];\n", b->stack_size + sp_inc - 1);
        fprintf(f, "    if (UNLIKELY(%s || !vm_cc_valid_p(cc, cc_cme, %s(recv)))) {\n", opt_class_of ? "RB_SPECIAL_CONST_P(recv)" : "false", opt_class_of ? "RBASIC_CLASS" : "CLASS_OF");
        fprintf(f, "        reg_cfp->pc = original_body_iseq + %d;\n", pos);
        fprintf(f, "        reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size);
        fprintf(f, "        goto send_cancel;\n");
        fprintf(f, "    }\n");

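% # Note: the guard emitted above re-checks the receiver's class against the
% # captured cc on every execution; if the method was redefined, it jumps to
% # send_cancel, canceling JITed execution of this call.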
% # JIT: move sp and pc if necessary
<%= render 'mjit_compile_pc_and_sp', locals: { insn: insn } -%>

% # JIT: If ISeq is inlinable, call the inlined method without pushing a frame.
        if (iseq && status->inlined_iseqs != NULL && iseq->body == status->inlined_iseqs[pos]) {
            fprintf(f, "    {\n");
            fprintf(f, "        VALUE orig_self = reg_cfp->self;\n");
            fprintf(f, "        reg_cfp->self = stack[%d];\n", b->stack_size + sp_inc - 1);
            fprintf(f, "        stack[%d] = _mjit%d_inlined_%d(ec, reg_cfp, orig_self, original_iseq);\n", b->stack_size + sp_inc - 1, status->compiled_id, pos);
            fprintf(f, "        reg_cfp->self = orig_self;\n");
            fprintf(f, "    }\n");
        }
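% # Note: since the inlined call above pushes no new VM frame, reg_cfp->self is
% # temporarily swapped to the receiver and restored right after the call.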
        else {
% # JIT: Forked `vm_sendish` (except method_explorer = vm_search_method_wrap) to inline various things
            fprintf(f, "    {\n");
            fprintf(f, "        VALUE val;\n");
            fprintf(f, "        struct rb_calling_info calling;\n");
% if insn.name == 'send'
            fprintf(f, "        calling.block_handler = vm_caller_setup_arg_block(ec, reg_cfp, (const struct rb_callinfo *)0x%"PRIxVALUE", (rb_iseq_t *)0x%"PRIxVALUE", FALSE);\n", (VALUE)ci, (VALUE)blockiseq);
% else
            fprintf(f, "        calling.block_handler = VM_BLOCK_HANDLER_NONE;\n");
% end
            fprintf(f, "        calling.kw_splat = %d;\n", kw_splat);
            fprintf(f, "        calling.recv = stack[%d];\n", b->stack_size + sp_inc - 1);
            fprintf(f, "        calling.argc = %d;\n", vm_ci_argc(ci));

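% # Note: argc and kw_splat are compile-time constants here, so rb_calling_info
% # above is filled with literals instead of being computed from the call info
% # at runtime as `vm_sendish` would do.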
            if (vm_cc_cme(captured_cc)->def->type == VM_METHOD_TYPE_CFUNC) {
% # TODO: optimize this more
                fprintf(f, "        CALL_DATA cd = (CALL_DATA)0x%"PRIxVALUE";\n", operands[0]);
                fprintf(f, "        val = vm_call_cfunc_with_frame(ec, reg_cfp, &calling, cd);\n");
            }
            else { // VM_METHOD_TYPE_ISEQ
% # fastpath_applied_iseq_p checks rb_simple_iseq_p, which ensures has_opt == FALSE
                fprintf(f, "        vm_call_iseq_setup_normal(ec, reg_cfp, &calling, cc_cme, 0, %d, %d);\n", iseq->body->param.size, iseq->body->local_table_size);
                if (iseq->body->catch_except_p) {
                    fprintf(f, "        VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n");
                    fprintf(f, "        val = vm_exec(ec, TRUE);\n");
                }
                else {
                    fprintf(f, "        if ((val = mjit_exec(ec)) == Qundef) {\n");
                    fprintf(f, "            VM_ENV_FLAGS_SET(ec->cfp->ep, VM_FRAME_FLAG_FINISH);\n"); // This is vm_call0_body's code after vm_call_iseq_setup
                    fprintf(f, "            val = vm_exec(ec, FALSE);\n");
                    fprintf(f, "        }\n");
                }
            }
            fprintf(f, "        stack[%d] = val;\n", b->stack_size + sp_inc - 1);
            fprintf(f, "    }\n");

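% # Note: for an ISeq that may catch exceptions (catch_except_p), the frame is
% # marked VM_FRAME_FLAG_FINISH up front and run with vm_exec so its catch
% # table is honored; otherwise mjit_exec is tried first and vm_exec is only
% # the fallback when no JITed code is available (Qundef).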
% # JIT: If TracePoint is enabled, we should evaluate the ISeq modified for TracePoint instead. Note: this is slow.
            fprintf(f, "    if (UNLIKELY(!mjit_call_p)) {\n");
            fprintf(f, "        reg_cfp->sp = vm_base_ptr(reg_cfp) + %d;\n", b->stack_size + (int)<%= insn.call_attribute('sp_inc') %>);
            if (!pc_moved_p) {
                fprintf(f, "        reg_cfp->pc = original_body_iseq + %d;\n", next_pos);
            }
            fprintf(f, "        RB_DEBUG_COUNTER_INC(mjit_cancel_invalidate_all);\n");
            fprintf(f, "        goto cancel;\n");
            fprintf(f, "    }\n");
        }

% # compiler: Move JIT compiler's internal stack pointer
        b->stack_size += <%= insn.call_attribute('sp_inc') %>;

        fprintf(f, "}\n");
        break;
    }
}