8300002: Performance regression caused by non-inlined hot methods due to post call noop instructions

Reviewed-by: kvn, iveresov, eosterlund
This commit is contained in:
Sergey Kuksenko 2023-01-18 00:16:34 +00:00
parent 7071397ed9
commit 89a032dc05
14 changed files with 73 additions and 21 deletions

View file

@ -1101,6 +1101,7 @@ void MacroAssembler::post_call_nop() {
} }
InstructionMark im(this); InstructionMark im(this);
relocate(post_call_nop_Relocation::spec()); relocate(post_call_nop_Relocation::spec());
InlineSkippedInstructionsCounter skipCounter(this);
nop(); nop();
movk(zr, 0); movk(zr, 0);
movk(zr, 0); movk(zr, 0);

View file

@ -1185,6 +1185,7 @@ void MacroAssembler::post_call_nop() {
if (!Continuations::enabled()) { if (!Continuations::enabled()) {
return; return;
} }
InlineSkippedInstructionsCounter skipCounter(this);
nop(); nop();
} }

View file

@ -2035,6 +2035,7 @@ void MacroAssembler::post_call_nop() {
} }
InstructionMark im(this); InstructionMark im(this);
relocate(post_call_nop_Relocation::spec()); relocate(post_call_nop_Relocation::spec());
InlineSkippedInstructionsCounter skipCounter(this);
emit_int8((int8_t)0x0f); emit_int8((int8_t)0x0f);
emit_int8((int8_t)0x1f); emit_int8((int8_t)0x1f);
emit_int8((int8_t)0x84); emit_int8((int8_t)0x84);

View file

@ -241,6 +241,19 @@ class AbstractAssembler : public ResourceObj {
} }
}; };
friend class InstructionMark; friend class InstructionMark;
// Scoped helper that measures the code emitted while it is alive and reports
// that amount to the assembler as "skipped", i.e. excluded from the inline
// size heuristics (it is wrapped around the post-call nop sequences).
// The constructor records the assembler's pc(); the destructor registers the
// distance between that point and the pc() at scope exit.
// NOTE(review): assumes pc() at destruction still refers to the same buffer as
// at construction (no section relocation in between) -- confirm.
class InlineSkippedInstructionsCounter: public StackObj {
 private:
  AbstractAssembler* _assm;   // assembler whose emission is being measured
  address            _start;  // value of _assm->pc() at construction
 public:
  // explicit: prevents an AbstractAssembler* from being implicitly converted
  // into a counter object.
  explicit InlineSkippedInstructionsCounter(AbstractAssembler* assm) : _assm(assm), _start(assm->pc()) {
  }
  ~InlineSkippedInstructionsCounter() {
    // register_skipped() takes an int; spell out the conversion of the
    // pc difference instead of relying on an implicit narrowing.
    _assm->register_skipped(static_cast<int>(_assm->pc() - _start));
  }
};
#ifdef ASSERT #ifdef ASSERT
// Make it return true on platforms which need to verify // Make it return true on platforms which need to verify
// instruction boundaries for some operations. // instruction boundaries for some operations.
@ -333,10 +346,13 @@ class AbstractAssembler : public ResourceObj {
OopRecorder* oop_recorder() const { return _oop_recorder; } OopRecorder* oop_recorder() const { return _oop_recorder; }
void set_oop_recorder(OopRecorder* r) { _oop_recorder = r; } void set_oop_recorder(OopRecorder* r) { _oop_recorder = r; }
// Forwards 'size' to register_skipped() on the section returned by code_section().
void register_skipped(int size) { code_section()->register_skipped(size); }
address inst_mark() const { return code_section()->mark(); } address inst_mark() const { return code_section()->mark(); }
void set_inst_mark() { code_section()->set_mark(); } void set_inst_mark() { code_section()->set_mark(); }
void clear_inst_mark() { code_section()->clear_mark(); } void clear_inst_mark() { code_section()->clear_mark(); }
// Constants in code // Constants in code
void relocate(RelocationHolder const& rspec, int format = 0) { void relocate(RelocationHolder const& rspec, int format = 0) {
assert(!pd_check_instruction_mark() assert(!pd_check_instruction_mark()

View file

@ -596,6 +596,17 @@ csize_t CodeBuffer::total_offset_of(const CodeSection* cs) const {
return -1; return -1;
} }
int CodeBuffer::total_skipped_instructions_size() const {
int total_skipped_size = 0;
for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
const CodeSection* cur_cs = code_section(n);
if (!cur_cs->is_empty()) {
total_skipped_size += cur_cs->_skipped_instructions_size;
}
}
return total_skipped_size;
}
csize_t CodeBuffer::total_relocation_size() const { csize_t CodeBuffer::total_relocation_size() const {
csize_t total = copy_relocations_to(NULL); // dry run only csize_t total = copy_relocations_to(NULL); // dry run only
return (csize_t) align_up(total, HeapWordSize); return (csize_t) align_up(total, HeapWordSize);

View file

@ -98,6 +98,7 @@ class CodeSection {
address _locs_point; // last relocated position (grows upward) address _locs_point; // last relocated position (grows upward)
bool _locs_own; // did I allocate the locs myself? bool _locs_own; // did I allocate the locs myself?
bool _scratch_emit; // Buffer is used for scratch emit, don't relocate. bool _scratch_emit; // Buffer is used for scratch emit, don't relocate.
int _skipped_instructions_size;
char _index; // my section number (SECT_INST, etc.) char _index; // my section number (SECT_INST, etc.)
CodeBuffer* _outer; // enclosing CodeBuffer CodeBuffer* _outer; // enclosing CodeBuffer
@ -114,6 +115,7 @@ class CodeSection {
_locs_point = NULL; _locs_point = NULL;
_locs_own = false; _locs_own = false;
_scratch_emit = false; _scratch_emit = false;
_skipped_instructions_size = 0;
debug_only(_index = (char)-1); debug_only(_index = (char)-1);
debug_only(_outer = (CodeBuffer*)badAddress); debug_only(_outer = (CodeBuffer*)badAddress);
} }
@ -144,6 +146,7 @@ class CodeSection {
_end = cs->_end; _end = cs->_end;
_limit = cs->_limit; _limit = cs->_limit;
_locs_point = cs->_locs_point; _locs_point = cs->_locs_point;
_skipped_instructions_size = cs->_skipped_instructions_size;
} }
public: public:
@ -204,6 +207,10 @@ class CodeSection {
_locs_point = pc; _locs_point = pc;
} }
// Adds 'size' to this section's running total of skipped instructions
// (_skipped_instructions_size).
void register_skipped(int size) {
_skipped_instructions_size += size;
}
// Code emission // Code emission
void emit_int8(uint8_t x1) { void emit_int8(uint8_t x1) {
address curr = end(); address curr = end();
@ -638,6 +645,8 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
// allocated size of all relocation data, including index, rounded up // allocated size of all relocation data, including index, rounded up
csize_t total_relocation_size() const; csize_t total_relocation_size() const;
int total_skipped_instructions_size() const;
csize_t copy_relocations_to(address buf, csize_t buf_limit, bool only_inst) const; csize_t copy_relocations_to(address buf, csize_t buf_limit, bool only_inst) const;
// allocated size of any and all recorded oops // allocated size of any and all recorded oops

View file

@ -151,7 +151,7 @@ ciMethod::ciMethod(const methodHandle& h_m, ciInstanceKlass* holder) :
} }
if (_interpreter_invocation_count == 0) if (_interpreter_invocation_count == 0)
_interpreter_invocation_count = 1; _interpreter_invocation_count = 1;
_instructions_size = -1; _inline_instructions_size = -1;
if (ReplayCompiles) { if (ReplayCompiles) {
ciReplay::initialize(this); ciReplay::initialize(this);
} }
@ -172,7 +172,7 @@ ciMethod::ciMethod(ciInstanceKlass* holder,
_method_data( NULL), _method_data( NULL),
_method_blocks( NULL), _method_blocks( NULL),
_intrinsic_id( vmIntrinsics::_none), _intrinsic_id( vmIntrinsics::_none),
_instructions_size(-1), _inline_instructions_size(-1),
_can_be_statically_bound(false), _can_be_statically_bound(false),
_can_omit_stack_trace(true), _can_omit_stack_trace(true),
_liveness( NULL) _liveness( NULL)
@ -1087,7 +1087,7 @@ bool ciMethod::can_be_compiled() {
// ------------------------------------------------------------------ // ------------------------------------------------------------------
// ciMethod::has_compiled_code // ciMethod::has_compiled_code
bool ciMethod::has_compiled_code() { bool ciMethod::has_compiled_code() {
return instructions_size() > 0; return inline_instructions_size() > 0;
} }
int ciMethod::highest_osr_comp_level() { int ciMethod::highest_osr_comp_level() {
@ -1110,25 +1110,28 @@ int ciMethod::code_size_for_inlining() {
} }
// ------------------------------------------------------------------ // ------------------------------------------------------------------
// ciMethod::instructions_size // ciMethod::inline_instructions_size
// //
// This is a rough metric for "fat" methods, compared before inlining // This is a rough metric for "fat" methods, compared before inlining
// with InlineSmallCode. The CodeBlob::code_size accessor includes // with InlineSmallCode. The CodeBlob::code_size accessor includes
// junk like exception handler, stubs, and constant table, which are // junk like exception handler, stubs, and constant table, which are
// not highly relevant to an inlined method. So we use the more // not highly relevant to an inlined method. So we use the more
// specific accessor nmethod::insts_size. // specific accessor nmethod::insts_size.
int ciMethod::instructions_size() { // Also some instructions inside the code are excluded from inline
if (_instructions_size == -1) { // heuristic (e.g. post call nop instructions; see InlineSkippedInstructionsCounter)
int ciMethod::inline_instructions_size() {
if (_inline_instructions_size == -1) {
GUARDED_VM_ENTRY( GUARDED_VM_ENTRY(
CompiledMethod* code = get_Method()->code(); CompiledMethod* code = get_Method()->code();
if (code != NULL && (code->comp_level() == CompLevel_full_optimization)) { if (code != NULL && (code->comp_level() == CompLevel_full_optimization)) {
_instructions_size = code->insts_end() - code->verified_entry_point(); int isize = code->insts_end() - code->verified_entry_point() - code->skipped_instructions_size();
} else { _inline_instructions_size = isize > 0 ? isize : 0;
_instructions_size = 0; } else {
} _inline_instructions_size = 0;
); }
);
} }
return _instructions_size; return _inline_instructions_size;
} }
// ------------------------------------------------------------------ // ------------------------------------------------------------------
@ -1315,7 +1318,7 @@ void ciMethod::dump_replay_data(outputStream* st) {
mcs == NULL ? 0 : mcs->backedge_counter()->raw_counter(), mcs == NULL ? 0 : mcs->backedge_counter()->raw_counter(),
interpreter_invocation_count(), interpreter_invocation_count(),
interpreter_throwout_count(), interpreter_throwout_count(),
_instructions_size); _inline_instructions_size);
} }
// ------------------------------------------------------------------ // ------------------------------------------------------------------

View file

@ -82,7 +82,7 @@ class ciMethod : public ciMetadata {
int _handler_count; int _handler_count;
int _interpreter_invocation_count; int _interpreter_invocation_count;
int _interpreter_throwout_count; int _interpreter_throwout_count;
int _instructions_size; int _inline_instructions_size;
int _size_of_parameters; int _size_of_parameters;
bool _uses_monitors; bool _uses_monitors;
@ -315,7 +315,8 @@ class ciMethod : public ciMetadata {
bool check_call(int refinfo_index, bool is_static) const; bool check_call(int refinfo_index, bool is_static) const;
bool ensure_method_data(); // make sure it exists in the VM also bool ensure_method_data(); // make sure it exists in the VM also
MethodCounters* ensure_method_counters(); MethodCounters* ensure_method_counters();
int instructions_size();
int inline_instructions_size();
int scale_count(int count, float prof_factor = 1.); // make MDO count commensurate with IIC int scale_count(int count, float prof_factor = 1.); // make MDO count commensurate with IIC
// Stack walking support // Stack walking support

View file

@ -1529,7 +1529,7 @@ void ciReplay::initialize(ciMethod* m) {
} else { } else {
EXCEPTION_CONTEXT; EXCEPTION_CONTEXT;
// m->_instructions_size = rec->_instructions_size; // m->_instructions_size = rec->_instructions_size;
m->_instructions_size = -1; m->_inline_instructions_size = -1;
m->_interpreter_invocation_count = rec->_interpreter_invocation_count; m->_interpreter_invocation_count = rec->_interpreter_invocation_count;
m->_interpreter_throwout_count = rec->_interpreter_throwout_count; m->_interpreter_throwout_count = rec->_interpreter_throwout_count;
MethodCounters* mcs = method->get_method_counters(CHECK_AND_CLEAR); MethodCounters* mcs = method->get_method_counters(CHECK_AND_CLEAR);

View file

@ -281,6 +281,8 @@ public:
bool consts_contains(address addr) const { return consts_begin() <= addr && addr < consts_end(); } bool consts_contains(address addr) const { return consts_begin() <= addr && addr < consts_end(); }
int consts_size() const { return consts_end() - consts_begin(); } int consts_size() const { return consts_end() - consts_begin(); }
// Size of the instructions that are excluded from the inline size heuristic
// (subtracted in ciMethod::inline_instructions_size); implemented by subclasses.
virtual int skipped_instructions_size() const = 0;
virtual address stub_begin() const = 0; virtual address stub_begin() const = 0;
virtual address stub_end() const = 0; virtual address stub_end() const = 0;
bool stub_contains(address addr) const { return stub_begin() <= addr && addr < stub_end(); } bool stub_contains(address addr) const { return stub_begin() <= addr && addr < stub_end(); }

View file

@ -677,6 +677,7 @@ nmethod::nmethod(
_dependencies_offset = _scopes_pcs_offset; _dependencies_offset = _scopes_pcs_offset;
_handler_table_offset = _dependencies_offset; _handler_table_offset = _dependencies_offset;
_nul_chk_table_offset = _handler_table_offset; _nul_chk_table_offset = _handler_table_offset;
_skipped_instructions_size = code_buffer->total_skipped_instructions_size();
#if INCLUDE_JVMCI #if INCLUDE_JVMCI
_speculations_offset = _nul_chk_table_offset; _speculations_offset = _nul_chk_table_offset;
_jvmci_data_offset = _speculations_offset; _jvmci_data_offset = _speculations_offset;
@ -813,6 +814,7 @@ nmethod::nmethod(
_consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts()); _consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts());
_stub_offset = content_offset() + code_buffer->total_offset_of(code_buffer->stubs()); _stub_offset = content_offset() + code_buffer->total_offset_of(code_buffer->stubs());
set_ctable_begin(header_begin() + _consts_offset); set_ctable_begin(header_begin() + _consts_offset);
_skipped_instructions_size = code_buffer->total_skipped_instructions_size();
#if INCLUDE_JVMCI #if INCLUDE_JVMCI
if (compiler->is_jvmci()) { if (compiler->is_jvmci()) {

View file

@ -260,6 +260,8 @@ class nmethod : public CompiledMethod {
// Protected by CompiledMethod_lock // Protected by CompiledMethod_lock
volatile signed char _state; // {not_installed, in_use, not_used, not_entrant} volatile signed char _state; // {not_installed, in_use, not_used, not_entrant}
int _skipped_instructions_size;
// For native wrappers // For native wrappers
nmethod(Method* method, nmethod(Method* method,
CompilerType type, CompilerType type,
@ -393,6 +395,9 @@ class nmethod : public CompiledMethod {
address handler_table_begin () const { return header_begin() + _handler_table_offset ; } address handler_table_begin () const { return header_begin() + _handler_table_offset ; }
address handler_table_end () const { return header_begin() + _nul_chk_table_offset ; } address handler_table_end () const { return header_begin() + _nul_chk_table_offset ; }
address nul_chk_table_begin () const { return header_begin() + _nul_chk_table_offset ; } address nul_chk_table_begin () const { return header_begin() + _nul_chk_table_offset ; }
// Returns the skipped-instructions size stored in _skipped_instructions_size.
int skipped_instructions_size () const { return _skipped_instructions_size ; }
#if INCLUDE_JVMCI #if INCLUDE_JVMCI
address nul_chk_table_end () const { return header_begin() + _speculations_offset ; } address nul_chk_table_end () const { return header_begin() + _speculations_offset ; }
address speculations_begin () const { return header_begin() + _speculations_offset ; } address speculations_begin () const { return header_begin() + _speculations_offset ; }

View file

@ -180,7 +180,7 @@ bool InlineTree::should_inline(ciMethod* callee_method, ciMethod* caller_method,
} else { } else {
// Not hot. Check for medium-sized pre-existing nmethod at cold sites. // Not hot. Check for medium-sized pre-existing nmethod at cold sites.
if (callee_method->has_compiled_code() && if (callee_method->has_compiled_code() &&
callee_method->instructions_size() > inline_small_code_size) { callee_method->inline_instructions_size() > inline_small_code_size) {
set_msg("already compiled into a medium method"); set_msg("already compiled into a medium method");
return false; return false;
} }
@ -278,7 +278,7 @@ bool InlineTree::should_not_inline(ciMethod* callee_method, ciMethod* caller_met
} }
if (callee_method->has_compiled_code() && if (callee_method->has_compiled_code() &&
callee_method->instructions_size() > InlineSmallCode) { callee_method->inline_instructions_size() > InlineSmallCode) {
set_msg("already compiled into a big method"); set_msg("already compiled into a big method");
return true; return true;
} }

View file

@ -814,7 +814,7 @@
\ \
nonstatic_field(ciMethod, _interpreter_invocation_count, int) \ nonstatic_field(ciMethod, _interpreter_invocation_count, int) \
nonstatic_field(ciMethod, _interpreter_throwout_count, int) \ nonstatic_field(ciMethod, _interpreter_throwout_count, int) \
nonstatic_field(ciMethod, _instructions_size, int) \ nonstatic_field(ciMethod, _inline_instructions_size, int) \
\ \
nonstatic_field(ciMethodData, _data_size, int) \ nonstatic_field(ciMethodData, _data_size, int) \
nonstatic_field(ciMethodData, _state, u_char) \ nonstatic_field(ciMethodData, _state, u_char) \