8300002: Performance regression caused by non-inlined hot methods due to post call noop instructions

Reviewed-by: kvn, iveresov, eosterlund
Author: Sergey Kuksenko
Date:   2023-01-18 00:16:34 +00:00
Parent: 7071397ed9
Commit: 89a032dc05

14 changed files with 73 additions and 21 deletions

Post-call nop instructions, emitted after calls to support continuations (Loom),
inflate an nmethod's instruction size. The inlining heuristics compare that size
against InlineSmallCode, so hot methods whose compiled code crossed the threshold
only because of these nops were no longer inlined. This change counts the bytes of
such skipped instructions per code section, records the total in the nmethod, and
subtracts it from the size used by the inlining heuristics
(ciMethod::inline_instructions_size, formerly instructions_size).

@@ -1101,6 +1101,7 @@ void MacroAssembler::post_call_nop() {
   }
   InstructionMark im(this);
   relocate(post_call_nop_Relocation::spec());
+  InlineSkippedInstructionsCounter skipCounter(this);
   nop();
   movk(zr, 0);
   movk(zr, 0);

@@ -1185,6 +1185,7 @@ void MacroAssembler::post_call_nop() {
   if (!Continuations::enabled()) {
     return;
   }
+  InlineSkippedInstructionsCounter skipCounter(this);
   nop();
 }

@@ -2035,6 +2035,7 @@ void MacroAssembler::post_call_nop() {
   }
   InstructionMark im(this);
   relocate(post_call_nop_Relocation::spec());
+  InlineSkippedInstructionsCounter skipCounter(this);
   emit_int8((int8_t)0x0f);
   emit_int8((int8_t)0x1f);
   emit_int8((int8_t)0x84);

@@ -241,6 +241,19 @@ class AbstractAssembler : public ResourceObj {
     }
   };
   friend class InstructionMark;
+
+  // count size of instructions which are skipped from inline heuristics
+  class InlineSkippedInstructionsCounter: public StackObj {
+   private:
+    AbstractAssembler* _assm;
+    address _start;
+   public:
+    InlineSkippedInstructionsCounter(AbstractAssembler* assm) : _assm(assm), _start(assm->pc()) {
+    }
+    ~InlineSkippedInstructionsCounter() {
+      _assm->register_skipped(_assm->pc() - _start);
+    }
+  };
 #ifdef ASSERT
   // Make it return true on platforms which need to verify
   // instruction boundaries for some operations.
@@ -333,10 +346,13 @@ class AbstractAssembler : public ResourceObj {
   OopRecorder* oop_recorder() const { return _oop_recorder; }
   void set_oop_recorder(OopRecorder* r) { _oop_recorder = r; }

+  void register_skipped(int size) { code_section()->register_skipped(size); }
+
   address inst_mark() const { return code_section()->mark(); }
   void set_inst_mark() { code_section()->set_mark(); }
   void clear_inst_mark() { code_section()->clear_mark(); }

   // Constants in code
   void relocate(RelocationHolder const& rspec, int format = 0) {
     assert(!pd_check_instruction_mark()
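
Note: the counter above is a plain RAII helper. It records the assembler's
position when it is constructed and, when the scope closes, reports every byte
emitted in between as "skipped". Below is a minimal standalone sketch of the
same idiom; ToyAssembler and its byte-vector backing are hypothetical stand-ins
for AbstractAssembler and CodeSection, not HotSpot code:

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical stand-in for AbstractAssembler: emits bytes into a buffer
    // and accumulates the size of instructions excluded from inlining cost.
    class ToyAssembler {
      std::vector<uint8_t> _code;
      int _skipped_size = 0;
     public:
      size_t offset() const { return _code.size(); }      // analogous to pc()
      void emit_int8(uint8_t b) { _code.push_back(b); }
      void register_skipped(int size) { _skipped_size += size; }
      int skipped_size() const { return _skipped_size; }
    };

    // Same shape as InlineSkippedInstructionsCounter: everything emitted
    // between construction and destruction is reported as skipped.
    class SkippedCounter {
      ToyAssembler* _assm;
      size_t _start;
     public:
      explicit SkippedCounter(ToyAssembler* assm) : _assm(assm), _start(assm->offset()) {}
      ~SkippedCounter() { _assm->register_skipped((int)(_assm->offset() - _start)); }
    };

    int main() {
      ToyAssembler assm;
      assm.emit_int8(0x90);              // ordinary instruction: counted as real code
      {
        SkippedCounter skip(&assm);      // scope mirrors post_call_nop() above
        assm.emit_int8(0x0f);            // bytes of a multi-byte nop
        assm.emit_int8(0x1f);
      }
      std::printf("emitted=%zu skipped=%d\n", assm.offset(), assm.skipped_size());
      return 0;                          // prints: emitted=3 skipped=2
    }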

@@ -596,6 +596,17 @@ csize_t CodeBuffer::total_offset_of(const CodeSection* cs) const {
   return -1;
 }

+int CodeBuffer::total_skipped_instructions_size() const {
+  int total_skipped_size = 0;
+  for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) {
+    const CodeSection* cur_cs = code_section(n);
+    if (!cur_cs->is_empty()) {
+      total_skipped_size += cur_cs->_skipped_instructions_size;
+    }
+  }
+  return total_skipped_size;
+}
+
 csize_t CodeBuffer::total_relocation_size() const {
   csize_t total = copy_relocations_to(NULL);  // dry run only
   return (csize_t) align_up(total, HeapWordSize);

@@ -98,6 +98,7 @@ class CodeSection {
   address _locs_point;    // last relocated position (grows upward)
   bool    _locs_own;      // did I allocate the locs myself?
   bool    _scratch_emit;  // Buffer is used for scratch emit, don't relocate.
+  int     _skipped_instructions_size;

   char        _index;     // my section number (SECT_INST, etc.)
   CodeBuffer* _outer;     // enclosing CodeBuffer
@@ -114,6 +115,7 @@ class CodeSection {
     _locs_point   = NULL;
     _locs_own     = false;
     _scratch_emit = false;
+    _skipped_instructions_size = 0;
     debug_only(_index = (char)-1);
     debug_only(_outer = (CodeBuffer*)badAddress);
   }
@@ -144,6 +146,7 @@ class CodeSection {
     _end        = cs->_end;
     _limit      = cs->_limit;
     _locs_point = cs->_locs_point;
+    _skipped_instructions_size = cs->_skipped_instructions_size;
   }

  public:
@@ -204,6 +207,10 @@ class CodeSection {
     _locs_point = pc;
   }

+  void register_skipped(int size) {
+    _skipped_instructions_size += size;
+  }
+
   // Code emission
   void emit_int8(uint8_t x1) {
     address curr = end();
@@ -638,6 +645,8 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) {
   // allocated size of all relocation data, including index, rounded up
   csize_t total_relocation_size() const;
+  int total_skipped_instructions_size() const;

   csize_t copy_relocations_to(address buf, csize_t buf_limit, bool only_inst) const;

   // allocated size of any and all recorded oops

@@ -151,7 +151,7 @@ ciMethod::ciMethod(const methodHandle& h_m, ciInstanceKlass* holder) :
   }
   if (_interpreter_invocation_count == 0)
     _interpreter_invocation_count = 1;
-  _instructions_size = -1;
+  _inline_instructions_size = -1;
   if (ReplayCompiles) {
     ciReplay::initialize(this);
   }
@@ -172,7 +172,7 @@ ciMethod::ciMethod(ciInstanceKlass* holder,
   _method_data(NULL),
   _method_blocks(NULL),
   _intrinsic_id(vmIntrinsics::_none),
-  _instructions_size(-1),
+  _inline_instructions_size(-1),
   _can_be_statically_bound(false),
   _can_omit_stack_trace(true),
   _liveness(NULL)
@@ -1087,7 +1087,7 @@ bool ciMethod::can_be_compiled() {
 // ------------------------------------------------------------------
 // ciMethod::has_compiled_code
 bool ciMethod::has_compiled_code() {
-  return instructions_size() > 0;
+  return inline_instructions_size() > 0;
 }

 int ciMethod::highest_osr_comp_level() {
@@ -1110,25 +1110,28 @@ int ciMethod::code_size_for_inlining() {
 }

 // ------------------------------------------------------------------
-// ciMethod::instructions_size
+// ciMethod::inline_instructions_size
 //
 // This is a rough metric for "fat" methods, compared before inlining
 // with InlineSmallCode. The CodeBlob::code_size accessor includes
 // junk like exception handler, stubs, and constant table, which are
 // not highly relevant to an inlined method. So we use the more
 // specific accessor nmethod::insts_size.
-int ciMethod::instructions_size() {
-  if (_instructions_size == -1) {
+// Also some instructions inside the code are excluded from inline
+// heuristic (e.g. post call nop instructions; see InlineSkippedInstructionsCounter)
+int ciMethod::inline_instructions_size() {
+  if (_inline_instructions_size == -1) {
     GUARDED_VM_ENTRY(
       CompiledMethod* code = get_Method()->code();
       if (code != NULL && (code->comp_level() == CompLevel_full_optimization)) {
-        _instructions_size = code->insts_end() - code->verified_entry_point();
+        int isize = code->insts_end() - code->verified_entry_point() - code->skipped_instructions_size();
+        _inline_instructions_size = isize > 0 ? isize : 0;
       } else {
-        _instructions_size = 0;
+        _inline_instructions_size = 0;
      }
     );
   }
-  return _instructions_size;
+  return _inline_instructions_size;
 }
@@ -1315,7 +1318,7 @@ void ciMethod::dump_replay_data(outputStream* st) {
              mcs == NULL ? 0 : mcs->backedge_counter()->raw_counter(),
              interpreter_invocation_count(),
              interpreter_throwout_count(),
-             _instructions_size);
+             _inline_instructions_size);
 }

 // ------------------------------------------------------------------
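
The effect on the heuristic is easiest to see with made-up numbers. In the
sketch below all sizes are illustrative, and inline_small_code is a stand-in
for the InlineSmallCode threshold checked in the InlineTree hunks further down:

    #include <cstdio>

    int main() {
      // Hypothetical compiled method: 2100 bytes of instructions, of which
      // 120 bytes are post-call nops emitted only for continuation support.
      int insts_size        = 2100;  // insts_end() - verified_entry_point()
      int skipped_size      = 120;   // total from InlineSkippedInstructionsCounter
      int inline_small_code = 2000;  // stand-in for the InlineSmallCode threshold

      int old_metric = insts_size;                 // before the fix: nops included
      int new_metric = insts_size - skipped_size;  // after the fix: nops excluded
      if (new_metric < 0) new_metric = 0;          // clamped, as in the patch

      std::printf("old metric: %d > %d  -> inlining refused\n", old_metric, inline_small_code);
      std::printf("new metric: %d <= %d -> inlining allowed\n", new_metric, inline_small_code);
      return 0;
    }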

@@ -82,7 +82,7 @@ class ciMethod : public ciMetadata {
   int _handler_count;
   int _interpreter_invocation_count;
   int _interpreter_throwout_count;
-  int _instructions_size;
+  int _inline_instructions_size;
   int _size_of_parameters;

   bool _uses_monitors;
@@ -315,7 +315,8 @@ class ciMethod : public ciMetadata {
   bool check_call(int refinfo_index, bool is_static) const;
   bool ensure_method_data();  // make sure it exists in the VM also
   MethodCounters* ensure_method_counters();
-  int instructions_size();
+
+  int inline_instructions_size();
   int scale_count(int count, float prof_factor = 1.);  // make MDO count commensurate with IIC

   // Stack walking support

@@ -1529,7 +1529,7 @@ void ciReplay::initialize(ciMethod* m) {
   } else {
     EXCEPTION_CONTEXT;
     // m->_instructions_size = rec->_instructions_size;
-    m->_instructions_size = -1;
+    m->_inline_instructions_size = -1;
     m->_interpreter_invocation_count = rec->_interpreter_invocation_count;
     m->_interpreter_throwout_count = rec->_interpreter_throwout_count;
     MethodCounters* mcs = method->get_method_counters(CHECK_AND_CLEAR);

@@ -281,6 +281,8 @@ public:
   bool consts_contains(address addr) const { return consts_begin() <= addr && addr < consts_end(); }
   int consts_size() const { return consts_end() - consts_begin(); }

+  virtual int skipped_instructions_size() const = 0;
+
   virtual address stub_begin() const = 0;
   virtual address stub_end() const = 0;
   bool stub_contains(address addr) const { return stub_begin() <= addr && addr < stub_end(); }

@@ -677,6 +677,7 @@ nmethod::nmethod(
     _dependencies_offset  = _scopes_pcs_offset;
     _handler_table_offset = _dependencies_offset;
     _nul_chk_table_offset = _handler_table_offset;
+    _skipped_instructions_size = code_buffer->total_skipped_instructions_size();
 #if INCLUDE_JVMCI
     _speculations_offset = _nul_chk_table_offset;
     _jvmci_data_offset   = _speculations_offset;
@@ -813,6 +814,7 @@ nmethod::nmethod(
     _consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts());
     _stub_offset   = content_offset() + code_buffer->total_offset_of(code_buffer->stubs());
     set_ctable_begin(header_begin() + _consts_offset);
+    _skipped_instructions_size = code_buffer->total_skipped_instructions_size();
 #if INCLUDE_JVMCI
     if (compiler->is_jvmci()) {

@@ -260,6 +260,8 @@ class nmethod : public CompiledMethod {
   // Protected by CompiledMethod_lock
   volatile signed char _state;  // {not_installed, in_use, not_used, not_entrant}

+  int _skipped_instructions_size;
+
   // For native wrappers
   nmethod(Method* method,
           CompilerType type,
@@ -393,6 +395,9 @@ class nmethod : public CompiledMethod {
   address handler_table_begin() const { return header_begin() + _handler_table_offset; }
   address handler_table_end()   const { return header_begin() + _nul_chk_table_offset; }
   address nul_chk_table_begin() const { return header_begin() + _nul_chk_table_offset; }
+
+  int skipped_instructions_size() const { return _skipped_instructions_size; }
+
 #if INCLUDE_JVMCI
   address nul_chk_table_end()   const { return header_begin() + _speculations_offset; }
   address speculations_begin()  const { return header_begin() + _speculations_offset; }

@@ -180,7 +180,7 @@ bool InlineTree::should_inline(ciMethod* callee_method, ciMethod* caller_method,
   } else {
     // Not hot. Check for medium-sized pre-existing nmethod at cold sites.
     if (callee_method->has_compiled_code() &&
-        callee_method->instructions_size() > inline_small_code_size) {
+        callee_method->inline_instructions_size() > inline_small_code_size) {
       set_msg("already compiled into a medium method");
       return false;
     }
@@ -278,7 +278,7 @@ bool InlineTree::should_not_inline(ciMethod* callee_method, ciMethod* caller_met
   }

   if (callee_method->has_compiled_code() &&
-      callee_method->instructions_size() > InlineSmallCode) {
+      callee_method->inline_instructions_size() > InlineSmallCode) {
     set_msg("already compiled into a big method");
     return true;
   }
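
For reference, a condensed sketch of the size check these two hunks adjust;
the struct and values are hypothetical, and only the comparison mirrors the
shape of InlineTree's logic:

    #include <cstdio>

    // Hypothetical stand-in for the ciMethod queries used by InlineTree.
    struct Callee {
      bool has_compiled_code;
      int  inline_instructions_size;  // post-call nops excluded after this patch
    };

    // Mirrors the shape of the should_not_inline() size check above.
    bool too_big_to_inline(const Callee& m, int inline_small_code) {
      return m.has_compiled_code && m.inline_instructions_size > inline_small_code;
    }

    int main() {
      Callee hot = { true, 1980 };  // 2100 raw bytes minus 120 bytes of nops
      std::printf("refuse inlining: %s\n", too_big_to_inline(hot, 2000) ? "yes" : "no");
      return 0;                     // prints: refuse inlining: no
    }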

@@ -814,7 +814,7 @@
                                                                    \
   nonstatic_field(ciMethod, _interpreter_invocation_count, int)    \
   nonstatic_field(ciMethod, _interpreter_throwout_count, int)      \
-  nonstatic_field(ciMethod, _instructions_size, int)               \
+  nonstatic_field(ciMethod, _inline_instructions_size, int)        \
                                                                    \
   nonstatic_field(ciMethodData, _data_size, int)                   \
   nonstatic_field(ciMethodData, _state, u_char)                    \