From 89a032dc057d04c996632ad317a0303cf3560852 Mon Sep 17 00:00:00 2001 From: Sergey Kuksenko Date: Wed, 18 Jan 2023 00:16:34 +0000 Subject: [PATCH] 8300002: Performance regression caused by non-inlined hot methods due to post call noop instructions Reviewed-by: kvn, iveresov, eosterlund --- .../cpu/aarch64/macroAssembler_aarch64.cpp | 1 + src/hotspot/cpu/ppc/macroAssembler_ppc.cpp | 1 + src/hotspot/cpu/x86/macroAssembler_x86.cpp | 1 + src/hotspot/share/asm/assembler.hpp | 16 +++++++++ src/hotspot/share/asm/codeBuffer.cpp | 11 +++++++ src/hotspot/share/asm/codeBuffer.hpp | 9 +++++ src/hotspot/share/ci/ciMethod.cpp | 33 ++++++++++--------- src/hotspot/share/ci/ciMethod.hpp | 5 +-- src/hotspot/share/ci/ciReplay.cpp | 2 +- src/hotspot/share/code/compiledMethod.hpp | 2 ++ src/hotspot/share/code/nmethod.cpp | 2 ++ src/hotspot/share/code/nmethod.hpp | 5 +++ src/hotspot/share/opto/bytecodeInfo.cpp | 4 +-- src/hotspot/share/runtime/vmStructs.cpp | 2 +- 14 files changed, 73 insertions(+), 21 deletions(-) diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index b16140dac67..bfda0d54d87 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -1101,6 +1101,7 @@ void MacroAssembler::post_call_nop() { } InstructionMark im(this); relocate(post_call_nop_Relocation::spec()); + InlineSkippedInstructionsCounter skipCounter(this); nop(); movk(zr, 0); movk(zr, 0); diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 49166a2e1ad..fb6ec91161b 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -1185,6 +1185,7 @@ void MacroAssembler::post_call_nop() { if (!Continuations::enabled()) { return; } + InlineSkippedInstructionsCounter skipCounter(this); nop(); } diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 0a5314d8faa..b3aab73913a 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -2035,6 +2035,7 @@ void MacroAssembler::post_call_nop() { } InstructionMark im(this); relocate(post_call_nop_Relocation::spec()); + InlineSkippedInstructionsCounter skipCounter(this); emit_int8((int8_t)0x0f); emit_int8((int8_t)0x1f); emit_int8((int8_t)0x84); diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp index a5ec5015ac0..1593d388579 100644 --- a/src/hotspot/share/asm/assembler.hpp +++ b/src/hotspot/share/asm/assembler.hpp @@ -241,6 +241,19 @@ class AbstractAssembler : public ResourceObj { } }; friend class InstructionMark; + + // count size of instructions which are skipped from inline heuristics + class InlineSkippedInstructionsCounter: public StackObj { + private: + AbstractAssembler* _assm; + address _start; + public: + InlineSkippedInstructionsCounter(AbstractAssembler* assm) : _assm(assm), _start(assm->pc()) { + } + ~InlineSkippedInstructionsCounter() { + _assm->register_skipped(_assm->pc() - _start); + } + }; #ifdef ASSERT // Make it return true on platforms which need to verify // instruction boundaries for some operations. @@ -333,10 +346,13 @@ class AbstractAssembler : public ResourceObj { OopRecorder* oop_recorder() const { return _oop_recorder; } void set_oop_recorder(OopRecorder* r) { _oop_recorder = r; } + void register_skipped(int size) { code_section()->register_skipped(size); } + address inst_mark() const { return code_section()->mark(); } void set_inst_mark() { code_section()->set_mark(); } void clear_inst_mark() { code_section()->clear_mark(); } + // Constants in code void relocate(RelocationHolder const& rspec, int format = 0) { assert(!pd_check_instruction_mark() diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp index f9ffd169e38..e87fd7cd887 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp +++ b/src/hotspot/share/asm/codeBuffer.cpp @@ -596,6 +596,17 @@ csize_t CodeBuffer::total_offset_of(const CodeSection* cs) const { return -1; } +int CodeBuffer::total_skipped_instructions_size() const { + int total_skipped_size = 0; + for (int n = (int) SECT_FIRST; n < (int) SECT_LIMIT; n++) { + const CodeSection* cur_cs = code_section(n); + if (!cur_cs->is_empty()) { + total_skipped_size += cur_cs->_skipped_instructions_size; + } + } + return total_skipped_size; +} + csize_t CodeBuffer::total_relocation_size() const { csize_t total = copy_relocations_to(NULL); // dry run only return (csize_t) align_up(total, HeapWordSize); diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp index db11cfa698e..c03281c7812 100644 --- a/src/hotspot/share/asm/codeBuffer.hpp +++ b/src/hotspot/share/asm/codeBuffer.hpp @@ -98,6 +98,7 @@ class CodeSection { address _locs_point; // last relocated position (grows upward) bool _locs_own; // did I allocate the locs myself? bool _scratch_emit; // Buffer is used for scratch emit, don't relocate. + int _skipped_instructions_size; char _index; // my section number (SECT_INST, etc.) CodeBuffer* _outer; // enclosing CodeBuffer @@ -114,6 +115,7 @@ class CodeSection { _locs_point = NULL; _locs_own = false; _scratch_emit = false; + _skipped_instructions_size = 0; debug_only(_index = (char)-1); debug_only(_outer = (CodeBuffer*)badAddress); } @@ -144,6 +146,7 @@ class CodeSection { _end = cs->_end; _limit = cs->_limit; _locs_point = cs->_locs_point; + _skipped_instructions_size = cs->_skipped_instructions_size; } public: @@ -204,6 +207,10 @@ class CodeSection { _locs_point = pc; } + void register_skipped(int size) { + _skipped_instructions_size += size; + } + // Code emission void emit_int8(uint8_t x1) { address curr = end(); @@ -638,6 +645,8 @@ class CodeBuffer: public StackObj DEBUG_ONLY(COMMA private Scrubber) { // allocated size of all relocation data, including index, rounded up csize_t total_relocation_size() const; + int total_skipped_instructions_size() const; + csize_t copy_relocations_to(address buf, csize_t buf_limit, bool only_inst) const; // allocated size of any and all recorded oops diff --git a/src/hotspot/share/ci/ciMethod.cpp b/src/hotspot/share/ci/ciMethod.cpp index 78b3e0ec793..7fff3f02073 100644 --- a/src/hotspot/share/ci/ciMethod.cpp +++ b/src/hotspot/share/ci/ciMethod.cpp @@ -151,7 +151,7 @@ ciMethod::ciMethod(const methodHandle& h_m, ciInstanceKlass* holder) : } if (_interpreter_invocation_count == 0) _interpreter_invocation_count = 1; - _instructions_size = -1; + _inline_instructions_size = -1; if (ReplayCompiles) { ciReplay::initialize(this); } @@ -172,7 +172,7 @@ ciMethod::ciMethod(ciInstanceKlass* holder, _method_data( NULL), _method_blocks( NULL), _intrinsic_id( vmIntrinsics::_none), - _instructions_size(-1), + _inline_instructions_size(-1), _can_be_statically_bound(false), _can_omit_stack_trace(true), _liveness( NULL) @@ -1087,7 +1087,7 @@ bool ciMethod::can_be_compiled() { // ------------------------------------------------------------------ // ciMethod::has_compiled_code bool ciMethod::has_compiled_code() { - return instructions_size() > 0; + return inline_instructions_size() > 0; } int ciMethod::highest_osr_comp_level() { @@ -1110,25 +1110,28 @@ int ciMethod::code_size_for_inlining() { } // ------------------------------------------------------------------ -// ciMethod::instructions_size +// ciMethod::inline_instructions_size // // This is a rough metric for "fat" methods, compared before inlining // with InlineSmallCode. The CodeBlob::code_size accessor includes // junk like exception handler, stubs, and constant table, which are // not highly relevant to an inlined method. So we use the more // specific accessor nmethod::insts_size. -int ciMethod::instructions_size() { - if (_instructions_size == -1) { +// Also some instructions inside the code are excluded from inline +// heuristic (e.g. post call nop instructions; see InlineSkippedInstructionsCounter) +int ciMethod::inline_instructions_size() { + if (_inline_instructions_size == -1) { GUARDED_VM_ENTRY( - CompiledMethod* code = get_Method()->code(); - if (code != NULL && (code->comp_level() == CompLevel_full_optimization)) { - _instructions_size = code->insts_end() - code->verified_entry_point(); - } else { - _instructions_size = 0; - } - ); + CompiledMethod* code = get_Method()->code(); + if (code != NULL && (code->comp_level() == CompLevel_full_optimization)) { + int isize = code->insts_end() - code->verified_entry_point() - code->skipped_instructions_size(); + _inline_instructions_size = isize > 0 ? isize : 0; + } else { + _inline_instructions_size = 0; + } + ); } - return _instructions_size; + return _inline_instructions_size; } // ------------------------------------------------------------------ @@ -1315,7 +1318,7 @@ void ciMethod::dump_replay_data(outputStream* st) { mcs == NULL ? 0 : mcs->backedge_counter()->raw_counter(), interpreter_invocation_count(), interpreter_throwout_count(), - _instructions_size); + _inline_instructions_size); } // ------------------------------------------------------------------ diff --git a/src/hotspot/share/ci/ciMethod.hpp b/src/hotspot/share/ci/ciMethod.hpp index 4bc4cb1961c..11086a9a6e3 100644 --- a/src/hotspot/share/ci/ciMethod.hpp +++ b/src/hotspot/share/ci/ciMethod.hpp @@ -82,7 +82,7 @@ class ciMethod : public ciMetadata { int _handler_count; int _interpreter_invocation_count; int _interpreter_throwout_count; - int _instructions_size; + int _inline_instructions_size; int _size_of_parameters; bool _uses_monitors; @@ -315,7 +315,8 @@ class ciMethod : public ciMetadata { bool check_call(int refinfo_index, bool is_static) const; bool ensure_method_data(); // make sure it exists in the VM also MethodCounters* ensure_method_counters(); - int instructions_size(); + + int inline_instructions_size(); int scale_count(int count, float prof_factor = 1.); // make MDO count commensurate with IIC // Stack walking support diff --git a/src/hotspot/share/ci/ciReplay.cpp b/src/hotspot/share/ci/ciReplay.cpp index 2d88aa64701..77158cb614f 100644 --- a/src/hotspot/share/ci/ciReplay.cpp +++ b/src/hotspot/share/ci/ciReplay.cpp @@ -1529,7 +1529,7 @@ void ciReplay::initialize(ciMethod* m) { } else { EXCEPTION_CONTEXT; // m->_instructions_size = rec->_instructions_size; - m->_instructions_size = -1; + m->_inline_instructions_size = -1; m->_interpreter_invocation_count = rec->_interpreter_invocation_count; m->_interpreter_throwout_count = rec->_interpreter_throwout_count; MethodCounters* mcs = method->get_method_counters(CHECK_AND_CLEAR); diff --git a/src/hotspot/share/code/compiledMethod.hpp b/src/hotspot/share/code/compiledMethod.hpp index 00e62ed9695..f1a7bd1997d 100644 --- a/src/hotspot/share/code/compiledMethod.hpp +++ b/src/hotspot/share/code/compiledMethod.hpp @@ -281,6 +281,8 @@ public: bool consts_contains(address addr) const { return consts_begin() <= addr && addr < consts_end(); } int consts_size() const { return consts_end() - consts_begin(); } + virtual int skipped_instructions_size() const = 0; + virtual address stub_begin() const = 0; virtual address stub_end() const = 0; bool stub_contains(address addr) const { return stub_begin() <= addr && addr < stub_end(); } diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp index f2304e35c24..64e2f575d68 100644 --- a/src/hotspot/share/code/nmethod.cpp +++ b/src/hotspot/share/code/nmethod.cpp @@ -677,6 +677,7 @@ nmethod::nmethod( _dependencies_offset = _scopes_pcs_offset; _handler_table_offset = _dependencies_offset; _nul_chk_table_offset = _handler_table_offset; + _skipped_instructions_size = code_buffer->total_skipped_instructions_size(); #if INCLUDE_JVMCI _speculations_offset = _nul_chk_table_offset; _jvmci_data_offset = _speculations_offset; @@ -813,6 +814,7 @@ nmethod::nmethod( _consts_offset = content_offset() + code_buffer->total_offset_of(code_buffer->consts()); _stub_offset = content_offset() + code_buffer->total_offset_of(code_buffer->stubs()); set_ctable_begin(header_begin() + _consts_offset); + _skipped_instructions_size = code_buffer->total_skipped_instructions_size(); #if INCLUDE_JVMCI if (compiler->is_jvmci()) { diff --git a/src/hotspot/share/code/nmethod.hpp b/src/hotspot/share/code/nmethod.hpp index d9b792e0ef6..9a425db9183 100644 --- a/src/hotspot/share/code/nmethod.hpp +++ b/src/hotspot/share/code/nmethod.hpp @@ -260,6 +260,8 @@ class nmethod : public CompiledMethod { // Protected by CompiledMethod_lock volatile signed char _state; // {not_installed, in_use, not_used, not_entrant} + int _skipped_instructions_size; + // For native wrappers nmethod(Method* method, CompilerType type, @@ -393,6 +395,9 @@ class nmethod : public CompiledMethod { address handler_table_begin () const { return header_begin() + _handler_table_offset ; } address handler_table_end () const { return header_begin() + _nul_chk_table_offset ; } address nul_chk_table_begin () const { return header_begin() + _nul_chk_table_offset ; } + + int skipped_instructions_size () const { return _skipped_instructions_size ; } + #if INCLUDE_JVMCI address nul_chk_table_end () const { return header_begin() + _speculations_offset ; } address speculations_begin () const { return header_begin() + _speculations_offset ; } diff --git a/src/hotspot/share/opto/bytecodeInfo.cpp b/src/hotspot/share/opto/bytecodeInfo.cpp index 1191913bfb9..13f58b90c44 100644 --- a/src/hotspot/share/opto/bytecodeInfo.cpp +++ b/src/hotspot/share/opto/bytecodeInfo.cpp @@ -180,7 +180,7 @@ bool InlineTree::should_inline(ciMethod* callee_method, ciMethod* caller_method, } else { // Not hot. Check for medium-sized pre-existing nmethod at cold sites. if (callee_method->has_compiled_code() && - callee_method->instructions_size() > inline_small_code_size) { + callee_method->inline_instructions_size() > inline_small_code_size) { set_msg("already compiled into a medium method"); return false; } @@ -278,7 +278,7 @@ bool InlineTree::should_not_inline(ciMethod* callee_method, ciMethod* caller_met } if (callee_method->has_compiled_code() && - callee_method->instructions_size() > InlineSmallCode) { + callee_method->inline_instructions_size() > InlineSmallCode) { set_msg("already compiled into a big method"); return true; } diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index 7bdcb019c2a..673cdb1b337 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -814,7 +814,7 @@ \ nonstatic_field(ciMethod, _interpreter_invocation_count, int) \ nonstatic_field(ciMethod, _interpreter_throwout_count, int) \ - nonstatic_field(ciMethod, _instructions_size, int) \ + nonstatic_field(ciMethod, _inline_instructions_size, int) \ \ nonstatic_field(ciMethodData, _data_size, int) \ nonstatic_field(ciMethodData, _state, u_char) \