8288040: x86: Loom: Improve cont/monitor-count helper methods

Reviewed-by: coleenp, rpressler
This commit is contained in:
Aleksey Shipilev 2022-06-09 08:16:22 +00:00
parent 5a89cb01bc
commit e5b56bafa9
8 changed files with 121 additions and 79 deletions

View file

@ -460,8 +460,7 @@ int LIR_Assembler::emit_unwind_handler() {
__ unlock_object(rdi, rsi, rax, *stub->entry()); __ unlock_object(rdi, rsi, rax, *stub->entry());
} }
__ bind(*stub->continuation()); __ bind(*stub->continuation());
NOT_LP64(__ get_thread(thread);) __ dec_held_monitor_count();
__ dec_held_monitor_count(thread);
} }
if (compilation()->env()->dtrace_method_probes()) { if (compilation()->env()->dtrace_method_probes()) {
@ -3516,32 +3515,12 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) {
// will be skipped. Solution is // will be skipped. Solution is
// 1. Increase only in fastpath // 1. Increase only in fastpath
// 2. Runtime1::monitorenter increase count after locking // 2. Runtime1::monitorenter increase count after locking
#ifndef _LP64 __ inc_held_monitor_count();
Register thread = rsi;
__ push(thread);
__ get_thread(thread);
#else
Register thread = r15_thread;
#endif
__ inc_held_monitor_count(thread);
#ifndef _LP64
__ pop(thread);
#endif
} }
__ bind(*op->stub()->continuation()); __ bind(*op->stub()->continuation());
if (op->code() == lir_unlock) { if (op->code() == lir_unlock) {
// unlock in slowpath is JRT_Leaf stub, no deoptimization can happen // unlock in slowpath is JRT_Leaf stub, no deoptimization can happen
#ifndef _LP64 __ dec_held_monitor_count();
Register thread = rsi;
__ push(thread);
__ get_thread(thread);
#else
Register thread = r15_thread;
#endif
__ dec_held_monitor_count(thread);
#ifndef _LP64
__ pop(thread);
#endif
} }
} }

View file

@ -1064,8 +1064,7 @@ void InterpreterMacroAssembler::remove_activation(
bind(unlock); bind(unlock);
unlock_object(robj); unlock_object(robj);
NOT_LP64(get_thread(rthread);) dec_held_monitor_count();
dec_held_monitor_count(rthread);
pop(state); pop(state);
@ -1111,8 +1110,7 @@ void InterpreterMacroAssembler::remove_activation(
push(state); push(state);
mov(robj, rmon); // nop if robj and rmon are the same mov(robj, rmon); // nop if robj and rmon are the same
unlock_object(robj); unlock_object(robj);
NOT_LP64(get_thread(rthread);) dec_held_monitor_count();
dec_held_monitor_count(rthread);
pop(state); pop(state);
if (install_monitor_exception) { if (install_monitor_exception) {
@ -1173,7 +1171,7 @@ void InterpreterMacroAssembler::remove_activation(
leave(); // remove frame anchor leave(); // remove frame anchor
pop(ret_addr); // get return address pop(ret_addr); // get return address
mov(rsp, rbx); // set sp to sender sp mov(rsp, rbx); // set sp to sender sp
pop_cont_fastpath(rthread); pop_cont_fastpath();
} }
void InterpreterMacroAssembler::get_method_counters(Register method, void InterpreterMacroAssembler::get_method_counters(Register method,

View file

@ -2833,36 +2833,122 @@ void MacroAssembler::push_IU_state() {
pusha(); pusha();
} }
void MacroAssembler::push_cont_fastpath(Register java_thread) { void MacroAssembler::push_cont_fastpath() {
if (!Continuations::enabled()) return; if (!Continuations::enabled()) return;
#ifndef _LP64
Register rthread = rax;
Register rrealsp = rbx;
push(rthread);
push(rrealsp);
get_thread(rthread);
// The code below wants the original RSP.
// Move it back after the pushes above.
movptr(rrealsp, rsp);
addptr(rrealsp, 2*wordSize);
#else
Register rthread = r15_thread;
Register rrealsp = rsp;
#endif
Label done; Label done;
cmpptr(rsp, Address(java_thread, JavaThread::cont_fastpath_offset())); cmpptr(rrealsp, Address(rthread, JavaThread::cont_fastpath_offset()));
jccb(Assembler::belowEqual, done); jccb(Assembler::belowEqual, done);
movptr(Address(java_thread, JavaThread::cont_fastpath_offset()), rsp); movptr(Address(rthread, JavaThread::cont_fastpath_offset()), rrealsp);
bind(done); bind(done);
#ifndef _LP64
pop(rrealsp);
pop(rthread);
#endif
} }
void MacroAssembler::pop_cont_fastpath(Register java_thread) { void MacroAssembler::pop_cont_fastpath() {
if (!Continuations::enabled()) return; if (!Continuations::enabled()) return;
#ifndef _LP64
Register rthread = rax;
Register rrealsp = rbx;
push(rthread);
push(rrealsp);
get_thread(rthread);
// The code below wants the original RSP.
// Move it back after the pushes above.
movptr(rrealsp, rsp);
addptr(rrealsp, 2*wordSize);
#else
Register rthread = r15_thread;
Register rrealsp = rsp;
#endif
Label done; Label done;
cmpptr(rsp, Address(java_thread, JavaThread::cont_fastpath_offset())); cmpptr(rrealsp, Address(rthread, JavaThread::cont_fastpath_offset()));
jccb(Assembler::below, done); jccb(Assembler::below, done);
movptr(Address(java_thread, JavaThread::cont_fastpath_offset()), 0); movptr(Address(rthread, JavaThread::cont_fastpath_offset()), 0);
bind(done); bind(done);
#ifndef _LP64
pop(rrealsp);
pop(rthread);
#endif
} }
void MacroAssembler::inc_held_monitor_count(Register java_thread) { void MacroAssembler::inc_held_monitor_count() {
if (!Continuations::enabled()) return; if (!Continuations::enabled()) return;
incrementl(Address(java_thread, JavaThread::held_monitor_count_offset()));
#ifndef _LP64
Register thread = rax;
push(thread);
get_thread(thread);
#else
Register thread = r15_thread;
#endif
incrementl(Address(thread, JavaThread::held_monitor_count_offset()));
#ifndef _LP64
pop(thread);
#endif
} }
void MacroAssembler::dec_held_monitor_count(Register java_thread) { void MacroAssembler::dec_held_monitor_count() {
if (!Continuations::enabled()) return; if (!Continuations::enabled()) return;
decrementl(Address(java_thread, JavaThread::held_monitor_count_offset()));
#ifndef _LP64
Register thread = rax;
push(thread);
get_thread(thread);
#else
Register thread = r15_thread;
#endif
decrementl(Address(thread, JavaThread::held_monitor_count_offset()));
#ifndef _LP64
pop(thread);
#endif
} }
void MacroAssembler::reset_held_monitor_count(Register java_thread) { void MacroAssembler::reset_held_monitor_count() {
movl(Address(java_thread, JavaThread::held_monitor_count_offset()), (int32_t)0); if (!Continuations::enabled()) return;
#ifndef _LP64
Register thread = rax;
push(thread);
get_thread(thread);
#else
Register thread = r15_thread;
#endif
movl(Address(thread, JavaThread::held_monitor_count_offset()), (int32_t)0);
#ifndef _LP64
pop(thread);
#endif
} }
#ifdef ASSERT #ifdef ASSERT

View file

@ -524,11 +524,11 @@ class MacroAssembler: public Assembler {
void push_CPU_state(); void push_CPU_state();
void pop_CPU_state(); void pop_CPU_state();
void push_cont_fastpath(Register java_thread); void push_cont_fastpath();
void pop_cont_fastpath(Register java_thread); void pop_cont_fastpath();
void inc_held_monitor_count(Register java_thread); void inc_held_monitor_count();
void dec_held_monitor_count(Register java_thread); void dec_held_monitor_count();
void reset_held_monitor_count(Register java_thread); void reset_held_monitor_count();
DEBUG_ONLY(void stop_if_in_cont(Register cont_reg, const char* name);) DEBUG_ONLY(void stop_if_in_cont(Register cont_reg, const char* name);)
// Round up to a power of two // Round up to a power of two

View file

@ -941,7 +941,7 @@ void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
} }
} }
__ push_cont_fastpath(r15_thread); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
// 6243940 We might end up in handle_wrong_method if // 6243940 We might end up in handle_wrong_method if
// the callee is deoptimized as we race thru here. If that // the callee is deoptimized as we race thru here. If that

View file

@ -393,7 +393,7 @@ class StubGenerator: public StubCodeGenerator {
} }
#endif #endif
__ pop_cont_fastpath(r15_thread); __ pop_cont_fastpath();
// restore regs belonging to calling function // restore regs belonging to calling function
#ifdef _WIN64 #ifdef _WIN64
@ -8338,7 +8338,7 @@ void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Regist
__ movl(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax); __ movl(Address(rsp, ContinuationEntry::parent_held_monitor_count_offset()), rax);
__ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0); __ movptr(Address(r15_thread, JavaThread::cont_fastpath_offset()), 0);
__ reset_held_monitor_count(r15_thread); __ reset_held_monitor_count();
} }
//---------------------------- continuation_enter_cleanup --------------------------- //---------------------------- continuation_enter_cleanup ---------------------------

View file

@ -367,14 +367,10 @@ address TemplateInterpreterGenerator::generate_safept_entry_for(
address runtime_entry) { address runtime_entry) {
address entry = __ pc(); address entry = __ pc();
const Register rthread = NOT_LP64(rcx) LP64_ONLY(r15_thread);
__ push(state); __ push(state);
NOT_LP64(__ get_thread(rthread);) __ push_cont_fastpath();
__ push_cont_fastpath(rthread);
__ call_VM(noreg, runtime_entry); __ call_VM(noreg, runtime_entry);
NOT_LP64(__ get_thread(rthread);) __ pop_cont_fastpath();
__ pop_cont_fastpath(rthread);
__ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));
return entry; return entry;
@ -609,9 +605,7 @@ void TemplateInterpreterGenerator::lock_method() {
__ movptr(lockreg, rsp); // object address __ movptr(lockreg, rsp); // object address
__ lock_object(lockreg); __ lock_object(lockreg);
Register rthread = NOT_LP64(rax) LP64_ONLY(r15_thread); __ inc_held_monitor_count();
NOT_LP64(__ get_thread(rthread);)
__ inc_held_monitor_count(rthread);
} }
// Generate a fixed interpreter frame. This is identical setup for // Generate a fixed interpreter frame. This is identical setup for
@ -666,24 +660,15 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
address TemplateInterpreterGenerator::generate_Continuation_doYield_entry(void) { address TemplateInterpreterGenerator::generate_Continuation_doYield_entry(void) {
if (!Continuations::enabled()) return nullptr; if (!Continuations::enabled()) return nullptr;
#ifdef _LP64
address entry = __ pc(); address entry = __ pc();
assert(StubRoutines::cont_doYield() != NULL, "stub not yet generated"); assert(StubRoutines::cont_doYield() != NULL, "stub not yet generated");
// __ movl(c_rarg1, Address(rsp, wordSize)); // scopes __ push_cont_fastpath();
const Register thread1 = NOT_LP64(rdi) LP64_ONLY(r15_thread);
NOT_LP64(__ get_thread(thread1));
__ push_cont_fastpath(thread1);
__ jump(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::cont_doYield()))); __ jump(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::cont_doYield())));
// return value is in rax // return value is in rax
return entry; return entry;
#else
// Not implemented. Allow startup of legacy Java code that does not touch
// Continuation.doYield yet. Throw AbstractMethodError on access.
return generate_abstract_entry();
#endif
} }
// Method entry for java.lang.ref.Reference.get. // Method entry for java.lang.ref.Reference.get.
@ -1279,8 +1264,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ bind(unlock); __ bind(unlock);
__ unlock_object(regmon); __ unlock_object(regmon);
NOT_LP64(__ get_thread(thread);) __ dec_held_monitor_count();
__ dec_held_monitor_count(thread);
} }
__ bind(L); __ bind(L);
} }

View file

@ -2592,11 +2592,10 @@ void TemplateTable::_return(TosState state) {
#endif #endif
__ jcc(Assembler::zero, no_safepoint); __ jcc(Assembler::zero, no_safepoint);
__ push(state); __ push(state);
__ push_cont_fastpath(NOT_LP64(thread) LP64_ONLY(r15_thread)); __ push_cont_fastpath();
__ call_VM(noreg, CAST_FROM_FN_PTR(address, __ call_VM(noreg, CAST_FROM_FN_PTR(address,
InterpreterRuntime::at_safepoint)); InterpreterRuntime::at_safepoint));
NOT_LP64(__ get_thread(thread);) __ pop_cont_fastpath();
__ pop_cont_fastpath(NOT_LP64(thread) LP64_ONLY(r15_thread));
__ pop(state); __ pop(state);
__ bind(no_safepoint); __ bind(no_safepoint);
} }
@ -4365,9 +4364,7 @@ void TemplateTable::monitorenter() {
__ lock_object(rmon); __ lock_object(rmon);
// The object is stored so counter should be increased even if stackoverflow is generated // The object is stored so counter should be increased even if stackoverflow is generated
Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rbx); __ inc_held_monitor_count();
NOT_LP64(__ get_thread(rthread);)
__ inc_held_monitor_count(rthread);
// check to make sure this monitor doesn't cause stack overflow after locking // check to make sure this monitor doesn't cause stack overflow after locking
__ save_bcp(); // in case of exception __ save_bcp(); // in case of exception
@ -4428,9 +4425,7 @@ void TemplateTable::monitorexit() {
__ push_ptr(rax); // make sure object is on stack (contract with oopMaps) __ push_ptr(rax); // make sure object is on stack (contract with oopMaps)
__ unlock_object(rtop); __ unlock_object(rtop);
Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rax); __ dec_held_monitor_count();
NOT_LP64(__ get_thread(rthread);)
__ dec_held_monitor_count(rthread);
__ pop_ptr(rax); // discard object __ pop_ptr(rax); // discard object
} }