8308469: [PPC64] Implement alternative fast-locking scheme

Reviewed-by: rrich, lucy
Author: Martin Doerr
Date:   2023-06-01 17:24:50 +00:00
Parent: ec55539534
Commit: 0ab09630c6
7 changed files with 354 additions and 154 deletions
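
For context: with LM_LIGHTWEIGHT (the "alternative fast-locking scheme"), locking flips the mark word's lock bits from unlocked (01) to fast-locked (00) and pushes the oop onto a small per-thread lock-stack; unlocking verifies the top entry and flips the bits back. No displaced header is published, unlike LM_LEGACY. The C++ sketch below is illustrative only, using simplified stand-in types rather than HotSpot's markWord/LockStack API, but it captures the invariants the PPC64 stubs in this commit rely on.

// Illustrative sketch of the lock-stack scheme (simplified types; not
// HotSpot's actual code). LockingMode values match globals.hpp:
// LM_MONITOR = 0, LM_LEGACY = 1, LM_LIGHTWEIGHT = 2.
#include <atomic>
#include <cstddef>
#include <cstdint>

using markword_t = uint64_t;
constexpr markword_t kLockMask = 0x3; // markWord::lock_mask_in_place
constexpr markword_t kUnlocked = 0x1; // markWord::unlocked_value (bits 01)
constexpr markword_t kMonitor  = 0x2; // markWord::monitor_value  (bits 10)
                                      // fast-locked state is bits 00

struct Obj { std::atomic<markword_t> mark{kUnlocked}; };

struct LockStackSketch {
  static constexpr size_t kCap = 8; // the real lock-stack capacity differs
  Obj*   entries[kCap];
  size_t top = 0;

  bool try_fast_lock(Obj* o) {
    if (top == kCap) return false;                       // no space -> slow path
    markword_t m = o->mark.load(std::memory_order_relaxed);
    if ((m & kLockMask) != kUnlocked) return false;      // locked/inflated -> slow path
    // Flip 01 -> 00, like atomically_flip_locked_state(is_unlock=false).
    if (!o->mark.compare_exchange_strong(m, m ^ kUnlocked,
                                         std::memory_order_acquire)) return false;
    entries[top++] = o;                                  // only the owning thread writes this
    return true;
  }

  bool try_fast_unlock(Obj* o) {
    if (top == 0 || entries[top - 1] != o) return false; // non-symmetric -> slow path
    markword_t m = o->mark.load(std::memory_order_relaxed);
    if ((m & kLockMask) != 0) return false;              // inflated meanwhile -> slow path
    // Flip 00 -> 01, like atomically_flip_locked_state(is_unlock=true).
    if (!o->mark.compare_exchange_strong(m, m | kUnlocked,
                                         std::memory_order_release)) return false;
    --top;
    return true;
  }
};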

src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp

@@ -114,41 +114,46 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
     bne(CCR0, slow_int);
   }

-  // ... and mark it unlocked.
-  ori(Rmark, Rmark, markWord::unlocked_value);
+  if (LockingMode == LM_LIGHTWEIGHT) {
+    fast_lock(Roop, Rmark, Rscratch, slow_int);
+  } else if (LockingMode == LM_LEGACY) {
+    // ... and mark it unlocked.
+    ori(Rmark, Rmark, markWord::unlocked_value);

     // Save unlocked object header into the displaced header location on the stack.
     std(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);

     // Compare object markWord with Rmark and if equal exchange Rscratch with object markWord.
     assert(oopDesc::mark_offset_in_bytes() == 0, "cas must take a zero displacement");
     cmpxchgd(/*flag=*/CCR0,
              /*current_value=*/Rscratch,
              /*compare_value=*/Rmark,
              /*exchange_value=*/Rbox,
              /*where=*/Roop/*+0==mark_offset_in_bytes*/,
              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
              MacroAssembler::cmpxchgx_hint_acquire_lock(),
              noreg,
              &cas_failed,
              /*check without membar and ldarx first*/true);
     // If compare/exchange succeeded we found an unlocked object and we now have locked it
     // hence we are done.
+  }
   b(done);

   bind(slow_int);
   b(slow_case); // far

-  bind(cas_failed);
-  // We did not find an unlocked object so see if this is a recursive case.
-  sub(Rscratch, Rscratch, R1_SP);
-  load_const_optimized(R0, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
-  and_(R0/*==0?*/, Rscratch, R0);
-  std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), Rbox);
-  bne(CCR0, slow_int);
+  if (LockingMode == LM_LEGACY) {
+    bind(cas_failed);
+    // We did not find an unlocked object so see if this is a recursive case.
+    sub(Rscratch, Rscratch, R1_SP);
+    load_const_optimized(R0, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
+    and_(R0/*==0?*/, Rscratch, R0);
+    std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+    bne(CCR0, slow_int);
+  }

   bind(done);
   inc_held_monitor_count(Rmark /*tmp*/);
 }
@@ -161,33 +166,41 @@ void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rb
   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
   assert(mark_addr.disp() == 0, "cas must take a zero displacement");

-  // Test first if it is a fast recursive unlock.
-  ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
-  cmpdi(CCR0, Rmark, 0);
-  beq(CCR0, done);
+  if (LockingMode != LM_LIGHTWEIGHT) {
+    // Test first if it is a fast recursive unlock.
+    ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+    cmpdi(CCR0, Rmark, 0);
+    beq(CCR0, done);
+  }

   // Load object.
   ld(Roop, in_bytes(BasicObjectLock::obj_offset()), Rbox);
   verify_oop(Roop, FILE_AND_LINE);

-  // Check if it is still a light weight lock, this is is true if we see
-  // the stack address of the basicLock in the markWord of the object.
-  cmpxchgd(/*flag=*/CCR0,
-           /*current_value=*/R0,
-           /*compare_value=*/Rbox,
-           /*exchange_value=*/Rmark,
-           /*where=*/Roop,
-           MacroAssembler::MemBarRel,
-           MacroAssembler::cmpxchgx_hint_release_lock(),
-           noreg,
-           &slow_int);
+  if (LockingMode == LM_LIGHTWEIGHT) {
+    ld(Rmark, oopDesc::mark_offset_in_bytes(), Roop);
+    andi_(R0, Rmark, markWord::monitor_value);
+    bne(CCR0, slow_int);
+    fast_unlock(Roop, Rmark, slow_int);
+  } else if (LockingMode == LM_LEGACY) {
+    // Check if it is still a light weight lock, this is is true if we see
+    // the stack address of the basicLock in the markWord of the object.
+    cmpxchgd(/*flag=*/CCR0,
+             /*current_value=*/R0,
+             /*compare_value=*/Rbox,
+             /*exchange_value=*/Rmark,
+             /*where=*/Roop,
+             MacroAssembler::MemBarRel,
+             MacroAssembler::cmpxchgx_hint_release_lock(),
+             noreg,
+             &slow_int);
+  }
   b(done);

   bind(slow_int);
   b(slow_case); // far

   // Done
   bind(done);
   dec_held_monitor_count(Rmark /*tmp*/);
 }

src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp

@@ -924,7 +924,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
   if (LockingMode == LM_MONITOR) {
     call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
   } else {
-    // template code:
+    // template code (for LM_LEGACY):
     //
     // markWord displaced_header = obj->mark().set_unlocked();
     // monitor->lock()->set_displaced_header(displaced_header);
@@ -938,7 +938,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     //   InterpreterRuntime::monitorenter(THREAD, monitor);
     // }

-    const Register displaced_header = R7_ARG5;
+    const Register header           = R7_ARG5;
     const Register object_mark_addr = R8_ARG6;
     const Register current_header   = R9_ARG7;
     const Register tmp              = R10_ARG8;
@@ -946,12 +946,12 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     Label count_locking, done;
     Label cas_failed, slow_case;

-    assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
+    assert_different_registers(header, object_mark_addr, current_header, tmp);

     // markWord displaced_header = obj->mark().set_unlocked();

-    // Load markWord from object into displaced_header.
-    ld(displaced_header, oopDesc::mark_offset_in_bytes(), object);
+    // Load markWord from object into header.
+    ld(header, oopDesc::mark_offset_in_bytes(), object);

     if (DiagnoseSyncOnValueBasedClasses != 0) {
       load_klass(tmp, object);
@@ -960,58 +960,64 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
       bne(CCR0, slow_case);
     }

-    // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
-    ori(displaced_header, displaced_header, markWord::unlocked_value);
-
-    // monitor->lock()->set_displaced_header(displaced_header);
-    const int lock_offset = in_bytes(BasicObjectLock::lock_offset());
-    const int mark_offset = lock_offset +
-                            BasicLock::displaced_header_offset_in_bytes();
-
-    // Initialize the box (Must happen before we update the object mark!).
-    std(displaced_header, mark_offset, monitor);
-
-    // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
-
-    // Store stack address of the BasicObjectLock (this is monitor) into object.
-    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
-
-    // Must fence, otherwise, preceding store(s) may float below cmpxchg.
-    // CmpxchgX sets CCR0 to cmpX(current, displaced).
-    cmpxchgd(/*flag=*/CCR0,
-             /*current_value=*/current_header,
-             /*compare_value=*/displaced_header, /*exchange_value=*/monitor,
-             /*where=*/object_mark_addr,
-             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
-             MacroAssembler::cmpxchgx_hint_acquire_lock(),
-             noreg,
-             &cas_failed,
-             /*check without membar and ldarx first*/true);
-
-    // If the compare-and-exchange succeeded, then we found an unlocked
-    // object and we have now locked it.
-    b(count_locking);
-    bind(cas_failed);
-
-    // } else if (THREAD->is_lock_owned((address)displaced_header))
-    //   // Simple recursive case.
-    //   monitor->lock()->set_displaced_header(nullptr);
-
-    // We did not see an unlocked object so try the fast recursive case.
-
-    // Check if owner is self by comparing the value in the markWord of object
-    // (current_header) with the stack pointer.
-    sub(current_header, current_header, R1_SP);
-
-    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-    load_const_optimized(tmp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
-
-    and_(R0/*==0?*/, current_header, tmp);
-    // If condition is true we are done and hence we can store 0 in the displaced
-    // header indicating it is a recursive lock.
-    bne(CCR0, slow_case);
-    std(R0/*==0!*/, mark_offset, monitor);
-    b(count_locking);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      fast_lock(object, /* mark word */ header, tmp, slow_case);
+      b(count_locking);
+    } else if (LockingMode == LM_LEGACY) {
+      // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
+      ori(header, header, markWord::unlocked_value);
+
+      // monitor->lock()->set_displaced_header(displaced_header);
+      const int lock_offset = in_bytes(BasicObjectLock::lock_offset());
+      const int mark_offset = lock_offset +
+                              BasicLock::displaced_header_offset_in_bytes();
+
+      // Initialize the box (Must happen before we update the object mark!).
+      std(header, mark_offset, monitor);
+
+      // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
+
+      // Store stack address of the BasicObjectLock (this is monitor) into object.
+      addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
+
+      // Must fence, otherwise, preceding store(s) may float below cmpxchg.
+      // CmpxchgX sets CCR0 to cmpX(current, displaced).
+      cmpxchgd(/*flag=*/CCR0,
+               /*current_value=*/current_header,
+               /*compare_value=*/header, /*exchange_value=*/monitor,
+               /*where=*/object_mark_addr,
+               MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+               MacroAssembler::cmpxchgx_hint_acquire_lock(),
+               noreg,
+               &cas_failed,
+               /*check without membar and ldarx first*/true);
+
+      // If the compare-and-exchange succeeded, then we found an unlocked
+      // object and we have now locked it.
+      b(count_locking);
+      bind(cas_failed);
+
+      // } else if (THREAD->is_lock_owned((address)displaced_header))
+      //   // Simple recursive case.
+      //   monitor->lock()->set_displaced_header(nullptr);
+
+      // We did not see an unlocked object so try the fast recursive case.
+
+      // Check if owner is self by comparing the value in the markWord of object
+      // (current_header) with the stack pointer.
+      sub(current_header, current_header, R1_SP);
+
+      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+      load_const_optimized(tmp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
+
+      and_(R0/*==0?*/, current_header, tmp);
+      // If condition is true we are done and hence we can store 0 in the displaced
+      // header indicating it is a recursive lock.
+      bne(CCR0, slow_case);
+      std(R0/*==0!*/, mark_offset, monitor);
+      b(count_locking);
+    }

     // } else {
     //   // Slow path.
@@ -1020,7 +1026,11 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     // None of the above fast optimizations worked so we have to get into the
     // slow case of monitor enter.
     bind(slow_case);
-    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), object);
+    } else {
+      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
+    }
     b(done);
     // }
     align(32, 12);
@@ -1042,7 +1052,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), monitor);
   } else {
-    // template code:
+    // template code (for LM_LEGACY):
     //
     // if ((displaced_header = monitor->displaced_header()) == nullptr) {
     //   // Recursive unlock. Mark the monitor unlocked by setting the object field to null.
@@ -1056,22 +1066,24 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     // }

     const Register object = R7_ARG5;
-    const Register displaced_header = R8_ARG6;
+    const Register header = R8_ARG6;
     const Register object_mark_addr = R9_ARG7;
     const Register current_header = R10_ARG8;

     Label free_slot;
     Label slow_case;

-    assert_different_registers(object, displaced_header, object_mark_addr, current_header);
+    assert_different_registers(object, header, object_mark_addr, current_header);

-    // Test first if we are in the fast recursive case.
-    ld(displaced_header, in_bytes(BasicObjectLock::lock_offset()) +
-                         BasicLock::displaced_header_offset_in_bytes(), monitor);
+    if (LockingMode != LM_LIGHTWEIGHT) {
+      // Test first if we are in the fast recursive case.
+      ld(header, in_bytes(BasicObjectLock::lock_offset()) +
+                 BasicLock::displaced_header_offset_in_bytes(), monitor);

-    // If the displaced header is zero, we have a recursive unlock.
-    cmpdi(CCR0, displaced_header, 0);
-    beq(CCR0, free_slot); // recursive unlock
+      // If the displaced header is zero, we have a recursive unlock.
+      cmpdi(CCR0, header, 0);
+      beq(CCR0, free_slot); // recursive unlock
+    }

     // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
     //   // We swapped the unlocked mark in displaced_header into the object's mark word.
@@ -1081,20 +1093,41 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     // The object address from the monitor is in object.
     ld(object, in_bytes(BasicObjectLock::obj_offset()), monitor);
-    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());

-    // We have the displaced header in displaced_header. If the lock is still
-    // lightweight, it will contain the monitor address and we'll store the
-    // displaced header back into the object's mark word.
-    // CmpxchgX sets CCR0 to cmpX(current, monitor).
-    cmpxchgd(/*flag=*/CCR0,
-             /*current_value=*/current_header,
-             /*compare_value=*/monitor, /*exchange_value=*/displaced_header,
-             /*where=*/object_mark_addr,
-             MacroAssembler::MemBarRel,
-             MacroAssembler::cmpxchgx_hint_release_lock(),
-             noreg,
-             &slow_case);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      // Check for non-symmetric locking. This is allowed by the spec and the interpreter
+      // must handle it.
+      Register tmp = current_header;
+      // First check for lock-stack underflow.
+      lwz(tmp, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+      cmplwi(CCR0, tmp, (unsigned)LockStack::start_offset());
+      ble(CCR0, slow_case);
+      // Then check if the top of the lock-stack matches the unlocked object.
+      addi(tmp, tmp, -oopSize);
+      ldx(tmp, tmp, R16_thread);
+      cmpd(CCR0, tmp, object);
+      bne(CCR0, slow_case);
+      ld(header, oopDesc::mark_offset_in_bytes(), object);
+      andi_(R0, header, markWord::monitor_value);
+      bne(CCR0, slow_case);
+      fast_unlock(object, header, slow_case);
+    } else {
+      addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
+
+      // We have the displaced header in displaced_header. If the lock is still
+      // lightweight, it will contain the monitor address and we'll store the
+      // displaced header back into the object's mark word.
+      // CmpxchgX sets CCR0 to cmpX(current, monitor).
+      cmpxchgd(/*flag=*/CCR0,
+               /*current_value=*/current_header,
+               /*compare_value=*/monitor, /*exchange_value=*/header,
+               /*where=*/object_mark_addr,
+               MacroAssembler::MemBarRel,
+               MacroAssembler::cmpxchgx_hint_release_lock(),
+               noreg,
+               &slow_case);
+    }
     b(free_slot);

     // } else {
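
A note on the underflow and top-of-stack checks added above: the JVM spec permits non-nested ("non-symmetric") monitorenter/monitorexit sequences, so the interpreter can be asked to unlock an object that is not its most recent fast-lock. A hedged sketch of that guard, with stand-in types rather than HotSpot's JavaThread/LockStack:

#include <cstddef>

struct OopSketch {};

// Stand-in for the per-thread lock-stack as the guard above views it.
struct InterpUnlockGuardSketch {
  OopSketch* entries[8];
  size_t     top = 0; // index analogue of JavaThread::lock_stack_top_offset()

  // Mirrors the emitted checks: underflow first, then top-entry match;
  // any failure sends the interpreter to the runtime slow path.
  bool may_fast_unlock(const OopSketch* obj) const {
    if (top == 0) return false;      // lock-stack underflow
    return entries[top - 1] == obj;  // top of lock-stack must be this object
  }
};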

src/hotspot/cpu/ppc/macroAssembler_ppc.cpp

@@ -2629,8 +2629,7 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
                                                Metadata* method_data,
                                                bool use_rtm, bool profile_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
-  assert(flag != CCR0, "bad condition register");
-  Label cont;
+  assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
   Label object_has_monitor;
   Label cas_failed;
   Label success, failure;
@@ -2649,7 +2648,7 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   if (UseRTMForStackLocks && use_rtm) {
     rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
                       stack_rtm_counters, method_data, profile_rtm,
-                      cont, object_has_monitor);
+                      success, object_has_monitor);
   }
 #endif // INCLUDE_RTM_OPT
@@ -2658,7 +2657,11 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   andi_(temp, displaced_header, markWord::monitor_value);
   bne(CCR0, object_has_monitor);

-  if (LockingMode != LM_MONITOR) {
+  if (LockingMode == LM_MONITOR) {
+    // Set NE to indicate 'failure' -> take slow-path.
+    crandc(flag, Assembler::equal, flag, Assembler::equal);
+    b(failure);
+  } else if (LockingMode == LM_LEGACY) {
     // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
     ori(displaced_header, displaced_header, markWord::unlocked_value);
@@ -2683,28 +2686,31 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
     // If the compare-and-exchange succeeded, then we found an unlocked
     // object and we have now locked it.
     b(success);
+
+    bind(cas_failed);
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if the owner is self by comparing the value in the markWord of object
+    // (current_header) with the stack pointer.
+    sub(current_header, current_header, R1_SP);
+    load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
+    and_(R0/*==0?*/, current_header, temp);
+    // If condition is true we are cont and hence we can store 0 as the
+    // displaced header in the box, which indicates that it is a recursive lock.
+    std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
+    if (flag != CCR0) {
+      mcrf(flag, CCR0);
+    }
+    beq(CCR0, success);
+    b(failure);
   } else {
-    // Set NE to indicate 'failure' -> take slow-path.
-    crandc(flag, Assembler::equal, flag, Assembler::equal);
-    b(failure);
+    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+    fast_lock(oop, displaced_header, temp, failure);
+    b(success);
   }

-  bind(cas_failed);
-  // We did not see an unlocked object so try the fast recursive case.
-
-  // Check if the owner is self by comparing the value in the markWord of object
-  // (current_header) with the stack pointer.
-  sub(current_header, current_header, R1_SP);
-  load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
-  and_(R0/*==0?*/, current_header, temp);
-  // If condition is true we are cont and hence we can store 0 as the
-  // displaced header in the box, which indicates that it is a recursive lock.
-  mcrf(flag,CCR0);
-  std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
-  b(cont);
-
   // Handle existing monitor.
   bind(object_has_monitor);
@@ -2714,7 +2720,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   // Use the same RTM locking code in 32- and 64-bit VM.
   if (use_rtm) {
     rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
-                         rtm_counters, method_data, profile_rtm, cont);
+                         rtm_counters, method_data, profile_rtm, success);
+    bne(flag, failure);
   } else {
 #endif // INCLUDE_RTM_OPT
@@ -2728,8 +2735,10 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
            MacroAssembler::cmpxchgx_hint_acquire_lock());

-  // Store a non-null value into the box.
-  std(box, BasicLock::displaced_header_offset_in_bytes(), box);
+  if (LockingMode != LM_LIGHTWEIGHT) {
+    // Store a non-null value into the box.
+    std(box, BasicLock::displaced_header_offset_in_bytes(), box);
+  }
   beq(flag, success);

   // Check for recursive locking.
@@ -2746,10 +2755,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   } // use_rtm()
 #endif

-  bind(cont);
   // flag == EQ indicates success, increment held monitor count
   // flag == NE indicates failure
-  bne(flag, failure);
   bind(success);
   inc_held_monitor_count(temp);
   bind(failure);
@@ -2759,9 +2766,8 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
                                                  Register temp, Register displaced_header, Register current_header,
                                                  bool use_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
-  assert(flag != CCR0, "bad condition register");
-  Label object_has_monitor, notRecursive;
-  Label success, failure;
+  assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
+  Label success, failure, object_has_monitor, notRecursive;

 #if INCLUDE_RTM_OPT
   if (UseRTMForStackLocks && use_rtm) {
@@ -2776,7 +2782,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   }
 #endif

-  if (LockingMode != LM_MONITOR) {
+  if (LockingMode == LM_LEGACY) {
     // Find the lock address and load the displaced header from the stack.
     ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
@@ -2792,7 +2798,11 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   andi_(R0, current_header, markWord::monitor_value);
   bne(CCR0, object_has_monitor);

-  if (LockingMode != LM_MONITOR) {
+  if (LockingMode == LM_MONITOR) {
+    // Set NE to indicate 'failure' -> take slow-path.
+    crandc(flag, Assembler::equal, flag, Assembler::equal);
+    b(failure);
+  } else if (LockingMode == LM_LEGACY) {
     // Check if it is still a light weight lock, this is is true if we see
     // the stack address of the basicLock in the markWord of the object.
     // Cmpxchg sets flag to cmpd(current_header, box).
@@ -2808,9 +2818,9 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
     b(success);
   } else {
-    // Set NE to indicate 'failure' -> take slow-path.
-    crandc(flag, Assembler::equal, flag, Assembler::equal);
-    b(failure);
+    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+    fast_unlock(oop, current_header, failure);
+    b(success);
   }

   // Handle existing monitor.
@@ -2819,7 +2829,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
   ld(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);

   // It's inflated.
 #if INCLUDE_RTM_OPT
   if (use_rtm) {
     Label L_regular_inflated_unlock;
@@ -2832,15 +2842,20 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   }
 #endif

-  ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
-
+  // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
+  // This is handled like owner thread mismatches: We take the slow path.
   cmpd(flag, temp, R16_thread);
   bne(flag, failure);

+  ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
+
   addic_(displaced_header, displaced_header, -1);
   blt(CCR0, notRecursive); // Not recursive if negative after decrement.
   std(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
-  b(success); // flag is already EQ here.
+  if (flag == CCR0) { // Otherwise, flag is already EQ, here.
+    crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set CCR0 EQ
+  }
+  b(success);

   bind(notRecursive);
   ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header);
@@ -4410,6 +4425,7 @@ void MacroAssembler::pop_cont_fastpath() {
   bind(done);
 }

+// Note: Must preserve CCR0 EQ (invariant).
 void MacroAssembler::inc_held_monitor_count(Register tmp) {
   ld(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 #ifdef ASSERT
@@ -4418,11 +4434,13 @@ void MacroAssembler::inc_held_monitor_count(Register tmp) {
   bge_predict_taken(CCR0, ok);
   stop("held monitor count is negativ at increment");
   bind(ok);
+  crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Restore CCR0 EQ
 #endif
   addi(tmp, tmp, 1);
   std(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 }

+// Note: Must preserve CCR0 EQ (invariant).
 void MacroAssembler::dec_held_monitor_count(Register tmp) {
   ld(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 #ifdef ASSERT
@@ -4431,7 +4449,136 @@ void MacroAssembler::dec_held_monitor_count(Register tmp) {
   bgt_predict_taken(CCR0, ok);
   stop("held monitor count is <= 0 at decrement");
   bind(ok);
+  crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Restore CCR0 EQ
 #endif
   addi(tmp, tmp, -1);
   std(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 }
+
+// Function to flip between unlocked and locked state (fast locking).
+// Branches to failed if the state is not as expected with CCR0 NE.
+// Falls through upon success with CCR0 EQ.
+// This requires fewer instructions and registers and is easier to use than the
+// cmpxchg based implementation.
+void MacroAssembler::atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics) {
+  assert_different_registers(obj, tmp, R0);
+  Label retry;
+  if (semantics & MemBarRel) {
+    release();
+  }
+
+  bind(retry);
+  STATIC_ASSERT(markWord::locked_value == 0); // Or need to change this!
+  if (!is_unlock) {
+    ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_acquire_lock());
+    xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit
+    andi_(R0, tmp, markWord::lock_mask_in_place);
+    bne(CCR0, failed); // failed if new header doesn't contain locked_value (which is 0)
+  } else {
+    ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock());
+    andi_(R0, tmp, markWord::lock_mask_in_place);
+    bne(CCR0, failed); // failed if old header doesn't contain locked_value (which is 0)
+    ori(tmp, tmp, markWord::unlocked_value); // set unlocked bit
+  }
+  stdcx_(tmp, obj);
+  bne(CCR0, retry);
+
+  if (semantics & MemBarFenceAfter) {
+    fence();
+  } else if (semantics & MemBarAcq) {
+    isync();
+  }
+}
+
+// Implements fast-locking.
+// Branches to slow upon failure to lock the object, with CCR0 NE.
+// Falls through upon success with CCR0 EQ.
+//
+//  - obj: the object to be locked
+//  - hdr: the header, already loaded from obj, will be destroyed
+//  - t1: temporary register
+void MacroAssembler::fast_lock(Register obj, Register hdr, Register t1, Label& slow) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
+  assert_different_registers(obj, hdr, t1);
+
+  // Check if we would have space on lock-stack for the object.
+  lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  cmplwi(CCR0, t1, LockStack::end_offset() - 1);
+  bgt(CCR0, slow);
+
+  // Quick check: Do not reserve cache line for atomic update if not unlocked.
+  // (Similar to contention_hint in cmpxchg solutions.)
+  xori(R0, hdr, markWord::unlocked_value); // flip unlocked bit
+  andi_(R0, R0, markWord::lock_mask_in_place);
+  bne(CCR0, slow); // failed if new header doesn't contain locked_value (which is 0)
+
+  // Note: We're not publishing anything (like the displaced header in LM_LEGACY)
+  // to other threads at this point. Hence, no release barrier, here.
+  // (The obj has been written to the BasicObjectLock at obj_offset() within the own thread stack.)
+  atomically_flip_locked_state(/* is_unlock */ false, obj, hdr, slow, MacroAssembler::MemBarAcq);
+
+  // After successful lock, push object on lock-stack
+  stdx(obj, t1, R16_thread);
+  addi(t1, t1, oopSize);
+  stw(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+}
+
+// Implements fast-unlocking.
+// Branches to slow upon failure, with CCR0 NE.
+// Falls through upon success, with CCR0 EQ.
+//
+//  - obj: the object to be unlocked
+//  - hdr: the (pre-loaded) header of the object, will be destroyed
+void MacroAssembler::fast_unlock(Register obj, Register hdr, Label& slow) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
+  assert_different_registers(obj, hdr);
+
+#ifdef ASSERT
+  {
+    // Check that hdr is fast-locked.
+    Label hdr_ok;
+    andi_(R0, hdr, markWord::lock_mask_in_place);
+    beq(CCR0, hdr_ok);
+    stop("Header is not fast-locked");
+    bind(hdr_ok);
+  }
+  Register t1 = hdr; // Reuse in debug build.
+  {
+    // The following checks rely on the fact that LockStack is only ever modified by
+    // its owning thread, even if the lock got inflated concurrently; removal of LockStack
+    // entries after inflation will happen delayed in that case.
+
+    // Check for lock-stack underflow.
+    Label stack_ok;
+    lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+    cmplwi(CCR0, t1, LockStack::start_offset());
+    bgt(CCR0, stack_ok);
+    stop("Lock-stack underflow");
+    bind(stack_ok);
+  }
+  {
+    // Check if the top of the lock-stack matches the unlocked object.
+    Label tos_ok;
+    addi(t1, t1, -oopSize);
+    ldx(t1, t1, R16_thread);
+    cmpd(CCR0, t1, obj);
+    beq(CCR0, tos_ok);
+    stop("Top of lock-stack does not match the unlocked object");
+    bind(tos_ok);
+  }
+#endif
+
+  // Release the lock.
+  atomically_flip_locked_state(/* is_unlock */ true, obj, hdr, slow, MacroAssembler::MemBarRel);
+
+  // After successful unlock, pop object from lock-stack
+  Register t2 = hdr;
+  lwz(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  addi(t2, t2, -oopSize);
+#ifdef ASSERT
+  li(R0, 0);
+  stdx(R0, t2, R16_thread);
+#endif
+  stw(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+}

src/hotspot/cpu/ppc/macroAssembler_ppc.hpp

@@ -606,6 +606,9 @@ class MacroAssembler: public Assembler {
   void pop_cont_fastpath();
   void inc_held_monitor_count(Register tmp);
   void dec_held_monitor_count(Register tmp);
+  void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
+  void fast_lock(Register obj, Register hdr, Register t1, Label& slow);
+  void fast_unlock(Register obj, Register hdr, Label& slow);

   // allocation (for C1)
   void tlab_allocate(

src/hotspot/cpu/ppc/ppc.ad

@@ -12139,7 +12139,7 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe
 // inlined locking and unlocking

-instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
+instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
   match(Set crx (FastLock oop box));
   effect(TEMP tmp1, TEMP tmp2);
   predicate(!Compile::current()->use_rtm());
@@ -12175,7 +12175,7 @@ instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1
   ins_pipe(pipe_class_compare);
 %}

-instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastUnlock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
   predicate(!Compile::current()->use_rtm());

src/hotspot/cpu/ppc/vm_version_ppc.cpp

@@ -394,6 +394,10 @@ void VM_Version::initialize() {
     // high lock contention. For now we do not use it by default.
     vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
   }
+  if (LockingMode != LM_LEGACY) {
+    warning("UseRTMLocking requires LockingMode = 1");
+    FLAG_SET_DEFAULT(UseRTMLocking, false);
+  }
 #else
   // Only C2 does RTM locking optimization.
   vm_exit_during_initialization("RTM locking optimization is not supported in this VM");

src/hotspot/share/runtime/arguments.cpp

@@ -1905,7 +1905,7 @@ bool Arguments::check_vm_args_consistency() {
 #endif

-#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM)
+#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM) && !defined(PPC64)
   if (LockingMode == LM_LIGHTWEIGHT) {
     FLAG_SET_CMDLINE(LockingMode, LM_LEGACY);
     warning("New lightweight locking not supported on this platform");