diff --git a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
index bcc991029a0..bf0122ee737 100644
--- a/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_MacroAssembler_ppc.cpp
@@ -114,41 +114,46 @@ void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox
     bne(CCR0, slow_int);
   }

-  // ... and mark it unlocked.
-  ori(Rmark, Rmark, markWord::unlocked_value);
+  if (LockingMode == LM_LIGHTWEIGHT) {
+    fast_lock(Roop, Rmark, Rscratch, slow_int);
+  } else if (LockingMode == LM_LEGACY) {
+    // ... and mark it unlocked.
+    ori(Rmark, Rmark, markWord::unlocked_value);

-  // Save unlocked object header into the displaced header location on the stack.
-  std(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+    // Save unlocked object header into the displaced header location on the stack.
+    std(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);

-  // Compare object markWord with Rmark and if equal exchange Rscratch with object markWord.
-  assert(oopDesc::mark_offset_in_bytes() == 0, "cas must take a zero displacement");
-  cmpxchgd(/*flag=*/CCR0,
-           /*current_value=*/Rscratch,
-           /*compare_value=*/Rmark,
-           /*exchange_value=*/Rbox,
-           /*where=*/Roop/*+0==mark_offset_in_bytes*/,
-           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
-           MacroAssembler::cmpxchgx_hint_acquire_lock(),
-           noreg,
-           &cas_failed,
-           /*check without membar and ldarx first*/true);
-  // If compare/exchange succeeded we found an unlocked object and we now have locked it
-  // hence we are done.
+    // Compare object markWord with Rmark and if equal exchange Rscratch with object markWord.
+    assert(oopDesc::mark_offset_in_bytes() == 0, "cas must take a zero displacement");
+    cmpxchgd(/*flag=*/CCR0,
+             /*current_value=*/Rscratch,
+             /*compare_value=*/Rmark,
+             /*exchange_value=*/Rbox,
+             /*where=*/Roop/*+0==mark_offset_in_bytes*/,
+             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+             MacroAssembler::cmpxchgx_hint_acquire_lock(),
+             noreg,
+             &cas_failed,
+             /*check without membar and ldarx first*/true);
+    // If compare/exchange succeeded we found an unlocked object and we now have locked it
+    // hence we are done.
+  }
   b(done);

   bind(slow_int);
   b(slow_case); // far

-  bind(cas_failed);
-  // We did not find an unlocked object so see if this is a recursive case.
-  sub(Rscratch, Rscratch, R1_SP);
-  load_const_optimized(R0, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
-  and_(R0/*==0?*/, Rscratch, R0);
-  std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), Rbox);
-  bne(CCR0, slow_int);
+  if (LockingMode == LM_LEGACY) {
+    bind(cas_failed);
+    // We did not find an unlocked object so see if this is a recursive case.
+    sub(Rscratch, Rscratch, R1_SP);
+    load_const_optimized(R0, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place));
+    and_(R0/*==0?*/, Rscratch, R0);
+    std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+    bne(CCR0, slow_int);
+  }

   bind(done);
-  inc_held_monitor_count(Rmark /*tmp*/);
 }

@@ -161,33 +166,41 @@ void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rb
   Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
   assert(mark_addr.disp() == 0, "cas must take a zero displacement");

-  // Test first if it is a fast recursive unlock.
-  ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
-  cmpdi(CCR0, Rmark, 0);
-  beq(CCR0, done);
+  if (LockingMode != LM_LIGHTWEIGHT) {
+    // Test first if it is a fast recursive unlock.
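+    // (LM_LEGACY stores a zero displaced header into the box on recursive locking,
+    //  so zero here means there is nothing to restore in the object's mark word.)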
+    ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
+    cmpdi(CCR0, Rmark, 0);
+    beq(CCR0, done);
+  }

   // Load object.
   ld(Roop, in_bytes(BasicObjectLock::obj_offset()), Rbox);
   verify_oop(Roop, FILE_AND_LINE);

-  // Check if it is still a light weight lock, this is is true if we see
-  // the stack address of the basicLock in the markWord of the object.
-  cmpxchgd(/*flag=*/CCR0,
-           /*current_value=*/R0,
-           /*compare_value=*/Rbox,
-           /*exchange_value=*/Rmark,
-           /*where=*/Roop,
-           MacroAssembler::MemBarRel,
-           MacroAssembler::cmpxchgx_hint_release_lock(),
-           noreg,
-           &slow_int);
+  if (LockingMode == LM_LIGHTWEIGHT) {
+    ld(Rmark, oopDesc::mark_offset_in_bytes(), Roop);
+    andi_(R0, Rmark, markWord::monitor_value);
+    bne(CCR0, slow_int);
+    fast_unlock(Roop, Rmark, slow_int);
+  } else if (LockingMode == LM_LEGACY) {
+    // Check if it is still a light weight lock, this is true if we see
+    // the stack address of the basicLock in the markWord of the object.
+    cmpxchgd(/*flag=*/CCR0,
+             /*current_value=*/R0,
+             /*compare_value=*/Rbox,
+             /*exchange_value=*/Rmark,
+             /*where=*/Roop,
+             MacroAssembler::MemBarRel,
+             MacroAssembler::cmpxchgx_hint_release_lock(),
+             noreg,
+             &slow_int);
+  }
   b(done);

   bind(slow_int);
   b(slow_case); // far

   // Done
   bind(done);
-  dec_held_monitor_count(Rmark /*tmp*/);
 }

diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index bef0577d285..97eb07dec73 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -924,7 +924,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
   if (LockingMode == LM_MONITOR) {
     call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
   } else {
-    // template code:
+    // template code (for LM_LEGACY):
     //
     // markWord displaced_header = obj->mark().set_unlocked();
     // monitor->lock()->set_displaced_header(displaced_header);
@@ -938,7 +938,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     //   InterpreterRuntime::monitorenter(THREAD, monitor);
     // }

-    const Register displaced_header = R7_ARG5;
+    const Register header           = R7_ARG5;
     const Register object_mark_addr = R8_ARG6;
     const Register current_header   = R9_ARG7;
     const Register tmp              = R10_ARG8;

@@ -946,12 +946,12 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     Label count_locking, done;
     Label cas_failed, slow_case;

-    assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
+    assert_different_registers(header, object_mark_addr, current_header, tmp);

     // markWord displaced_header = obj->mark().set_unlocked();

-    // Load markWord from object into displaced_header.
-    ld(displaced_header, oopDesc::mark_offset_in_bytes(), object);
+    // Load markWord from object into header.
+    ld(header, oopDesc::mark_offset_in_bytes(), object);

     if (DiagnoseSyncOnValueBasedClasses != 0) {
       load_klass(tmp, object);
@@ -960,58 +960,64 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
       bne(CCR0, slow_case);
     }

-    // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
-    ori(displaced_header, displaced_header, markWord::unlocked_value);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      fast_lock(object, /* mark word */ header, tmp, slow_case);
+      b(count_locking);
+    } else if (LockingMode == LM_LEGACY) {

-    // monitor->lock()->set_displaced_header(displaced_header);
-    const int lock_offset = in_bytes(BasicObjectLock::lock_offset());
-    const int mark_offset = lock_offset +
-                            BasicLock::displaced_header_offset_in_bytes();
+      // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
+      ori(header, header, markWord::unlocked_value);

-    // Initialize the box (Must happen before we update the object mark!).
-    std(displaced_header, mark_offset, monitor);
+      // monitor->lock()->set_displaced_header(displaced_header);
+      const int lock_offset = in_bytes(BasicObjectLock::lock_offset());
+      const int mark_offset = lock_offset +
+                              BasicLock::displaced_header_offset_in_bytes();

-    // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {
+      // Initialize the box (Must happen before we update the object mark!).
+      std(header, mark_offset, monitor);

-    // Store stack address of the BasicObjectLock (this is monitor) into object.
-    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
+      // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) {

-    // Must fence, otherwise, preceding store(s) may float below cmpxchg.
-    // CmpxchgX sets CCR0 to cmpX(current, displaced).
-    cmpxchgd(/*flag=*/CCR0,
-             /*current_value=*/current_header,
-             /*compare_value=*/displaced_header, /*exchange_value=*/monitor,
-             /*where=*/object_mark_addr,
-             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
-             MacroAssembler::cmpxchgx_hint_acquire_lock(),
-             noreg,
-             &cas_failed,
-             /*check without membar and ldarx first*/true);
+      // Store stack address of the BasicObjectLock (this is monitor) into object.
+      addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());

-    // If the compare-and-exchange succeeded, then we found an unlocked
-    // object and we have now locked it.
-    b(count_locking);
-    bind(cas_failed);
+      // Must fence, otherwise, preceding store(s) may float below cmpxchg.
+      // CmpxchgX sets CCR0 to cmpX(current, displaced).
+      cmpxchgd(/*flag=*/CCR0,
+               /*current_value=*/current_header,
+               /*compare_value=*/header, /*exchange_value=*/monitor,
+               /*where=*/object_mark_addr,
+               MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+               MacroAssembler::cmpxchgx_hint_acquire_lock(),
+               noreg,
+               &cas_failed,
+               /*check without membar and ldarx first*/true);

-    // } else if (THREAD->is_lock_owned((address)displaced_header))
-    //   // Simple recursive case.
-    //   monitor->lock()->set_displaced_header(nullptr);
+      // If the compare-and-exchange succeeded, then we found an unlocked
+      // object and we have now locked it.
+      b(count_locking);
+      bind(cas_failed);

-    // We did not see an unlocked object so try the fast recursive case.
+      // } else if (THREAD->is_lock_owned((address)displaced_header))
+      //   // Simple recursive case.
+      //   monitor->lock()->set_displaced_header(nullptr);

-    // Check if owner is self by comparing the value in the markWord of object
-    // (current_header) with the stack pointer.
-    sub(current_header, current_header, R1_SP);
+      // We did not see an unlocked object so try the fast recursive case.
-    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
-    load_const_optimized(tmp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
+      // Check if owner is self by comparing the value in the markWord of object
+      // (current_header) with the stack pointer.
+      sub(current_header, current_header, R1_SP);

-    and_(R0/*==0?*/, current_header, tmp);
-    // If condition is true we are done and hence we can store 0 in the displaced
-    // header indicating it is a recursive lock.
-    bne(CCR0, slow_case);
-    std(R0/*==0!*/, mark_offset, monitor);
-    b(count_locking);
+      assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+      load_const_optimized(tmp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
+
+      and_(R0/*==0?*/, current_header, tmp);
+      // If condition is true we are done and hence we can store 0 in the displaced
+      // header indicating it is a recursive lock.
+      bne(CCR0, slow_case);
+      std(R0/*==0!*/, mark_offset, monitor);
+      b(count_locking);
+    }

     // } else {
     //   // Slow path.
@@ -1020,7 +1026,11 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
     // None of the above fast optimizations worked so we have to get into the
     // slow case of monitor enter.
     bind(slow_case);
-    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), object);
+    } else {
+      call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
+    }
     b(done);
     // }
     align(32, 12);
@@ -1042,7 +1052,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), monitor);
   } else {
-    // template code:
+    // template code (for LM_LEGACY):
     //
     // if ((displaced_header = monitor->displaced_header()) == nullptr) {
     //   // Recursive unlock. Mark the monitor unlocked by setting the object field to null.
@@ -1056,22 +1066,24 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     // }

     const Register object           = R7_ARG5;
-    const Register displaced_header = R8_ARG6;
+    const Register header           = R8_ARG6;
     const Register object_mark_addr = R9_ARG7;
     const Register current_header   = R10_ARG8;

     Label free_slot;
     Label slow_case;

-    assert_different_registers(object, displaced_header, object_mark_addr, current_header);
+    assert_different_registers(object, header, object_mark_addr, current_header);

-    // Test first if we are in the fast recursive case.
-    ld(displaced_header, in_bytes(BasicObjectLock::lock_offset()) +
-                         BasicLock::displaced_header_offset_in_bytes(), monitor);
+    if (LockingMode != LM_LIGHTWEIGHT) {
+      // Test first if we are in the fast recursive case.
+      ld(header, in_bytes(BasicObjectLock::lock_offset()) +
+                 BasicLock::displaced_header_offset_in_bytes(), monitor);

-    // If the displaced header is zero, we have a recursive unlock.
-    cmpdi(CCR0, displaced_header, 0);
-    beq(CCR0, free_slot); // recursive unlock
+      // If the displaced header is zero, we have a recursive unlock.
+      cmpdi(CCR0, header, 0);
+      beq(CCR0, free_slot); // recursive unlock
+    }

     // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) {
     //   // We swapped the unlocked mark in displaced_header into the object's mark word.
@@ -1081,20 +1093,41 @@ void InterpreterMacroAssembler::unlock_object(Register monitor) {
     // The object address from the monitor is in object.
     ld(object, in_bytes(BasicObjectLock::obj_offset()), monitor);

-    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
-
-    // We have the displaced header in displaced_header. If the lock is still
-    // lightweight, it will contain the monitor address and we'll store the
-    // displaced header back into the object's mark word.
-    // CmpxchgX sets CCR0 to cmpX(current, monitor).
-    cmpxchgd(/*flag=*/CCR0,
-             /*current_value=*/current_header,
-             /*compare_value=*/monitor, /*exchange_value=*/displaced_header,
-             /*where=*/object_mark_addr,
-             MacroAssembler::MemBarRel,
-             MacroAssembler::cmpxchgx_hint_release_lock(),
-             noreg,
-             &slow_case);
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      // Check for non-symmetric locking. This is allowed by the spec and the interpreter
+      // must handle it.
+      Register tmp = current_header;
+      // First check for lock-stack underflow.
+      lwz(tmp, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+      cmplwi(CCR0, tmp, (unsigned)LockStack::start_offset());
+      ble(CCR0, slow_case);
+      // Then check if the top of the lock-stack matches the unlocked object.
+      addi(tmp, tmp, -oopSize);
+      ldx(tmp, tmp, R16_thread);
+      cmpd(CCR0, tmp, object);
+      bne(CCR0, slow_case);
+
+      ld(header, oopDesc::mark_offset_in_bytes(), object);
+      andi_(R0, header, markWord::monitor_value);
+      bne(CCR0, slow_case);
+      fast_unlock(object, header, slow_case);
+    } else {
+      addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
+
+      // We have the displaced header in displaced_header. If the lock is still
+      // lightweight, it will contain the monitor address and we'll store the
+      // displaced header back into the object's mark word.
+      // CmpxchgX sets CCR0 to cmpX(current, monitor).
+      cmpxchgd(/*flag=*/CCR0,
+               /*current_value=*/current_header,
+               /*compare_value=*/monitor, /*exchange_value=*/header,
+               /*where=*/object_mark_addr,
+               MacroAssembler::MemBarRel,
+               MacroAssembler::cmpxchgx_hint_release_lock(),
+               noreg,
+               &slow_case);
+    }
     b(free_slot);

     // } else {
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index fd2e7d30185..027c6fe4ce8 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -2629,8 +2629,7 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
                                                Metadata* method_data, bool use_rtm, bool profile_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
-  assert(flag != CCR0, "bad condition register");
-  Label cont;
+  assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
   Label object_has_monitor;
   Label cas_failed;
   Label success, failure;
@@ -2649,7 +2648,7 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   if (UseRTMForStackLocks && use_rtm) {
     rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
                       stack_rtm_counters, method_data, profile_rtm,
-                      cont, object_has_monitor);
+                      success, object_has_monitor);
   }
 #endif // INCLUDE_RTM_OPT

@@ -2658,7 +2657,11 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   andi_(temp, displaced_header, markWord::monitor_value);
   bne(CCR0, object_has_monitor);

-  if (LockingMode != LM_MONITOR) {
+  if (LockingMode == LM_MONITOR) {
+    // Set NE to indicate 'failure' -> take slow-path.
+    crandc(flag, Assembler::equal, flag, Assembler::equal);
+    b(failure);
+  } else if (LockingMode == LM_LEGACY) {
     // Set displaced_header to be (markWord of object | UNLOCK_VALUE).
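+    // (LM_LEGACY stack-locking: the CAS below tries to install the box address into the
+    //  object's mark word; on failure, the recursive case is handled under cas_failed.)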
     ori(displaced_header, displaced_header, markWord::unlocked_value);

@@ -2683,28 +2686,31 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
     // If the compare-and-exchange succeeded, then we found an unlocked
     // object and we have now locked it.
     b(success);
-  } else {
-    // Set NE to indicate 'failure' -> take slow-path.
-    crandc(flag, Assembler::equal, flag, Assembler::equal);
+
+    bind(cas_failed);
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if the owner is self by comparing the value in the markWord of object
+    // (current_header) with the stack pointer.
+    sub(current_header, current_header, R1_SP);
+    load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
+
+    and_(R0/*==0?*/, current_header, temp);
+    // If condition is true we are done and hence we can store 0 as the
+    // displaced header in the box, which indicates that it is a recursive lock.
+    std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
+
+    if (flag != CCR0) {
+      mcrf(flag, CCR0);
+    }
+    beq(CCR0, success);
     b(failure);
+  } else {
+    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+    fast_lock(oop, displaced_header, temp, failure);
+    b(success);
   }

-  bind(cas_failed);
-  // We did not see an unlocked object so try the fast recursive case.
-
-  // Check if the owner is self by comparing the value in the markWord of object
-  // (current_header) with the stack pointer.
-  sub(current_header, current_header, R1_SP);
-  load_const_optimized(temp, ~(os::vm_page_size()-1) | markWord::lock_mask_in_place);
-
-  and_(R0/*==0?*/, current_header, temp);
-  // If condition is true we are cont and hence we can store 0 as the
-  // displaced header in the box, which indicates that it is a recursive lock.
-  mcrf(flag,CCR0);
-  std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), box);
-
-  b(cont);
-
   // Handle existing monitor.
   bind(object_has_monitor);
   // The object's monitor m is unlocked iff m->owner is null,
@@ -2714,7 +2720,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   // Use the same RTM locking code in 32- and 64-bit VM.
   if (use_rtm) {
     rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
-                         rtm_counters, method_data, profile_rtm, cont);
+                         rtm_counters, method_data, profile_rtm, success);
+    bne(flag, failure);
   } else {
 #endif // INCLUDE_RTM_OPT

@@ -2728,8 +2735,10 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
            MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
            MacroAssembler::cmpxchgx_hint_acquire_lock());

-  // Store a non-null value into the box.
-  std(box, BasicLock::displaced_header_offset_in_bytes(), box);
+  if (LockingMode != LM_LIGHTWEIGHT) {
+    // Store a non-null value into the box.
+    std(box, BasicLock::displaced_header_offset_in_bytes(), box);
+  }
   beq(flag, success);

   // Check for recursive locking.
@@ -2746,10 +2755,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
   } // use_rtm()
 #endif

-  bind(cont);
   // flag == EQ indicates success, increment held monitor count
   // flag == NE indicates failure
-  bne(flag, failure);
   bind(success);
   inc_held_monitor_count(temp);
   bind(failure);
@@ -2759,9 +2766,8 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
                                                  Register temp, Register displaced_header, Register current_header,
                                                  bool use_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
-  assert(flag != CCR0, "bad condition register");
-  Label object_has_monitor, notRecursive;
-  Label success, failure;
+  assert(LockingMode != LM_LIGHTWEIGHT || flag == CCR0, "bad condition register");
+  Label success, failure, object_has_monitor, notRecursive;

 #if INCLUDE_RTM_OPT
   if (UseRTMForStackLocks && use_rtm) {
@@ -2776,7 +2782,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   }
 #endif

-  if (LockingMode != LM_MONITOR) {
+  if (LockingMode == LM_LEGACY) {
     // Find the lock address and load the displaced header from the stack.
     ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
@@ -2792,7 +2798,11 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   andi_(R0, current_header, markWord::monitor_value);
   bne(CCR0, object_has_monitor);

-  if (LockingMode != LM_MONITOR) {
+  if (LockingMode == LM_MONITOR) {
+    // Set NE to indicate 'failure' -> take slow-path.
+    crandc(flag, Assembler::equal, flag, Assembler::equal);
+    b(failure);
+  } else if (LockingMode == LM_LEGACY) {
     // Check if it is still a light weight lock, this is is true if we see
     // the stack address of the basicLock in the markWord of the object.
     // Cmpxchg sets flag to cmpd(current_header, box).
@@ -2808,9 +2818,9 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
     b(success);
   } else {
-    // Set NE to indicate 'failure' -> take slow-path.
-    crandc(flag, Assembler::equal, flag, Assembler::equal);
-    b(failure);
+    assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+    fast_unlock(oop, current_header, failure);
+    b(success);
   }

   // Handle existing monitor.
@@ -2819,7 +2829,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   addi(current_header, current_header, -(int)markWord::monitor_value); // monitor
   ld(temp, in_bytes(ObjectMonitor::owner_offset()), current_header);

-  // It's inflated. 
+  // It's inflated.
 #if INCLUDE_RTM_OPT
   if (use_rtm) {
     Label L_regular_inflated_unlock;
@@ -2832,15 +2842,20 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
   }
 #endif

-  ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
-
+  // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0.
+  // This is handled like owner thread mismatches: we take the slow path.
   cmpd(flag, temp, R16_thread);
   bne(flag, failure);

+  ld(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
+
   addic_(displaced_header, displaced_header, -1);
   blt(CCR0, notRecursive); // Not recursive if negative after decrement.
   std(displaced_header, in_bytes(ObjectMonitor::recursions_offset()), current_header);
-  b(success); // flag is already EQ here.
+  if (flag == CCR0) { // Otherwise, flag is already EQ here.
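+    // (crorc of a CR bit with itself computes b | ~b, i.e. it unconditionally forces the bit to 1.)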
+    crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Set CCR0 EQ
+  }
+  b(success);

   bind(notRecursive);
   ld(temp, in_bytes(ObjectMonitor::EntryList_offset()), current_header);
@@ -4410,6 +4425,7 @@ void MacroAssembler::pop_cont_fastpath() {
   bind(done);
 }

+// Note: Must preserve CCR0 EQ (invariant).
 void MacroAssembler::inc_held_monitor_count(Register tmp) {
   ld(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 #ifdef ASSERT
@@ -4418,11 +4434,13 @@ void MacroAssembler::inc_held_monitor_count(Register tmp) {
   bge_predict_taken(CCR0, ok);
   stop("held monitor count is negativ at increment");
   bind(ok);
+  crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Restore CCR0 EQ
 #endif
   addi(tmp, tmp, 1);
   std(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 }

+// Note: Must preserve CCR0 EQ (invariant).
 void MacroAssembler::dec_held_monitor_count(Register tmp) {
   ld(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 #ifdef ASSERT
@@ -4431,7 +4449,136 @@ void MacroAssembler::dec_held_monitor_count(Register tmp) {
   bgt_predict_taken(CCR0, ok);
   stop("held monitor count is <= 0 at decrement");
   bind(ok);
+  crorc(CCR0, Assembler::equal, CCR0, Assembler::equal); // Restore CCR0 EQ
 #endif
   addi(tmp, tmp, -1);
   std(tmp, in_bytes(JavaThread::held_monitor_count_offset()), R16_thread);
 }
+
+// Function to flip between unlocked and locked state (fast locking).
+// Branches to failed with CCR0 NE if the state is not as expected.
+// Falls through upon success with CCR0 EQ.
+// This requires fewer instructions and registers and is easier to use than the
+// cmpxchg based implementation.
+void MacroAssembler::atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics) {
+  assert_different_registers(obj, tmp, R0);
+  Label retry;
+
+  if (semantics & MemBarRel) {
+    release();
+  }
+
+  bind(retry);
+  STATIC_ASSERT(markWord::locked_value == 0); // Or need to change this!
+  if (!is_unlock) {
+    ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_acquire_lock());
+    xori(tmp, tmp, markWord::unlocked_value); // flip unlocked bit
+    andi_(R0, tmp, markWord::lock_mask_in_place);
+    bne(CCR0, failed); // failed if new header doesn't contain locked_value (which is 0)
+  } else {
+    ldarx(tmp, obj, MacroAssembler::cmpxchgx_hint_release_lock());
+    andi_(R0, tmp, markWord::lock_mask_in_place);
+    bne(CCR0, failed); // failed if old header doesn't contain locked_value (which is 0)
+    ori(tmp, tmp, markWord::unlocked_value); // set unlocked bit
+  }
+  stdcx_(tmp, obj);
+  bne(CCR0, retry);
+
+  if (semantics & MemBarFenceAfter) {
+    fence();
+  } else if (semantics & MemBarAcq) {
+    isync();
+  }
+}
+
+// Implements fast-locking.
+// Branches to slow upon failure to lock the object, with CCR0 NE.
+// Falls through upon success with CCR0 EQ.
+//
+//  - obj: the object to be locked
+//  - hdr: the header, already loaded from obj, will be destroyed
+//  - t1: temporary register
+void MacroAssembler::fast_lock(Register obj, Register hdr, Register t1, Label& slow) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
+  assert_different_registers(obj, hdr, t1);
+
+  // Check if we would have space on lock-stack for the object.
+  lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  cmplwi(CCR0, t1, LockStack::end_offset() - 1);
+  bgt(CCR0, slow);
+
+  // Quick check: Do not reserve cache line for atomic update if not unlocked.
+  // (Similar to contention_hint in cmpxchg solutions.)
+  xori(R0, hdr, markWord::unlocked_value); // flip unlocked bit
+  andi_(R0, R0, markWord::lock_mask_in_place);
+  bne(CCR0, slow); // failed if new header doesn't contain locked_value (which is 0)
+
+  // Note: We're not publishing anything (like the displaced header in LM_LEGACY)
+  // to other threads at this point. Hence, no release barrier here.
+  // (The obj has been written to the BasicObjectLock at obj_offset() within the owning thread's stack.)
+  atomically_flip_locked_state(/* is_unlock */ false, obj, hdr, slow, MacroAssembler::MemBarAcq);
+
+  // After successful lock, push object on lock-stack
+  stdx(obj, t1, R16_thread);
+  addi(t1, t1, oopSize);
+  stw(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+}
+
+// Implements fast-unlocking.
+// Branches to slow upon failure, with CCR0 NE.
+// Falls through upon success, with CCR0 EQ.
+//
+//  - obj: the object to be unlocked
+//  - hdr: the (pre-loaded) header of the object, will be destroyed
+void MacroAssembler::fast_unlock(Register obj, Register hdr, Label& slow) {
+  assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
+  assert_different_registers(obj, hdr);
+
+#ifdef ASSERT
+  {
+    // Check that hdr is fast-locked.
+    Label hdr_ok;
+    andi_(R0, hdr, markWord::lock_mask_in_place);
+    beq(CCR0, hdr_ok);
+    stop("Header is not fast-locked");
+    bind(hdr_ok);
+  }
+  Register t1 = hdr; // Reuse in debug build.
+  {
+    // The following checks rely on the fact that LockStack is only ever modified by
+    // its owning thread, even if the lock got inflated concurrently; removal of LockStack
+    // entries after inflation is delayed in that case.
+
+    // Check for lock-stack underflow.
+    Label stack_ok;
+    lwz(t1, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+    cmplwi(CCR0, t1, LockStack::start_offset());
+    bgt(CCR0, stack_ok);
+    stop("Lock-stack underflow");
+    bind(stack_ok);
+  }
+  {
+    // Check if the top of the lock-stack matches the unlocked object.
+    Label tos_ok;
+    addi(t1, t1, -oopSize);
+    ldx(t1, t1, R16_thread);
+    cmpd(CCR0, t1, obj);
+    beq(CCR0, tos_ok);
+    stop("Top of lock-stack does not match the unlocked object");
+    bind(tos_ok);
+  }
+#endif
+
+  // Release the lock.
+  atomically_flip_locked_state(/* is_unlock */ true, obj, hdr, slow, MacroAssembler::MemBarRel);
+
+  // After successful unlock, pop object from lock-stack
+  Register t2 = hdr;
+  lwz(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+  addi(t2, t2, -oopSize);
+#ifdef ASSERT
+  li(R0, 0);
+  stdx(R0, t2, R16_thread);
+#endif
+  stw(t2, in_bytes(JavaThread::lock_stack_top_offset()), R16_thread);
+}
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index d8bc8018a58..902edda0039 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -606,6 +606,9 @@ class MacroAssembler: public Assembler {
   void pop_cont_fastpath();
   void inc_held_monitor_count(Register tmp);
   void dec_held_monitor_count(Register tmp);
+  void atomically_flip_locked_state(bool is_unlock, Register obj, Register tmp, Label& failed, int semantics);
+  void fast_lock(Register obj, Register hdr, Register t1, Label& slow);
+  void fast_unlock(Register obj, Register hdr, Label& slow);

   // allocation (for C1)
   void tlab_allocate(
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index 4454bf57e44..b9dc8697b1e 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -12139,7 +12139,7 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe

 // inlined locking and unlocking

-instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
+instruct cmpFastLock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2) %{
   match(Set crx (FastLock oop box));
   effect(TEMP tmp1, TEMP tmp2);
   predicate(!Compile::current()->use_rtm());
@@ -12175,7 +12175,7 @@ instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1
   ins_pipe(pipe_class_compare);
 %}

-instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+instruct cmpFastUnlock(flagsRegCR0 crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastUnlock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
   predicate(!Compile::current()->use_rtm());
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
index 33736a9c12c..fbb99fd68d8 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -394,6 +394,10 @@ void VM_Version::initialize() {
     // high lock contention. For now we do not use it by default.
     vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
   }
+  if (LockingMode != LM_LEGACY) {
+    warning("UseRTMLocking requires LockingMode = 1");
+    FLAG_SET_DEFAULT(UseRTMLocking, false);
+  }
 #else
   // Only C2 does RTM locking optimization.
   vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp
index d83a54b0023..c294bbbf9ed 100644
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@@ -1905,7 +1905,7 @@ bool Arguments::check_vm_args_consistency() {
   }
 #endif

-#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM)
+#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM) && !defined(PPC64)
   if (LockingMode == LM_LIGHTWEIGHT) {
     FLAG_SET_CMDLINE(LockingMode, LM_LEGACY);
     warning("New lightweight locking not supported on this platform");
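Reviewer note (illustrative, not part of the patch): the following is a minimal, portable C++ sketch of the lock-stack protocol that the new fast_lock()/fast_unlock() assembly implements, assuming the markWord encoding used above (lock bits 0b01 = unlocked, 0b00 = fast-locked, 0b10 = inflated monitor). All names in the sketch (lw_sketch, SketchThread, sketch_fast_lock, ...) are invented for illustration and are not HotSpot APIs; the fixed capacity stands in for the LockStack::end_offset() bound checked in fast_lock().

#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>

namespace lw_sketch {

constexpr uintptr_t lock_mask      = 0x3;  // markWord::lock_mask_in_place
constexpr uintptr_t unlocked_value = 0x1;  // markWord::unlocked_value
constexpr uintptr_t locked_value   = 0x0;  // markWord::locked_value

struct SketchThread {
  static constexpr std::size_t capacity = 8;  // stands in for LockStack::end_offset()
  const void* lock_stack[capacity];           // per-thread stack of fast-locked oops
  std::size_t top = 0;                        // cf. JavaThread::lock_stack_top_offset()
};

// Mirrors fast_lock(): give up (slow path) if the lock-stack is full or the mark
// word is not exactly "unlocked"; otherwise flip the unlocked bit (0b01 -> 0b00)
// with acquire semantics and push the oop onto the thread-local lock-stack.
inline bool sketch_fast_lock(SketchThread* t, const void* obj, std::atomic<uintptr_t>* mark) {
  if (t->top == SketchThread::capacity) return false;   // no space on lock-stack
  uintptr_t m = mark->load(std::memory_order_relaxed);
  if ((m & lock_mask) != unlocked_value) return false;  // quick contention check
  if (!mark->compare_exchange_strong(m, m ^ unlocked_value, std::memory_order_acquire))
    return false;                                       // lost the race -> slow path
  t->lock_stack[t->top++] = obj;                        // visible to this thread only
  return true;
}

// Mirrors fast_unlock(): give up if the mark word is no longer fast-locked
// (e.g. inflated to a monitor, lock bits 0b10); otherwise restore the unlocked
// bit with release semantics and pop the oop.
inline bool sketch_fast_unlock(SketchThread* t, const void* obj, std::atomic<uintptr_t>* mark) {
  assert(t->top > 0 && t->lock_stack[t->top - 1] == obj);  // mirrors the #ifdef ASSERT checks
  uintptr_t m = mark->load(std::memory_order_relaxed);
  if ((m & lock_mask) != locked_value) return false;    // not fast-locked anymore
  if (!mark->compare_exchange_strong(m, m | unlocked_value, std::memory_order_release))
    return false;                                       // e.g. concurrent inflation
  --t->top;
  return true;
}

}  // namespace lw_sketch

As in the assembly, every failing branch corresponds to a CCR0-NE exit into the runtime slow path, which inflates the lock to an ObjectMonitor; the interpreter unlock path additionally re-checks the top-of-stack oop first because non-symmetric locking is permitted by the JVM spec.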