diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 77415989caf..a7c6ddd792c 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -3822,7 +3822,7 @@ encode %{
     Register tmp = as_Register($tmp2$$reg);
     Label cont;
     Label object_has_monitor;
-    Label no_count;
+    Label count, no_count;
 
     assert_different_registers(oop, box, tmp, disp_hdr);
 
@@ -3839,7 +3839,10 @@ encode %{
     // Check for existing monitor
     __ tbnz(disp_hdr, exact_log2(markWord::monitor_value), object_has_monitor);
 
-    if (!UseHeavyMonitors) {
+    if (LockingMode == LM_MONITOR) {
+      __ tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
+      __ b(cont);
+    } else if (LockingMode == LM_LEGACY) {
       // Set tmp to be (markWord of object | UNLOCK_VALUE).
       __ orr(tmp, disp_hdr, markWord::unlocked_value);
 
@@ -3867,10 +3870,12 @@ encode %{
       // displaced header in the box, which indicates that it is a recursive lock.
       __ ands(tmp/*==0?*/, disp_hdr, tmp); // Sets flags for result
       __ str(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+      __ b(cont);
     } else {
-      __ tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
+      assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+      __ fast_lock(oop, disp_hdr, tmp, rscratch1, no_count);
+      __ b(count);
     }
-    __ b(cont);
 
     // Handle existing monitor.
     __ bind(object_has_monitor);
@@ -3883,13 +3888,14 @@ encode %{
     __ cmpxchg(tmp, zr, rthread, Assembler::xword, /*acquire*/ true,
                /*release*/ true, /*weak*/ false, rscratch1); // Sets flags for result
 
-    // Store a non-null value into the box to avoid looking like a re-entrant
-    // lock. The fast-path monitor unlock code checks for
-    // markWord::monitor_value so use markWord::unused_mark which has the
-    // relevant bit set, and also matches ObjectSynchronizer::enter.
-    __ mov(tmp, (address)markWord::unused_mark().value());
-    __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
-
+    if (LockingMode != LM_LIGHTWEIGHT) {
+      // Store a non-null value into the box to avoid looking like a re-entrant
+      // lock. The fast-path monitor unlock code checks for
+      // markWord::monitor_value so use markWord::unused_mark which has the
+      // relevant bit set, and also matches ObjectSynchronizer::enter.
+      __ mov(tmp, (address)markWord::unused_mark().value());
+      __ str(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes()));
+    }
     __ br(Assembler::EQ, cont); // CAS success means locking succeeded
 
     __ cmp(rscratch1, rthread);
@@ -3904,6 +3910,7 @@ encode %{
     // flag == NE indicates failure
     __ br(Assembler::NE, no_count);
 
+    __ bind(count);
     __ increment(Address(rthread, JavaThread::held_monitor_count_offset()));
 
     __ bind(no_count);
@@ -3917,11 +3924,11 @@ encode %{
     Register tmp = as_Register($tmp2$$reg);
     Label cont;
    Label object_has_monitor;
-    Label no_count;
+    Label count, no_count;
 
     assert_different_registers(oop, box, tmp, disp_hdr);
 
-    if (!UseHeavyMonitors) {
+    if (LockingMode == LM_LEGACY) {
       // Find the lock address and load the displaced header from the stack.
       __ ldr(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes()));
 
@@ -3934,17 +3941,22 @@ encode %{
     __ ldr(tmp, Address(oop, oopDesc::mark_offset_in_bytes()));
     __ tbnz(tmp, exact_log2(markWord::monitor_value), object_has_monitor);
 
-    if (!UseHeavyMonitors) {
+    if (LockingMode == LM_MONITOR) {
+      __ tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
+      __ b(cont);
+    } else if (LockingMode == LM_LEGACY) {
       // Check if it is still a light weight lock, this is is true if we
       // see the stack address of the basicLock in the markWord of the
       // object.
       __ cmpxchg(oop, box, disp_hdr, Assembler::xword, /*acquire*/ false,
                  /*release*/ true, /*weak*/ false, tmp);
+      __ b(cont);
     } else {
-      __ tst(oop, oop); // Set NE to indicate 'failure' -> take slow-path. We know that oop != 0.
+      assert(LockingMode == LM_LIGHTWEIGHT, "must be");
+      __ fast_unlock(oop, tmp, box, disp_hdr, no_count);
+      __ b(count);
     }
-    __ b(cont);
 
     assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
@@ -3952,6 +3964,20 @@ encode %{
     __ bind(object_has_monitor);
     STATIC_ASSERT(markWord::monitor_value <= INT_MAX);
     __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor
+
+    if (LockingMode == LM_LIGHTWEIGHT) {
+      // If the owner is anonymous, we need to fix it -- in an outline stub.
+      Register tmp2 = disp_hdr;
+      __ ldr(tmp2, Address(tmp, ObjectMonitor::owner_offset_in_bytes()));
+      // We cannot use tbnz here, the target might be too far away and cannot
+      // be encoded.
+      __ tst(tmp2, (uint64_t)ObjectMonitor::ANONYMOUS_OWNER);
+      C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2);
+      Compile::current()->output()->add_stub(stub);
+      __ br(Assembler::NE, stub->entry());
+      __ bind(stub->continuation());
+    }
+
     __ ldr(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes()));
 
     Label notRecursive;
@@ -3978,6 +4004,7 @@ encode %{
     // flag == NE indicates failure
     __ br(Assembler::NE, no_count);
 
+    __ bind(count);
     __ decrement(Address(rthread, JavaThread::held_monitor_count_offset()));
 
     __ bind(no_count);
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index b493fbc4a71..dc19c72fd11 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -431,7 +431,7 @@ int LIR_Assembler::emit_unwind_handler() {
   if (method()->is_synchronized()) {
     monitor_address(0, FrameMap::r0_opr);
     stub = new MonitorExitStub(FrameMap::r0_opr, true, 0);
-    if (UseHeavyMonitors) {
+    if (LockingMode == LM_MONITOR) {
       __ b(*stub->entry());
     } else {
       __ unlock_object(r5, r4, r0, *stub->entry());
@@ -2558,7 +2558,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) {
   Register obj = op->obj_opr()->as_register();  // may not be an oop
   Register hdr = op->hdr_opr()->as_register();
   Register lock = op->lock_opr()->as_register();
-  if (UseHeavyMonitors) {
+  if (LockingMode == LM_MONITOR) {
     if (op->info() != nullptr) {
       add_debug_info_for_null_check_here(op->info());
       __ null_check(obj, -1);
diff --git a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
index ccc05005822..439a2b690d1 100644
--- a/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_MacroAssembler_aarch64.cpp
@@ -63,8 +63,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result,
 int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) {
   const int aligned_mask = BytesPerWord -1;
   const int hdr_offset = oopDesc::mark_offset_in_bytes();
-  assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
-  Label done;
+  assert_different_registers(hdr, obj, disp_hdr);
   int null_check_offset = -1;
 
   verify_oop(obj);
@@ -83,39 +82,44 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
 
   // Load object header
   ldr(hdr, Address(obj, hdr_offset));
-  // and mark it as unlocked
-  orr(hdr, hdr, markWord::unlocked_value);
-  // save unlocked object header into the displaced header location on the stack
-  str(hdr, Address(disp_hdr, 0));
-  // test if object header is still the same (i.e. unlocked), and if so, store the
-  // displaced header address in the object header - if it is not the same, get the
-  // object header instead
-  lea(rscratch2, Address(obj, hdr_offset));
-  cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/nullptr);
-  // if the object header was the same, we're done
-  // if the object header was not the same, it is now in the hdr register
-  // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
-  //
-  // 1) (hdr & aligned_mask) == 0
-  // 2) sp <= hdr
-  // 3) hdr <= sp + page_size
-  //
-  // these 3 tests can be done by evaluating the following expression:
-  //
-  // (hdr - sp) & (aligned_mask - page_size)
-  //
-  // assuming both the stack pointer and page_size have their least
-  // significant 2 bits cleared and page_size is a power of 2
-  mov(rscratch1, sp);
-  sub(hdr, hdr, rscratch1);
-  ands(hdr, hdr, aligned_mask - (int)os::vm_page_size());
-  // for recursive locking, the result is zero => save it in the displaced header
-  // location (null in the displaced hdr location indicates recursive locking)
-  str(hdr, Address(disp_hdr, 0));
-  // otherwise we don't care about the result and handle locking via runtime call
-  cbnz(hdr, slow_case);
-  // done
-  bind(done);
+  if (LockingMode == LM_LIGHTWEIGHT) {
+    fast_lock(obj, hdr, rscratch1, rscratch2, slow_case);
+  } else if (LockingMode == LM_LEGACY) {
+    Label done;
+    // and mark it as unlocked
+    orr(hdr, hdr, markWord::unlocked_value);
+    // save unlocked object header into the displaced header location on the stack
+    str(hdr, Address(disp_hdr, 0));
+    // test if object header is still the same (i.e. unlocked), and if so, store the
+    // displaced header address in the object header - if it is not the same, get the
+    // object header instead
+    lea(rscratch2, Address(obj, hdr_offset));
+    cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/nullptr);
+    // if the object header was the same, we're done
+    // if the object header was not the same, it is now in the hdr register
+    // => test if it is a stack pointer into the same stack (recursive locking), i.e.:
+    //
+    // 1) (hdr & aligned_mask) == 0
+    // 2) sp <= hdr
+    // 3) hdr <= sp + page_size
+    //
+    // these 3 tests can be done by evaluating the following expression:
+    //
+    // (hdr - sp) & (aligned_mask - page_size)
+    //
+    // assuming both the stack pointer and page_size have their least
+    // significant 2 bits cleared and page_size is a power of 2
+    mov(rscratch1, sp);
+    sub(hdr, hdr, rscratch1);
+    ands(hdr, hdr, aligned_mask - (int)os::vm_page_size());
+    // for recursive locking, the result is zero => save it in the displaced header
+    // location (null in the displaced hdr location indicates recursive locking)
+    str(hdr, Address(disp_hdr, 0));
+    // otherwise we don't care about the result and handle locking via runtime call
+    cbnz(hdr, slow_case);
+    // done
+    bind(done);
+  }
   increment(Address(rthread, JavaThread::held_monitor_count_offset()));
   return null_check_offset;
 }
@@ -127,27 +131,40 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_
   assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different");
   Label done;
 
-  // load displaced header
-  ldr(hdr, Address(disp_hdr, 0));
-  // if the loaded hdr is null we had recursive locking
-  // if we had recursive locking, we are done
-  cbz(hdr, done);
+  if (LockingMode != LM_LIGHTWEIGHT) {
+    // load displaced header
+    ldr(hdr, Address(disp_hdr, 0));
+    // if the loaded hdr is null we had recursive locking
+    // if we had recursive locking, we are done
+    cbz(hdr, done);
+  }
+
   // load object
   ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes()));
   verify_oop(obj);
-  // test if object header is pointing to the displaced header, and if so, restore
-  // the displaced header in the object - if the object header is not pointing to
-  // the displaced header, get the object header instead
-  // if the object header was not pointing to the displaced header,
-  // we do unlocking via runtime call
-  if (hdr_offset) {
-    lea(rscratch1, Address(obj, hdr_offset));
-    cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case);
-  } else {
-    cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case);
+
+  if (LockingMode == LM_LIGHTWEIGHT) {
+    ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
+    // We cannot use tbnz here, the target might be too far away and cannot
+    // be encoded.
+ tst(hdr, markWord::monitor_value); + br(Assembler::NE, slow_case); + fast_unlock(obj, hdr, rscratch1, rscratch2, slow_case); + } else if (LockingMode == LM_LEGACY) { + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + lea(rscratch1, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); + } + // done + bind(done); } - // done - bind(done); decrement(Address(rthread, JavaThread::held_monitor_count_offset())); } diff --git a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp index 81bde9a6611..9f131607a27 100644 --- a/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_CodeStubs_aarch64.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "opto/c2_MacroAssembler.hpp" #include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -63,4 +64,31 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { __ emit_int32(0); // nmethod guard value } +int C2HandleAnonOMOwnerStub::max_size() const { + // Max size of stub has been determined by testing with 0, in which case + // C2CodeStubList::emit() will throw an assertion and report the actual size that + // is needed. + return 24; +} + +void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register mon = monitor(); + Register t = tmp(); + assert(t != noreg, "need tmp register"); + + // Fix owner to be the current thread. + __ str(rthread, Address(mon, ObjectMonitor::owner_offset_in_bytes())); + + // Pop owner object from lock-stack. + __ ldrw(t, Address(rthread, JavaThread::lock_stack_top_offset())); + __ subw(t, t, oopSize); +#ifdef ASSERT + __ str(zr, Address(rthread, t)); +#endif + __ strw(t, Address(rthread, JavaThread::lock_stack_top_offset())); + + __ b(continuation()); +} + #undef __ diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index 9c7fec16ae2..0af15d2548b 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -730,7 +730,7 @@ void InterpreterMacroAssembler::remove_activation( void InterpreterMacroAssembler::lock_object(Register lock_reg) { assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); @@ -758,62 +758,73 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) br(Assembler::NE, slow_case); } - // Load (object->mark() | 1) into swap_reg - ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - orr(swap_reg, rscratch1, 1); + if (LockingMode == LM_LIGHTWEIGHT) { + ldr(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + fast_lock(obj_reg, tmp, rscratch1, rscratch2, slow_case); + b(count); + } else if (LockingMode == LM_LEGACY) { + // Load (object->mark() | 1) into swap_reg + ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + orr(swap_reg, rscratch1, 1); - // Save (object->mark() | 1) into BasicLock's displaced header - str(swap_reg, Address(lock_reg, mark_offset)); + // Save (object->mark() | 1) into BasicLock's displaced header + str(swap_reg, Address(lock_reg, mark_offset)); - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); - Label fail; - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, count, /*fallthrough*/nullptr); + Label fail; + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, rscratch1, count, /*fallthrough*/nullptr); - // Fast check for recursive lock. - // - // Can apply the optimization only if this is a stack lock - // allocated in this thread. For efficiency, we can focus on - // recently allocated stack locks (instead of reading the stack - // base and checking whether 'mark' points inside the current - // thread stack): - // 1) (mark & 7) == 0, and - // 2) sp <= mark < mark + os::pagesize() - // - // Warning: sp + os::pagesize can overflow the stack base. We must - // neither apply the optimization for an inflated lock allocated - // just above the thread stack (this is why condition 1 matters) - // nor apply the optimization if the stack lock is inside the stack - // of another thread. The latter is avoided even in case of overflow - // because we have guard pages at the end of all stacks. Hence, if - // we go over the stack base and hit the stack of another thread, - // this should not be in a writeable area that could contain a - // stack lock allocated by that thread. As a consequence, a stack - // lock less than page size away from sp is guaranteed to be - // owned by the current thread. - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - sp) & (7 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 3 bits clear. - // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg - // NOTE2: aarch64 does not like to subtract sp from rn so take a - // copy - mov(rscratch1, sp); - sub(swap_reg, swap_reg, rscratch1); - ands(swap_reg, swap_reg, (uint64_t)(7 - (int)os::vm_page_size())); - - // Save the test result, for recursive case, the result is zero - str(swap_reg, Address(lock_reg, mark_offset)); - br(Assembler::EQ, count); + // Fast check for recursive lock. + // + // Can apply the optimization only if this is a stack lock + // allocated in this thread. 
For efficiency, we can focus on + // recently allocated stack locks (instead of reading the stack + // base and checking whether 'mark' points inside the current + // thread stack): + // 1) (mark & 7) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // + // Warning: sp + os::pagesize can overflow the stack base. We must + // neither apply the optimization for an inflated lock allocated + // just above the thread stack (this is why condition 1 matters) + // nor apply the optimization if the stack lock is inside the stack + // of another thread. The latter is avoided even in case of overflow + // because we have guard pages at the end of all stacks. Hence, if + // we go over the stack base and hit the stack of another thread, + // this should not be in a writeable area that could contain a + // stack lock allocated by that thread. As a consequence, a stack + // lock less than page size away from sp is guaranteed to be + // owned by the current thread. + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the mark is in swap_reg %r0 as the result of cmpxchg + // NOTE2: aarch64 does not like to subtract sp from rn so take a + // copy + mov(rscratch1, sp); + sub(swap_reg, swap_reg, rscratch1); + ands(swap_reg, swap_reg, (uint64_t)(7 - (int)os::vm_page_size())); + // Save the test result, for recursive case, the result is zero + str(swap_reg, Address(lock_reg, mark_offset)); + br(Assembler::EQ, count); + } bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, - CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - lock_reg); + if (LockingMode == LM_LIGHTWEIGHT) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + obj_reg); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } b(done); bind(count); @@ -839,7 +850,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { assert(lock_reg == c_rarg1, "The argument is only for looks. It must be rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); } else { Label count, done; @@ -850,9 +861,11 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %r0 - lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (LockingMode != LM_LIGHTWEIGHT) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %r0 + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(%c_rarg3) ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -860,16 +873,38 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Free entry str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - // Load the old header from BasicLock structure - ldr(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + if (LockingMode == LM_LIGHTWEIGHT) { + Label slow_case; - // Test for recursion - cbz(header_reg, count); + // Check for non-symmetric locking. This is allowed by the spec and the interpreter + // must handle it. 
+ Register tmp = rscratch1; + // First check for lock-stack underflow. + ldrw(tmp, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(tmp, (unsigned)LockStack::start_offset()); + br(Assembler::LE, slow_case); + // Then check if the top of the lock-stack matches the unlocked object. + subw(tmp, tmp, oopSize); + ldr(tmp, Address(rthread, tmp)); + cmpoop(tmp, obj_reg); + br(Assembler::NE, slow_case); - // Atomic swap back the old header - cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, count, /*fallthrough*/nullptr); + ldr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + tbnz(header_reg, exact_log2(markWord::monitor_value), slow_case); + fast_unlock(obj_reg, header_reg, swap_reg, rscratch1, slow_case); + b(count); + bind(slow_case); + } else if (LockingMode == LM_LEGACY) { + // Load the old header from BasicLock structure + ldr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + // Test for recursion + cbz(header_reg, count); + + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, rscratch1, count, /*fallthrough*/nullptr); + } // Call the runtime routine for slow case. str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 350f8082c34..ef2fe7cef8c 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -6206,3 +6206,97 @@ void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) { strd(src.first()->as_FloatRegister(), Address(sp, reg2offset_out(dst.first()))); } } + +// Implements fast-locking. +// Branches to slow upon failure to lock the object, with ZF cleared. +// Falls through upon success with ZF set. +// +// - obj: the object to be locked +// - hdr: the header, already loaded from obj, will be destroyed +// - t1, t2: temporary registers, will be destroyed +void MacroAssembler::fast_lock(Register obj, Register hdr, Register t1, Register t2, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, hdr, t1, t2); + + // Check if we would have space on lock-stack for the object. + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(t1, (unsigned)LockStack::end_offset() - 1); + br(Assembler::GT, slow); + + // Load (object->mark() | 1) into hdr + orr(hdr, hdr, markWord::unlocked_value); + // Clear lock-bits, into t2 + eor(t2, hdr, markWord::unlocked_value); + // Try to swing header from unlocked to locked + cmpxchg(/*addr*/ obj, /*expected*/ hdr, /*new*/ t2, Assembler::xword, + /*acquire*/ true, /*release*/ true, /*weak*/ false, t1); + br(Assembler::NE, slow); + + // After successful lock, push object on lock-stack + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + str(obj, Address(rthread, t1)); + addw(t1, t1, oopSize); + strw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); +} + +// Implements fast-unlocking. +// Branches to slow upon failure, with ZF cleared. +// Falls through upon success, with ZF set. 
+// +// - obj: the object to be unlocked +// - hdr: the (pre-loaded) header of the object +// - t1, t2: temporary registers +void MacroAssembler::fast_unlock(Register obj, Register hdr, Register t1, Register t2, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, hdr, t1, t2); + +#ifdef ASSERT + { + // The following checks rely on the fact that LockStack is only ever modified by + // its owning thread, even if the lock got inflated concurrently; removal of LockStack + // entries after inflation will happen delayed in that case. + + // Check for lock-stack underflow. + Label stack_ok; + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + cmpw(t1, (unsigned)LockStack::start_offset()); + br(Assembler::GT, stack_ok); + STOP("Lock-stack underflow"); + bind(stack_ok); + } + { + // Check if the top of the lock-stack matches the unlocked object. + Label tos_ok; + subw(t1, t1, oopSize); + ldr(t1, Address(rthread, t1)); + cmpoop(t1, obj); + br(Assembler::EQ, tos_ok); + STOP("Top of lock-stack does not match the unlocked object"); + bind(tos_ok); + } + { + // Check that hdr is fast-locked. + Label hdr_ok; + tst(hdr, markWord::lock_mask_in_place); + br(Assembler::EQ, hdr_ok); + STOP("Header is not fast-locked"); + bind(hdr_ok); + } +#endif + + // Load the new header (unlocked) into t1 + orr(t1, hdr, markWord::unlocked_value); + + // Try to swing header from locked to unlocked + cmpxchg(obj, hdr, t1, Assembler::xword, + /*acquire*/ true, /*release*/ true, /*weak*/ false, t2); + br(Assembler::NE, slow); + + // After successful unlock, pop object from lock-stack + ldrw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); + subw(t1, t1, oopSize); +#ifdef ASSERT + str(zr, Address(rthread, t1)); +#endif + strw(t1, Address(rthread, JavaThread::lock_stack_top_offset())); +} diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 6211f1e74f2..7e58720727c 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -1580,6 +1580,9 @@ public: // Code for java.lang.Thread::onSpinWait() intrinsic. void spin_wait(); + void fast_lock(Register obj, Register hdr, Register t1, Register t2, Label& slow); + void fast_unlock(Register obj, Register hdr, Register t1, Register t2, Label& slow); + private: // Check the current thread doesn't need a cross modify fence. 
void verify_cross_modify_fence_not_required() PRODUCT_RETURN; diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index 82d4cd64cd8..47661a935d8 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -1778,7 +1778,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ ldr(obj_reg, Address(oop_handle_reg, 0)); - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ b(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { // Load (object->mark() | 1) into swap_reg %r0 __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes())); __ orr(swap_reg, rscratch1, 1); @@ -1808,7 +1810,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ str(swap_reg, Address(lock_reg, mark_word_offset)); __ br(Assembler::NE, slow_path_lock); } else { - __ b(slow_path_lock); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ ldr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ fast_lock(obj_reg, swap_reg, tmp, rscratch1, slow_path_lock); } __ bind(count); __ increment(Address(rthread, JavaThread::held_monitor_count_offset())); @@ -1917,7 +1921,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label done, not_recursive; - if (!UseHeavyMonitors) { + if (LockingMode == LM_LEGACY) { // Simple recursive lock? __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ cbnz(rscratch1, not_recursive); @@ -1932,7 +1936,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, save_native_result(masm, ret_type, stack_slots); } - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ b(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { // get address of the stack lock __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); // get old displaced header @@ -1944,7 +1950,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bind(count); __ decrement(Address(rthread, JavaThread::held_monitor_count_offset())); } else { - __ b(slow_path_unlock); + assert(LockingMode == LM_LIGHTWEIGHT, ""); + __ ldr(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ tbnz(old_hdr, exact_log2(markWord::monitor_value), slow_path_unlock); + __ fast_unlock(obj_reg, old_hdr, swap_reg, rscratch1, slow_path_unlock); + __ decrement(Address(rthread, JavaThread::held_monitor_count_offset())); } // slow path re-enters here diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index 0afbd2bc507..45786898458 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -2431,7 +2431,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { Register hdr = op->hdr_opr()->as_pointer_register(); Register lock = op->lock_opr()->as_pointer_register(); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { if (op->info() != nullptr) { add_debug_info_for_null_check_here(op->info()); __ null_check(obj); diff --git a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp index cdc421c7501..53001b9f124 100644 --- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp @@ -30,6 +30,7 @@ #include "gc/shared/collectedHeap.hpp" #include "gc/shared/tlab_globals.hpp" #include "interpreter/interpreter.hpp" +#include 
"logging/log.hpp" #include "oops/arrayOop.hpp" #include "oops/markWord.hpp" #include "runtime/basicLock.hpp" @@ -199,6 +200,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); const int mark_offset = BasicLock::displaced_header_offset_in_bytes(); + // save object being locked into the BasicObjectLock str(obj, Address(disp_hdr, obj_offset)); null_check_offset = offset(); @@ -212,38 +214,51 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); - // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. - // That would be acceptable as ether CAS or slow case path is taken in that case. + if (LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("C1_MacroAssembler::lock fast"); - // Must be the first instruction here, because implicit null check relies on it - ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + Register t1 = disp_hdr; // Needs saving, probably + Register t2 = hdr; // blow + Register t3 = Rtemp; // blow - tst(hdr, markWord::unlocked_value); - b(fast_lock, ne); + fast_lock_2(obj /* obj */, t1, t2, t3, 1 /* savemask - save t1 */, slow_case); + // Success: fall through - // Check for recursive locking - // See comments in InterpreterMacroAssembler::lock_object for - // explanations on the fast recursive locking check. - // -1- test low 2 bits - movs(tmp2, AsmOperand(hdr, lsl, 30)); - // -2- test (hdr - SP) if the low two bits are 0 - sub(tmp2, hdr, SP, eq); - movs(tmp2, AsmOperand(tmp2, lsr, exact_log2(os::vm_page_size())), eq); - // If still 'eq' then recursive locking OK - // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8267042) - str(tmp2, Address(disp_hdr, mark_offset)); - b(fast_lock_done, eq); - // else need slow case - b(slow_case); + } else if (LockingMode == LM_LEGACY) { + + // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. + // That would be acceptable as ether CAS or slow case path is taken in that case. + + // Must be the first instruction here, because implicit null check relies on it + ldr(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + + tst(hdr, markWord::unlocked_value); + b(fast_lock, ne); + + // Check for recursive locking + // See comments in InterpreterMacroAssembler::lock_object for + // explanations on the fast recursive locking check. 
+ // -1- test low 2 bits + movs(tmp2, AsmOperand(hdr, lsl, 30)); + // -2- test (hdr - SP) if the low two bits are 0 + sub(tmp2, hdr, SP, eq); + movs(tmp2, AsmOperand(tmp2, lsr, exact_log2(os::vm_page_size())), eq); + // If still 'eq' then recursive locking OK + // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8267042) + str(tmp2, Address(disp_hdr, mark_offset)); + b(fast_lock_done, eq); + // else need slow case + b(slow_case); - bind(fast_lock); - // Save previous object header in BasicLock structure and update the header - str(hdr, Address(disp_hdr, mark_offset)); + bind(fast_lock); + // Save previous object header in BasicLock structure and update the header + str(hdr, Address(disp_hdr, mark_offset)); - cas_for_lock_acquire(hdr, disp_hdr, obj, tmp2, slow_case); + cas_for_lock_acquire(hdr, disp_hdr, obj, tmp2, slow_case); - bind(fast_lock_done); + bind(fast_lock_done); + } bind(done); return null_check_offset; @@ -261,21 +276,35 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert(oopDesc::mark_offset_in_bytes() == 0, "Required by atomic instructions"); - // Load displaced header and object from the lock - ldr(hdr, Address(disp_hdr, mark_offset)); - // If hdr is null, we've got recursive locking and there's nothing more to do - cbz(hdr, done); + if (LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("C1_MacroAssembler::unlock fast"); - // load object - ldr(obj, Address(disp_hdr, obj_offset)); + ldr(obj, Address(disp_hdr, obj_offset)); - // Restore the object header - cas_for_lock_release(disp_hdr, hdr, obj, tmp2, slow_case); + Register t1 = disp_hdr; // Needs saving, probably + Register t2 = hdr; // blow + Register t3 = Rtemp; // blow + fast_unlock_2(obj /* object */, t1, t2, t3, 1 /* savemask (save t1) */, + slow_case); + // Success: Fall through + + } else if (LockingMode == LM_LEGACY) { + + // Load displaced header and object from the lock + ldr(hdr, Address(disp_hdr, mark_offset)); + // If hdr is null, we've got recursive locking and there's nothing more to do + cbz(hdr, done); + + // load object + ldr(obj, Address(disp_hdr, obj_offset)); + + // Restore the object header + cas_for_lock_release(disp_hdr, hdr, obj, tmp2, slow_case); + } bind(done); } - #ifndef PRODUCT void C1_MacroAssembler::verify_stack_oop(int stack_offset) { diff --git a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp index 70c16a2ab01..ae4c42fc887 100644 --- a/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c2_MacroAssembler_arm.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" +#include "logging/log.hpp" #include "opto/c2_MacroAssembler.hpp" #include "runtime/basicLock.hpp" @@ -80,13 +81,7 @@ void C2_MacroAssembler::char_arrays_equals(Register ary1, Register ary2, void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) { assert(VM_Version::supports_ldrex(), "unsupported, yet?"); - - Register Rmark = Rscratch2; - - assert(Roop != Rscratch, ""); - assert(Roop != Rmark, ""); - assert(Rbox != Rscratch, ""); - assert(Rbox != Rmark, ""); + assert_different_registers(Roop, Rbox, Rscratch, Rscratch2); Label fast_lock, done; @@ -97,29 +92,43 @@ void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratc b(done, ne); } - ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); - tst(Rmark, markWord::unlocked_value); - b(fast_lock, ne); + if 
(LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("C2_MacroAssembler::lock fast"); - // Check for recursive lock - // See comments in InterpreterMacroAssembler::lock_object for - // explanations on the fast recursive locking check. - // -1- test low 2 bits - movs(Rscratch, AsmOperand(Rmark, lsl, 30)); - // -2- test (hdr - SP) if the low two bits are 0 - sub(Rscratch, Rmark, SP, eq); - movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); - // If still 'eq' then recursive locking OK - // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) - str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); - b(done); + fast_lock_2(Roop /* obj */, Rbox /* t1 */, Rscratch /* t2 */, Rscratch2 /* t3 */, + 1 /* savemask (save t1) */, done); - bind(fast_lock); - str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + // Success: set Z + cmp(Roop, Roop); - bool allow_fallthrough_on_failure = true; - bool one_shot = true; - cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); + } else if (LockingMode == LM_LEGACY) { + + Register Rmark = Rscratch2; + + ldr(Rmark, Address(Roop, oopDesc::mark_offset_in_bytes())); + tst(Rmark, markWord::unlocked_value); + b(fast_lock, ne); + + // Check for recursive lock + // See comments in InterpreterMacroAssembler::lock_object for + // explanations on the fast recursive locking check. + // -1- test low 2 bits + movs(Rscratch, AsmOperand(Rmark, lsl, 30)); + // -2- test (hdr - SP) if the low two bits are 0 + sub(Rscratch, Rmark, SP, eq); + movs(Rscratch, AsmOperand(Rscratch, lsr, exact_log2(os::vm_page_size())), eq); + // If still 'eq' then recursive locking OK + // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8153107) + str(Rscratch, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + b(done); + + bind(fast_lock); + str(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + + bool allow_fallthrough_on_failure = true; + bool one_shot = true; + cas_for_lock_acquire(Rmark, Rbox, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); + } bind(done); @@ -130,26 +139,37 @@ void C2_MacroAssembler::fast_lock(Register Roop, Register Rbox, Register Rscratc void C2_MacroAssembler::fast_unlock(Register Roop, Register Rbox, Register Rscratch, Register Rscratch2) { assert(VM_Version::supports_ldrex(), "unsupported, yet?"); - - Register Rmark = Rscratch2; - - assert(Roop != Rscratch, ""); - assert(Roop != Rmark, ""); - assert(Rbox != Rscratch, ""); - assert(Rbox != Rmark, ""); + assert_different_registers(Roop, Rbox, Rscratch, Rscratch2); Label done; - ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); - // If hdr is null, we've got recursive locking and there's nothing more to do - cmp(Rmark, 0); - b(done, eq); + if (LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("C2_MacroAssembler::unlock fast"); - // Restore the object header - bool allow_fallthrough_on_failure = true; - bool one_shot = true; - cas_for_lock_release(Rbox, Rmark, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); + fast_unlock_2(Roop /* obj */, Rbox /* t1 */, Rscratch /* t2 */, Rscratch2 /* t3 */, + 1 /* savemask (save t1) */, done); + cmp(Roop, Roop); // Success: Set Z + // Fall through + + } else if (LockingMode == LM_LEGACY) { + + Register Rmark = Rscratch2; + + // Find the lock address and load the displaced header from the stack. 
+ ldr(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + // If hdr is null, we've got recursive locking and there's nothing more to do + cmp(Rmark, 0); + b(done, eq); + + // Restore the object header + bool allow_fallthrough_on_failure = true; + bool one_shot = true; + cas_for_lock_release(Rbox, Rmark, Roop, Rscratch, done, allow_fallthrough_on_failure, one_shot); + } bind(done); -} + // At this point flags are set as follows: + // EQ -> Success + // NE -> Failure, branch to slow path +} diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp index b3c4e85474a..9b1902cae8b 100644 --- a/src/hotspot/cpu/arm/interp_masm_arm.cpp +++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp @@ -885,7 +885,7 @@ void InterpreterMacroAssembler::set_do_not_unlock_if_synchronized(bool flag, Reg void InterpreterMacroAssembler::lock_object(Register Rlock) { assert(Rlock == R1, "the second argument"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); } else { Label done; @@ -910,79 +910,91 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) { b(slow_case, ne); } - // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. - // That would be acceptable as ether CAS or slow case path is taken in that case. - // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as - // loads are satisfied from a store queue if performed on the same processor). + if (LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("InterpreterMacroAssembler lock fast"); + fast_lock_2(Robj, R0 /* t1 */, Rmark /* t2 */, Rtemp /* t3 */, 0 /* savemask */, slow_case); + b(done); + } else if (LockingMode == LM_LEGACY) { + // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. + // That would be acceptable as ether CAS or slow case path is taken in that case. + // Exception to that is if the object is locked by the calling thread, then the recursive test will pass (guaranteed as + // loads are satisfied from a store queue if performed on the same processor). - assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); - ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes())); + assert(oopDesc::mark_offset_in_bytes() == 0, "must be"); + ldr(Rmark, Address(Robj, oopDesc::mark_offset_in_bytes())); - // Test if object is already locked - tst(Rmark, markWord::unlocked_value); - b(already_locked, eq); + // Test if object is already locked + tst(Rmark, markWord::unlocked_value); + b(already_locked, eq); - // Save old object->mark() into BasicLock's displaced header - str(Rmark, Address(Rlock, mark_offset)); + // Save old object->mark() into BasicLock's displaced header + str(Rmark, Address(Rlock, mark_offset)); - cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case); + cas_for_lock_acquire(Rmark, Rlock, Robj, Rtemp, slow_case); - b(done); + b(done); - // If we got here that means the object is locked by ether calling thread or another thread. - bind(already_locked); - // Handling of locked objects: recursive locks and slow case. + // If we got here that means the object is locked by ether calling thread or another thread. + bind(already_locked); + // Handling of locked objects: recursive locks and slow case. - // Fast check for recursive lock. 
- // - // Can apply the optimization only if this is a stack lock - // allocated in this thread. For efficiency, we can focus on - // recently allocated stack locks (instead of reading the stack - // base and checking whether 'mark' points inside the current - // thread stack): - // 1) (mark & 3) == 0 - // 2) SP <= mark < SP + os::pagesize() - // - // Warning: SP + os::pagesize can overflow the stack base. We must - // neither apply the optimization for an inflated lock allocated - // just above the thread stack (this is why condition 1 matters) - // nor apply the optimization if the stack lock is inside the stack - // of another thread. The latter is avoided even in case of overflow - // because we have guard pages at the end of all stacks. Hence, if - // we go over the stack base and hit the stack of another thread, - // this should not be in a writeable area that could contain a - // stack lock allocated by that thread. As a consequence, a stack - // lock less than page size away from SP is guaranteed to be - // owned by the current thread. - // - // Note: assuming SP is aligned, we can check the low bits of - // (mark-SP) instead of the low bits of mark. In that case, - // assuming page size is a power of 2, we can merge the two - // conditions into a single test: - // => ((mark - SP) & (3 - os::pagesize())) == 0 + // Fast check for recursive lock. + // + // Can apply the optimization only if this is a stack lock + // allocated in this thread. For efficiency, we can focus on + // recently allocated stack locks (instead of reading the stack + // base and checking whether 'mark' points inside the current + // thread stack): + // 1) (mark & 3) == 0 + // 2) SP <= mark < SP + os::pagesize() + // + // Warning: SP + os::pagesize can overflow the stack base. We must + // neither apply the optimization for an inflated lock allocated + // just above the thread stack (this is why condition 1 matters) + // nor apply the optimization if the stack lock is inside the stack + // of another thread. The latter is avoided even in case of overflow + // because we have guard pages at the end of all stacks. Hence, if + // we go over the stack base and hit the stack of another thread, + // this should not be in a writeable area that could contain a + // stack lock allocated by that thread. As a consequence, a stack + // lock less than page size away from SP is guaranteed to be + // owned by the current thread. + // + // Note: assuming SP is aligned, we can check the low bits of + // (mark-SP) instead of the low bits of mark. In that case, + // assuming page size is a power of 2, we can merge the two + // conditions into a single test: + // => ((mark - SP) & (3 - os::pagesize())) == 0 - // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand. - // Check independently the low bits and the distance to SP. - // -1- test low 2 bits - movs(R0, AsmOperand(Rmark, lsl, 30)); - // -2- test (mark - SP) if the low two bits are 0 - sub(R0, Rmark, SP, eq); - movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq); - // If still 'eq' then recursive locking OK: store 0 into lock record - str(R0, Address(Rlock, mark_offset), eq); + // (3 - os::pagesize()) cannot be encoded as an ARM immediate operand. + // Check independently the low bits and the distance to SP. 
+ // -1- test low 2 bits + movs(R0, AsmOperand(Rmark, lsl, 30)); + // -2- test (mark - SP) if the low two bits are 0 + sub(R0, Rmark, SP, eq); + movs(R0, AsmOperand(R0, lsr, exact_log2(os::vm_page_size())), eq); + // If still 'eq' then recursive locking OK: store 0 into lock record + str(R0, Address(Rlock, mark_offset), eq); - b(done, eq); + b(done, eq); + } bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); - + if (LockingMode == LM_LIGHTWEIGHT) { + // Pass oop, not lock, in fast lock case. call_VM wants R1 though. + push(R1); + mov(R1, Robj); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), R1); + pop(R1); + } else { + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), Rlock); + } bind(done); } } - // Unlocks an object. Used in monitorexit bytecode and remove_activation. // // Argument: R0: Points to BasicObjectLock structure for lock @@ -991,7 +1003,7 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) { void InterpreterMacroAssembler::unlock_object(Register Rlock) { assert(Rlock == R0, "the first argument"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), Rlock); } else { Label done, slow_case; @@ -1012,18 +1024,38 @@ void InterpreterMacroAssembler::unlock_object(Register Rlock) { // Free entry str(Rzero, Address(Rlock, obj_offset)); - // Load the old header from BasicLock structure - ldr(Rmark, Address(Rlock, mark_offset)); + if (LockingMode == LM_LIGHTWEIGHT) { - // Test for recursion (zero mark in BasicLock) - cbz(Rmark, done); + log_trace(fastlock)("InterpreterMacroAssembler unlock fast"); - bool allow_fallthrough_on_failure = true; + // Check for non-symmetric locking. This is allowed by the spec and the interpreter + // must handle it. + ldr(Rtemp, Address(Rthread, JavaThread::lock_stack_top_offset())); + sub(Rtemp, Rtemp, oopSize); + ldr(Rtemp, Address(Rthread, Rtemp)); + cmpoop(Rtemp, Robj); + b(slow_case, ne); - cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure); + fast_unlock_2(Robj /* obj */, Rlock /* t1 */, Rmark /* t2 */, Rtemp /* t3 */, + 1 /* savemask (save t1) */, slow_case); - b(done, eq); + b(done); + } else if (LockingMode == LM_LEGACY) { + + // Load the old header from BasicLock structure + ldr(Rmark, Address(Rlock, mark_offset)); + + // Test for recursion (zero mark in BasicLock) + cbz(Rmark, done); + + bool allow_fallthrough_on_failure = true; + + cas_for_lock_release(Rlock, Rmark, Robj, Rtemp, slow_case, allow_fallthrough_on_failure); + + b(done, eq); + + } bind(slow_case); // Call the runtime routine for slow case. @@ -1034,7 +1066,6 @@ void InterpreterMacroAssembler::unlock_object(Register Rlock) { } } - // Test ImethodDataPtr. If it is null, continue at the specified label void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, Label& zero_continue) { assert(ProfileInterpreter, "must be profiling interpreter"); diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp index 9f152d6933a..81f59adb230 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2023, Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -42,6 +43,7 @@ #include "oops/klass.inline.hpp" #include "prims/methodHandles.hpp" #include "runtime/interfaceSupport.inline.hpp" +#include "runtime/javaThread.hpp" #include "runtime/jniHandles.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/os.hpp" @@ -1194,11 +1196,15 @@ void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval, atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp); } + // Here, on success, EQ is set, NE otherwise + // MemBarAcquireLock barrier // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore, // but that doesn't prevent a load or store from floating up between // the load and store in the CAS sequence, so play it safe and // do a full fence. + // Note: we preserve flags here. + // Todo: Do we really need this also for the CAS fail case? membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg); if (!fallthrough_is_success && !allow_fallthrough_on_failure) { b(slow_case, ne); @@ -1209,7 +1215,6 @@ void MacroAssembler::cas_for_lock_release(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure, bool one_shot) { - bool fallthrough_is_success = false; assert_different_registers(oldval,newval,base,tmp); @@ -1713,3 +1718,145 @@ void MacroAssembler::read_polling_page(Register dest, relocInfo::relocType rtype ldr(dest, Address(dest)); } +#define PUSH_REG(mask, bit, Reg) \ + if (mask & ((unsigned)1 << bit)) { \ + push(Reg); \ + } + +#define POP_REG(mask, bit, Reg, condition) \ + if (mask & ((unsigned)1 << bit)) { \ + pop(Reg, condition); \ + } + +#define PUSH_REGS(mask, R1, R2, R3) \ + PUSH_REG(mask, 0, R1) \ + PUSH_REG(mask, 1, R2) \ + PUSH_REG(mask, 2, R3) + +#define POP_REGS(mask, R1, R2, R3, condition) \ + POP_REG(mask, 0, R1, condition) \ + POP_REG(mask, 1, R2, condition) \ + POP_REG(mask, 2, R3, condition) + +#define POISON_REG(mask, bit, Reg, poison) \ + if (mask & ((unsigned)1 << bit)) { \ + mov(Reg, poison); \ + } + +#define POISON_REGS(mask, R1, R2, R3, poison) \ + POISON_REG(mask, 0, R1, poison) \ + POISON_REG(mask, 1, R2, poison) \ + POISON_REG(mask, 2, R3, poison) + +// Attempt to fast-lock an object +// Registers: +// - obj: the object to be locked +// - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown. +// Result: +// - Success: fallthrough +// - Error: break to slow, Z cleared. +void MacroAssembler::fast_lock_2(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, t1, t2, t3); + +#ifdef ASSERT + // Poison scratch regs + POISON_REGS((~savemask), t1, t2, t3, 0x10000001); +#endif + + PUSH_REGS(savemask, t1, t2, t3); + + // Check if we would have space on lock-stack for the object. 
+ ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset())); + // cmp(t1, (unsigned)LockStack::end_offset()); // too complicated constant: 1132 (46c) + movw(t2, LockStack::end_offset() - 1); + cmp(t1, t2); + POP_REGS(savemask, t1, t2, t3, gt); + b(slow, gt); // Z is cleared + + // Prepare old, new header + Register old_hdr = t1; + Register new_hdr = t2; + ldr(new_hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + bic(new_hdr, new_hdr, markWord::lock_mask_in_place); // new header (00) + orr(old_hdr, new_hdr, markWord::unlocked_value); // old header (01) + + Label dummy; + + cas_for_lock_acquire(old_hdr /* old */, new_hdr /* new */, + obj /* location */, t3 /* scratch */, dummy, + true /* allow_fallthrough_on_failure */, true /* one_shot */); + + POP_REGS(savemask, t1, t2, t3, ne); // Cas failed -> slow + b(slow, ne); // Cas failed -> slow + + // After successful lock, push object onto lock-stack + ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset())); + str(obj, Address(Rthread, t1)); + add(t1, t1, oopSize); + str(t1, Address(Rthread, JavaThread::lock_stack_top_offset())); + + POP_REGS(savemask, t1, t2, t3, al); + +#ifdef ASSERT + // Poison scratch regs + POISON_REGS((~savemask), t1, t2, t3, 0x20000002); +#endif + + // Success: fall through +} + +// Attempt to fast-unlock an object +// Registers: +// - obj: the object to be unlocked +// - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown. +// Result: +// - Success: fallthrough +// - Error: break to slow, Z cleared. +void MacroAssembler::fast_unlock_2(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, t1, t2, t3); + +#ifdef ASSERT + // Poison scratch regs + POISON_REGS((~savemask), t1, t2, t3, 0x30000003); +#endif + + PUSH_REGS(savemask, t1, t2, t3); + + // Prepare old, new header + Register old_hdr = t1; + Register new_hdr = t2; + ldr(old_hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + bic(old_hdr, old_hdr, markWord::lock_mask_in_place); // old header (00) + orr(new_hdr, old_hdr, markWord::unlocked_value); // new header (01) + + // Try to swing header from locked to unlocked + Label dummy; + cas_for_lock_release(old_hdr /* old */, new_hdr /* new */, + obj /* location */, t3 /* scratch */, dummy, + true /* allow_fallthrough_on_failure */, true /* one_shot */); + + POP_REGS(savemask, t1, t2, t3, ne); // Cas failed -> slow + b(slow, ne); // Cas failed -> slow + + // After successful unlock, pop object from lock-stack + ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset())); + sub(t1, t1, oopSize); + str(t1, Address(Rthread, JavaThread::lock_stack_top_offset())); + +#ifdef ASSERT + // zero out popped slot + mov(t2, 0); + str(t2, Address(Rthread, t1)); +#endif + + POP_REGS(savemask, t1, t2, t3, al); + +#ifdef ASSERT + // Poison scratch regs + POISON_REGS((~savemask), t1, t2, t3, 0x40000004); +#endif + + // Fallthrough: success +} diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp index 55e3ec4bf4b..359ad93b91b 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp @@ -1009,6 +1009,24 @@ public: void cas_for_lock_acquire(Register oldval, Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false); void cas_for_lock_release(Register oldval, 
Register newval, Register base, Register tmp, Label &slow_case, bool allow_fallthrough_on_failure = false, bool one_shot = false); + // Attempt to fast-lock an object + // Registers: + // - obj: the object to be locked + // - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown. + // Result: + // - Success: fallthrough + // - Error: break to slow, Z cleared. + void fast_lock_2(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow); + + // Attempt to fast-unlock an object + // Registers: + // - obj: the object to be unlocked + // - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown. + // Result: + // - Success: fallthrough + // - Error: break to slow, Z cleared. + void fast_unlock_2(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow); + #ifndef PRODUCT // Preserves flags and all registers. // On SMP the updated value might not be visible to external observers without a synchronization barrier diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp index 15e597444c0..ab7bfb6eee4 100644 --- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp +++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp @@ -1153,35 +1153,41 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Remember the handle for the unlocking code __ mov(sync_handle, R1); - const Register mark = tmp; - // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. - // That would be acceptable as either CAS or slow case path is taken in that case + if (LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("SharedRuntime lock fast"); + __ fast_lock_2(sync_obj /* object */, disp_hdr /* t1 */, tmp /* t2 */, Rtemp /* t3 */, + 0x7 /* savemask */, slow_lock); + // Fall through to lock_done + } else if (LockingMode == LM_LEGACY) { + const Register mark = tmp; + // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread. + // That would be acceptable as either CAS or slow case path is taken in that case - __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes())); - __ sub(disp_hdr, FP, lock_slot_fp_offset); - __ tst(mark, markWord::unlocked_value); - __ b(fast_lock, ne); + __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes())); + __ sub(disp_hdr, FP, lock_slot_fp_offset); + __ tst(mark, markWord::unlocked_value); + __ b(fast_lock, ne); - // Check for recursive lock - // See comments in InterpreterMacroAssembler::lock_object for - // explanations on the fast recursive locking check. - // Check independently the low bits and the distance to SP - // -1- test low 2 bits - __ movs(Rtemp, AsmOperand(mark, lsl, 30)); - // -2- test (hdr - SP) if the low two bits are 0 - __ sub(Rtemp, mark, SP, eq); - __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq); - // If still 'eq' then recursive locking OK - // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8267042) - __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); - __ b(lock_done, eq); - __ b(slow_lock); + // Check for recursive lock + // See comments in InterpreterMacroAssembler::lock_object for + // explanations on the fast recursive locking check. 
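// --------------------------------------------------------------------------
// [Editor's note -- illustrative sketch, not part of the patch] The LM_LEGACY
// branch here saves the mark word (with the unlocked bit set) into the on-stack
// BasicLock, CASes the BasicLock address into the header, and on CAS failure
// applies the "fast recursive locking check" referenced above. The decision
// logic in plain C++; the names and the 4 KiB page size are assumptions of the
// sketch, not taken from the patch:
#include <cstdint>

enum LegacyLockResult { LOCKED, RECURSIVE, TAKE_SLOW_PATH };

// mark:          the mark word observed before the CAS
// sp:            the current stack pointer
// cas_succeeded: did CAS(header: mark|unlocked -> &BasicLock) succeed?
inline LegacyLockResult legacy_stack_lock(uintptr_t mark, uintptr_t sp,
                                          bool cas_succeeded) {
  const uintptr_t aligned_mask = sizeof(void*) - 1;  // low bits that must be clear
  const uintptr_t page_size    = 4096;               // assumed for the example

  if (cas_succeeded) {
    return LOCKED;            // our BasicLock address is now in the object header
  }
  // The header may already point into our own stack => recursive lock.
  // Conditions 1) (mark & aligned_mask) == 0, 2) sp <= mark, 3) mark <= sp + page_size
  // collapse into a single masked subtraction, assuming sp and page_size have
  // their low bits clear and page_size is a power of two:
  if (((mark - sp) & (aligned_mask - page_size)) == 0) {
    return RECURSIVE;         // store 0 into the displaced-header slot
  }
  return TAKE_SLOW_PATH;      // contended or inflated: call the runtime
}
// --------------------------------------------------------------------------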
+ // Check independently the low bits and the distance to SP + // -1- test low 2 bits + __ movs(Rtemp, AsmOperand(mark, lsl, 30)); + // -2- test (hdr - SP) if the low two bits are 0 + __ sub(Rtemp, mark, SP, eq); + __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq); + // If still 'eq' then recursive locking OK + // set to zero if recursive lock, set to non zero otherwise (see discussion in JDK-8267042) + __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); + __ b(lock_done, eq); + __ b(slow_lock); - __ bind(fast_lock); - __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); - - __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); + __ bind(fast_lock); + __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); + __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock); + } __ bind(lock_done); } @@ -1234,14 +1240,21 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label slow_unlock, unlock_done; if (method->is_synchronized()) { - __ ldr(sync_obj, Address(sync_handle)); + if (LockingMode == LM_LIGHTWEIGHT) { + log_trace(fastlock)("SharedRuntime unlock fast"); + __ fast_unlock_2(sync_obj, R2 /* t1 */, tmp /* t2 */, Rtemp /* t3 */, + 7 /* savemask */, slow_unlock); + // Fall through + } else if (LockingMode == LM_LEGACY) { + // See C1_MacroAssembler::unlock_object() for more comments + __ ldr(sync_obj, Address(sync_handle)); - // See C1_MacroAssembler::unlock_object() for more comments - __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); - __ cbz(R2, unlock_done); - - __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock); + // See C1_MacroAssembler::unlock_object() for more comments + __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes())); + __ cbz(R2, unlock_done); + __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock); + } __ bind(unlock_done); } diff --git a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp index 17faf7ecbc8..161a8a7376f 100644 --- a/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRAssembler_riscv.cpp @@ -360,7 +360,7 @@ int LIR_Assembler::emit_unwind_handler() { if (method()->is_synchronized()) { monitor_address(0, FrameMap::r10_opr); stub = new MonitorExitStub(FrameMap::r10_opr, true, 0); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { __ j(*stub->entry()); } else { __ unlock_object(x15, x14, x10, *stub->entry()); @@ -1499,7 +1499,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { Register obj = op->obj_opr()->as_register(); // may not be an oop Register hdr = op->hdr_opr()->as_register(); Register lock = op->lock_opr()->as_register(); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { if (op->info() != nullptr) { add_debug_info_for_null_check_here(op->info()); __ null_check(obj); diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp index 8e1ee4f588c..69b6f200f4b 100644 --- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp @@ -52,8 +52,7 @@ void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { const int aligned_mask = BytesPerWord - 1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert(hdr != 
obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; + assert_different_registers(hdr, obj, disp_hdr); int null_check_offset = -1; verify_oop(obj); @@ -72,39 +71,46 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr // Load object header ld(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked - ori(hdr, hdr, markWord::unlocked_value); - // save unlocked object header into the displaced header location on the stack - sd(hdr, Address(disp_hdr, 0)); - // test if object header is still the same (i.e. unlocked), and if so, store the - // displaced header address in the object header - if it is not the same, get the - // object header instead - la(t1, Address(obj, hdr_offset)); - cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthough*/nullptr); - // if the object header was the same, we're done - // if the object header was not the same, it is now in the hdr register - // => test if it is a stack pointer into the same stack (recursive locking), i.e.: - // - // 1) (hdr & aligned_mask) == 0 - // 2) sp <= hdr - // 3) hdr <= sp + page_size - // - // these 3 tests can be done by evaluating the following expression: - // - // (hdr -sp) & (aligned_mask - page_size) - // - // assuming both the stack pointer and page_size have their least - // significant 2 bits cleared and page_size is a power of 2 - sub(hdr, hdr, sp); - mv(t0, aligned_mask - (int)os::vm_page_size()); - andr(hdr, hdr, t0); - // for recursive locking, the result is zero => save it in the displaced header - // location (null in the displaced hdr location indicates recursive locking) - sd(hdr, Address(disp_hdr, 0)); - // otherwise we don't care about the result and handle locking via runtime call - bnez(hdr, slow_case, /* is_far */ true); - // done - bind(done); + + if (LockingMode == LM_LIGHTWEIGHT) { + fast_lock(obj, hdr, t0, t1, slow_case); + } else if (LockingMode == LM_LEGACY) { + Label done; + // and mark it as unlocked + ori(hdr, hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + sd(hdr, Address(disp_hdr, 0)); + // test if object header is still the same (i.e. 
unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + la(t1, Address(obj, hdr_offset)); + cmpxchgptr(hdr, disp_hdr, t1, t0, done, /*fallthough*/nullptr); + // if the object header was the same, we're done + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) sp <= hdr + // 3) hdr <= sp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr -sp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + sub(hdr, hdr, sp); + mv(t0, aligned_mask - (int)os::vm_page_size()); + andr(hdr, hdr, t0); + // for recursive locking, the result is zero => save it in the displaced header + // location (null in the displaced hdr location indicates recursive locking) + sd(hdr, Address(disp_hdr, 0)); + // otherwise we don't care about the result and handle locking via runtime call + bnez(hdr, slow_case, /* is_far */ true); + // done + bind(done); + } + increment(Address(xthread, JavaThread::held_monitor_count_offset())); return null_check_offset; } @@ -115,27 +121,39 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); Label done; - // load displaced header - ld(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is null we had recursive locking - // if we had recursive locking, we are done - beqz(hdr, done); + if (LockingMode != LM_LIGHTWEIGHT) { + // load displaced header + ld(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is null we had recursive locking + // if we had recursive locking, we are done + beqz(hdr, done); + } + // load object ld(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to - // the displaced header, get the object header instead - // if the object header was not pointing to the displaced header, - // we do unlocking via runtime call - if (hdr_offset) { - la(t0, Address(obj, hdr_offset)); - cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case); - } else { - cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case); + + if (LockingMode == LM_LIGHTWEIGHT) { + ld(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); + andi(t0, hdr, markWord::monitor_value); + bnez(t0, slow_case, /* is_far */ true); + fast_unlock(obj, hdr, t0, t1, slow_case); + } else if (LockingMode == LM_LEGACY) { + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + if (hdr_offset) { + la(t0, Address(obj, hdr_offset)); + cmpxchgptr(disp_hdr, hdr, t0, t1, done, &slow_case); + } else { + cmpxchgptr(disp_hdr, hdr, obj, t1, done, &slow_case); + } + // done + bind(done); } - // done - bind(done); + decrement(Address(xthread, JavaThread::held_monitor_count_offset())); } diff --git a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp 
b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp index 833766de4b4..71d5315b185 100644 --- a/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp +++ b/src/hotspot/cpu/riscv/c2_CodeStubs_riscv.cpp @@ -26,6 +26,7 @@ #include "precompiled.hpp" #include "opto/c2_CodeStubs.hpp" #include "opto/c2_MacroAssembler.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -72,4 +73,32 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { __ emit_int32(0); // nmethod guard value } +int C2HandleAnonOMOwnerStub::max_size() const { + // Max size of stub has been determined by testing with 0 without using RISC-V compressed + // instruction-set extension, in which case C2CodeStubList::emit() will throw an assertion + // and report the actual size that is needed. + return 20 DEBUG_ONLY(+8); +} + +void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register mon = monitor(); + Register t = tmp(); + assert(t != noreg, "need tmp register"); + + // Fix owner to be the current thread. + __ sd(xthread, Address(mon, ObjectMonitor::owner_offset_in_bytes())); + + // Pop owner object from lock-stack. + __ lwu(t, Address(xthread, JavaThread::lock_stack_top_offset())); + __ subw(t, t, oopSize); +#ifdef ASSERT + __ add(t0, xthread, t); + __ sd(zr, Address(t0, 0)); +#endif + __ sw(t, Address(xthread, JavaThread::lock_stack_top_offset())); + + __ j(continuation()); +} + #undef __ diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index 1873e71db14..093a762cc06 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -781,7 +781,7 @@ void InterpreterMacroAssembler::remove_activation( void InterpreterMacroAssembler::lock_object(Register lock_reg) { assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); @@ -809,42 +809,53 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) bnez(tmp, slow_case); } - // Load (object->mark() | 1) into swap_reg - ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - ori(swap_reg, t0, 1); + if (LockingMode == LM_LIGHTWEIGHT) { + ld(tmp, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + fast_lock(obj_reg, tmp, t0, t1, slow_case); + j(count); + } else if (LockingMode == LM_LEGACY) { + // Load (object->mark() | 1) into swap_reg + ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + ori(swap_reg, t0, 1); - // Save (object->mark() | 1) into BasicLock's displaced header - sd(swap_reg, Address(lock_reg, mark_offset)); + // Save (object->mark() | 1) into BasicLock's displaced header + sd(swap_reg, Address(lock_reg, mark_offset)); - assert(lock_offset == 0, - "displached header must be first word in BasicObjectLock"); + assert(lock_offset == 0, + "displached header must be first word in BasicObjectLock"); - cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, count, /*fallthrough*/nullptr); + cmpxchg_obj_header(swap_reg, lock_reg, obj_reg, t0, count, /*fallthrough*/nullptr); - // Test if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 7) == 0, and - // 2) sp <= mark < mark + os::pagesize() - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - sp) & (7 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 3 bits clear. - // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg - sub(swap_reg, swap_reg, sp); - mv(t0, (int64_t)(7 - (int)os::vm_page_size())); - andr(swap_reg, swap_reg, t0); + // Test if the oopMark is an obvious stack pointer, i.e., + // 1) (mark & 7) == 0, and + // 2) sp <= mark < mark + os::pagesize() + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - sp) & (7 - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant 3 bits clear. + // NOTE: the oopMark is in swap_reg x10 as the result of cmpxchg + sub(swap_reg, swap_reg, sp); + mv(t0, (int64_t)(7 - (int)os::vm_page_size())); + andr(swap_reg, swap_reg, t0); - // Save the test result, for recursive case, the result is zero - sd(swap_reg, Address(lock_reg, mark_offset)); - beqz(swap_reg, count); + // Save the test result, for recursive case, the result is zero + sd(swap_reg, Address(lock_reg, mark_offset)); + beqz(swap_reg, count); + } bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, - CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - lock_reg); - + if (LockingMode == LM_LIGHTWEIGHT) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + obj_reg); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } j(done); bind(count); @@ -870,7 +881,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { assert(lock_reg == c_rarg1, "The argument is only for looks. 
It must be rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); } else { Label count, done; @@ -881,9 +892,11 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into x10 - la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (LockingMode != LM_LIGHTWEIGHT) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into x10 + la(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(c_rarg3) ld(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -891,15 +904,41 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) // Free entry sd(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); - // Load the old header from BasicLock structure - ld(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + if (LockingMode == LM_LIGHTWEIGHT) { + Label slow_case; - // Test for recursion - beqz(header_reg, count); + // Check for non-symmetric locking. This is allowed by the spec and the interpreter + // must handle it. + Register tmp1 = t0; + Register tmp2 = header_reg; + // First check for lock-stack underflow. + lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + mv(tmp2, (unsigned)LockStack::start_offset()); + ble(tmp1, tmp2, slow_case); + // Then check if the top of the lock-stack matches the unlocked object. + subw(tmp1, tmp1, oopSize); + add(tmp1, xthread, tmp1); + ld(tmp1, Address(tmp1, 0)); + bne(tmp1, obj_reg, slow_case); - // Atomic swap back the old header - cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, count, /*fallthrough*/nullptr); + ld(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andi(t0, header_reg, markWord::monitor_value); + bnez(t0, slow_case); + fast_unlock(obj_reg, header_reg, swap_reg, t0, slow_case); + j(count); + + bind(slow_case); + } else if (LockingMode == LM_LEGACY) { + // Load the old header from BasicLock structure + ld(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); + + // Test for recursion + beqz(header_reg, count); + + // Atomic swap back the old header + cmpxchg_obj_header(swap_reg, header_reg, obj_reg, t0, count, /*fallthrough*/nullptr); + } // Call the runtime routine for slow case. sd(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp index 034072a4fd9..c47cc1c9677 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp @@ -59,6 +59,7 @@ #else #define BLOCK_COMMENT(str) block_comment(str) #endif +#define STOP(str) stop(str); #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") static void pass_arg0(MacroAssembler* masm, Register arg) { @@ -2416,7 +2417,7 @@ void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acqui membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore); } if (at_return) { - bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */); + bgtu(in_nmethod ? 
sp : fp, t0, slow_path, /* is_far */ true); } else { test_bit(t0, t0, exact_log2(SafepointMechanism::poll_bit())); bnez(t0, slow_path, true /* is_far */); @@ -4486,3 +4487,100 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos, Regist } andi(Rd, Rs, 1UL << bit_pos, tmp); } + +// Implements fast-locking. +// Branches to slow upon failure to lock the object. +// Falls through upon success. +// +// - obj: the object to be locked +// - hdr: the header, already loaded from obj, will be destroyed +// - tmp1, tmp2: temporary registers, will be destroyed +void MacroAssembler::fast_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, hdr, tmp1, tmp2); + + // Check if we would have space on lock-stack for the object. + lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + mv(tmp2, (unsigned)LockStack::end_offset()); + bge(tmp1, tmp2, slow, /* is_far */ true); + + // Load (object->mark() | 1) into hdr + ori(hdr, hdr, markWord::unlocked_value); + // Clear lock-bits, into tmp2 + xori(tmp2, hdr, markWord::unlocked_value); + + // Try to swing header from unlocked to locked + Label success; + cmpxchgptr(hdr, tmp2, obj, tmp1, success, &slow); + bind(success); + + // After successful lock, push object on lock-stack + lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + add(tmp2, xthread, tmp1); + sd(obj, Address(tmp2, 0)); + addw(tmp1, tmp1, oopSize); + sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); +} + +// Implements fast-unlocking. +// Branches to slow upon failure. +// Falls through upon success. +// +// - obj: the object to be unlocked +// - hdr: the (pre-loaded) header of the object +// - tmp1, tmp2: temporary registers +void MacroAssembler::fast_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) { + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, hdr, tmp1, tmp2); + +#ifdef ASSERT + { + // The following checks rely on the fact that LockStack is only ever modified by + // its owning thread, even if the lock got inflated concurrently; removal of LockStack + // entries after inflation will happen delayed in that case. + + // Check for lock-stack underflow. + Label stack_ok; + lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + mv(tmp2, (unsigned)LockStack::start_offset()); + bgt(tmp1, tmp2, stack_ok); + STOP("Lock-stack underflow"); + bind(stack_ok); + } + { + // Check if the top of the lock-stack matches the unlocked object. + Label tos_ok; + subw(tmp1, tmp1, oopSize); + add(tmp1, xthread, tmp1); + ld(tmp1, Address(tmp1, 0)); + beq(tmp1, obj, tos_ok); + STOP("Top of lock-stack does not match the unlocked object"); + bind(tos_ok); + } + { + // Check that hdr is fast-locked. 
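// --------------------------------------------------------------------------
// [Editor's note -- illustrative sketch, not part of the patch] The debug-only
// block here checks three invariants before a lightweight fast-unlock. The same
// checks expressed as plain C++ assertions (the types and the lock_mask constant
// below are a simplified model of the mark word, not copied from the runtime):
#include <cassert>
#include <cstddef>
#include <cstdint>

struct UnlockPreconditions {
  std::size_t lock_stack_top;  // entries currently on the thread's lock-stack
  void*       lock_stack_tos;  // oop in the top slot (valid if lock_stack_top > 0)
  uintptr_t   mark;            // the object's current mark word
};

inline void verify_before_fast_unlock(const UnlockPreconditions& p, void* obj) {
  const uintptr_t lock_mask_in_place = 0x3;  // the two low lock bits

  // 1) No lock-stack underflow: only the owning thread pushes/pops its stack,
  //    even if the lock was inflated concurrently.
  assert(p.lock_stack_top > 0 && "Lock-stack underflow");

  // 2) Structured locking on this fast path: the object being unlocked must be
  //    the one on top of the lock-stack.
  assert(p.lock_stack_tos == obj && "Top of lock-stack does not match the unlocked object");

  // 3) The header must actually be in the fast-locked state (low lock bits 00),
  //    not unlocked (01) or inflated (10).
  assert((p.mark & lock_mask_in_place) == 0 && "Header is not fast-locked");
}
// --------------------------------------------------------------------------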
+ Label hdr_ok; + andi(tmp1, hdr, markWord::lock_mask_in_place); + beqz(tmp1, hdr_ok); + STOP("Header is not fast-locked"); + bind(hdr_ok); + } +#endif + + // Load the new header (unlocked) into tmp1 + ori(tmp1, hdr, markWord::unlocked_value); + + // Try to swing header from locked to unlocked + Label success; + cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow); + bind(success); + + // After successful unlock, pop object from lock-stack + lwu(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); + subw(tmp1, tmp1, oopSize); +#ifdef ASSERT + add(tmp2, xthread, tmp1); + sd(zr, Address(tmp2, 0)); +#endif + sw(tmp1, Address(xthread, JavaThread::lock_stack_top_offset())); +} diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp index c60d1a5ad66..e6a286d4ea3 100644 --- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp +++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp @@ -1418,6 +1418,10 @@ private: void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire); void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release); + +public: + void fast_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); + void fast_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow); }; #ifdef ASSERT diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad index 8662710b97e..ab2d15d459b 100644 --- a/src/hotspot/cpu/riscv/riscv.ad +++ b/src/hotspot/cpu/riscv/riscv.ad @@ -2485,7 +2485,7 @@ encode %{ } %} - // using the cr register as the bool result: 0 for success; others failed. + // Use cr register to indicate the fast_lock result: zero for success; non-zero for failure. enc_class riscv_enc_fast_lock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ C2_MacroAssembler _masm(&cbuf); Register flag = t1; @@ -2495,7 +2495,7 @@ encode %{ Register tmp = as_Register($tmp2$$reg); Label cont; Label object_has_monitor; - Label no_count; + Label count, no_count; assert_different_registers(oop, box, tmp, disp_hdr, t0); @@ -2513,7 +2513,10 @@ encode %{ __ test_bit(t0, disp_hdr, exact_log2(markWord::monitor_value)); __ bnez(t0, object_has_monitor); - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path + __ j(cont); + } else if (LockingMode == LM_LEGACY) { // Set tmp to be (markWord of object | UNLOCK_VALUE). __ ori(tmp, disp_hdr, markWord::unlocked_value); @@ -2544,11 +2547,19 @@ encode %{ __ andr(tmp/*==0?*/, disp_hdr, tmp); __ sd(tmp/*==0, perhaps*/, Address(box, BasicLock::displaced_header_offset_in_bytes())); __ mv(flag, tmp); // we can use the value of tmp as the result here + __ j(cont); } else { - __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path - } + assert(LockingMode == LM_LIGHTWEIGHT, ""); + Label slow; + __ fast_lock(oop, disp_hdr, tmp, t0, slow); - __ j(cont); + // Indicate success on completion. + __ mv(flag, zr); + __ j(count); + __ bind(slow); + __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow-path + __ j(no_count); + } // Handle existing monitor. __ bind(object_has_monitor); @@ -2560,12 +2571,14 @@ encode %{ __ cmpxchg(/*memory address*/tmp, /*expected value*/zr, /*new value*/xthread, Assembler::int64, Assembler::aq, Assembler::rl, /*result*/flag); // cas succeeds if flag == zr(expected) - // Store a non-null value into the box to avoid looking like a re-entrant - // lock. 
The fast-path monitor unlock code checks for - // markWord::monitor_value so use markWord::unused_mark which has the - // relevant bit set, and also matches ObjectSynchronizer::slow_enter. - __ mv(tmp, (address)markWord::unused_mark().value()); - __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + if (LockingMode != LM_LIGHTWEIGHT) { + // Store a non-null value into the box to avoid looking like a re-entrant + // lock. The fast-path monitor unlock code checks for + // markWord::monitor_value so use markWord::unused_mark which has the + // relevant bit set, and also matches ObjectSynchronizer::slow_enter. + __ mv(tmp, (address)markWord::unused_mark().value()); + __ sd(tmp, Address(box, BasicLock::displaced_header_offset_in_bytes())); + } __ beqz(flag, cont); // CAS success means locking succeeded @@ -2576,15 +2589,17 @@ encode %{ __ increment(Address(disp_hdr, ObjectMonitor::recursions_offset_in_bytes() - markWord::monitor_value), 1, t0, tmp); __ bind(cont); - + // zero flag indicates success + // non-zero flag indicates failure __ bnez(flag, no_count); + __ bind(count); __ increment(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp); __ bind(no_count); %} - // using cr flag to indicate the fast_unlock result: 0 for success; others failed. + // Use cr register to indicate the fast_unlock result: zero for success; non-zero for failure. enc_class riscv_enc_fast_unlock(iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2) %{ C2_MacroAssembler _masm(&cbuf); Register flag = t1; @@ -2594,11 +2609,11 @@ encode %{ Register tmp = as_Register($tmp2$$reg); Label cont; Label object_has_monitor; - Label no_count; + Label count, no_count; assert_different_registers(oop, box, tmp, disp_hdr, flag); - if (!UseHeavyMonitors) { + if (LockingMode == LM_LEGACY) { // Find the lock address and load the displaced header from the stack. __ ld(disp_hdr, Address(box, BasicLock::displaced_header_offset_in_bytes())); @@ -2612,7 +2627,10 @@ encode %{ __ test_bit(t0, tmp, exact_log2(markWord::monitor_value)); __ bnez(t0, object_has_monitor); - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path + __ j(cont); + } else if (LockingMode == LM_LEGACY) { // Check if it is still a light weight lock, this is true if we // see the stack address of the basicLock in the markWord of the // object. @@ -2620,10 +2638,19 @@ encode %{ __ cmpxchg(/*memory address*/oop, /*expected value*/box, /*new value*/disp_hdr, Assembler::int64, Assembler::relaxed, Assembler::rl, /*result*/tmp); __ xorr(flag, box, tmp); // box == tmp if cas succeeds + __ j(cont); } else { + assert(LockingMode == LM_LIGHTWEIGHT, ""); + Label slow; + __ fast_unlock(oop, tmp, box, disp_hdr, slow); + + // Indicate success on completion. + __ mv(flag, zr); + __ j(count); + __ bind(slow); __ mv(flag, 1); // Set non-zero flag to indicate 'failure' -> take slow path + __ j(no_count); } - __ j(cont); assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); @@ -2631,6 +2658,18 @@ encode %{ __ bind(object_has_monitor); STATIC_ASSERT(markWord::monitor_value <= INT_MAX); __ add(tmp, tmp, -(int)markWord::monitor_value); // monitor + + if (LockingMode == LM_LIGHTWEIGHT) { + // If the owner is anonymous, we need to fix it -- in an outline stub. 
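// --------------------------------------------------------------------------
// [Editor's note -- illustrative sketch, not part of the patch] Background for
// the outlined stub used here: under lightweight locking a contending thread can
// inflate a monitor without knowing which thread currently holds the fast lock,
// so it tags the owner field as anonymous; the real owner repairs the field the
// next time it reaches the monitor path. A plain C++ model of that fix-up
// (MonitorModel, ThreadModel and ANON are hypothetical names):
#include <cassert>
#include <cstddef>

struct ThreadModel {
  void*       lock_stack[8];
  std::size_t lock_stack_top = 0;
};

struct MonitorModel {
  void* owner;  // nullptr, a ThreadModel*, or the anonymous-owner tag
};

static void* const ANON = reinterpret_cast<void*>(1);  // stands in for ANONYMOUS_OWNER

inline void fix_anonymous_owner(MonitorModel& mon, ThreadModel& self) {
  assert(mon.owner == ANON && "only reached when the owner is anonymous");
  // Fix the owner to be the current thread ...
  mon.owner = &self;
  // ... and pop the oop from the lock-stack: ownership is now tracked by the
  // monitor, not by the stack of fast-locked objects.
  assert(self.lock_stack_top > 0);
  self.lock_stack_top--;
  self.lock_stack[self.lock_stack_top] = nullptr;  // debug-only zeroing, as in the stub
}
// --------------------------------------------------------------------------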
+ Register tmp2 = disp_hdr; + __ ld(tmp2, Address(tmp, ObjectMonitor::owner_offset_in_bytes())); + __ andi(t0, tmp2, (int64_t)ObjectMonitor::ANONYMOUS_OWNER); + C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmp, tmp2); + Compile::current()->output()->add_stub(stub); + __ bnez(t0, stub->entry(), /* is_far */ true); + __ bind(stub->continuation()); + } + __ ld(disp_hdr, Address(tmp, ObjectMonitor::recursions_offset_in_bytes())); Label notRecursive; @@ -2653,9 +2692,11 @@ encode %{ __ sd(zr, Address(tmp)); // set unowned __ bind(cont); - + // zero flag indicates success + // non-zero flag indicates failure __ bnez(flag, no_count); + __ bind(count); __ decrement(Address(xthread, JavaThread::held_monitor_count_offset()), 1, t0, tmp); __ bind(no_count); diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp index a1ef47dcd5e..643550f80a6 100644 --- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp +++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp @@ -1671,7 +1671,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ ld(obj_reg, Address(oop_handle_reg, 0)); - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ j(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { // Load (object->mark() | 1) into swap_reg % x10 __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes())); __ ori(swap_reg, t0, 1); @@ -1698,7 +1700,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ sd(swap_reg, Address(lock_reg, mark_word_offset)); __ bnez(swap_reg, slow_path_lock); } else { - __ j(slow_path_lock); + assert(LockingMode == LM_LIGHTWEIGHT, ""); + __ ld(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ fast_lock(obj_reg, swap_reg, tmp, t0, slow_path_lock); } __ bind(count); @@ -1793,7 +1797,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, Label done, not_recursive; - if (!UseHeavyMonitors) { + if (LockingMode == LM_LEGACY) { // Simple recursive lock? 
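// --------------------------------------------------------------------------
// [Editor's note -- illustrative sketch, not part of the patch] The LM_LEGACY
// unlock decision used just below (and by the interpreter/C1 unlock paths): a
// zero displaced header recorded in the on-stack BasicLock means the lock was
// taken recursively and the object's header was never changed; otherwise the
// saved header is CASed back. std::atomic stands in for the hardware cmpxchg:
#include <atomic>
#include <cstdint>

enum LegacyUnlockResult { RESTORED_HEADER, WAS_RECURSIVE, NEEDS_RUNTIME };

inline LegacyUnlockResult legacy_unlock(std::atomic<uintptr_t>& obj_mark,
                                        uintptr_t displaced_header,
                                        uintptr_t basic_lock_addr) {
  if (displaced_header == 0) {
    // Recursive stack-lock: an outer frame's BasicLock still owns the header.
    return WAS_RECURSIVE;
  }
  // Swing the header back from "address of our BasicLock" to the saved value.
  uintptr_t expected = basic_lock_addr;
  if (obj_mark.compare_exchange_strong(expected, displaced_header)) {
    return RESTORED_HEADER;
  }
  // The lock was inflated (or is not ours): defer to the runtime.
  return NEEDS_RUNTIME;
}
// --------------------------------------------------------------------------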
__ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); __ bnez(t0, not_recursive); @@ -1808,7 +1812,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, save_native_result(masm, ret_type, stack_slots); } - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ j(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { // get address of the stack lock __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size)); // get old displaced header @@ -1820,7 +1826,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ bind(count); __ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); } else { - __ j(slow_path_unlock); + assert(LockingMode == LM_LIGHTWEIGHT, ""); + __ ld(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ andi(t0, old_hdr, markWord::monitor_value); + __ bnez(t0, slow_path_unlock); + __ fast_unlock(obj_reg, old_hdr, swap_reg, t0, slow_path_unlock); + __ decrement(Address(xthread, JavaThread::held_monitor_count_offset())); } // slow path re-enters here diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 0e9dfa36483..88b45516fdb 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -454,7 +454,7 @@ int LIR_Assembler::emit_unwind_handler() { if (method()->is_synchronized()) { monitor_address(0, FrameMap::rax_opr); stub = new MonitorExitStub(FrameMap::rax_opr, true, 0); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { __ jmp(*stub->entry()); } else { __ unlock_object(rdi, rsi, rax, *stub->entry()); @@ -3500,7 +3500,7 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { Register obj = op->obj_opr()->as_register(); // may not be an oop Register hdr = op->hdr_opr()->as_register(); Register lock = op->lock_opr()->as_register(); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { if (op->info() != nullptr) { add_debug_info_for_null_check_here(op->info()); __ null_check(obj); @@ -3508,8 +3508,9 @@ void LIR_Assembler::emit_lock(LIR_OpLock* op) { __ jmp(*op->stub()->entry()); } else if (op->code() == lir_lock) { assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); + Register tmp = LockingMode == LM_LIGHTWEIGHT ? op->scratch_opr()->as_register() : noreg; // add debug info for NullPointerException only if one is possible - int null_check_offset = __ lock_object(hdr, obj, lock, *op->stub()->entry()); + int null_check_offset = __ lock_object(hdr, obj, lock, tmp, *op->stub()->entry()); if (op->info() != nullptr) { add_debug_info_for_null_check(null_check_offset, op->info()); } diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 2f34d4d2333..db332274a68 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -319,7 +319,8 @@ void LIRGenerator::do_MonitorEnter(MonitorEnter* x) { // this CodeEmitInfo must not have the xhandlers because here the // object is already locked (xhandlers expect object to be unlocked) CodeEmitInfo* info = state_for(x, x->state(), true); - monitor_enter(obj.result(), lock, syncTempOpr(), LIR_OprFact::illegalOpr, + LIR_Opr tmp = LockingMode == LM_LIGHTWEIGHT ? 
new_register(T_ADDRESS) : LIR_OprFact::illegalOpr; + monitor_enter(obj.result(), lock, syncTempOpr(), tmp, x->monitor_no(), info_for_exception, info); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index b7ad05276ea..8216ce5d4b4 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -38,12 +38,11 @@ #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" -int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { +int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register tmp, Label& slow_case) { const int aligned_mask = BytesPerWord -1; const int hdr_offset = oopDesc::mark_offset_in_bytes(); assert(hdr == rax, "hdr must be rax, for the cmpxchg instruction"); - assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); - Label done; + assert_different_registers(hdr, obj, disp_hdr, tmp); int null_check_offset = -1; verify_oop(obj); @@ -62,39 +61,51 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr // Load object header movptr(hdr, Address(obj, hdr_offset)); - // and mark it as unlocked - orptr(hdr, markWord::unlocked_value); - // save unlocked object header into the displaced header location on the stack - movptr(Address(disp_hdr, 0), hdr); - // test if object header is still the same (i.e. unlocked), and if so, store the - // displaced header address in the object header - if it is not the same, get the - // object header instead - MacroAssembler::lock(); // must be immediately before cmpxchg! - cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); - // if the object header was the same, we're done - jcc(Assembler::equal, done); - // if the object header was not the same, it is now in the hdr register - // => test if it is a stack pointer into the same stack (recursive locking), i.e.: - // - // 1) (hdr & aligned_mask) == 0 - // 2) rsp <= hdr - // 3) hdr <= rsp + page_size - // - // these 3 tests can be done by evaluating the following expression: - // - // (hdr - rsp) & (aligned_mask - page_size) - // - // assuming both the stack pointer and page_size have their least - // significant 2 bits cleared and page_size is a power of 2 - subptr(hdr, rsp); - andptr(hdr, aligned_mask - (int)os::vm_page_size()); - // for recursive locking, the result is zero => save it in the displaced header - // location (null in the displaced hdr location indicates recursive locking) - movptr(Address(disp_hdr, 0), hdr); - // otherwise we don't care about the result and handle locking via runtime call - jcc(Assembler::notZero, slow_case); - // done - bind(done); + + if (LockingMode == LM_LIGHTWEIGHT) { +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = disp_hdr; + get_thread(thread); +#endif + fast_lock_impl(obj, hdr, thread, tmp, slow_case); + } else if (LockingMode == LM_LEGACY) { + Label done; + // and mark it as unlocked + orptr(hdr, markWord::unlocked_value); + // save unlocked object header into the displaced header location on the stack + movptr(Address(disp_hdr, 0), hdr); + // test if object header is still the same (i.e. unlocked), and if so, store the + // displaced header address in the object header - if it is not the same, get the + // object header instead + MacroAssembler::lock(); // must be immediately before cmpxchg! 
+ cmpxchgptr(disp_hdr, Address(obj, hdr_offset)); + // if the object header was the same, we're done + jcc(Assembler::equal, done); + // if the object header was not the same, it is now in the hdr register + // => test if it is a stack pointer into the same stack (recursive locking), i.e.: + // + // 1) (hdr & aligned_mask) == 0 + // 2) rsp <= hdr + // 3) hdr <= rsp + page_size + // + // these 3 tests can be done by evaluating the following expression: + // + // (hdr - rsp) & (aligned_mask - page_size) + // + // assuming both the stack pointer and page_size have their least + // significant 2 bits cleared and page_size is a power of 2 + subptr(hdr, rsp); + andptr(hdr, aligned_mask - (int)os::vm_page_size()); + // for recursive locking, the result is zero => save it in the displaced header + // location (null in the displaced hdr location indicates recursive locking) + movptr(Address(disp_hdr, 0), hdr); + // otherwise we don't care about the result and handle locking via runtime call + jcc(Assembler::notZero, slow_case); + // done + bind(done); + } inc_held_monitor_count(); @@ -108,27 +119,35 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); Label done; - // load displaced header - movptr(hdr, Address(disp_hdr, 0)); - // if the loaded hdr is null we had recursive locking - testptr(hdr, hdr); - // if we had recursive locking, we are done - jcc(Assembler::zero, done); + if (LockingMode != LM_LIGHTWEIGHT) { + // load displaced header + movptr(hdr, Address(disp_hdr, 0)); + // if the loaded hdr is null we had recursive locking + testptr(hdr, hdr); + // if we had recursive locking, we are done + jcc(Assembler::zero, done); + } + // load object movptr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); - verify_oop(obj); - // test if object header is pointing to the displaced header, and if so, restore - // the displaced header in the object - if the object header is not pointing to - // the displaced header, get the object header instead - MacroAssembler::lock(); // must be immediately before cmpxchg! - cmpxchgptr(hdr, Address(obj, hdr_offset)); - // if the object header was not pointing to the displaced header, - // we do unlocking via runtime call - jcc(Assembler::notEqual, slow_case); - // done - bind(done); + if (LockingMode == LM_LIGHTWEIGHT) { + movptr(disp_hdr, Address(obj, hdr_offset)); + andptr(disp_hdr, ~(int32_t)markWord::lock_mask_in_place); + fast_unlock_impl(obj, disp_hdr, hdr, slow_case); + } else if (LockingMode == LM_LEGACY) { + // test if object header is pointing to the displaced header, and if so, restore + // the displaced header in the object - if the object header is not pointing to + // the displaced header, get the object header instead + MacroAssembler::lock(); // must be immediately before cmpxchg! 
+ cmpxchgptr(hdr, Address(obj, hdr_offset)); + // if the object header was not pointing to the displaced header, + // we do unlocking via runtime call + jcc(Assembler::notEqual, slow_case); + // done + } + bind(done); dec_held_monitor_count(); } diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp index fed8297791c..b3593feb056 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.hpp @@ -50,7 +50,7 @@ // obj : must point to the object to lock, contents preserved // disp_hdr: must point to the displaced header location, contents preserved // returns code offset at which to add null check debug information - int lock_object (Register swap, Register obj, Register disp_hdr, Label& slow_case); + int lock_object (Register swap, Register obj, Register disp_hdr, Register tmp, Label& slow_case); // unlocking // hdr : contents destroyed diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp index 5490e561240..cd5e87b29ec 100644 --- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "opto/c2_MacroAssembler.hpp" #include "opto/c2_CodeStubs.hpp" +#include "runtime/objectMonitor.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -72,4 +73,26 @@ void C2EntryBarrierStub::emit(C2_MacroAssembler& masm) { __ jmp(continuation(), false /* maybe_short */); } +#ifdef _LP64 +int C2HandleAnonOMOwnerStub::max_size() const { + // Max size of stub has been determined by testing with 0, in which case + // C2CodeStubList::emit() will throw an assertion and report the actual size that + // is needed. + return DEBUG_ONLY(36) NOT_DEBUG(21); +} + +void C2HandleAnonOMOwnerStub::emit(C2_MacroAssembler& masm) { + __ bind(entry()); + Register mon = monitor(); + Register t = tmp(); + __ movptr(Address(mon, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), r15_thread); + __ subl(Address(r15_thread, JavaThread::lock_stack_top_offset()), oopSize); +#ifdef ASSERT + __ movl(t, Address(r15_thread, JavaThread::lock_stack_top_offset())); + __ movptr(Address(r15_thread, t), 0); +#endif + __ jmp(continuation()); +} +#endif + #undef __ diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index f78da4bba18..eb708165f49 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -548,7 +548,7 @@ void C2_MacroAssembler::rtm_inflated_locking(Register objReg, Register boxReg, R // rax,: tmp -- KILLED // scr: tmp -- KILLED void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, - Register scrReg, Register cx1Reg, Register cx2Reg, + Register scrReg, Register cx1Reg, Register cx2Reg, Register thread, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, Metadata* method_data, @@ -590,7 +590,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { - assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive"); + assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive"); rtm_stack_locking(objReg, tmpReg, scrReg, cx2Reg, stack_rtm_counters, method_data, profile_rtm, DONE_LABEL, IsInflated); @@ -599,9 +599,12 @@ void C2_MacroAssembler::fast_lock(Register objReg, 
Register boxReg, Register tmp movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] testptr(tmpReg, markWord::monitor_value); // inflated vs stack-locked|neutral - jccb(Assembler::notZero, IsInflated); + jcc(Assembler::notZero, IsInflated); - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0. + testptr(objReg, objReg); + } else if (LockingMode == LM_LEGACY) { // Attempt stack-locking ... orptr (tmpReg, markWord::unlocked_value); movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS @@ -617,8 +620,9 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) ); movptr(Address(boxReg, 0), tmpReg); } else { - // Clear ZF so that we take the slow path at the DONE label. objReg is known to be not 0. - testptr(objReg, objReg); + assert(LockingMode == LM_LIGHTWEIGHT, ""); + fast_lock_impl(objReg, tmpReg, thread, scrReg, NO_COUNT); + jmp(COUNT); } jmp(DONE_LABEL); @@ -681,14 +685,14 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp movq(scrReg, tmpReg); xorq(tmpReg, tmpReg); lock(); - cmpxchgptr(r15_thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); + cmpxchgptr(thread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); // Unconditionally set box->_displaced_header = markWord::unused_mark(). // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. movptr(Address(boxReg, 0), checked_cast(markWord::unused_mark().value())); // Propagate ICC.ZF from CAS above into DONE_LABEL. jccb(Assembler::equal, COUNT); // CAS above succeeded; propagate ZF = 1 (success) - cmpptr(r15_thread, rax); // Check if we are already the owner (recursive lock) + cmpptr(thread, rax); // Check if we are already the owner (recursive lock) jccb(Assembler::notEqual, NO_COUNT); // If not recursive, ZF = 0 at this point (fail) incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success @@ -704,12 +708,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp bind(COUNT); // Count monitors in fast path -#ifndef _LP64 - get_thread(tmpReg); - incrementl(Address(tmpReg, JavaThread::held_monitor_count_offset())); -#else // _LP64 - incrementq(Address(r15_thread, JavaThread::held_monitor_count_offset())); -#endif + increment(Address(thread, JavaThread::held_monitor_count_offset())); xorl(tmpReg, tmpReg); // Set ZF == 1 @@ -761,7 +760,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t #if INCLUDE_RTM_OPT if (UseRTMForStackLocks && use_rtm) { - assert(!UseHeavyMonitors, "+UseHeavyMonitors and +UseRTMForStackLocks are mutually exclusive"); + assert(LockingMode != LM_MONITOR, "LockingMode == 0 (LM_MONITOR) and +UseRTMForStackLocks are mutually exclusive"); Label L_regular_unlock; movptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // fetch markword andptr(tmpReg, markWord::lock_mask_in_place); // look at 2 lock bits @@ -773,17 +772,35 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t } #endif - if (!UseHeavyMonitors) { + if (LockingMode == LM_LEGACY) { cmpptr(Address(boxReg, 0), NULL_WORD); // Examine the displaced header jcc (Assembler::zero, COUNT); // 0 indicates recursive stack-lock } movptr(tmpReg, Address(objReg, 
oopDesc::mark_offset_in_bytes())); // Examine the object's markword - if (!UseHeavyMonitors) { + if (LockingMode != LM_MONITOR) { testptr(tmpReg, markWord::monitor_value); // Inflated? - jccb (Assembler::zero, Stacked); + jcc(Assembler::zero, Stacked); } // It's inflated. + if (LockingMode == LM_LIGHTWEIGHT) { + // If the owner is ANONYMOUS, we need to fix it - in an outline stub. + testb(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t) ObjectMonitor::ANONYMOUS_OWNER); +#ifdef _LP64 + if (!Compile::current()->output()->in_scratch_emit_size()) { + C2HandleAnonOMOwnerStub* stub = new (Compile::current()->comp_arena()) C2HandleAnonOMOwnerStub(tmpReg, boxReg); + Compile::current()->output()->add_stub(stub); + jcc(Assembler::notEqual, stub->entry()); + bind(stub->continuation()); + } else +#endif + { + // We can't easily implement this optimization on 32 bit because we don't have a thread register. + // Call the slow-path instead. + jcc(Assembler::notEqual, NO_COUNT); + } + } + #if INCLUDE_RTM_OPT if (use_rtm) { Label L_regular_inflated_unlock; @@ -792,7 +809,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t testptr(boxReg, boxReg); jccb(Assembler::notZero, L_regular_inflated_unlock); xend(); - jmpb(DONE_LABEL); + jmp(DONE_LABEL); bind(L_regular_inflated_unlock); } #endif @@ -904,11 +921,17 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t jmpb (DONE_LABEL); #endif - if (!UseHeavyMonitors) { + if (LockingMode != LM_MONITOR) { bind (Stacked); - movptr(tmpReg, Address (boxReg, 0)); // re-fetch - lock(); - cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box + if (LockingMode == LM_LIGHTWEIGHT) { + mov(boxReg, tmpReg); + fast_unlock_impl(objReg, boxReg, tmpReg, NO_COUNT); + jmp(COUNT); + } else if (LockingMode == LM_LEGACY) { + movptr(tmpReg, Address (boxReg, 0)); // re-fetch + lock(); + cmpxchgptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Uses RAX which is box + } // Intentional fall-thru into DONE_LABEL } bind(DONE_LABEL); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index e91937b6e34..952d99e4f77 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -36,7 +36,7 @@ public: // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. // See full description in macroAssembler_x86.cpp. void fast_lock(Register obj, Register box, Register tmp, - Register scr, Register cx1, Register cx2, + Register scr, Register cx1, Register cx2, Register thread, RTMLockingCounters* rtm_counters, RTMLockingCounters* stack_rtm_counters, Metadata* method_data, diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index 2b9db41b99c..5b57299d4f6 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -1196,7 +1196,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx), "The argument is only for looks. 
It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); @@ -1223,69 +1223,86 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { jcc(Assembler::notZero, slow_case); } - // Load immediate 1 into swap_reg %rax - movl(swap_reg, 1); + if (LockingMode == LM_LIGHTWEIGHT) { +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = lock_reg; + get_thread(thread); +#endif + // Load object header, prepare for CAS from unlocked to locked. + movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + fast_lock_impl(obj_reg, swap_reg, thread, tmp_reg, slow_case); + } else if (LockingMode == LM_LEGACY) { + // Load immediate 1 into swap_reg %rax + movl(swap_reg, 1); - // Load (object->mark() | 1) into swap_reg %rax - orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Load (object->mark() | 1) into swap_reg %rax + orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // Save (object->mark() | 1) into BasicLock's displaced header - movptr(Address(lock_reg, mark_offset), swap_reg); + // Save (object->mark() | 1) into BasicLock's displaced header + movptr(Address(lock_reg, mark_offset), swap_reg); - assert(lock_offset == 0, - "displaced header must be first word in BasicObjectLock"); + assert(lock_offset == 0, + "displaced header must be first word in BasicObjectLock"); - lock(); - cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - jcc(Assembler::zero, count_locking); + lock(); + cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::zero, count_locking); - const int zero_bits = LP64_ONLY(7) NOT_LP64(3); + const int zero_bits = LP64_ONLY(7) NOT_LP64(3); - // Fast check for recursive lock. - // - // Can apply the optimization only if this is a stack lock - // allocated in this thread. For efficiency, we can focus on - // recently allocated stack locks (instead of reading the stack - // base and checking whether 'mark' points inside the current - // thread stack): - // 1) (mark & zero_bits) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // - // Warning: rsp + os::pagesize can overflow the stack base. We must - // neither apply the optimization for an inflated lock allocated - // just above the thread stack (this is why condition 1 matters) - // nor apply the optimization if the stack lock is inside the stack - // of another thread. The latter is avoided even in case of overflow - // because we have guard pages at the end of all stacks. Hence, if - // we go over the stack base and hit the stack of another thread, - // this should not be in a writeable area that could contain a - // stack lock allocated by that thread. As a consequence, a stack - // lock less than page size away from rsp is guaranteed to be - // owned by the current thread. - // - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant bits clear. - // NOTE: the mark is in swap_reg %rax as the result of cmpxchg - subptr(swap_reg, rsp); - andptr(swap_reg, zero_bits - (int)os::vm_page_size()); + // Fast check for recursive lock. + // + // Can apply the optimization only if this is a stack lock + // allocated in this thread. 
For efficiency, we can focus on + // recently allocated stack locks (instead of reading the stack + // base and checking whether 'mark' points inside the current + // thread stack): + // 1) (mark & zero_bits) == 0, and + // 2) rsp <= mark < mark + os::pagesize() + // + // Warning: rsp + os::pagesize can overflow the stack base. We must + // neither apply the optimization for an inflated lock allocated + // just above the thread stack (this is why condition 1 matters) + // nor apply the optimization if the stack lock is inside the stack + // of another thread. The latter is avoided even in case of overflow + // because we have guard pages at the end of all stacks. Hence, if + // we go over the stack base and hit the stack of another thread, + // this should not be in a writeable area that could contain a + // stack lock allocated by that thread. As a consequence, a stack + // lock less than page size away from rsp is guaranteed to be + // owned by the current thread. + // + // These 3 tests can be done by evaluating the following + // expression: ((mark - rsp) & (zero_bits - os::vm_page_size())), + // assuming both stack pointer and pagesize have their + // least significant bits clear. + // NOTE: the mark is in swap_reg %rax as the result of cmpxchg + subptr(swap_reg, rsp); + andptr(swap_reg, zero_bits - (int)os::vm_page_size()); - // Save the test result, for recursive case, the result is zero - movptr(Address(lock_reg, mark_offset), swap_reg); - jcc(Assembler::notZero, slow_case); + // Save the test result, for recursive case, the result is zero + movptr(Address(lock_reg, mark_offset), swap_reg); + jcc(Assembler::notZero, slow_case); - bind(count_locking); + bind(count_locking); + } inc_held_monitor_count(); jmp(done); bind(slow_case); // Call the runtime routine for slow case - call_VM(noreg, - CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - lock_reg); - + if (LockingMode == LM_LIGHTWEIGHT) { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + obj_reg); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + lock_reg); + } bind(done); } } @@ -1307,7 +1324,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx), "The argument is only for looks. 
It must be c_rarg1"); - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); } else { Label count_locking, done, slow_case; @@ -1318,9 +1335,11 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { save_bcp(); // Save in case of exception - // Convert from BasicObjectLock structure to object and BasicLock - // structure Store the BasicLock address into %rax - lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + if (LockingMode != LM_LIGHTWEIGHT) { + // Convert from BasicObjectLock structure to object and BasicLock + // structure Store the BasicLock address into %rax + lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); + } // Load oop into obj_reg(%c_rarg3) movptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); @@ -1328,24 +1347,42 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { // Free entry movptr(Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()), NULL_WORD); - // Load the old header from BasicLock structure - movptr(header_reg, Address(swap_reg, - BasicLock::displaced_header_offset_in_bytes())); + if (LockingMode == LM_LIGHTWEIGHT) { +#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = header_reg; + get_thread(thread); +#endif + // Handle unstructured locking. + Register tmp = swap_reg; + movl(tmp, Address(thread, JavaThread::lock_stack_top_offset())); + cmpptr(obj_reg, Address(thread, tmp, Address::times_1, -oopSize)); + jcc(Assembler::notEqual, slow_case); + // Try to swing header from locked to unlocked. + movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andptr(swap_reg, ~(int32_t)markWord::lock_mask_in_place); + fast_unlock_impl(obj_reg, swap_reg, header_reg, slow_case); + } else if (LockingMode == LM_LEGACY) { + // Load the old header from BasicLock structure + movptr(header_reg, Address(swap_reg, + BasicLock::displaced_header_offset_in_bytes())); - // Test for recursion - testptr(header_reg, header_reg); + // Test for recursion + testptr(header_reg, header_reg); - // zero for recursive case - jcc(Assembler::zero, count_locking); + // zero for recursive case + jcc(Assembler::zero, count_locking); - // Atomic swap back the old header - lock(); - cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + // Atomic swap back the old header + lock(); + cmpxchgptr(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - // zero for simple unlock of a stack-lock case - jcc(Assembler::notZero, slow_case); + // zero for simple unlock of a stack-lock case + jcc(Assembler::notZero, slow_case); - bind(count_locking); + bind(count_locking); + } dec_held_monitor_count(); jmp(done); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 575d268ce74..89978b6bebb 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -9670,3 +9670,70 @@ void MacroAssembler::check_stack_alignment(Register sp, const char* msg, unsigne stop(msg); bind(L_stack_ok); } + +// Implements fast-locking. +// Branches to slow upon failure to lock the object, with ZF cleared. +// Falls through upon success with unspecified ZF. 
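Before reading the assembly below, it may help to see the same fast-lock sequence as a standalone C++ model: check that the per-thread lock-stack has room, CAS the mark word from unlocked (01) to locked (00), and only then push the object onto the lock-stack. This is only an illustrative sketch of the algorithm; ToyThread, toy_fast_lock and the constants are stand-ins, not HotSpot code.

#include <atomic>
#include <cstdint>

// Illustrative constants: the two low mark bits, 01 == unlocked, 00 == fast-locked.
constexpr uintptr_t lock_mask      = 0x3;
constexpr uintptr_t unlocked_value = 0x1;
constexpr int       capacity       = 8;   // lock-stack capacity, mirrors LockStack::CAPACITY

struct ToyThread {
  uintptr_t* lock_stack[capacity];
  int        top = 0;                     // index of the next free slot
};

// Returns true on success; on failure the caller takes the slow path (inflation).
bool toy_fast_lock(std::atomic<uintptr_t>& mark, uintptr_t* obj, ToyThread& self) {
  if (self.top >= capacity) {
    return false;                                                 // no room on the lock-stack
  }
  uintptr_t observed = mark.load(std::memory_order_relaxed);
  uintptr_t expected = (observed & ~lock_mask) | unlocked_value;  // must currently look unlocked
  uintptr_t locked   = observed & ~lock_mask;                     // clear both bits: fast-locked
  if (!mark.compare_exchange_strong(expected, locked)) {
    return false;                                                 // contended or already locked
  }
  self.lock_stack[self.top++] = obj;                              // push only after the CAS succeeded
  return true;
}

int main() {
  std::atomic<uintptr_t> mark{0x100 | unlocked_value};            // a neutral, unlocked header
  uintptr_t obj;
  ToyThread self;
  return toy_fast_lock(mark, &obj, self) ? 0 : 1;                 // expected to take the fast path
}

The real routine does the room check as an offset comparison against LockStack::end_offset(), and pushing only after a successful CAS keeps the lock-stack free of objects the thread does not actually own.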
+// +// obj: the object to be locked +// hdr: the (pre-loaded) header of the object, must be rax +// thread: the thread which attempts to lock obj +// tmp: a temporary register +void MacroAssembler::fast_lock_impl(Register obj, Register hdr, Register thread, Register tmp, Label& slow) { + assert(hdr == rax, "header must be in rax for cmpxchg"); + assert_different_registers(obj, hdr, thread, tmp); + + // First we need to check if the lock-stack has room for pushing the object reference. + // Note: we subtract 1 from the end-offset so that we can do a 'greater' comparison, instead + // of 'greaterEqual' below, which readily clears the ZF. This makes C2 code a little simpler and + // avoids one branch. + cmpl(Address(thread, JavaThread::lock_stack_top_offset()), LockStack::end_offset() - 1); + jcc(Assembler::greater, slow); + + // Now we attempt to take the fast-lock. + // Clear lock_mask bits (locked state). + andptr(hdr, ~(int32_t)markWord::lock_mask_in_place); + movptr(tmp, hdr); + // Set unlocked_value bit. + orptr(hdr, markWord::unlocked_value); + lock(); + cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, slow); + + // If successful, push object to lock-stack. + movl(tmp, Address(thread, JavaThread::lock_stack_top_offset())); + movptr(Address(thread, tmp), obj); + incrementl(tmp, oopSize); + movl(Address(thread, JavaThread::lock_stack_top_offset()), tmp); +} + +// Implements fast-unlocking. +// Branches to slow upon failure, with ZF cleared. +// Falls through upon success, with unspecified ZF. +// +// obj: the object to be unlocked +// hdr: the (pre-loaded) header of the object, must be rax +// tmp: a temporary register +void MacroAssembler::fast_unlock_impl(Register obj, Register hdr, Register tmp, Label& slow) { + assert(hdr == rax, "header must be in rax for cmpxchg"); + assert_different_registers(obj, hdr, tmp); + + // Mark-word must be lock_mask now, try to swing it back to unlocked_value. + movptr(tmp, hdr); // The expected old value + orptr(tmp, markWord::unlocked_value); + lock(); + cmpxchgptr(tmp, Address(obj, oopDesc::mark_offset_in_bytes())); + jcc(Assembler::notEqual, slow); + // Pop the lock object from the lock-stack. 
+#ifdef _LP64 + const Register thread = r15_thread; +#else + const Register thread = rax; + get_thread(thread); +#endif + subl(Address(thread, JavaThread::lock_stack_top_offset()), oopSize); +#ifdef ASSERT + movl(tmp, Address(thread, JavaThread::lock_stack_top_offset())); + movptr(Address(thread, tmp), 0); +#endif +} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index b30517535a6..3df81129a55 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -149,6 +149,8 @@ class MacroAssembler: public Assembler { void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } + void increment(Address dst, int value = 1) { LP64_ONLY(incrementq(dst, value)) NOT_LP64(incrementl(dst, value)) ; } + void decrement(Address dst, int value = 1) { LP64_ONLY(decrementq(dst, value)) NOT_LP64(decrementl(dst, value)) ; } void decrementl(Address dst, int value = 1); void decrementl(Register reg, int value = 1); @@ -2007,6 +2009,8 @@ public: void check_stack_alignment(Register sp, const char* msg, unsigned bias = 0, Register tmp = noreg); + void fast_lock_impl(Register obj, Register hdr, Register thread, Register tmp, Label& slow); + void fast_unlock_impl(Register obj, Register hdr, Register tmp, Label& slow); }; /** diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp index 99464fb1884..23350702634 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp @@ -1680,7 +1680,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { // Load immediate 1 into swap_reg %rax, __ movptr(swap_reg, 1); @@ -1712,7 +1714,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ movptr(Address(lock_reg, mark_word_offset), swap_reg); __ jcc(Assembler::notEqual, slow_path_lock); } else { - __ jmp(slow_path_lock); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + // Load object header + __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ fast_lock_impl(obj_reg, swap_reg, thread, lock_reg, slow_path_lock); } __ bind(count_mon); __ inc_held_monitor_count(); @@ -1836,7 +1841,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Get locked oop from the handle we passed to jni __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (!UseHeavyMonitors) { + if (LockingMode == LM_LEGACY) { Label not_recur; // Simple recursive lock? 
__ cmpptr(Address(rbp, lock_slot_rbp_offset), NULL_WORD); @@ -1851,7 +1856,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, save_native_result(masm, ret_type, stack_slots); } - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { // get old displaced header __ movptr(rbx, Address(rbp, lock_slot_rbp_offset)); @@ -1866,7 +1873,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ jcc(Assembler::notEqual, slow_path_unlock); __ dec_held_monitor_count(); } else { - __ jmp(slow_path_unlock); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ andptr(swap_reg, ~(int32_t)markWord::lock_mask_in_place); + __ fast_unlock_impl(obj_reg, swap_reg, lock_reg, slow_path_unlock); + __ dec_held_monitor_count(); } // slow path re-enters here diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp index 127fa9072c7..6d77efdfc5a 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp @@ -2149,8 +2149,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Load the oop from the handle __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (!UseHeavyMonitors) { - + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_lock); + } else if (LockingMode == LM_LEGACY) { // Load immediate 1 into swap_reg %rax __ movl(swap_reg, 1); @@ -2183,7 +2184,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ movptr(Address(lock_reg, mark_word_offset), swap_reg); __ jcc(Assembler::notEqual, slow_path_lock); } else { - __ jmp(slow_path_lock); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + // Load object header + __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ fast_lock_impl(obj_reg, swap_reg, r15_thread, rscratch1, slow_path_lock); } __ bind(count_mon); __ inc_held_monitor_count(); @@ -2295,7 +2299,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Get locked oop from the handle we passed to jni __ movptr(obj_reg, Address(oop_handle_reg, 0)); - if (!UseHeavyMonitors) { + if (LockingMode == LM_LEGACY) { Label not_recur; // Simple recursive lock? 
__ cmpptr(Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size), NULL_WORD); @@ -2310,7 +2314,9 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, save_native_result(masm, ret_type, stack_slots); } - if (!UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { + __ jmp(slow_path_unlock); + } else if (LockingMode == LM_LEGACY) { // get address of the stack lock __ lea(rax, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size)); // get old displaced header @@ -2322,7 +2328,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, __ jcc(Assembler::notEqual, slow_path_unlock); __ dec_held_monitor_count(); } else { - __ jmp(slow_path_unlock); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + __ movptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + __ andptr(swap_reg, ~(int32_t)markWord::lock_mask_in_place); + __ fast_unlock_impl(obj_reg, swap_reg, lock_reg, slow_path_unlock); + __ dec_held_monitor_count(); } // slow path re-enters here diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad index 0b8bd0b2157..baa81934012 100644 --- a/src/hotspot/cpu/x86/x86_32.ad +++ b/src/hotspot/cpu/x86/x86_32.ad @@ -13760,15 +13760,16 @@ instruct RethrowException() // inlined locking and unlocking -instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{ +instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2, eRegP thread) %{ predicate(Compile::current()->use_rtm()); match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); + effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box, TEMP thread); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} ins_encode %{ + __ get_thread($thread$$Register); __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, $cx2$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, $thread$$Register, _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); @@ -13776,15 +13777,16 @@ instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eD ins_pipe(pipe_slow); %} -instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{ +instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{ predicate(!Compile::current()->use_rtm()); match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP scr, USE_KILL box); + effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread); ins_cost(300); format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} ins_encode %{ + __ get_thread($thread$$Register); __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false); + $scr$$Register, noreg, noreg, $thread$$Register, nullptr, nullptr, nullptr, false, false); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index ff381e5e7a2..2168ee4252a 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -13291,7 +13291,7 @@ instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, format %{ "fastlock $object,$box\t! 
kills $box,$tmp,$scr,$cx1,$cx2" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, $cx1$$Register, $cx2$$Register, + $scr$$Register, $cx1$$Register, $cx2$$Register, r15_thread, _rtm_counters, _stack_rtm_counters, ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), true, ra_->C->profile_rtm()); @@ -13307,7 +13307,7 @@ instruct cmpFastLock(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rRe format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr" %} ins_encode %{ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, noreg, noreg, NULL, NULL, NULL, false, false); + $scr$$Register, noreg, noreg, r15_thread, nullptr, nullptr, nullptr, false, false); %} ins_pipe(pipe_slow); %} diff --git a/src/hotspot/cpu/zero/vm_version_zero.cpp b/src/hotspot/cpu/zero/vm_version_zero.cpp index 4c5e343dbbf..3d17e159a61 100644 --- a/src/hotspot/cpu/zero/vm_version_zero.cpp +++ b/src/hotspot/cpu/zero/vm_version_zero.cpp @@ -116,6 +116,11 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); } + if ((LockingMode != LM_LEGACY) && (LockingMode != LM_MONITOR)) { + warning("Unsupported locking mode for this CPU."); + FLAG_SET_DEFAULT(LockingMode, LM_LEGACY); + } + // Enable error context decoding on known platforms #if defined(IA32) || defined(AMD64) || defined(ARM) || \ defined(AARCH64) || defined(PPC) || defined(RISCV) || \ diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp index a195a076091..d92b43f0ea5 100644 --- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp +++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp @@ -333,7 +333,7 @@ int ZeroInterpreter::native_entry(Method* method, intptr_t UNUSED, TRAPS) { oop lockee = monitor->obj(); markWord disp = lockee->mark().set_unlocked(); monitor->lock()->set_displaced_header(disp); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); bool inc_monitor_count = true; if (call_vm || lockee->cas_set_mark(markWord::from_pointer(monitor), disp) != disp) { // Is it simple recursive case? diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 41f93645581..4ff2bf91996 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -621,7 +621,7 @@ void LIRGenerator::monitor_exit(LIR_Opr object, LIR_Opr lock, LIR_Opr new_hdr, L // setup registers LIR_Opr hdr = lock; lock = new_hdr; - CodeStub* slow_path = new MonitorExitStub(lock, !UseHeavyMonitors, monitor_no); + CodeStub* slow_path = new MonitorExitStub(lock, LockingMode != LM_MONITOR, monitor_no); __ load_stack_address_monitor(monitor_no, lock); __ unlock_object(hdr, object, lock, scratch, slow_path); } diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp index bfe83c019be..2c8d9aa47f5 100644 --- a/src/hotspot/share/c1/c1_Runtime1.cpp +++ b/src/hotspot/share/c1/c1_Runtime1.cpp @@ -754,11 +754,11 @@ JRT_BLOCK_ENTRY(void, Runtime1::monitorenter(JavaThread* current, oopDesc* obj, _monitorenter_slowcase_cnt++; } #endif - if (UseHeavyMonitors) { + if (LockingMode == LM_MONITOR) { lock->set_obj(obj); } - assert(obj == lock->obj(), "must match"); - SharedRuntime::monitor_enter_helper(obj, lock->lock(), current); + assert(LockingMode == LM_LIGHTWEIGHT || obj == lock->obj(), "must match"); + SharedRuntime::monitor_enter_helper(obj, LockingMode == LM_LIGHTWEIGHT ? 
nullptr : lock->lock(), current); JRT_END diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index f9a795c5235..ece13cde989 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -735,6 +735,7 @@ void InterpreterRuntime::resolve_get_put(JavaThread* current, Bytecodes::Code by //%note monitor_1 JRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter(JavaThread* current, BasicObjectLock* elem)) + assert(LockingMode != LM_LIGHTWEIGHT, "Should call monitorenter_obj() when using the new lightweight locking"); #ifdef ASSERT current->last_frame().interpreter_frame_verify_monitor(elem); #endif @@ -749,6 +750,22 @@ JRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter(JavaThread* current, B #endif JRT_END +// NOTE: We provide a separate implementation for the new lightweight locking to workaround a limitation +// of registers in x86_32. This entry point accepts an oop instead of a BasicObjectLock*. +// The problem is that we would need to preserve the register that holds the BasicObjectLock, +// but we are using that register to hold the thread. We don't have enough registers to +// also keep the BasicObjectLock, but we don't really need it anyway, we only need +// the object. See also InterpreterMacroAssembler::lock_object(). +// As soon as legacy stack-locking goes away we could remove the other monitorenter() entry +// point, and only use oop-accepting entries (same for monitorexit() below). +JRT_ENTRY_NO_ASYNC(void, InterpreterRuntime::monitorenter_obj(JavaThread* current, oopDesc* obj)) + assert(LockingMode == LM_LIGHTWEIGHT, "Should call monitorenter() when not using the new lightweight locking"); + Handle h_obj(current, cast_to_oop(obj)); + assert(Universe::heap()->is_in_or_null(h_obj()), + "must be null or an object"); + ObjectSynchronizer::enter(h_obj, nullptr, current); + return; +JRT_END JRT_LEAF(void, InterpreterRuntime::monitorexit(BasicObjectLock* elem)) oop obj = elem->obj(); diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp index ee3337d8f7f..97cfcb1eae6 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.hpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp @@ -103,6 +103,7 @@ class InterpreterRuntime: AllStatic { public: // Synchronization static void monitorenter(JavaThread* current, BasicObjectLock* elem); + static void monitorenter_obj(JavaThread* current, oopDesc* obj); static void monitorexit (BasicObjectLock* elem); static void throw_illegal_monitor_state_exception(JavaThread* current); diff --git a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp index 8ee8d9ffd5d..92587394a8e 100644 --- a/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp +++ b/src/hotspot/share/interpreter/zero/bytecodeInterpreter.cpp @@ -624,7 +624,7 @@ void BytecodeInterpreter::run(interpreterState istate) { // Traditional lightweight locking. markWord displaced = rcvr->mark().set_unlocked(); mon->lock()->set_displaced_header(displaced); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); bool inc_monitor_count = true; if (call_vm || rcvr->cas_set_mark(markWord::from_pointer(mon), displaced) != displaced) { // Is it simple recursive case? 
@@ -723,7 +723,7 @@ void BytecodeInterpreter::run(interpreterState istate) { // traditional lightweight locking markWord displaced = lockee->mark().set_unlocked(); entry->lock()->set_displaced_header(displaced); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); bool inc_monitor_count = true; if (call_vm || lockee->cas_set_mark(markWord::from_pointer(entry), displaced) != displaced) { // Is it simple recursive case? @@ -1653,7 +1653,7 @@ run: // traditional lightweight locking markWord displaced = lockee->mark().set_unlocked(); entry->lock()->set_displaced_header(displaced); - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); bool inc_monitor_count = true; if (call_vm || lockee->cas_set_mark(markWord::from_pointer(entry), displaced) != displaced) { // Is it simple recursive case? @@ -1689,7 +1689,7 @@ run: // If it isn't recursive we either must swap old header or call the runtime bool dec_monitor_count = true; - bool call_vm = UseHeavyMonitors; + bool call_vm = (LockingMode == LM_MONITOR); if (header.to_pointer() != nullptr || call_vm) { markWord old_header = markWord::encode(lock); if (call_vm || lockee->cas_set_mark(header, old_header) != old_header) { @@ -3189,7 +3189,7 @@ run: illegal_state_oop = Handle(THREAD, THREAD->pending_exception()); THREAD->clear_pending_exception(); } - } else if (UseHeavyMonitors) { + } else if (LockingMode == LM_MONITOR) { InterpreterRuntime::monitorexit(base); if (THREAD->has_pending_exception()) { if (!suppress_error) illegal_state_oop = Handle(THREAD, THREAD->pending_exception()); diff --git a/src/hotspot/share/logging/logTag.hpp b/src/hotspot/share/logging/logTag.hpp index e10a8c8fe47..edf33afa43d 100644 --- a/src/hotspot/share/logging/logTag.hpp +++ b/src/hotspot/share/logging/logTag.hpp @@ -77,6 +77,7 @@ class outputStream; LOG_TAG(event) \ LOG_TAG(exceptions) \ LOG_TAG(exit) \ + LOG_TAG(fastlock) \ LOG_TAG(finalizer) \ LOG_TAG(fingerprint) \ NOT_PRODUCT(LOG_TAG(foreign)) \ diff --git a/src/hotspot/share/oops/markWord.hpp b/src/hotspot/share/oops/markWord.hpp index 2250d2a95bf..d7dc61d8024 100644 --- a/src/hotspot/share/oops/markWord.hpp +++ b/src/hotspot/share/oops/markWord.hpp @@ -50,11 +50,12 @@ // // - the two lock bits are used to describe three states: locked/unlocked and monitor. // -// [ptr | 00] locked ptr points to real header on stack +// [ptr | 00] locked ptr points to real header on stack (stack-locking in use) +// [header | 00] locked locked regular object header (fast-locking in use) // [header | 01] unlocked regular object header -// [ptr | 10] monitor inflated lock (header is wapped out) +// [ptr | 10] monitor inflated lock (header is swapped out) // [ptr | 11] marked used to mark an object -// [0 ............ 0| 00] inflating inflation in progress +// [0 ............ 0| 00] inflating inflation in progress (stack-locking in use) // // We assume that stack/thread pointers have the lowest two bits cleared. // @@ -156,6 +157,7 @@ class markWord { // check for and avoid overwriting a 0 value installed by some // other thread. (They should spin or block instead. The 0 value // is transient and *should* be short-lived). + // Fast-locking does not use INFLATING. static markWord INFLATING() { return zero(); } // inflate-in-progress // Should this header be preserved during GC? 
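The mark-word table above gives the two low lock bits a second reading: with LM_LIGHTWEIGHT the 00 pattern means the otherwise unchanged header is still stored in the object (fast-locked), not a pointer to a stack BasicLock. A small standalone sketch of those bit tests, with values mirroring the table (00 locked, 01 unlocked, 10 monitor); everything here is illustrative, not markWord itself.

#include <cassert>
#include <cstdint>

constexpr uintptr_t lock_mask      = 0x3;
constexpr uintptr_t locked_value   = 0x0;  // 00: stack-locked (legacy) or fast-locked (lightweight)
constexpr uintptr_t unlocked_value = 0x1;  // 01: unlocked, header stored in place
constexpr uintptr_t monitor_value  = 0x2;  // 10: inflated, rest of the word designates the monitor

bool is_fast_locked(uintptr_t mark)       { return (mark & lock_mask) == locked_value; }
bool has_monitor(uintptr_t mark)          { return (mark & lock_mask) == monitor_value; }
uintptr_t set_fast_locked(uintptr_t mark) { return mark & ~lock_mask; }   // clear both lock bits
uintptr_t set_unlocked(uintptr_t mark)    { return mark | unlocked_value; }

int main() {
  uintptr_t neutral = 0x12345678 | unlocked_value;  // hash/age bits with an unlocked tag
  uintptr_t locked  = set_fast_locked(neutral);
  assert(is_fast_locked(locked) && !has_monitor(locked));
  assert(set_unlocked(locked) == neutral);          // unlocking restores the neutral header
  return 0;
}

Because the header never leaves the object under fast-locking, the all-zero INFLATING() protocol used by stack-locking is not needed, which is what the added comment above points out.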
@@ -170,12 +172,23 @@ class markWord { return markWord(value() | unlocked_value); } bool has_locker() const { - return ((value() & lock_mask_in_place) == locked_value); + assert(LockingMode == LM_LEGACY, "should only be called with legacy stack locking"); + return (value() & lock_mask_in_place) == locked_value; } BasicLock* locker() const { assert(has_locker(), "check"); return (BasicLock*) value(); } + + bool is_fast_locked() const { + assert(LockingMode == LM_LIGHTWEIGHT, "should only be called with new lightweight locking"); + return (value() & lock_mask_in_place) == locked_value; + } + markWord set_fast_locked() const { + // Clear the lock_mask_in_place bits to set locked_value: + return markWord(value() & ~lock_mask_in_place); + } + bool has_monitor() const { return ((value() & lock_mask_in_place) == monitor_value); } @@ -185,7 +198,9 @@ class markWord { return (ObjectMonitor*) (value() ^ monitor_value); } bool has_displaced_mark_helper() const { - return ((value() & unlocked_value) == 0); + intptr_t lockbits = value() & lock_mask_in_place; + return LockingMode == LM_LIGHTWEIGHT ? lockbits == monitor_value // monitor? + : (lockbits & unlocked_value) == 0; // monitor | stack-locked? } markWord displaced_mark_helper() const; void set_displaced_mark_helper(markWord m) const; diff --git a/src/hotspot/share/oops/oop.cpp b/src/hotspot/share/oops/oop.cpp index 7d170177ab7..eb5624e402b 100644 --- a/src/hotspot/share/oops/oop.cpp +++ b/src/hotspot/share/oops/oop.cpp @@ -119,7 +119,7 @@ bool oopDesc::is_oop(oop obj, bool ignore_mark_word) { } // Header verification: the mark is typically non-zero. If we're - // at a safepoint, it must not be zero. + // at a safepoint, it must not be zero, except when using the new lightweight locking. // Outside of a safepoint, the header could be changing (for example, // another thread could be inflating a lock on this object). 
if (ignore_mark_word) { @@ -128,7 +128,7 @@ bool oopDesc::is_oop(oop obj, bool ignore_mark_word) { if (obj->mark().value() != 0) { return true; } - return !SafepointSynchronize::is_at_safepoint(); + return LockingMode == LM_LIGHTWEIGHT || !SafepointSynchronize::is_at_safepoint(); } // used only for asserts and guarantees diff --git a/src/hotspot/share/opto/c2_CodeStubs.hpp b/src/hotspot/share/opto/c2_CodeStubs.hpp index 5ef6966656b..8af906d0542 100644 --- a/src/hotspot/share/opto/c2_CodeStubs.hpp +++ b/src/hotspot/share/opto/c2_CodeStubs.hpp @@ -86,4 +86,19 @@ public: void emit(C2_MacroAssembler& masm); }; +#ifdef _LP64 +class C2HandleAnonOMOwnerStub : public C2CodeStub { +private: + Register _monitor; + Register _tmp; +public: + C2HandleAnonOMOwnerStub(Register monitor, Register tmp = noreg) : C2CodeStub(), + _monitor(monitor), _tmp(tmp) {} + Register monitor() { return _monitor; } + Register tmp() { return _tmp; } + int max_size() const; + void emit(C2_MacroAssembler& masm); +}; +#endif + #endif // SHARE_OPTO_C2_CODESTUBS_HPP diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 0a37ea11ddd..c5d750a18b4 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -1939,27 +1939,43 @@ bool Arguments::check_vm_args_consistency() { } #endif -#if !defined(X86) && !defined(AARCH64) && !defined(PPC64) && !defined(RISCV64) + +#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM) + if (LockingMode == LM_LIGHTWEIGHT) { + FLAG_SET_CMDLINE(LockingMode, LM_LEGACY); + warning("New lightweight locking not supported on this platform"); + } +#endif + if (UseHeavyMonitors) { + if (FLAG_IS_CMDLINE(LockingMode) && LockingMode != LM_MONITOR) { + jio_fprintf(defaultStream::error_stream(), + "Conflicting -XX:+UseHeavyMonitors and -XX:LockingMode=%d flags", LockingMode); + return false; + } + FLAG_SET_CMDLINE(LockingMode, LM_MONITOR); + } + +#if !defined(X86) && !defined(AARCH64) && !defined(PPC64) && !defined(RISCV64) + if (LockingMode == LM_MONITOR) { jio_fprintf(defaultStream::error_stream(), - "UseHeavyMonitors is not fully implemented on this architecture"); + "LockingMode == 0 (LM_MONITOR) is not fully implemented on this architecture"); return false; } #endif #if (defined(X86) || defined(PPC64)) && !defined(ZERO) - if (UseHeavyMonitors && UseRTMForStackLocks) { + if (LockingMode == LM_MONITOR && UseRTMForStackLocks) { jio_fprintf(defaultStream::error_stream(), - "-XX:+UseHeavyMonitors and -XX:+UseRTMForStackLocks are mutually exclusive"); + "LockingMode == 0 (LM_MONITOR) and -XX:+UseRTMForStackLocks are mutually exclusive"); return false; } #endif - if (VerifyHeavyMonitors && !UseHeavyMonitors) { + if (VerifyHeavyMonitors && LockingMode != LM_MONITOR) { jio_fprintf(defaultStream::error_stream(), - "-XX:+VerifyHeavyMonitors requires -XX:+UseHeavyMonitors"); + "-XX:+VerifyHeavyMonitors requires LockingMode == 0 (LM_MONITOR)"); return false; } - return status; } diff --git a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp index 1e3783e0628..d62100c4cf8 100644 --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -1630,7 +1630,7 @@ bool Deoptimization::relock_objects(JavaThread* thread, GrowableArrayowner()); markWord mark = obj->mark(); if (exec_mode == Unpack_none) { - if (mark.has_locker() && fr.sp() > (intptr_t*)mark.locker()) { + if (LockingMode == LM_LEGACY && mark.has_locker() && fr.sp() > 
(intptr_t*)mark.locker()) { // With exec_mode == Unpack_none obj may be thread local and locked in // a callee frame. Make the lock in the callee a recursive lock and restore the displaced header. markWord dmw = mark.displaced_mark_helper(); diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 51fb9292b06..23eb62bf06d 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1979,6 +1979,13 @@ const int ObjectAlignmentInBytes = 8; false AARCH64_ONLY(DEBUG_ONLY(||true)), \ "Mark all threads after a safepoint, and clear on a modify " \ "fence. Add cleanliness checks.") \ + \ + product(int, LockingMode, LM_LEGACY, EXPERIMENTAL, \ + "Select locking mode: " \ + "0: monitors only (LM_MONITOR), " \ + "1: monitors & legacy stack-locking (LM_LEGACY, default), " \ + "2: monitors & new lightweight locking (LM_LIGHTWEIGHT)") \ + range(0, 2) \ // end of RUNTIME_FLAGS diff --git a/src/hotspot/share/runtime/javaThread.cpp b/src/hotspot/share/runtime/javaThread.cpp index dde001a30fc..7874689a497 100644 --- a/src/hotspot/share/runtime/javaThread.cpp +++ b/src/hotspot/share/runtime/javaThread.cpp @@ -70,6 +70,7 @@ #include "runtime/javaCalls.hpp" #include "runtime/javaThread.inline.hpp" #include "runtime/jniHandles.inline.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/orderAccess.hpp" #include "runtime/osThread.hpp" @@ -490,8 +491,9 @@ JavaThread::JavaThread() : _class_to_be_initialized(nullptr), - _SleepEvent(ParkEvent::Allocate(this)) -{ + _SleepEvent(ParkEvent::Allocate(this)), + + _lock_stack(this) { set_jni_functions(jni_functions()); #if INCLUDE_JVMCI @@ -994,6 +996,7 @@ JavaThread* JavaThread::active() { } bool JavaThread::is_lock_owned(address adr) const { + assert(LockingMode != LM_LIGHTWEIGHT, "should not be called with new lightweight locking"); if (Thread::is_lock_owned(adr)) return true; for (MonitorChunk* chunk = monitor_chunks(); chunk != nullptr; chunk = chunk->next()) { @@ -1387,6 +1390,10 @@ void JavaThread::oops_do_no_frames(OopClosure* f, CodeBlobClosure* cf) { f->do_oop((oop*)entry->chunk_addr()); entry = entry->parent(); } + + if (LockingMode == LM_LIGHTWEIGHT) { + lock_stack().oops_do(f); + } } void JavaThread::oops_do_frames(OopClosure* f, CodeBlobClosure* cf) { diff --git a/src/hotspot/share/runtime/javaThread.hpp b/src/hotspot/share/runtime/javaThread.hpp index c56a6d3b175..f2cb56646d4 100644 --- a/src/hotspot/share/runtime/javaThread.hpp +++ b/src/hotspot/share/runtime/javaThread.hpp @@ -34,6 +34,7 @@ #include "runtime/globals.hpp" #include "runtime/handshake.hpp" #include "runtime/javaFrameAnchor.hpp" +#include "runtime/lockStack.hpp" #include "runtime/park.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/stackWatermarkSet.hpp" @@ -1146,6 +1147,19 @@ public: void interrupt(); bool is_interrupted(bool clear_interrupted); +private: + LockStack _lock_stack; + +public: + LockStack& lock_stack() { return _lock_stack; } + + static ByteSize lock_stack_offset() { return byte_offset_of(JavaThread, _lock_stack); } + // Those offsets are used in code generators to access the LockStack that is embedded in this + // JavaThread structure. Those accesses are relative to the current thread, which + // is typically in a dedicated register. 
+  static ByteSize lock_stack_top_offset()  { return lock_stack_offset() + LockStack::top_offset(); }
+  static ByteSize lock_stack_base_offset() { return lock_stack_offset() + LockStack::base_offset(); }
+
   static OopStorage* thread_oop_storage();

   static void verify_cross_modify_fence_failure(JavaThread *thread) PRODUCT_RETURN;
diff --git a/src/hotspot/share/runtime/lockStack.cpp b/src/hotspot/share/runtime/lockStack.cpp
new file mode 100644
index 00000000000..5fd5297fd5c
--- /dev/null
+++ b/src/hotspot/share/runtime/lockStack.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "runtime/lockStack.inline.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/stackWatermark.hpp"
+#include "runtime/stackWatermarkSet.inline.hpp"
+#include "runtime/thread.hpp"
+#include "utilities/copy.hpp"
+#include "utilities/ostream.hpp"
+
+const int LockStack::lock_stack_offset =      in_bytes(JavaThread::lock_stack_offset());
+const int LockStack::lock_stack_top_offset =  in_bytes(JavaThread::lock_stack_top_offset());
+const int LockStack::lock_stack_base_offset = in_bytes(JavaThread::lock_stack_base_offset());
+
+LockStack::LockStack(JavaThread* jt) :
+  _top(lock_stack_base_offset), _base() {
+#ifdef ASSERT
+  for (int i = 0; i < CAPACITY; i++) {
+    _base[i] = nullptr;
+  }
+#endif
+}
+
+uint32_t LockStack::start_offset() {
+  int offset = lock_stack_base_offset;
+  assert(offset > 0, "must be positive offset");
+  return static_cast<uint32_t>(offset);
+}
+
+uint32_t LockStack::end_offset() {
+  int offset = lock_stack_base_offset + CAPACITY * oopSize;
+  assert(offset > 0, "must be positive offset");
+  return static_cast<uint32_t>(offset);
+}
+
+#ifndef PRODUCT
+void LockStack::verify(const char* msg) const {
+  assert(LockingMode == LM_LIGHTWEIGHT, "never use lock-stack when light weight locking is disabled");
+  assert((_top <= end_offset()), "lockstack overflow: _top %d end_offset %d", _top, end_offset());
+  assert((_top >= start_offset()), "lockstack underflow: _top %d end_offset %d", _top, start_offset());
+  if (SafepointSynchronize::is_at_safepoint() || (Thread::current()->is_Java_thread() && is_owning_thread())) {
+    int top = to_index(_top);
+    for (int i = 0; i < top; i++) {
+      assert(_base[i] != nullptr, "no zapped before top");
+      for (int j = i + 1; j < top; j++) {
+        assert(_base[i] != _base[j], "entries must be unique: %s", msg);
+      }
+    }
+    for (int i = top; i < CAPACITY; i++) {
assert(_base[i] == nullptr, "only zapped entries after top: i: %d, top: %d, entry: " PTR_FORMAT, i, top, p2i(_base[i])); + } + } +} +#endif diff --git a/src/hotspot/share/runtime/lockStack.hpp b/src/hotspot/share/runtime/lockStack.hpp new file mode 100644 index 00000000000..ce6a96bcfe6 --- /dev/null +++ b/src/hotspot/share/runtime/lockStack.hpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2022, Red Hat, Inc. All rights reserved. + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_RUNTIME_LOCKSTACK_HPP +#define SHARE_RUNTIME_LOCKSTACK_HPP + +#include "oops/oopsHierarchy.hpp" +#include "utilities/globalDefinitions.hpp" +#include "utilities/sizes.hpp" + +class Thread; +class OopClosure; + +class LockStack { + friend class VMStructs; +private: + static const int CAPACITY = 8; + + // TODO: It would be very useful if JavaThread::lock_stack_offset() and friends were constexpr, + // but this is currently not the case because we're using offset_of() which is non-constexpr, + // GCC would warn about non-standard-layout types if we were using offsetof() (which *is* constexpr). + static const int lock_stack_offset; + static const int lock_stack_top_offset; + static const int lock_stack_base_offset; + + // The offset of the next element, in bytes, relative to the JavaThread structure. + // We do this instead of a simple index into the array because this allows for + // efficient addressing in generated code. + uint32_t _top; + oop _base[CAPACITY]; + + // Get the owning thread of this lock-stack. + inline JavaThread* get_thread() const; + + // Tests if the calling thread is the thread that owns this lock-stack. + bool is_owning_thread() const; + + // Verifies consistency of the lock-stack. + void verify(const char* msg) const PRODUCT_RETURN; + + // Given an offset (in bytes) calculate the index into the lock-stack. + static inline int to_index(uint32_t offset); + +public: + static ByteSize top_offset() { return byte_offset_of(LockStack, _top); } + static ByteSize base_offset() { return byte_offset_of(LockStack, _base); } + + LockStack(JavaThread* jt); + + // The boundary indicies of the lock-stack. + static uint32_t start_offset(); + static uint32_t end_offset(); + + // Return true if we have room to push onto this lock-stack, false otherwise. + inline bool can_push() const; + + // Pushes an oop on this lock-stack. + inline void push(oop o); + + // Pops an oop from this lock-stack. + inline oop pop(); + + // Removes an oop from an arbitrary location of this lock-stack. 
+  inline void remove(oop o);
+
+  // Tests whether the oop is on this lock-stack.
+  inline bool contains(oop o) const;
+
+  // GC support
+  inline void oops_do(OopClosure* cl);
+
+};
+
+#endif // SHARE_RUNTIME_LOCKSTACK_HPP
diff --git a/src/hotspot/share/runtime/lockStack.inline.hpp b/src/hotspot/share/runtime/lockStack.inline.hpp
new file mode 100644
index 00000000000..186c7169fae
--- /dev/null
+++ b/src/hotspot/share/runtime/lockStack.inline.hpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2022, Red Hat, Inc. All rights reserved.
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_RUNTIME_LOCKSTACK_INLINE_HPP
+#define SHARE_RUNTIME_LOCKSTACK_INLINE_HPP
+
+#include "memory/iterator.hpp"
+#include "runtime/javaThread.hpp"
+#include "runtime/lockStack.hpp"
+#include "runtime/safepoint.hpp"
+#include "runtime/stackWatermark.hpp"
+#include "runtime/stackWatermarkSet.inline.hpp"
+
+inline int LockStack::to_index(uint32_t offset) {
+  return (offset - lock_stack_base_offset) / oopSize;
+}
+
+JavaThread* LockStack::get_thread() const {
+  char* addr = reinterpret_cast<char*>(const_cast<LockStack*>(this));
+  return reinterpret_cast<JavaThread*>(addr - lock_stack_offset);
+}
+
+inline bool LockStack::can_push() const {
+  return to_index(_top) < CAPACITY;
+}
+
+inline bool LockStack::is_owning_thread() const {
+  JavaThread* thread = JavaThread::current();
+  bool is_owning = &thread->lock_stack() == this;
+  assert(is_owning == (get_thread() == thread), "is_owning sanity");
+  return is_owning;
+}
+
+inline void LockStack::push(oop o) {
+  verify("pre-push");
+  assert(oopDesc::is_oop(o), "must be");
+  assert(!contains(o), "entries must be unique");
+  assert(can_push(), "must have room");
+  assert(_base[to_index(_top)] == nullptr, "expect zapped entry");
+  _base[to_index(_top)] = o;
+  _top += oopSize;
+  verify("post-push");
+}
+
+inline oop LockStack::pop() {
+  verify("pre-pop");
+  assert(to_index(_top) > 0, "underflow, probably unbalanced push/pop");
+  _top -= oopSize;
+  oop o = _base[to_index(_top)];
+#ifdef ASSERT
+  _base[to_index(_top)] = nullptr;
+#endif
+  assert(!contains(o), "entries must be unique: " PTR_FORMAT, p2i(o));
+  verify("post-pop");
+  return o;
+}
+
+inline void LockStack::remove(oop o) {
+  verify("pre-remove");
+  assert(contains(o), "entry must be present: " PTR_FORMAT, p2i(o));
+  int end = to_index(_top);
+  for (int i = 0; i < end; i++) {
+    if (_base[i] == o) {
+      int last = end - 1;
+      for (; i < last; i++) {
+        _base[i] = _base[i + 1];
+      }
+      _top -= oopSize;
+#ifdef ASSERT
+      _base[to_index(_top)] = nullptr;
+#endif + break; + } + } + assert(!contains(o), "entries must be unique: " PTR_FORMAT, p2i(o)); + verify("post-remove"); +} + +inline bool LockStack::contains(oop o) const { + verify("pre-contains"); + if (!SafepointSynchronize::is_at_safepoint() && !is_owning_thread()) { + // When a foreign thread inspects this thread's lock-stack, it may see + // bad references here when a concurrent collector has not gotten + // to processing the lock-stack, yet. Call StackWaterMark::start_processing() + // to ensure that all references are valid. + StackWatermark* watermark = StackWatermarkSet::get(get_thread(), StackWatermarkKind::gc); + if (watermark != nullptr) { + watermark->start_processing(); + } + } + int end = to_index(_top); + for (int i = end - 1; i >= 0; i--) { + if (_base[i] == o) { + verify("post-contains"); + return true; + } + } + verify("post-contains"); + return false; +} + +inline void LockStack::oops_do(OopClosure* cl) { + verify("pre-oops-do"); + int end = to_index(_top); + for (int i = 0; i < end; i++) { + cl->do_oop(&_base[i]); + } + verify("post-oops-do"); +} + +#endif // SHARE_RUNTIME_LOCKSTACK_INLINE_HPP diff --git a/src/hotspot/share/runtime/objectMonitor.cpp b/src/hotspot/share/runtime/objectMonitor.cpp index f4eb4e477a9..525dd4b754b 100644 --- a/src/hotspot/share/runtime/objectMonitor.cpp +++ b/src/hotspot/share/runtime/objectMonitor.cpp @@ -334,7 +334,7 @@ bool ObjectMonitor::enter(JavaThread* current) { return true; } - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { assert(_recursions == 0, "internal state error"); _recursions = 1; set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. @@ -1135,7 +1135,7 @@ void ObjectMonitor::UnlinkAfterAcquire(JavaThread* current, ObjectWaiter* curren void ObjectMonitor::exit(JavaThread* current, bool not_suspended) { void* cur = owner_raw(); if (current != cur) { - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { assert(_recursions == 0, "invariant"); set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. _recursions = 0; @@ -1350,7 +1350,7 @@ intx ObjectMonitor::complete_exit(JavaThread* current) { void* cur = owner_raw(); if (current != cur) { - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { assert(_recursions == 0, "internal state error"); set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. _recursions = 0; @@ -1385,10 +1385,11 @@ intx ObjectMonitor::complete_exit(JavaThread* current) { bool ObjectMonitor::check_owner(TRAPS) { JavaThread* current = THREAD; void* cur = owner_raw(); + assert(cur != anon_owner_ptr(), "no anon owner here"); if (cur == current) { return true; } - if (current->is_lock_owned((address)cur)) { + if (LockingMode != LM_LIGHTWEIGHT && current->is_lock_owned((address)cur)) { set_owner_from_BasicLock(cur, current); // Convert from BasicLock* to Thread*. _recursions = 0; return true; diff --git a/src/hotspot/share/runtime/objectMonitor.hpp b/src/hotspot/share/runtime/objectMonitor.hpp index eca244b9693..121cfa74489 100644 --- a/src/hotspot/share/runtime/objectMonitor.hpp +++ b/src/hotspot/share/runtime/objectMonitor.hpp @@ -144,8 +144,22 @@ class ObjectMonitor : public CHeapObj { // its cache line with _header. 
   DEFINE_PAD_MINUS_SIZE(0, OM_CACHE_LINE_SIZE, sizeof(volatile markWord) + sizeof(WeakHandle));
-  // Used by async deflation as a marker in the _owner field:
-  #define DEFLATER_MARKER reinterpret_cast<void*>(-1)
+  // Used by async deflation as a marker in the _owner field.
+  // Note that the choice of the two markers is peculiar:
+  // - They need to represent values that cannot be pointers. In particular,
+  //   we achieve this by using the lowest two bits.
+  // - ANONYMOUS_OWNER should be a small value, it is used in generated code
+  //   and small values encode much better.
+  // - We test for anonymous owner by testing for the lowest bit, therefore
+  //   DEFLATER_MARKER must *not* have that bit set.
+  #define DEFLATER_MARKER reinterpret_cast<void*>(2)
+public:
+  // NOTE: Typed as uintptr_t so that we can pick it up in SA, via vmStructs.
+  static const uintptr_t ANONYMOUS_OWNER = 1;
+
+private:
+  static void* anon_owner_ptr() { return reinterpret_cast<void*>(ANONYMOUS_OWNER); }
+
   void* volatile _owner;            // pointer to owning thread OR BasicLock
   volatile uint64_t _previous_owner_tid;  // thread id of the previous owner of the monitor
   // Separate _owner and _next_om on different cache lines since
@@ -178,6 +192,7 @@ class ObjectMonitor : public CHeapObj<mtObjectMonitor> {
   volatile int _WaitSetLock;        // protects Wait Queue - simple spinlock

  public:
+  static void Initialize();

   // Only perform a PerfData operation if the PerfData object has been
@@ -242,7 +257,7 @@
   }
   const char* is_busy_to_string(stringStream* ss);

-  intptr_t is_entered(JavaThread* current) const;
+  bool is_entered(JavaThread* current) const;

   // Returns true if this OM has an owner, false otherwise.
   bool has_owner() const;
@@ -263,6 +278,18 @@
   // _owner field. Returns the prior value of the _owner field.
   void* try_set_owner_from(void* old_value, void* new_value);

+  void set_owner_anonymous() {
+    set_owner_from(nullptr, anon_owner_ptr());
+  }
+
+  bool is_owner_anonymous() const {
+    return owner_raw() == anon_owner_ptr();
+  }
+
+  void set_owner_from_anonymous(Thread* owner) {
+    set_owner_from(anon_owner_ptr(), owner);
+  }
+
   // Simply get _next_om field.
   ObjectMonitor* next_om() const;
   // Simply set _next_om field to new_value.
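A standalone sketch of the owner-word convention the comment above describes: both markers are small non-pointer values, only ANONYMOUS_OWNER has the lowest bit set, and a monitor inflated from a fast-locked object starts out anonymously owned until the real owner claims it on a slow path. ToyThread and ToyMonitor are illustrative stand-ins for JavaThread and ObjectMonitor, not HotSpot's classes.

#include <atomic>
#include <cassert>
#include <cstdint>

struct ToyThread {};  // stands in for JavaThread

// Mirrors the comment above: non-pointer values in the low two bits,
// and only the anonymous-owner marker has the lowest bit set.
void* const ANONYMOUS_OWNER = reinterpret_cast<void*>(uintptr_t(1));
void* const DEFLATER_MARKER = reinterpret_cast<void*>(uintptr_t(2));

struct ToyMonitor {
  std::atomic<void*> owner{nullptr};

  void set_owner_anonymous()      { owner.store(ANONYMOUS_OWNER); }
  bool is_owner_anonymous() const { return owner.load() == ANONYMOUS_OWNER; }
  void set_owner_from_anonymous(ToyThread* t) {
    assert(is_owner_anonymous());  // only the thread holding the fast-lock may claim the monitor
    owner.store(t);
  }
};

int main() {
  // The "test the lowest bit" trick works because only ANONYMOUS_OWNER has it set.
  assert((reinterpret_cast<uintptr_t>(ANONYMOUS_OWNER) & 1) != 0);
  assert((reinterpret_cast<uintptr_t>(DEFLATER_MARKER) & 1) == 0);

  ToyThread me;
  ToyMonitor m;
  m.set_owner_anonymous();            // installed by whichever thread inflated the lock
  assert(m.is_owner_anonymous());
  m.set_owner_from_anonymous(&me);    // the thread that held the fast-lock takes over
  assert(m.owner.load() == &me);
  return 0;
}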
diff --git a/src/hotspot/share/runtime/objectMonitor.inline.hpp b/src/hotspot/share/runtime/objectMonitor.inline.hpp index 800585a9fbb..36790925b71 100644 --- a/src/hotspot/share/runtime/objectMonitor.inline.hpp +++ b/src/hotspot/share/runtime/objectMonitor.inline.hpp @@ -30,14 +30,23 @@ #include "logging/log.hpp" #include "oops/access.inline.hpp" #include "runtime/atomic.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/synchronizer.hpp" -inline intptr_t ObjectMonitor::is_entered(JavaThread* current) const { - void* owner = owner_raw(); - if (current == owner || current->is_lock_owned((address)owner)) { - return 1; +inline bool ObjectMonitor::is_entered(JavaThread* current) const { + if (LockingMode == LM_LIGHTWEIGHT) { + if (is_owner_anonymous()) { + return current->lock_stack().contains(object()); + } else { + return current == owner_raw(); + } + } else { + void* owner = owner_raw(); + if (current == owner || current->is_lock_owned((address)owner)) { + return true; + } } - return 0; + return false; } inline markWord ObjectMonitor::header() const { diff --git a/src/hotspot/share/runtime/synchronizer.cpp b/src/hotspot/share/runtime/synchronizer.cpp index 62387262564..3479bde358d 100644 --- a/src/hotspot/share/runtime/synchronizer.cpp +++ b/src/hotspot/share/runtime/synchronizer.cpp @@ -40,6 +40,7 @@ #include "runtime/handshake.hpp" #include "runtime/interfaceSupport.inline.hpp" #include "runtime/javaThread.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/objectMonitor.hpp" #include "runtime/objectMonitor.inline.hpp" @@ -324,10 +325,18 @@ bool ObjectSynchronizer::quick_notify(oopDesc* obj, JavaThread* current, bool al if (obj == nullptr) return false; // slow-path for invalid obj const markWord mark = obj->mark(); - if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { - // Degenerate notify - // stack-locked by caller so by definition the implied waitset is empty. - return true; + if (LockingMode == LM_LIGHTWEIGHT) { + if (mark.is_fast_locked() && current->lock_stack().contains(cast_to_oop(obj))) { + // Degenerate notify + // fast-locked by caller so by definition the implied waitset is empty. + return true; + } + } else if (LockingMode == LM_LEGACY) { + if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // Degenerate notify + // stack-locked by caller so by definition the implied waitset is empty. + return true; + } } if (mark.has_monitor()) { @@ -398,16 +407,18 @@ bool ObjectSynchronizer::quick_enter(oop obj, JavaThread* current, return true; } - // This Java Monitor is inflated so obj's header will never be - // displaced to this thread's BasicLock. Make the displaced header - // non-null so this BasicLock is not seen as recursive nor as - // being locked. We do this unconditionally so that this thread's - // BasicLock cannot be mis-interpreted by any stack walkers. For - // performance reasons, stack walkers generally first check for - // stack-locking in the object's header, the second check is for - // recursive stack-locking in the displaced header in the BasicLock, - // and last are the inflated Java Monitor (ObjectMonitor) checks. - lock->set_displaced_header(markWord::unused_mark()); + if (LockingMode != LM_LIGHTWEIGHT) { + // This Java Monitor is inflated so obj's header will never be + // displaced to this thread's BasicLock. Make the displaced header + // non-null so this BasicLock is not seen as recursive nor as + // being locked. 
We do this unconditionally so that this thread's + // BasicLock cannot be mis-interpreted by any stack walkers. For + // performance reasons, stack walkers generally first check for + // stack-locking in the object's header, the second check is for + // recursive stack-locking in the displaced header in the BasicLock, + // and last are the inflated Java Monitor (ObjectMonitor) checks. + lock->set_displaced_header(markWord::unused_mark()); + } if (owner == nullptr && m->try_set_owner_from(nullptr, current) == nullptr) { assert(m->_recursions == 0, "invariant"); @@ -476,7 +487,7 @@ void ObjectSynchronizer::handle_sync_on_value_based_class(Handle obj, JavaThread static bool useHeavyMonitors() { #if defined(X86) || defined(AARCH64) || defined(PPC64) || defined(RISCV64) - return UseHeavyMonitors; + return LockingMode == LM_MONITOR; #else return false; #endif @@ -496,30 +507,50 @@ void ObjectSynchronizer::enter(Handle obj, BasicLock* lock, JavaThread* current) current->inc_held_monitor_count(); if (!useHeavyMonitors()) { - markWord mark = obj->mark(); - if (mark.is_neutral()) { - // Anticipate successful CAS -- the ST of the displaced mark must - // be visible <= the ST performed by the CAS. - lock->set_displaced_header(mark); - if (mark == obj()->cas_set_mark(markWord::from_pointer(lock), mark)) { + if (LockingMode == LM_LIGHTWEIGHT) { + // Fast-locking does not use the 'lock' argument. + LockStack& lock_stack = current->lock_stack(); + if (lock_stack.can_push()) { + markWord mark = obj()->mark_acquire(); + if (mark.is_neutral()) { + assert(!lock_stack.contains(obj()), "thread must not already hold the lock"); + // Try to swing into 'fast-locked' state. + markWord locked_mark = mark.set_fast_locked(); + markWord old_mark = obj()->cas_set_mark(locked_mark, mark); + if (old_mark == mark) { + // Successfully fast-locked, push object to lock-stack and return. + lock_stack.push(obj()); + return; + } + } + } + // All other paths fall-through to inflate-enter. + } else if (LockingMode == LM_LEGACY) { + markWord mark = obj->mark(); + if (mark.is_neutral()) { + // Anticipate successful CAS -- the ST of the displaced mark must + // be visible <= the ST performed by the CAS. + lock->set_displaced_header(mark); + if (mark == obj()->cas_set_mark(markWord::from_pointer(lock), mark)) { + return; + } + // Fall through to inflate() ... + } else if (mark.has_locker() && + current->is_lock_owned((address) mark.locker())) { + assert(lock != mark.locker(), "must not re-lock the same lock"); + assert(lock != (BasicLock*) obj->mark().value(), "don't relock with same BasicLock"); + lock->set_displaced_header(markWord::from_pointer(nullptr)); return; } - // Fall through to inflate() ... - } else if (mark.has_locker() && - current->is_lock_owned((address)mark.locker())) { - assert(lock != mark.locker(), "must not re-lock the same lock"); - assert(lock != (BasicLock*)obj->mark().value(), "don't relock with same BasicLock"); - lock->set_displaced_header(markWord::from_pointer(nullptr)); - return; - } - // The object header will never be displaced to this lock, - // so it does not matter what the value is, except that it - // must be non-zero to avoid looking like a re-entrant lock, - // and must not look locked either. - lock->set_displaced_header(markWord::unused_mark()); + // The object header will never be displaced to this lock, + // so it does not matter what the value is, except that it + // must be non-zero to avoid looking like a re-entrant lock, + // and must not look locked either. 
+ lock->set_displaced_header(markWord::unused_mark()); + } } else if (VerifyHeavyMonitors) { - guarantee(!obj->mark().has_locker(), "must not be stack-locked"); + guarantee((obj->mark().value() & markWord::lock_mask_in_place) != markWord::locked_value, "must not be lightweight/stack-locked"); } // An async deflation can race after the inflate() call and before @@ -538,53 +569,81 @@ void ObjectSynchronizer::exit(oop object, BasicLock* lock, JavaThread* current) if (!useHeavyMonitors()) { markWord mark = object->mark(); - - markWord dhw = lock->displaced_header(); - if (dhw.value() == 0) { - // If the displaced header is null, then this exit matches up with - // a recursive enter. No real work to do here except for diagnostics. -#ifndef PRODUCT - if (mark != markWord::INFLATING()) { - // Only do diagnostics if we are not racing an inflation. Simply - // exiting a recursive enter of a Java Monitor that is being - // inflated is safe; see the has_monitor() comment below. - assert(!mark.is_neutral(), "invariant"); - assert(!mark.has_locker() || - current->is_lock_owned((address)mark.locker()), "invariant"); - if (mark.has_monitor()) { - // The BasicLock's displaced_header is marked as a recursive - // enter and we have an inflated Java Monitor (ObjectMonitor). - // This is a special case where the Java Monitor was inflated - // after this thread entered the stack-lock recursively. When a - // Java Monitor is inflated, we cannot safely walk the Java - // Monitor owner's stack and update the BasicLocks because a - // Java Monitor can be asynchronously inflated by a thread that - // does not own the Java Monitor. - ObjectMonitor* m = mark.monitor(); - assert(m->object()->mark() == mark, "invariant"); - assert(m->is_entered(current), "invariant"); + if (LockingMode == LM_LIGHTWEIGHT) { + // Fast-locking does not use the 'lock' argument. + if (mark.is_fast_locked()) { + markWord unlocked_mark = mark.set_unlocked(); + markWord old_mark = object->cas_set_mark(unlocked_mark, mark); + if (old_mark != mark) { + // Another thread won the CAS, it must have inflated the monitor. + // It can only have installed an anonymously locked monitor at this point. + // Fetch that monitor, set owner correctly to this thread, and + // exit it (allowing waiting threads to enter). + assert(old_mark.has_monitor(), "must have monitor"); + ObjectMonitor* monitor = old_mark.monitor(); + assert(monitor->is_owner_anonymous(), "must be anonymous owner"); + monitor->set_owner_from_anonymous(current); + monitor->exit(current); } - } -#endif - return; - } - - if (mark == markWord::from_pointer(lock)) { - // If the object is stack-locked by the current thread, try to - // swing the displaced header from the BasicLock back to the mark. - assert(dhw.is_neutral(), "invariant"); - if (object->cas_set_mark(dhw, mark) == mark) { + LockStack& lock_stack = current->lock_stack(); + lock_stack.remove(object); return; } + } else if (LockingMode == LM_LEGACY) { + markWord dhw = lock->displaced_header(); + if (dhw.value() == 0) { + // If the displaced header is null, then this exit matches up with + // a recursive enter. No real work to do here except for diagnostics. +#ifndef PRODUCT + if (mark != markWord::INFLATING()) { + // Only do diagnostics if we are not racing an inflation. Simply + // exiting a recursive enter of a Java Monitor that is being + // inflated is safe; see the has_monitor() comment below. 
+ assert(!mark.is_neutral(), "invariant"); + assert(!mark.has_locker() || + current->is_lock_owned((address)mark.locker()), "invariant"); + if (mark.has_monitor()) { + // The BasicLock's displaced_header is marked as a recursive + // enter and we have an inflated Java Monitor (ObjectMonitor). + // This is a special case where the Java Monitor was inflated + // after this thread entered the stack-lock recursively. When a + // Java Monitor is inflated, we cannot safely walk the Java + // Monitor owner's stack and update the BasicLocks because a + // Java Monitor can be asynchronously inflated by a thread that + // does not own the Java Monitor. + ObjectMonitor* m = mark.monitor(); + assert(m->object()->mark() == mark, "invariant"); + assert(m->is_entered(current), "invariant"); + } + } +#endif + return; + } + + if (mark == markWord::from_pointer(lock)) { + // If the object is stack-locked by the current thread, try to + // swing the displaced header from the BasicLock back to the mark. + assert(dhw.is_neutral(), "invariant"); + if (object->cas_set_mark(dhw, mark) == mark) { + return; + } + } } } else if (VerifyHeavyMonitors) { - guarantee(!object->mark().has_locker(), "must not be stack-locked"); + guarantee((object->mark().value() & markWord::lock_mask_in_place) != markWord::locked_value, "must not be lightweight/stack-locked"); } // We have to take the slow-path of possible inflation and then exit. // The ObjectMonitor* can't be async deflated until ownership is // dropped inside exit() and the ObjectMonitor* must be !is_busy(). ObjectMonitor* monitor = inflate(current, object, inflate_cause_vm_internal); + if (LockingMode == LM_LIGHTWEIGHT && monitor->is_owner_anonymous()) { + // It must be owned by us. Pop lock object from lock stack. + LockStack& lock_stack = current->lock_stack(); + oop popped = lock_stack.pop(); + assert(popped == object, "must be owned by this thread"); + monitor->set_owner_from_anonymous(current); + } monitor->exit(current); } @@ -675,9 +734,16 @@ void ObjectSynchronizer::notify(Handle obj, TRAPS) { JavaThread* current = THREAD; markWord mark = obj->mark(); - if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { - // Not inflated so there can't be any waiters to notify. - return; + if (LockingMode == LM_LIGHTWEIGHT) { + if ((mark.is_fast_locked() && current->lock_stack().contains(obj()))) { + // Not inflated so there can't be any waiters to notify. + return; + } + } else if (LockingMode == LM_LEGACY) { + if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // Not inflated so there can't be any waiters to notify. + return; + } } // The ObjectMonitor* can't be async deflated until ownership is // dropped by the calling thread. @@ -690,9 +756,16 @@ void ObjectSynchronizer::notifyall(Handle obj, TRAPS) { JavaThread* current = THREAD; markWord mark = obj->mark(); - if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { - // Not inflated so there can't be any waiters to notify. - return; + if (LockingMode == LM_LIGHTWEIGHT) { + if ((mark.is_fast_locked() && current->lock_stack().contains(obj()))) { + // Not inflated so there can't be any waiters to notify. + return; + } + } else if (LockingMode == LM_LEGACY) { + if (mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // Not inflated so there can't be any waiters to notify. + return; + } } // The ObjectMonitor* can't be async deflated until ownership is // dropped by the calling thread. 
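The enter() and exit() fast paths above both reduce to one CAS on the two low lock bits of the mark word: neutral (0b01) swings to fast-locked (0b00) on enter and back again on exit, with the per-thread lock stack taking over the bookkeeping that the legacy displaced header used to do. A minimal standalone sketch of those two transitions, with illustrative constants and types rather than the VM's markWord/LockStack classes:

#include <atomic>
#include <cstdint>

// Simplified model of the low lock bits used by LM_LIGHTWEIGHT: 0b01 means
// neutral (unlocked) and 0b00 means fast-locked. The constants are
// illustrative stand-ins for markWord::unlocked_value / locked_value.
constexpr uintptr_t lock_mask     = 0b11;
constexpr uintptr_t unlocked_bits = 0b01;
constexpr uintptr_t fast_locked   = 0b00;

struct ObjectModel { std::atomic<uintptr_t> mark{unlocked_bits}; };

// enter() fast path: swing neutral -> fast-locked with one CAS
// (the push onto the current thread's lock stack is omitted here).
bool try_fast_lock(ObjectModel& o) {
  uintptr_t m = o.mark.load(std::memory_order_acquire);
  if ((m & lock_mask) != unlocked_bits) return false;      // not neutral
  uintptr_t locked = (m & ~lock_mask) | fast_locked;       // set_fast_locked()
  return o.mark.compare_exchange_strong(m, locked);        // cas_set_mark()
}

// exit() fast path: swing fast-locked -> neutral; a failed CAS means another
// thread installed an (anonymously owned) monitor and the slow path must run.
bool try_fast_unlock(ObjectModel& o) {
  uintptr_t m = o.mark.load(std::memory_order_acquire);
  if ((m & lock_mask) != fast_locked) return false;        // already inflated
  uintptr_t unlocked = (m & ~lock_mask) | unlocked_bits;   // set_unlocked()
  return o.mark.compare_exchange_strong(m, unlocked);
}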
@@ -718,7 +791,8 @@ static SharedGlobals GVars; static markWord read_stable_mark(oop obj) { markWord mark = obj->mark_acquire(); - if (!mark.is_being_inflated()) { + if (!mark.is_being_inflated() || LockingMode == LM_LIGHTWEIGHT) { + // New lightweight locking does not use the markWord::INFLATING() protocol. return mark; // normal fast-path return } @@ -833,6 +907,13 @@ static inline intptr_t get_next_hash(Thread* current, oop obj) { return value; } +// Can be called from non JavaThreads (e.g., VMThread) for FastHashCode +// calculations as part of JVM/TI tagging. +static bool is_lock_owned(Thread* thread, oop obj) { + assert(LockingMode == LM_LIGHTWEIGHT, "only call this with new lightweight locking enabled"); + return thread->is_Java_thread() ? JavaThread::cast(thread)->lock_stack().contains(obj) : false; +} + intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { while (true) { @@ -841,8 +922,8 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { intptr_t hash; markWord mark = read_stable_mark(obj); if (VerifyHeavyMonitors) { - assert(UseHeavyMonitors, "+VerifyHeavyMonitors requires +UseHeavyMonitors"); - guarantee(!mark.has_locker(), "must not be stack locked"); + assert(LockingMode == LM_MONITOR, "+VerifyHeavyMonitors requires LockingMode == 0 (LM_MONITOR)"); + guarantee((obj->mark().value() & markWord::lock_mask_in_place) != markWord::locked_value, "must not be lightweight/stack-locked"); } if (mark.is_neutral()) { // if this is a normal header hash = mark.hash(); @@ -887,8 +968,15 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { } // Fall thru so we only have one place that installs the hash in // the ObjectMonitor. - } else if (current->is_lock_owned((address)mark.locker())) { - // This is a stack lock owned by the calling thread so fetch the + } else if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked() && is_lock_owned(current, obj)) { + // This is a fast-lock owned by the calling thread so use the + // markWord from the object. + hash = mark.hash(); + if (hash != 0) { // if it has a hash, just return it + return hash; + } + } else if (LockingMode == LM_LEGACY && mark.has_locker() && current->is_lock_owned((address)mark.locker())) { + // This is a stack-lock owned by the calling thread so fetch the // displaced markWord from the BasicLock on the stack. temp = mark.displaced_mark_helper(); assert(temp.is_neutral(), "invariant: header=" INTPTR_FORMAT, temp.value()); @@ -899,7 +987,7 @@ intptr_t ObjectSynchronizer::FastHashCode(Thread* current, oop obj) { // WARNING: // The displaced header in the BasicLock on a thread's stack // is strictly immutable. It CANNOT be changed in ANY cases. - // So we have to inflate the stack lock into an ObjectMonitor + // So we have to inflate the stack-lock into an ObjectMonitor // even if the current thread owns the lock. 
The BasicLock on // a thread's stack can be asynchronously read by other threads // during an inflate() call so any change to that stack memory @@ -952,12 +1040,18 @@ bool ObjectSynchronizer::current_thread_holds_lock(JavaThread* current, markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { + if (LockingMode == LM_LEGACY && mark.has_locker()) { + // stack-locked case, header points into owner's stack return current->is_lock_owned((address)mark.locker()); } - // Contended case, header points to ObjectMonitor (tagged pointer) + + if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked()) { + // fast-locking case, see if lock is in current's lock stack + return current->lock_stack().contains(h_obj()); + } + if (mark.has_monitor()) { + // Inflated monitor so header points to ObjectMonitor (tagged pointer). // The first stage of async deflation does not affect any field // used by this comparison so the ObjectMonitor* is usable here. ObjectMonitor* monitor = mark.monitor(); @@ -970,27 +1064,28 @@ bool ObjectSynchronizer::current_thread_holds_lock(JavaThread* current, JavaThread* ObjectSynchronizer::get_lock_owner(ThreadsList * t_list, Handle h_obj) { oop obj = h_obj(); - address owner = nullptr; - markWord mark = read_stable_mark(obj); - // Uncontended case, header points to stack - if (mark.has_locker()) { - owner = (address) mark.locker(); + if (LockingMode == LM_LEGACY && mark.has_locker()) { + // stack-locked so header points into owner's stack. + // owning_thread_from_monitor_owner() may also return null here: + return Threads::owning_thread_from_monitor_owner(t_list, (address) mark.locker()); } - // Contended case, header points to ObjectMonitor (tagged pointer) - else if (mark.has_monitor()) { + if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked()) { + // fast-locked so get owner from the object. + // owning_thread_from_object() may also return null here: + return Threads::owning_thread_from_object(t_list, h_obj()); + } + + if (mark.has_monitor()) { + // Inflated monitor so header points to ObjectMonitor (tagged pointer). // The first stage of async deflation does not affect any field // used by this comparison so the ObjectMonitor* is usable here. ObjectMonitor* monitor = mark.monitor(); assert(monitor != nullptr, "monitor should be non-null"); - owner = (address) monitor->owner(); - } - - if (owner != nullptr) { - // owning_thread_from_monitor_owner() may also return null here - return Threads::owning_thread_from_monitor_owner(t_list, owner); + // owning_thread_from_monitor() may also return null here: + return Threads::owning_thread_from_monitor(t_list, monitor); } // Unlocked case, header in place @@ -1004,7 +1099,7 @@ JavaThread* ObjectSynchronizer::get_lock_owner(ThreadsList * t_list, Handle h_ob // Visitors ... // Iterate ObjectMonitors where the owner == thread; this does NOT include -// ObjectMonitors where owner is set to a stack lock address in thread. +// ObjectMonitors where owner is set to a stack-lock address in thread. // // This version of monitors_iterate() works with the in-use monitor list. // @@ -1014,7 +1109,7 @@ void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure, JavaThread* t ObjectMonitor* mid = iter.next(); if (mid->owner() != thread) { // Not owned by the target thread and intentionally skips when owner - // is set to a stack lock address in the target thread. + // is set to a stack-lock address in the target thread. 
continue; } if (!mid->is_being_async_deflated() && mid->object_peek() != nullptr) { @@ -1040,7 +1135,7 @@ void ObjectSynchronizer::monitors_iterate(MonitorClosure* closure, ObjectMonitorIterator iter(list->head()); while (!iter.is_empty()) { ObjectMonitor* mid = *iter.next(); - // Owner set to a stack lock address in thread should never be seen here: + // Owner set to a stack-lock address in thread should never be seen here: assert(mid->owner() == thread, "must be"); if (!mid->is_being_async_deflated() && mid->object_peek() != nullptr) { // Only process with closure if the object is set. @@ -1224,43 +1319,109 @@ ObjectMonitor* ObjectSynchronizer::inflate(Thread* current, oop object, const markWord mark = object->mark_acquire(); // The mark can be in one of the following states: - // * Inflated - just return - // * Stack-locked - coerce it to inflated - // * INFLATING - busy wait for conversion to complete - // * Neutral - aggressively inflate the object. + // * inflated - Just return if using stack-locking. + // If using fast-locking and the ObjectMonitor owner + // is anonymous and the current thread owns the + // object lock, then we make the current thread the + // ObjectMonitor owner and remove the lock from the + // current thread's lock stack. + // * fast-locked - Coerce it to inflated from fast-locked. + // * stack-locked - Coerce it to inflated from stack-locked. + // * INFLATING - Busy wait for conversion from stack-locked to + // inflated. + // * neutral - Aggressively inflate the object. // CASE: inflated if (mark.has_monitor()) { ObjectMonitor* inf = mark.monitor(); markWord dmw = inf->header(); assert(dmw.is_neutral(), "invariant: header=" INTPTR_FORMAT, dmw.value()); + if (LockingMode == LM_LIGHTWEIGHT && inf->is_owner_anonymous() && is_lock_owned(current, object)) { + inf->set_owner_from_anonymous(current); + JavaThread::cast(current)->lock_stack().remove(object); + } return inf; } - // CASE: inflation in progress - inflating over a stack-lock. - // Some other thread is converting from stack-locked to inflated. - // Only that thread can complete inflation -- other threads must wait. - // The INFLATING value is transient. - // Currently, we spin/yield/park and poll the markword, waiting for inflation to finish. - // We could always eliminate polling by parking the thread on some auxiliary list. - if (mark == markWord::INFLATING()) { - read_stable_mark(object); - continue; + if (LockingMode != LM_LIGHTWEIGHT) { + // New lightweight locking does not use INFLATING. + // CASE: inflation in progress - inflating over a stack-lock. + // Some other thread is converting from stack-locked to inflated. + // Only that thread can complete inflation -- other threads must wait. + // The INFLATING value is transient. + // Currently, we spin/yield/park and poll the markword, waiting for inflation to finish. + // We could always eliminate polling by parking the thread on some auxiliary list. + if (mark == markWord::INFLATING()) { + read_stable_mark(object); + continue; + } + } + + // CASE: fast-locked + // Could be fast-locked either by current or by some other thread. + // + // Note that we allocate the ObjectMonitor speculatively, _before_ + // attempting to set the object's mark to the new ObjectMonitor. If + // this thread owns the monitor, then we set the ObjectMonitor's + // owner to this thread. Otherwise, we set the ObjectMonitor's owner + // to anonymous. If we lose the race to set the object's mark to the + // new ObjectMonitor, then we just delete it and loop around again. 
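The comment above describes a publish-or-discard pattern for inflating a fast-locked object: the ObjectMonitor is allocated first, its owner is set to the current thread or to anonymous, and a single CAS either installs it in the mark word or loses the race, in which case the monitor is deleted and the loop retries. A simplified standalone model of that pattern (MonitorModel and the helper are illustrative, not the VM code; it assumes the allocation is at least 4-byte aligned so the low tag bits are free):

#include <atomic>
#include <cstdint>

// Placeholder monitor; the real ObjectMonitor also carries recursions,
// entry queues, etc.
struct MonitorModel {
  uintptr_t header;      // saved neutral mark word
  const void* owner;     // owning thread, or nullptr for "anonymous"
};

constexpr uintptr_t monitor_tag = 0b10;   // corresponds to markWord::monitor_value

// Publish-or-discard inflation: build the monitor first, try to install it
// with a single CAS on the mark word, and delete it if another thread's CAS
// won the race (the caller then loops and re-reads the mark).
MonitorModel* try_inflate(std::atomic<uintptr_t>& mark, uintptr_t observed,
                          const void* self, bool self_owns_fast_lock) {
  MonitorModel* m = new MonitorModel{(observed & ~uintptr_t(0b11)) | 0b01,  // set_unlocked()
                                     self_owns_fast_lock ? self : nullptr};
  uintptr_t monitor_mark = reinterpret_cast<uintptr_t>(m) | monitor_tag;
  if (mark.compare_exchange_strong(observed, monitor_mark)) {
    return m;            // published; now visible to other threads
  }
  delete m;              // interference -- just retry
  return nullptr;
}

Losing the CAS is cheap here: nothing about the discarded monitor was ever visible to other threads, so a plain delete is safe.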
+ // + LogStreamHandle(Trace, monitorinflation) lsh; + if (LockingMode == LM_LIGHTWEIGHT && mark.is_fast_locked()) { + ObjectMonitor* monitor = new ObjectMonitor(object); + monitor->set_header(mark.set_unlocked()); + bool own = is_lock_owned(current, object); + if (own) { + // Owned by us. + monitor->set_owner_from(nullptr, current); + } else { + // Owned by somebody else. + monitor->set_owner_anonymous(); + } + markWord monitor_mark = markWord::encode(monitor); + markWord old_mark = object->cas_set_mark(monitor_mark, mark); + if (old_mark == mark) { + // Success! Return inflated monitor. + if (own) { + JavaThread::cast(current)->lock_stack().remove(object); + } + // Once the ObjectMonitor is configured and object is associated + // with the ObjectMonitor, it is safe to allow async deflation: + _in_use_list.add(monitor); + + // Hopefully the performance counters are allocated on distinct + // cache lines to avoid false sharing on MP systems ... + OM_PERFDATA_OP(Inflations, inc()); + if (log_is_enabled(Trace, monitorinflation)) { + ResourceMark rm(current); + lsh.print_cr("inflate(has_locker): object=" INTPTR_FORMAT ", mark=" + INTPTR_FORMAT ", type='%s'", p2i(object), + object->mark().value(), object->klass()->external_name()); + } + if (event.should_commit()) { + post_monitor_inflate_event(&event, object, cause); + } + return monitor; + } else { + delete monitor; + continue; // Interference -- just retry + } } // CASE: stack-locked - // Could be stack-locked either by this thread or by some other thread. + // Could be stack-locked either by current or by some other thread. // // Note that we allocate the ObjectMonitor speculatively, _before_ attempting // to install INFLATING into the mark word. We originally installed INFLATING, // allocated the ObjectMonitor, and then finally STed the address of the // ObjectMonitor into the mark. This was correct, but artificially lengthened // the interval in which INFLATING appeared in the mark, thus increasing - // the odds of inflation contention. - - LogStreamHandle(Trace, monitorinflation) lsh; - - if (mark.has_locker()) { + // the odds of inflation contention. If we lose the race to set INFLATING, + // then we just delete the ObjectMonitor and loop around again. + // + if (LockingMode == LM_LEGACY && mark.has_locker()) { + assert(LockingMode != LM_LIGHTWEIGHT, "cannot happen with new lightweight locking"); ObjectMonitor* m = new ObjectMonitor(object); // Optimistically prepare the ObjectMonitor - anticipate successful CAS // We do this before the CAS in order to minimize the length of time @@ -1422,10 +1583,10 @@ void ObjectSynchronizer::chk_for_block_req(JavaThread* current, const char* op_n // // If table != nullptr, we gather owned ObjectMonitors indexed by the // owner in the table. Please note that ObjectMonitors where the owner -// is set to a stack lock address are NOT associated with the JavaThread -// that holds that stack lock. All of the current consumers of +// is set to a stack-lock address are NOT associated with the JavaThread +// that holds that stack-lock. All of the current consumers of // ObjectMonitorsHashtable info only care about JNI locked monitors and -// those do not have the owner set to a stack lock address. +// those do not have the owner set to a stack-lock address. 
// size_t ObjectSynchronizer::deflate_monitor_list(Thread* current, LogStream* ls, elapsedTimer* timer_p, @@ -1442,7 +1603,7 @@ size_t ObjectSynchronizer::deflate_monitor_list(Thread* current, LogStream* ls, deflated_count++; } else if (table != nullptr) { // The caller is interested in the owned ObjectMonitors. This does - // not include when owner is set to a stack lock address in thread. + // not include when owner is set to a stack-lock address in thread. // This also does not capture unowned ObjectMonitors that cannot be // deflated because of a waiter. void* key = mid->owner(); diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp index df693064c74..ef06eb29e9d 100644 --- a/src/hotspot/share/runtime/thread.cpp +++ b/src/hotspot/share/runtime/thread.cpp @@ -526,6 +526,7 @@ void Thread::print_owned_locks_on(outputStream* st) const { // should be revisited, and they should be removed if possible. bool Thread::is_lock_owned(address adr) const { + assert(LockingMode != LM_LIGHTWEIGHT, "should not be called with new lightweight locking"); return is_in_full_stack(adr); } diff --git a/src/hotspot/share/runtime/threads.cpp b/src/hotspot/share/runtime/threads.cpp index ff1a41f00e6..25ed7c8f456 100644 --- a/src/hotspot/share/runtime/threads.cpp +++ b/src/hotspot/share/runtime/threads.cpp @@ -69,6 +69,7 @@ #include "runtime/javaThread.inline.hpp" #include "runtime/jniHandles.inline.hpp" #include "runtime/jniPeriodicChecker.hpp" +#include "runtime/lockStack.inline.hpp" #include "runtime/monitorDeflationThread.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/nonJavaThread.hpp" @@ -1177,6 +1178,7 @@ GrowableArray* Threads::get_pending_threads(ThreadsList * t_list, JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list, address owner) { + assert(LockingMode != LM_LIGHTWEIGHT, "Not with new lightweight locking"); // null owner means not locked so we can skip the search if (owner == nullptr) return nullptr; @@ -1188,7 +1190,7 @@ JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list, // Cannot assert on lack of success here since this function may be // used by code that is trying to report useful problem information // like deadlock detection. 
- if (UseHeavyMonitors) return nullptr; + if (LockingMode == LM_MONITOR) return nullptr; // If we didn't find a matching Java thread and we didn't force use of // heavyweight monitors, then the owner is the stack address of the @@ -1206,9 +1208,29 @@ JavaThread *Threads::owning_thread_from_monitor_owner(ThreadsList * t_list, return the_owner; } +JavaThread* Threads::owning_thread_from_object(ThreadsList * t_list, oop obj) { + assert(LockingMode == LM_LIGHTWEIGHT, "Only with new lightweight locking"); + for (JavaThread* q : *t_list) { + if (q->lock_stack().contains(obj)) { + return q; + } + } + return nullptr; +} + JavaThread* Threads::owning_thread_from_monitor(ThreadsList* t_list, ObjectMonitor* monitor) { - address owner = (address)monitor->owner(); - return owning_thread_from_monitor_owner(t_list, owner); + if (LockingMode == LM_LIGHTWEIGHT) { + if (monitor->is_owner_anonymous()) { + return owning_thread_from_object(t_list, monitor->object()); + } else { + Thread* owner = reinterpret_cast(monitor->owner()); + assert(owner == nullptr || owner->is_Java_thread(), "only JavaThreads own monitors"); + return reinterpret_cast(owner); + } + } else { + address owner = (address)monitor->owner(); + return owning_thread_from_monitor_owner(t_list, owner); + } } class PrintOnClosure : public ThreadClosure { diff --git a/src/hotspot/share/runtime/threads.hpp b/src/hotspot/share/runtime/threads.hpp index b81c74caa13..8d61431f0ce 100644 --- a/src/hotspot/share/runtime/threads.hpp +++ b/src/hotspot/share/runtime/threads.hpp @@ -139,6 +139,7 @@ public: static JavaThread *owning_thread_from_monitor_owner(ThreadsList * t_list, address owner); + static JavaThread* owning_thread_from_object(ThreadsList* t_list, oop obj); static JavaThread* owning_thread_from_monitor(ThreadsList* t_list, ObjectMonitor* owner); // Number of threads on the active threads list diff --git a/src/hotspot/share/runtime/vmStructs.cpp b/src/hotspot/share/runtime/vmStructs.cpp index b74012122a7..e36846c6b3b 100644 --- a/src/hotspot/share/runtime/vmStructs.cpp +++ b/src/hotspot/share/runtime/vmStructs.cpp @@ -702,6 +702,9 @@ nonstatic_field(ThreadShadow, _exception_line, int) \ nonstatic_field(Thread, _tlab, ThreadLocalAllocBuffer) \ nonstatic_field(Thread, _allocated_bytes, jlong) \ + nonstatic_field(JavaThread, _lock_stack, LockStack) \ + nonstatic_field(LockStack, _top, uint32_t) \ + nonstatic_field(LockStack, _base[0], oop) \ nonstatic_field(NamedThread, _name, char*) \ nonstatic_field(NamedThread, _processed_thread, Thread*) \ nonstatic_field(JavaThread, _threadObj, OopHandle) \ @@ -1317,6 +1320,7 @@ \ declare_toplevel_type(ThreadsSMRSupport) \ declare_toplevel_type(ThreadsList) \ + declare_toplevel_type(LockStack) \ \ /***************/ \ /* Interpreter */ \ @@ -2413,6 +2417,14 @@ declare_constant(T_NARROWKLASS_size) \ declare_constant(T_VOID_size) \ \ + /**********************************************/ \ + /* LockingMode enum (globalDefinitions.hpp) */ \ + /**********************************************/ \ + \ + declare_constant(LM_MONITOR) \ + declare_constant(LM_LEGACY) \ + declare_constant(LM_LIGHTWEIGHT) \ + \ /*********************/ \ /* Matcher (C2 only) */ \ /*********************/ \ @@ -2597,8 +2609,10 @@ \ /* InvocationCounter constants */ \ declare_constant(InvocationCounter::count_increment) \ - declare_constant(InvocationCounter::count_shift) - + declare_constant(InvocationCounter::count_shift) \ + \ + /* ObjectMonitor constants */ \ + declare_constant(ObjectMonitor::ANONYMOUS_OWNER) \ 
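Because an inflated monitor can be anonymously owned under LM_LIGHTWEIGHT, owning_thread_from_monitor above falls back to owning_thread_from_object, which asks each thread whether the object sits on its lock stack. A small standalone model of that lookup (ThreadModel stands in for JavaThread and the ThreadsList iteration):

#include <algorithm>
#include <vector>

// Illustrative stand-ins for JavaThread and its per-thread lock stack.
struct ThreadModel {
  std::vector<const void*> lock_stack;
  bool lock_stack_contains(const void* obj) const {
    return std::find(lock_stack.begin(), lock_stack.end(), obj) != lock_stack.end();
  }
};

// Anonymously-owned monitors carry no owner pointer, so the owner is the
// (single) thread that has the locked object on its lock stack.
const ThreadModel* owning_thread_from_object(const std::vector<ThreadModel>& threads,
                                             const void* obj) {
  for (const ThreadModel& t : threads) {
    if (t.lock_stack_contains(obj)) return &t;
  }
  return nullptr;   // not fast-locked by any thread
}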
//-------------------------------------------------------------------------------- // diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp index 1f9e342db82..c8bb643cc5a 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp @@ -1042,6 +1042,15 @@ enum JavaThreadState { _thread_max_state = 12 // maximum thread state+1 - used for statistics allocation }; +enum LockingMode { + // Use only heavy monitors for locking + LM_MONITOR = 0, + // Legacy stack-locking, with monitors as 2nd tier + LM_LEGACY = 1, + // New lightweight locking, with monitors as 2nd tier + LM_LIGHTWEIGHT = 2 +}; + //---------------------------------------------------------------------------------------------------- // Special constants for debugging diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java index 863273cd79f..03e0ed4f29a 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaThread.java @@ -44,6 +44,8 @@ public class JavaThread extends Thread { private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.JavaThread.DEBUG") != null; private static long threadObjFieldOffset; + private static long lockStackTopOffset; + private static long lockStackBaseOffset; private static AddressField anchorField; private static AddressField lastJavaSPField; private static AddressField lastJavaPCField; @@ -53,6 +55,7 @@ public class JavaThread extends Thread { private static CIntegerField stackSizeField; private static CIntegerField terminatedField; private static AddressField activeHandlesField; + private static long oopPtrSize; private static JavaThreadPDAccess access; @@ -85,6 +88,7 @@ public class JavaThread extends Thread { private static synchronized void initialize(TypeDataBase db) { Type type = db.lookupType("JavaThread"); Type anchorType = db.lookupType("JavaFrameAnchor"); + Type typeLockStack = db.lookupType("LockStack"); threadObjFieldOffset = type.getField("_threadObj").getOffset(); @@ -98,6 +102,10 @@ public class JavaThread extends Thread { terminatedField = type.getCIntegerField("_terminated"); activeHandlesField = type.getAddressField("_active_handles"); + lockStackTopOffset = type.getField("_lock_stack").getOffset() + typeLockStack.getField("_top").getOffset(); + lockStackBaseOffset = type.getField("_lock_stack").getOffset() + typeLockStack.getField("_base[0]").getOffset(); + oopPtrSize = VM.getVM().getAddressSize(); + UNINITIALIZED = db.lookupIntConstant("_thread_uninitialized").intValue(); NEW = db.lookupIntConstant("_thread_new").intValue(); NEW_TRANS = db.lookupIntConstant("_thread_new_trans").intValue(); @@ -394,6 +402,23 @@ public class JavaThread extends Thread { return stackBase.greaterThan(a) && sp.lessThanOrEqual(a); } + public boolean isLockOwned(OopHandle obj) { + long current = lockStackBaseOffset; + long end = addr.getJIntAt(lockStackTopOffset); + if (Assert.ASSERTS_ENABLED) { + Assert.that(current <= end, "current stack offset must be above base offset"); + } + + while (current < end) { + Address oop = addr.getAddressAt(current); + if (oop.equals(obj)) { + return true; + } + current += oopPtrSize; + } + return false; + } + public boolean isLockOwned(Address a) { Address stackBase = getStackBase(); Address stackLimit = 
stackBase.addOffsetTo(-getStackSize()); diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java index d7eb16d330a..7a360942572 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/JavaVFrame.java @@ -82,6 +82,10 @@ public abstract class JavaVFrame extends VFrame { if (mark.hasMonitor() && ( // we have marked ourself as pending on this monitor mark.monitor().equals(thread.getCurrentPendingMonitor()) || + // Owned anonymously means that we are not the owner of + // the monitor and must be waiting for the owner to + // exit it. + mark.monitor().isOwnedAnonymous() || // we are not the owner of this monitor !mark.monitor().isEntered(thread) )) { diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/LockingMode.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/LockingMode.java new file mode 100644 index 00000000000..2046fd075ad --- /dev/null +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/LockingMode.java @@ -0,0 +1,60 @@ +/* + * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +package sun.jvm.hotspot.runtime; + +import sun.jvm.hotspot.types.TypeDataBase; + + +/** Encapsulates the LockingMode enum in globalDefinitions.hpp in + the VM. 
*/ + +public class LockingMode { + private static int monitor; + private static int legacy; + private static int lightweight; + + static { + VM.registerVMInitializedObserver( + (o, d) -> initialize(VM.getVM().getTypeDataBase())); + } + + private static synchronized void initialize(TypeDataBase db) { + monitor = db.lookupIntConstant("LM_MONITOR").intValue(); + legacy = db.lookupIntConstant("LM_LEGACY").intValue(); + lightweight = db.lookupIntConstant("LM_LIGHTWEIGHT").intValue(); + } + + public static int getMonitor() { + return monitor; + } + + public static int getLegacy() { + return legacy; + } + + public static int getLightweight() { + return lightweight; + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java index a8b1fa066c1..3f3c67fbf26 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/ObjectMonitor.java @@ -44,6 +44,7 @@ public class ObjectMonitor extends VMObject { private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { heap = VM.getVM().getObjectHeap(); Type type = db.lookupType("ObjectMonitor"); + sun.jvm.hotspot.types.Field f = type.getField("_header"); headerFieldOffset = f.getOffset(); f = type.getField("_object"); @@ -55,6 +56,8 @@ public class ObjectMonitor extends VMObject { contentionsField = new CIntField(type.getCIntegerField("_contentions"), 0); waitersField = new CIntField(type.getCIntegerField("_waiters"), 0); recursionsField = type.getCIntegerField("_recursions"); + + ANONYMOUS_OWNER = db.lookupLongConstant("ObjectMonitor::ANONYMOUS_OWNER").longValue(); } public ObjectMonitor(Address addr) { @@ -79,6 +82,10 @@ public class ObjectMonitor extends VMObject { return false; } + public boolean isOwnedAnonymous() { + return addr.getAddressAt(ownerFieldOffset).asLongValue() == ANONYMOUS_OWNER; + } + public Address owner() { return addr.getAddressAt(ownerFieldOffset); } // FIXME // void set_owner(void* owner); @@ -114,5 +121,7 @@ public class ObjectMonitor extends VMObject { private static CIntField contentionsField; private static CIntField waitersField; private static CIntegerField recursionsField; + private static long ANONYMOUS_OWNER; + // FIXME: expose platform-dependent stuff } diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java index 87ee52c7c3e..d0fa7c8d3f1 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java @@ -211,6 +211,7 @@ public class Threads { // refer to Threads::owning_thread_from_monitor_owner public JavaThread owningThreadFromMonitor(Address o) { + assert(VM.getVM().getCommandLineFlag("LockingMode").getInt() != LockingMode.getLightweight()); if (o == null) return null; for (int i = 0; i < getNumberOfThreads(); i++) { JavaThread thread = getJavaThreadAt(i); @@ -228,7 +229,24 @@ public class Threads { } public JavaThread owningThreadFromMonitor(ObjectMonitor monitor) { - return owningThreadFromMonitor(monitor.owner()); + if (VM.getVM().getCommandLineFlag("LockingMode").getInt() == LockingMode.getLightweight()) { + if (monitor.isOwnedAnonymous()) { + OopHandle object = monitor.object(); + for (int i = 0; i < getNumberOfThreads(); i++) { + JavaThread thread = getJavaThreadAt(i); + 
if (thread.isLockOwned(object)) { + return thread; + } + } + throw new InternalError("We should have found a thread that owns the anonymous lock"); + } + // Owner can only be threads at this point. + Address o = monitor.owner(); + if (o == null) return null; + return new JavaThread(o); + } else { + return owningThreadFromMonitor(monitor.owner()); + } } // refer to Threads::get_pending_threads
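The serviceability agent changes above (JavaThread.isLockOwned and Threads.owningThreadFromMonitor) resolve anonymous owners by walking each thread's lock stack through the exported _base[0] and _top offsets; as the SA walk shows, _top is kept as an offset comparable against those field offsets rather than as an element count, presumably so generated code can address the current slot with a single thread-relative load. A toy model of the structure being walked, using a plain index and an illustrative capacity for readability:

#include <cassert>

// Toy model of the per-thread lock stack that the SA code above iterates.
struct LockStackModel {
  static constexpr int CAPACITY = 8;   // illustrative only
  const void* _base[CAPACITY];
  int _top = 0;

  bool can_push() const { return _top < CAPACITY; }

  void push(const void* obj) {         // enter(): after a successful fast-lock CAS
    assert(can_push());
    _base[_top++] = obj;
  }

  bool contains(const void* obj) const {
    for (int i = 0; i < _top; i++) {
      if (_base[i] == obj) return true;
    }
    return false;
  }

  void remove(const void* obj) {       // exit(): drop the entry, keeping order
    for (int i = 0; i < _top; i++) {
      if (_base[i] != obj) continue;
      for (int j = i; j + 1 < _top; j++) {
        _base[j] = _base[j + 1];
      }
      _top--;
      return;
    }
  }
};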