From 3fe6e0faca78e8106e33a3a53de78f8864be92b7 Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Tue, 26 Sep 2023 03:40:11 +0000 Subject: [PATCH] 8308479: [s390x] Implement alternative fast-locking scheme Reviewed-by: lucy, mdoerr --- .../cpu/s390/c1_MacroAssembler_s390.cpp | 73 ++++---- .../cpu/s390/c1_MacroAssembler_s390.hpp | 21 ++- src/hotspot/cpu/s390/interp_masm_s390.cpp | 177 +++++++++++------- src/hotspot/cpu/s390/macroAssembler_s390.cpp | 172 ++++++++++++++--- src/hotspot/cpu/s390/macroAssembler_s390.hpp | 2 + src/hotspot/cpu/s390/sharedRuntime_s390.cpp | 7 +- src/hotspot/share/runtime/arguments.cpp | 3 +- 7 files changed, 320 insertions(+), 135 deletions(-) diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp index 02b1e730c59..2824389d5b8 100644 --- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 SAP SE. All rights reserved. + * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -83,60 +83,63 @@ void C1_MacroAssembler::verified_entry(bool breakAtEntry) { if (breakAtEntry) z_illtrap(0xC1); } -void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { +void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case) { const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert_different_registers(hdr, obj, disp_hdr); - verify_oop(obj, FILE_AND_LINE); + const Register tmp = Z_R1_scratch; + + assert_different_registers(Rmark, Roop, Rbox, tmp); + + verify_oop(Roop, FILE_AND_LINE); // Load object header. 
- z_lg(hdr, Address(obj, hdr_offset)); + z_lg(Rmark, Address(Roop, hdr_offset)); // Save object being locked into the BasicObjectLock... - z_stg(obj, Address(disp_hdr, BasicObjectLock::obj_offset())); + z_stg(Roop, Address(Rbox, BasicObjectLock::obj_offset())); if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(Z_R1_scratch, obj); - testbit(Address(Z_R1_scratch, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); + load_klass(tmp, Roop); + testbit(Address(tmp, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); z_btrue(slow_case); } assert(LockingMode != LM_MONITOR, "LM_MONITOR is already handled, by emit_lock()"); if (LockingMode == LM_LIGHTWEIGHT) { - Unimplemented(); + fast_lock(Roop, Rmark, tmp, slow_case); } else if (LockingMode == LM_LEGACY) { NearLabel done; // and mark it as unlocked. - z_oill(hdr, markWord::unlocked_value); + z_oill(Rmark, markWord::unlocked_value); // Save unlocked object header into the displaced header location on the stack. - z_stg(hdr, Address(disp_hdr, (intptr_t) 0)); + z_stg(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // Test if object header is still the same (i.e. unlocked), and if so, store the // displaced header address in the object header. If it is not the same, get the // object header instead. - z_csg(hdr, disp_hdr, hdr_offset, obj); + z_csg(Rmark, Rbox, hdr_offset, Roop); // If the object header was the same, we're done. branch_optimized(Assembler::bcondEqual, done); - // If the object header was not the same, it is now in the hdr register. + // If the object header was not the same, it is now in the Rmark register. 
// => Test if it is a stack pointer into the same stack (recursive locking), i.e.: // - // 1) (hdr & markWord::lock_mask_in_place) == 0 - // 2) rsp <= hdr - // 3) hdr <= rsp + page_size + // 1) (Rmark & markWord::lock_mask_in_place) == 0 + // 2) rsp <= Rmark + // 3) Rmark <= rsp + page_size // // These 3 tests can be done by evaluating the following expression: // - // (hdr - Z_SP) & (~(page_size-1) | markWord::lock_mask_in_place) + // (Rmark - Z_SP) & (~(page_size-1) | markWord::lock_mask_in_place) // // assuming both the stack pointer and page_size have their least // significant 2 bits cleared and page_size is a power of 2 - z_sgr(hdr, Z_SP); + z_sgr(Rmark, Z_SP); load_const_optimized(Z_R0_scratch, (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place)); - z_ngr(hdr, Z_R0_scratch); // AND sets CC (result eq/ne 0). + z_ngr(Rmark, Z_R0_scratch); // AND sets CC (result eq/ne 0). // For recursive locking, the result is zero. => Save it in the displaced header - // location (null in the displaced hdr location indicates recursive locking). - z_stg(hdr, Address(disp_hdr, (intptr_t) 0)); + // location (null in the displaced Rmark location indicates recursive locking). + z_stg(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); // Otherwise we don't care about the result and handle locking via runtime call. 
branch_optimized(Assembler::bcondNotZero, slow_case); // done @@ -144,35 +147,41 @@ void C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hd } } -void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { - const int aligned_mask = BytesPerWord -1; +void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case) { const int hdr_offset = oopDesc::mark_offset_in_bytes(); - assert_different_registers(hdr, obj, disp_hdr); + + assert_different_registers(Rmark, Roop, Rbox); + NearLabel done; if (LockingMode != LM_LIGHTWEIGHT) { // Load displaced header. - z_ltg(hdr, Address(disp_hdr, (intptr_t) 0)); - // If the loaded hdr is null we had recursive locking, and we are done. + z_ltg(Rmark, Address(Rbox, BasicLock::displaced_header_offset_in_bytes())); + // If the loaded Rmark is null we had recursive locking, and we are done. z_bre(done); } // Load object. - z_lg(obj, Address(disp_hdr, BasicObjectLock::obj_offset())); - verify_oop(obj, FILE_AND_LINE); + z_lg(Roop, Address(Rbox, BasicObjectLock::obj_offset())); + verify_oop(Roop, FILE_AND_LINE); if (LockingMode == LM_LIGHTWEIGHT) { - Unimplemented(); - } else { + const Register tmp = Z_R1_scratch; + z_lg(Rmark, Address(Roop, hdr_offset)); + z_lgr(tmp, Rmark); + z_nill(tmp, markWord::monitor_value); + z_brnz(slow_case); + fast_unlock(Roop, Rmark, tmp, slow_case); + } else if (LockingMode == LM_LEGACY) { // Test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object. If the object header is not pointing to // the displaced header, get the object header instead. - z_csg(disp_hdr, hdr, hdr_offset, obj); + z_csg(Rbox, Rmark, hdr_offset, Roop); // If the object header was not pointing to the displaced header, // we do unlocking via runtime call. 
branch_optimized(Assembler::bcondNotEqual, slow_case); - // done } + // done bind(done); } diff --git a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.hpp b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.hpp index 1ff914b7b71..7a4f76af154 100644 --- a/src/hotspot/cpu/s390/c1_MacroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/c1_MacroAssembler_s390.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 SAP SE. All rights reserved. + * Copyright (c) 2016, 2023 SAP SE. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,17 +41,18 @@ void initialize_body(Register objectFields, Register len_in_bytes, Register Rzero); // locking - // hdr : Used to hold locked markWord to be CASed into obj, contents destroyed. - // obj : Must point to the object to lock, contents preserved. - // disp_hdr: Must point to the displaced header location, contents preserved. - // Returns code offset at which to add null check debug information. - void lock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case); + // Rmark : Used to hold locked markWord to be CASed into obj, contents destroyed. + // Roop : Must point to the object to lock, contents preserved. + // Rbox : Must point to the displaced header location, contents preserved. + // Z_R1_scratch : Used as temp and will be killed + void lock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case); // unlocking - // hdr : Used to hold original markWord to be CASed back into obj, contents destroyed. - // obj : Must point to the object to lock, contents preserved. - // disp_hdr: Must point to the displaced header location, contents destroyed. - void unlock_object(Register hdr, Register obj, Register lock, Label& slow_case); + // Rmark : Used to hold original markWord to be CASed back into obj, contents destroyed. 
+ // Roop : Must point to the object to lock, contents preserved. + // Rbox : Must point to the displaced header location, contents destroyed. + // Z_R1_scratch : Used as temp and will be killed + void unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case); void initialize_object( Register obj, // result: Pointer to object after successful allocation. diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index 5265aff7b53..d7c5179e27c 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -984,9 +984,10 @@ void InterpreterMacroAssembler::remove_activation(TosState state, // lock object // // Registers alive -// monitor - Address of the BasicObjectLock to be used for locking, +// monitor (Z_R10) - Address of the BasicObjectLock to be used for locking, // which must be initialized with the object to lock. -// object - Address of the object to be locked. +// object (Z_R11, Z_R2) - Address of the object to be locked. 
+// templateTable (monitorenter) is using Z_R2 for object void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { if (LockingMode == LM_MONITOR) { @@ -994,7 +995,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { return; } - // template code: + // template code: (for LM_LEGACY) // // markWord displaced_header = obj->mark().set_unlocked(); // monitor->lock()->set_displaced_header(displaced_header); @@ -1008,68 +1009,77 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // InterpreterRuntime::monitorenter(THREAD, monitor); // } - const Register displaced_header = Z_ARG5; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + + const Register header = Z_ARG5; const Register object_mark_addr = Z_ARG4; const Register current_header = Z_ARG5; + const Register tmp = Z_R1_scratch; - NearLabel done; - NearLabel slow_case; + NearLabel done, slow_case; - // markWord displaced_header = obj->mark().set_unlocked(); + // markWord header = obj->mark().set_unlocked(); - // Load markWord from object into displaced_header. - z_lg(displaced_header, oopDesc::mark_offset_in_bytes(), object); + // Load markWord from object into header. + z_lg(header, hdr_offset, object); if (DiagnoseSyncOnValueBasedClasses != 0) { - load_klass(Z_R1_scratch, object); - testbit(Address(Z_R1_scratch, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); + load_klass(tmp, object); + testbit(Address(tmp, Klass::access_flags_offset()), exact_log2(JVM_ACC_IS_VALUE_BASED_CLASS)); z_btrue(slow_case); } - // Set displaced_header to be (markWord of object | UNLOCK_VALUE). - z_oill(displaced_header, markWord::unlocked_value); + if (LockingMode == LM_LIGHTWEIGHT) { + fast_lock(object, /* mark word */ header, tmp, slow_case); + } else if (LockingMode == LM_LEGACY) { - // monitor->lock()->set_displaced_header(displaced_header); + // Set header to be (markWord of object | UNLOCK_VALUE). 
+ // This will not change anything if it was unlocked before. + z_oill(header, markWord::unlocked_value); - // Initialize the box (Must happen before we update the object mark!). - z_stg(displaced_header, in_bytes(BasicObjectLock::lock_offset()) + - BasicLock::displaced_header_offset_in_bytes(), monitor); + // monitor->lock()->set_displaced_header(displaced_header); + const int lock_offset = in_bytes(BasicObjectLock::lock_offset()); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); - // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) { + // Initialize the box (Must happen before we update the object mark!). + z_stg(header, mark_offset, monitor); - // Store stack address of the BasicObjectLock (this is monitor) into object. - add2reg(object_mark_addr, oopDesc::mark_offset_in_bytes(), object); + // if (Atomic::cmpxchg(/*addr*/obj->mark_addr(), /*cmp*/displaced_header, /*ex=*/monitor) == displaced_header) { - z_csg(displaced_header, monitor, 0, object_mark_addr); - assert(current_header==displaced_header, "must be same register"); // Identified two registers from z/Architecture. + // not necessary, use offset in instruction directly. + // add2reg(object_mark_addr, hdr_offset, object); - z_bre(done); + // Store stack address of the BasicObjectLock (this is monitor) into object. + z_csg(header, monitor, hdr_offset, object); + assert(current_header == header, + "must be same register"); // Identified two registers from z/Architecture. - // } else if (THREAD->is_lock_owned((address)displaced_header)) - // // Simple recursive case. - // monitor->lock()->set_displaced_header(nullptr); + z_bre(done); - // We did not see an unlocked object so try the fast recursive case. + // } else if (THREAD->is_lock_owned((address)displaced_header)) + // // Simple recursive case. 
+ // monitor->lock()->set_displaced_header(nullptr); - // Check if owner is self by comparing the value in the markWord of object - // (current_header) with the stack pointer. - z_sgr(current_header, Z_SP); + // We did not see an unlocked object so try the fast recursive case. - assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + // Check if owner is self by comparing the value in the markWord of object + // (current_header) with the stack pointer. + z_sgr(current_header, Z_SP); - // The prior sequence "LGR, NGR, LTGR" can be done better - // (Z_R1 is temp and not used after here). - load_const_optimized(Z_R0, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place)); - z_ngr(Z_R0, current_header); // AND sets CC (result eq/ne 0) + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); - // If condition is true we are done and hence we can store 0 in the displaced - // header indicating it is a recursive lock and be done. - z_brne(slow_case); - z_release(); // Membar unnecessary on zarch AND because the above csg does a sync before and after. - z_stg(Z_R0/*==0!*/, in_bytes(BasicObjectLock::lock_offset()) + - BasicLock::displaced_header_offset_in_bytes(), monitor); + // The prior sequence "LGR, NGR, LTGR" can be done better + // (Z_R1 is temp and not used after here). + load_const_optimized(Z_R0, (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place)); + z_ngr(Z_R0, current_header); // AND sets CC (result eq/ne 0) + + // If condition is true we are done and hence we can store 0 in the displaced + // header indicating it is a recursive lock and be done. + z_brne(slow_case); + z_release(); // Membar unnecessary on zarch AND because the above csg does a sync before and after. + z_stg(Z_R0/*==0!*/, mark_offset, monitor); + } z_bru(done); - // } else { // // Slow path. 
// InterpreterRuntime::monitorenter(THREAD, monitor); @@ -1077,8 +1087,16 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // None of the above fast optimizations worked so we have to get into the // slow case of monitor enter. bind(slow_case); - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor); - + if (LockingMode == LM_LIGHTWEIGHT) { + // for fast locking we need to use monitorenter_obj, see interpreterRuntime.cpp + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter_obj), + object); + } else { + call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), + monitor); + } // } bind(done); @@ -1099,7 +1117,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) } // else { - // template code: + // template code: (for LM_LEGACY): // // if ((displaced_header = monitor->displaced_header()) == nullptr) { // // Recursive unlock. Mark the monitor unlocked by setting the object field to null. @@ -1112,10 +1130,12 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) // InterpreterRuntime::monitorexit(monitor); // } - const Register displaced_header = Z_ARG4; - const Register current_header = Z_R1; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + + const Register header = Z_ARG4; + const Register current_header = Z_R1_scratch; Address obj_entry(monitor, BasicObjectLock::obj_offset()); - Label done; + Label done, slow_case; if (object == noreg) { // In the template interpreter, we must assure that the object @@ -1125,35 +1145,63 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) z_lg(object, obj_entry); } - assert_different_registers(monitor, object, displaced_header, current_header); + assert_different_registers(monitor, object, header, current_header); // if ((displaced_header = monitor->displaced_header()) == nullptr) { // // Recursive unlock. 
Mark the monitor unlocked by setting the object field to null. // monitor->set_obj(nullptr); - clear_mem(obj_entry, sizeof(oop)); + // monitor->lock()->set_displaced_header(displaced_header); + const int lock_offset = in_bytes(BasicObjectLock::lock_offset()); + const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); - // Test first if we are in the fast recursive case. - MacroAssembler::load_and_test_long(displaced_header, - Address(monitor, in_bytes(BasicObjectLock::lock_offset()) + - BasicLock::displaced_header_offset_in_bytes())); - z_bre(done); // displaced_header == 0 -> goto done + clear_mem(obj_entry, sizeof(oop)); + if (LockingMode != LM_LIGHTWEIGHT) { + // Test first if we are in the fast recursive case. + MacroAssembler::load_and_test_long(header, Address(monitor, mark_offset)); + z_bre(done); // header == 0 -> goto done + } // } else if (Atomic::cmpxchg(obj->mark_addr(), monitor, displaced_header) == monitor) { // // We swapped the unlocked mark in displaced_header into the object's mark word. // monitor->set_obj(nullptr); // If we still have a lightweight lock, unlock the object and be done. + if (LockingMode == LM_LIGHTWEIGHT) { + // Check for non-symmetric locking. This is allowed by the spec and the interpreter + // must handle it. - // The markword is expected to be at offset 0. - assert(oopDesc::mark_offset_in_bytes() == 0, "unlock_object: review code below"); + Register tmp = current_header; - // We have the displaced header in displaced_header. If the lock is still - // lightweight, it will contain the monitor address and we'll store the - // displaced header back into the object's mark word. - z_lgr(current_header, monitor); - z_csg(current_header, displaced_header, 0, object); - z_bre(done); + // First check for lock-stack underflow. 
+ z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); + compareU32_and_branch(tmp, (unsigned)LockStack::start_offset(), Assembler::bcondNotHigh, slow_case); + + // Then check if the top of the lock-stack matches the unlocked object. + z_aghi(tmp, -oopSize); + z_lg(tmp, Address(Z_thread, tmp)); + compare64_and_branch(tmp, object, Assembler::bcondNotEqual, slow_case); + + z_lg(header, Address(object, hdr_offset)); + z_lgr(tmp, header); + z_nill(tmp, markWord::monitor_value); + z_brne(slow_case); + + fast_unlock(object, header, tmp, slow_case); + + z_bru(done); + } else { + // The markword is expected to be at offset 0. + // This is not required on s390, at least not here. + assert(hdr_offset == 0, "unlock_object: review code below"); + + // We have the displaced header in header. If the lock is still + // lightweight, it will contain the monitor address and we'll store the + // displaced header back into the object's mark word. + z_lgr(current_header, monitor); + z_csg(current_header, header, hdr_offset, object); + z_bre(done); + } // } else { // // Slow path. @@ -1161,6 +1209,7 @@ void InterpreterMacroAssembler::unlock_object(Register monitor, Register object) // The lock has been converted into a heavy lock and hence // we need to get into the slow case. + bind(slow_case); z_stg(object, obj_entry); // Restore object entry, has been cleared above. 
call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), monitor); @@ -2221,6 +2270,6 @@ void InterpreterMacroAssembler::pop_interpreter_frame(Register return_pc, Regist void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { if (VerifyFPU) { - unimplemented("verfiyFPU"); + unimplemented("verifyFPU"); } } diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index 08dc39cd41e..4da8037582c 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -3166,11 +3166,15 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis // Handle existing monitor. // The object has an existing monitor iff (mark & monitor_value) != 0. guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); - z_lr(temp, displacedHeader); + z_lgr(temp, displacedHeader); z_nill(temp, markWord::monitor_value); z_brne(object_has_monitor); - if (LockingMode != LM_MONITOR) { + if (LockingMode == LM_MONITOR) { + // Set NE to indicate 'failure' -> take slow-path + z_ltgr(oop, oop); + z_bru(done); + } else if (LockingMode == LM_LEGACY) { // Set mark to markWord | markWord::unlocked_value. z_oill(displacedHeader, markWord::unlocked_value); @@ -3187,24 +3191,24 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis z_csg(displacedHeader, box, 0, oop); assert(currentHeader == displacedHeader, "must be same register"); // Identified two registers from z/Architecture. z_bre(done); + + // We did not see an unlocked object so try the fast recursive case. 
+ + z_sgr(currentHeader, Z_SP); + load_const_optimized(temp, (~(os::vm_page_size() - 1) | markWord::lock_mask_in_place)); + + z_ngr(currentHeader, temp); + // z_brne(done); + // z_release(); + z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box); + + z_bru(done); } else { - // Set NE to indicate 'failure' -> take slow-path - z_ltgr(oop, oop); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + fast_lock(oop, displacedHeader, temp, done); z_bru(done); } - // We did not see an unlocked object so try the fast recursive case. - - z_sgr(currentHeader, Z_SP); - load_const_optimized(temp, (~(os::vm_page_size()-1) | markWord::lock_mask_in_place)); - - z_ngr(currentHeader, temp); - // z_brne(done); - // z_release(); - z_stg(currentHeader/*==0 or not 0*/, BasicLock::displaced_header_offset_in_bytes(), box); - - z_bru(done); - Register zero = temp; Register monitor_tagged = displacedHeader; // Tagged with markWord::monitor_value. bind(object_has_monitor); @@ -3215,8 +3219,10 @@ void MacroAssembler::compiler_fast_lock_object(Register oop, Register box, Regis z_lghi(zero, 0); // If m->owner is null, then csg succeeds and sets m->owner=THREAD and CR=EQ. z_csg(zero, Z_thread, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), monitor_tagged); - // Store a non-null value into the box. - z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box); + if (LockingMode != LM_LIGHTWEIGHT) { + // Store a non-null value into the box. + z_stg(box, BasicLock::displaced_header_offset_in_bytes(), box); + } #ifdef ASSERT z_brne(done); // We've acquired the monitor, check some invariants. 
@@ -3239,11 +3245,13 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg Register temp = temp1; Register monitor = temp2; + const int hdr_offset = oopDesc::mark_offset_in_bytes(); + Label done, object_has_monitor; BLOCK_COMMENT("compiler_fast_unlock_object {"); - if (LockingMode != LM_MONITOR) { + if (LockingMode == LM_LEGACY) { // Find the lock address and load the displaced header from the stack. // if the displaced header is zero, we have a recursive unlock. load_and_test_long(displacedHeader, Address(box, BasicLock::displaced_header_offset_in_bytes())); @@ -3252,27 +3260,41 @@ void MacroAssembler::compiler_fast_unlock_object(Register oop, Register box, Reg // Handle existing monitor. // The object has an existing monitor iff (mark & monitor_value) != 0. - z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); + z_lg(currentHeader, hdr_offset, oop); guarantee(Immediate::is_uimm16(markWord::monitor_value), "must be half-word"); + if (LockingMode == LM_LIGHTWEIGHT) { + z_lgr(temp, currentHeader); + } z_nill(currentHeader, markWord::monitor_value); z_brne(object_has_monitor); - if (LockingMode != LM_MONITOR) { + if (LockingMode == LM_MONITOR) { + // Set NE to indicate 'failure' -> take slow-path + z_ltgr(oop, oop); + z_bru(done); + } else if (LockingMode == LM_LEGACY) { // Check if it is still a light weight lock, this is true if we see // the stack address of the basicLock in the markWord of the object // copy box to currentHeader such that csg does not kill it. z_lgr(currentHeader, box); z_csg(currentHeader, displacedHeader, 0, oop); - z_bru(done); // Csg sets CR as desired. + z_bru(done); // csg sets CR as desired. } else { - // Set NE to indicate 'failure' -> take slow-path - z_ltgr(oop, oop); + assert(LockingMode == LM_LIGHTWEIGHT, "must be"); + + // don't load currentHead again from stack-top after monitor check, as it is possible + // some other thread modified it. 
+ // currentHeader is altered, but its contents are copied in temp as well + fast_unlock(oop, temp, currentHeader, done); z_bru(done); } + // In case of LM_LIGHTWEIGHT, we may reach here with (temp & ObjectMonitor::ANONYMOUS_OWNER) != 0. + // This is handled like owner thread mismatches: We take the slow path. + // Handle existing monitor. bind(object_has_monitor); - z_lg(currentHeader, oopDesc::mark_offset_in_bytes(), oop); // CurrentHeader is tagged with monitor_value set. + z_lg(currentHeader, hdr_offset, oop); // CurrentHeader is tagged with monitor_value set. load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); z_brne(done); load_and_test_long(temp, Address(currentHeader, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); @@ -5622,3 +5644,103 @@ SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value SkipIfEqual::~SkipIfEqual() { _masm->bind(_label); } + +// Implements fast-locking. +// Branches to slow upon failure to lock the object. +// Falls through upon success. +// +// - obj: the object to be locked, contents preserved. +// - hdr: the header, already loaded from obj, contents destroyed. +// Note: make sure Z_R1 is not manipulated here when C2 compiler is in play +void MacroAssembler::fast_lock(Register obj, Register hdr, Register temp, Label& slow_case) { + + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, hdr, temp); + + // First we need to check if the lock-stack has room for pushing the object reference. 
+ z_lgf(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); + + compareU32_and_branch(temp, (unsigned)LockStack::end_offset()-1, bcondHigh, slow_case); + + // attempting a fast_lock + // Load (object->mark() | 1) into hdr + z_oill(hdr, markWord::unlocked_value); + + z_lgr(temp, hdr); + + // Clear lock-bits from hdr (locked state) + z_xilf(temp, markWord::unlocked_value); + + z_csg(hdr, temp, oopDesc::mark_offset_in_bytes(), obj); + branch_optimized(Assembler::bcondNotEqual, slow_case); + + // After successful lock, push object on lock-stack + z_lgf(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); + z_stg(obj, Address(Z_thread, temp)); + z_ahi(temp, oopSize); + z_st(temp, Address(Z_thread, JavaThread::lock_stack_top_offset())); + + // as locking was successful, set CC to EQ + z_cr(temp, temp); +} + +// Implements fast-unlocking. +// Branches to slow upon failure. +// Falls through upon success. +// +// - obj: the object to be unlocked +// - hdr: the (pre-loaded) header of the object, will be destroyed +// - Z_R1_scratch: will be killed in case of Interpreter & C1 Compiler +void MacroAssembler::fast_unlock(Register obj, Register hdr, Register tmp, Label& slow) { + + assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking"); + assert_different_registers(obj, hdr, tmp); + +#ifdef ASSERT + { + // Check that hdr is fast-locked. + Label hdr_ok; + z_lgr(tmp, hdr); + z_nill(tmp, markWord::lock_mask_in_place); + z_bre(hdr_ok); + stop("Header is not fast-locked"); + bind(hdr_ok); + } + { + // The following checks rely on the fact that LockStack is only ever modified by + // its owning thread, even if the lock got inflated concurrently; removal of LockStack + // entries after inflation will happen delayed in that case. + + // Check for lock-stack underflow. 
+ Label stack_ok; + z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); + compareU32_and_branch(tmp, (unsigned)LockStack::start_offset(), Assembler::bcondHigh, stack_ok); + stop("Lock-stack underflow"); + bind(stack_ok); + } + { + // Check if the top of the lock-stack matches the unlocked object. + Label tos_ok; + z_aghi(tmp, -oopSize); + z_lg(tmp, Address(Z_thread, tmp)); + compare64_and_branch(tmp, obj, Assembler::bcondEqual, tos_ok); + stop("Top of lock-stack does not match the unlocked object"); + bind(tos_ok); + } +#endif // ASSERT + + z_lgr(tmp, hdr); + z_oill(tmp, markWord::unlocked_value); + z_csg(hdr, tmp, oopDesc::mark_offset_in_bytes(), obj); + branch_optimized(Assembler::bcondNotEqual, slow); + + // After successful unlock, pop object from lock-stack +#ifdef ASSERT + z_lgf(tmp, Address(Z_thread, JavaThread::lock_stack_top_offset())); + z_aghi(tmp, -oopSize); + z_agr(tmp, Z_thread); + z_xc(0, oopSize-1, tmp, 0, tmp); // wipe out lock-stack entry +#endif + z_alsi(in_bytes(JavaThread::lock_stack_top_offset()), Z_thread, -oopSize); // pop object + z_cr(tmp, tmp); // set CC to EQ +} diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp index fad35cf08b2..dd3e04c2a17 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp @@ -722,6 +722,8 @@ class MacroAssembler: public Assembler { void compiler_fast_lock_object(Register oop, Register box, Register temp1, Register temp2); void compiler_fast_unlock_object(Register oop, Register box, Register temp1, Register temp2); + void fast_lock(Register obj, Register hdr, Register tmp, Label& slow); + void fast_unlock(Register obj, Register hdr, Register tmp, Label& slow); void resolve_jobject(Register value, Register tmp1, Register tmp2); diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index 26469e2fb3d..05b607ec03c 100644 --- 
a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -1716,7 +1716,9 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ add2reg(r_box, lock_offset, Z_SP); // Try fastpath for locking. - // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!) + // Fast_lock kills r_temp_1, r_temp_2. + // In case of DiagnoseSyncOnValueBasedClasses the content of Z_R1_scratch + // will be destroyed, so avoid using Z_R1 as temp here. __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2); __ z_bre(done); @@ -1915,7 +1917,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ add2reg(r_box, lock_offset, Z_SP); // Try fastpath for unlocking. - __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp. + // Fast_unlock kills r_tmp1, r_tmp2. + __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); __ z_bre(done); // Slow path for unlocking. diff --git a/src/hotspot/share/runtime/arguments.cpp b/src/hotspot/share/runtime/arguments.cpp index 2d52fa29a0f..1bdd20336ba 100644 --- a/src/hotspot/share/runtime/arguments.cpp +++ b/src/hotspot/share/runtime/arguments.cpp @@ -1903,8 +1903,7 @@ bool Arguments::check_vm_args_consistency() { } #endif - -#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM) && !defined(PPC64) +#if !defined(X86) && !defined(AARCH64) && !defined(RISCV64) && !defined(ARM) && !defined(PPC64) && !defined(S390) if (LockingMode == LM_LIGHTWEIGHT) { FLAG_SET_CMDLINE(LockingMode, LM_LEGACY); warning("New lightweight locking not supported on this platform");