mirror of https://github.com/openjdk/jdk.git
synced 2025-08-28 07:14:30 +02:00
6988353: refactor contended sync subsystem

Reduce complexity by factoring synchronizer.cpp

Reviewed-by: dholmes, never, coleenp

parent daa052114f
commit 22929fb78f
29 changed files with 4556 additions and 4792 deletions
@@ -2995,8 +2995,8 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
   // crash Linux VM, see notes in os_linux.cpp.
   main_thread->create_stack_guard_pages();
 
-  // Initialize Java-Leve synchronization subsystem
-  ObjectSynchronizer::Initialize() ;
+  // Initialize Java-Level synchronization subsystem
+  ObjectMonitor::Initialize() ;
 
   // Initialize global modules
   jint status = init_globals();
@@ -3965,215 +3965,272 @@ void Threads::print_on_error(outputStream* st, Thread* current, char* buf, int b
   }
 }
 
+// Internal SpinLock and Mutex
+// Based on ParkEvent
 
-// Lifecycle management for TSM ParkEvents.
-// ParkEvents are type-stable (TSM).
-// In our particular implementation they happen to be immortal.
+// Ad-hoc mutual exclusion primitives: SpinLock and Mux
 //
-// We manage concurrency on the FreeList with a CAS-based
-// detach-modify-reattach idiom that avoids the ABA problems
-// that would otherwise be present in a simple CAS-based
-// push-pop implementation. (push-one and pop-all)
+// We employ SpinLocks _only for low-contention, fixed-length
+// short-duration critical sections where we're concerned
+// about native mutex_t or HotSpot Mutex:: latency.
+// The mux construct provides a spin-then-block mutual exclusion
+// mechanism.
 //
-// Caveat: Allocate() and Release() may be called from threads
-// other than the thread associated with the Event!
-// If we need to call Allocate() when running as the thread in
-// question then look for the PD calls to initialize native TLS.
-// Native TLS (Win32/Linux/Solaris) can only be initialized or
-// accessed by the associated thread.
-// See also pd_initialize().
+// Testing has shown that contention on the ListLock guarding gFreeList
+// is common. If we implement ListLock as a simple SpinLock it's common
+// for the JVM to devolve to yielding with little progress. This is true
+// despite the fact that the critical sections protected by ListLock are
+// extremely short.
 //
-// Note that we could defer associating a ParkEvent with a thread
-// until the 1st time the thread calls park(). unpark() calls to
-// an unprovisioned thread would be ignored. The first park() call
-// for a thread would allocate and associate a ParkEvent and return
-// immediately.
+// TODO-FIXME: ListLock should be of type SpinLock.
+// We should make this a 1st-class type, integrated into the lock
+// hierarchy as leaf-locks. Critically, the SpinLock structure
+// should have sufficient padding to avoid false-sharing and excessive
+// cache-coherency traffic.
 
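The detach-modify-reattach (DMR) idiom described in the removed comment above is the heart of the free-list code on both sides of this diff. A minimal sketch of the idea in portable C++11, assuming std::atomic in place of HotSpot's Atomic::cmpxchg_ptr; the Node, g_free_list, push, and dmr_pop names are illustrative, not HotSpot's:

// Hypothetical stand-alone sketch of the DMR free-list idiom (not HotSpot code).
#include <atomic>
#include <cstdio>

struct Node { Node* next; int id; };

std::atomic<Node*> g_free_list{nullptr};

// Push-one: plain lock-free push. Pushers never dereference the old head,
// so ABA is harmless on this side ("half" lock-free, as the diff puts it).
void push(Node* n) {
  Node* head = g_free_list.load();
  do { n->next = head; } while (!g_free_list.compare_exchange_weak(head, n));
}

// Pop-one via DMR: 1) detach the whole list with one swap, privatizing it;
// 2) extract the head; 3) reattach the residue; 4) if new nodes arrived in
// the meantime, detach and merge them, then retry. Because the detacher owns
// the entire detached segment, no node can be recycled "under" it -- which is
// exactly the ABA hazard of a naive CAS-based pop.
Node* dmr_pop() {
  Node* list = g_free_list.exchange(nullptr);        // 1: detach
  if (list == nullptr) return nullptr;
  Node* take = list;                                 // 2: extract the head
  Node* rest = list->next;
  while (rest != nullptr) {                          // 3: reattach the residue
    Node* expected = nullptr;
    if (g_free_list.compare_exchange_strong(expected, rest)) break;
    Node* arrivals = g_free_list.exchange(nullptr);  // new nodes arrived
    Node* tail = rest;
    while (tail->next != nullptr) tail = tail->next;
    tail->next = arrivals;                           // 4: merge and retry
  }
  take->next = nullptr;
  return take;
}

int main() {
  Node a{nullptr, 1}, b{nullptr, 2};
  push(&a); push(&b);
  std::printf("popped id=%d\n", dmr_pop()->id);      // prints: popped id=2
  return 0;
}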
-volatile int ParkEvent::ListLock = 0 ;
-ParkEvent * volatile ParkEvent::FreeList = NULL ;
+typedef volatile int SpinLockT ;
 
-ParkEvent * ParkEvent::Allocate (Thread * t) {
-  // In rare cases -- JVM_RawMonitor* operations -- we can find t == null.
-  ParkEvent * ev ;
-
-  // Start by trying to recycle an existing but unassociated
-  // ParkEvent from the global free list.
-  for (;;) {
-    ev = FreeList ;
-    if (ev == NULL) break ;
-    // 1: Detach - sequester or privatize the list
-    // Tantamount to ev = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, ev) != ev) {
-       continue ;
-    }
-
-    // We've detached the list. The list in-hand is now
-    // local to this thread. This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    ParkEvent * List = ev->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        ParkEvent * Arv = (ParkEvent *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived. Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
-        }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        ParkEvent * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
+void Thread::SpinAcquire (volatile int * adr, const char * LockName) {
+  if (Atomic::cmpxchg (1, adr, 0) == 0) {
+     return ;   // normal fast-path return
+  }
+
+  // Slow-path : We've encountered contention -- Spin/Yield/Block strategy.
+  TEVENT (SpinAcquire - ctx) ;
+  int ctr = 0 ;
+  int Yields = 0 ;
+  for (;;) {
+     while (*adr != 0) {
+        ++ctr ;
+        if ((ctr & 0xFFF) == 0 || !os::is_MP()) {
+           if (Yields > 5) {
+             // Consider using a simple NakedSleep() instead.
+             // Then SpinAcquire could be called by non-JVM threads.
+             Thread::current()->_ParkEvent->park(1) ;
+           } else {
+             os::NakedYield() ;
+             ++Yields ;
+           }
+        } else {
+           SpinPause() ;
+        }
+     }
+     if (Atomic::cmpxchg (1, adr, 0) == 0) return ;
+  }
+}
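SpinAcquire's slow path above is a classic spin, then yield, then block backoff. A rough portable rendering, assuming std::atomic for the lock word, std::this_thread::yield() standing in for os::NakedYield(), a short sleep standing in for the one-millisecond _ParkEvent->park(1), and a plain relaxed load loop in place of SpinPause(); all names here are illustrative:

// Hypothetical sketch of a spin/yield/block lock (not HotSpot code).
#include <atomic>
#include <chrono>
#include <thread>

std::atomic<int> lock_word{0};

void spin_acquire() {
  int expected = 0;
  if (lock_word.compare_exchange_strong(expected, 1)) return;  // fast path

  int ctr = 0, yields = 0;
  for (;;) {
    while (lock_word.load(std::memory_order_relaxed) != 0) {
      // Spin mostly, but periodically back off: yield a few times,
      // then sleep briefly (the sleep stands in for park(1)).
      if ((++ctr & 0xFFF) == 0) {
        if (yields > 5) {
          std::this_thread::sleep_for(std::chrono::milliseconds(1));
        } else {
          std::this_thread::yield();
          ++yields;
        }
      }
    }
    expected = 0;
    if (lock_word.compare_exchange_strong(expected, 1)) return;
  }
}

void spin_release() {
  // Release store: writes inside the critical section may not sink past it.
  lock_word.store(0, std::memory_order_release);
}

int main() {
  int shared = 0;
  std::thread t([&] { spin_acquire(); ++shared; spin_release(); });
  spin_acquire(); ++shared; spin_release();
  t.join();
  return shared == 2 ? 0 : 1;
}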
 
-  if (ev != NULL) {
-    guarantee (ev->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new ParkEvent.
-    // In rare cases an allocating thread might detach a long list --
-    // installing null into FreeList -- and then stall or be obstructed.
-    // A 2nd thread calling Allocate() would see FreeList == null.
-    // The list held privately by the 1st thread is unavailable to the 2nd thread.
-    // In that case the 2nd thread would have to materialize a new ParkEvent,
-    // even though free ParkEvents existed in the system. In this case we end up
-    // with more ParkEvents in circulation than we need, but the race is
-    // rare and the outcome is benign. Ideally, the # of extant ParkEvents
-    // is equal to the maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the freelist
-    // can be transiently inaccessible. At worst we may end up with the
-    // # of ParkEvents in circulation slightly above the ideal.
-    // Note that if we didn't have the TSM/immortal constraint, then
-    // when reattaching, above, we could trim the list.
-    ev = new ParkEvent () ;
-    guarantee ((intptr_t(ev) & 0xFF) == 0, "invariant") ;
-  }
-  ev->reset() ;                     // courtesy to caller
-  ev->AssociatedWith = t ;          // Associate ev with t
-  ev->FreeNext       = NULL ;
-  return ev ;
-}
-
-void ParkEvent::Release (ParkEvent * ev) {
-  if (ev == NULL) return ;
-  guarantee (ev->FreeNext == NULL, "invariant") ;
-  ev->AssociatedWith = NULL ;
-  for (;;) {
-    // Push ev onto FreeList
-    // The mechanism is "half" lock-free.
-    ParkEvent * List = FreeList ;
-    ev->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (ev, &FreeList, List) == List) break ;
-  }
-}
+void Thread::SpinRelease (volatile int * adr) {
+  assert (*adr != 0, "invariant") ;
+  OrderAccess::fence() ;   // guarantee at least release consistency.
+  // Roach-motel semantics.
+  // It's safe if subsequent LDs and STs float "up" into the critical section,
+  // but prior LDs and STs within the critical section can't be allowed
+  // to reorder or float past the ST that releases the lock.
+  *adr = 0 ;
+}
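The "roach-motel" constraint in SpinRelease above maps directly onto C++11 memory ordering. HotSpot's OrderAccess::fence() is a full two-way fence, which is stronger than needed; the minimum the comment asks for is release ordering on the unlocking store. A hypothetical fragment, with illustrative names:

// Minimal illustration of roach-motel release ordering (not HotSpot code).
#include <atomic>

std::atomic<int> lock_word{1};      // assume: the lock is currently held
int protected_data = 0;

void release_lock() {
  protected_data = 42;              // store inside the critical section
  // Fence-then-plain-store, mirroring OrderAccess::fence() + "*adr = 0":
  // later accesses may float up into the critical section, but the store
  // above cannot sink below the releasing store of the lock word.
  std::atomic_thread_fence(std::memory_order_release);
  lock_word.store(0, std::memory_order_relaxed);
}

int main() { release_lock(); return lock_word.load(); }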
 
+// muxAcquire and muxRelease:
+//
+// *  muxAcquire and muxRelease support a single-word lock-word construct.
+//    The LSB of the word is set IFF the lock is held.
+//    The remainder of the word points to the head of a singly-linked list
+//    of threads blocked on the lock.
+//
+// *  The current implementation of muxAcquire-muxRelease uses its own
+//    dedicated Thread._MuxEvent instance. If we're interested in
+//    minimizing the peak number of extant ParkEvent instances then
+//    we could eliminate _MuxEvent and "borrow" _ParkEvent as long
+//    as certain invariants were satisfied. Specifically, care would need
+//    to be taken with regards to consuming unpark() "permits".
+//    A safe rule of thumb is that a thread would never call muxAcquire()
+//    if it's enqueued (cxq, EntryList, WaitList, etc) and will subsequently
+//    park(). Otherwise the _ParkEvent park() operation in muxAcquire() could
+//    consume an unpark() permit intended for monitorenter, for instance.
+//    One way around this would be to widen the restricted-range semaphore
+//    implemented in park(). Another alternative would be to provide
+//    multiple instances of the PlatformEvent() for each thread. One
+//    instance would be dedicated to muxAcquire-muxRelease, for instance.
+//
+// *  Usage:
+//    -- Only as leaf locks
+//    -- for short-term locking only as muxAcquire does not perform
+//       thread state transitions.
+//
+// Alternatives:
+// *  We could implement muxAcquire and muxRelease with MCS or CLH locks
+//    but with parking or spin-then-park instead of pure spinning.
+// *  Use Taura-Oyama-Yonezawa locks.
+// *  It's possible to construct a 1-0 lock if we encode the lockword as
+//    (List,LockByte). Acquire will CAS the full lockword while Release
+//    will STB 0 into the LockByte. The 1-0 scheme admits stranding, so
+//    acquiring threads use timers (ParkTimed) to detect and recover from
+//    the stranding window. Thread/Node structures must be aligned on 256-byte
+//    boundaries by using placement-new.
+// *  Augment MCS with advisory back-link fields maintained with CAS().
+//    Pictorially: LockWord -> T1 <-> T2 <-> T3 <-> ... <-> Tn <-> Owner.
+//    The validity of the backlinks must be ratified before we trust the value.
+//    If the backlinks are invalid the exiting thread must back-track through
+//    the forward links, which are always trustworthy.
+// *  Add a successor indication. The LockWord is currently encoded as
+//    (List, LOCKBIT:1). We could also add a SUCCBIT or an explicit _succ variable
+//    to provide the usual futile-wakeup optimization.
+//    See RTStt for details.
+// *  Consider schedctl.sc_nopreempt to cover the critical section.
+//
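The lock-word encoding described above (LSB as the held bit, remaining bits as a pointer to the head of the waiter list) can be sketched in portable C++ as below, assuming at-least-2-byte-aligned waiter records so the LSB is free, and with a spin on a per-waiter flag standing in for ParkEvent park()/unpark(). All names (Waiter, mux, mux_acquire, mux_release) are illustrative, not HotSpot's:

// Hypothetical sketch of a single-word list-based lock (not HotSpot code).
#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>

struct Waiter {
  Waiter* next = nullptr;
  std::atomic<int> on_list{0};
};

constexpr std::uintptr_t LOCKBIT = 1;
std::atomic<std::uintptr_t> mux{0};

void mux_acquire(Waiter* self) {
  for (;;) {
    std::uintptr_t w = mux.load();
    if ((w & LOCKBIT) == 0) {                  // lock is free:
      if (mux.compare_exchange_weak(w, w | LOCKBIT)) return;  // set the LSB
      continue;
    }
    // Lock held: push ourselves as the new list head, keeping the LSB set.
    self->on_list.store(1);
    self->next = reinterpret_cast<Waiter*>(w & ~LOCKBIT);
    if (mux.compare_exchange_weak(w, reinterpret_cast<std::uintptr_t>(self) | LOCKBIT)) {
      while (self->on_list.load() != 0) {      // park() in the real code
        std::this_thread::yield();
      }
    }
  }
}

void mux_release() {
  for (;;) {
    std::uintptr_t w = mux.load();
    assert(w & LOCKBIT);                       // caller must hold the lock
    if (w == LOCKBIT) {                        // no waiters: just clear the LSB
      if (mux.compare_exchange_weak(w, 0)) return;
      continue;
    }
    // Pop the head waiter and release the lock in one CAS, then wake it.
    Waiter* head = reinterpret_cast<Waiter*>(w & ~LOCKBIT);
    if (mux.compare_exchange_weak(w, reinterpret_cast<std::uintptr_t>(head->next))) {
      head->on_list.store(0);                  // unpark() in the real code
      return;
    }
  }
}

int main() {
  Waiter w1, w2;
  int shared = 0;
  std::thread a([&] { mux_acquire(&w1); ++shared; mux_release(); });
  std::thread b([&] { mux_acquire(&w2); ++shared; mux_release(); });
  a.join();
  b.join();
  return shared == 2 ? 0 : 1;
}

As in muxRelease below, this sketch wakes the most recently enqueued thread first: unfair, but it keeps hot threads hot.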
 
+typedef volatile intptr_t MutexT ;      // Mux Lock-word
+enum MuxBits { LOCKBIT = 1 } ;
+
+void Thread::muxAcquire (volatile intptr_t * Lock, const char * LockName) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+     return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * const Self = Thread::current()->_MuxEvent ;
+  assert ((intptr_t(Self) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+     int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+     // Optional spin phase: spin-then-park strategy
+     while (--its >= 0) {
+       w = *Lock ;
+       if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          return ;
+       }
+     }
+
+     Self->reset() ;
+     Self->OnList = intptr_t(Lock) ;
+     // The following fence() isn't _strictly necessary as the subsequent
+     // CAS() both serializes execution and ratifies the fetched *Lock value.
+     OrderAccess::fence();
+     for (;;) {
+        w = *Lock ;
+        if ((w & LOCKBIT) == 0) {
+            if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+                Self->OnList = 0 ;   // hygiene - allows stronger asserts
+                return ;
+            }
+            continue ;      // Interference -- *Lock changed -- Just retry
+        }
+        assert (w & LOCKBIT, "invariant") ;
+        Self->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+        if (Atomic::cmpxchg_ptr (intptr_t(Self)|LOCKBIT, Lock, w) == w) break ;
+     }
+
+     while (Self->OnList != 0) {
+        Self->park() ;
+     }
+  }
+}
 
-// Override operator new and delete so we can ensure that the
-// least significant byte of ParkEvent addresses is 0.
-// Beware that excessive address alignment is undesirable
-// as it can result in D$ index usage imbalance as
-// well as bank access imbalance on Niagara-like platforms,
-// although Niagara's hash function should help.
-
-void * ParkEvent::operator new (size_t sz) {
-  return (void *) ((intptr_t (CHeapObj::operator new (sz + 256)) + 256) & -256) ;
-}
-
-void ParkEvent::operator delete (void * a) {
-  // ParkEvents are type-stable and immortal ...
-  ShouldNotReachHere();
-}
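The over-allocate-and-mask trick in ParkEvent::operator new above can be shown in isolation. A hypothetical sketch with malloc standing in for CHeapObj::operator new; note the raw pointer is deliberately not retained, which only works because ParkEvents are immortal, as operator delete's ShouldNotReachHere() enforces. A reusable allocator would have to remember the original pointer to free it.

// Hypothetical sketch of 256-byte address alignment by over-allocation.
#include <cstdint>
#include <cstdio>
#include <cstdlib>

void* alloc_aligned_256(std::size_t sz) {
  void* raw = std::malloc(sz + 256);   // slack so we can round up safely
  if (raw == nullptr) return nullptr;
  // (raw + 256) & ~255 rounds up to the next 256-byte boundary, so the
  // low byte of the returned address is always zero.
  return (void*)(((std::uintptr_t)raw + 256) & ~(std::uintptr_t)255);
}

int main() {
  void* p = alloc_aligned_256(64);
  std::printf("%p -> low byte: %02x\n", p, (unsigned)((std::uintptr_t)p & 0xFF));
  // Intentionally leaked, mirroring the "immortal" ParkEvent design.
  return 0;
}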
 
-// 6399321 As a temporary measure we copied & modified the ParkEvent::
-// allocate() and release() code for use by Parkers. The Parker:: forms
-// will eventually be removed as we consolidate and shift over to ParkEvents
-// for both builtin synchronization and JSR166 operations.
-
-volatile int Parker::ListLock = 0 ;
-Parker * volatile Parker::FreeList = NULL ;
-
-Parker * Parker::Allocate (JavaThread * t) {
-  guarantee (t != NULL, "invariant") ;
-  Parker * p ;
-
-  // Start by trying to recycle an existing but unassociated
-  // Parker from the global free list.
-  for (;;) {
-    p = FreeList ;
-    if (p == NULL) break ;
-    // 1: Detach
-    // Tantamount to p = Swap (&FreeList, NULL)
-    if (Atomic::cmpxchg_ptr (NULL, &FreeList, p) != p) {
-       continue ;
-    }
-
-    // We've detached the list. The list in-hand is now
-    // local to this thread. This thread can operate on the
-    // list without risk of interference from other threads.
-    // 2: Extract -- pop the 1st element from the list.
-    Parker * List = p->FreeNext ;
-    if (List == NULL) break ;
-    for (;;) {
-        // 3: Try to reattach the residual list
-        guarantee (List != NULL, "invariant") ;
-        Parker * Arv = (Parker *) Atomic::cmpxchg_ptr (List, &FreeList, NULL) ;
-        if (Arv == NULL) break ;
-
-        // New nodes arrived. Try to detach the recent arrivals.
-        if (Atomic::cmpxchg_ptr (NULL, &FreeList, Arv) != Arv) {
-            continue ;
-        }
-        guarantee (Arv != NULL, "invariant") ;
-        // 4: Merge Arv into List
-        Parker * Tail = List ;
-        while (Tail->FreeNext != NULL) Tail = Tail->FreeNext ;
-        Tail->FreeNext = Arv ;
-    }
-    break ;
-  }
-
-  if (p != NULL) {
-    guarantee (p->AssociatedWith == NULL, "invariant") ;
-  } else {
-    // Do this the hard way -- materialize a new Parker.
-    // In rare cases an allocating thread might detach
-    // a long list -- installing null into FreeList -- and
-    // then stall. Another thread calling Allocate() would see
-    // FreeList == null and then invoke the ctor. In this case we
-    // end up with more Parkers in circulation than we need, but
-    // the race is rare and the outcome is benign.
-    // Ideally, the # of extant Parkers is equal to the
-    // maximum # of threads that existed at any one time.
-    // Because of the race mentioned above, segments of the
-    // freelist can be transiently inaccessible. At worst
-    // we may end up with the # of Parkers in circulation
-    // slightly above the ideal.
-    p = new Parker() ;
-  }
-  p->AssociatedWith = t ;          // Associate p with t
-  p->FreeNext       = NULL ;
-  return p ;
-}
 
-void Parker::Release (Parker * p) {
-  if (p == NULL) return ;
-  guarantee (p->AssociatedWith != NULL, "invariant") ;
-  guarantee (p->FreeNext == NULL, "invariant") ;
-  p->AssociatedWith = NULL ;
-  for (;;) {
-    // Push p onto FreeList
-    Parker * List = FreeList ;
-    p->FreeNext = List ;
-    if (Atomic::cmpxchg_ptr (p, &FreeList, List) == List) break ;
-  }
-}
+void Thread::muxAcquireW (volatile intptr_t * Lock, ParkEvent * ev) {
+  intptr_t w = Atomic::cmpxchg_ptr (LOCKBIT, Lock, 0) ;
+  if (w == 0) return ;
+  if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+    return ;
+  }
+
+  TEVENT (muxAcquire - Contention) ;
+  ParkEvent * ReleaseAfter = NULL ;
+  if (ev == NULL) {
+    ev = ReleaseAfter = ParkEvent::Allocate (NULL) ;
+  }
+  assert ((intptr_t(ev) & LOCKBIT) == 0, "invariant") ;
+  for (;;) {
+    guarantee (ev->OnList == 0, "invariant") ;
+    int its = (os::is_MP() ? 100 : 0) + 1 ;
+
+    // Optional spin phase: spin-then-park strategy
+    while (--its >= 0) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0 && Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+        if (ReleaseAfter != NULL) {
+          ParkEvent::Release (ReleaseAfter) ;
+        }
+        return ;
+      }
+    }
+
+    ev->reset() ;
+    ev->OnList = intptr_t(Lock) ;
+    // The following fence() isn't _strictly necessary as the subsequent
+    // CAS() both serializes execution and ratifies the fetched *Lock value.
+    OrderAccess::fence();
+    for (;;) {
+      w = *Lock ;
+      if ((w & LOCKBIT) == 0) {
+        if (Atomic::cmpxchg_ptr (w|LOCKBIT, Lock, w) == w) {
+          ev->OnList = 0 ;
+          // We call ::Release while holding the outer lock, thus
+          // artificially lengthening the critical section.
+          // Consider deferring the ::Release() until the subsequent unlock(),
+          // after we've dropped the outer lock.
+          if (ReleaseAfter != NULL) {
+            ParkEvent::Release (ReleaseAfter) ;
+          }
+          return ;
+        }
+        continue ;      // Interference -- *Lock changed -- Just retry
+      }
+      assert (w & LOCKBIT, "invariant") ;
+      ev->ListNext = (ParkEvent *) (w & ~LOCKBIT );
+      if (Atomic::cmpxchg_ptr (intptr_t(ev)|LOCKBIT, Lock, w) == w) break ;
+    }
+
+    while (ev->OnList != 0) {
+      ev->park() ;
+    }
+  }
+}
 
+// Release() must extract a successor from the list and then wake that thread.
+// It can "pop" the front of the list or use a detach-modify-reattach (DMR) scheme
+// similar to that used by ParkEvent::Allocate() and ::Release(). A DMR-based
+// Release() would:
+// (A) CAS() or swap() null into *Lock, releasing the lock and detaching the list.
+// (B) Extract a successor from the private list "in-hand".
+// (C) Attempt to CAS() the residual back into *Lock over null.
+//     If there were any newly arrived threads the CAS() would fail.
+//     In that case Release() would detach the RATs, re-merge the list in-hand
+//     with the RATs and repeat as needed. Alternately, Release() might
+//     detach and extract a successor, but then pass the residual list to the wakee.
+//     The wakee would be responsible for reattaching and remerging before it
+//     competed for the lock.
+//
+// Both "pop" and DMR are immune from ABA corruption -- there can be
+// multiple concurrent pushers, but only one popper or detacher.
+// This implementation pops from the head of the list. This is unfair,
+// but tends to provide excellent throughput as hot threads remain hot.
+// (We wake recently run threads first.)
+
+void Thread::muxRelease (volatile intptr_t * Lock) {
+  for (;;) {
+    const intptr_t w = Atomic::cmpxchg_ptr (0, Lock, LOCKBIT) ;
+    assert (w & LOCKBIT, "invariant") ;
+    if (w == LOCKBIT) return ;
+    ParkEvent * List = (ParkEvent *) (w & ~LOCKBIT) ;
+    assert (List != NULL, "invariant") ;
+    assert (List->OnList == intptr_t(Lock), "invariant") ;
+    ParkEvent * nxt = List->ListNext ;
+
+    // The following CAS() releases the lock and pops the head element.
+    if (Atomic::cmpxchg_ptr (intptr_t(nxt), Lock, w) != w) {
+      continue ;
+    }
+    List->OnList = 0 ;
+    OrderAccess::fence() ;
+    List->unpark () ;
+    return ;
+  }
+}
 
 
 void Threads::verify() {
   ALL_JAVA_THREADS(p) {
     p->verify();