mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 15:24:43 +02:00
6821693: 64-bit TaskQueue capacity still too small
6821507: Alignment problem in GC taskqueue
Reviewed-by: tonyp, apetrusenko
This commit is contained in:
parent
1121581e07
commit
3474b7fa5b
1 changed files with 99 additions and 114 deletions
|
@ -22,94 +22,90 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifdef LP64
|
|
||||||
typedef juint TAG_TYPE;
|
|
||||||
// for a taskqueue size of 4M
|
|
||||||
#define LOG_TASKQ_SIZE 22
|
|
||||||
#else
|
|
||||||
typedef jushort TAG_TYPE;
|
|
||||||
// for a taskqueue size of 16K
|
|
||||||
#define LOG_TASKQ_SIZE 14
|
|
||||||
#endif
|
|
||||||
|
|
||||||
class TaskQueueSuper: public CHeapObj {
|
class TaskQueueSuper: public CHeapObj {
|
||||||
protected:
|
protected:
|
||||||
// The first free element after the last one pushed (mod _n).
|
// Internal type for indexing the queue; also used for the tag.
|
||||||
|
typedef NOT_LP64(uint16_t) LP64_ONLY(uint32_t) idx_t;
|
||||||
|
|
||||||
|
// The first free element after the last one pushed (mod N).
|
||||||
volatile uint _bottom;
|
volatile uint _bottom;
|
||||||
|
|
||||||
// log2 of the size of the queue.
|
enum {
|
||||||
enum SomeProtectedConstants {
|
N = 1 << NOT_LP64(14) LP64_ONLY(17), // Queue size: 16K or 128K
|
||||||
Log_n = LOG_TASKQ_SIZE
|
MOD_N_MASK = N - 1 // To compute x mod N efficiently.
|
||||||
};
|
};
|
||||||
#undef LOG_TASKQ_SIZE
|
|
||||||
|
|
||||||
// Size of the queue.
|
class Age {
|
||||||
uint n() { return (1 << Log_n); }
|
public:
|
||||||
// For computing "x mod n" efficiently.
|
Age(size_t data = 0) { _data = data; }
|
||||||
uint n_mod_mask() { return n() - 1; }
|
Age(const Age& age) { _data = age._data; }
|
||||||
|
Age(idx_t top, idx_t tag) { _fields._top = top; _fields._tag = tag; }
|
||||||
|
|
||||||
struct Age {
|
Age get() const volatile { return _data; }
|
||||||
TAG_TYPE _top;
|
void set(Age age) volatile { _data = age._data; }
|
||||||
TAG_TYPE _tag;
|
|
||||||
|
|
||||||
TAG_TYPE tag() const { return _tag; }
|
idx_t top() const volatile { return _fields._top; }
|
||||||
TAG_TYPE top() const { return _top; }
|
idx_t tag() const volatile { return _fields._tag; }
|
||||||
|
|
||||||
Age() { _tag = 0; _top = 0; }
|
// Increment top; if it wraps, increment tag also.
|
||||||
|
void increment() {
|
||||||
friend bool operator ==(const Age& a1, const Age& a2) {
|
_fields._top = increment_index(_fields._top);
|
||||||
return a1.tag() == a2.tag() && a1.top() == a2.top();
|
if (_fields._top == 0) ++_fields._tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Age cmpxchg(const Age new_age, const Age old_age) volatile {
|
||||||
|
return (size_t) Atomic::cmpxchg_ptr((intptr_t)new_age._data,
|
||||||
|
(volatile intptr_t *)&_data,
|
||||||
|
(intptr_t)old_age._data);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator ==(const Age& other) const { return _data == other._data; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct fields {
|
||||||
|
idx_t _top;
|
||||||
|
idx_t _tag;
|
||||||
};
|
};
|
||||||
Age _age;
|
union {
|
||||||
// These make sure we do single atomic reads and writes.
|
size_t _data;
|
||||||
Age get_age() {
|
fields _fields;
|
||||||
uint res = *(volatile uint*)(&_age);
|
};
|
||||||
return *(Age*)(&res);
|
};
|
||||||
|
|
||||||
|
volatile Age _age;
|
||||||
|
|
||||||
|
// These both operate mod N.
|
||||||
|
static uint increment_index(uint ind) {
|
||||||
|
return (ind + 1) & MOD_N_MASK;
|
||||||
}
|
}
|
||||||
void set_age(Age a) {
|
static uint decrement_index(uint ind) {
|
||||||
*(volatile uint*)(&_age) = *(uint*)(&a);
|
return (ind - 1) & MOD_N_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
TAG_TYPE get_top() {
|
// Returns a number in the range [0..N). If the result is "N-1", it should be
|
||||||
return get_age().top();
|
// interpreted as 0.
|
||||||
}
|
|
||||||
|
|
||||||
// These both operate mod _n.
|
|
||||||
uint increment_index(uint ind) {
|
|
||||||
return (ind + 1) & n_mod_mask();
|
|
||||||
}
|
|
||||||
uint decrement_index(uint ind) {
|
|
||||||
return (ind - 1) & n_mod_mask();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns a number in the range [0.._n). If the result is "n-1", it
|
|
||||||
// should be interpreted as 0.
|
|
||||||
uint dirty_size(uint bot, uint top) {
|
uint dirty_size(uint bot, uint top) {
|
||||||
return ((int)bot - (int)top) & n_mod_mask();
|
return (bot - top) & MOD_N_MASK;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the size corresponding to the given "bot" and "top".
|
// Returns the size corresponding to the given "bot" and "top".
|
||||||
uint size(uint bot, uint top) {
|
uint size(uint bot, uint top) {
|
||||||
uint sz = dirty_size(bot, top);
|
uint sz = dirty_size(bot, top);
|
||||||
// Has the queue "wrapped", so that bottom is less than top?
|
// Has the queue "wrapped", so that bottom is less than top? There's a
|
||||||
// There's a complicated special case here. A pair of threads could
|
// complicated special case here. A pair of threads could perform pop_local
|
||||||
// perform pop_local and pop_global operations concurrently, starting
|
// and pop_global operations concurrently, starting from a state in which
|
||||||
// from a state in which _bottom == _top+1. The pop_local could
|
// _bottom == _top+1. The pop_local could succeed in decrementing _bottom,
|
||||||
// succeed in decrementing _bottom, and the pop_global in incrementing
|
// and the pop_global in incrementing _top (in which case the pop_global
|
||||||
// _top (in which case the pop_global will be awarded the contested
|
// will be awarded the contested queue element.) The resulting state must
|
||||||
// queue element.) The resulting state must be interpreted as an empty
|
// be interpreted as an empty queue. (We only need to worry about one such
|
||||||
// queue. (We only need to worry about one such event: only the queue
|
// event: only the queue owner performs pop_local's, and several concurrent
|
||||||
// owner performs pop_local's, and several concurrent threads
|
// threads attempting to perform the pop_global will all perform the same
|
||||||
// attempting to perform the pop_global will all perform the same CAS,
|
// CAS, and only one can succeed.) Any stealing thread that reads after
|
||||||
// and only one can succeed. Any stealing thread that reads after
|
// either the increment or decrement will see an empty queue, and will not
|
||||||
// either the increment or decrement will see an empty queue, and will
|
// join the competitors. The "sz == -1 || sz == N-1" state will not be
|
||||||
// not join the competitors. The "sz == -1 || sz == _n-1" state will
|
// modified by concurrent queues, so the owner thread can reset the state to
|
||||||
// not be modified by concurrent queues, so the owner thread can reset
|
// _bottom == top so subsequent pushes will be performed normally.
|
||||||
// the state to _bottom == top so subsequent pushes will be performed
|
return (sz == N - 1) ? 0 : sz;
|
||||||
// normally.
|
|
||||||
if (sz == (n()-1)) return 0;
|
|
||||||
else return sz;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
@ -122,22 +118,21 @@ public:
|
||||||
// The "careful" version admits the possibility of pop_local/pop_global
|
// The "careful" version admits the possibility of pop_local/pop_global
|
||||||
// races.
|
// races.
|
||||||
uint size() {
|
uint size() {
|
||||||
return size(_bottom, get_top());
|
return size(_bottom, _age.top());
|
||||||
}
|
}
|
||||||
|
|
||||||
uint dirty_size() {
|
uint dirty_size() {
|
||||||
return dirty_size(_bottom, get_top());
|
return dirty_size(_bottom, _age.top());
|
||||||
}
|
}
|
||||||
|
|
||||||
void set_empty() {
|
void set_empty() {
|
||||||
_bottom = 0;
|
_bottom = 0;
|
||||||
_age = Age();
|
_age.set(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Maximum number of elements allowed in the queue. This is two less
|
// Maximum number of elements allowed in the queue. This is two less
|
||||||
// than the actual queue size, for somewhat complicated reasons.
|
// than the actual queue size, for somewhat complicated reasons.
|
||||||
uint max_elems() { return n() - 2; }
|
uint max_elems() { return N - 2; }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class E> class GenericTaskQueue: public TaskQueueSuper {
|
template<class E> class GenericTaskQueue: public TaskQueueSuper {
|
||||||
|
@ -179,12 +174,12 @@ private:
|
||||||
|
|
||||||
template<class E>
|
template<class E>
|
||||||
GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
|
GenericTaskQueue<E>::GenericTaskQueue():TaskQueueSuper() {
|
||||||
assert(sizeof(Age) == sizeof(int), "Depends on this.");
|
assert(sizeof(Age) == sizeof(size_t), "Depends on this.");
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class E>
|
template<class E>
|
||||||
void GenericTaskQueue<E>::initialize() {
|
void GenericTaskQueue<E>::initialize() {
|
||||||
_elems = NEW_C_HEAP_ARRAY(E, n());
|
_elems = NEW_C_HEAP_ARRAY(E, N);
|
||||||
guarantee(_elems != NULL, "Allocation failed.");
|
guarantee(_elems != NULL, "Allocation failed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -208,13 +203,13 @@ void GenericTaskQueue<E>::oops_do(OopClosure* f) {
|
||||||
|
|
||||||
template<class E>
|
template<class E>
|
||||||
bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
|
bool GenericTaskQueue<E>::push_slow(E t, uint dirty_n_elems) {
|
||||||
if (dirty_n_elems == n() - 1) {
|
if (dirty_n_elems == N - 1) {
|
||||||
// Actually means 0, so do the push.
|
// Actually means 0, so do the push.
|
||||||
uint localBot = _bottom;
|
uint localBot = _bottom;
|
||||||
_elems[localBot] = t;
|
_elems[localBot] = t;
|
||||||
_bottom = increment_index(localBot);
|
_bottom = increment_index(localBot);
|
||||||
return true;
|
return true;
|
||||||
} else
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -230,53 +225,45 @@ pop_local_slow(uint localBot, Age oldAge) {
|
||||||
// then have the owner thread do a pop followed by another push. Without
|
// then have the owner thread do a pop followed by another push. Without
|
||||||
// the incrementing of "tag", the pop_global's CAS could succeed,
|
// the incrementing of "tag", the pop_global's CAS could succeed,
|
||||||
// allowing it to believe it has claimed the stale element.)
|
// allowing it to believe it has claimed the stale element.)
|
||||||
Age newAge;
|
Age newAge((idx_t)localBot, oldAge.tag() + 1);
|
||||||
newAge._top = localBot;
|
|
||||||
newAge._tag = oldAge.tag() + 1;
|
|
||||||
// Perhaps a competing pop_global has already incremented "top", in which
|
// Perhaps a competing pop_global has already incremented "top", in which
|
||||||
// case it wins the element.
|
// case it wins the element.
|
||||||
if (localBot == oldAge.top()) {
|
if (localBot == oldAge.top()) {
|
||||||
Age tempAge;
|
|
||||||
// No competing pop_global has yet incremented "top"; we'll try to
|
// No competing pop_global has yet incremented "top"; we'll try to
|
||||||
// install new_age, thus claiming the element.
|
// install new_age, thus claiming the element.
|
||||||
assert(sizeof(Age) == sizeof(int), "Assumption about CAS unit.");
|
Age tempAge = _age.cmpxchg(newAge, oldAge);
|
||||||
*(uint*)&tempAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
|
|
||||||
if (tempAge == oldAge) {
|
if (tempAge == oldAge) {
|
||||||
// We win.
|
// We win.
|
||||||
assert(dirty_size(localBot, get_top()) != n() - 1,
|
assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
|
||||||
"Shouldn't be possible...");
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We fail; a completing pop_global gets the element. But the queue is
|
// We lose; a competing pop_global gets the element. But the queue is empty
|
||||||
// empty (and top is greater than bottom.) Fix this representation of
|
// and top is greater than bottom. Fix this representation of the empty queue
|
||||||
// the empty queue to become the canonical one.
|
// to become the canonical one.
|
||||||
set_age(newAge);
|
_age.set(newAge);
|
||||||
assert(dirty_size(localBot, get_top()) != n() - 1,
|
assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
|
||||||
"Shouldn't be possible...");
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class E>
|
template<class E>
|
||||||
bool GenericTaskQueue<E>::pop_global(E& t) {
|
bool GenericTaskQueue<E>::pop_global(E& t) {
|
||||||
Age newAge;
|
Age oldAge = _age.get();
|
||||||
Age oldAge = get_age();
|
|
||||||
uint localBot = _bottom;
|
uint localBot = _bottom;
|
||||||
uint n_elems = size(localBot, oldAge.top());
|
uint n_elems = size(localBot, oldAge.top());
|
||||||
if (n_elems == 0) {
|
if (n_elems == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
t = _elems[oldAge.top()];
|
t = _elems[oldAge.top()];
|
||||||
newAge = oldAge;
|
Age newAge(oldAge);
|
||||||
newAge._top = increment_index(newAge.top());
|
newAge.increment();
|
||||||
if ( newAge._top == 0 ) newAge._tag++;
|
Age resAge = _age.cmpxchg(newAge, oldAge);
|
||||||
Age resAge;
|
|
||||||
*(uint*)&resAge = Atomic::cmpxchg(*(uint*)&newAge, (volatile uint*)&_age, *(uint*)&oldAge);
|
|
||||||
// Note that using "_bottom" here might fail, since a pop_local might
|
// Note that using "_bottom" here might fail, since a pop_local might
|
||||||
// have decremented it.
|
// have decremented it.
|
||||||
assert(dirty_size(localBot, newAge._top) != n() - 1,
|
assert(dirty_size(localBot, newAge.top()) != N - 1, "sanity");
|
||||||
"Shouldn't be possible...");
|
return resAge == oldAge;
|
||||||
return (resAge == oldAge);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class E>
|
template<class E>
|
||||||
|
@ -459,7 +446,7 @@ public:
|
||||||
return offer_termination(NULL);
|
return offer_termination(NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
// As above, but it also terminates of the should_exit_termination()
|
// As above, but it also terminates if the should_exit_termination()
|
||||||
// method of the terminator parameter returns true. If terminator is
|
// method of the terminator parameter returns true. If terminator is
|
||||||
// NULL, then it is ignored.
|
// NULL, then it is ignored.
|
||||||
bool offer_termination(TerminatorTerminator* terminator);
|
bool offer_termination(TerminatorTerminator* terminator);
|
||||||
|
@ -492,11 +479,10 @@ template<class E> inline bool GenericTaskQueue<E>::push(E t) {
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
uint localBot = _bottom;
|
uint localBot = _bottom;
|
||||||
assert((localBot >= 0) && (localBot < n()), "_bottom out of range.");
|
assert((localBot >= 0) && (localBot < N), "_bottom out of range.");
|
||||||
TAG_TYPE top = get_top();
|
idx_t top = _age.top();
|
||||||
uint dirty_n_elems = dirty_size(localBot, top);
|
uint dirty_n_elems = dirty_size(localBot, top);
|
||||||
assert((dirty_n_elems >= 0) && (dirty_n_elems < n()),
|
assert((dirty_n_elems >= 0) && (dirty_n_elems < N), "n_elems out of range.");
|
||||||
"n_elems out of range.");
|
|
||||||
if (dirty_n_elems < max_elems()) {
|
if (dirty_n_elems < max_elems()) {
|
||||||
_elems[localBot] = t;
|
_elems[localBot] = t;
|
||||||
_bottom = increment_index(localBot);
|
_bottom = increment_index(localBot);
|
||||||
|
@ -517,12 +503,12 @@ template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
|
||||||
return true;
|
return true;
|
||||||
#else
|
#else
|
||||||
uint localBot = _bottom;
|
uint localBot = _bottom;
|
||||||
// This value cannot be n-1. That can only occur as a result of
|
// This value cannot be N-1. That can only occur as a result of
|
||||||
// the assignment to bottom in this method. If it does, this method
|
// the assignment to bottom in this method. If it does, this method
|
||||||
// resets the size( to 0 before the next call (which is sequential,
|
// resets the size to 0 before the next call (which is sequential,
|
||||||
// since this is pop_local.)
|
// since this is pop_local.)
|
||||||
uint dirty_n_elems = dirty_size(localBot, get_top());
|
uint dirty_n_elems = dirty_size(localBot, _age.top());
|
||||||
assert(dirty_n_elems != n() - 1, "Shouldn't be possible...");
|
assert(dirty_n_elems != N - 1, "Shouldn't be possible...");
|
||||||
if (dirty_n_elems == 0) return false;
|
if (dirty_n_elems == 0) return false;
|
||||||
localBot = decrement_index(localBot);
|
localBot = decrement_index(localBot);
|
||||||
_bottom = localBot;
|
_bottom = localBot;
|
||||||
|
@ -534,15 +520,14 @@ template<class E> inline bool GenericTaskQueue<E>::pop_local(E& t) {
|
||||||
// If there's still at least one element in the queue, based on the
|
// If there's still at least one element in the queue, based on the
|
||||||
// "_bottom" and "age" we've read, then there can be no interference with
|
// "_bottom" and "age" we've read, then there can be no interference with
|
||||||
// a "pop_global" operation, and we're done.
|
// a "pop_global" operation, and we're done.
|
||||||
TAG_TYPE tp = get_top(); // XXX
|
idx_t tp = _age.top(); // XXX
|
||||||
if (size(localBot, tp) > 0) {
|
if (size(localBot, tp) > 0) {
|
||||||
assert(dirty_size(localBot, tp) != n() - 1,
|
assert(dirty_size(localBot, tp) != N - 1, "sanity");
|
||||||
"Shouldn't be possible...");
|
|
||||||
return true;
|
return true;
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, the queue contained exactly one element; we take the slow
|
// Otherwise, the queue contained exactly one element; we take the slow
|
||||||
// path.
|
// path.
|
||||||
return pop_local_slow(localBot, get_age());
|
return pop_local_slow(localBot, _age.get());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue