6962947: shared TaskQueue statistics

Reviewed-by: tonyp, ysr
This commit is contained in:
John Coomes 2010-07-16 21:33:21 -07:00
parent 4efdcb87d1
commit daf491a814
11 changed files with 236 additions and 137 deletions

View file

@ -664,19 +664,14 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
return; return;
} }
// XXX use a global constant instead of 64! typedef Padded<OopTaskQueue> PaddedOopTaskQueue;
typedef struct OopTaskQueuePadded {
OopTaskQueue work_queue;
char pad[64 - sizeof(OopTaskQueue)]; // prevent false sharing
} OopTaskQueuePadded;
for (i = 0; i < num_queues; i++) { for (i = 0; i < num_queues; i++) {
OopTaskQueuePadded *q_padded = new OopTaskQueuePadded(); PaddedOopTaskQueue *q = new PaddedOopTaskQueue();
if (q_padded == NULL) { if (q == NULL) {
warning("work_queue allocation failure."); warning("work_queue allocation failure.");
return; return;
} }
_task_queues->register_queue(i, &q_padded->work_queue); _task_queues->register_queue(i, q);
} }
for (i = 0; i < num_queues; i++) { for (i = 0; i < num_queues; i++) {
_task_queues->queue(i)->initialize(); _task_queues->queue(i)->initialize();

View file

@ -539,10 +539,9 @@ ParNewGeneration(ReservedSpace rs, size_t initial_byte_size, int level)
guarantee(_task_queues != NULL, "task_queues allocation failure."); guarantee(_task_queues != NULL, "task_queues allocation failure.");
for (uint i1 = 0; i1 < ParallelGCThreads; i1++) { for (uint i1 = 0; i1 < ParallelGCThreads; i1++) {
ObjToScanQueuePadded *q_padded = new ObjToScanQueuePadded(); ObjToScanQueue *q = new ObjToScanQueue();
guarantee(q_padded != NULL, "work_queue Allocation failure."); guarantee(q != NULL, "work_queue Allocation failure.");
_task_queues->register_queue(i1, q);
_task_queues->register_queue(i1, &q_padded->work_queue);
} }
for (uint i2 = 0; i2 < ParallelGCThreads; i2++) for (uint i2 = 0; i2 < ParallelGCThreads; i2++)

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -33,8 +33,8 @@ class ParEvacuateFollowersClosure;
// but they must be here to allow ParScanClosure::do_oop_work to be defined // but they must be here to allow ParScanClosure::do_oop_work to be defined
// in genOopClosures.inline.hpp. // in genOopClosures.inline.hpp.
typedef OopTaskQueue ObjToScanQueue; typedef Padded<OopTaskQueue> ObjToScanQueue;
typedef OopTaskQueueSet ObjToScanQueueSet; typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
// Enable this to get push/pop/steal stats. // Enable this to get push/pop/steal stats.
const int PAR_STATS_ENABLED = 0; const int PAR_STATS_ENABLED = 0;
@ -304,12 +304,6 @@ class ParNewGeneration: public DefNewGeneration {
friend class ParEvacuateFollowersClosure; friend class ParEvacuateFollowersClosure;
private: private:
// XXX use a global constant instead of 64!
struct ObjToScanQueuePadded {
ObjToScanQueue work_queue;
char pad[64 - sizeof(ObjToScanQueue)]; // prevent false sharing
};
// The per-worker-thread work queues // The per-worker-thread work queues
ObjToScanQueueSet* _task_queues; ObjToScanQueueSet* _task_queues;

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2007, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -26,7 +26,8 @@
class ParScanThreadState; class ParScanThreadState;
class ParNewGeneration; class ParNewGeneration;
typedef OopTaskQueueSet ObjToScanQueueSet; typedef Padded<OopTaskQueue> ObjToScanQueue;
typedef GenericTaskQueueSet<ObjToScanQueue> ObjToScanQueueSet;
class ParallelTaskTerminator; class ParallelTaskTerminator;
class ParScanClosure: public OopsInGenClosure { class ParScanClosure: public OopsInGenClosure {

View file

@ -90,10 +90,7 @@ void PSPromotionManager::pre_scavenge() {
} }
void PSPromotionManager::post_scavenge() { void PSPromotionManager::post_scavenge() {
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(if (PrintGCDetails && ParallelGCVerbose) print_stats());
print_stats();
#endif // PS_PM_STATS
for (uint i = 0; i < ParallelGCThreads + 1; i++) { for (uint i = 0; i < ParallelGCThreads + 1; i++) {
PSPromotionManager* manager = manager_array(i); PSPromotionManager* manager = manager_array(i);
if (UseDepthFirstScavengeOrder) { if (UseDepthFirstScavengeOrder) {
@ -105,37 +102,58 @@ void PSPromotionManager::post_scavenge() {
} }
} }
#if PS_PM_STATS #if TASKQUEUE_STATS
void
PSPromotionManager::print_taskqueue_stats(uint i) const {
  // Select the statistics of whichever claimed stack matches the scavenge
  // order reported by depth_first().
  const TaskQueueStats* queue_stats;
  if (depth_first()) {
    queue_stats = &_claimed_stack_depth.stats;
  } else {
    queue_stats = &_claimed_stack_breadth.stats;
  }

  // Emit one row on tty: the index passed by the caller, the queue counters
  // with their default formatting, then a line break.
  tty->print("%3u ", i);
  queue_stats->print();
  tty->cr();
}
void void
PSPromotionManager::print_stats(uint i) { PSPromotionManager::print_local_stats(uint i) const {
tty->print_cr("---- GC Worker %2d Stats", i); #define FMT " " SIZE_FORMAT_W(10)
tty->print_cr(" total pushes %8d", _total_pushes); tty->print_cr("%3u" FMT FMT FMT FMT, i, _masked_pushes, _masked_steals,
tty->print_cr(" masked pushes %8d", _masked_pushes); _arrays_chunked, _array_chunks_processed);
tty->print_cr(" overflow pushes %8d", _overflow_pushes); #undef FMT
tty->print_cr(" max overflow length %8d", _max_overflow_length);
tty->print_cr("");
tty->print_cr(" arrays chunked %8d", _arrays_chunked);
tty->print_cr(" array chunks processed %8d", _array_chunks_processed);
tty->print_cr("");
tty->print_cr(" total steals %8d", _total_steals);
tty->print_cr(" masked steals %8d", _masked_steals);
tty->print_cr("");
} }
// Heading lines for the promotion-manager-local statistics table.
// print_stats() writes every entry of this array, one per line, immediately
// before the rows produced by print_local_stats(). Each of those rows holds
// the thread index followed by _masked_pushes, _masked_steals,
// _arrays_chunked and _array_chunks_processed, in that order, so the headings
// here must be kept in the same column order.
static const char* const pm_stats_hdr[] = {
" --------masked------- arrays array",
"thr push steal chunked chunks",
"--- ---------- ---------- ---------- ----------"
};
void void
PSPromotionManager::print_stats() { PSPromotionManager::print_stats() {
tty->print_cr("== GC Tasks Stats (%s), GC %3d", const bool df = UseDepthFirstScavengeOrder;
(UseDepthFirstScavengeOrder) ? "Depth-First" : "Breadth-First", tty->print_cr("== GC Task Stats (%s-First), GC %3d", df ? "Depth" : "Breadth",
Universe::heap()->total_collections()); Universe::heap()->total_collections());
for (uint i = 0; i < ParallelGCThreads+1; ++i) { tty->print("thr "); TaskQueueStats::print_header(1); tty->cr();
PSPromotionManager* manager = manager_array(i); tty->print("--- "); TaskQueueStats::print_header(2); tty->cr();
manager->print_stats(i); for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
manager_array(i)->print_taskqueue_stats(i);
}
const uint hlines = sizeof(pm_stats_hdr) / sizeof(pm_stats_hdr[0]);
for (uint i = 0; i < hlines; ++i) tty->print_cr(pm_stats_hdr[i]);
for (uint i = 0; i < ParallelGCThreads + 1; ++i) {
manager_array(i)->print_local_stats(i);
} }
} }
#endif // PS_PM_STATS void
PSPromotionManager::reset_stats() {
TaskQueueStats& stats = depth_first() ?
claimed_stack_depth()->stats : claimed_stack_breadth()->stats;
stats.reset();
_masked_pushes = _masked_steals = 0;
_arrays_chunked = _array_chunks_processed = 0;
}
#endif // TASKQUEUE_STATS
PSPromotionManager::PSPromotionManager() { PSPromotionManager::PSPromotionManager() {
ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
@ -189,16 +207,7 @@ void PSPromotionManager::reset() {
_prefetch_queue.clear(); _prefetch_queue.clear();
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(reset_stats());
_total_pushes = 0;
_masked_pushes = 0;
_overflow_pushes = 0;
_max_overflow_length = 0;
_arrays_chunked = 0;
_array_chunks_processed = 0;
_total_steals = 0;
_masked_steals = 0;
#endif // PS_PM_STATS
} }
@ -423,14 +432,9 @@ oop PSPromotionManager::copy_to_survivor_space(oop o, bool depth_first) {
new_obj->is_objArray() && new_obj->is_objArray() &&
PSChunkLargeArrays) { PSChunkLargeArrays) {
// we'll chunk it // we'll chunk it
#if PS_PM_STATS
++_arrays_chunked;
#endif // PS_PM_STATS
oop* const masked_o = mask_chunked_array_oop(o); oop* const masked_o = mask_chunked_array_oop(o);
push_depth(masked_o); push_depth(masked_o);
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes);
++_masked_pushes;
#endif // PS_PM_STATS
} else { } else {
// we'll just push its contents // we'll just push its contents
new_obj->push_contents(this); new_obj->push_contents(this);
@ -494,9 +498,7 @@ void PSPromotionManager::process_array_chunk(oop old) {
assert(old->is_objArray(), "invariant"); assert(old->is_objArray(), "invariant");
assert(old->is_forwarded(), "invariant"); assert(old->is_forwarded(), "invariant");
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(++_array_chunks_processed);
++_array_chunks_processed;
#endif // PS_PM_STATS
oop const obj = old->forwardee(); oop const obj = old->forwardee();
@ -508,9 +510,7 @@ void PSPromotionManager::process_array_chunk(oop old) {
assert(start > 0, "invariant"); assert(start > 0, "invariant");
arrayOop(old)->set_length(start); arrayOop(old)->set_length(start);
push_depth(mask_chunked_array_oop(old)); push_depth(mask_chunked_array_oop(old));
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(++_masked_pushes);
++_masked_pushes;
#endif // PS_PM_STATS
} else { } else {
// this is the final chunk for this array // this is the final chunk for this array
start = 0; start = 0;

View file

@ -42,8 +42,6 @@ class MutableSpace;
class PSOldGen; class PSOldGen;
class ParCompactionManager; class ParCompactionManager;
#define PS_PM_STATS 0
class PSPromotionManager : public CHeapObj { class PSPromotionManager : public CHeapObj {
friend class PSScavenge; friend class PSScavenge;
friend class PSRefProcTaskExecutor; friend class PSRefProcTaskExecutor;
@ -54,22 +52,18 @@ class PSPromotionManager : public CHeapObj {
static PSOldGen* _old_gen; static PSOldGen* _old_gen;
static MutableSpace* _young_space; static MutableSpace* _young_space;
#if PS_PM_STATS #if TASKQUEUE_STATS
uint _total_pushes; size_t _masked_pushes;
uint _masked_pushes; size_t _masked_steals;
size_t _arrays_chunked;
size_t _array_chunks_processed;
uint _overflow_pushes; void print_taskqueue_stats(uint i) const;
uint _max_overflow_length; void print_local_stats(uint i) const;
uint _arrays_chunked;
uint _array_chunks_processed;
uint _total_steals;
uint _masked_steals;
void print_stats(uint i);
static void print_stats(); static void print_stats();
#endif // PS_PM_STATS
void reset_stats();
#endif // TASKQUEUE_STATS
PSYoungPromotionLAB _young_lab; PSYoungPromotionLAB _young_lab;
PSOldPromotionLAB _old_lab; PSOldPromotionLAB _old_lab;
@ -143,42 +137,12 @@ class PSPromotionManager : public CHeapObj {
template <class T> void push_depth(T* p) { template <class T> void push_depth(T* p) {
assert(depth_first(), "pre-condition"); assert(depth_first(), "pre-condition");
#if PS_PM_STATS
++_total_pushes;
int stack_length = claimed_stack_depth()->overflow_stack()->length();
#endif // PS_PM_STATS
claimed_stack_depth()->push(p); claimed_stack_depth()->push(p);
#if PS_PM_STATS
if (claimed_stack_depth()->overflow_stack()->length() != stack_length) {
++_overflow_pushes;
if ((uint)stack_length + 1 > _max_overflow_length) {
_max_overflow_length = (uint)stack_length + 1;
}
}
#endif // PS_PM_STATS
} }
void push_breadth(oop o) { void push_breadth(oop o) {
assert(!depth_first(), "pre-condition"); assert(!depth_first(), "pre-condition");
#if PS_PM_STATS
++_total_pushes;
int stack_length = claimed_stack_breadth()->overflow_stack()->length();
#endif // PS_PM_STATS
claimed_stack_breadth()->push(o); claimed_stack_breadth()->push(o);
#if PS_PM_STATS
if (claimed_stack_breadth()->overflow_stack()->length() != stack_length) {
++_overflow_pushes;
if ((uint)stack_length + 1 > _max_overflow_length) {
_max_overflow_length = (uint)stack_length + 1;
}
}
#endif // PS_PM_STATS
} }
protected: protected:
@ -256,12 +220,5 @@ class PSPromotionManager : public CHeapObj {
template <class T> inline void claim_or_forward_depth(T* p); template <class T> inline void claim_or_forward_depth(T* p);
template <class T> inline void claim_or_forward_breadth(T* p); template <class T> inline void claim_or_forward_breadth(T* p);
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(inline void record_steal(StarTask& p);)
void increment_steals(oop* p = NULL) {
_total_steals += 1;
if (p != NULL && is_oop_masked(p)) {
_masked_steals += 1;
}
}
#endif // PS_PM_STATS
}; };

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -124,3 +124,11 @@ inline void PSPromotionManager::process_popped_location_depth(StarTask p) {
} }
} }
} }
#if TASKQUEUE_STATS
void PSPromotionManager::record_steal(StarTask& p) {
  // Only stolen tasks that is_oop_masked() recognizes are tallied here;
  // anything else leaves the counter untouched.
  if (!is_oop_masked(p)) {
    return;
  }
  _masked_steals += 1;
}
#endif // TASKQUEUE_STATS

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -148,9 +148,7 @@ void StealTask::do_it(GCTaskManager* manager, uint which) {
while(true) { while(true) {
StarTask p; StarTask p;
if (PSPromotionManager::steal_depth(which, &random_seed, p)) { if (PSPromotionManager::steal_depth(which, &random_seed, p)) {
#if PS_PM_STATS TASKQUEUE_STATS_ONLY(pm->record_steal(p));
pm->increment_steals(p);
#endif // PS_PM_STATS
pm->process_popped_location_depth(p); pm->process_popped_location_depth(p);
pm->drain_stacks_depth(true); pm->drain_stacks_depth(true);
} else { } else {
@ -163,9 +161,6 @@ void StealTask::do_it(GCTaskManager* manager, uint which) {
while(true) { while(true) {
oop obj; oop obj;
if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) { if (PSPromotionManager::steal_breadth(which, &random_seed, obj)) {
#if PS_PM_STATS
pm->increment_steals();
#endif // PS_PM_STATS
obj->copy_contents(pm); obj->copy_contents(pm);
pm->drain_stacks_breadth(true); pm->drain_stacks_breadth(true);
} else { } else {

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1997, 2009, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -345,6 +345,35 @@ inline intptr_t align_object_offset(intptr_t offset) {
return align_size_up(offset, HeapWordsPerLong); return align_size_up(offset, HeapWordsPerLong);
} }
// The expected size in bytes of a cache line, used to pad data structures.
// This is a fixed compile-time default (it is the default `alignment` of the
// Padded and Padded01 templates), not a value queried from the hardware.
#define DEFAULT_CACHE_LINE_SIZE 64
// Bytes needed to pad type to avoid cache-line sharing; alignment should be the
// expected cache line size (a power of two). The first addend avoids sharing
// when the start address is not a multiple of alignment; the second maintains
// alignment of starting addresses that happen to be a multiple.
//
// The value is the number of padding bytes to add, not the padded object
// size: it is one full `alignment` plus align_size_up_(sizeof(type), alignment).
// `alignment` is substituted twice in the expansion, so pass an expression
// without side effects.
// NOTE(review): align_size_up_ is defined outside this view; it is assumed to
// round its first argument up to a multiple of the second -- confirm.
#define PADDING_SIZE(type, alignment) \
((alignment) + align_size_up_(sizeof(type), alignment))
// Templates to create a subclass padded to avoid cache line sharing. These are
// effective only when applied to derived-most (leaf) classes.
// When no args are passed to the base ctor.
//
// Padded<T, alignment> derives publicly from T, so T's public interface is
// usable through it, and adds one private char buffer of
// PADDING_SIZE(T, alignment) bytes. `alignment` defaults to
// DEFAULT_CACHE_LINE_SIZE. No constructor is declared here, so this variant
// suits a T that is constructed without arguments.
// NOTE(review): the buffer adds space but does not by itself align the start
// address of the object -- confirm how instances are allocated if a specific
// starting alignment is required.
template <class T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
class Padded: public T {
private:
// Never read or written; exists only to occupy space.
char _pad_buf_[PADDING_SIZE(T, alignment)];
};
// When either 0 or 1 args may be passed to the base ctor.
//
// Padded01<T, Arg1T, alignment> derives publicly from T and adds one private
// char buffer of PADDING_SIZE(T, alignment) bytes; `alignment` defaults to
// DEFAULT_CACHE_LINE_SIZE. Arg1T is the type of the single argument that
// may be forwarded to T's constructor.
template <class T, typename Arg1T, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
class Padded01: public T {
public:
// Constructs the T base with no arguments.
Padded01(): T() { }
// Constructs the T base from arg1. This constructor is not marked explicit,
// so it also permits implicit conversion from Arg1T.
Padded01(Arg1T arg1): T(arg1) { }
private:
// Never read or written; exists only to occupy space.
char _pad_buf_[PADDING_SIZE(T, alignment)];
};
//---------------------------------------------------------------------------------------------------- //----------------------------------------------------------------------------------------------------
// Utility macros for compilers // Utility macros for compilers

View file

@ -31,6 +31,48 @@ uint ParallelTaskTerminator::_total_spins = 0;
uint ParallelTaskTerminator::_total_peeks = 0; uint ParallelTaskTerminator::_total_peeks = 0;
#endif #endif
#if TASKQUEUE_STATS
// Column labels used by print_header(), indexed by StatId. The entries must
// stay in the same order as the StatId enumerators (push, pop, pop_slow,
// steal_attempt, steal, overflow, overflow_max_len) because print() emits the
// counters in index order underneath these labels.
const char * const TaskQueueStats::_names[last_stat_id] = {
"qpush", "qpop", "qpop-s", "qattempt", "qsteal", "opush", "omax"
};
// Writes one line of the statistics table heading to `stream`, without a
// trailing newline.
//   line == 0: blanks spanning the full width of the heading
//   line == 1: the column labels from _names, right-aligned in each column
//   line == 2: a run of dashes under each column
// Any other value of `line` prints nothing. `width` is the requested column
// width; it is clamped into [1, max_width] before use.
void TaskQueueStats::print_header(unsigned int line, outputStream* const stream,
                                  unsigned int width)
{
  // Use a column width c: 1 <= c <= max_width
  const unsigned int max_width = 40;
  const unsigned int col_width = MAX2(MIN2(width, max_width), 1U);

  switch (line) {
  case 0: {
    // All columns plus the single blanks that separate them.
    const unsigned int total_width =
      col_width * last_stat_id + last_stat_id - 1;
    stream->print("%*s", total_width, " ");
    break;
  }
  case 1: {
    // The first label has no leading separator; the rest do.
    stream->print("%*s", col_width, _names[0]);
    for (unsigned int id = 1; id < last_stat_id; ++id) {
      stream->print(" %*s", col_width, _names[id]);
    }
    break;
  }
  case 2: {
    // Build one dashed rule of the column width and reuse it per column.
    char rule[max_width + 1];
    memset(rule, '-', col_width);
    rule[col_width] = '\0';
    stream->print("%s", rule);
    for (unsigned int id = 1; id < last_stat_id; ++id) {
      stream->print(" %s", rule);
    }
    break;
  }
  default:
    break;
  }
}
// Writes every counter in StatId order to `stream` on one line, each formatted
// with SIZE_FORMAT_W(*) using `width` as the field width and separated by a
// single blank. No trailing newline is written, and `width` is used as given
// (it is not clamped here).
void TaskQueueStats::print(outputStream* stream, unsigned int width) const
{
  // The first counter carries no leading separator.
  stream->print(SIZE_FORMAT_W(*), width, _stats[0]);
  for (unsigned int id = 1; id < last_stat_id; ++id) {
    stream->print(" " SIZE_FORMAT_W(*), width, _stats[id]);
  }
}
#endif // TASKQUEUE_STATS
int TaskQueueSetSuper::randomParkAndMiller(int *seed0) { int TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
const int a = 16807; const int a = 16807;
const int m = 2147483647; const int m = 2147483647;

View file

@ -22,6 +22,72 @@
* *
*/ */
// Simple TaskQueue stats that are collected by default in debug builds.
// A TASKQUEUE_STATS value defined before this point is left untouched;
// otherwise it becomes 1 when ASSERT is defined and 0 when it is not.
#ifndef TASKQUEUE_STATS
#  ifdef ASSERT
#    define TASKQUEUE_STATS 1
#  else
#    define TASKQUEUE_STATS 0
#  endif // ASSERT
#endif // !TASKQUEUE_STATS

// TASKQUEUE_STATS_ONLY(code) expands to `code` when the statistics are
// compiled in and to nothing otherwise.
#if !TASKQUEUE_STATS
#  define TASKQUEUE_STATS_ONLY(code)
#else
#  define TASKQUEUE_STATS_ONLY(code) code
#endif // TASKQUEUE_STATS
#if TASKQUEUE_STATS
// A fixed set of size_t counters describing the activity of one task queue.
// The counters are indexed by StatId. Nothing in this class synchronizes
// access to them.
class TaskQueueStats {
public:
// Index of each counter in _stats; last_stat_id is the number of counters.
enum StatId {
push, // number of taskqueue pushes
pop, // number of taskqueue pops
pop_slow, // subset of taskqueue pops that were done slow-path
steal_attempt, // number of taskqueue steal attempts
steal, // number of taskqueue steals
overflow, // number of overflow pushes
overflow_max_len, // max length of overflow stack
last_stat_id
};
public:
// Starts with every counter at zero.
inline TaskQueueStats() { reset(); }
// Counts one push.
inline void record_push() { ++_stats[push]; }
// Counts one pop.
inline void record_pop() { ++_stats[pop]; }
// Counts one pop and also counts it as a slow-path pop.
inline void record_pop_slow() { record_pop(); ++_stats[pop_slow]; }
// Counts one steal attempt and, when success is true, one steal.
inline void record_steal(bool success);
// Counts one overflow push and raises overflow_max_len to new_length if
// new_length is larger than the value recorded so far.
inline void record_overflow(size_t new_length);
// Returns the current value of a single counter.
inline size_t get(StatId id) const { return _stats[id]; }
// Returns a read-only pointer to the array of last_stat_id counters.
inline const size_t* get() const { return _stats; }
// Sets every counter back to zero.
inline void reset();
// Prints one line of the table heading, selected by `line`: 0 prints blanks
// as wide as the heading, 1 prints the column labels, 2 prints dashes.
// No newline is appended.
static void print_header(unsigned int line, outputStream* const stream = tty,
unsigned int width = 10);
// Prints all counters in StatId order on one line, without a newline.
void print(outputStream* const stream = tty, unsigned int width = 10) const;
private:
// The counters, indexed by StatId.
size_t _stats[last_stat_id];
// Column label for each counter, indexed by StatId.
static const char * const _names[last_stat_id];
};
// Records the outcome of one steal attempt.
void TaskQueueStats::record_steal(bool success) {
  // Every call is an attempt; only a successful one also counts as a steal.
  _stats[steal_attempt] += 1;
  if (!success) {
    return;
  }
  _stats[steal] += 1;
}
// Records one overflow push; new_len is the overflow length reported by the
// caller for that push.
void TaskQueueStats::record_overflow(size_t new_len) {
  _stats[overflow] += 1;

  // Keep the largest length reported so far.
  size_t& longest = _stats[overflow_max_len];
  if (longest < new_len) {
    longest = new_len;
  }
}
void TaskQueueStats::reset() {
memset(_stats, 0, sizeof(_stats));
}
#endif // TASKQUEUE_STATS
template <unsigned int N> template <unsigned int N>
class TaskQueueSuper: public CHeapObj { class TaskQueueSuper: public CHeapObj {
protected: protected:
@ -135,6 +201,8 @@ public:
// Total size of queue. // Total size of queue.
static const uint total_size() { return N; } static const uint total_size() { return N; }
TASKQUEUE_STATS_ONLY(TaskQueueStats stats;)
}; };
template<class E, unsigned int N = TASKQUEUE_SIZE> template<class E, unsigned int N = TASKQUEUE_SIZE>
@ -152,6 +220,7 @@ protected:
public: public:
using TaskQueueSuper<N>::max_elems; using TaskQueueSuper<N>::max_elems;
using TaskQueueSuper<N>::size; using TaskQueueSuper<N>::size;
TASKQUEUE_STATS_ONLY(using TaskQueueSuper<N>::stats;)
private: private:
// Slow paths for push, pop_local. (pop_global has no fast path.) // Slow paths for push, pop_local. (pop_global has no fast path.)
@ -224,14 +293,14 @@ bool GenericTaskQueue<E, N>::push_slow(E t, uint dirty_n_elems) {
// g++ complains if the volatile result of the assignment is unused. // g++ complains if the volatile result of the assignment is unused.
const_cast<E&>(_elems[localBot] = t); const_cast<E&>(_elems[localBot] = t);
OrderAccess::release_store(&_bottom, increment_index(localBot)); OrderAccess::release_store(&_bottom, increment_index(localBot));
TASKQUEUE_STATS_ONLY(stats.record_push());
return true; return true;
} }
return false; return false;
} }
template<class E, unsigned int N> template<class E, unsigned int N>
bool GenericTaskQueue<E, N>:: bool GenericTaskQueue<E, N>::pop_local_slow(uint localBot, Age oldAge) {
pop_local_slow(uint localBot, Age oldAge) {
// This queue was observed to contain exactly one element; either this // This queue was observed to contain exactly one element; either this
// thread will claim it, or a competing "pop_global". In either case, // thread will claim it, or a competing "pop_global". In either case,
// the queue will be logically empty afterwards. Create a new Age value // the queue will be logically empty afterwards. Create a new Age value
@ -251,6 +320,7 @@ pop_local_slow(uint localBot, Age oldAge) {
if (tempAge == oldAge) { if (tempAge == oldAge) {
// We win. // We win.
assert(dirty_size(localBot, _age.top()) != N - 1, "sanity"); assert(dirty_size(localBot, _age.top()) != N - 1, "sanity");
TASKQUEUE_STATS_ONLY(stats.record_pop_slow());
return true; return true;
} }
} }
@ -306,6 +376,8 @@ public:
typedef GrowableArray<E> overflow_t; typedef GrowableArray<E> overflow_t;
typedef GenericTaskQueue<E, N> taskqueue_t; typedef GenericTaskQueue<E, N> taskqueue_t;
TASKQUEUE_STATS_ONLY(using taskqueue_t::stats;)
OverflowTaskQueue(); OverflowTaskQueue();
~OverflowTaskQueue(); ~OverflowTaskQueue();
void initialize(); void initialize();
@ -356,6 +428,7 @@ bool OverflowTaskQueue<E, N>::push(E t)
{ {
if (!taskqueue_t::push(t)) { if (!taskqueue_t::push(t)) {
overflow_stack()->push(t); overflow_stack()->push(t);
TASKQUEUE_STATS_ONLY(stats.record_overflow(overflow_stack()->length()));
} }
return true; return true;
} }
@ -424,9 +497,13 @@ GenericTaskQueueSet<T>::queue(uint i) {
template<class T> bool template<class T> bool
GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) { GenericTaskQueueSet<T>::steal(uint queue_num, int* seed, E& t) {
for (uint i = 0; i < 2 * _n; i++) for (uint i = 0; i < 2 * _n; i++) {
if (steal_best_of_2(queue_num, seed, t)) if (steal_best_of_2(queue_num, seed, t)) {
TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(true));
return true; return true;
}
}
TASKQUEUE_STATS_ONLY(queue(queue_num)->stats.record_steal(false));
return false; return false;
} }
@ -574,6 +651,7 @@ GenericTaskQueue<E, N>::push(E t) {
// g++ complains if the volatile result of the assignment is unused. // g++ complains if the volatile result of the assignment is unused.
const_cast<E&>(_elems[localBot] = t); const_cast<E&>(_elems[localBot] = t);
OrderAccess::release_store(&_bottom, increment_index(localBot)); OrderAccess::release_store(&_bottom, increment_index(localBot));
TASKQUEUE_STATS_ONLY(stats.record_push());
return true; return true;
} else { } else {
return push_slow(t, dirty_n_elems); return push_slow(t, dirty_n_elems);
@ -603,6 +681,7 @@ GenericTaskQueue<E, N>::pop_local(E& t) {
idx_t tp = _age.top(); // XXX idx_t tp = _age.top(); // XXX
if (size(localBot, tp) > 0) { if (size(localBot, tp) > 0) {
assert(dirty_size(localBot, tp) != N - 1, "sanity"); assert(dirty_size(localBot, tp) != N - 1, "sanity");
TASKQUEUE_STATS_ONLY(stats.record_pop());
return true; return true;
} else { } else {
// Otherwise, the queue contained exactly one element; we take the slow // Otherwise, the queue contained exactly one element; we take the slow