6984287: Regularize how GC parallel workers are specified

Associate number of GC workers with the workgang as opposed to the task.

Reviewed-by: johnc, ysr
This commit is contained in:
Jon Masamitsu 2010-09-20 14:38:38 -07:00
parent 3af63c10ab
commit 28e56b8970
29 changed files with 504 additions and 216 deletions

View file

@ -195,7 +195,7 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
"Offset of FreeChunk::_prev within FreeChunk must match"
" that of OopDesc::_klass within OopDesc");
)
if (ParallelGCThreads > 0) {
if (CollectedHeap::use_parallel_gc_threads()) {
typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
_par_gc_thread_states =
NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
@ -616,7 +616,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
}
// Support for multi-threaded concurrent phases
if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
if (FLAG_IS_DEFAULT(ConcGCThreads)) {
// just for now
FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
@ -628,6 +628,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
warning("GC/CMS: _conc_workers allocation failure: "
"forcing -CMSConcurrentMTEnabled");
CMSConcurrentMTEnabled = false;
} else {
_conc_workers->initialize_workers();
}
} else {
CMSConcurrentMTEnabled = false;
@ -936,7 +938,7 @@ void ConcurrentMarkSweepGeneration::reset_after_compaction() {
// along with all the other pointers into the heap but
// compaction is expected to be a rare event with
// a heap using cms so don't do it without seeing the need.
if (ParallelGCThreads > 0) {
if (CollectedHeap::use_parallel_gc_threads()) {
for (uint i = 0; i < ParallelGCThreads; i++) {
_par_gc_thread_states[i]->promo.reset();
}
@ -2630,7 +2632,8 @@ void CMSCollector::gc_prologue(bool full) {
// Should call gc_prologue_work() for all cms gens we are responsible for
bool registerClosure = _collectorState >= Marking
&& _collectorState < Sweeping;
ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
&_modUnionClosurePar
: &_modUnionClosure;
_cmsGen->gc_prologue_work(full, registerClosure, muc);
_permGen->gc_prologue_work(full, registerClosure, muc);
@ -2731,7 +2734,7 @@ void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
collector()->gc_epilogue(full);
// Also reset promotion tracking in par gc thread states.
if (ParallelGCThreads > 0) {
if (CollectedHeap::use_parallel_gc_threads()) {
for (uint i = 0; i < ParallelGCThreads; i++) {
_par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
}
@ -3731,7 +3734,6 @@ class CMSConcMarkingTerminator: public ParallelTaskTerminator {
// MT Concurrent Marking Task
class CMSConcMarkingTask: public YieldingFlexibleGangTask {
CMSCollector* _collector;
YieldingFlexibleWorkGang* _workers; // the whole gang
int _n_workers; // requested/desired # workers
bool _asynch;
bool _result;
@ -3751,21 +3753,19 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
CMSConcMarkingTask(CMSCollector* collector,
CompactibleFreeListSpace* cms_space,
CompactibleFreeListSpace* perm_space,
bool asynch, int n_workers,
bool asynch,
YieldingFlexibleWorkGang* workers,
OopTaskQueueSet* task_queues):
YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
_collector(collector),
_cms_space(cms_space),
_perm_space(perm_space),
_asynch(asynch), _n_workers(n_workers), _result(true),
_workers(workers), _task_queues(task_queues),
_term(n_workers, task_queues, _collector, asynch),
_asynch(asynch), _n_workers(0), _result(true),
_task_queues(task_queues),
_term(_n_workers, task_queues, _collector, asynch),
_bit_map_lock(collector->bitMapLock())
{
assert(n_workers <= workers->total_workers(),
"Else termination won't work correctly today"); // XXX FIX ME!
_requested_size = n_workers;
_requested_size = _n_workers;
_term.set_task(this);
assert(_cms_space->bottom() < _perm_space->bottom(),
"Finger incorrectly initialized below");
@ -3781,6 +3781,10 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
CMSConcMarkingTerminator* terminator() { return &_term; }
virtual void set_for_termination(int active_workers) {
terminator()->reset_for_reuse(active_workers);
}
void work(int i);
virtual void coordinator_yield(); // stuff done by coordinator
@ -4220,9 +4224,12 @@ bool CMSCollector::do_marking_mt(bool asynch) {
CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
CMSConcMarkingTask tsk(this, cms_space, perm_space,
asynch, num_workers /* number requested XXX */,
conc_workers(), task_queues());
CMSConcMarkingTask tsk(this,
cms_space,
perm_space,
asynch,
conc_workers(),
task_queues());
// Since the actual number of workers we get may be different
// from the number we requested above, do we need to do anything different
@ -4326,6 +4333,10 @@ void CMSCollector::preclean() {
verify_overflow_empty();
_abort_preclean = false;
if (CMSPrecleaningEnabled) {
// Precleaning is currently not MT but the reference processor
// may be set for MT. Disable it temporarily here.
ReferenceProcessor* rp = ref_processor();
ReferenceProcessorMTProcMutator z(rp, false);
_eden_chunk_index = 0;
size_t used = get_eden_used();
size_t capacity = get_eden_capacity();
@ -4918,7 +4929,7 @@ void CMSCollector::checkpointRootsFinalWork(bool asynch,
// dirtied since the first checkpoint in this GC cycle and prior to
// the most recent young generation GC, minus those cleaned up by the
// concurrent precleaning.
if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
do_remark_parallel();
} else {
@ -5012,7 +5023,6 @@ void CMSCollector::checkpointRootsFinalWork(bool asynch,
// Parallel remark task
class CMSParRemarkTask: public AbstractGangTask {
CMSCollector* _collector;
WorkGang* _workers;
int _n_workers;
CompactibleFreeListSpace* _cms_space;
CompactibleFreeListSpace* _perm_space;
@ -5025,21 +5035,21 @@ class CMSParRemarkTask: public AbstractGangTask {
CMSParRemarkTask(CMSCollector* collector,
CompactibleFreeListSpace* cms_space,
CompactibleFreeListSpace* perm_space,
int n_workers, WorkGang* workers,
int n_workers, FlexibleWorkGang* workers,
OopTaskQueueSet* task_queues):
AbstractGangTask("Rescan roots and grey objects in parallel"),
_collector(collector),
_cms_space(cms_space), _perm_space(perm_space),
_n_workers(n_workers),
_workers(workers),
_task_queues(task_queues),
_term(workers->total_workers(), task_queues) { }
_term(n_workers, task_queues) { }
OopTaskQueueSet* task_queues() { return _task_queues; }
OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
ParallelTaskTerminator* terminator() { return &_term; }
int n_workers() { return _n_workers; }
void work(int i);
@ -5057,6 +5067,11 @@ class CMSParRemarkTask: public AbstractGangTask {
void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
};
// work_queue(i) is passed to the closure
// Par_MarkRefsIntoAndScanClosure. The "i" parameter
// also is passed to do_dirty_card_rescan_tasks() and to
// do_work_steal() to select the i-th task_queue.
void CMSParRemarkTask::work(int i) {
elapsedTimer _timer;
ResourceMark rm;
@ -5128,6 +5143,7 @@ void CMSParRemarkTask::work(int i) {
// Do the rescan tasks for each of the two spaces
// (cms_space and perm_space) in turn.
// "i" is passed to select the "i-th" task_queue
do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
_timer.stop();
@ -5150,6 +5166,7 @@ void CMSParRemarkTask::work(int i) {
}
}
// Note that parameter "i" is not used.
void
CMSParRemarkTask::do_young_space_rescan(int i,
Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
@ -5309,8 +5326,13 @@ CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl,
size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
(size_t)ParGCDesiredObjsFromOverflowList);
// Now check if there's any work in the overflow list
// Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
// only affects the number of attempts made to get work from the
// overflow list and does not affect the number of workers. Just
// pass ParallelGCThreads so this behavior is unchanged.
if (_collector->par_take_from_overflow_list(num_from_overflow_list,
work_q)) {
work_q,
ParallelGCThreads)) {
// found something in global overflow list;
// not yet ready to go stealing work from others.
// We'd like to assert(work_q->size() != 0, ...)
@ -5367,11 +5389,12 @@ void CMSCollector::reset_survivor_plab_arrays() {
// Merge the per-thread plab arrays into the global survivor chunk
// array which will provide the partitioning of the survivor space
// for CMS rescan.
void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
int no_of_gc_threads) {
assert(_survivor_plab_array != NULL, "Error");
assert(_survivor_chunk_array != NULL, "Error");
assert(_collectorState == FinalMarking, "Error");
for (uint j = 0; j < ParallelGCThreads; j++) {
for (int j = 0; j < no_of_gc_threads; j++) {
_cursor[j] = 0;
}
HeapWord* top = surv->top();
@ -5379,7 +5402,7 @@ void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries
HeapWord* min_val = top; // Higher than any PLAB address
uint min_tid = 0; // position of min_val this round
for (uint j = 0; j < ParallelGCThreads; j++) {
for (int j = 0; j < no_of_gc_threads; j++) {
ChunkArray* cur_sca = &_survivor_plab_array[j];
if (_cursor[j] == cur_sca->end()) {
continue;
@ -5413,7 +5436,7 @@ void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
// Verify that we used up all the recorded entries
#ifdef ASSERT
size_t total = 0;
for (uint j = 0; j < ParallelGCThreads; j++) {
for (int j = 0; j < no_of_gc_threads; j++) {
assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
total += _cursor[j];
}
@ -5448,13 +5471,15 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
// Each valid entry in [0, _eden_chunk_index) represents a task.
size_t n_tasks = _eden_chunk_index + 1;
assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
pst->set_par_threads(n_threads);
// Sets the condition for completion of the subtask (how many threads
// need to finish in order to be done).
pst->set_n_threads(n_threads);
pst->set_n_tasks((int)n_tasks);
}
// Merge the survivor plab arrays into _survivor_chunk_array
if (_survivor_plab_array != NULL) {
merge_survivor_plab_arrays(dng->from());
merge_survivor_plab_arrays(dng->from(), n_threads);
} else {
assert(_survivor_chunk_index == 0, "Error");
}
@ -5463,7 +5488,9 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
{
SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
pst->set_par_threads(n_threads);
// Sets the condition for completion of the subtask (how many threads
// need to finish in order to be done).
pst->set_n_threads(n_threads);
pst->set_n_tasks(1);
assert(pst->valid(), "Error");
}
@ -5474,7 +5501,9 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
assert(!pst->valid(), "Clobbering existing data?");
size_t n_tasks = _survivor_chunk_index + 1;
assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
pst->set_par_threads(n_threads);
// Sets the condition for completion of the subtask (how many threads
// need to finish in order to be done).
pst->set_n_threads(n_threads);
pst->set_n_tasks((int)n_tasks);
assert(pst->valid(), "Error");
}
@ -5483,7 +5512,7 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
// Parallel version of remark
void CMSCollector::do_remark_parallel() {
GenCollectedHeap* gch = GenCollectedHeap::heap();
WorkGang* workers = gch->workers();
FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
int n_workers = workers->total_workers();
CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
@ -5636,13 +5665,11 @@ void CMSCollector::do_remark_non_parallel() {
////////////////////////////////////////////////////////
// Parallel Reference Processing Task Proxy Class
////////////////////////////////////////////////////////
class CMSRefProcTaskProxy: public AbstractGangTask {
class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
CMSCollector* _collector;
CMSBitMap* _mark_bit_map;
const MemRegion _span;
OopTaskQueueSet* _task_queues;
ParallelTaskTerminator _term;
ProcessTask& _task;
public:
@ -5650,24 +5677,21 @@ public:
CMSCollector* collector,
const MemRegion& span,
CMSBitMap* mark_bit_map,
int total_workers,
AbstractWorkGang* workers,
OopTaskQueueSet* task_queues):
AbstractGangTask("Process referents by policy in parallel"),
AbstractGangTaskWOopQueues("Process referents by policy in parallel",
task_queues),
_task(task),
_collector(collector), _span(span), _mark_bit_map(mark_bit_map),
_task_queues(task_queues),
_term(total_workers, task_queues)
_collector(collector), _span(span), _mark_bit_map(mark_bit_map)
{
assert(_collector->_span.equals(_span) && !_span.is_empty(),
"Inconsistency in _span");
}
OopTaskQueueSet* task_queues() { return _task_queues; }
OopTaskQueueSet* task_queues() { return queues(); }
OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
ParallelTaskTerminator* terminator() { return &_term; }
void do_work_steal(int i,
CMSParDrainMarkingStackClosure* drain,
CMSParKeepAliveClosure* keep_alive,
@ -5739,8 +5763,13 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
(size_t)ParGCDesiredObjsFromOverflowList);
// Now check if there's any work in the overflow list
// Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
// only affects the number of attempts made to get work from the
// overflow list and does not affect the number of workers. Just
// pass ParallelGCThreads so this behavior is unchanged.
if (_collector->par_take_from_overflow_list(num_from_overflow_list,
work_q)) {
work_q,
ParallelGCThreads)) {
// Found something in global overflow list;
// not yet ready to go stealing work from others.
// We'd like to assert(work_q->size() != 0, ...)
@ -5773,13 +5802,12 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
void CMSRefProcTaskExecutor::execute(ProcessTask& task)
{
GenCollectedHeap* gch = GenCollectedHeap::heap();
WorkGang* workers = gch->workers();
FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
int n_workers = workers->total_workers();
CMSRefProcTaskProxy rp_task(task, &_collector,
_collector.ref_processor()->span(),
_collector.markBitMap(),
n_workers, _collector.task_queues());
workers, _collector.task_queues());
workers->run_task(&rp_task);
}
@ -5787,7 +5815,7 @@ void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
{
GenCollectedHeap* gch = GenCollectedHeap::heap();
WorkGang* workers = gch->workers();
FlexibleWorkGang* workers = gch->workers();
assert(workers != NULL, "Need parallel worker threads.");
CMSRefEnqueueTaskProxy enq_task(task);
workers->run_task(&enq_task);
@ -5814,6 +5842,14 @@ void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {
{
TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
if (rp->processing_is_mt()) {
// Set the degree of MT here. If the discovery is done MT, there
// may have been a different number of threads doing the discovery
// and a different number of discovered lists may have Ref objects.
// That is OK as long as the Reference lists are balanced (see
// balance_all_queues() and balance_queues()).
rp->set_mt_degree(ParallelGCThreads);
CMSRefProcTaskExecutor task_executor(*this);
rp->process_discovered_references(&_is_alive_closure,
&cmsKeepAliveClosure,
@ -5874,6 +5910,7 @@ void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {
rp->set_enqueuing_is_done(true);
if (rp->processing_is_mt()) {
rp->balance_all_queues();
CMSRefProcTaskExecutor task_executor(*this);
rp->enqueue_discovered_references(&task_executor);
} else {
@ -8708,7 +8745,8 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
// similar changes might be needed.
// CR 6797058 has been filed to consolidate the common code.
bool CMSCollector::par_take_from_overflow_list(size_t num,
OopTaskQueue* work_q) {
OopTaskQueue* work_q,
int no_of_gc_threads) {
assert(work_q->size() == 0, "First empty local work queue");
assert(num < work_q->max_elems(), "Can't bite more than we can chew");
if (_overflow_list == NULL) {
@ -8717,7 +8755,9 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
// Grab the entire list; we'll put back a suffix
oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
Thread* tid = Thread::current();
size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
// Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
// set to ParallelGCThreads.
size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
size_t sleep_time_millis = MAX2((size_t)1, num/100);
// If the list is busy, we spin for a short while,
// sleeping between attempts to get the list.
@ -9256,4 +9296,3 @@ TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(): TraceMemoryManagerStat
true /* recordGCEndTime */,
true /* countCollection */ );
}