6984287: Regularize how GC parallel workers are specified

Associate number of GC workers with the workgang as opposed to the task. Reviewed-by: johnc, ysr
2025-08-26 14:24:46 +02:00 · 2010-09-20 14:38:38 -07:00 · 2010-09-20 14:38:38 -07:00 · 28e56b8970
commit 28e56b8970
parent 3af63c10ab
29 changed files with 504 additions and 216 deletions
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@ -195,7 +195,7 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
           "Offset of FreeChunk::_prev within FreeChunk must match"
           "  that of OopDesc::_klass within OopDesc");
  )
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
    typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
    _par_gc_thread_states =
      NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
@ -616,7 +616,7 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
  }

  // Support for multi-threaded concurrent phases
-  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
+  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
    if (FLAG_IS_DEFAULT(ConcGCThreads)) {
      // just for now
      FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
@ -628,6 +628,8 @@ CMSCollector::CMSCollector(ConcurrentMarkSweepGeneration* cmsGen,
        warning("GC/CMS: _conc_workers allocation failure: "
              "forcing -CMSConcurrentMTEnabled");
        CMSConcurrentMTEnabled = false;
+      } else {
+        _conc_workers->initialize_workers();
      }
    } else {
      CMSConcurrentMTEnabled = false;
@ -936,7 +938,7 @@ void ConcurrentMarkSweepGeneration::reset_after_compaction() {
  // along with all the other pointers into the heap but
  // compaction is expected to be a rare event with
  // a heap using cms so don't do it without seeing the need.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
    for (uint i = 0; i < ParallelGCThreads; i++) {
      _par_gc_thread_states[i]->promo.reset();
    }
@ -2630,7 +2632,8 @@ void CMSCollector::gc_prologue(bool full) {
  // Should call gc_prologue_work() for all cms gens we are responsible for
  bool registerClosure =    _collectorState >= Marking
                         && _collectorState < Sweeping;
-  ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
+  ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
+                                               &_modUnionClosurePar
                                               : &_modUnionClosure;
  _cmsGen->gc_prologue_work(full, registerClosure, muc);
  _permGen->gc_prologue_work(full, registerClosure, muc);
@ -2731,7 +2734,7 @@ void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) {
  collector()->gc_epilogue(full);

  // Also reset promotion tracking in par gc thread states.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
    for (uint i = 0; i < ParallelGCThreads; i++) {
      _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
    }
@ -3731,7 +3734,6 @@ class CMSConcMarkingTerminator: public ParallelTaskTerminator {
 // MT Concurrent Marking Task
 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
  CMSCollector* _collector;
-  YieldingFlexibleWorkGang* _workers;        // the whole gang
  int           _n_workers;                  // requested/desired # workers
  bool          _asynch;
  bool          _result;
@ -3751,21 +3753,19 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
  CMSConcMarkingTask(CMSCollector* collector,
                 CompactibleFreeListSpace* cms_space,
                 CompactibleFreeListSpace* perm_space,
-                 bool asynch, int n_workers,
+                 bool asynch,
                 YieldingFlexibleWorkGang* workers,
                 OopTaskQueueSet* task_queues):
    YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
    _collector(collector),
    _cms_space(cms_space),
    _perm_space(perm_space),
-    _asynch(asynch), _n_workers(n_workers), _result(true),
-    _workers(workers), _task_queues(task_queues),
-    _term(n_workers, task_queues, _collector, asynch),
+    _asynch(asynch), _n_workers(0), _result(true),
+    _task_queues(task_queues),
+    _term(_n_workers, task_queues, _collector, asynch),
    _bit_map_lock(collector->bitMapLock())
  {
-    assert(n_workers <= workers->total_workers(),
-           "Else termination won't work correctly today"); // XXX FIX ME!
-    _requested_size = n_workers;
+    _requested_size = _n_workers;
    _term.set_task(this);
    assert(_cms_space->bottom() < _perm_space->bottom(),
           "Finger incorrectly initialized below");
@ -3781,6 +3781,10 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {

  CMSConcMarkingTerminator* terminator() { return &_term; }

+  virtual void set_for_termination(int active_workers) {
+    terminator()->reset_for_reuse(active_workers);
+  }
+
  void work(int i);

  virtual void coordinator_yield();  // stuff done by coordinator
@ -4220,9 +4224,12 @@ bool CMSCollector::do_marking_mt(bool asynch) {
  CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
  CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();

-  CMSConcMarkingTask tsk(this, cms_space, perm_space,
-                         asynch, num_workers /* number requested XXX */,
-                         conc_workers(), task_queues());
+  CMSConcMarkingTask tsk(this,
+                         cms_space,
+                         perm_space,
+                         asynch,
+                         conc_workers(),
+                         task_queues());

  // Since the actual number of workers we get may be different
  // from the number we requested above, do we need to do anything different
@ -4326,6 +4333,10 @@ void CMSCollector::preclean() {
  verify_overflow_empty();
  _abort_preclean = false;
  if (CMSPrecleaningEnabled) {
+    // Precleaning is currently not MT but the reference processor
+    // may be set for MT.  Disable it temporarily here.
+    ReferenceProcessor* rp = ref_processor();
+    ReferenceProcessorMTProcMutator z(rp, false);
    _eden_chunk_index = 0;
    size_t used = get_eden_used();
    size_t capacity = get_eden_capacity();
@ -4918,7 +4929,7 @@ void CMSCollector::checkpointRootsFinalWork(bool asynch,
      // dirtied since the first checkpoint in this GC cycle and prior to
      // the most recent young generation GC, minus those cleaned up by the
      // concurrent precleaning.
-      if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
+      if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
        TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
        do_remark_parallel();
      } else {
@ -5012,7 +5023,6 @@ void CMSCollector::checkpointRootsFinalWork(bool asynch,
 // Parallel remark task
 class CMSParRemarkTask: public AbstractGangTask {
  CMSCollector* _collector;
-  WorkGang*     _workers;
  int           _n_workers;
  CompactibleFreeListSpace* _cms_space;
  CompactibleFreeListSpace* _perm_space;
@ -5025,21 +5035,21 @@ class CMSParRemarkTask: public AbstractGangTask {
  CMSParRemarkTask(CMSCollector* collector,
                   CompactibleFreeListSpace* cms_space,
                   CompactibleFreeListSpace* perm_space,
-                   int n_workers, WorkGang* workers,
+                   int n_workers, FlexibleWorkGang* workers,
                   OopTaskQueueSet* task_queues):
    AbstractGangTask("Rescan roots and grey objects in parallel"),
    _collector(collector),
    _cms_space(cms_space), _perm_space(perm_space),
    _n_workers(n_workers),
-    _workers(workers),
    _task_queues(task_queues),
-    _term(workers->total_workers(), task_queues) { }
+    _term(n_workers, task_queues) { }

  OopTaskQueueSet* task_queues() { return _task_queues; }

  OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

  ParallelTaskTerminator* terminator() { return &_term; }
+  int n_workers() { return _n_workers; }

  void work(int i);

@ -5057,6 +5067,11 @@ class CMSParRemarkTask: public AbstractGangTask {
  void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
 };

+// work_queue(i) is passed to the closure
+// Par_MarkRefsIntoAndScanClosure.  The "i" parameter
+// also is passed to do_dirty_card_rescan_tasks() and to
+// do_work_steal() to select the i-th task_queue.
+
 void CMSParRemarkTask::work(int i) {
  elapsedTimer _timer;
  ResourceMark rm;
@ -5128,6 +5143,7 @@ void CMSParRemarkTask::work(int i) {

  // Do the rescan tasks for each of the two spaces
  // (cms_space and perm_space) in turn.
+  // "i" is passed to select the "i-th" task_queue
  do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
  do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
  _timer.stop();
@ -5150,6 +5166,7 @@ void CMSParRemarkTask::work(int i) {
  }
 }

+// Note that parameter "i" is not used.
 void
 CMSParRemarkTask::do_young_space_rescan(int i,
  Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
@ -5309,8 +5326,13 @@ CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl,
    size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                         (size_t)ParGCDesiredObjsFromOverflowList);
    // Now check if there's any work in the overflow list
+    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+    // only affects the number of attempts made to get work from the
+    // overflow list and does not affect the number of workers.  Just
+    // pass ParallelGCThreads so this behavior is unchanged.
    if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                                work_q)) {
+                                                work_q,
+                                                ParallelGCThreads)) {
      // found something in global overflow list;
      // not yet ready to go stealing work from others.
      // We'd like to assert(work_q->size() != 0, ...)
@ -5367,11 +5389,12 @@ void CMSCollector::reset_survivor_plab_arrays() {
 // Merge the per-thread plab arrays into the global survivor chunk
 // array which will provide the partitioning of the survivor space
 // for CMS rescan.
-void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
+void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
+                                              int no_of_gc_threads) {
  assert(_survivor_plab_array  != NULL, "Error");
  assert(_survivor_chunk_array != NULL, "Error");
  assert(_collectorState == FinalMarking, "Error");
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
    _cursor[j] = 0;
  }
  HeapWord* top = surv->top();
@ -5379,7 +5402,7 @@ void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
  for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
    HeapWord* min_val = top;          // Higher than any PLAB address
    uint      min_tid = 0;            // position of min_val this round
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
      ChunkArray* cur_sca = &_survivor_plab_array[j];
      if (_cursor[j] == cur_sca->end()) {
        continue;
@ -5413,7 +5436,7 @@ void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
  // Verify that we used up all the recorded entries
  #ifdef ASSERT
    size_t total = 0;
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
      assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
      total += _cursor[j];
    }
@ -5448,13 +5471,15 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
    // Each valid entry in [0, _eden_chunk_index) represents a task.
    size_t n_tasks = _eden_chunk_index + 1;
    assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks((int)n_tasks);
  }

  // Merge the survivor plab arrays into _survivor_chunk_array
  if (_survivor_plab_array != NULL) {
-    merge_survivor_plab_arrays(dng->from());
+    merge_survivor_plab_arrays(dng->from(), n_threads);
  } else {
    assert(_survivor_chunk_index == 0, "Error");
  }
@ -5463,7 +5488,9 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
  {
    SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
    assert(!pst->valid(), "Clobbering existing data?");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks(1);
    assert(pst->valid(), "Error");
  }
@ -5474,7 +5501,9 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
    assert(!pst->valid(), "Clobbering existing data?");
    size_t n_tasks = _survivor_chunk_index + 1;
    assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
    pst->set_n_tasks((int)n_tasks);
    assert(pst->valid(), "Error");
  }
@ -5483,7 +5512,7 @@ initialize_sequential_subtasks_for_young_gen_rescan(int n_threads) {
 // Parallel version of remark
 void CMSCollector::do_remark_parallel() {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
  int n_workers = workers->total_workers();
  CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
@ -5636,13 +5665,11 @@ void CMSCollector::do_remark_non_parallel() {
 ////////////////////////////////////////////////////////
 // Parallel Reference Processing Task Proxy Class
 ////////////////////////////////////////////////////////
-class CMSRefProcTaskProxy: public AbstractGangTask {
+class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  CMSCollector*          _collector;
  CMSBitMap*             _mark_bit_map;
  const MemRegion        _span;
-  OopTaskQueueSet*       _task_queues;
-  ParallelTaskTerminator _term;
  ProcessTask&           _task;

 public:
@ -5650,24 +5677,21 @@ public:
                      CMSCollector*    collector,
                      const MemRegion& span,
                      CMSBitMap*       mark_bit_map,
-                      int              total_workers,
+                      AbstractWorkGang* workers,
                      OopTaskQueueSet* task_queues):
-    AbstractGangTask("Process referents by policy in parallel"),
+    AbstractGangTaskWOopQueues("Process referents by policy in parallel",
+      task_queues),
    _task(task),
-    _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
-    _task_queues(task_queues),
-    _term(total_workers, task_queues)
+    _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
    {
      assert(_collector->_span.equals(_span) && !_span.is_empty(),
             "Inconsistency in _span");
    }

-  OopTaskQueueSet* task_queues() { return _task_queues; }
+  OopTaskQueueSet* task_queues() { return queues(); }

  OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }

-  ParallelTaskTerminator* terminator() { return &_term; }
-
  void do_work_steal(int i,
                     CMSParDrainMarkingStackClosure* drain,
                     CMSParKeepAliveClosure* keep_alive,
@ -5739,8 +5763,13 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
    size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                         (size_t)ParGCDesiredObjsFromOverflowList);
    // Now check if there's any work in the overflow list
+    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+    // only affects the number of attempts made to get work from the
+    // overflow list and does not affect the number of workers.  Just
+    // pass ParallelGCThreads so this behavior is unchanged.
    if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                                work_q)) {
+                                                work_q,
+                                                ParallelGCThreads)) {
      // Found something in global overflow list;
      // not yet ready to go stealing work from others.
      // We'd like to assert(work_q->size() != 0, ...)
@ -5773,13 +5802,12 @@ void CMSRefProcTaskProxy::do_work_steal(int i,
 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
 {
  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
  CMSRefProcTaskProxy rp_task(task, &_collector,
                              _collector.ref_processor()->span(),
                              _collector.markBitMap(),
-                              n_workers, _collector.task_queues());
+                              workers, _collector.task_queues());
  workers->run_task(&rp_task);
 }

@ -5787,7 +5815,7 @@ void CMSRefProcTaskExecutor::execute(EnqueueTask& task)
 {

  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
  assert(workers != NULL, "Need parallel worker threads.");
  CMSRefEnqueueTaskProxy enq_task(task);
  workers->run_task(&enq_task);
@ -5814,6 +5842,14 @@ void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {
  {
    TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
    if (rp->processing_is_mt()) {
+      // Set the degree of MT here.  If the discovery is done MT, there
+      // may have been a different number of threads doing the discovery
+      // and a different number of discovered lists may have Ref objects.
+      // That is OK as long as the Reference lists are balanced (see
+      // balance_all_queues() and balance_queues()).
+
+
+      rp->set_mt_degree(ParallelGCThreads);
      CMSRefProcTaskExecutor task_executor(*this);
      rp->process_discovered_references(&_is_alive_closure,
                                        &cmsKeepAliveClosure,
@ -5874,6 +5910,7 @@ void CMSCollector::refProcessingWork(bool asynch, bool clear_all_soft_refs) {

  rp->set_enqueuing_is_done(true);
  if (rp->processing_is_mt()) {
+    rp->balance_all_queues();
    CMSRefProcTaskExecutor task_executor(*this);
    rp->enqueue_discovered_references(&task_executor);
  } else {
@ -8708,7 +8745,8 @@ bool CMSCollector::take_from_overflow_list(size_t num, CMSMarkStack* stack) {
 // similar changes might be needed.
 // CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
-                                               OopTaskQueue* work_q) {
+                                               OopTaskQueue* work_q,
+                                               int no_of_gc_threads) {
  assert(work_q->size() == 0, "First empty local work queue");
  assert(num < work_q->max_elems(), "Can't bite more than we can chew");
  if (_overflow_list == NULL) {
@ -8717,7 +8755,9 @@ bool CMSCollector::par_take_from_overflow_list(size_t num,
  // Grab the entire list; we'll put back a suffix
  oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
  Thread* tid = Thread::current();
-  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
+  // set to ParallelGCThreads.
+  size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
  size_t sleep_time_millis = MAX2((size_t)1, num/100);
  // If the list is busy, we spin for a short while,
  // sleeping between attempts to get the list.
@ -9256,4 +9296,3 @@ TraceCMSMemoryManagerStats::TraceCMSMemoryManagerStats(): TraceMemoryManagerStat
             true /* recordGCEndTime */,
             true /* countCollection */ );
 }
-