8224661: Parallel GC: Use WorkGang (3: UpdateDensePrefixAndCompactionTask)

Reviewed-by: stefank, kbarrett, tschatzl
This commit is contained in:
Leo Korinth 2019-08-16 09:18:26 +02:00
parent 855895f61b
commit 3eba0ec9ba
5 changed files with 136 additions and 258 deletions

View file

@ -1,102 +0,0 @@
/*
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "aot/aotLoader.hpp"
#include "classfile/classLoaderDataGraph.hpp"
#include "classfile/systemDictionary.hpp"
#include "code/codeCache.hpp"
#include "gc/parallel/parallelScavengeHeap.hpp"
#include "gc/parallel/pcTasks.hpp"
#include "gc/parallel/psCompactionManager.inline.hpp"
#include "gc/parallel/psParallelCompact.inline.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "gc/shared/gcTimer.hpp"
#include "gc/shared/gcTraceTime.inline.hpp"
#include "logging/log.hpp"
#include "memory/iterator.inline.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "oops/objArrayKlass.inline.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/thread.hpp"
#include "runtime/vmThread.hpp"
#include "services/management.hpp"
#include "utilities/stack.inline.hpp"
//
// CompactionWithStealingTask
//
// Stores the terminator 't'; do_it() consults it whenever an attempt to
// steal a region fails.
CompactionWithStealingTask::CompactionWithStealingTask(ParallelTaskTerminator* t)
  : _terminator(t)
{
}
// Compaction work for the GC thread identified by 'which': drain the
// thread's own region stack first, then repeatedly steal regions through
// ParCompactionManager::steal() until the terminator accepts the offer to
// terminate. 'manager' is not used by this task.
void CompactionWithStealingTask::do_it(GCTaskManager* manager, uint which) {
  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");

  ParCompactionManager* const compaction_manager =
    ParCompactionManager::gc_thread_compaction_manager(which);

  // The stack has been preloaded with regions that are ready to fill;
  // process those before looking for more work.
  compaction_manager->drain_region_stacks();
  guarantee(compaction_manager->region_stack()->is_empty(), "Not empty");

  size_t stolen_region = 0;
  for (;;) {
    if (!ParCompactionManager::steal(which, stolen_region)) {
      // Nothing to steal right now: either terminate or go around again.
      if (terminator()->offer_termination()) {
        return;
      }
      continue;
    }

    PSParallelCompact::fill_and_update_region(compaction_manager, stolen_region);
    compaction_manager->drain_region_stacks();
  }
}
// Captures the space and the region index range that do_it() later forwards
// to PSParallelCompact::update_and_deadwood_in_dense_prefix().
UpdateDensePrefixTask::UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
                                             size_t region_index_start,
                                             size_t region_index_end)
  : _space_id(space_id),
    _region_index_start(region_index_start),
    _region_index_end(region_index_end)
{
}
// Runs the dense prefix update for the stored space and region range, using
// the compaction manager that belongs to GC thread 'which'. 'manager' is
// not used by this task.
void UpdateDensePrefixTask::do_it(GCTaskManager* manager, uint which) {
  ParCompactionManager* const compaction_manager =
    ParCompactionManager::gc_thread_compaction_manager(which);

  PSParallelCompact::update_and_deadwood_in_dense_prefix(
    compaction_manager, _space_id, _region_index_start, _region_index_end);
}

View file

@ -1,109 +0,0 @@
/*
* Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_PARALLEL_PCTASKS_HPP
#define SHARE_GC_PARALLEL_PCTASKS_HPP
#include "gc/parallel/gcTaskManager.hpp"
#include "gc/parallel/psParallelCompact.hpp"
#include "gc/parallel/psTasks.hpp"
#include "gc/shared/referenceProcessor.hpp"
// Tasks for parallel compaction of the old generation
//
// Tasks are created and enqueued on a task queue. The
// tasks for parallel old collector for marking objects
// are MarkFromRootsTask and ThreadRootsMarkingTask.
//
// MarkFromRootsTask's are created
// with a root group (e.g., jni_handles) and when the do_it()
// method of a MarkFromRootsTask is executed, it starts marking
// from its root group.
//
// ThreadRootsMarkingTask's are created for each Java thread. When
// the do_it() method of a ThreadRootsMarkingTask is executed, it
// starts marking from the thread's roots.
//
// The enqueueing of the MarkFromRootsTask and ThreadRootsMarkingTask
// do little more than create the task and put it on a queue. The
// queue is a GCTaskQueue and threads steal tasks from this GCTaskQueue.
//
// In addition to the MarkFromRootsTask and ThreadRootsMarkingTask
// tasks there are StealMarkingTask tasks. The StealMarkingTask's
// steal a reference from the marking stack of another
// thread and transitively mark the object of the reference
// and internal references. After successfully stealing a reference
// and marking it, the StealMarkingTask drains its marking
// stack before attempting another steal.
//
// ThreadRootsMarkingTask
//
// This task marks from the roots of a single thread. This task
// enables marking of thread roots in parallel.
//
class ParallelTaskTerminator;
//
// CompactionWithStealingTask
//
// This task is used to distribute work to idle threads.
//
// GCTask that performs region compaction and steals further regions when
// its own work runs out; see do_it() for the actual loop.
class CompactionWithStealingTask : public GCTask {
  // Consulted by do_it() to decide when to stop stealing.
  ParallelTaskTerminator* const _terminator;

 public:
  CompactionWithStealingTask(ParallelTaskTerminator* t);

  // Entry point executed by a GC worker thread; 'which' identifies it.
  virtual void do_it(GCTaskManager* manager, uint which);

  ParallelTaskTerminator* terminator() { return _terminator; }

  // Name of this task.
  char* name() { return const_cast<char*>("steal-region-task"); }
};
//
// UpdateDensePrefixTask
//
// This task is used to update the dense prefix
// of a space.
//
// GCTask that updates one slice of a space's dense prefix. The slice is
// described by a space id plus a start and an end region index.
class UpdateDensePrefixTask : public GCTask {
  PSParallelCompact::SpaceId _space_id;
  size_t                     _region_index_start;
  size_t                     _region_index_end;

 public:
  UpdateDensePrefixTask(PSParallelCompact::SpaceId space_id,
                        size_t region_index_start,
                        size_t region_index_end);

  // Entry point executed by a GC worker thread; 'which' identifies it.
  virtual void do_it(GCTaskManager* manager, uint which);

  // Name of this task.
  char* name() { return const_cast<char*>("update-dense_prefix-task"); }
};
#endif // SHARE_GC_PARALLEL_PCTASKS_HPP

View file

@ -46,6 +46,7 @@ class ParCompactionManager : public CHeapObj<mtGC> {
friend class IdleGCTask; friend class IdleGCTask;
friend class PCRefProcTask; friend class PCRefProcTask;
friend class MarkFromRootsTask; friend class MarkFromRootsTask;
friend class UpdateDensePrefixAndCompactionTask;
public: public:

View file

@ -34,7 +34,6 @@
#include "gc/parallel/parallelArguments.hpp" #include "gc/parallel/parallelArguments.hpp"
#include "gc/parallel/parallelScavengeHeap.inline.hpp" #include "gc/parallel/parallelScavengeHeap.inline.hpp"
#include "gc/parallel/parMarkBitMap.inline.hpp" #include "gc/parallel/parMarkBitMap.inline.hpp"
#include "gc/parallel/pcTasks.hpp"
#include "gc/parallel/psAdaptiveSizePolicy.hpp" #include "gc/parallel/psAdaptiveSizePolicy.hpp"
#include "gc/parallel/psCompactionManager.inline.hpp" #include "gc/parallel/psCompactionManager.inline.hpp"
#include "gc/parallel/psOldGen.hpp" #include "gc/parallel/psOldGen.hpp"
@ -2409,13 +2408,12 @@ public:
} }
}; };
void PSParallelCompact::prepare_region_draining_tasks(GCTaskQueue* q, void PSParallelCompact::prepare_region_draining_tasks(uint parallel_gc_threads)
uint parallel_gc_threads)
{ {
GCTraceTime(Trace, gc, phases) tm("Drain Task Setup", &_gc_timer); GCTraceTime(Trace, gc, phases) tm("Drain Task Setup", &_gc_timer);
// Find the threads that are active // Find the threads that are active
unsigned int which = 0; uint worker_id = 0;
// Find all regions that are available (can be filled immediately) and // Find all regions that are available (can be filled immediately) and
// distribute them to the thread stacks. The iteration is done in reverse // distribute them to the thread stacks. The iteration is done in reverse
@ -2423,7 +2421,6 @@ void PSParallelCompact::prepare_region_draining_tasks(GCTaskQueue* q,
const ParallelCompactData& sd = PSParallelCompact::summary_data(); const ParallelCompactData& sd = PSParallelCompact::summary_data();
which = 0;
// id + 1 is used to test termination so unsigned can // id + 1 is used to test termination so unsigned can
// be used with an old_space_id == 0. // be used with an old_space_id == 0.
FillableRegionLogger region_logger; FillableRegionLogger region_logger;
@ -2438,12 +2435,12 @@ void PSParallelCompact::prepare_region_draining_tasks(GCTaskQueue* q,
for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) { for (size_t cur = end_region - 1; cur + 1 > beg_region; --cur) {
if (sd.region(cur)->claim_unsafe()) { if (sd.region(cur)->claim_unsafe()) {
ParCompactionManager* cm = ParCompactionManager::manager_array(which); ParCompactionManager* cm = ParCompactionManager::manager_array(worker_id);
cm->region_stack()->push(cur); cm->region_stack()->push(cur);
region_logger.handle(cur); region_logger.handle(cur);
// Assign regions to tasks in round-robin fashion. // Assign regions to tasks in round-robin fashion.
if (++which == parallel_gc_threads) { if (++worker_id == parallel_gc_threads) {
which = 0; worker_id = 0;
} }
} }
} }
@ -2451,10 +2448,40 @@ void PSParallelCompact::prepare_region_draining_tasks(GCTaskQueue* q,
} }
} }
// Fixed-capacity queue of PSParallelCompact::UpdateDensePrefixTask values.
//
// Elements are appended with push() and handed out with try_claim().
// try_claim() picks its index by atomically incrementing _counter, so every
// stored element is returned by at most one call. push() performs no
// synchronization of its own.
// NOTE(review): presumably all push() calls complete before any worker
// starts calling try_claim() - confirm against the callers.
class TaskQueue : StackObj {
  // Number of try_claim() calls made so far (successful or not).
  volatile uint _counter;
  // Capacity of _backing_array, in elements.
  uint _size;
  // Index of the next free slot; equals the number of elements pushed.
  uint _insert_index;
  PSParallelCompact::UpdateDensePrefixTask* _backing_array;

public:
  // Allocates room for 'size' elements on the C heap.
  explicit TaskQueue(uint size) : _counter(0), _size(size), _insert_index(0), _backing_array(NULL) {
    _backing_array = NEW_C_HEAP_ARRAY(PSParallelCompact::UpdateDensePrefixTask, _size, mtGC);
  }

  // Releases the backing array. In debug builds, checks that at least as
  // many claims were made as elements were pushed.
  ~TaskQueue() {
    assert(_counter >= _insert_index, "not all queue elements were claimed");
    // Name the real element type; the former 'T' was not declared anywhere
    // in this class and did not match the NEW_C_HEAP_ARRAY call above.
    FREE_C_HEAP_ARRAY(PSParallelCompact::UpdateDensePrefixTask, _backing_array);
  }

  // Appends a copy of 'value'. The capacity given at construction must not
  // be exceeded (checked by assert only).
  void push(const PSParallelCompact::UpdateDensePrefixTask& value) {
    assert(_insert_index < _size, "too small backing array");
    _backing_array[_insert_index++] = value;
  }

  // Attempts to claim the next unclaimed element. On success copies it into
  // 'reference' and returns true; returns false, leaving 'reference'
  // untouched, once every pushed element has been claimed.
  bool try_claim(PSParallelCompact::UpdateDensePrefixTask& reference) {
    uint claimed = Atomic::add(1u, &_counter) - 1; // -1 is so that we start with zero
    if (claimed < _insert_index) {
      reference = _backing_array[claimed];
      return true;
    } else {
      return false;
    }
  }
};
#define PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING 4 #define PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING 4
void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q, void PSParallelCompact::enqueue_dense_prefix_tasks(TaskQueue& task_queue,
uint parallel_gc_threads) { uint parallel_gc_threads) {
GCTraceTime(Trace, gc, phases) tm("Dense Prefix Task Setup", &_gc_timer); GCTraceTime(Trace, gc, phases) tm("Dense Prefix Task Setup", &_gc_timer);
ParallelCompactData& sd = PSParallelCompact::summary_data(); ParallelCompactData& sd = PSParallelCompact::summary_data();
@ -2517,35 +2544,22 @@ void PSParallelCompact::enqueue_dense_prefix_tasks(GCTaskQueue* q,
// region_index_end is not processed // region_index_end is not processed
size_t region_index_end = MIN2(region_index_start + regions_per_thread, size_t region_index_end = MIN2(region_index_start + regions_per_thread,
region_index_end_dense_prefix); region_index_end_dense_prefix);
q->enqueue(new UpdateDensePrefixTask(SpaceId(space_id), task_queue.push(UpdateDensePrefixTask(SpaceId(space_id),
region_index_start, region_index_start,
region_index_end)); region_index_end));
region_index_start = region_index_end; region_index_start = region_index_end;
} }
} }
// This gets any part of the dense prefix that did not // This gets any part of the dense prefix that did not
// fit evenly. // fit evenly.
if (region_index_start < region_index_end_dense_prefix) { if (region_index_start < region_index_end_dense_prefix) {
q->enqueue(new UpdateDensePrefixTask(SpaceId(space_id), task_queue.push(UpdateDensePrefixTask(SpaceId(space_id),
region_index_start, region_index_start,
region_index_end_dense_prefix)); region_index_end_dense_prefix));
} }
} }
} }
// Enqueues 'parallel_gc_threads' CompactionWithStealingTask instances on
// 'q'. Every task is given the same terminator, 'terminator_ptr'.
void PSParallelCompact::enqueue_region_stealing_tasks(
                                     GCTaskQueue* q,
                                     ParallelTaskTerminator* terminator_ptr,
                                     uint parallel_gc_threads) {
  GCTraceTime(Trace, gc, phases) tm("Steal Task Setup", &_gc_timer);

  // Once a thread has drained its stack, it should try to steal regions from
  // other threads.
  for (uint left = parallel_gc_threads; left != 0; --left) {
    q->enqueue(new CompactionWithStealingTask(terminator_ptr));
  }
}
#ifdef ASSERT #ifdef ASSERT
// Write a histogram of the number of times the block table was filled for a // Write a histogram of the number of times the block table was filled for a
// region. // region.
@ -2588,26 +2602,87 @@ void PSParallelCompact::write_block_fill_histogram()
} }
#endif // #ifdef ASSERT #endif // #ifdef ASSERT
// Compaction work for the worker identified by 'worker_id': drain the
// worker's own region stack first, then repeatedly steal regions through
// ParCompactionManager::steal() until 'terminator' accepts the offer to
// terminate.
static void compaction_with_stealing_work(ParallelTaskTerminator* terminator, uint worker_id) {
  assert(ParallelScavengeHeap::heap()->is_gc_active(), "called outside gc");

  ParCompactionManager* const compaction_manager =
    ParCompactionManager::gc_thread_compaction_manager(worker_id);

  // The stack has been preloaded with regions that are ready to fill;
  // process those before looking for more work.
  compaction_manager->drain_region_stacks();
  guarantee(compaction_manager->region_stack()->is_empty(), "Not empty");

  size_t stolen_region = 0;
  for (;;) {
    if (!ParCompactionManager::steal(worker_id, stolen_region)) {
      // Nothing to steal right now: either terminate or go around again.
      if (terminator->offer_termination()) {
        return;
      }
      continue;
    }

    PSParallelCompact::fill_and_update_region(compaction_manager, stolen_region);
    compaction_manager->drain_region_stacks();
  }
}
// Gang task that combines two steps for each worker: first the worker
// claims and processes dense prefix update tasks from the shared TaskQueue,
// then it compacts regions, stealing work via
// compaction_with_stealing_work() until the shared terminator ends the task.
class UpdateDensePrefixAndCompactionTask: public AbstractGangTask {
  // Queue of dense prefix update tasks, shared by all workers. Held by
  // reference, so the queue must outlive this task object.
  TaskQueue& _tq;
  // Terminator handed to compaction_with_stealing_work() by every worker.
  TaskTerminator _terminator;

public:
  // 'tq' is the queue to claim dense prefix tasks from; 'active_workers' is
  // the worker count the terminator is created with.
  UpdateDensePrefixAndCompactionTask(TaskQueue& tq, uint active_workers) :
      AbstractGangTask("UpdateDensePrefixAndCompactionTask"),
      _tq(tq),
      _terminator(active_workers, ParCompactionManager::region_array()) {
  }

  // Body executed by the worker identified by 'worker_id'.
  virtual void work(uint worker_id) {
    ParCompactionManager* cm = ParCompactionManager::gc_thread_compaction_manager(worker_id);

    // Keep claiming dense prefix tasks until the queue reports none left.
    for (PSParallelCompact::UpdateDensePrefixTask task; _tq.try_claim(task); /* empty */) {
      PSParallelCompact::update_and_deadwood_in_dense_prefix(cm,
                                                             task._space_id,
                                                             task._region_index_start,
                                                             task._region_index_end);
    }

    // Once a thread has drained its stack, it should try to steal regions from
    // other threads.
    compaction_with_stealing_work(_terminator.terminator(), worker_id);
  }
};
void PSParallelCompact::compact() { void PSParallelCompact::compact() {
GCTraceTime(Info, gc, phases) tm("Compaction Phase", &_gc_timer); GCTraceTime(Info, gc, phases) tm("Compaction Phase", &_gc_timer);
ParallelScavengeHeap* heap = ParallelScavengeHeap::heap(); ParallelScavengeHeap* heap = ParallelScavengeHeap::heap();
PSOldGen* old_gen = heap->old_gen(); PSOldGen* old_gen = heap->old_gen();
old_gen->start_array()->reset(); old_gen->start_array()->reset();
uint parallel_gc_threads = heap->gc_task_manager()->workers(); uint active_gc_threads = ParallelScavengeHeap::heap()->workers().active_workers();
uint active_gc_threads = heap->gc_task_manager()->active_workers();
TaskQueueSetSuper* qset = ParCompactionManager::region_array();
TaskTerminator terminator(active_gc_threads, qset);
GCTaskQueue* q = GCTaskQueue::create(); // for [0..last_space_id)
prepare_region_draining_tasks(q, active_gc_threads); // for [0..active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)
enqueue_dense_prefix_tasks(q, active_gc_threads); // push
enqueue_region_stealing_tasks(q, terminator.terminator(), active_gc_threads); // push
//
// max push count is thus: last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1)
TaskQueue task_queue(last_space_id * (active_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING + 1));
prepare_region_draining_tasks(active_gc_threads);
enqueue_dense_prefix_tasks(task_queue, active_gc_threads);
{ {
GCTraceTime(Trace, gc, phases) tm("Par Compact", &_gc_timer); GCTraceTime(Trace, gc, phases) tm("Par Compact", &_gc_timer);
gc_task_manager()->execute_and_wait(q); UpdateDensePrefixAndCompactionTask task(task_queue, active_gc_threads);
ParallelScavengeHeap::heap()->workers().run_task(&task);
#ifdef ASSERT #ifdef ASSERT
// Verify that all regions have been processed before the deferred updates. // Verify that all regions have been processed before the deferred updates.

View file

@ -913,6 +913,8 @@ inline void ParMarkBitMapClosure::decrement_words_remaining(size_t words) {
// region that can be put on the ready list. The regions are atomically added // region that can be put on the ready list. The regions are atomically added
// and removed from the ready list. // and removed from the ready list.
class TaskQueue;
class PSParallelCompact : AllStatic { class PSParallelCompact : AllStatic {
public: public:
// Convenient access to type names. // Convenient access to type names.
@ -925,6 +927,24 @@ class PSParallelCompact : AllStatic {
from_space_id, to_space_id, last_space_id from_space_id, to_space_id, last_space_id
} SpaceId; } SpaceId;
// Plain description of one dense prefix update: the space it applies to
// and the region index range, given as a start and an end index.
struct UpdateDensePrefixTask : public CHeapObj<mtGC> {
  SpaceId _space_id;
  size_t  _region_index_start;
  size_t  _region_index_end;

  // Default state: space id 0 with both region indices set to zero.
  UpdateDensePrefixTask()
    : _space_id(static_cast<SpaceId>(0)),
      _region_index_start(0),
      _region_index_end(0)
  {
  }

  // Describes the given region index range of 'space_id'.
  UpdateDensePrefixTask(SpaceId space_id,
                        size_t region_index_start,
                        size_t region_index_end)
    : _space_id(space_id),
      _region_index_start(region_index_start),
      _region_index_end(region_index_end)
  {
  }
};
public: public:
// Inline closure decls // Inline closure decls
// //
@ -1050,19 +1070,12 @@ class PSParallelCompact : AllStatic {
static void compact(); static void compact();
// Add available regions to the stack and draining tasks to the task queue. // Add available regions to the stack and draining tasks to the task queue.
static void prepare_region_draining_tasks(GCTaskQueue* q, static void prepare_region_draining_tasks(uint parallel_gc_threads);
uint parallel_gc_threads);
// Add dense prefix update tasks to the task queue. // Add dense prefix update tasks to the task queue.
static void enqueue_dense_prefix_tasks(GCTaskQueue* q, static void enqueue_dense_prefix_tasks(TaskQueue& task_queue,
uint parallel_gc_threads); uint parallel_gc_threads);
// Add region stealing tasks to the task queue.
static void enqueue_region_stealing_tasks(
GCTaskQueue* q,
ParallelTaskTerminator* terminator_ptr,
uint parallel_gc_threads);
// If objects are left in eden after a collection, try to move the boundary // If objects are left in eden after a collection, try to move the boundary
// and absorb them into the old gen. Returns true if eden was emptied. // and absorb them into the old gen. Returns true if eden was emptied.
static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy, static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,