mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 22:34:27 +02:00
6888898: CMS: ReduceInitialCardMarks unsafe in the presence of cms precleaning
6889757: G1: enable card mark elision for initializing writes from compiled code (ReduceInitialCardMarks) Defer the (compiler-elided) card-mark upon a slow-path allocation until after the store and before the next subsequent safepoint; G1 now answers yes to can_elide_tlab_write_barriers(). Reviewed-by: jcoomes, kvn, never
This commit is contained in:
parent
a67426faf8
commit
928ac69fcd
13 changed files with 209 additions and 73 deletions
|
@ -992,11 +992,39 @@ public:
|
|||
|
||||
// Can a compiler initialize a new object without store barriers?
|
||||
// This permission only extends from the creation of a new object
|
||||
// via a TLAB up to the first subsequent safepoint.
|
||||
// via a TLAB up to the first subsequent safepoint. If such permission
|
||||
// is granted for this heap type, the compiler promises to call
|
||||
// defer_store_barrier() below on any slow path allocation of
|
||||
// a new object for which such initializing store barriers will
|
||||
// have been elided. G1, like CMS, allows this, but should be
|
||||
// ready to provide a compensating write barrier as necessary
|
||||
// if that storage came out of a non-young region. The efficiency
|
||||
// of this implementation depends crucially on being able to
|
||||
// answer very efficiently in constant time whether a piece of
|
||||
// storage in the heap comes from a young region or not.
|
||||
// See ReduceInitialCardMarks.
|
||||
virtual bool can_elide_tlab_store_barriers() const {
|
||||
// Since G1's TLAB's may, on occasion, come from non-young regions
|
||||
// as well. (Is there a flag controlling that? XXX)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool is_in_young(oop obj) {
|
||||
HeapRegion* hr = heap_region_containing(obj);
|
||||
return hr != NULL && hr->is_young();
|
||||
}
|
||||
|
||||
// We don't need barriers for initializing stores to objects
|
||||
// in the young gen: for the SATB pre-barrier, there is no
|
||||
// pre-value that needs to be remembered; for the remembered-set
|
||||
// update logging post-barrier, we don't maintain remembered set
|
||||
// information for young gen objects. Note that non-generational
|
||||
// G1 does not have any "young" objects, should not elide
|
||||
// the rs logging barrier and so should always answer false below.
|
||||
// However, non-generational G1 (-XX:-G1Gen) appears to have
|
||||
// bit-rotted so was not tested below.
|
||||
virtual bool can_elide_initializing_store_barrier(oop new_obj) {
|
||||
assert(G1Gen || !is_in_young(new_obj),
|
||||
"Non-generational G1 should never return true below");
|
||||
return is_in_young(new_obj);
|
||||
}
|
||||
|
||||
// Can a compiler elide a store barrier when it writes
|
||||
|
|
|
@ -314,41 +314,6 @@ bool ParallelScavengeHeap::is_in_reserved(const void* p) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Static method
|
||||
bool ParallelScavengeHeap::is_in_young(oop* p) {
|
||||
ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
|
||||
assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
|
||||
"Must be ParallelScavengeHeap");
|
||||
|
||||
PSYoungGen* young_gen = heap->young_gen();
|
||||
|
||||
if (young_gen->is_in_reserved(p)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Static method
|
||||
bool ParallelScavengeHeap::is_in_old_or_perm(oop* p) {
|
||||
ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
|
||||
assert(heap->kind() == CollectedHeap::ParallelScavengeHeap,
|
||||
"Must be ParallelScavengeHeap");
|
||||
|
||||
PSOldGen* old_gen = heap->old_gen();
|
||||
PSPermGen* perm_gen = heap->perm_gen();
|
||||
|
||||
if (old_gen->is_in_reserved(p)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if (perm_gen->is_in_reserved(p)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// There are two levels of allocation policy here.
|
||||
//
|
||||
// When an allocation request fails, the requesting thread must invoke a VM
|
||||
|
@ -764,6 +729,13 @@ void ParallelScavengeHeap::resize_all_tlabs() {
|
|||
CollectedHeap::resize_all_tlabs();
|
||||
}
|
||||
|
||||
bool ParallelScavengeHeap::can_elide_initializing_store_barrier(oop new_obj) {
|
||||
// We don't need barriers for stores to objects in the
|
||||
// young gen and, a fortiori, for initializing stores to
|
||||
// objects therein.
|
||||
return is_in_young(new_obj);
|
||||
}
|
||||
|
||||
// This method is used by System.gc() and JVMTI.
|
||||
void ParallelScavengeHeap::collect(GCCause::Cause cause) {
|
||||
assert(!Heap_lock->owned_by_self(),
|
||||
|
|
|
@ -129,8 +129,8 @@ class ParallelScavengeHeap : public CollectedHeap {
|
|||
return perm_gen()->is_in(p);
|
||||
}
|
||||
|
||||
static bool is_in_young(oop *p); // reserved part
|
||||
static bool is_in_old_or_perm(oop *p); // reserved part
|
||||
inline bool is_in_young(oop p); // reserved part
|
||||
inline bool is_in_old_or_perm(oop p); // reserved part
|
||||
|
||||
// Memory allocation. "gc_time_limit_was_exceeded" will
|
||||
// be set to true if the adaptive size policy determines that
|
||||
|
@ -191,6 +191,10 @@ class ParallelScavengeHeap : public CollectedHeap {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Return true if we don't need a store barrier for
|
||||
// initializing stores to an object at this address.
|
||||
virtual bool can_elide_initializing_store_barrier(oop new_obj);
|
||||
|
||||
// Can a compiler elide a store barrier when it writes
|
||||
// a permanent oop into the heap? Applies when the compiler
|
||||
// is storing x to the heap, where x->is_perm() is true.
|
||||
|
|
|
@ -41,3 +41,11 @@ inline void ParallelScavengeHeap::invoke_full_gc(bool maximum_compaction)
|
|||
PSMarkSweep::invoke(maximum_compaction);
|
||||
}
|
||||
}
|
||||
|
||||
inline bool ParallelScavengeHeap::is_in_young(oop p) {
|
||||
return young_gen()->is_in_reserved(p);
|
||||
}
|
||||
|
||||
inline bool ParallelScavengeHeap::is_in_old_or_perm(oop p) {
|
||||
return old_gen()->is_in_reserved(p) || perm_gen()->is_in_reserved(p);
|
||||
}
|
||||
|
|
|
@ -137,6 +137,89 @@ HeapWord* CollectedHeap::allocate_from_tlab_slow(Thread* thread, size_t size) {
|
|||
return obj;
|
||||
}
|
||||
|
||||
void CollectedHeap::flush_deferred_store_barrier(JavaThread* thread) {
|
||||
MemRegion deferred = thread->deferred_card_mark();
|
||||
if (!deferred.is_empty()) {
|
||||
{
|
||||
// Verify that the storage points to a parsable object in heap
|
||||
DEBUG_ONLY(oop old_obj = oop(deferred.start());)
|
||||
assert(is_in(old_obj), "Not in allocated heap");
|
||||
assert(!can_elide_initializing_store_barrier(old_obj),
|
||||
"Else should have been filtered in defer_store_barrier()");
|
||||
assert(!is_in_permanent(old_obj), "Sanity: not expected");
|
||||
assert(old_obj->is_oop(true), "Not an oop");
|
||||
assert(old_obj->is_parsable(), "Will not be concurrently parsable");
|
||||
assert(deferred.word_size() == (size_t)(old_obj->size()),
|
||||
"Mismatch: multiple objects?");
|
||||
}
|
||||
BarrierSet* bs = barrier_set();
|
||||
assert(bs->has_write_region_opt(), "No write_region() on BarrierSet");
|
||||
bs->write_region(deferred);
|
||||
// "Clear" the deferred_card_mark field
|
||||
thread->set_deferred_card_mark(MemRegion());
|
||||
}
|
||||
assert(thread->deferred_card_mark().is_empty(), "invariant");
|
||||
}
|
||||
|
||||
// Helper for ReduceInitialCardMarks. For performance,
|
||||
// compiled code may elide card-marks for initializing stores
|
||||
// to a newly allocated object along the fast-path. We
|
||||
// compensate for such elided card-marks as follows:
|
||||
// (a) Generational, non-concurrent collectors, such as
|
||||
// GenCollectedHeap(ParNew,DefNew,Tenured) and
|
||||
// ParallelScavengeHeap(ParallelGC, ParallelOldGC)
|
||||
// need the card-mark if and only if the region is
|
||||
// in the old gen, and do not care if the card-mark
|
||||
// succeeds or precedes the initializing stores themselves,
|
||||
// so long as the card-mark is completed before the next
|
||||
// scavenge. For all these cases, we can do a card mark
|
||||
// at the point at which we do a slow path allocation
|
||||
// in the old gen. For uniformity, however, we end
|
||||
// up using the same scheme (see below) for all three
|
||||
// cases (deferring the card-mark appropriately).
|
||||
// (b) GenCollectedHeap(ConcurrentMarkSweepGeneration) requires
|
||||
// in addition that the card-mark for an old gen allocated
|
||||
// object strictly follow any associated initializing stores.
|
||||
// In these cases, the memRegion remembered below is
|
||||
// used to card-mark the entire region either just before the next
|
||||
// slow-path allocation by this thread or just before the next scavenge or
|
||||
// CMS-associated safepoint, whichever of these events happens first.
|
||||
// (The implicit assumption is that the object has been fully
|
||||
// initialized by this point, a fact that we assert when doing the
|
||||
// card-mark.)
|
||||
// (c) G1CollectedHeap(G1) uses two kinds of write barriers. When a
|
||||
// G1 concurrent marking is in progress an SATB (pre-write-)barrier is
|
||||
// used to remember the pre-value of any store. Initializing
|
||||
// stores will not need this barrier, so we need not worry about
|
||||
// compensating for the missing pre-barrier here. Turning now
|
||||
// to the post-barrier, we note that G1 needs a RS update barrier
|
||||
// which simply enqueues a (sequence of) dirty cards which may
|
||||
// optionally be refined by the concurrent update threads. Note
|
||||
// that this barrier need only be applied to a non-young write,
|
||||
// but, like in CMS, because of the presence of concurrent refinement
|
||||
// (much like CMS' precleaning), must strictly follow the oop-store.
|
||||
// Thus, using the same protocol for maintaining the intended
|
||||
// invariants turns out, serendipitously, to be the same for all
|
||||
// three collectors/heap types above.
|
||||
//
|
||||
// For each future collector, this should be reexamined with
|
||||
// that specific collector in mind.
|
||||
oop CollectedHeap::defer_store_barrier(JavaThread* thread, oop new_obj) {
|
||||
// If a previous card-mark was deferred, flush it now.
|
||||
flush_deferred_store_barrier(thread);
|
||||
if (can_elide_initializing_store_barrier(new_obj)) {
|
||||
// The deferred_card_mark region should be empty
|
||||
// following the flush above.
|
||||
assert(thread->deferred_card_mark().is_empty(), "Error");
|
||||
} else {
|
||||
// Remember info for the newly deferred store barrier
|
||||
MemRegion deferred = MemRegion((HeapWord*)new_obj, new_obj->size());
|
||||
assert(!deferred.is_empty(), "Error");
|
||||
thread->set_deferred_card_mark(deferred);
|
||||
}
|
||||
return new_obj;
|
||||
}
|
||||
|
||||
size_t CollectedHeap::filler_array_hdr_size() {
|
||||
return size_t(arrayOopDesc::header_size(T_INT));
|
||||
}
|
||||
|
@ -225,16 +308,6 @@ void CollectedHeap::fill_with_objects(HeapWord* start, size_t words)
|
|||
fill_with_object_impl(start, words);
|
||||
}
|
||||
|
||||
oop CollectedHeap::new_store_barrier(oop new_obj) {
|
||||
// %%% This needs refactoring. (It was imported from the server compiler.)
|
||||
guarantee(can_elide_tlab_store_barriers(), "store barrier elision not supported");
|
||||
BarrierSet* bs = this->barrier_set();
|
||||
assert(bs->has_write_region_opt(), "Barrier set does not have write_region");
|
||||
int new_size = new_obj->size();
|
||||
bs->write_region(MemRegion((HeapWord*)new_obj, new_size));
|
||||
return new_obj;
|
||||
}
|
||||
|
||||
HeapWord* CollectedHeap::allocate_new_tlab(size_t size) {
|
||||
guarantee(false, "thread-local allocation buffers not supported");
|
||||
return NULL;
|
||||
|
|
|
@ -415,9 +415,14 @@ class CollectedHeap : public CHeapObj {
|
|||
guarantee(false, "thread-local allocation buffers not supported");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Can a compiler initialize a new object without store barriers?
|
||||
// This permission only extends from the creation of a new object
|
||||
// via a TLAB up to the first subsequent safepoint.
|
||||
// via a TLAB up to the first subsequent safepoint. If such permission
|
||||
// is granted for this heap type, the compiler promises to call
|
||||
// defer_store_barrier() below on any slow path allocation of
|
||||
// a new object for which such initializing store barriers will
|
||||
// have been elided.
|
||||
virtual bool can_elide_tlab_store_barriers() const = 0;
|
||||
|
||||
// If a compiler is eliding store barriers for TLAB-allocated objects,
|
||||
|
@ -426,7 +431,18 @@ class CollectedHeap : public CHeapObj {
|
|||
// promises to call this function on such a slow-path-allocated
|
||||
// object before performing initializations that have elided
|
||||
// store barriers. Returns new_obj, or maybe a safer copy thereof.
|
||||
virtual oop new_store_barrier(oop new_obj);
|
||||
virtual oop defer_store_barrier(JavaThread* thread, oop new_obj);
|
||||
|
||||
// Answers whether an initializing store to a new object currently
|
||||
// allocated at the given address doesn't need a (deferred) store
|
||||
// barrier. Returns "true" if it doesn't need an initializing
|
||||
// store barrier; answers "false" if it does.
|
||||
virtual bool can_elide_initializing_store_barrier(oop new_obj) = 0;
|
||||
|
||||
// If the CollectedHeap was asked to defer a store barrier above,
|
||||
// this informs it to flush such a deferred store barrier to the
|
||||
// remembered set.
|
||||
virtual void flush_deferred_store_barrier(JavaThread* thread);
|
||||
|
||||
// Can a compiler elide a store barrier when it writes
|
||||
// a permanent oop into the heap? Applies when the compiler
|
||||
|
|
|
@ -260,6 +260,17 @@ public:
|
|||
return true;
|
||||
}
|
||||
|
||||
// We don't need barriers for stores to objects in the
|
||||
// young gen and, a fortiori, for initializing stores to
|
||||
// objects therein. This applies to {DefNew,ParNew}+{Tenured,CMS}
|
||||
// only and may need to be re-examined in case other
|
||||
// kinds of collectors are implemented in the future.
|
||||
virtual bool can_elide_initializing_store_barrier(oop new_obj) {
|
||||
assert(UseParNewGC || UseSerialGC || UseConcMarkSweepGC,
|
||||
"Check can_elide_initializing_store_barrier() for this collector");
|
||||
return is_in_youngest((void*)new_obj);
|
||||
}
|
||||
|
||||
// Can a compiler elide a store barrier when it writes
|
||||
// a permanent oop into the heap? Applies when the compiler
|
||||
// is storing x to the heap, where x->is_perm() is true.
|
||||
|
|
|
@ -3186,6 +3186,15 @@ void GraphKit::write_barrier_post(Node* oop_store,
|
|||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& obj == just_allocated_object(control())) {
|
||||
// We can skip marks on a freshly-allocated object in Eden.
|
||||
// Keep this code in sync with maybe_defer_card_mark() in runtime.cpp.
|
||||
// That routine informs GC to take appropriate compensating steps
|
||||
// so as to make this card-mark elision safe.
|
||||
return;
|
||||
}
|
||||
|
||||
if (!use_precise) {
|
||||
// All card marks for a (non-array) instance are in one place:
|
||||
adr = obj;
|
||||
|
|
|
@ -4160,13 +4160,13 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
|
|||
result_mem ->set_req(_objArray_path, reset_memory());
|
||||
}
|
||||
}
|
||||
// We can dispense with card marks if we know the allocation
|
||||
// comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
|
||||
// causes the non-eden paths to simulate a fresh allocation,
|
||||
// insofar that no further card marks are required to initialize
|
||||
// the object.
|
||||
|
||||
// Otherwise, there are no card marks to worry about.
|
||||
// (We can dispense with card marks if we know the allocation
|
||||
// comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
|
||||
// causes the non-eden paths to take compensating steps to
|
||||
// simulate a fresh allocation, so that no further
|
||||
// card marks are required in compiled code to initialize
|
||||
// the object.)
|
||||
|
||||
if (!stopped()) {
|
||||
copy_to_clone(obj, alloc_obj, obj_size, true, false);
|
||||
|
|
|
@ -143,18 +143,20 @@ const char* OptoRuntime::stub_name(address entry) {
|
|||
// We failed the fast-path allocation. Now we need to do a scavenge or GC
|
||||
// and try allocation again.
|
||||
|
||||
void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
|
||||
void OptoRuntime::maybe_defer_card_mark(JavaThread* thread) {
|
||||
// After any safepoint, just before going back to compiled code,
|
||||
// we perform a card mark. This lets the compiled code omit
|
||||
// card marks for initialization of new objects.
|
||||
// Keep this code consistent with GraphKit::store_barrier.
|
||||
// we inform the GC that we will be doing initializing writes to
|
||||
// this object in the future without emitting card-marks, so
|
||||
// GC may take any compensating steps.
|
||||
// NOTE: Keep this code consistent with GraphKit::store_barrier.
|
||||
|
||||
oop new_obj = thread->vm_result();
|
||||
if (new_obj == NULL) return;
|
||||
|
||||
assert(Universe::heap()->can_elide_tlab_store_barriers(),
|
||||
"compiler must check this first");
|
||||
new_obj = Universe::heap()->new_store_barrier(new_obj);
|
||||
// GC may decide to give back a safer copy of new_obj.
|
||||
new_obj = Universe::heap()->defer_store_barrier(thread, new_obj);
|
||||
thread->set_vm_result(new_obj);
|
||||
}
|
||||
|
||||
|
@ -197,8 +199,8 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(klassOopDesc* klass, JavaThrea
|
|||
JRT_BLOCK_END;
|
||||
|
||||
if (GraphKit::use_ReduceInitialCardMarks()) {
|
||||
// do them now so we don't have to do them on the fast path
|
||||
do_eager_card_mark(thread);
|
||||
// inform GC that we won't do card marks for initializing writes.
|
||||
maybe_defer_card_mark(thread);
|
||||
}
|
||||
JRT_END
|
||||
|
||||
|
@ -236,8 +238,8 @@ JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len
|
|||
JRT_BLOCK_END;
|
||||
|
||||
if (GraphKit::use_ReduceInitialCardMarks()) {
|
||||
// do them now so we don't have to do them on the fast path
|
||||
do_eager_card_mark(thread);
|
||||
// inform GC that we won't do card marks for initializing writes.
|
||||
maybe_defer_card_mark(thread);
|
||||
}
|
||||
JRT_END
|
||||
|
||||
|
|
|
@ -133,8 +133,8 @@ class OptoRuntime : public AllStatic {
|
|||
// Allocate storage for an objArray or typeArray
|
||||
static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
|
||||
|
||||
// Post-allocation step for implementing ReduceInitialCardMarks:
|
||||
static void do_eager_card_mark(JavaThread* thread);
|
||||
// Post-slow-path-allocation step for implementing ReduceInitialCardMarks:
|
||||
static void maybe_defer_card_mark(JavaThread* thread);
|
||||
|
||||
// Allocate storage for multi-dimensional arrays
|
||||
// Note: needs to be fixed for arbitrary number of dimensions
|
||||
|
|
|
@ -1213,6 +1213,7 @@ JavaThread::JavaThread(bool is_attaching) :
|
|||
{
|
||||
initialize();
|
||||
_is_attaching = is_attaching;
|
||||
assert(_deferred_card_mark.is_empty(), "Default MemRegion ctor");
|
||||
}
|
||||
|
||||
bool JavaThread::reguard_stack(address cur_sp) {
|
||||
|
@ -2318,6 +2319,10 @@ void JavaThread::gc_prologue() {
|
|||
|
||||
|
||||
void JavaThread::oops_do(OopClosure* f, CodeBlobClosure* cf) {
|
||||
// Flush deferred store-barriers, if any, associated with
|
||||
// initializing stores done by this JavaThread in the current epoch.
|
||||
Universe::heap()->flush_deferred_store_barrier(this);
|
||||
|
||||
// The ThreadProfiler oops_do is done from FlatProfiler::oops_do
|
||||
// since there may be more than one thread using each ThreadProfiler.
|
||||
|
||||
|
|
|
@ -687,6 +687,11 @@ class JavaThread: public Thread {
|
|||
oop _vm_result; // Used to pass back an oop result into Java code, GC-preserved
|
||||
oop _vm_result_2; // Used to pass back an oop result into Java code, GC-preserved
|
||||
|
||||
// See ReduceInitialCardMarks: this holds the precise space interval of
|
||||
// the most recent slow path allocation for which compiled code has
|
||||
// elided card-marks for performance along the fast-path.
|
||||
MemRegion _deferred_card_mark;
|
||||
|
||||
MonitorChunk* _monitor_chunks; // Contains the off stack monitors
|
||||
// allocated during deoptimization
|
||||
// and by JNI_MonitorEnter/Exit
|
||||
|
@ -1082,6 +1087,9 @@ class JavaThread: public Thread {
|
|||
oop vm_result_2() const { return _vm_result_2; }
|
||||
void set_vm_result_2 (oop x) { _vm_result_2 = x; }
|
||||
|
||||
MemRegion deferred_card_mark() const { return _deferred_card_mark; }
|
||||
void set_deferred_card_mark(MemRegion mr) { _deferred_card_mark = mr; }
|
||||
|
||||
// Exception handling for compiled methods
|
||||
oop exception_oop() const { return _exception_oop; }
|
||||
int exception_stack_size() const { return _exception_stack_size; }
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue