7037276: Unnecessary double traversal of dirty card windows

Short-circuited an unnecessary double traversal of dirty card windows when iterating younger refs. Also renamed some cardtable methods for greater clarity.

Reviewed-by: jmasa, stefank, poonam
This commit is contained in:
Y. Srinivas Ramakrishna 2011-04-20 19:19:30 -07:00
parent 12d1d9acc1
commit b4d40650b2
5 changed files with 198 additions and 162 deletions

View file

@ -33,11 +33,11 @@
#include "runtime/mutexLocker.hpp" #include "runtime/mutexLocker.hpp"
#include "runtime/virtualspace.hpp" #include "runtime/virtualspace.hpp"
void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr, void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl, DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl, ClearNoncleanCardWrapper* cl,
int n_threads) { int n_threads) {
if (n_threads > 0) { assert(n_threads > 0, "Error: expected n_threads > 0");
assert((n_threads == 1 && ParallelGCThreads == 0) || assert((n_threads == 1 && ParallelGCThreads == 0) ||
n_threads <= (int)ParallelGCThreads, n_threads <= (int)ParallelGCThreads,
"# worker threads != # requested!"); "# worker threads != # requested!");
@ -72,7 +72,6 @@ void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
lowest_non_clean[ind] = NULL; lowest_non_clean[ind] = NULL;
} }
} }
}
} }
void void
@ -81,7 +80,7 @@ process_stride(Space* sp,
MemRegion used, MemRegion used,
jint stride, int n_strides, jint stride, int n_strides,
DirtyCardToOopClosure* dcto_cl, DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl, ClearNoncleanCardWrapper* cl,
jbyte** lowest_non_clean, jbyte** lowest_non_clean,
uintptr_t lowest_non_clean_base_chunk_index, uintptr_t lowest_non_clean_base_chunk_index,
size_t lowest_non_clean_chunk_size) { size_t lowest_non_clean_chunk_size) {
@ -127,7 +126,11 @@ process_stride(Space* sp,
lowest_non_clean_base_chunk_index, lowest_non_clean_base_chunk_index,
lowest_non_clean_chunk_size); lowest_non_clean_chunk_size);
non_clean_card_iterate_work(chunk_mr, cl); // We do not call the non_clean_card_iterate_serial() version because
// we want to clear the cards, and the ClearNoncleanCardWrapper closure
// itself does the work of finding contiguous dirty ranges of cards to
// process (and clear).
cl->do_MemRegion(chunk_mr);
// Find the next chunk of the stride. // Find the next chunk of the stride.
chunk_card_start += CardsPerStrideChunk * n_strides; chunk_card_start += CardsPerStrideChunk * n_strides;

View file

@ -456,30 +456,34 @@ bool CardTableModRefBS::mark_card_deferred(size_t card_index) {
} }
void CardTableModRefBS::non_clean_card_iterate(Space* sp, void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
MemRegion mr, MemRegion mr,
DirtyCardToOopClosure* dcto_cl, DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl) { ClearNoncleanCardWrapper* cl) {
if (!mr.is_empty()) { if (!mr.is_empty()) {
int n_threads = SharedHeap::heap()->n_par_threads(); int n_threads = SharedHeap::heap()->n_par_threads();
if (n_threads > 0) { if (n_threads > 0) {
#ifndef SERIALGC #ifndef SERIALGC
par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads); non_clean_card_iterate_parallel_work(sp, mr, dcto_cl, cl, n_threads);
#else // SERIALGC #else // SERIALGC
fatal("Parallel gc not supported here."); fatal("Parallel gc not supported here.");
#endif // SERIALGC #endif // SERIALGC
} else { } else {
non_clean_card_iterate_work(mr, cl); // We do not call the non_clean_card_iterate_serial() version below because
// we want to clear the cards (which non_clean_card_iterate_serial() does not
// do for us), and the ClearNoncleanCardWrapper closure itself does the work
// of finding contiguous dirty ranges of cards to process (and clear).
cl->do_MemRegion(mr);
} }
} }
} }
// NOTE: For this to work correctly, it is important that // The iterator itself is not MT-aware, but
// we look for non-clean cards below (so as to catch those // MT-aware callers and closures can use this to
// marked precleaned), rather than look explicitly for dirty // accomplish dirty card iteration in parallel. The
// cards (and miss those marked precleaned). In that sense, // iterator itself does not clear the dirty cards, or
// the name precleaned is currently somewhat of a misnomer. // change their values in any manner.
void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr, void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
MemRegionClosure* cl) { MemRegionClosure* cl) {
for (int i = 0; i < _cur_covered_regions; i++) { for (int i = 0; i < _cur_covered_regions; i++) {
MemRegion mri = mr.intersection(_covered[i]); MemRegion mri = mr.intersection(_covered[i]);
@ -661,7 +665,7 @@ public:
void CardTableModRefBS::verify_clean_region(MemRegion mr) { void CardTableModRefBS::verify_clean_region(MemRegion mr) {
GuaranteeNotModClosure blk(this); GuaranteeNotModClosure blk(this);
non_clean_card_iterate_work(mr, &blk); non_clean_card_iterate_serial(mr, &blk);
} }
// To verify a MemRegion is entirely dirty this closure is passed to // To verify a MemRegion is entirely dirty this closure is passed to

View file

@ -44,6 +44,7 @@
class Generation; class Generation;
class OopsInGenClosure; class OopsInGenClosure;
class DirtyCardToOopClosure; class DirtyCardToOopClosure;
class ClearNoncleanCardWrapper;
class CardTableModRefBS: public ModRefBarrierSet { class CardTableModRefBS: public ModRefBarrierSet {
// Some classes get to look at some private stuff. // Some classes get to look at some private stuff.
@ -165,22 +166,28 @@ class CardTableModRefBS: public ModRefBarrierSet {
// Iterate over the portion of the card-table which covers the given // Iterate over the portion of the card-table which covers the given
// region mr in the given space and apply cl to any dirty sub-regions // region mr in the given space and apply cl to any dirty sub-regions
// of mr. cl and dcto_cl must either be the same closure or cl must // of mr. Dirty cards are _not_ cleared by the iterator method itself,
// wrap dcto_cl. Both are required - neither may be NULL. Also, dcto_cl // but closures may arrange to do so on their own should they so wish.
// may be modified. Note that this function will operate in a parallel void non_clean_card_iterate_serial(MemRegion mr, MemRegionClosure* cl);
// mode if worker threads are available.
void non_clean_card_iterate(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl);
// Utility function used to implement the other versions below. // A variant of the above that will operate in a parallel mode if
void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl); // worker threads are available, and clear the dirty cards as it
// processes them.
void par_non_clean_card_iterate_work(Space* sp, MemRegion mr, // ClearNoncleanCardWrapper cl must wrap the DirtyCardToOopClosure dcto_cl,
// which may itself be modified by the method.
void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl, DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl, ClearNoncleanCardWrapper* cl);
private:
// Work method used to implement non_clean_card_iterate_possibly_parallel()
// above in the parallel case.
void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
DirtyCardToOopClosure* dcto_cl,
ClearNoncleanCardWrapper* cl,
int n_threads); int n_threads);
protected:
// Dirty the bytes corresponding to "mr" (not all of which must be // Dirty the bytes corresponding to "mr" (not all of which must be
// covered.) // covered.)
void dirty_MemRegion(MemRegion mr); void dirty_MemRegion(MemRegion mr);
@ -237,7 +244,7 @@ class CardTableModRefBS: public ModRefBarrierSet {
MemRegion used, MemRegion used,
jint stride, int n_strides, jint stride, int n_strides,
DirtyCardToOopClosure* dcto_cl, DirtyCardToOopClosure* dcto_cl,
MemRegionClosure* cl, ClearNoncleanCardWrapper* cl,
jbyte** lowest_non_clean, jbyte** lowest_non_clean,
uintptr_t lowest_non_clean_base_chunk_index, uintptr_t lowest_non_clean_base_chunk_index,
size_t lowest_non_clean_chunk_size); size_t lowest_non_clean_chunk_size);
@ -409,14 +416,14 @@ public:
// marking, where a dirty card may cause scanning, and summarization // marking, where a dirty card may cause scanning, and summarization
// marking, of objects that extend onto subsequent cards.) // marking, of objects that extend onto subsequent cards.)
void mod_card_iterate(MemRegionClosure* cl) { void mod_card_iterate(MemRegionClosure* cl) {
non_clean_card_iterate_work(_whole_heap, cl); non_clean_card_iterate_serial(_whole_heap, cl);
} }
// Like the "mod_card_iterate" above, except only invokes the closure // Like the "mod_card_iterate" above, except only invokes the closure
// for cards within the MemRegion "mr" (which is required to be // for cards within the MemRegion "mr" (which is required to be
// card-aligned and sized.) // card-aligned and sized.)
void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) { void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) {
non_clean_card_iterate_work(mr, cl); non_clean_card_iterate_serial(mr, cl);
} }
static uintx ct_max_alignment_constraint(); static uintx ct_max_alignment_constraint();
@ -493,4 +500,5 @@ public:
void set_CTRS(CardTableRS* rs) { _rs = rs; } void set_CTRS(CardTableRS* rs) { _rs = rs; }
}; };
#endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP #endif // SHARE_VM_MEMORY_CARDTABLEMODREFBS_HPP

View file

@ -105,15 +105,15 @@ void CardTableRS::younger_refs_iterate(Generation* g,
g->younger_refs_iterate(blk); g->younger_refs_iterate(blk);
} }
class ClearNoncleanCardWrapper: public MemRegionClosure { inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
MemRegionClosure* _dirty_card_closure;
CardTableRS* _ct;
bool _is_par;
private:
// Clears the given card, return true if the corresponding card should be
// processed.
bool clear_card(jbyte* entry) {
if (_is_par) { if (_is_par) {
return clear_card_parallel(entry);
} else {
return clear_card_serial(entry);
}
}
inline bool ClearNoncleanCardWrapper::clear_card_parallel(jbyte* entry) {
while (true) { while (true) {
// In the parallel case, we may have to do this several times. // In the parallel case, we may have to do this several times.
jbyte entry_val = *entry; jbyte entry_val = *entry;
@ -147,7 +147,10 @@ private:
} }
} }
return true; return true;
} else { }
inline bool ClearNoncleanCardWrapper::clear_card_serial(jbyte* entry) {
jbyte entry_val = *entry; jbyte entry_val = *entry;
assert(entry_val != CardTableRS::clean_card_val(), assert(entry_val != CardTableRS::clean_card_val(),
"We shouldn't be looking at clean cards, and this should " "We shouldn't be looking at clean cards, and this should "
@ -156,56 +159,57 @@ private:
"This should be possible in the sequential case."); "This should be possible in the sequential case.");
*entry = CardTableRS::clean_card_val(); *entry = CardTableRS::clean_card_val();
return true; return true;
} }
}
public: ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, MemRegionClosure* dirty_card_closure, CardTableRS* ct) :
CardTableRS* ct) :
_dirty_card_closure(dirty_card_closure), _ct(ct) { _dirty_card_closure(dirty_card_closure), _ct(ct) {
_is_par = (SharedHeap::heap()->n_par_threads() > 0); _is_par = (SharedHeap::heap()->n_par_threads() > 0);
} }
void do_MemRegion(MemRegion mr) {
// We start at the high end of "mr", walking backwards void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
// while accumulating a contiguous dirty range of cards in assert(mr.word_size() > 0, "Error");
// [start_of_non_clean, end_of_non_clean) which we then assert(_ct->is_aligned(mr.start()), "mr.start() should be card aligned");
// process en masse. // mr.end() may not necessarily be card aligned.
jbyte* cur_entry = _ct->byte_for(mr.last());
const jbyte* limit = _ct->byte_for(mr.start());
HeapWord* end_of_non_clean = mr.end(); HeapWord* end_of_non_clean = mr.end();
HeapWord* start_of_non_clean = end_of_non_clean; HeapWord* start_of_non_clean = end_of_non_clean;
jbyte* entry = _ct->byte_for(mr.last()); while (cur_entry >= limit) {
const jbyte* first_entry = _ct->byte_for(mr.start()); HeapWord* cur_hw = _ct->addr_for(cur_entry);
while (entry >= first_entry) { if ((*cur_entry != CardTableRS::clean_card_val()) && clear_card(cur_entry)) {
HeapWord* cur = _ct->addr_for(entry); // Continue the dirty range by opening the
if (!clear_card(entry)) { // dirty window one card to the left.
// We hit a clean card; process any non-empty start_of_non_clean = cur_hw;
// dirty range accumulated so far. } else {
// We hit a "clean" card; process any non-empty
// "dirty" range accumulated so far.
if (start_of_non_clean < end_of_non_clean) { if (start_of_non_clean < end_of_non_clean) {
MemRegion mr2(start_of_non_clean, end_of_non_clean); const MemRegion mrd(start_of_non_clean, end_of_non_clean);
_dirty_card_closure->do_MemRegion(mr2); _dirty_card_closure->do_MemRegion(mrd);
} }
// Reset the dirty window while continuing to // Reset the dirty window, while continuing to look
// look for the next dirty window to process. // for the next dirty card that will start a
end_of_non_clean = cur; // new dirty window.
start_of_non_clean = end_of_non_clean; end_of_non_clean = cur_hw;
start_of_non_clean = cur_hw;
} }
// Open the left end of the window one card to the left. // Note that "cur_entry" leads "start_of_non_clean" in
start_of_non_clean = cur;
// Note that "entry" leads "start_of_non_clean" in
// its leftward excursion after this point // its leftward excursion after this point
// in the loop and, when we hit the left end of "mr", // in the loop and, when we hit the left end of "mr",
// will point off of the left end of the card-table // will point off of the left end of the card-table
// for "mr". // for "mr".
entry--; cur_entry--;
} }
// If the first card of "mr" was dirty, we will have // If the first card of "mr" was dirty, we will have
// been left with a dirty window, co-initial with "mr", // been left with a dirty window, co-initial with "mr",
// which we now process. // which we now process.
if (start_of_non_clean < end_of_non_clean) { if (start_of_non_clean < end_of_non_clean) {
MemRegion mr2(start_of_non_clean, end_of_non_clean); const MemRegion mrd(start_of_non_clean, end_of_non_clean);
_dirty_card_closure->do_MemRegion(mr2); _dirty_card_closure->do_MemRegion(mrd);
} }
} }
};
// clean (by dirty->clean before) ==> cur_younger_gen // clean (by dirty->clean before) ==> cur_younger_gen
// dirty ==> cur_youngergen_and_prev_nonclean_card // dirty ==> cur_youngergen_and_prev_nonclean_card
// precleaned ==> cur_youngergen_and_prev_nonclean_card // precleaned ==> cur_youngergen_and_prev_nonclean_card
@ -246,7 +250,7 @@ void CardTableRS::younger_refs_in_space_iterate(Space* sp,
cl->gen_boundary()); cl->gen_boundary());
ClearNoncleanCardWrapper clear_cl(dcto_cl, this); ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
_ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(), _ct_bs->non_clean_card_iterate_possibly_parallel(sp, sp->used_region_at_save_marks(),
dcto_cl, &clear_cl); dcto_cl, &clear_cl);
} }

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -166,4 +166,21 @@ public:
}; };
// A MemRegionClosure that wraps another MemRegionClosure. Its do_MemRegion()
// walks the card table entries covering the given region from the high end
// towards the low end, clears the non-clean cards it encounters via
// clear_card(), and passes each contiguous range of cards accepted by
// clear_card() to the wrapped closure in a single call.
class ClearNoncleanCardWrapper: public MemRegionClosure {
// The wrapped closure; applied to each contiguous non-clean range found.
MemRegionClosure* _dirty_card_closure;
// Card table used to translate between heap addresses and card entries.
CardTableRS* _ct;
// Set by the constructor to (SharedHeap::heap()->n_par_threads() > 0);
// selects between the parallel and serial card-clearing work methods.
bool _is_par;
private:
// Clears the given card; returns true if the corresponding card should be
// processed. Dispatches to clear_card_parallel() when _is_par is set and
// to clear_card_serial() otherwise.
inline bool clear_card(jbyte* entry);
// Work methods called by clear_card().
// The serial version asserts that the entry is not already clean, sets it
// to CardTableRS::clean_card_val(), and returns true.
inline bool clear_card_serial(jbyte* entry);
// The parallel version re-reads the entry in a loop, since in the parallel
// case the attempt may have to be repeated several times.
// NOTE(review): the conditions under which it returns false are not fully
// visible here -- confirm against the definition.
inline bool clear_card_parallel(jbyte* entry);
public:
// dirty_card_closure is the closure to apply to each non-clean range;
// ct is the card table whose entries are examined and cleared.
ClearNoncleanCardWrapper(MemRegionClosure* dirty_card_closure, CardTableRS* ct);
// Processes the cards covering mr. The implementation asserts that mr is
// non-empty and that mr.start() is card aligned; mr.end() need not be.
void do_MemRegion(MemRegion mr);
};
#endif // SHARE_VM_MEMORY_CARDTABLERS_HPP #endif // SHARE_VM_MEMORY_CARDTABLERS_HPP