mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 22:34:27 +02:00
6725714: par compact - add a table to speed up bitmap searches
Reviewed-by: jmasa, tschatzl
This commit is contained in:
parent
c13149ab8d
commit
de314e391b
3 changed files with 309 additions and 52 deletions
|
@ -59,13 +59,25 @@
|
|||
#include <math.h>
|
||||
|
||||
// All sizes are in HeapWords.
|
||||
const size_t ParallelCompactData::Log2RegionSize = 9; // 512 words
|
||||
const size_t ParallelCompactData::Log2RegionSize = 16; // 64K words
|
||||
const size_t ParallelCompactData::RegionSize = (size_t)1 << Log2RegionSize;
|
||||
const size_t ParallelCompactData::RegionSizeBytes =
|
||||
RegionSize << LogHeapWordSize;
|
||||
const size_t ParallelCompactData::RegionSizeOffsetMask = RegionSize - 1;
|
||||
const size_t ParallelCompactData::RegionAddrOffsetMask = RegionSizeBytes - 1;
|
||||
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
|
||||
const size_t ParallelCompactData::RegionAddrMask = ~RegionAddrOffsetMask;
|
||||
|
||||
const size_t ParallelCompactData::Log2BlockSize = 7; // 128 words
|
||||
const size_t ParallelCompactData::BlockSize = (size_t)1 << Log2BlockSize;
|
||||
const size_t ParallelCompactData::BlockSizeBytes =
|
||||
BlockSize << LogHeapWordSize;
|
||||
const size_t ParallelCompactData::BlockSizeOffsetMask = BlockSize - 1;
|
||||
const size_t ParallelCompactData::BlockAddrOffsetMask = BlockSizeBytes - 1;
|
||||
const size_t ParallelCompactData::BlockAddrMask = ~BlockAddrOffsetMask;
|
||||
|
||||
const size_t ParallelCompactData::BlocksPerRegion = RegionSize / BlockSize;
|
||||
const size_t ParallelCompactData::Log2BlocksPerRegion =
|
||||
Log2RegionSize - Log2BlockSize;
|
||||
|
||||
const ParallelCompactData::RegionData::region_sz_t
|
||||
ParallelCompactData::RegionData::dc_shift = 27;
|
||||
|
@ -359,6 +371,10 @@ ParallelCompactData::ParallelCompactData()
|
|||
_reserved_byte_size = 0;
|
||||
_region_data = 0;
|
||||
_region_count = 0;
|
||||
|
||||
_block_vspace = 0;
|
||||
_block_data = 0;
|
||||
_block_count = 0;
|
||||
}
|
||||
|
||||
bool ParallelCompactData::initialize(MemRegion covered_region)
|
||||
|
@ -372,8 +388,7 @@ bool ParallelCompactData::initialize(MemRegion covered_region)
|
|||
assert((region_size & RegionSizeOffsetMask) == 0,
|
||||
"region size not a multiple of RegionSize");
|
||||
|
||||
bool result = initialize_region_data(region_size);
|
||||
|
||||
bool result = initialize_region_data(region_size) && initialize_block_data();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -418,17 +433,36 @@ bool ParallelCompactData::initialize_region_data(size_t region_size)
|
|||
return false;
|
||||
}
|
||||
|
||||
bool ParallelCompactData::initialize_block_data()
|
||||
{
|
||||
assert(_region_count != 0, "region data must be initialized first");
|
||||
const size_t count = _region_count << Log2BlocksPerRegion;
|
||||
_block_vspace = create_vspace(count, sizeof(BlockData));
|
||||
if (_block_vspace != 0) {
|
||||
_block_data = (BlockData*)_block_vspace->reserved_low_addr();
|
||||
_block_count = count;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void ParallelCompactData::clear()
|
||||
{
|
||||
memset(_region_data, 0, _region_vspace->committed_size());
|
||||
memset(_block_data, 0, _block_vspace->committed_size());
|
||||
}
|
||||
|
||||
void ParallelCompactData::clear_range(size_t beg_region, size_t end_region) {
|
||||
assert(beg_region <= _region_count, "beg_region out of range");
|
||||
assert(end_region <= _region_count, "end_region out of range");
|
||||
assert(RegionSize % BlockSize == 0, "RegionSize not a multiple of BlockSize");
|
||||
|
||||
const size_t region_cnt = end_region - beg_region;
|
||||
memset(_region_data + beg_region, 0, region_cnt * sizeof(RegionData));
|
||||
|
||||
const size_t beg_block = beg_region * BlocksPerRegion;
|
||||
const size_t block_cnt = region_cnt * BlocksPerRegion;
|
||||
memset(_block_data + beg_block, 0, block_cnt * sizeof(BlockData));
|
||||
}
|
||||
|
||||
HeapWord* ParallelCompactData::partial_obj_end(size_t region_idx) const
|
||||
|
@ -707,49 +741,48 @@ bool ParallelCompactData::summarize(SplitInfo& split_info,
|
|||
|
||||
HeapWord* ParallelCompactData::calc_new_pointer(HeapWord* addr) {
|
||||
assert(addr != NULL, "Should detect NULL oop earlier");
|
||||
assert(PSParallelCompact::gc_heap()->is_in(addr), "addr not in heap");
|
||||
#ifdef ASSERT
|
||||
if (PSParallelCompact::mark_bitmap()->is_unmarked(addr)) {
|
||||
gclog_or_tty->print_cr("calc_new_pointer:: addr " PTR_FORMAT, addr);
|
||||
}
|
||||
#endif
|
||||
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "obj not marked");
|
||||
assert(PSParallelCompact::gc_heap()->is_in(addr), "not in heap");
|
||||
assert(PSParallelCompact::mark_bitmap()->is_marked(addr), "not marked");
|
||||
|
||||
// Region covering the object.
|
||||
size_t region_index = addr_to_region_idx(addr);
|
||||
const RegionData* const region_ptr = region(region_index);
|
||||
HeapWord* const region_addr = region_align_down(addr);
|
||||
|
||||
assert(addr < region_addr + RegionSize, "Region does not cover object");
|
||||
assert(addr_to_region_ptr(region_addr) == region_ptr, "sanity check");
|
||||
|
||||
RegionData* const region_ptr = addr_to_region_ptr(addr);
|
||||
HeapWord* result = region_ptr->destination();
|
||||
|
||||
// If all the data in the region is live, then the new location of the object
|
||||
// can be calculated from the destination of the region plus the offset of the
|
||||
// object in the region.
|
||||
// If the entire Region is live, the new location is region->destination + the
|
||||
// offset of the object within in the Region.
|
||||
|
||||
// Run some performance tests to determine if this special case pays off. It
|
||||
// is worth it for pointers into the dense prefix. If the optimization to
|
||||
// avoid pointer updates in regions that only point to the dense prefix is
|
||||
// ever implemented, this should be revisited.
|
||||
if (region_ptr->data_size() == RegionSize) {
|
||||
result += pointer_delta(addr, region_addr);
|
||||
DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
|
||||
result += region_offset(addr);
|
||||
return result;
|
||||
}
|
||||
|
||||
// The new location of the object is
|
||||
// region destination +
|
||||
// size of the partial object extending onto the region +
|
||||
// sizes of the live objects in the Region that are to the left of addr
|
||||
const size_t partial_obj_size = region_ptr->partial_obj_size();
|
||||
HeapWord* const search_start = region_addr + partial_obj_size;
|
||||
// Otherwise, the new location is region->destination + block offset + the
|
||||
// number of live words in the Block that are (a) to the left of addr and (b)
|
||||
// due to objects that start in the Block.
|
||||
|
||||
// Fill in the block table if necessary. This is unsynchronized, so multiple
|
||||
// threads may fill the block table for a region (harmless, since it is
|
||||
// idempotent).
|
||||
if (!region_ptr->blocks_filled()) {
|
||||
PSParallelCompact::fill_blocks(addr_to_region_idx(addr));
|
||||
region_ptr->set_blocks_filled();
|
||||
}
|
||||
|
||||
HeapWord* const search_start = block_align_down(addr);
|
||||
const size_t block_offset = addr_to_block_ptr(addr)->offset();
|
||||
|
||||
const ParMarkBitMap* bitmap = PSParallelCompact::mark_bitmap();
|
||||
size_t live_to_left = bitmap->live_words_in_range(search_start, oop(addr));
|
||||
|
||||
result += partial_obj_size + live_to_left;
|
||||
DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result);)
|
||||
const size_t live = bitmap->live_words_in_range(search_start, oop(addr));
|
||||
result += block_offset + live;
|
||||
DEBUG_ONLY(PSParallelCompact::check_new_location(addr, result));
|
||||
return result;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
#ifdef ASSERT
|
||||
void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
|
||||
{
|
||||
const size_t* const beg = (const size_t*)vspace->committed_low_addr();
|
||||
|
@ -762,16 +795,10 @@ void ParallelCompactData::verify_clear(const PSVirtualSpace* vspace)
|
|||
void ParallelCompactData::verify_clear()
|
||||
{
|
||||
verify_clear(_region_vspace);
|
||||
verify_clear(_block_vspace);
|
||||
}
|
||||
#endif // #ifdef ASSERT
|
||||
|
||||
#ifdef NOT_PRODUCT
|
||||
ParallelCompactData::RegionData* debug_region(size_t region_index) {
|
||||
ParallelCompactData& sd = PSParallelCompact::summary_data();
|
||||
return sd.region(region_index);
|
||||
}
|
||||
#endif
|
||||
|
||||
elapsedTimer PSParallelCompact::_accumulated_time;
|
||||
unsigned int PSParallelCompact::_total_invocations = 0;
|
||||
unsigned int PSParallelCompact::_maximum_compaction_gc_num = 0;
|
||||
|
@ -1961,11 +1988,6 @@ void PSParallelCompact::invoke(bool maximum_heap_compaction) {
|
|||
maximum_heap_compaction);
|
||||
}
|
||||
|
||||
bool ParallelCompactData::region_contains(size_t region_index, HeapWord* addr) {
|
||||
size_t addr_region_index = addr_to_region_idx(addr);
|
||||
return region_index == addr_region_index;
|
||||
}
|
||||
|
||||
// This method contains no policy. You should probably
|
||||
// be calling invoke() instead.
|
||||
bool PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
|
||||
|
@ -2627,6 +2649,41 @@ void PSParallelCompact::enqueue_region_stealing_tasks(
|
|||
}
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
// Write a histogram of the number of times the block table was filled for a
|
||||
// region.
|
||||
void PSParallelCompact::write_block_fill_histogram(outputStream* const out)
|
||||
{
|
||||
if (!TraceParallelOldGCCompactionPhase) return;
|
||||
|
||||
typedef ParallelCompactData::RegionData rd_t;
|
||||
ParallelCompactData& sd = summary_data();
|
||||
|
||||
for (unsigned int id = old_space_id; id < last_space_id; ++id) {
|
||||
MutableSpace* const spc = _space_info[id].space();
|
||||
if (spc->bottom() != spc->top()) {
|
||||
const rd_t* const beg = sd.addr_to_region_ptr(spc->bottom());
|
||||
HeapWord* const top_aligned_up = sd.region_align_up(spc->top());
|
||||
const rd_t* const end = sd.addr_to_region_ptr(top_aligned_up);
|
||||
|
||||
size_t histo[5] = { 0, 0, 0, 0, 0 };
|
||||
const size_t histo_len = sizeof(histo) / sizeof(size_t);
|
||||
const size_t region_cnt = pointer_delta(end, beg, sizeof(rd_t));
|
||||
|
||||
for (const rd_t* cur = beg; cur < end; ++cur) {
|
||||
++histo[MIN2(cur->blocks_filled_count(), histo_len - 1)];
|
||||
}
|
||||
out->print("%u %-4s" SIZE_FORMAT_W(5), id, space_names[id], region_cnt);
|
||||
for (size_t i = 0; i < histo_len; ++i) {
|
||||
out->print(" " SIZE_FORMAT_W(5) " %5.1f%%",
|
||||
histo[i], 100.0 * histo[i] / region_cnt);
|
||||
}
|
||||
out->cr();
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // #ifdef ASSERT
|
||||
|
||||
void PSParallelCompact::compact() {
|
||||
// trace("5");
|
||||
TraceTime tm("compaction phase", print_phases(), true, gclog_or_tty);
|
||||
|
@ -2666,6 +2723,8 @@ void PSParallelCompact::compact() {
|
|||
update_deferred_objects(cm, SpaceId(id));
|
||||
}
|
||||
}
|
||||
|
||||
DEBUG_ONLY(write_block_fill_histogram(gclog_or_tty));
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
|
@ -3130,6 +3189,57 @@ void PSParallelCompact::fill_region(ParCompactionManager* cm, size_t region_idx)
|
|||
} while (true);
|
||||
}
|
||||
|
||||
void PSParallelCompact::fill_blocks(size_t region_idx)
|
||||
{
|
||||
// Fill in the block table elements for the specified region. Each block
|
||||
// table element holds the number of live words in the region that are to the
|
||||
// left of the first object that starts in the block. Thus only blocks in
|
||||
// which an object starts need to be filled.
|
||||
//
|
||||
// The algorithm scans the section of the bitmap that corresponds to the
|
||||
// region, keeping a running total of the live words. When an object start is
|
||||
// found, if it's the first to start in the block that contains it, the
|
||||
// current total is written to the block table element.
|
||||
const size_t Log2BlockSize = ParallelCompactData::Log2BlockSize;
|
||||
const size_t Log2RegionSize = ParallelCompactData::Log2RegionSize;
|
||||
const size_t RegionSize = ParallelCompactData::RegionSize;
|
||||
|
||||
ParallelCompactData& sd = summary_data();
|
||||
const size_t partial_obj_size = sd.region(region_idx)->partial_obj_size();
|
||||
if (partial_obj_size >= RegionSize) {
|
||||
return; // No objects start in this region.
|
||||
}
|
||||
|
||||
// Ensure the first loop iteration decides that the block has changed.
|
||||
size_t cur_block = sd.block_count();
|
||||
|
||||
const ParMarkBitMap* const bitmap = mark_bitmap();
|
||||
|
||||
const size_t Log2BitsPerBlock = Log2BlockSize - LogMinObjAlignment;
|
||||
assert((size_t)1 << Log2BitsPerBlock ==
|
||||
bitmap->words_to_bits(ParallelCompactData::BlockSize), "sanity");
|
||||
|
||||
size_t beg_bit = bitmap->words_to_bits(region_idx << Log2RegionSize);
|
||||
const size_t range_end = beg_bit + bitmap->words_to_bits(RegionSize);
|
||||
size_t live_bits = bitmap->words_to_bits(partial_obj_size);
|
||||
beg_bit = bitmap->find_obj_beg(beg_bit + live_bits, range_end);
|
||||
while (beg_bit < range_end) {
|
||||
const size_t new_block = beg_bit >> Log2BitsPerBlock;
|
||||
if (new_block != cur_block) {
|
||||
cur_block = new_block;
|
||||
sd.block(cur_block)->set_offset(bitmap->bits_to_words(live_bits));
|
||||
}
|
||||
|
||||
const size_t end_bit = bitmap->find_obj_end(beg_bit, range_end);
|
||||
if (end_bit < range_end - 1) {
|
||||
live_bits += end_bit - beg_bit + 1;
|
||||
beg_bit = bitmap->find_obj_beg(end_bit + 1, range_end);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
PSParallelCompact::move_and_update(ParCompactionManager* cm, SpaceId space_id) {
|
||||
const MutableSpace* sp = space(space_id);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue