mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 15:24:43 +02:00
8035815: Cache-align and pad the from card cache
The from card cache is a very frequently accessed data structure. It is essentially a 2D array of per-region values, with one row of values for every GC thread. Pad and align the data structure to avoid false sharing.
Reviewed-by: stefank
This commit is contained in:
parent
329e55e7b0
commit
cde8aa670b
4 changed files with 71 additions and 28 deletions
|
@ -29,6 +29,7 @@
|
||||||
#include "gc_implementation/g1/heapRegionRemSet.hpp"
|
#include "gc_implementation/g1/heapRegionRemSet.hpp"
|
||||||
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
|
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
|
||||||
#include "memory/allocation.hpp"
|
#include "memory/allocation.hpp"
|
||||||
|
#include "memory/padded.inline.hpp"
|
||||||
#include "memory/space.inline.hpp"
|
#include "memory/space.inline.hpp"
|
||||||
#include "oops/oop.inline.hpp"
|
#include "oops/oop.inline.hpp"
|
||||||
#include "utilities/bitMap.inline.hpp"
|
#include "utilities/bitMap.inline.hpp"
|
||||||
|
@ -358,27 +359,29 @@ void OtherRegionsTable::unlink_from_all(PerRegionTable* prt) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int** OtherRegionsTable::_from_card_cache = NULL;
|
int** OtherRegionsTable::_from_card_cache = NULL;
|
||||||
size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
|
uint OtherRegionsTable::_from_card_cache_max_regions = 0;
|
||||||
size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
|
size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
|
||||||
|
|
||||||
void OtherRegionsTable::init_from_card_cache(size_t max_regions) {
|
void OtherRegionsTable::init_from_card_cache(uint max_regions) {
|
||||||
_from_card_cache_max_regions = max_regions;
|
guarantee(_from_card_cache == NULL, "Should not call this multiple times");
|
||||||
|
uint n_par_rs = HeapRegionRemSet::num_par_rem_sets();
|
||||||
|
|
||||||
int n_par_rs = HeapRegionRemSet::num_par_rem_sets();
|
_from_card_cache_max_regions = max_regions;
|
||||||
_from_card_cache = NEW_C_HEAP_ARRAY(int*, n_par_rs, mtGC);
|
_from_card_cache = Padded2DArray<int, mtGC>::create_unfreeable(n_par_rs,
|
||||||
for (int i = 0; i < n_par_rs; i++) {
|
_from_card_cache_max_regions,
|
||||||
_from_card_cache[i] = NEW_C_HEAP_ARRAY(int, max_regions, mtGC);
|
&_from_card_cache_mem_size);
|
||||||
for (size_t j = 0; j < max_regions; j++) {
|
|
||||||
|
for (uint i = 0; i < n_par_rs; i++) {
|
||||||
|
for (uint j = 0; j < _from_card_cache_max_regions; j++) {
|
||||||
_from_card_cache[i][j] = -1; // An invalid value.
|
_from_card_cache[i][j] = -1; // An invalid value.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_from_card_cache_mem_size = n_par_rs * max_regions * sizeof(int);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) {
|
void OtherRegionsTable::shrink_from_card_cache(uint new_n_regs) {
|
||||||
for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
|
for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
|
||||||
assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max.");
|
assert(new_n_regs <= _from_card_cache_max_regions, "Must be within max.");
|
||||||
for (size_t j = new_n_regs; j < _from_card_cache_max_regions; j++) {
|
for (uint j = new_n_regs; j < _from_card_cache_max_regions; j++) {
|
||||||
_from_card_cache[i][j] = -1; // An invalid value.
|
_from_card_cache[i][j] = -1; // An invalid value.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -386,8 +389,8 @@ void OtherRegionsTable::shrink_from_card_cache(size_t new_n_regs) {
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
void OtherRegionsTable::print_from_card_cache() {
|
void OtherRegionsTable::print_from_card_cache() {
|
||||||
for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
|
for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
|
||||||
for (size_t j = 0; j < _from_card_cache_max_regions; j++) {
|
for (uint j = 0; j < _from_card_cache_max_regions; j++) {
|
||||||
gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.",
|
gclog_or_tty->print_cr("_from_card_cache[%d][%d] = %d.",
|
||||||
i, j, _from_card_cache[i][j]);
|
i, j, _from_card_cache[i][j]);
|
||||||
}
|
}
|
||||||
|
@ -727,8 +730,8 @@ size_t OtherRegionsTable::fl_mem_size() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void OtherRegionsTable::clear_fcc() {
|
void OtherRegionsTable::clear_fcc() {
|
||||||
size_t hrs_idx = hr()->hrs_index();
|
uint hrs_idx = hr()->hrs_index();
|
||||||
for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
|
for (uint i = 0; i < HeapRegionRemSet::num_par_rem_sets(); i++) {
|
||||||
_from_card_cache[i][hrs_idx] = -1;
|
_from_card_cache[i][hrs_idx] = -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -762,8 +765,8 @@ void OtherRegionsTable::clear_incoming_entry(HeapRegion* from_hr) {
|
||||||
_coarse_map.par_at_put(hrs_ind, 0);
|
_coarse_map.par_at_put(hrs_ind, 0);
|
||||||
}
|
}
|
||||||
// Check to see if any of the fcc entries come from here.
|
// Check to see if any of the fcc entries come from here.
|
||||||
size_t hr_ind = (size_t) hr()->hrs_index();
|
uint hr_ind = hr()->hrs_index();
|
||||||
for (int tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
|
for (uint tid = 0; tid < HeapRegionRemSet::num_par_rem_sets(); tid++) {
|
||||||
int fcc_ent = _from_card_cache[tid][hr_ind];
|
int fcc_ent = _from_card_cache[tid][hr_ind];
|
||||||
if (fcc_ent != -1) {
|
if (fcc_ent != -1) {
|
||||||
HeapWord* card_addr = (HeapWord*)
|
HeapWord* card_addr = (HeapWord*)
|
||||||
|
@ -838,8 +841,8 @@ OtherRegionsTable::do_cleanup_work(HRRSCleanupTask* hrrs_cleanup_task) {
|
||||||
// Determines how many threads can add records to an rset in parallel.
|
// Determines how many threads can add records to an rset in parallel.
|
||||||
// This can be done by either mutator threads together with the
|
// This can be done by either mutator threads together with the
|
||||||
// concurrent refinement threads or GC threads.
|
// concurrent refinement threads or GC threads.
|
||||||
int HeapRegionRemSet::num_par_rem_sets() {
|
uint HeapRegionRemSet::num_par_rem_sets() {
|
||||||
return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
|
return (uint)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
|
||||||
}
|
}
|
||||||
|
|
||||||
HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
|
HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
|
||||||
|
|
|
@ -121,7 +121,7 @@ class OtherRegionsTable VALUE_OBJ_CLASS_SPEC {
|
||||||
|
|
||||||
// Indexed by thread X heap region, to minimize thread contention.
|
// Indexed by thread X heap region, to minimize thread contention.
|
||||||
static int** _from_card_cache;
|
static int** _from_card_cache;
|
||||||
static size_t _from_card_cache_max_regions;
|
static uint _from_card_cache_max_regions;
|
||||||
static size_t _from_card_cache_mem_size;
|
static size_t _from_card_cache_mem_size;
|
||||||
|
|
||||||
// link/add the given fine grain remembered set into the "all" list
|
// link/add the given fine grain remembered set into the "all" list
|
||||||
|
@ -170,11 +170,11 @@ public:
|
||||||
|
|
||||||
// Declare the heap size (in # of regions) to the OtherRegionsTable.
|
// Declare the heap size (in # of regions) to the OtherRegionsTable.
|
||||||
// (Uses it to initialize from_card_cache).
|
// (Uses it to initialize from_card_cache).
|
||||||
static void init_from_card_cache(size_t max_regions);
|
static void init_from_card_cache(uint max_regions);
|
||||||
|
|
||||||
// Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
|
// Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
|
||||||
// Make sure any entries for higher regions are invalid.
|
// Make sure any entries for higher regions are invalid.
|
||||||
static void shrink_from_card_cache(size_t new_n_regs);
|
static void shrink_from_card_cache(uint new_n_regs);
|
||||||
|
|
||||||
static void print_from_card_cache();
|
static void print_from_card_cache();
|
||||||
};
|
};
|
||||||
|
@ -222,7 +222,7 @@ private:
|
||||||
public:
|
public:
|
||||||
HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr);
|
HeapRegionRemSet(G1BlockOffsetSharedArray* bosa, HeapRegion* hr);
|
||||||
|
|
||||||
static int num_par_rem_sets();
|
static uint num_par_rem_sets();
|
||||||
static void setup_remset_size();
|
static void setup_remset_size();
|
||||||
|
|
||||||
HeapRegion* hr() const {
|
HeapRegion* hr() const {
|
||||||
|
@ -358,12 +358,12 @@ public:
|
||||||
// (Uses it to initialize from_card_cache).
|
// (Uses it to initialize from_card_cache).
|
||||||
static void init_heap(uint max_regions) {
|
static void init_heap(uint max_regions) {
|
||||||
G1CodeRootSet::initialize();
|
G1CodeRootSet::initialize();
|
||||||
OtherRegionsTable::init_from_card_cache((size_t) max_regions);
|
OtherRegionsTable::init_from_card_cache(max_regions);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
|
// Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
|
||||||
static void shrink_heap(uint new_n_regs) {
|
static void shrink_heap(uint new_n_regs) {
|
||||||
OtherRegionsTable::shrink_from_card_cache((size_t) new_n_regs);
|
OtherRegionsTable::shrink_from_card_cache(new_n_regs);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -90,4 +90,15 @@ class PaddedArray {
|
||||||
static PaddedEnd<T>* create_unfreeable(uint length);
|
static PaddedEnd<T>* create_unfreeable(uint length);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Helper class to create an array of references to arrays of primitive types
|
||||||
|
// Both the array of references and the data arrays are aligned to the given
|
||||||
|
// alignment. The allocated memory is zero-filled.
|
||||||
|
template <class T, MEMFLAGS flags, size_t alignment = DEFAULT_CACHE_LINE_SIZE>
|
||||||
|
class Padded2DArray {
|
||||||
|
public:
|
||||||
|
// Creates an aligned padded 2D array.
|
||||||
|
// The memory cannot be deleted since the raw memory chunk is not returned.
|
||||||
|
static T** create_unfreeable(uint rows, uint columns, size_t* allocation_size = NULL);
|
||||||
|
};
|
||||||
|
|
||||||
#endif // SHARE_VM_MEMORY_PADDED_HPP
|
#endif // SHARE_VM_MEMORY_PADDED_HPP
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2013, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
@ -47,3 +47,32 @@ PaddedEnd<T>* PaddedArray<T, flags, alignment>::create_unfreeable(uint length) {
|
||||||
|
|
||||||
return aligned_padded_array;
|
return aligned_padded_array;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class T, MEMFLAGS flags, size_t alignment>
|
||||||
|
T** Padded2DArray<T, flags, alignment>::create_unfreeable(uint rows, uint columns, size_t* allocation_size) {
|
||||||
|
// Calculate and align the size of the first dimension's table.
|
||||||
|
size_t table_size = align_size_up_(rows * sizeof(T*), alignment);
|
||||||
|
// The size of the separate rows.
|
||||||
|
size_t row_size = align_size_up_(columns * sizeof(T), alignment);
|
||||||
|
// Total size consists of the indirection table plus the rows.
|
||||||
|
size_t total_size = table_size + rows * row_size + alignment;
|
||||||
|
|
||||||
|
// Allocate a chunk of memory large enough to allow alignment of the chunk.
|
||||||
|
void* chunk = AllocateHeap(total_size, flags);
|
||||||
|
// Clear the allocated memory.
|
||||||
|
memset(chunk, 0, total_size);
|
||||||
|
// Align the chunk of memory.
|
||||||
|
T** result = (T**)align_pointer_up(chunk, alignment);
|
||||||
|
void* data_start = (void*)((uintptr_t)result + table_size);
|
||||||
|
|
||||||
|
// Fill in the row table.
|
||||||
|
for (size_t i = 0; i < rows; i++) {
|
||||||
|
result[i] = (T*)((uintptr_t)data_start + i * row_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (allocation_size != NULL) {
|
||||||
|
*allocation_size = total_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue