mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-25 22:04:51 +02:00
7158800: Improve storage of symbol tables
Use an alternate version of hashing algorithm for symbol string tables and after a certain bucket size to improve performance Reviewed-by: pbk, kamg, dlong, kvn, fparain
This commit is contained in:
parent
7c17e9e75b
commit
55b09e7016
17 changed files with 32031 additions and 55 deletions
|
@ -23,6 +23,7 @@
|
|||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "classfile/altHashing.hpp"
|
||||
#include "classfile/javaClasses.hpp"
|
||||
#include "classfile/symbolTable.hpp"
|
||||
#include "classfile/systemDictionary.hpp"
|
||||
|
@ -34,12 +35,15 @@
|
|||
#include "oops/oop.inline2.hpp"
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "utilities/hashtable.inline.hpp"
|
||||
#include "utilities/numberSeq.hpp"
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
SymbolTable* SymbolTable::_the_table = NULL;
|
||||
// Static arena for symbols that are not deallocated
|
||||
Arena* SymbolTable::_arena = NULL;
|
||||
bool SymbolTable::_needs_rehashing = false;
|
||||
jint SymbolTable::_seed = 0;
|
||||
|
||||
Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
|
||||
// Don't allow symbols to be created which cannot fit in a Symbol*.
|
||||
|
@ -121,12 +125,41 @@ void SymbolTable::unlink() {
|
|||
}
|
||||
}
|
||||
|
||||
unsigned int SymbolTable::new_hash(Symbol* sym) {
|
||||
ResourceMark rm;
|
||||
// Use alternate hashing algorithm on this symbol.
|
||||
return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
|
||||
}
|
||||
|
||||
// Create a new table and using alternate hash code, populate the new table
|
||||
// with the existing strings. Set flag to use the alternate hash code afterwards.
|
||||
void SymbolTable::rehash_table() {
|
||||
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
|
||||
assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
|
||||
// Create a new symbol table
|
||||
SymbolTable* new_table = new SymbolTable();
|
||||
|
||||
// Initialize the global seed for hashing.
|
||||
_seed = AltHashing::compute_seed();
|
||||
assert(seed() != 0, "shouldn't be zero");
|
||||
|
||||
the_table()->move_to(new_table);
|
||||
|
||||
// Delete the table and buckets (entries are reused in new table).
|
||||
delete _the_table;
|
||||
// Don't check if we need rehashing until the table gets unbalanced again.
|
||||
// Then rehash with a new global seed.
|
||||
_needs_rehashing = false;
|
||||
_the_table = new_table;
|
||||
}
|
||||
|
||||
// Lookup a symbol in a bucket.
|
||||
|
||||
Symbol* SymbolTable::lookup(int index, const char* name,
|
||||
int len, unsigned int hash) {
|
||||
int count = 0;
|
||||
for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
|
||||
count++; // count all entries in this bucket, not just ones with same hash
|
||||
if (e->hash() == hash) {
|
||||
Symbol* sym = e->literal();
|
||||
if (sym->equals(name, len)) {
|
||||
|
@ -136,9 +169,21 @@ Symbol* SymbolTable::lookup(int index, const char* name,
|
|||
}
|
||||
}
|
||||
}
|
||||
// If the bucket size is too deep check if this hash code is insufficient.
|
||||
if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
|
||||
_needs_rehashing = check_rehash_table(count);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Pick hashing algorithm, but return value already given if not using a new
|
||||
// hash algorithm.
|
||||
unsigned int SymbolTable::hash_symbol(const char* s, int len, unsigned int hashValue) {
|
||||
return use_alternate_hashcode() ?
|
||||
AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
|
||||
(hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
|
||||
}
|
||||
|
||||
|
||||
// We take care not to be blocking while holding the
|
||||
// SymbolTable_lock. Otherwise, the system might deadlock, since the
|
||||
|
@ -287,13 +332,17 @@ Symbol* SymbolTable::new_permanent_symbol(const char* name, TRAPS) {
|
|||
}
|
||||
|
||||
Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
|
||||
unsigned int hashValue, bool c_heap, TRAPS) {
|
||||
unsigned int hashValue_arg, bool c_heap, TRAPS) {
|
||||
assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
|
||||
"proposed name of symbol must be stable");
|
||||
|
||||
// Grab SymbolTable_lock first.
|
||||
MutexLocker ml(SymbolTable_lock, THREAD);
|
||||
|
||||
// Check if the symbol table has been rehashed, if so, need to recalculate
|
||||
// the hash value.
|
||||
unsigned int hashValue = hash_symbol((const char*)name, len, hashValue_arg);
|
||||
|
||||
// Since look-up was done lock-free, we need to check if another
|
||||
// thread beat us in the race to insert the symbol.
|
||||
Symbol* test = lookup(index, (char*)name, len, hashValue);
|
||||
|
@ -332,10 +381,13 @@ bool SymbolTable::basic_add(Handle class_loader, constantPoolHandle cp,
|
|||
MutexLocker ml(SymbolTable_lock, THREAD);
|
||||
|
||||
for (int i=0; i<names_count; i++) {
|
||||
// Check if the symbol table has been rehashed, if so, need to recalculate
|
||||
// the hash value.
|
||||
unsigned int hashValue = hash_symbol(names[i], lengths[i], hashValues[i]);
|
||||
// Since look-up was done lock-free, we need to check if another
|
||||
// thread beat us in the race to insert the symbol.
|
||||
int index = hash_to_index(hashValues[i]);
|
||||
Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
|
||||
int index = hash_to_index(hashValue);
|
||||
Symbol* test = lookup(index, names[i], lengths[i], hashValue);
|
||||
if (test != NULL) {
|
||||
// A race occurred and another thread introduced the symbol, this one
|
||||
// will be dropped and collected. Use test instead.
|
||||
|
@ -347,7 +399,7 @@ bool SymbolTable::basic_add(Handle class_loader, constantPoolHandle cp,
|
|||
bool c_heap = class_loader() != NULL;
|
||||
Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
|
||||
assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized"); // why wouldn't it be???
|
||||
HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
|
||||
HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
|
||||
add_entry(index, entry);
|
||||
cp->symbol_at_put(cp_indices[i], sym);
|
||||
}
|
||||
|
@ -370,6 +422,24 @@ void SymbolTable::verify() {
|
|||
}
|
||||
}
|
||||
|
||||
void SymbolTable::dump(outputStream* st) {
|
||||
NumberSeq summary;
|
||||
for (int i = 0; i < the_table()->table_size(); ++i) {
|
||||
int count = 0;
|
||||
for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
|
||||
e != NULL; e = e->next()) {
|
||||
count++;
|
||||
}
|
||||
summary.add((double)count);
|
||||
}
|
||||
st->print_cr("SymbolTable statistics:");
|
||||
st->print_cr("Number of buckets : %7d", summary.num());
|
||||
st->print_cr("Average bucket size : %7.0f", summary.avg());
|
||||
st->print_cr("Variance of bucket size : %7.0f", summary.variance());
|
||||
st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
|
||||
st->print_cr("Maximum bucket size : %7.0f", summary.maximum());
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// Non-product code
|
||||
|
@ -468,7 +538,6 @@ void SymbolTable::print() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // PRODUCT
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
|
@ -514,21 +583,36 @@ class StableMemoryChecker : public StackObj {
|
|||
// --------------------------------------------------------------------------
|
||||
StringTable* StringTable::_the_table = NULL;
|
||||
|
||||
bool StringTable::_needs_rehashing = false;
|
||||
jint StringTable::_seed = 0;
|
||||
|
||||
// Pick hashing algorithm
|
||||
unsigned int StringTable::hash_string(const jchar* s, int len, unsigned int hashValue) {
|
||||
return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
|
||||
(hashValue != 0 ? hashValue : java_lang_String::to_hash(s, len));
|
||||
}
|
||||
|
||||
oop StringTable::lookup(int index, jchar* name,
|
||||
int len, unsigned int hash) {
|
||||
int count = 0;
|
||||
for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
|
||||
count++;
|
||||
if (l->hash() == hash) {
|
||||
if (java_lang_String::equals(l->literal(), name, len)) {
|
||||
return l->literal();
|
||||
}
|
||||
}
|
||||
}
|
||||
// If the bucket size is too deep check if this hash code is insufficient.
|
||||
if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
|
||||
_needs_rehashing = check_rehash_table(count);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
|
||||
int len, unsigned int hashValue, TRAPS) {
|
||||
int len, unsigned int hashValue_arg, TRAPS) {
|
||||
debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
|
||||
assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
|
||||
"proposed name of symbol must be stable");
|
||||
|
@ -547,6 +631,10 @@ oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
|
|||
assert(java_lang_String::equals(string(), name, len),
|
||||
"string must be properly initialized");
|
||||
|
||||
// Check if the symbol table has been rehashed, if so, need to recalculate
|
||||
// the hash value before second lookup.
|
||||
unsigned int hashValue = hash_string(name, len, hashValue_arg);
|
||||
|
||||
// Since look-up was done lock-free, we need to check if another
|
||||
// thread beat us in the race to insert the symbol.
|
||||
|
||||
|
@ -566,7 +654,7 @@ oop StringTable::lookup(Symbol* symbol) {
|
|||
ResourceMark rm;
|
||||
int length;
|
||||
jchar* chars = symbol->as_unicode(length);
|
||||
unsigned int hashValue = java_lang_String::hash_string(chars, length);
|
||||
unsigned int hashValue = hash_string(chars, length);
|
||||
int index = the_table()->hash_to_index(hashValue);
|
||||
return the_table()->lookup(index, chars, length, hashValue);
|
||||
}
|
||||
|
@ -574,7 +662,7 @@ oop StringTable::lookup(Symbol* symbol) {
|
|||
|
||||
oop StringTable::intern(Handle string_or_null, jchar* name,
|
||||
int len, TRAPS) {
|
||||
unsigned int hashValue = java_lang_String::hash_string(name, len);
|
||||
unsigned int hashValue = hash_string(name, len);
|
||||
int index = the_table()->hash_to_index(hashValue);
|
||||
oop string = the_table()->lookup(index, name, len, hashValue);
|
||||
|
||||
|
@ -675,3 +763,52 @@ void StringTable::verify() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void StringTable::dump(outputStream* st) {
|
||||
NumberSeq summary;
|
||||
for (int i = 0; i < the_table()->table_size(); ++i) {
|
||||
HashtableEntry<oop>* p = the_table()->bucket(i);
|
||||
int count = 0;
|
||||
for ( ; p != NULL; p = p->next()) {
|
||||
count++;
|
||||
}
|
||||
summary.add((double)count);
|
||||
}
|
||||
st->print_cr("StringTable statistics:");
|
||||
st->print_cr("Number of buckets : %7d", summary.num());
|
||||
st->print_cr("Average bucket size : %7.0f", summary.avg());
|
||||
st->print_cr("Variance of bucket size : %7.0f", summary.variance());
|
||||
st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
|
||||
st->print_cr("Maximum bucket size : %7.0f", summary.maximum());
|
||||
}
|
||||
|
||||
|
||||
unsigned int StringTable::new_hash(oop string) {
|
||||
ResourceMark rm;
|
||||
int length;
|
||||
jchar* chars = java_lang_String::as_unicode_string(string, length);
|
||||
// Use alternate hashing algorithm on the string
|
||||
return AltHashing::murmur3_32(seed(), chars, length);
|
||||
}
|
||||
|
||||
// Create a new table and using alternate hash code, populate the new table
|
||||
// with the existing strings. Set flag to use the alternate hash code afterwards.
|
||||
void StringTable::rehash_table() {
|
||||
assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
|
||||
assert(!DumpSharedSpaces, "this should never happen with -Xshare:dump");
|
||||
StringTable* new_table = new StringTable();
|
||||
|
||||
// Initialize new global seed for hashing.
|
||||
_seed = AltHashing::compute_seed();
|
||||
assert(seed() != 0, "shouldn't be zero");
|
||||
|
||||
// Rehash the table
|
||||
the_table()->move_to(new_table);
|
||||
|
||||
// Delete the table and buckets (entries are reused in new table).
|
||||
delete _the_table;
|
||||
// Don't check if we need rehashing until the table gets unbalanced again.
|
||||
// Then rehash with a new global seed.
|
||||
_needs_rehashing = false;
|
||||
_the_table = new_table;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue