6954029: Improve implicit null check generation with compressed oops
Hoist DecodeN instruction above null check

Reviewed-by: never, twisti

Parent: be95b163a6
Commit: a3005a16fc

9 changed files with 119 additions and 21 deletions
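For background: with UseCompressedOops a 64-bit reference is stored as a 32-bit value and reconstructed as base + (narrow << shift), and an implicit null check replaces an explicit compare-and-branch with a memory access that is allowed to trap. This change lets that scheme work even when the DecodeN cannot be folded into the faulting access itself, by hoisting the decode above the null check. A minimal standalone model of the decode arithmetic (heap_base, oop_shift and decode are illustrative names, not HotSpot identifiers):

#include <cstdint>
#include <iostream>

static uint8_t* const heap_base = reinterpret_cast<uint8_t*>(0x800000000ULL); // plays the role of R12
static const int oop_shift = 3; // LogMinObjAlignmentInBytes for 8-byte alignment

// Reconstruct a full pointer from a 32-bit compressed reference.
static uint8_t* decode(uint32_t narrow) {
  return heap_base + (static_cast<uintptr_t>(narrow) << oop_shift);
}

int main() {
  // A narrow null (0) decodes to the heap base itself, not to address 0;
  // implicit null checks on decoded oops therefore rely on a protected
  // page at the base (Universe::narrow_oop_use_implicit_null_checks()).
  std::cout << static_cast<void*>(decode(0)) << "\n";  // the base address
  std::cout << static_cast<void*>(decode(16)) << "\n"; // base + 128
}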
src/cpu/sparc/vm/sparc.ad

@@ -1760,6 +1760,12 @@ const int Matcher::init_array_short_size = 8 * BytesPerLong;
 // registers? True for Intel but false for most RISCs
 const bool Matcher::clone_shift_expressions = false;
 
+bool Matcher::narrow_oop_use_complex_address() {
+  NOT_LP64(ShouldNotCallThis());
+  assert(UseCompressedOops, "only for compressed oops code");
+  return false;
+}
+
 // Is it better to copy float constants, or load them directly from memory?
 // Intel can load a float constant from a direct address, requiring no
 // extra registers. Most RISCs will have to materialize an address into a
src/cpu/sparc/vm/vm_version_sparc.cpp

@@ -65,13 +65,6 @@ void VM_Version::initialize() {
     FLAG_SET_DEFAULT(UseInlineCaches, false);
   }
 #ifdef _LP64
-  // Single issue niagara1 is slower for CompressedOops
-  // but niagaras after that it's fine.
-  if (!is_niagara1_plus()) {
-    if (FLAG_IS_DEFAULT(UseCompressedOops)) {
-      FLAG_SET_ERGO(bool, UseCompressedOops, false);
-    }
-  }
   // 32-bit oops don't make sense for the 64-bit VM on sparc
   // since the 32-bit VM has the same registers and smaller objects.
   Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
src/cpu/x86/vm/x86_32.ad

@@ -1377,6 +1377,12 @@ const int Matcher::init_array_short_size = 8 * BytesPerLong;
 // registers? True for Intel but false for most RISCs
 const bool Matcher::clone_shift_expressions = true;
 
+bool Matcher::narrow_oop_use_complex_address() {
+  ShouldNotCallThis();
+  return true;
+}
+
+
 // Is it better to copy float constants, or load them directly from memory?
 // Intel can load a float constant from a direct address, requiring no
 // extra registers. Most RISCs will have to materialize an address into a
src/cpu/x86/vm/x86_64.ad

@@ -2037,6 +2037,11 @@ const int Matcher::init_array_short_size = 8 * BytesPerLong;
 // into registers? True for Intel but false for most RISCs
 const bool Matcher::clone_shift_expressions = true;
 
+bool Matcher::narrow_oop_use_complex_address() {
+  assert(UseCompressedOops, "only for compressed oops code");
+  return (LogMinObjAlignmentInBytes <= 3);
+}
+
 // Is it better to copy float constants, or load them directly from
 // memory? Intel can load a float constant from a direct address,
 // requiring no extra registers. Most RISCs will have to materialize
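The x86_64 return value hinges on an addressing-mode limit: a scaled-index operand supports scale factors 1, 2, 4 and 8 only, so the decode shift folds into [R12 + narrow_oop_reg<<3 + offset] exactly when LogMinObjAlignmentInBytes is at most 3. A standalone restatement of that check (the function name is made up for illustration):

#include <cassert>

// x86 SIB encoding allows scales 1, 2, 4, 8, i.e. shifts 0..3. A larger
// object-alignment shift cannot be folded into the address and would need
// a separate decode instruction ahead of the memory access.
static bool shift_folds_into_x86_address(int log_min_obj_alignment) {
  return 0 <= log_min_obj_alignment && log_min_obj_alignment <= 3;
}

int main() {
  assert(shift_folds_into_x86_address(3));  // default 8-byte alignment folds
  assert(!shift_folds_into_x86_address(4)); // 16-byte alignment would not
}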
src/share/vm/opto/compile.cpp

@@ -2176,14 +2176,14 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
 
 #ifdef _LP64
   case Op_CastPP:
-    if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
+    if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
       Compile* C = Compile::current();
       Node* in1 = n->in(1);
       const Type* t = n->bottom_type();
       Node* new_in1 = in1->clone();
       new_in1->as_DecodeN()->set_type(t);
 
-      if (!Matcher::clone_shift_expressions) {
+      if (!Matcher::narrow_oop_use_complex_address()) {
         //
         // x86, ARM and friends can handle 2 adds in addressing mode
         // and Matcher can fold a DecodeN node into address by using
@@ -2231,7 +2231,11 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
         new_in2 = in2->in(1);
       } else if (in2->Opcode() == Op_ConP) {
         const Type* t = in2->bottom_type();
-        if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
+        if (t == TypePtr::NULL_PTR) {
+          // Don't convert CmpP null check into CmpN if compressed
+          // oops implicit null check is not generated.
+          // This will allow to generate normal oop implicit null check.
+          if (Matcher::gen_narrow_oop_implicit_null_checks())
           new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
           //
           // This transformation together with CastPP transformation above
@@ -2289,9 +2293,9 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
 
   case Op_DecodeN:
     assert(!n->in(1)->is_EncodeP(), "should be optimized out");
-    // DecodeN could be pinned on Sparc where it can't be fold into
+    // DecodeN could be pinned when it can't be fold into
     // an address expression, see the code for Op_CastPP above.
-    assert(n->in(0) == NULL || !Matcher::clone_shift_expressions, "no control except on sparc");
+    assert(n->in(0) == NULL || !Matcher::narrow_oop_use_complex_address(), "no control");
    break;
 
   case Op_EncodeP: {
@@ -2496,6 +2500,10 @@ static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
     }
   }
 
+  // Skip next transformation if compressed oops are not used.
+  if (!UseCompressedOops || !Matcher::gen_narrow_oop_implicit_null_checks())
+    return;
+
   // Go over safepoints nodes to skip DecodeN nodes for debug edges.
   // It could be done for an uncommon traps or any safepoints/calls
   // if the DecodeN node is referenced only in a debug info.
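Taken together, the compile.cpp hunks rewrite CmpP(CastPP(DecodeN(x)), NULL) so the comparison runs on the 32-bit narrow value and the decode survives only on the non-null path. A toy model of the CmpP-to-CmpN step on a hand-rolled expression tree (the Op enum, Node struct and rewrite function are inventions for illustration, not HotSpot's Node API):

#include <cstdio>

enum Op { Param, ConP_Null, ConN_Null, Decode_N, Cmp_P, Cmp_N };

struct Node {
  Op op;
  Node* in1;
  Node* in2;
};

// Rewrite CmpP(DecodeN(x), ConP null) -> CmpN(x, ConN null) when narrow-oop
// implicit null checks are generated, mirroring the shape of the guarded
// conversion in final_graph_reshaping_impl(); otherwise keep the oop check.
static Node* rewrite_null_check(Node* cmp, Node* narrow_null,
                                bool gen_narrow_oop_implicit_null_checks) {
  if (cmp->op == Cmp_P && cmp->in1->op == Decode_N &&
      cmp->in2->op == ConP_Null && gen_narrow_oop_implicit_null_checks) {
    return new Node{Cmp_N, cmp->in1->in1, narrow_null};
  }
  return cmp;
}

int main() {
  Node x{Param, nullptr, nullptr};
  Node dec{Decode_N, &x, nullptr};
  Node oop_null{ConP_Null, nullptr, nullptr};
  Node narrow_null{ConN_Null, nullptr, nullptr};
  Node cmp{Cmp_P, &dec, &oop_null};
  Node* r = rewrite_null_check(&cmp, &narrow_null, true); // sketch; leaks r
  std::printf("%s\n", r->op == Cmp_N ? "CmpN(x, narrow null)" : "CmpP kept");
}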
src/share/vm/opto/connode.cpp

@@ -437,7 +437,7 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
 // If not converting int->oop, throw away cast after constant propagation
 Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
   const Type *t = ccp->type(in(1));
-  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
+  if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks())) {
     return NULL; // do not transform raw pointers or narrow oops
   }
   return ConstraintCastNode::Ideal_DU_postCCP(ccp);
src/share/vm/opto/lcm.cpp

@@ -32,7 +32,8 @@
 // with suitable memory ops nearby. Use the memory op to do the NULL check.
 // I can generate a memory op if there is not one nearby.
 // The proj is the control projection for the not-null case.
-// The val is the pointer being checked for nullness.
+// The val is the pointer being checked for nullness or
+// decodeHeapOop_not_null node if it did not fold into address.
 void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
   // Assume if null check need for 0 offset then always needed
   // Intel solaris doesn't support any null checks yet and no
@@ -96,6 +97,13 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
     }
   }
 
+  // Check for decodeHeapOop_not_null node which did not fold into address
+  bool is_decoden = ((intptr_t)val) & 1;
+  val = (Node*)(((intptr_t)val) & ~1);
+
+  assert(!is_decoden || (val->in(0) == NULL) && val->is_Mach() &&
+         (val->as_Mach()->ideal_Opcode() == Op_DecodeN), "sanity");
+
   // Search the successor block for a load or store who's base value is also
   // the tested value. There may be several.
   Node_List *out = new Node_List(Thread::current()->resource_area());
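Note how implicit_null_check() now expects val with its low bit possibly set: collect_null_checks() (in the matcher.cpp hunks below) smuggles an "is DecodeN" flag through the pointer itself, which is safe because node pointers are word-aligned. A self-contained illustration of the tagging trick (the helper names are made up):

#include <cassert>
#include <cstdint>

struct Node { int idx; }; // stand-in; any type aligned to >= 2 bytes works

// Stash a one-bit flag in the low bit of an aligned pointer, as
// collect_null_checks()/implicit_null_check() do for DecodeN values.
static Node* tag(Node* p)       { return reinterpret_cast<Node*>(reinterpret_cast<intptr_t>(p) | 1); }
static bool  is_tagged(Node* p) { return (reinterpret_cast<intptr_t>(p) & 1) != 0; }
static Node* untag(Node* p)     { return reinterpret_cast<Node*>(reinterpret_cast<intptr_t>(p) & ~(intptr_t)1); }

int main() {
  Node n{42};
  Node* marked = tag(&n);
  assert(is_tagged(marked));
  assert(untag(marked)->idx == 42); // original pointer recovered intact
  assert(!is_tagged(&n));           // untagged pointers read back as plain
}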
@@ -148,7 +156,8 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
     if( !mach->needs_anti_dependence_check() )
       continue;             // Not an memory op; skip it
     {
-      // Check that value is used in memory address.
+      // Check that value is used in memory address in
+      // instructions with embedded load (CmpP val1,(val2+off)).
       Node* base;
       Node* index;
       const MachOper* oper = mach->memory_inputs(base, index);
@@ -213,7 +222,11 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
   uint vidx = 0;              // Capture index of value into memop
   uint j;
   for( j = mach->req()-1; j > 0; j-- ) {
-    if( mach->in(j) == val ) vidx = j;
+    if( mach->in(j) == val ) {
+      vidx = j;
+      // Ignore DecodeN val which could be hoisted to where needed.
+      if( is_decoden ) continue;
+    }
     // Block of memory-op input
     Block *inb = cfg->_bbs[mach->in(j)->_idx];
     Block *b = this;            // Start from nul check
@@ -270,6 +283,26 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
   extern int implicit_null_checks;
   implicit_null_checks++;
 
+  if( is_decoden ) {
+    // Check if we need to hoist decodeHeapOop_not_null first.
+    Block *valb = cfg->_bbs[val->_idx];
+    if( this != valb && this->_dom_depth < valb->_dom_depth ) {
+      // Hoist it up to the end of the test block.
+      valb->find_remove(val);
+      this->add_inst(val);
+      cfg->_bbs.map(val->_idx,this);
+      // DecodeN on x86 may kill flags. Check for flag-killing projections
+      // that also need to be hoisted.
+      for (DUIterator_Fast jmax, j = val->fast_outs(jmax); j < jmax; j++) {
+        Node* n = val->fast_out(j);
+        if( n->Opcode() == Op_MachProj ) {
+          cfg->_bbs[n->_idx]->find_remove(n);
+          this->add_inst(n);
+          cfg->_bbs.map(n->_idx,this);
+        }
+      }
+    }
+  }
   // Hoist the memory candidate up to the end of the test block.
   Block *old_block = cfg->_bbs[best->_idx];
   old_block->find_remove(best);
src/share/vm/opto/matcher.cpp

@@ -1334,7 +1334,7 @@ static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
     if( j == max_scan )         // No post-domination before scan end?
       return true;              // Then break the match tree up
   }
-  if (m->is_DecodeN() && Matcher::clone_shift_expressions) {
+  if (m->is_DecodeN() && Matcher::narrow_oop_use_complex_address()) {
    // These are commonly used in address expressions and can
    // efficiently fold into them on X64 in some cases.
    return false;
@@ -2110,8 +2110,8 @@ void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
     _null_check_tests.push(proj);
     Node* val = cmp->in(1);
 #ifdef _LP64
-    if (UseCompressedOops && !Matcher::clone_shift_expressions &&
-        val->bottom_type()->isa_narrowoop()) {
+    if (val->bottom_type()->isa_narrowoop() &&
+        !Matcher::narrow_oop_use_complex_address()) {
       //
       // Look for DecodeN node which should be pinned to orig_proj.
       // On platforms (Sparc) which can not handle 2 adds
@@ -2127,6 +2127,9 @@ void Matcher::collect_null_checks( Node *proj, Node *orig_proj ) {
       if (d->is_DecodeN() && d->in(1) == val) {
         val = d;
         val->set_req(0, NULL); // Unpin now.
+        // Mark this as special case to distinguish from
+        // a regular case: CmpP(DecodeN, NULL).
+        val = (Node*)(((intptr_t)val) | 1);
         break;
       }
     }
@@ -2146,9 +2149,21 @@ void Matcher::validate_null_checks( ) {
   for( uint i=0; i < cnt; i+=2 ) {
     Node *test = _null_check_tests[i];
     Node *val = _null_check_tests[i+1];
+    bool is_decoden = ((intptr_t)val) & 1;
+    val = (Node*)(((intptr_t)val) & ~1);
     if (has_new_node(val)) {
+      Node* new_val = new_node(val);
+      if (is_decoden) {
+        assert(val->is_DecodeN() && val->in(0) == NULL, "sanity");
+        // Note: new_val may have a control edge if
+        // the original ideal node DecodeN was matched before
+        // it was unpinned in Matcher::collect_null_checks().
+        // Unpin the mach node and mark it.
+        new_val->set_req(0, NULL);
+        new_val = (Node*)(((intptr_t)new_val) | 1);
+      }
       // Is a match-tree root, so replace with the matched value
-      _null_check_tests.map(i+1, new_node(val));
+      _null_check_tests.map(i+1, new_val);
     } else {
       // Yank from candidate list
       _null_check_tests.map(i+1,_null_check_tests[--cnt]);
src/share/vm/opto/matcher.hpp

@@ -352,6 +352,38 @@ public:
   // registers? True for Intel but false for most RISCs
   static const bool clone_shift_expressions;
 
+  static bool narrow_oop_use_complex_address();
+
+  // Generate implicit null check for narrow oops if it can fold
+  // into address expression (x64).
+  //
+  // [R12 + narrow_oop_reg<<3 + offset] // fold into address expression
+  // NullCheck narrow_oop_reg
+  //
+  // When narrow oops can't fold into address expression (Sparc) and
+  // base is not null use decode_not_null and normal implicit null check.
+  // Note, decode_not_null node can be used here since it is referenced
+  // only on non null path but it requires special handling, see
+  // collect_null_checks():
+  //
+  // decode_not_null narrow_oop_reg, oop_reg // 'shift' and 'add base'
+  // [oop_reg + offset]
+  // NullCheck oop_reg
+  //
+  // With Zero base and when narrow oops can not fold into address
+  // expression use normal implicit null check since only shift
+  // is needed to decode narrow oop.
+  //
+  // decode narrow_oop_reg, oop_reg // only 'shift'
+  // [oop_reg + offset]
+  // NullCheck oop_reg
+  //
+  inline static bool gen_narrow_oop_implicit_null_checks() {
+    return Universe::narrow_oop_use_implicit_null_checks() &&
+           (narrow_oop_use_complex_address() ||
+            Universe::narrow_oop_base() != NULL);
+  }
+
   // Is it better to copy float constants, or load them directly from memory?
   // Intel can load a float constant from a direct address, requiring no
   // extra registers. Most RISCs will have to materialize an address into a
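The comment block above enumerates three code shapes; the predicate at its end chooses among them from two platform facts: whether the decode folds into an address, and whether the heap base is non-zero. A standalone restatement with stubbed configuration (the Config struct is hypothetical; the logic mirrors gen_narrow_oop_implicit_null_checks() above):

#include <cstdio>

// Stand-ins for the Universe/Matcher queries; vary per platform and config.
struct Config {
  bool use_implicit_null_checks; // protected page at the heap base exists?
  bool use_complex_address;      // decode folds into the address (x86_64)?
  bool base_is_null;             // zero-based compressed oops?
};

static bool gen_narrow_oop_implicit_null_checks(const Config& c) {
  return c.use_implicit_null_checks &&
         (c.use_complex_address || !c.base_is_null);
}

int main() {
  Config x64_folding {true, true,  false}; // check the narrow oop directly
  Config sparc_based {true, false, false}; // hoisted decode_not_null, check oop
  Config zero_based  {true, false, true};  // only a shift; plain oop check
  std::printf("%d %d %d\n",
              gen_narrow_oop_implicit_null_checks(x64_folding),  // 1
              gen_narrow_oop_implicit_null_checks(sparc_based),  // 1
              gen_narrow_oop_implicit_null_checks(zero_based));  // 0
}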