mirror of
https://github.com/openjdk/jdk.git
synced 2025-09-17 17:44:40 +02:00
8252372: Check if cloning is required to move loads out of loops in PhaseIdealLoop::split_if_with_blocks_post()
Reviewed-by: thartmann, kvn
This commit is contained in:
parent
039441689d
commit
9d305b9c06
14 changed files with 541 additions and 215 deletions
|
@ -8735,6 +8735,61 @@ instruct castLL(iRegL dst)
|
|||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastFF on a vRegF operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castFF(vRegF dst)
|
||||
%{
|
||||
match(Set dst (CastFF dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castFF of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastDD on a vRegD operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castDD(vRegD dst)
|
||||
%{
|
||||
match(Set dst (CastDD dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castDD of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a vecD operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castVVD(vecD dst)
|
||||
%{
|
||||
match(Set dst (CastVV dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castVV of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a vecX operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castVVX(vecX dst)
|
||||
%{
|
||||
match(Set dst (CastVV dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castVV of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a vReg operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castVV(vReg dst)
|
||||
%{
|
||||
match(Set dst (CastVV dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castVV of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(pipe_class_empty);
|
||||
%}
|
||||
|
||||
// ============================================================================
|
||||
// Atomic operation instructions
|
||||
//
|
||||
|
|
|
@ -5182,6 +5182,39 @@ instruct castLL( iRegL dst ) %{
|
|||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastFF on a regF operand. Input and output are the same
// operand ($dst); the encoding is empty and the cost is 0.
instruct castFF( regF dst ) %{
|
||||
match(Set dst (CastFF dst));
|
||||
format %{ "! castFF of $dst" %}
|
||||
ins_encode( /*empty encoding*/ );
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastDD on a regD operand. Input and output are the same
// operand ($dst); the encoding is empty and the cost is 0.
instruct castDD( regD dst ) %{
|
||||
match(Set dst (CastDD dst));
|
||||
format %{ "! castDD of $dst" %}
|
||||
ins_encode( /*empty encoding*/ );
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a vecD operand. Input and output are the same
// operand ($dst); the encoding is empty and the cost is 0.
instruct castVVD( vecD dst ) %{
|
||||
match(Set dst (CastVV dst));
|
||||
format %{ "! castVV of $dst" %}
|
||||
ins_encode( /*empty encoding*/ );
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a vecX operand. Input and output are the same
// operand ($dst); the encoding is empty and the cost is 0.
instruct castVVX( vecX dst ) %{
|
||||
match(Set dst (CastVV dst));
|
||||
format %{ "! castVV of $dst" %}
|
||||
ins_encode( /*empty encoding*/ );
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
|
||||
//----------Arithmetic Instructions--------------------------------------------
|
||||
// Addition Instructions
|
||||
// Register Addition
|
||||
|
|
|
@ -10335,6 +10335,38 @@ instruct castLL(iRegLdst dst) %{
|
|||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// Match rule for CastFF on a regF operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
instruct castFF(regF dst) %{
|
||||
match(Set dst (CastFF dst));
|
||||
format %{ " -- \t// castFF of $dst" %}
|
||||
size(0);
|
||||
ins_encode( /*empty*/ );
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// Match rule for CastDD on a regD operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
instruct castDD(regD dst) %{
|
||||
match(Set dst (CastDD dst));
|
||||
format %{ " -- \t// castDD of $dst" %}
|
||||
size(0);
|
||||
ins_encode( /*empty*/ );
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on an iRegLdst operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
// NOTE(review): the "8" suffix presumably refers to 8-byte vectors held in a
// long register -- confirm against the vector operand definitions.
instruct castVV8(iRegLdst dst) %{
|
||||
match(Set dst (CastVV dst));
|
||||
format %{ " -- \t// castVV of $dst" %}
|
||||
size(0);
|
||||
ins_encode( /*empty*/ );
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a vecX operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
// NOTE(review): the "16" suffix presumably refers to 16-byte vectors --
// confirm against the vecX operand definition.
instruct castVV16(vecX dst) %{
|
||||
match(Set dst (CastVV dst));
|
||||
format %{ " -- \t// castVV of $dst" %}
|
||||
size(0);
|
||||
ins_encode( /*empty*/ );
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
||||
instruct checkCastPP(iRegPdst dst) %{
|
||||
match(Set dst (CheckCastPP dst));
|
||||
format %{ " -- \t// checkcastPP of $dst" %}
|
||||
|
|
|
@ -5260,6 +5260,30 @@ instruct castLL(iRegL dst) %{
|
|||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
// Match rule for CastFF on a regF operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
instruct castFF(regF dst) %{
|
||||
match(Set dst (CastFF dst));
|
||||
size(0);
|
||||
format %{ "# castFF of $dst" %}
|
||||
ins_encode(/*empty*/);
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
// Match rule for CastDD on a regD operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
instruct castDD(regD dst) %{
|
||||
match(Set dst (CastDD dst));
|
||||
size(0);
|
||||
format %{ "# castDD of $dst" %}
|
||||
ins_encode(/*empty*/);
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on an iRegL operand. Input and output are the same
// operand ($dst); the rule declares size 0 and an empty encoding, so it
// emits no machine code.
// NOTE(review): using a long register operand for a vector cast presumably
// reflects how this port holds vectors -- confirm against the port's
// vector support.
instruct castVV(iRegL dst) %{
|
||||
match(Set dst (CastVV dst));
|
||||
size(0);
|
||||
format %{ "# castVV of $dst" %}
|
||||
ins_encode(/*empty*/);
|
||||
ins_pipe(pipe_class_dummy);
|
||||
%}
|
||||
|
||||
//----------Conditional_store--------------------------------------------------
|
||||
// Conditional-store of the updated heap-top.
|
||||
// Used during allocation of the shared heap.
|
||||
|
|
|
@ -8127,3 +8127,25 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp,
|
|||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
#endif // _LP64
|
||||
|
||||
// Match rule for CastVV on a vec operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castVV(vec dst)
|
||||
%{
|
||||
match(Set dst (CastVV dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castVV of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastVV on a legVec operand; otherwise identical in shape to
// the castVV rule for vec operands. Input and output are the same operand
// ($dst); size 0, empty encoding and cost 0, so it emits no machine code.
instruct castVVLeg(legVec dst)
|
||||
%{
|
||||
match(Set dst (CastVV dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castVV of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
|
|
@ -7178,6 +7178,22 @@ instruct castLL( eRegL dst ) %{
|
|||
ins_pipe( empty );
|
||||
%}
|
||||
|
||||
// Match rule for CastFF on a regF operand. Input and output are the same
// operand ($dst); the encoding is empty and the cost is 0.
instruct castFF( regF dst ) %{
|
||||
match(Set dst (CastFF dst));
|
||||
format %{ "#castFF of $dst" %}
|
||||
ins_encode( /*empty encoding*/ );
|
||||
ins_cost(0);
|
||||
ins_pipe( empty );
|
||||
%}
|
||||
|
||||
// Match rule for CastDD on a regD operand. Input and output are the same
// operand ($dst); the encoding is empty and the cost is 0.
instruct castDD( regD dst ) %{
|
||||
match(Set dst (CastDD dst));
|
||||
format %{ "#castDD of $dst" %}
|
||||
ins_encode( /*empty encoding*/ );
|
||||
ins_cost(0);
|
||||
ins_pipe( empty );
|
||||
%}
|
||||
|
||||
// Load-locked - same as a regular pointer load when used with compare-swap
|
||||
instruct loadPLocked(eRegP dst, memory mem) %{
|
||||
match(Set dst (LoadPLocked mem));
|
||||
|
|
|
@ -7624,6 +7624,28 @@ instruct castLL(rRegL dst)
|
|||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastFF on a regF operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castFF(regF dst)
|
||||
%{
|
||||
match(Set dst (CastFF dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castFF of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// Match rule for CastDD on a regD operand. Input and output are the same
// operand ($dst); the rule declares size 0, an empty encoding and cost 0,
// so it emits no machine code.
instruct castDD(regD dst)
|
||||
%{
|
||||
match(Set dst (CastDD dst));
|
||||
|
||||
size(0);
|
||||
format %{ "# castDD of $dst" %}
|
||||
ins_encode(/* empty encoding */);
|
||||
ins_cost(0);
|
||||
ins_pipe(empty);
|
||||
%}
|
||||
|
||||
// LoadP-locked same as a regular LoadP when used with compare-swap
|
||||
instruct loadPLocked(rRegP dst, memory mem)
|
||||
%{
|
||||
|
|
|
@ -764,6 +764,11 @@ int InstructForm::memory_operand(FormDict &globals) const {
|
|||
bool InstructForm::captures_bottom_type(FormDict &globals) const {
|
||||
if (_matrule && _matrule->_rChild &&
|
||||
(!strcmp(_matrule->_rChild->_opType,"CastPP") || // new result type
|
||||
!strcmp(_matrule->_rChild->_opType,"CastDD") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"CastFF") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"CastII") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"CastLL") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"CastVV") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"CastX2P") || // new result type
|
||||
!strcmp(_matrule->_rChild->_opType,"DecodeN") ||
|
||||
!strcmp(_matrule->_rChild->_opType,"EncodeP") ||
|
||||
|
|
|
@ -115,6 +115,37 @@ public:
|
|||
virtual uint ideal_reg() const { return Op_RegL; }
|
||||
};
|
||||
|
||||
//------------------------------CastFFNode-------------------------------------
// ConstraintCastNode subclass tagged with Class_CastFF; its ideal register
// is Op_RegF. The constructor forwards the input node, type and
// carry_dependency flag (default false) to ConstraintCastNode.
class CastFFNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastFFNode(Node* n, const Type* t, bool carry_dependency = false)
|
||||
: ConstraintCastNode(n, t, carry_dependency){
|
||||
init_class_id(Class_CastFF);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegF; }
|
||||
};
|
||||
|
||||
//------------------------------CastDDNode-------------------------------------
// ConstraintCastNode subclass tagged with Class_CastDD; its ideal register
// is Op_RegD. The constructor forwards the input node, type and
// carry_dependency flag (default false) to ConstraintCastNode.
class CastDDNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastDDNode(Node* n, const Type* t, bool carry_dependency = false)
|
||||
: ConstraintCastNode(n, t, carry_dependency){
|
||||
init_class_id(Class_CastDD);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
};
|
||||
|
||||
//------------------------------CastVVNode-------------------------------------
// ConstraintCastNode subclass tagged with Class_CastVV. Unlike the fixed
// Op_RegF/Op_RegD of the float and double casts, its ideal register is
// whatever input 1 reports. The constructor forwards the input node, type
// and carry_dependency flag (default false) to ConstraintCastNode.
class CastVVNode: public ConstraintCastNode {
|
||||
public:
|
||||
CastVVNode(Node* n, const Type* t, bool carry_dependency = false)
|
||||
: ConstraintCastNode(n, t, carry_dependency){
|
||||
init_class_id(Class_CastVV);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
virtual uint ideal_reg() const { return in(1)->ideal_reg(); }
|
||||
};
|
||||
|
||||
|
||||
//------------------------------CastPPNode-------------------------------------
|
||||
// cast pointer to pointer (different type)
|
||||
class CastPPNode: public ConstraintCastNode {
|
||||
|
|
|
@ -62,8 +62,11 @@ macro(CallLeafNoFP)
|
|||
macro(CallRuntime)
|
||||
macro(CallNative)
|
||||
macro(CallStaticJava)
|
||||
macro(CastDD)
|
||||
macro(CastFF)
|
||||
macro(CastII)
|
||||
macro(CastLL)
|
||||
macro(CastVV)
|
||||
macro(CastX2P)
|
||||
macro(CastP2X)
|
||||
macro(CastPP)
|
||||
|
|
|
@ -4991,52 +4991,64 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
|
|||
}
|
||||
#endif
|
||||
|
||||
// if this is a load, check for anti-dependent stores
|
||||
// We use a conservative algorithm to identify potential interfering
|
||||
// instructions and for rescheduling the load. The users of the memory
|
||||
// input of this load are examined. Any use which is not a load and is
|
||||
// dominated by early is considered a potentially interfering store.
|
||||
// This can produce false positives.
|
||||
if (n->is_Load() && LCA != early) {
|
||||
int load_alias_idx = C->get_alias_index(n->adr_type());
|
||||
if (C->alias_type(load_alias_idx)->is_rewritable()) {
|
||||
Unique_Node_List worklist;
|
||||
LCA = get_late_ctrl_with_anti_dep(n->as_Load(), early, LCA);
|
||||
}
|
||||
|
||||
Node* mem = n->in(MemNode::Memory);
|
||||
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
|
||||
Node* s = mem->fast_out(i);
|
||||
worklist.push(s);
|
||||
}
|
||||
for (uint i = 0; i < worklist.size() && LCA != early; i++) {
|
||||
Node* s = worklist.at(i);
|
||||
if (s->is_Load() || s->Opcode() == Op_SafePoint ||
|
||||
(s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0) ||
|
||||
s->is_Phi()) {
|
||||
continue;
|
||||
} else if (s->is_MergeMem()) {
|
||||
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
|
||||
Node* s1 = s->fast_out(i);
|
||||
worklist.push(s1);
|
||||
}
|
||||
} else {
|
||||
Node* sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
|
||||
assert(sctrl != NULL || !s->is_reachable_from_root(), "must have control");
|
||||
if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
|
||||
const TypePtr* adr_type = s->adr_type();
|
||||
if (s->is_ArrayCopy()) {
|
||||
// Copy to known instance needs destination type to test for aliasing
|
||||
const TypePtr* dest_type = s->as_ArrayCopy()->_dest_type;
|
||||
if (dest_type != TypeOopPtr::BOTTOM) {
|
||||
adr_type = dest_type;
|
||||
}
|
||||
assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
|
||||
return LCA;
|
||||
}
|
||||
|
||||
// if this is a load, check for anti-dependent stores
|
||||
// We use a conservative algorithm to identify potential interfering
|
||||
// instructions and for rescheduling the load. The users of the memory
|
||||
// input of this load are examined. Any use which is not a load and is
|
||||
// dominated by early is considered a potentially interfering store.
|
||||
// This can produce false positives.
|
||||
Node* PhaseIdealLoop::get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA) {
|
||||
int load_alias_idx = C->get_alias_index(n->adr_type());
|
||||
if (C->alias_type(load_alias_idx)->is_rewritable()) {
|
||||
Unique_Node_List worklist;
|
||||
|
||||
Node* mem = n->in(MemNode::Memory);
|
||||
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
|
||||
Node* s = mem->fast_out(i);
|
||||
worklist.push(s);
|
||||
}
|
||||
for (uint i = 0; i < worklist.size() && LCA != early; i++) {
|
||||
Node* s = worklist.at(i);
|
||||
if (s->is_Load() || s->Opcode() == Op_SafePoint ||
|
||||
(s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0) ||
|
||||
s->is_Phi()) {
|
||||
continue;
|
||||
} else if (s->is_MergeMem()) {
|
||||
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
|
||||
Node* s1 = s->fast_out(i);
|
||||
worklist.push(s1);
|
||||
}
|
||||
} else {
|
||||
Node* sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
|
||||
assert(sctrl != NULL || !s->is_reachable_from_root(), "must have control");
|
||||
if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
|
||||
const TypePtr* adr_type = s->adr_type();
|
||||
if (s->is_ArrayCopy()) {
|
||||
// Copy to known instance needs destination type to test for aliasing
|
||||
const TypePtr* dest_type = s->as_ArrayCopy()->_dest_type;
|
||||
if (dest_type != TypeOopPtr::BOTTOM) {
|
||||
adr_type = dest_type;
|
||||
}
|
||||
if (C->can_alias(adr_type, load_alias_idx)) {
|
||||
LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
|
||||
} else if (s->is_CFG()) {
|
||||
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
|
||||
Node* s1 = s->fast_out(i);
|
||||
if (_igvn.type(s1) == Type::MEMORY) {
|
||||
worklist.push(s1);
|
||||
}
|
||||
if (C->can_alias(adr_type, load_alias_idx)) {
|
||||
LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
|
||||
} else if (s->is_CFG() && s->is_Multi()) {
|
||||
// Look for the memory use of s (that is the use of its memory projection)
|
||||
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
|
||||
Node* s1 = s->fast_out(i);
|
||||
assert(s1->is_Proj(), "projection expected");
|
||||
if (_igvn.type(s1) == Type::MEMORY) {
|
||||
for (DUIterator_Fast jmax, j = s1->fast_outs(jmax); j < jmax; j++) {
|
||||
Node* s2 = s1->fast_out(j);
|
||||
worklist.push(s2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5062,8 +5074,6 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
|
||||
return LCA;
|
||||
}
|
||||
|
||||
|
@ -5091,23 +5101,24 @@ bool PhaseIdealLoop::is_dominator(Node *d, Node *n) {
|
|||
// does not need to be cleared between calls to get_late_ctrl().
|
||||
// Algorithm trades a larger constant factor for better asymptotic behavior
|
||||
//
|
||||
Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, Node *tag ) {
|
||||
Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal(Node *n1, Node *n2, Node *tag_node) {
|
||||
uint d1 = dom_depth(n1);
|
||||
uint d2 = dom_depth(n2);
|
||||
jlong tag = tag_node->_idx | (((jlong)_dom_lca_tags_round) << 32);
|
||||
|
||||
do {
|
||||
if (d1 > d2) {
|
||||
// current lca is deeper than n2
|
||||
_dom_lca_tags.map(n1->_idx, tag);
|
||||
_dom_lca_tags.at_put_grow(n1->_idx, tag);
|
||||
n1 = idom(n1);
|
||||
d1 = dom_depth(n1);
|
||||
} else if (d1 < d2) {
|
||||
// n2 is deeper than current lca
|
||||
Node *memo = _dom_lca_tags[n2->_idx];
|
||||
if( memo == tag ) {
|
||||
jlong memo = _dom_lca_tags.at_grow(n2->_idx, 0);
|
||||
if (memo == tag) {
|
||||
return n1; // Return the current LCA
|
||||
}
|
||||
_dom_lca_tags.map(n2->_idx, tag);
|
||||
_dom_lca_tags.at_put_grow(n2->_idx, tag);
|
||||
n2 = idom(n2);
|
||||
d2 = dom_depth(n2);
|
||||
} else {
|
||||
|
@ -5116,19 +5127,19 @@ Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, No
|
|||
// to be searched more carefully.
|
||||
|
||||
// Scan up all the n1's with equal depth, looking for n2.
|
||||
_dom_lca_tags.map(n1->_idx, tag);
|
||||
_dom_lca_tags.at_put_grow(n1->_idx, tag);
|
||||
Node *t1 = idom(n1);
|
||||
while (dom_depth(t1) == d1) {
|
||||
if (t1 == n2) return n2;
|
||||
_dom_lca_tags.map(t1->_idx, tag);
|
||||
_dom_lca_tags.at_put_grow(t1->_idx, tag);
|
||||
t1 = idom(t1);
|
||||
}
|
||||
// Scan up all the n2's with equal depth, looking for n1.
|
||||
_dom_lca_tags.map(n2->_idx, tag);
|
||||
_dom_lca_tags.at_put_grow(n2->_idx, tag);
|
||||
Node *t2 = idom(n2);
|
||||
while (dom_depth(t2) == d2) {
|
||||
if (t2 == n1) return n1;
|
||||
_dom_lca_tags.map(t2->_idx, tag);
|
||||
_dom_lca_tags.at_put_grow(t2->_idx, tag);
|
||||
t2 = idom(t2);
|
||||
}
|
||||
// Move up to a new dominator-depth value as well as up the dom-tree.
|
||||
|
@ -5147,25 +5158,11 @@ Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, No
|
|||
// be of fixed size.
|
||||
void PhaseIdealLoop::init_dom_lca_tags() {
|
||||
uint limit = C->unique() + 1;
|
||||
_dom_lca_tags.map( limit, NULL );
|
||||
_dom_lca_tags.at_grow(limit, 0);
|
||||
_dom_lca_tags_round = 0;
|
||||
#ifdef ASSERT
|
||||
for( uint i = 0; i < limit; ++i ) {
|
||||
assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
|
||||
}
|
||||
#endif // ASSERT
|
||||
}
|
||||
|
||||
//------------------------------clear_dom_lca_tags------------------------------
|
||||
// Tag could be a node's integer index, 32bits instead of 64bits in some cases
|
||||
// Intended use does not involve any growth for the array, so it could
|
||||
// be of fixed size.
|
||||
void PhaseIdealLoop::clear_dom_lca_tags() {
|
||||
uint limit = C->unique() + 1;
|
||||
_dom_lca_tags.map( limit, NULL );
|
||||
_dom_lca_tags.clear();
|
||||
#ifdef ASSERT
|
||||
for( uint i = 0; i < limit; ++i ) {
|
||||
assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
|
||||
for (uint i = 0; i < limit; ++i) {
|
||||
assert(_dom_lca_tags.at(i) == 0, "Must be distinct from each node pointer");
|
||||
}
|
||||
#endif // ASSERT
|
||||
}
|
||||
|
|
|
@ -867,9 +867,9 @@ private:
|
|||
|
||||
// Support for faster execution of get_late_ctrl()/dom_lca()
|
||||
// when a node has many uses and dominator depth is deep.
|
||||
Node_Array _dom_lca_tags;
|
||||
GrowableArray<jlong> _dom_lca_tags;
|
||||
uint _dom_lca_tags_round;
|
||||
void init_dom_lca_tags();
|
||||
void clear_dom_lca_tags();
|
||||
|
||||
// Helper for debugging bad dominance relationships
|
||||
bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early);
|
||||
|
@ -1063,7 +1063,6 @@ private:
|
|||
_igvn(igvn),
|
||||
_verify_me(nullptr),
|
||||
_verify_only(false),
|
||||
_dom_lca_tags(arena()), // Thread::resource_area
|
||||
_nodes_required(UINT_MAX) {
|
||||
assert(mode != LoopOptsVerify, "wrong constructor to verify IdealLoop");
|
||||
build_and_optimize(mode);
|
||||
|
@ -1077,7 +1076,6 @@ private:
|
|||
_igvn(igvn),
|
||||
_verify_me(verify_me),
|
||||
_verify_only(verify_me == nullptr),
|
||||
_dom_lca_tags(arena()), // Thread::resource_area
|
||||
_nodes_required(UINT_MAX) {
|
||||
build_and_optimize(LoopOptsVerify);
|
||||
}
|
||||
|
@ -1485,7 +1483,7 @@ private:
|
|||
void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
|
||||
bool split_up( Node *n, Node *blk1, Node *blk2 );
|
||||
void sink_use( Node *use, Node *post_loop );
|
||||
Node *place_near_use( Node *useblock ) const;
|
||||
Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
|
||||
Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
|
||||
void try_move_store_after_loop(Node* n);
|
||||
bool identical_backtoback_ifs(Node *n);
|
||||
|
@ -1613,7 +1611,15 @@ public:
|
|||
|
||||
LoopNode* create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head, LongCountedLoopEndNode* exit_test);
|
||||
|
||||
bool is_safe_load_ctrl(Node* ctrl);
|
||||
Node* get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA);
|
||||
|
||||
bool ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl);
|
||||
|
||||
bool ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop);
|
||||
|
||||
Node* compute_early_ctrl(Node* n, Node* n_ctrl);
|
||||
|
||||
void try_sink_out_of_loop(Node* n);
|
||||
};
|
||||
|
||||
|
||||
|
|
|
@ -1134,21 +1134,26 @@ static bool merge_point_safe(Node* region) {
|
|||
}
|
||||
|
||||
|
||||
//------------------------------place_near_use---------------------------------
|
||||
// Place some computation next to use but not inside inner loops.
|
||||
// For inner loop uses move it to the preheader area.
|
||||
Node *PhaseIdealLoop::place_near_use(Node *useblock) const {
|
||||
IdealLoopTree *u_loop = get_loop( useblock );
|
||||
if (u_loop->_irreducible) {
|
||||
return useblock;
|
||||
//------------------------------place_outside_loop---------------------------------
|
||||
// Place some computation outside of this loop on the path to the use passed as argument
|
||||
Node* PhaseIdealLoop::place_outside_loop(Node* useblock, IdealLoopTree* loop) const {
|
||||
Node* head = loop->_head;
|
||||
assert(!loop->is_member(get_loop(useblock)), "must be outside loop");
|
||||
if (head->is_Loop() && head->as_Loop()->is_strip_mined()) {
|
||||
loop = loop->_parent;
|
||||
assert(loop->_head->is_OuterStripMinedLoop(), "malformed strip mined loop");
|
||||
}
|
||||
if (u_loop->_child) {
|
||||
if (useblock == u_loop->_head && u_loop->_head->is_OuterStripMinedLoop()) {
|
||||
return u_loop->_head->in(LoopNode::EntryControl);
|
||||
|
||||
// Pick control right outside the loop
|
||||
for (;;) {
|
||||
Node* dom = idom(useblock);
|
||||
if (loop->is_member(get_loop(dom))) {
|
||||
break;
|
||||
}
|
||||
return useblock;
|
||||
useblock = dom;
|
||||
}
|
||||
return u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
assert(find_non_split_ctrl(useblock) == useblock, "should be non split control");
|
||||
return useblock;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1402,128 +1407,7 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
|
|||
}
|
||||
}
|
||||
|
||||
// See if a shared loop-varying computation has no loop-varying uses.
|
||||
// Happens if something is only used for JVM state in uncommon trap exits,
|
||||
// like various versions of induction variable+offset. Clone the
|
||||
// computation per usage to allow it to sink out of the loop.
|
||||
if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
|
||||
Node *n_ctrl = get_ctrl(n);
|
||||
IdealLoopTree *n_loop = get_loop(n_ctrl);
|
||||
if( n_loop != _ltree_root ) {
|
||||
DUIterator_Fast imax, i = n->fast_outs(imax);
|
||||
for (; i < imax; i++) {
|
||||
Node* u = n->fast_out(i);
|
||||
if( !has_ctrl(u) ) break; // Found control user
|
||||
IdealLoopTree *u_loop = get_loop(get_ctrl(u));
|
||||
if( u_loop == n_loop ) break; // Found loop-varying use
|
||||
if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
|
||||
if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
|
||||
}
|
||||
bool did_break = (i < imax); // Did we break out of the previous loop?
|
||||
if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
|
||||
Node *late_load_ctrl = NULL;
|
||||
if (n->is_Load()) {
|
||||
// If n is a load, get and save the result from get_late_ctrl(),
|
||||
// to be later used in calculating the control for n's clones.
|
||||
clear_dom_lca_tags();
|
||||
late_load_ctrl = get_late_ctrl(n, n_ctrl);
|
||||
}
|
||||
// If n is a load, and the late control is the same as the current
|
||||
// control, then the cloning of n is a pointless exercise, because
|
||||
// GVN will ensure that we end up where we started.
|
||||
if (!n->is_Load() || (late_load_ctrl != n_ctrl && is_safe_load_ctrl(late_load_ctrl))) {
|
||||
Node* outer_loop_clone = NULL;
|
||||
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
|
||||
Node *u = n->last_out(j); // Clone private computation per use
|
||||
_igvn.rehash_node_delayed(u);
|
||||
Node *x = n->clone(); // Clone computation
|
||||
Node *x_ctrl = NULL;
|
||||
if( u->is_Phi() ) {
|
||||
// Replace all uses of normal nodes. Replace Phi uses
|
||||
// individually, so the separate Nodes can sink down
|
||||
// different paths.
|
||||
uint k = 1;
|
||||
while( u->in(k) != n ) k++;
|
||||
u->set_req( k, x );
|
||||
// x goes next to Phi input path
|
||||
x_ctrl = u->in(0)->in(k);
|
||||
--j;
|
||||
} else { // Normal use
|
||||
// Replace all uses
|
||||
for( uint k = 0; k < u->req(); k++ ) {
|
||||
if( u->in(k) == n ) {
|
||||
u->set_req( k, x );
|
||||
--j;
|
||||
}
|
||||
}
|
||||
x_ctrl = get_ctrl(u);
|
||||
}
|
||||
|
||||
// Find control for 'x' next to use but not inside inner loops.
|
||||
// For inner loop uses get the preheader area.
|
||||
x_ctrl = place_near_use(x_ctrl);
|
||||
|
||||
if (n->is_Load()) {
|
||||
// For loads, add a control edge to a CFG node outside of the loop
|
||||
// to force them to not combine and return back inside the loop
|
||||
// during GVN optimization (4641526).
|
||||
//
|
||||
// Because we are setting the actual control input, factor in
|
||||
// the result from get_late_ctrl() so we respect any
|
||||
// anti-dependences. (6233005).
|
||||
x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
|
||||
|
||||
// Don't allow the control input to be a CFG splitting node.
|
||||
// Such nodes should only have ProjNodes as outs, e.g. IfNode
|
||||
// should only have IfTrueNode and IfFalseNode (4985384).
|
||||
x_ctrl = find_non_split_ctrl(x_ctrl);
|
||||
|
||||
IdealLoopTree* x_loop = get_loop(x_ctrl);
|
||||
Node* x_head = x_loop->_head;
|
||||
if (x_head->is_Loop() && (x_head->is_OuterStripMinedLoop() || x_head->as_Loop()->is_strip_mined())) {
|
||||
if (is_dominator(n_ctrl, x_head) && n_ctrl != x_head) {
|
||||
// Anti dependence analysis is sometimes too
|
||||
// conservative: a store in the outer strip mined loop
|
||||
// can prevent a load from floating out of the outer
|
||||
// strip mined loop but the load may not be referenced
|
||||
// from the safepoint: loop strip mining verification
|
||||
// code reports a problem in that case. Make sure the
|
||||
// load is not moved in the outer strip mined loop in
|
||||
// that case.
|
||||
x_ctrl = x_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
|
||||
} else if (x_head->is_OuterStripMinedLoop()) {
|
||||
// Do not add duplicate LoadNodes to the outer strip mined loop
|
||||
if (outer_loop_clone != NULL) {
|
||||
_igvn.replace_node(x, outer_loop_clone);
|
||||
continue;
|
||||
}
|
||||
outer_loop_clone = x;
|
||||
}
|
||||
}
|
||||
assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
|
||||
|
||||
x->set_req(0, x_ctrl);
|
||||
}
|
||||
register_new_node(x, x_ctrl);
|
||||
|
||||
// Some institutional knowledge is needed here: 'x' is
|
||||
// yanked because if the optimizer runs GVN on it all the
|
||||
// cloned x's will common up and undo this optimization and
|
||||
// be forced back in the loop.
|
||||
// I tried setting control edges on the x's to force them to
|
||||
// not combine, but the matching gets worried when it tries
|
||||
// to fold a StoreP and an AddP together (as part of an
|
||||
// address expression) and the AddP and StoreP have
|
||||
// different controls.
|
||||
if (!x->is_Load() && !x->is_DecodeNarrowPtr()) {
|
||||
_igvn._worklist.yank(x);
|
||||
}
|
||||
}
|
||||
_igvn.remove_dead_node(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
try_sink_out_of_loop(n);
|
||||
|
||||
try_move_store_after_loop(n);
|
||||
|
||||
|
@ -1536,9 +1420,199 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
|
|||
}
|
||||
}
|
||||
|
||||
bool PhaseIdealLoop::is_safe_load_ctrl(Node* ctrl) {
|
||||
if (ctrl->is_Proj() && ctrl->in(0)->is_Call() && ctrl->has_out_with(Op_Catch)) {
|
||||
return false;
|
||||
// See if a shared loop-varying computation has no loop-varying uses.
|
||||
// Happens if something is only used for JVM state in uncommon trap exits,
|
||||
// like various versions of induction variable+offset. Clone the
|
||||
// computation per usage to allow it to sink out of the loop.
|
||||
void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
|
||||
if (has_ctrl(n) &&
|
||||
!n->is_Phi() &&
|
||||
!n->is_Bool() &&
|
||||
!n->is_Proj() &&
|
||||
!n->is_MergeMem() &&
|
||||
!n->is_CMove() &&
|
||||
n->Opcode() != Op_Opaque4) {
|
||||
Node *n_ctrl = get_ctrl(n);
|
||||
IdealLoopTree *n_loop = get_loop(n_ctrl);
|
||||
if (n_loop != _ltree_root && n->outcnt() > 1) {
|
||||
// Compute early control: needed for anti-dependence analysis. It's also possible that as a result of
|
||||
// previous transformations in this loop opts round, the node can be hoisted now: early control will tell us.
|
||||
Node* early_ctrl = compute_early_ctrl(n, n_ctrl);
|
||||
if (n_loop->is_member(get_loop(early_ctrl)) && // check that this one can't be hoisted now
|
||||
ctrl_of_all_uses_out_of_loop(n, early_ctrl, n_loop)) { // All uses in outer loops!
|
||||
assert(!n->is_Store() && !n->is_LoadStore(), "no node with a side effect");
|
||||
Node* outer_loop_clone = NULL;
|
||||
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin;) {
|
||||
Node* u = n->last_out(j); // Clone private computation per use
|
||||
_igvn.rehash_node_delayed(u);
|
||||
Node* x = n->clone(); // Clone computation
|
||||
Node* x_ctrl = NULL;
|
||||
if (u->is_Phi()) {
|
||||
// Replace all uses of normal nodes. Replace Phi uses
|
||||
// individually, so the separate Nodes can sink down
|
||||
// different paths.
|
||||
uint k = 1;
|
||||
while (u->in(k) != n) k++;
|
||||
u->set_req(k, x);
|
||||
// x goes next to Phi input path
|
||||
x_ctrl = u->in(0)->in(k);
|
||||
// Find control for 'x' next to use but not inside inner loops.
|
||||
x_ctrl = place_outside_loop(x_ctrl, n_loop);
|
||||
--j;
|
||||
} else { // Normal use
|
||||
if (has_ctrl(u)) {
|
||||
x_ctrl = get_ctrl(u);
|
||||
} else {
|
||||
x_ctrl = u->in(0);
|
||||
}
|
||||
// Find control for 'x' next to use but not inside inner loops.
|
||||
x_ctrl = place_outside_loop(x_ctrl, n_loop);
|
||||
// Replace all uses
|
||||
if (u->is_ConstraintCast() && u->bottom_type()->higher_equal(_igvn.type(n)) && u->in(0) == x_ctrl) {
|
||||
// If we're sinking a chain of data nodes, we might have inserted a cast to pin the use which is not necessary
|
||||
// anymore now that we're going to pin n as well
|
||||
_igvn.replace_node(u, x);
|
||||
--j;
|
||||
} else {
|
||||
int nb = u->replace_edge(n, x, &_igvn);
|
||||
j -= nb;
|
||||
}
|
||||
}
|
||||
|
||||
if (n->is_Load()) {
|
||||
// For loads, add a control edge to a CFG node outside of the loop
|
||||
// to force them to not combine and return back inside the loop
|
||||
// during GVN optimization (4641526).
|
||||
assert(x_ctrl == get_late_ctrl_with_anti_dep(x->as_Load(), early_ctrl, x_ctrl), "anti-dependences were already checked");
|
||||
|
||||
IdealLoopTree* x_loop = get_loop(x_ctrl);
|
||||
Node* x_head = x_loop->_head;
|
||||
if (x_head->is_Loop() && x_head->is_OuterStripMinedLoop()) {
|
||||
// Do not add duplicate LoadNodes to the outer strip mined loop
|
||||
if (outer_loop_clone != NULL) {
|
||||
_igvn.replace_node(x, outer_loop_clone);
|
||||
continue;
|
||||
}
|
||||
outer_loop_clone = x;
|
||||
}
|
||||
x->set_req(0, x_ctrl);
|
||||
} else if (n->in(0) != NULL){
|
||||
x->set_req(0, x_ctrl);
|
||||
}
|
||||
assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
|
||||
assert(!n_loop->is_member(get_loop(x_ctrl)), "should have moved out of loop");
|
||||
register_new_node(x, x_ctrl);
|
||||
|
||||
if (x->in(0) == NULL && !x->is_DecodeNarrowPtr()) {
|
||||
assert(!x->is_Load(), "load should be pinned");
|
||||
// Use a cast node to pin clone out of loop
|
||||
Node* cast = NULL;
|
||||
for (uint k = 0; k < x->req(); k++) {
|
||||
Node* in = x->in(k);
|
||||
if (in != NULL && n_loop->is_member(get_loop(get_ctrl(in)))) {
|
||||
const Type* in_t = _igvn.type(in);
|
||||
if (in_t->isa_int()) {
|
||||
cast = new CastIINode(in, in_t, true);
|
||||
} else if (in_t->isa_long()) {
|
||||
cast = new CastLLNode(in, in_t, true);
|
||||
} else if (in_t->isa_ptr()) {
|
||||
cast = new CastPPNode(in, in_t, true);
|
||||
} else if (in_t->isa_float()) {
|
||||
cast = new CastFFNode(in, in_t, true);
|
||||
} else if (in_t->isa_double()) {
|
||||
cast = new CastDDNode(in, in_t, true);
|
||||
} else if (in_t->isa_vect()) {
|
||||
cast = new CastVVNode(in, in_t, true);
|
||||
}
|
||||
}
|
||||
if (cast != NULL) {
|
||||
cast->set_req(0, x_ctrl);
|
||||
register_new_node(cast, x_ctrl);
|
||||
x->replace_edge(in, cast);
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(cast != NULL, "must have added a cast to pin the node");
|
||||
}
|
||||
}
|
||||
_igvn.remove_dead_node(n);
|
||||
}
|
||||
_dom_lca_tags_round = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the earliest control for 'n' from the controls reachable through its inputs.
// Starting at 'n', inputs are followed transitively through nodes that are neither CFG
// nodes nor pinned. Every CFG node reached, and the control input of every pinned node
// reached, is a candidate. Among the candidates, the one dominated by the others is
// returned. Each candidate is expected to dominate 'n_ctrl'.
Node* PhaseIdealLoop::compute_early_ctrl(Node* n, Node* n_ctrl) {
  ResourceMark rm;
  Unique_Node_List worklist;
  worklist.push(n);
  Node* early_ctrl = NULL;
  for (uint idx = 0; idx < worklist.size(); idx++) {
    Node* cur = worklist.at(idx);
    const bool cur_is_cfg = cur->is_CFG();
    if (!cur_is_cfg && !cur->pinned()) {
      // Neither a CFG node nor pinned: keep walking up through its non-NULL inputs.
      for (uint k = 0; k < cur->req(); k++) {
        Node* input = cur->in(k);
        if (input != NULL) {
          worklist.push(input);
        }
      }
      continue;
    }
    // A CFG node is its own control; a pinned node contributes its control input.
    Node* candidate = cur_is_cfg ? cur : cur->in(0);
    if (candidate == NULL) {
      continue;
    }
    assert(is_dominator(candidate, n_ctrl), "");
    // Keep the candidate that is dominated by the one recorded so far.
    if (early_ctrl == NULL || is_dominator(early_ctrl, candidate)) {
      early_ctrl = candidate;
    }
  }
  assert(is_dominator(early_ctrl, n_ctrl), "early control must dominate current control");
  return early_ctrl;
}
|
||||
|
||||
bool PhaseIdealLoop::ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop) {
|
||||
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
||||
Node* u = n->fast_out(i);
|
||||
if (u->Opcode() == Op_Opaque1) {
|
||||
return false; // Found loop limit, bugfix for 4677003
|
||||
}
|
||||
// We can't reuse tags in PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal() so make sure calls to
|
||||
// get_late_ctrl_with_anti_dep() use their own tag
|
||||
_dom_lca_tags_round++;
|
||||
assert(_dom_lca_tags_round != 0, "shouldn't wrap around");
|
||||
|
||||
if (u->is_Phi()) {
|
||||
for (uint j = 1; j < u->req(); ++j) {
|
||||
if (u->in(j) == n && !ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, u->in(0)->in(j))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Node* ctrl = has_ctrl(u) ? get_ctrl(u) : u->in(0);
|
||||
if (!ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, ctrl)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PhaseIdealLoop::ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl) {
|
||||
if (n->is_Load()) {
|
||||
ctrl = get_late_ctrl_with_anti_dep(n->as_Load(), n_ctrl, ctrl);
|
||||
}
|
||||
IdealLoopTree *u_loop = get_loop(ctrl);
|
||||
if (u_loop == n_loop) {
|
||||
return false; // Found loop-varying use
|
||||
}
|
||||
if (n_loop->is_member(u_loop)) {
|
||||
return false; // Found use in inner loop
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -57,6 +57,9 @@ class CallNode;
|
|||
class CallRuntimeNode;
|
||||
class CallNativeNode;
|
||||
class CallStaticJavaNode;
|
||||
class CastFFNode;
|
||||
class CastDDNode;
|
||||
class CastVVNode;
|
||||
class CastIINode;
|
||||
class CastLLNode;
|
||||
class CatchNode;
|
||||
|
@ -691,6 +694,9 @@ public:
|
|||
DEFINE_CLASS_ID(CastII, ConstraintCast, 0)
|
||||
DEFINE_CLASS_ID(CheckCastPP, ConstraintCast, 1)
|
||||
DEFINE_CLASS_ID(CastLL, ConstraintCast, 2)
|
||||
DEFINE_CLASS_ID(CastFF, ConstraintCast, 3)
|
||||
DEFINE_CLASS_ID(CastDD, ConstraintCast, 4)
|
||||
DEFINE_CLASS_ID(CastVV, ConstraintCast, 5)
|
||||
DEFINE_CLASS_ID(CMove, Type, 3)
|
||||
DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
|
||||
DEFINE_CLASS_ID(DecodeNarrowPtr, Type, 5)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue