8252372: Check if cloning is required to move loads out of loops in PhaseIdealLoop::split_if_with_blocks_post()

Reviewed-by: thartmann, kvn
This commit is contained in:
Roland Westrelin 2021-05-26 09:20:42 +00:00
parent 039441689d
commit 9d305b9c06
14 changed files with 541 additions and 215 deletions

View file

@ -8735,6 +8735,61 @@ instruct castLL(iRegL dst)
ins_pipe(pipe_class_empty);
%}
// CastFF rule: matches a CastFF node in place (the vRegF operand dst is both
// the input and the result of the match). size(0), the empty encoding and
// ins_cost(0) mean the rule contributes no code bytes and no cost.
instruct castFF(vRegF dst)
%{
match(Set dst (CastFF dst));
size(0);
format %{ "# castFF of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastDD rule: matches a CastDD node in place (the vRegD operand dst is both
// the input and the result of the match). size(0), the empty encoding and
// ins_cost(0) mean the rule contributes no code bytes and no cost.
instruct castDD(vRegD dst)
%{
match(Set dst (CastDD dst));
size(0);
format %{ "# castDD of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastVV rule for the vecD operand class: in-place match (dst is both input
// and result), zero size, empty encoding, zero cost.
instruct castVVD(vecD dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastVV rule for the vecX operand class: in-place match (dst is both input
// and result), zero size, empty encoding, zero cost.
instruct castVVX(vecX dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastVV rule for the vReg operand class: in-place match (dst is both input
// and result), zero size, empty encoding, zero cost.
instruct castVV(vReg dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// ============================================================================
// Atomic operation instructions
//

View file

@ -5182,6 +5182,39 @@ instruct castLL( iRegL dst ) %{
ins_pipe(empty);
%}
// CastFF rule: in-place match on the regF operand dst (input and result are
// the same operand). The encoding is empty and the cost is 0.
instruct castFF( regF dst ) %{
match(Set dst (CastFF dst));
format %{ "! castFF of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
// CastDD rule: in-place match on the regD operand dst (input and result are
// the same operand). The encoding is empty and the cost is 0.
instruct castDD( regD dst ) %{
match(Set dst (CastDD dst));
format %{ "! castDD of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
// CastVV rule for the vecD operand class: in-place match, empty encoding,
// cost 0.
instruct castVVD( vecD dst ) %{
match(Set dst (CastVV dst));
format %{ "! castVV of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
// CastVV rule for the vecX operand class: in-place match, empty encoding,
// cost 0.
instruct castVVX( vecX dst ) %{
match(Set dst (CastVV dst));
format %{ "! castVV of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
//----------Arithmetic Instructions--------------------------------------------
// Addition Instructions
// Register Addition

View file

@ -10335,6 +10335,38 @@ instruct castLL(iRegLdst dst) %{
ins_pipe(pipe_class_default);
%}
// CastFF rule: in-place match on the regF operand dst (input and result are
// the same operand). size(0) and the empty encoding mean no code is emitted.
instruct castFF(regF dst) %{
match(Set dst (CastFF dst));
format %{ " -- \t// castFF of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
// CastDD rule: in-place match on the regD operand dst (input and result are
// the same operand). size(0) and the empty encoding mean no code is emitted.
instruct castDD(regD dst) %{
match(Set dst (CastDD dst));
format %{ " -- \t// castDD of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
// CastVV rule whose operand is iRegLdst: in-place match, size(0), empty
// encoding.
// NOTE(review): the "8" suffix and the long-register operand suggest this
// covers 8-byte vectors held in a general-purpose register - confirm against
// this port's vector register mapping.
instruct castVV8(iRegLdst dst) %{
match(Set dst (CastVV dst));
format %{ " -- \t// castVV of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
// CastVV rule for the vecX operand class: in-place match, size(0), empty
// encoding.
// NOTE(review): the "16" suffix presumably refers to 16-byte vectors - confirm.
instruct castVV16(vecX dst) %{
match(Set dst (CastVV dst));
format %{ " -- \t// castVV of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
instruct checkCastPP(iRegPdst dst) %{
match(Set dst (CheckCastPP dst));
format %{ " -- \t// checkcastPP of $dst" %}

View file

@ -5260,6 +5260,30 @@ instruct castLL(iRegL dst) %{
ins_pipe(pipe_class_dummy);
%}
// CastFF rule: in-place match on the regF operand dst (input and result are
// the same operand). size(0) and the empty encoding mean no code is emitted.
instruct castFF(regF dst) %{
match(Set dst (CastFF dst));
size(0);
format %{ "# castFF of $dst" %}
ins_encode(/*empty*/);
ins_pipe(pipe_class_dummy);
%}
// CastDD rule: in-place match on the regD operand dst (input and result are
// the same operand). size(0) and the empty encoding mean no code is emitted.
instruct castDD(regD dst) %{
match(Set dst (CastDD dst));
size(0);
format %{ "# castDD of $dst" %}
ins_encode(/*empty*/);
ins_pipe(pipe_class_dummy);
%}
// CastVV rule whose operand is iRegL: in-place match, size(0), empty encoding.
// NOTE(review): using a long-register operand for a vector cast presumably
// reflects how this port holds vector values - confirm.
instruct castVV(iRegL dst) %{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/*empty*/);
ins_pipe(pipe_class_dummy);
%}
//----------Conditional_store--------------------------------------------------
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.

View file

@ -8127,3 +8127,25 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp,
ins_pipe( pipe_slow );
%}
#endif // _LP64
// CastVV rule for the vec operand class: in-place match (dst is both input
// and result), zero size, empty encoding, zero cost.
instruct castVV(vec dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
// CastVV rule for the legVec operand class: in-place match (dst is both input
// and result), zero size, empty encoding, zero cost.
instruct castVVLeg(legVec dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}

View file

@ -7178,6 +7178,22 @@ instruct castLL( eRegL dst ) %{
ins_pipe( empty );
%}
// CastFF rule: in-place match on the regF operand dst (input and result are
// the same operand). The encoding is empty and the cost is 0.
instruct castFF( regF dst ) %{
match(Set dst (CastFF dst));
format %{ "#castFF of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe( empty );
%}
// CastDD rule: in-place match on the regD operand dst (input and result are
// the same operand). The encoding is empty and the cost is 0.
instruct castDD( regD dst ) %{
match(Set dst (CastDD dst));
format %{ "#castDD of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe( empty );
%}
// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
match(Set dst (LoadPLocked mem));

View file

@ -7624,6 +7624,28 @@ instruct castLL(rRegL dst)
ins_pipe(empty);
%}
// CastFF rule: matches a CastFF node in place (the regF operand dst is both
// the input and the result of the match). size(0), the empty encoding and
// ins_cost(0) mean the rule contributes no code bytes and no cost.
instruct castFF(regF dst)
%{
match(Set dst (CastFF dst));
size(0);
format %{ "# castFF of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
// CastDD rule: matches a CastDD node in place (the regD operand dst is both
// the input and the result of the match). size(0), the empty encoding and
// ins_cost(0) mean the rule contributes no code bytes and no cost.
instruct castDD(regD dst)
%{
match(Set dst (CastDD dst));
size(0);
format %{ "# castDD of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
// LoadP-locked same as a regular LoadP when used with compare-swap
instruct loadPLocked(rRegP dst, memory mem)
%{

View file

@ -764,6 +764,11 @@ int InstructForm::memory_operand(FormDict &globals) const {
bool InstructForm::captures_bottom_type(FormDict &globals) const {
if (_matrule && _matrule->_rChild &&
(!strcmp(_matrule->_rChild->_opType,"CastPP") || // new result type
!strcmp(_matrule->_rChild->_opType,"CastDD") ||
!strcmp(_matrule->_rChild->_opType,"CastFF") ||
!strcmp(_matrule->_rChild->_opType,"CastII") ||
!strcmp(_matrule->_rChild->_opType,"CastLL") ||
!strcmp(_matrule->_rChild->_opType,"CastVV") ||
!strcmp(_matrule->_rChild->_opType,"CastX2P") || // new result type
!strcmp(_matrule->_rChild->_opType,"DecodeN") ||
!strcmp(_matrule->_rChild->_opType,"EncodeP") ||

View file

@ -115,6 +115,37 @@ public:
virtual uint ideal_reg() const { return Op_RegL; }
};
// CastFFNode: a ConstraintCastNode that reports Op_RegF as its ideal register.
// The input node, the type and the carry_dependency flag are handed unchanged
// to the ConstraintCastNode constructor; the node is tagged Class_CastFF.
class CastFFNode: public ConstraintCastNode {
public:
  CastFFNode(Node* n, const Type* t, bool carry_dependency = false)
    : ConstraintCastNode(n, t, carry_dependency)
  {
    init_class_id(Class_CastFF);
  }

  virtual uint ideal_reg() const {
    return Op_RegF;
  }
  virtual int Opcode() const;
};
// CastDDNode: a ConstraintCastNode that reports Op_RegD as its ideal register.
// The input node, the type and the carry_dependency flag are handed unchanged
// to the ConstraintCastNode constructor; the node is tagged Class_CastDD.
class CastDDNode: public ConstraintCastNode {
public:
  CastDDNode(Node* n, const Type* t, bool carry_dependency = false)
    : ConstraintCastNode(n, t, carry_dependency)
  {
    init_class_id(Class_CastDD);
  }

  virtual uint ideal_reg() const {
    return Op_RegD;
  }
  virtual int Opcode() const;
};
// CastVVNode: a ConstraintCastNode whose ideal register is whatever its data
// input in(1) reports, so the cast follows the register kind of the value it
// wraps. The input node, the type and the carry_dependency flag are handed
// unchanged to the ConstraintCastNode constructor; the node is tagged
// Class_CastVV.
class CastVVNode: public ConstraintCastNode {
public:
  CastVVNode(Node* n, const Type* t, bool carry_dependency = false)
    : ConstraintCastNode(n, t, carry_dependency)
  {
    init_class_id(Class_CastVV);
  }

  virtual uint ideal_reg() const {
    return in(1)->ideal_reg();
  }
  virtual int Opcode() const;
};
//------------------------------CastPPNode-------------------------------------
// cast pointer to pointer (different type)
class CastPPNode: public ConstraintCastNode {

View file

@ -62,8 +62,11 @@ macro(CallLeafNoFP)
macro(CallRuntime)
macro(CallNative)
macro(CallStaticJava)
macro(CastDD)
macro(CastFF)
macro(CastII)
macro(CastLL)
macro(CastVV)
macro(CastX2P)
macro(CastP2X)
macro(CastPP)

View file

@ -4991,13 +4991,21 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
}
#endif
// if this is a load, check for anti-dependent stores
// We use a conservative algorithm to identify potential interfering
// instructions and for rescheduling the load. The users of the memory
// input of this load are examined. Any use which is not a load and is
// dominated by early is considered a potentially interfering store.
// This can produce false positives.
if (n->is_Load() && LCA != early) {
LCA = get_late_ctrl_with_anti_dep(n->as_Load(), early, LCA);
}
assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
return LCA;
}
// if this is a load, check for anti-dependent stores
// We use a conservative algorithm to identify potential interfering
// instructions and for rescheduling the load. The users of the memory
// input of this load are examined. Any use which is not a load and is
// dominated by early is considered a potentially interfering store.
// This can produce false positives.
Node* PhaseIdealLoop::get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA) {
int load_alias_idx = C->get_alias_index(n->adr_type());
if (C->alias_type(load_alias_idx)->is_rewritable()) {
Unique_Node_List worklist;
@ -5032,11 +5040,15 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
}
if (C->can_alias(adr_type, load_alias_idx)) {
LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
} else if (s->is_CFG()) {
} else if (s->is_CFG() && s->is_Multi()) {
// Look for the memory use of s (that is the use of its memory projection)
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
Node* s1 = s->fast_out(i);
assert(s1->is_Proj(), "projection expected");
if (_igvn.type(s1) == Type::MEMORY) {
worklist.push(s1);
for (DUIterator_Fast jmax, j = s1->fast_outs(jmax); j < jmax; j++) {
Node* s2 = s1->fast_out(j);
worklist.push(s2);
}
}
}
@ -5062,8 +5074,6 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
}
}
}
assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
return LCA;
}
@ -5091,23 +5101,24 @@ bool PhaseIdealLoop::is_dominator(Node *d, Node *n) {
// does not need to be cleared between calls to get_late_ctrl().
// Algorithm trades a larger constant factor for better asymptotic behavior
//
Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, Node *tag ) {
Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal(Node *n1, Node *n2, Node *tag_node) {
uint d1 = dom_depth(n1);
uint d2 = dom_depth(n2);
jlong tag = tag_node->_idx | (((jlong)_dom_lca_tags_round) << 32);
do {
if (d1 > d2) {
// current lca is deeper than n2
_dom_lca_tags.map(n1->_idx, tag);
_dom_lca_tags.at_put_grow(n1->_idx, tag);
n1 = idom(n1);
d1 = dom_depth(n1);
} else if (d1 < d2) {
// n2 is deeper than current lca
Node *memo = _dom_lca_tags[n2->_idx];
if( memo == tag ) {
jlong memo = _dom_lca_tags.at_grow(n2->_idx, 0);
if (memo == tag) {
return n1; // Return the current LCA
}
_dom_lca_tags.map(n2->_idx, tag);
_dom_lca_tags.at_put_grow(n2->_idx, tag);
n2 = idom(n2);
d2 = dom_depth(n2);
} else {
@ -5116,19 +5127,19 @@ Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, No
// to be searched more carefully.
// Scan up all the n1's with equal depth, looking for n2.
_dom_lca_tags.map(n1->_idx, tag);
_dom_lca_tags.at_put_grow(n1->_idx, tag);
Node *t1 = idom(n1);
while (dom_depth(t1) == d1) {
if (t1 == n2) return n2;
_dom_lca_tags.map(t1->_idx, tag);
_dom_lca_tags.at_put_grow(t1->_idx, tag);
t1 = idom(t1);
}
// Scan up all the n2's with equal depth, looking for n1.
_dom_lca_tags.map(n2->_idx, tag);
_dom_lca_tags.at_put_grow(n2->_idx, tag);
Node *t2 = idom(n2);
while (dom_depth(t2) == d2) {
if (t2 == n1) return n1;
_dom_lca_tags.map(t2->_idx, tag);
_dom_lca_tags.at_put_grow(t2->_idx, tag);
t2 = idom(t2);
}
// Move up to a new dominator-depth value as well as up the dom-tree.
@ -5147,25 +5158,11 @@ Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, No
// be of fixed size.
void PhaseIdealLoop::init_dom_lca_tags() {
uint limit = C->unique() + 1;
_dom_lca_tags.map( limit, NULL );
_dom_lca_tags.at_grow(limit, 0);
_dom_lca_tags_round = 0;
#ifdef ASSERT
for( uint i = 0; i < limit; ++i ) {
assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
}
#endif // ASSERT
}
//------------------------------clear_dom_lca_tags------------------------------
// Tag could be a node's integer index, 32bits instead of 64bits in some cases
// Intended use does not involve any growth for the array, so it could
// be of fixed size.
void PhaseIdealLoop::clear_dom_lca_tags() {
uint limit = C->unique() + 1;
_dom_lca_tags.map( limit, NULL );
_dom_lca_tags.clear();
#ifdef ASSERT
for( uint i = 0; i < limit; ++i ) {
assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
for (uint i = 0; i < limit; ++i) {
assert(_dom_lca_tags.at(i) == 0, "Must be distinct from each node pointer");
}
#endif // ASSERT
}

View file

@ -867,9 +867,9 @@ private:
// Support for faster execution of get_late_ctrl()/dom_lca()
// when a node has many uses and dominator depth is deep.
Node_Array _dom_lca_tags;
GrowableArray<jlong> _dom_lca_tags;
uint _dom_lca_tags_round;
void init_dom_lca_tags();
void clear_dom_lca_tags();
// Helper for debugging bad dominance relationships
bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early);
@ -1063,7 +1063,6 @@ private:
_igvn(igvn),
_verify_me(nullptr),
_verify_only(false),
_dom_lca_tags(arena()), // Thread::resource_area
_nodes_required(UINT_MAX) {
assert(mode != LoopOptsVerify, "wrong constructor to verify IdealLoop");
build_and_optimize(mode);
@ -1077,7 +1076,6 @@ private:
_igvn(igvn),
_verify_me(verify_me),
_verify_only(verify_me == nullptr),
_dom_lca_tags(arena()), // Thread::resource_area
_nodes_required(UINT_MAX) {
build_and_optimize(LoopOptsVerify);
}
@ -1485,7 +1483,7 @@ private:
void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
bool split_up( Node *n, Node *blk1, Node *blk2 );
void sink_use( Node *use, Node *post_loop );
Node *place_near_use( Node *useblock ) const;
Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
void try_move_store_after_loop(Node* n);
bool identical_backtoback_ifs(Node *n);
@ -1613,7 +1611,15 @@ public:
LoopNode* create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head, LongCountedLoopEndNode* exit_test);
bool is_safe_load_ctrl(Node* ctrl);
Node* get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA);
bool ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl);
bool ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop);
Node* compute_early_ctrl(Node* n, Node* n_ctrl);
void try_sink_out_of_loop(Node* n);
};

View file

@ -1134,21 +1134,26 @@ static bool merge_point_safe(Node* region) {
}
//------------------------------place_near_use---------------------------------
// Place some computation next to use but not inside inner loops.
// For inner loop uses move it to the preheader area.
Node *PhaseIdealLoop::place_near_use(Node *useblock) const {
IdealLoopTree *u_loop = get_loop( useblock );
if (u_loop->_irreducible) {
//------------------------------place_outside_loop---------------------------------
// Place some computation outside of this loop on the path to the use passed as argument
// useblock: control of a use; asserted not to belong to a loop accepted by
//           loop->is_member().
// loop:     the loop the computation is being moved out of.
// Returns the control node reached by climbing immediate dominators from
// useblock and stopping just before the first dominator that
// loop->is_member() accepts.
Node* PhaseIdealLoop::place_outside_loop(Node* useblock, IdealLoopTree* loop) const {
Node* head = loop->_head;
assert(!loop->is_member(get_loop(useblock)), "must be outside loop");
// For a strip mined loop, treat the parent loop (whose head is asserted to be
// the OuterStripMinedLoop) as the loop to get out of.
if (head->is_Loop() && head->as_Loop()->is_strip_mined()) {
loop = loop->_parent;
assert(loop->_head->is_OuterStripMinedLoop(), "malformed strip mined loop");
}
// Pick control right outside the loop
for (;;) {
Node* dom = idom(useblock);
// Stop as soon as the next dominator would be back inside the loop.
if (loop->is_member(get_loop(dom))) {
break;
}
useblock = dom;
}
assert(find_non_split_ctrl(useblock) == useblock, "should be non split control");
return useblock;
}
if (u_loop->_child) {
if (useblock == u_loop->_head && u_loop->_head->is_OuterStripMinedLoop()) {
return u_loop->_head->in(LoopNode::EntryControl);
}
return useblock;
}
return u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
}
@ -1402,128 +1407,7 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
}
}
// See if a shared loop-varying computation has no loop-varying uses.
// Happens if something is only used for JVM state in uncommon trap exits,
// like various versions of induction variable+offset. Clone the
// computation per usage to allow it to sink out of the loop.
if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
Node *n_ctrl = get_ctrl(n);
IdealLoopTree *n_loop = get_loop(n_ctrl);
if( n_loop != _ltree_root ) {
DUIterator_Fast imax, i = n->fast_outs(imax);
for (; i < imax; i++) {
Node* u = n->fast_out(i);
if( !has_ctrl(u) ) break; // Found control user
IdealLoopTree *u_loop = get_loop(get_ctrl(u));
if( u_loop == n_loop ) break; // Found loop-varying use
if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
}
bool did_break = (i < imax); // Did we break out of the previous loop?
if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
Node *late_load_ctrl = NULL;
if (n->is_Load()) {
// If n is a load, get and save the result from get_late_ctrl(),
// to be later used in calculating the control for n's clones.
clear_dom_lca_tags();
late_load_ctrl = get_late_ctrl(n, n_ctrl);
}
// If n is a load, and the late control is the same as the current
// control, then the cloning of n is a pointless exercise, because
// GVN will ensure that we end up where we started.
if (!n->is_Load() || (late_load_ctrl != n_ctrl && is_safe_load_ctrl(late_load_ctrl))) {
Node* outer_loop_clone = NULL;
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
Node *u = n->last_out(j); // Clone private computation per use
_igvn.rehash_node_delayed(u);
Node *x = n->clone(); // Clone computation
Node *x_ctrl = NULL;
if( u->is_Phi() ) {
// Replace all uses of normal nodes. Replace Phi uses
// individually, so the separate Nodes can sink down
// different paths.
uint k = 1;
while( u->in(k) != n ) k++;
u->set_req( k, x );
// x goes next to Phi input path
x_ctrl = u->in(0)->in(k);
--j;
} else { // Normal use
// Replace all uses
for( uint k = 0; k < u->req(); k++ ) {
if( u->in(k) == n ) {
u->set_req( k, x );
--j;
}
}
x_ctrl = get_ctrl(u);
}
// Find control for 'x' next to use but not inside inner loops.
// For inner loop uses get the preheader area.
x_ctrl = place_near_use(x_ctrl);
if (n->is_Load()) {
// For loads, add a control edge to a CFG node outside of the loop
// to force them to not combine and return back inside the loop
// during GVN optimization (4641526).
//
// Because we are setting the actual control input, factor in
// the result from get_late_ctrl() so we respect any
// anti-dependences. (6233005).
x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
// Don't allow the control input to be a CFG splitting node.
// Such nodes should only have ProjNodes as outs, e.g. IfNode
// should only have IfTrueNode and IfFalseNode (4985384).
x_ctrl = find_non_split_ctrl(x_ctrl);
IdealLoopTree* x_loop = get_loop(x_ctrl);
Node* x_head = x_loop->_head;
if (x_head->is_Loop() && (x_head->is_OuterStripMinedLoop() || x_head->as_Loop()->is_strip_mined())) {
if (is_dominator(n_ctrl, x_head) && n_ctrl != x_head) {
// Anti dependence analysis is sometimes too
// conservative: a store in the outer strip mined loop
// can prevent a load from floating out of the outer
// strip mined loop but the load may not be referenced
// from the safepoint: loop strip mining verification
// code reports a problem in that case. Make sure the
// load is not moved in the outer strip mined loop in
// that case.
x_ctrl = x_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
} else if (x_head->is_OuterStripMinedLoop()) {
// Do not add duplicate LoadNodes to the outer strip mined loop
if (outer_loop_clone != NULL) {
_igvn.replace_node(x, outer_loop_clone);
continue;
}
outer_loop_clone = x;
}
}
assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
x->set_req(0, x_ctrl);
}
register_new_node(x, x_ctrl);
// Some institutional knowledge is needed here: 'x' is
// yanked because if the optimizer runs GVN on it all the
// cloned x's will common up and undo this optimization and
// be forced back in the loop.
// I tried setting control edges on the x's to force them to
// not combine, but the matching gets worried when it tries
// to fold a StoreP and an AddP together (as part of an
// address expression) and the AddP and StoreP have
// different controls.
if (!x->is_Load() && !x->is_DecodeNarrowPtr()) {
_igvn._worklist.yank(x);
}
}
_igvn.remove_dead_node(n);
}
}
}
}
try_sink_out_of_loop(n);
try_move_store_after_loop(n);
@ -1536,10 +1420,200 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
}
}
bool PhaseIdealLoop::is_safe_load_ctrl(Node* ctrl) {
if (ctrl->is_Proj() && ctrl->in(0)->is_Call() && ctrl->has_out_with(Op_Catch)) {
// See if a shared loop-varying computation has no loop-varying uses.
// Happens if something is only used for JVM state in uncommon trap exits,
// like various versions of induction variable+offset. Clone the
// computation per usage to allow it to sink out of the loop.
//
// A candidate must have a control assignment (has_ctrl) and must not be a Phi,
// Bool, Proj, MergeMem, CMove or Opaque4 node. Its control must be in a loop
// other than the root and it must have more than one use. One clone is made
// per use and placed at the control returned by place_outside_loop():
//  - a Load clone gets that control as its control input;
//  - a clone of a node that already had a control input is re-pinned there;
//  - any other clone (except DecodeNarrowPtr) is pinned indirectly through a
//    cast, inserted on one of its in-loop inputs, that has the control set.
// The original node is removed once every use has been rewired.
void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
if (has_ctrl(n) &&
!n->is_Phi() &&
!n->is_Bool() &&
!n->is_Proj() &&
!n->is_MergeMem() &&
!n->is_CMove() &&
n->Opcode() != Op_Opaque4) {
Node *n_ctrl = get_ctrl(n);
IdealLoopTree *n_loop = get_loop(n_ctrl);
if (n_loop != _ltree_root && n->outcnt() > 1) {
// Compute early control: needed for anti-dependence analysis. It's also possible that as a result of
// previous transformations in this loop opts round, the node can be hoisted now: early control will tell us.
Node* early_ctrl = compute_early_ctrl(n, n_ctrl);
if (n_loop->is_member(get_loop(early_ctrl)) && // check that this one can't be hoisted now
ctrl_of_all_uses_out_of_loop(n, early_ctrl, n_loop)) { // All uses in outer loops!
assert(!n->is_Store() && !n->is_LoadStore(), "no node with a side effect");
Node* outer_loop_clone = NULL;
// Walk the uses starting from the last one. Each iteration rewires a use of
// n to a fresh clone and lowers j by the number of out edges of n it removed.
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin;) {
Node* u = n->last_out(j); // Clone private computation per use
_igvn.rehash_node_delayed(u);
Node* x = n->clone(); // Clone computation
Node* x_ctrl = NULL;
if (u->is_Phi()) {
// Replace all uses of normal nodes. Replace Phi uses
// individually, so the separate Nodes can sink down
// different paths.
uint k = 1;
while (u->in(k) != n) k++;
u->set_req(k, x);
// x goes next to Phi input path
x_ctrl = u->in(0)->in(k);
// Find control for 'x' next to use but not inside inner loops.
x_ctrl = place_outside_loop(x_ctrl, n_loop);
--j;
} else { // Normal use
if (has_ctrl(u)) {
x_ctrl = get_ctrl(u);
} else {
x_ctrl = u->in(0);
}
// Find control for 'x' next to use but not inside inner loops.
x_ctrl = place_outside_loop(x_ctrl, n_loop);
// Replace all uses
if (u->is_ConstraintCast() && u->bottom_type()->higher_equal(_igvn.type(n)) && u->in(0) == x_ctrl) {
// If we're sinking a chain of data nodes, we might have inserted a cast to pin the use which is not necessary
// anymore now that we're going to pin n as well
_igvn.replace_node(u, x);
--j;
} else {
int nb = u->replace_edge(n, x, &_igvn);
j -= nb;
}
}
if (n->is_Load()) {
// For loads, add a control edge to a CFG node outside of the loop
// to force them to not combine and return back inside the loop
// during GVN optimization (4641526).
assert(x_ctrl == get_late_ctrl_with_anti_dep(x->as_Load(), early_ctrl, x_ctrl), "anti-dependences were already checked");
IdealLoopTree* x_loop = get_loop(x_ctrl);
Node* x_head = x_loop->_head;
// When x_ctrl belongs to a loop headed by an OuterStripMinedLoop, keep a
// single clone there: later clones are replaced by the first one.
if (x_head->is_Loop() && x_head->is_OuterStripMinedLoop()) {
// Do not add duplicate LoadNodes to the outer strip mined loop
if (outer_loop_clone != NULL) {
_igvn.replace_node(x, outer_loop_clone);
continue;
}
outer_loop_clone = x;
}
x->set_req(0, x_ctrl);
} else if (n->in(0) != NULL){
// n was already control dependent: move the clone's control input out of the loop.
x->set_req(0, x_ctrl);
}
assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
assert(!n_loop->is_member(get_loop(x_ctrl)), "should have moved out of loop");
register_new_node(x, x_ctrl);
if (x->in(0) == NULL && !x->is_DecodeNarrowPtr()) {
assert(!x->is_Load(), "load should be pinned");
// Use a cast node to pin clone out of loop
// The first input of x whose control is inside n_loop and whose type is
// int, long, pointer, float, double or vector is routed through a cast
// built with its third constructor argument set to true (carry_dependency
// in the cast classes visible in this change); the cast gets x_ctrl as its
// control input. Inputs of any other type are skipped.
Node* cast = NULL;
for (uint k = 0; k < x->req(); k++) {
Node* in = x->in(k);
if (in != NULL && n_loop->is_member(get_loop(get_ctrl(in)))) {
const Type* in_t = _igvn.type(in);
if (in_t->isa_int()) {
cast = new CastIINode(in, in_t, true);
} else if (in_t->isa_long()) {
cast = new CastLLNode(in, in_t, true);
} else if (in_t->isa_ptr()) {
cast = new CastPPNode(in, in_t, true);
} else if (in_t->isa_float()) {
cast = new CastFFNode(in, in_t, true);
} else if (in_t->isa_double()) {
cast = new CastDDNode(in, in_t, true);
} else if (in_t->isa_vect()) {
cast = new CastVVNode(in, in_t, true);
}
}
if (cast != NULL) {
cast->set_req(0, x_ctrl);
register_new_node(cast, x_ctrl);
x->replace_edge(in, cast);
break;
}
}
assert(cast != NULL, "must have added a cast to pin the node");
}
}
_igvn.remove_dead_node(n);
}
// ctrl_of_all_uses_out_of_loop() increments _dom_lca_tags_round for the uses
// it examines; restart the counter from 0 once this node has been handled.
_dom_lca_tags_round = 0;
}
}
}
// Compute the early control of n by searching backwards through its inputs.
// The walk starts at n and expands every node that is neither a CFG node nor
// pinned. A CFG node contributes itself as a candidate control and a pinned
// node contributes its control input in(0); neither is expanded further.
// Among the candidates, the result is replaced each time the current result
// dominates the new candidate. Every candidate, and the final result, is
// asserted to dominate n_ctrl.
Node* PhaseIdealLoop::compute_early_ctrl(Node* n, Node* n_ctrl) {
  Node* early_ctrl = NULL;
  ResourceMark rm;
  Unique_Node_List wq;
  wq.push(n);
  for (uint next = 0; next < wq.size(); next++) {
    Node* cur = wq.at(next);
    Node* cand = NULL;
    if (cur->is_CFG()) {
      cand = cur;
    } else if (cur->pinned()) {
      cand = cur->in(0);
    } else {
      // Floating data node: keep searching through all of its non-null inputs.
      const uint cnt = cur->req();
      for (uint idx = 0; idx < cnt; idx++) {
        Node* input = cur->in(idx);
        if (input != NULL) {
          wq.push(input);
        }
      }
    }
    if (cand == NULL) {
      continue;
    }
    assert(is_dominator(cand, n_ctrl), "");
    if (early_ctrl == NULL || is_dominator(early_ctrl, cand)) {
      early_ctrl = cand;
    }
  }
  assert(is_dominator(early_ctrl, n_ctrl), "early control must dominate current control");
  return early_ctrl;
}
bool PhaseIdealLoop::ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop) {
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u = n->fast_out(i);
if (u->Opcode() == Op_Opaque1) {
return false; // Found loop limit, bugfix for 4677003
}
// We can't reuse tags in PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal() so make sure calls to
// get_late_ctrl_with_anti_dep() use their own tag
_dom_lca_tags_round++;
assert(_dom_lca_tags_round != 0, "shouldn't wrap around");
if (u->is_Phi()) {
for (uint j = 1; j < u->req(); ++j) {
if (u->in(j) == n && !ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, u->in(0)->in(j))) {
return false;
}
}
} else {
Node* ctrl = has_ctrl(u) ? get_ctrl(u) : u->in(0);
if (!ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, ctrl)) {
return false;
}
}
}
return true;
}
bool PhaseIdealLoop::ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl) {
if (n->is_Load()) {
ctrl = get_late_ctrl_with_anti_dep(n->as_Load(), n_ctrl, ctrl);
}
IdealLoopTree *u_loop = get_loop(ctrl);
if (u_loop == n_loop) {
return false; // Found loop-varying use
}
if (n_loop->is_member(u_loop)) {
return false; // Found use in inner loop
}
return true;
}

View file

@ -57,6 +57,9 @@ class CallNode;
class CallRuntimeNode;
class CallNativeNode;
class CallStaticJavaNode;
class CastFFNode;
class CastDDNode;
class CastVVNode;
class CastIINode;
class CastLLNode;
class CatchNode;
@ -691,6 +694,9 @@ public:
DEFINE_CLASS_ID(CastII, ConstraintCast, 0)
DEFINE_CLASS_ID(CheckCastPP, ConstraintCast, 1)
DEFINE_CLASS_ID(CastLL, ConstraintCast, 2)
DEFINE_CLASS_ID(CastFF, ConstraintCast, 3)
DEFINE_CLASS_ID(CastDD, ConstraintCast, 4)
DEFINE_CLASS_ID(CastVV, ConstraintCast, 5)
DEFINE_CLASS_ID(CMove, Type, 3)
DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
DEFINE_CLASS_ID(DecodeNarrowPtr, Type, 5)