8252372: Check if cloning is required to move loads out of loops in PhaseIdealLoop::split_if_with_blocks_post()

Reviewed-by: thartmann, kvn
This commit is contained in:
Roland Westrelin 2021-05-26 09:20:42 +00:00
parent 039441689d
commit 9d305b9c06
14 changed files with 541 additions and 215 deletions

View file

@ -8735,6 +8735,61 @@ instruct castLL(iRegL dst)
ins_pipe(pipe_class_empty); ins_pipe(pipe_class_empty);
%} %}
// CastFF on a vRegF operand: matched in place (source and destination are both
// dst) and emits nothing -- the encoding is empty and size and cost are 0.
instruct castFF(vRegF dst)
%{
match(Set dst (CastFF dst));
size(0);
format %{ "# castFF of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastDD on a vRegD operand: matched in place (source and destination are both
// dst) and emits nothing -- the encoding is empty and size and cost are 0.
instruct castDD(vRegD dst)
%{
match(Set dst (CastDD dst));
size(0);
format %{ "# castDD of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastVV variant for a vecD operand: matched in place and emits nothing
// (empty encoding, size 0, cost 0).
instruct castVVD(vecD dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastVV variant for a vecX operand: matched in place and emits nothing
// (empty encoding, size 0, cost 0).
instruct castVVX(vecX dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// CastVV variant for a vReg operand: matched in place and emits nothing
// (empty encoding, size 0, cost 0).
// NOTE(review): vReg is presumably the scalable (SVE) vector operand class -- confirm.
instruct castVV(vReg dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(pipe_class_empty);
%}
// ============================================================================ // ============================================================================
// Atomic operation instructions // Atomic operation instructions
// //

View file

@ -5182,6 +5182,39 @@ instruct castLL( iRegL dst ) %{
ins_pipe(empty); ins_pipe(empty);
%} %}
// CastFF on a regF operand: matched in place (source and destination are both
// dst) with an empty encoding and a cost of 0.
instruct castFF( regF dst ) %{
match(Set dst (CastFF dst));
format %{ "! castFF of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
// CastDD on a regD operand: matched in place (source and destination are both
// dst) with an empty encoding and a cost of 0.
instruct castDD( regD dst ) %{
match(Set dst (CastDD dst));
format %{ "! castDD of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
// CastVV variant for a vecD operand: matched in place with an empty encoding
// and a cost of 0.
instruct castVVD( vecD dst ) %{
match(Set dst (CastVV dst));
format %{ "! castVV of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
// CastVV variant for a vecX operand: matched in place with an empty encoding
// and a cost of 0.
instruct castVVX( vecX dst ) %{
match(Set dst (CastVV dst));
format %{ "! castVV of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe(empty);
%}
//----------Arithmetic Instructions-------------------------------------------- //----------Arithmetic Instructions--------------------------------------------
// Addition Instructions // Addition Instructions
// Register Addition // Register Addition

View file

@ -10335,6 +10335,38 @@ instruct castLL(iRegLdst dst) %{
ins_pipe(pipe_class_default); ins_pipe(pipe_class_default);
%} %}
// CastFF on a regF operand: matched in place (source and destination are both
// dst); declared size is 0 and the encoding is empty, so nothing is emitted.
instruct castFF(regF dst) %{
match(Set dst (CastFF dst));
format %{ " -- \t// castFF of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
// CastDD on a regD operand: matched in place (source and destination are both
// dst); declared size is 0 and the encoding is empty, so nothing is emitted.
instruct castDD(regD dst) %{
match(Set dst (CastDD dst));
format %{ " -- \t// castDD of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
// CastVV variant whose operand is an iRegLdst (long integer register):
// matched in place, size 0, empty encoding.
// NOTE(review): the name suggests this covers 8-byte vectors that this port
// keeps in a general-purpose long register -- confirm against the port's
// vector register mapping.
instruct castVV8(iRegLdst dst) %{
match(Set dst (CastVV dst));
format %{ " -- \t// castVV of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
// CastVV variant for a vecX operand: matched in place, size 0, empty encoding.
instruct castVV16(vecX dst) %{
match(Set dst (CastVV dst));
format %{ " -- \t// castVV of $dst" %}
size(0);
ins_encode( /*empty*/ );
ins_pipe(pipe_class_default);
%}
instruct checkCastPP(iRegPdst dst) %{ instruct checkCastPP(iRegPdst dst) %{
match(Set dst (CheckCastPP dst)); match(Set dst (CheckCastPP dst));
format %{ " -- \t// checkcastPP of $dst" %} format %{ " -- \t// checkcastPP of $dst" %}

View file

@ -5260,6 +5260,30 @@ instruct castLL(iRegL dst) %{
ins_pipe(pipe_class_dummy); ins_pipe(pipe_class_dummy);
%} %}
// CastFF on a regF operand: matched in place (source and destination are both
// dst); declared size is 0 and the encoding is empty, so nothing is emitted.
instruct castFF(regF dst) %{
match(Set dst (CastFF dst));
size(0);
format %{ "# castFF of $dst" %}
ins_encode(/*empty*/);
ins_pipe(pipe_class_dummy);
%}
// CastDD on a regD operand: matched in place (source and destination are both
// dst); declared size is 0 and the encoding is empty, so nothing is emitted.
instruct castDD(regD dst) %{
match(Set dst (CastDD dst));
size(0);
format %{ "# castDD of $dst" %}
ins_encode(/*empty*/);
ins_pipe(pipe_class_dummy);
%}
// CastVV with an iRegL (long integer register) operand: matched in place,
// size 0, empty encoding.
// NOTE(review): using iRegL for a vector cast presumably reflects how this
// port represents the vectors it supports -- confirm.
instruct castVV(iRegL dst) %{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/*empty*/);
ins_pipe(pipe_class_dummy);
%}
//----------Conditional_store-------------------------------------------------- //----------Conditional_store--------------------------------------------------
// Conditional-store of the updated heap-top. // Conditional-store of the updated heap-top.
// Used during allocation of the shared heap. // Used during allocation of the shared heap.

View file

@ -8127,3 +8127,25 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp,
ins_pipe( pipe_slow ); ins_pipe( pipe_slow );
%} %}
#endif // _LP64 #endif // _LP64
// CastVV on a vec operand: matched in place (source and destination are both
// dst) and emits nothing -- the encoding is empty and size and cost are 0.
instruct castVV(vec dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
// Same as castVV but for the legVec operand class: matched in place and emits
// nothing (empty encoding, size 0, cost 0).
instruct castVVLeg(legVec dst)
%{
match(Set dst (CastVV dst));
size(0);
format %{ "# castVV of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}

View file

@ -7178,6 +7178,22 @@ instruct castLL( eRegL dst ) %{
ins_pipe( empty ); ins_pipe( empty );
%} %}
// CastFF on a regF operand: matched in place (source and destination are both
// dst) with an empty encoding and a cost of 0.
instruct castFF( regF dst ) %{
match(Set dst (CastFF dst));
format %{ "#castFF of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe( empty );
%}
// CastDD on a regD operand: matched in place (source and destination are both
// dst) with an empty encoding and a cost of 0.
instruct castDD( regD dst ) %{
match(Set dst (CastDD dst));
format %{ "#castDD of $dst" %}
ins_encode( /*empty encoding*/ );
ins_cost(0);
ins_pipe( empty );
%}
// Load-locked - same as a regular pointer load when used with compare-swap // Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{ instruct loadPLocked(eRegP dst, memory mem) %{
match(Set dst (LoadPLocked mem)); match(Set dst (LoadPLocked mem));

View file

@ -7624,6 +7624,28 @@ instruct castLL(rRegL dst)
ins_pipe(empty); ins_pipe(empty);
%} %}
// CastFF on a regF operand: matched in place (source and destination are both
// dst) and emits nothing -- the encoding is empty and size and cost are 0.
instruct castFF(regF dst)
%{
match(Set dst (CastFF dst));
size(0);
format %{ "# castFF of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
// CastDD on a regD operand: matched in place (source and destination are both
// dst) and emits nothing -- the encoding is empty and size and cost are 0.
instruct castDD(regD dst)
%{
match(Set dst (CastDD dst));
size(0);
format %{ "# castDD of $dst" %}
ins_encode(/* empty encoding */);
ins_cost(0);
ins_pipe(empty);
%}
// LoadP-locked same as a regular LoadP when used with compare-swap // LoadP-locked same as a regular LoadP when used with compare-swap
instruct loadPLocked(rRegP dst, memory mem) instruct loadPLocked(rRegP dst, memory mem)
%{ %{

View file

@ -764,6 +764,11 @@ int InstructForm::memory_operand(FormDict &globals) const {
bool InstructForm::captures_bottom_type(FormDict &globals) const { bool InstructForm::captures_bottom_type(FormDict &globals) const {
if (_matrule && _matrule->_rChild && if (_matrule && _matrule->_rChild &&
(!strcmp(_matrule->_rChild->_opType,"CastPP") || // new result type (!strcmp(_matrule->_rChild->_opType,"CastPP") || // new result type
!strcmp(_matrule->_rChild->_opType,"CastDD") ||
!strcmp(_matrule->_rChild->_opType,"CastFF") ||
!strcmp(_matrule->_rChild->_opType,"CastII") ||
!strcmp(_matrule->_rChild->_opType,"CastLL") ||
!strcmp(_matrule->_rChild->_opType,"CastVV") ||
!strcmp(_matrule->_rChild->_opType,"CastX2P") || // new result type !strcmp(_matrule->_rChild->_opType,"CastX2P") || // new result type
!strcmp(_matrule->_rChild->_opType,"DecodeN") || !strcmp(_matrule->_rChild->_opType,"DecodeN") ||
!strcmp(_matrule->_rChild->_opType,"EncodeP") || !strcmp(_matrule->_rChild->_opType,"EncodeP") ||

View file

@ -115,6 +115,37 @@ public:
virtual uint ideal_reg() const { return Op_RegL; } virtual uint ideal_reg() const { return Op_RegL; }
}; };
//------------------------------CastFFNode-------------------------------------
// ConstraintCastNode specialization tagged with Class_CastFF; its result lives
// in a float register (ideal_reg() is Op_RegF).
// The constructor forwards the input node n, the result type t and the
// carry_dependency flag (default false) to ConstraintCastNode unchanged.
class CastFFNode: public ConstraintCastNode {
public:
CastFFNode(Node* n, const Type* t, bool carry_dependency = false)
: ConstraintCastNode(n, t, carry_dependency){
init_class_id(Class_CastFF);
}
virtual int Opcode() const;
virtual uint ideal_reg() const { return Op_RegF; }
};
//------------------------------CastDDNode-------------------------------------
// ConstraintCastNode specialization tagged with Class_CastDD; its result lives
// in a double register (ideal_reg() is Op_RegD).
// The constructor forwards the input node n, the result type t and the
// carry_dependency flag (default false) to ConstraintCastNode unchanged.
class CastDDNode: public ConstraintCastNode {
public:
CastDDNode(Node* n, const Type* t, bool carry_dependency = false)
: ConstraintCastNode(n, t, carry_dependency){
init_class_id(Class_CastDD);
}
virtual int Opcode() const;
virtual uint ideal_reg() const { return Op_RegD; }
};
//------------------------------CastVVNode-------------------------------------
// ConstraintCastNode specialization tagged with Class_CastVV.
// Unlike the scalar casts it has no fixed register class: ideal_reg() is
// whatever the value being cast (input 1) reports.
// The constructor forwards the input node n, the result type t and the
// carry_dependency flag (default false) to ConstraintCastNode unchanged.
class CastVVNode: public ConstraintCastNode {
public:
CastVVNode(Node* n, const Type* t, bool carry_dependency = false)
: ConstraintCastNode(n, t, carry_dependency){
init_class_id(Class_CastVV);
}
virtual int Opcode() const;
virtual uint ideal_reg() const { return in(1)->ideal_reg(); }
};
//------------------------------CastPPNode------------------------------------- //------------------------------CastPPNode-------------------------------------
// cast pointer to pointer (different type) // cast pointer to pointer (different type)
class CastPPNode: public ConstraintCastNode { class CastPPNode: public ConstraintCastNode {

View file

@ -62,8 +62,11 @@ macro(CallLeafNoFP)
macro(CallRuntime) macro(CallRuntime)
macro(CallNative) macro(CallNative)
macro(CallStaticJava) macro(CallStaticJava)
macro(CastDD)
macro(CastFF)
macro(CastII) macro(CastII)
macro(CastLL) macro(CastLL)
macro(CastVV)
macro(CastX2P) macro(CastX2P)
macro(CastP2X) macro(CastP2X)
macro(CastPP) macro(CastPP)

View file

@ -4991,52 +4991,64 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
} }
#endif #endif
// if this is a load, check for anti-dependent stores
// We use a conservative algorithm to identify potential interfering
// instructions and for rescheduling the load. The users of the memory
// input of this load are examined. Any use which is not a load and is
// dominated by early is considered a potentially interfering store.
// This can produce false positives.
if (n->is_Load() && LCA != early) { if (n->is_Load() && LCA != early) {
int load_alias_idx = C->get_alias_index(n->adr_type()); LCA = get_late_ctrl_with_anti_dep(n->as_Load(), early, LCA);
if (C->alias_type(load_alias_idx)->is_rewritable()) { }
Unique_Node_List worklist;
Node* mem = n->in(MemNode::Memory); assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) { return LCA;
Node* s = mem->fast_out(i); }
worklist.push(s);
} // if this is a load, check for anti-dependent stores
for (uint i = 0; i < worklist.size() && LCA != early; i++) { // We use a conservative algorithm to identify potential interfering
Node* s = worklist.at(i); // instructions and for rescheduling the load. The users of the memory
if (s->is_Load() || s->Opcode() == Op_SafePoint || // input of this load are examined. Any use which is not a load and is
(s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0) || // dominated by early is considered a potentially interfering store.
s->is_Phi()) { // This can produce false positives.
continue; Node* PhaseIdealLoop::get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA) {
} else if (s->is_MergeMem()) { int load_alias_idx = C->get_alias_index(n->adr_type());
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { if (C->alias_type(load_alias_idx)->is_rewritable()) {
Node* s1 = s->fast_out(i); Unique_Node_List worklist;
worklist.push(s1);
} Node* mem = n->in(MemNode::Memory);
} else { for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
Node* sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0); Node* s = mem->fast_out(i);
assert(sctrl != NULL || !s->is_reachable_from_root(), "must have control"); worklist.push(s);
if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) { }
const TypePtr* adr_type = s->adr_type(); for (uint i = 0; i < worklist.size() && LCA != early; i++) {
if (s->is_ArrayCopy()) { Node* s = worklist.at(i);
// Copy to known instance needs destination type to test for aliasing if (s->is_Load() || s->Opcode() == Op_SafePoint ||
const TypePtr* dest_type = s->as_ArrayCopy()->_dest_type; (s->is_CallStaticJava() && s->as_CallStaticJava()->uncommon_trap_request() != 0) ||
if (dest_type != TypeOopPtr::BOTTOM) { s->is_Phi()) {
adr_type = dest_type; continue;
} } else if (s->is_MergeMem()) {
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
Node* s1 = s->fast_out(i);
worklist.push(s1);
}
} else {
Node* sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
assert(sctrl != NULL || !s->is_reachable_from_root(), "must have control");
if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
const TypePtr* adr_type = s->adr_type();
if (s->is_ArrayCopy()) {
// Copy to known instance needs destination type to test for aliasing
const TypePtr* dest_type = s->as_ArrayCopy()->_dest_type;
if (dest_type != TypeOopPtr::BOTTOM) {
adr_type = dest_type;
} }
if (C->can_alias(adr_type, load_alias_idx)) { }
LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n); if (C->can_alias(adr_type, load_alias_idx)) {
} else if (s->is_CFG()) { LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) { } else if (s->is_CFG() && s->is_Multi()) {
Node* s1 = s->fast_out(i); // Look for the memory use of s (that is the use of its memory projection)
if (_igvn.type(s1) == Type::MEMORY) { for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
worklist.push(s1); Node* s1 = s->fast_out(i);
assert(s1->is_Proj(), "projection expected");
if (_igvn.type(s1) == Type::MEMORY) {
for (DUIterator_Fast jmax, j = s1->fast_outs(jmax); j < jmax; j++) {
Node* s2 = s1->fast_out(j);
worklist.push(s2);
} }
} }
} }
@ -5062,8 +5074,6 @@ Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
} }
} }
} }
assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
return LCA; return LCA;
} }
@ -5091,23 +5101,24 @@ bool PhaseIdealLoop::is_dominator(Node *d, Node *n) {
// does not need to be cleared between calls to get_late_ctrl(). // does not need to be cleared between calls to get_late_ctrl().
// Algorithm trades a larger constant factor for better asymptotic behavior // Algorithm trades a larger constant factor for better asymptotic behavior
// //
Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, Node *tag ) { Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal(Node *n1, Node *n2, Node *tag_node) {
uint d1 = dom_depth(n1); uint d1 = dom_depth(n1);
uint d2 = dom_depth(n2); uint d2 = dom_depth(n2);
jlong tag = tag_node->_idx | (((jlong)_dom_lca_tags_round) << 32);
do { do {
if (d1 > d2) { if (d1 > d2) {
// current lca is deeper than n2 // current lca is deeper than n2
_dom_lca_tags.map(n1->_idx, tag); _dom_lca_tags.at_put_grow(n1->_idx, tag);
n1 = idom(n1); n1 = idom(n1);
d1 = dom_depth(n1); d1 = dom_depth(n1);
} else if (d1 < d2) { } else if (d1 < d2) {
// n2 is deeper than current lca // n2 is deeper than current lca
Node *memo = _dom_lca_tags[n2->_idx]; jlong memo = _dom_lca_tags.at_grow(n2->_idx, 0);
if( memo == tag ) { if (memo == tag) {
return n1; // Return the current LCA return n1; // Return the current LCA
} }
_dom_lca_tags.map(n2->_idx, tag); _dom_lca_tags.at_put_grow(n2->_idx, tag);
n2 = idom(n2); n2 = idom(n2);
d2 = dom_depth(n2); d2 = dom_depth(n2);
} else { } else {
@ -5116,19 +5127,19 @@ Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, No
// to be searched more carefully. // to be searched more carefully.
// Scan up all the n1's with equal depth, looking for n2. // Scan up all the n1's with equal depth, looking for n2.
_dom_lca_tags.map(n1->_idx, tag); _dom_lca_tags.at_put_grow(n1->_idx, tag);
Node *t1 = idom(n1); Node *t1 = idom(n1);
while (dom_depth(t1) == d1) { while (dom_depth(t1) == d1) {
if (t1 == n2) return n2; if (t1 == n2) return n2;
_dom_lca_tags.map(t1->_idx, tag); _dom_lca_tags.at_put_grow(t1->_idx, tag);
t1 = idom(t1); t1 = idom(t1);
} }
// Scan up all the n2's with equal depth, looking for n1. // Scan up all the n2's with equal depth, looking for n1.
_dom_lca_tags.map(n2->_idx, tag); _dom_lca_tags.at_put_grow(n2->_idx, tag);
Node *t2 = idom(n2); Node *t2 = idom(n2);
while (dom_depth(t2) == d2) { while (dom_depth(t2) == d2) {
if (t2 == n1) return n1; if (t2 == n1) return n1;
_dom_lca_tags.map(t2->_idx, tag); _dom_lca_tags.at_put_grow(t2->_idx, tag);
t2 = idom(t2); t2 = idom(t2);
} }
// Move up to a new dominator-depth value as well as up the dom-tree. // Move up to a new dominator-depth value as well as up the dom-tree.
@ -5147,25 +5158,11 @@ Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, No
// be of fixed size. // be of fixed size.
void PhaseIdealLoop::init_dom_lca_tags() { void PhaseIdealLoop::init_dom_lca_tags() {
uint limit = C->unique() + 1; uint limit = C->unique() + 1;
_dom_lca_tags.map( limit, NULL ); _dom_lca_tags.at_grow(limit, 0);
_dom_lca_tags_round = 0;
#ifdef ASSERT #ifdef ASSERT
for( uint i = 0; i < limit; ++i ) { for (uint i = 0; i < limit; ++i) {
assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer"); assert(_dom_lca_tags.at(i) == 0, "Must be distinct from each node pointer");
}
#endif // ASSERT
}
//------------------------------clear_dom_lca_tags------------------------------
// Tag could be a node's integer index, 32bits instead of 64bits in some cases
// Intended use does not involve any growth for the array, so it could
// be of fixed size.
void PhaseIdealLoop::clear_dom_lca_tags() {
uint limit = C->unique() + 1;
_dom_lca_tags.map( limit, NULL );
_dom_lca_tags.clear();
#ifdef ASSERT
for( uint i = 0; i < limit; ++i ) {
assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
} }
#endif // ASSERT #endif // ASSERT
} }

View file

@ -867,9 +867,9 @@ private:
// Support for faster execution of get_late_ctrl()/dom_lca() // Support for faster execution of get_late_ctrl()/dom_lca()
// when a node has many uses and dominator depth is deep. // when a node has many uses and dominator depth is deep.
Node_Array _dom_lca_tags; GrowableArray<jlong> _dom_lca_tags;
uint _dom_lca_tags_round;
void init_dom_lca_tags(); void init_dom_lca_tags();
void clear_dom_lca_tags();
// Helper for debugging bad dominance relationships // Helper for debugging bad dominance relationships
bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early); bool verify_dominance(Node* n, Node* use, Node* LCA, Node* early);
@ -1063,7 +1063,6 @@ private:
_igvn(igvn), _igvn(igvn),
_verify_me(nullptr), _verify_me(nullptr),
_verify_only(false), _verify_only(false),
_dom_lca_tags(arena()), // Thread::resource_area
_nodes_required(UINT_MAX) { _nodes_required(UINT_MAX) {
assert(mode != LoopOptsVerify, "wrong constructor to verify IdealLoop"); assert(mode != LoopOptsVerify, "wrong constructor to verify IdealLoop");
build_and_optimize(mode); build_and_optimize(mode);
@ -1077,7 +1076,6 @@ private:
_igvn(igvn), _igvn(igvn),
_verify_me(verify_me), _verify_me(verify_me),
_verify_only(verify_me == nullptr), _verify_only(verify_me == nullptr),
_dom_lca_tags(arena()), // Thread::resource_area
_nodes_required(UINT_MAX) { _nodes_required(UINT_MAX) {
build_and_optimize(LoopOptsVerify); build_and_optimize(LoopOptsVerify);
} }
@ -1485,7 +1483,7 @@ private:
void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ); void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
bool split_up( Node *n, Node *blk1, Node *blk2 ); bool split_up( Node *n, Node *blk1, Node *blk2 );
void sink_use( Node *use, Node *post_loop ); void sink_use( Node *use, Node *post_loop );
Node *place_near_use( Node *useblock ) const; Node* place_outside_loop(Node* useblock, IdealLoopTree* loop) const;
Node* try_move_store_before_loop(Node* n, Node *n_ctrl); Node* try_move_store_before_loop(Node* n, Node *n_ctrl);
void try_move_store_after_loop(Node* n); void try_move_store_after_loop(Node* n);
bool identical_backtoback_ifs(Node *n); bool identical_backtoback_ifs(Node *n);
@ -1613,7 +1611,15 @@ public:
LoopNode* create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head, LongCountedLoopEndNode* exit_test); LoopNode* create_inner_head(IdealLoopTree* loop, LongCountedLoopNode* head, LongCountedLoopEndNode* exit_test);
bool is_safe_load_ctrl(Node* ctrl); Node* get_late_ctrl_with_anti_dep(LoadNode* n, Node* early, Node* LCA);
bool ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl);
bool ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop);
Node* compute_early_ctrl(Node* n, Node* n_ctrl);
void try_sink_out_of_loop(Node* n);
}; };

View file

@ -1134,21 +1134,26 @@ static bool merge_point_safe(Node* region) {
} }
//------------------------------place_near_use--------------------------------- //------------------------------place_outside_loop---------------------------------
// Place some computation next to use but not inside inner loops. // Place some computation outside of this loop on the path to the use passed as argument
// For inner loop uses move it to the preheader area. Node* PhaseIdealLoop::place_outside_loop(Node* useblock, IdealLoopTree* loop) const {
Node *PhaseIdealLoop::place_near_use(Node *useblock) const { Node* head = loop->_head;
IdealLoopTree *u_loop = get_loop( useblock ); assert(!loop->is_member(get_loop(useblock)), "must be outside loop");
if (u_loop->_irreducible) { if (head->is_Loop() && head->as_Loop()->is_strip_mined()) {
return useblock; loop = loop->_parent;
assert(loop->_head->is_OuterStripMinedLoop(), "malformed strip mined loop");
} }
if (u_loop->_child) {
if (useblock == u_loop->_head && u_loop->_head->is_OuterStripMinedLoop()) { // Pick control right outside the loop
return u_loop->_head->in(LoopNode::EntryControl); for (;;) {
Node* dom = idom(useblock);
if (loop->is_member(get_loop(dom))) {
break;
} }
return useblock; useblock = dom;
} }
return u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl); assert(find_non_split_ctrl(useblock) == useblock, "should be non split control");
return useblock;
} }
@ -1402,128 +1407,7 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
} }
} }
// See if a shared loop-varying computation has no loop-varying uses. try_sink_out_of_loop(n);
// Happens if something is only used for JVM state in uncommon trap exits,
// like various versions of induction variable+offset. Clone the
// computation per usage to allow it to sink out of the loop.
if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
Node *n_ctrl = get_ctrl(n);
IdealLoopTree *n_loop = get_loop(n_ctrl);
if( n_loop != _ltree_root ) {
DUIterator_Fast imax, i = n->fast_outs(imax);
for (; i < imax; i++) {
Node* u = n->fast_out(i);
if( !has_ctrl(u) ) break; // Found control user
IdealLoopTree *u_loop = get_loop(get_ctrl(u));
if( u_loop == n_loop ) break; // Found loop-varying use
if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
}
bool did_break = (i < imax); // Did we break out of the previous loop?
if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
Node *late_load_ctrl = NULL;
if (n->is_Load()) {
// If n is a load, get and save the result from get_late_ctrl(),
// to be later used in calculating the control for n's clones.
clear_dom_lca_tags();
late_load_ctrl = get_late_ctrl(n, n_ctrl);
}
// If n is a load, and the late control is the same as the current
// control, then the cloning of n is a pointless exercise, because
// GVN will ensure that we end up where we started.
if (!n->is_Load() || (late_load_ctrl != n_ctrl && is_safe_load_ctrl(late_load_ctrl))) {
Node* outer_loop_clone = NULL;
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
Node *u = n->last_out(j); // Clone private computation per use
_igvn.rehash_node_delayed(u);
Node *x = n->clone(); // Clone computation
Node *x_ctrl = NULL;
if( u->is_Phi() ) {
// Replace all uses of normal nodes. Replace Phi uses
// individually, so the separate Nodes can sink down
// different paths.
uint k = 1;
while( u->in(k) != n ) k++;
u->set_req( k, x );
// x goes next to Phi input path
x_ctrl = u->in(0)->in(k);
--j;
} else { // Normal use
// Replace all uses
for( uint k = 0; k < u->req(); k++ ) {
if( u->in(k) == n ) {
u->set_req( k, x );
--j;
}
}
x_ctrl = get_ctrl(u);
}
// Find control for 'x' next to use but not inside inner loops.
// For inner loop uses get the preheader area.
x_ctrl = place_near_use(x_ctrl);
if (n->is_Load()) {
// For loads, add a control edge to a CFG node outside of the loop
// to force them to not combine and return back inside the loop
// during GVN optimization (4641526).
//
// Because we are setting the actual control input, factor in
// the result from get_late_ctrl() so we respect any
// anti-dependences. (6233005).
x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
// Don't allow the control input to be a CFG splitting node.
// Such nodes should only have ProjNodes as outs, e.g. IfNode
// should only have IfTrueNode and IfFalseNode (4985384).
x_ctrl = find_non_split_ctrl(x_ctrl);
IdealLoopTree* x_loop = get_loop(x_ctrl);
Node* x_head = x_loop->_head;
if (x_head->is_Loop() && (x_head->is_OuterStripMinedLoop() || x_head->as_Loop()->is_strip_mined())) {
if (is_dominator(n_ctrl, x_head) && n_ctrl != x_head) {
// Anti dependence analysis is sometimes too
// conservative: a store in the outer strip mined loop
// can prevent a load from floating out of the outer
// strip mined loop but the load may not be referenced
// from the safepoint: loop strip mining verification
// code reports a problem in that case. Make sure the
// load is not moved in the outer strip mined loop in
// that case.
x_ctrl = x_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
} else if (x_head->is_OuterStripMinedLoop()) {
// Do not add duplicate LoadNodes to the outer strip mined loop
if (outer_loop_clone != NULL) {
_igvn.replace_node(x, outer_loop_clone);
continue;
}
outer_loop_clone = x;
}
}
assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
x->set_req(0, x_ctrl);
}
register_new_node(x, x_ctrl);
// Some institutional knowledge is needed here: 'x' is
// yanked because if the optimizer runs GVN on it all the
// cloned x's will common up and undo this optimization and
// be forced back in the loop.
// I tried setting control edges on the x's to force them to
// not combine, but the matching gets worried when it tries
// to fold a StoreP and an AddP together (as part of an
// address expression) and the AddP and StoreP have
// different controls.
if (!x->is_Load() && !x->is_DecodeNarrowPtr()) {
_igvn._worklist.yank(x);
}
}
_igvn.remove_dead_node(n);
}
}
}
}
try_move_store_after_loop(n); try_move_store_after_loop(n);
@ -1536,9 +1420,199 @@ void PhaseIdealLoop::split_if_with_blocks_post(Node *n) {
} }
} }
bool PhaseIdealLoop::is_safe_load_ctrl(Node* ctrl) { // See if a shared loop-varying computation has no loop-varying uses.
if (ctrl->is_Proj() && ctrl->in(0)->is_Call() && ctrl->has_out_with(Op_Catch)) { // Happens if something is only used for JVM state in uncommon trap exits,
return false; // like various versions of induction variable+offset. Clone the
// computation per usage to allow it to sink out of the loop.
// Try to sink n out of the loop it currently belongs to by giving every use
// its own clone of n placed outside that loop.
//
// n is a candidate only if it has a control assigned (has_ctrl), is not a
// Phi, Bool, Proj, MergeMem, CMove or Opaque4, belongs to a loop other than
// the loop tree root and has more than one use. The transformation is applied
// only when the early control of n is still inside the loop and
// ctrl_of_all_uses_out_of_loop() accepts every use.
//
// On success n is replaced by per-use clones and removed from the graph;
// otherwise the graph is left untouched.
void PhaseIdealLoop::try_sink_out_of_loop(Node* n) {
if (has_ctrl(n) &&
!n->is_Phi() &&
!n->is_Bool() &&
!n->is_Proj() &&
!n->is_MergeMem() &&
!n->is_CMove() &&
n->Opcode() != Op_Opaque4) {
Node *n_ctrl = get_ctrl(n);
IdealLoopTree *n_loop = get_loop(n_ctrl);
if (n_loop != _ltree_root && n->outcnt() > 1) {
// Compute early control: needed for anti-dependence analysis. It's also possible that as a result of
// previous transformations in this loop opts round, the node can be hoisted now: early control will tell us.
Node* early_ctrl = compute_early_ctrl(n, n_ctrl);
if (n_loop->is_member(get_loop(early_ctrl)) && // check that this one can't be hoisted now
ctrl_of_all_uses_out_of_loop(n, early_ctrl, n_loop)) { // All uses in outer loops!
assert(!n->is_Store() && !n->is_LoadStore(), "no node with a side effect");
// First load clone that ended up in an outer strip mined loop; later clones
// landing there are replaced by it instead of being duplicated.
Node* outer_loop_clone = NULL;
// Walk the uses from the back; every replaced edge removes an out of n, so
// the iterator is stepped back by hand in each branch below.
for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin;) {
Node* u = n->last_out(j); // Clone private computation per use
_igvn.rehash_node_delayed(u);
Node* x = n->clone(); // Clone computation
Node* x_ctrl = NULL;
if (u->is_Phi()) {
// Replace all uses of normal nodes. Replace Phi uses
// individually, so the separate Nodes can sink down
// different paths.
uint k = 1;
while (u->in(k) != n) k++;
u->set_req(k, x);
// x goes next to Phi input path
x_ctrl = u->in(0)->in(k);
// Find control for 'x' next to use but not inside inner loops.
x_ctrl = place_outside_loop(x_ctrl, n_loop);
--j;
} else { // Normal use
// A use without an assigned control is taken to be controlled by its own
// control input.
if (has_ctrl(u)) {
x_ctrl = get_ctrl(u);
} else {
x_ctrl = u->in(0);
}
// Find control for 'x' next to use but not inside inner loops.
x_ctrl = place_outside_loop(x_ctrl, n_loop);
// Replace all uses
if (u->is_ConstraintCast() && u->bottom_type()->higher_equal(_igvn.type(n)) && u->in(0) == x_ctrl) {
// If we're sinking a chain of data nodes, we might have inserted a cast to pin the use which is not necessary
// anymore now that we're going to pin n as well
_igvn.replace_node(u, x);
--j;
} else {
// u may reference n through several inputs; nb is presumably the number of
// edges replace_edge() rewired, and the iterator is stepped back by that many.
int nb = u->replace_edge(n, x, &_igvn);
j -= nb;
}
}
if (n->is_Load()) {
// For loads, add a control edge to a CFG node outside of the loop
// to force them to not combine and return back inside the loop
// during GVN optimization (4641526).
// Debug-only check: running the anti-dependence analysis on the clone must
// not move x_ctrl.
assert(x_ctrl == get_late_ctrl_with_anti_dep(x->as_Load(), early_ctrl, x_ctrl), "anti-dependences were already checked");
IdealLoopTree* x_loop = get_loop(x_ctrl);
Node* x_head = x_loop->_head;
if (x_head->is_Loop() && x_head->is_OuterStripMinedLoop()) {
// Do not add duplicate LoadNodes to the outer strip mined loop
if (outer_loop_clone != NULL) {
_igvn.replace_node(x, outer_loop_clone);
continue;
}
outer_loop_clone = x;
}
x->set_req(0, x_ctrl);
} else if (n->in(0) != NULL){
// n carried a control input, so its clone is given the new control instead.
x->set_req(0, x_ctrl);
}
assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
assert(!n_loop->is_member(get_loop(x_ctrl)), "should have moved out of loop");
register_new_node(x, x_ctrl);
if (x->in(0) == NULL && !x->is_DecodeNarrowPtr()) {
assert(!x->is_Load(), "load should be pinned");
// Use a cast node to pin clone out of loop
// Scan x's inputs for the first one whose control is still inside n_loop and
// whose type matches one of the cases below; it is routed through a cast of
// that type, created with carry_dependency = true and controlled by x_ctrl.
// An input of any other type leaves cast NULL and the scan moves on.
Node* cast = NULL;
for (uint k = 0; k < x->req(); k++) {
Node* in = x->in(k);
if (in != NULL && n_loop->is_member(get_loop(get_ctrl(in)))) {
const Type* in_t = _igvn.type(in);
if (in_t->isa_int()) {
cast = new CastIINode(in, in_t, true);
} else if (in_t->isa_long()) {
cast = new CastLLNode(in, in_t, true);
} else if (in_t->isa_ptr()) {
cast = new CastPPNode(in, in_t, true);
} else if (in_t->isa_float()) {
cast = new CastFFNode(in, in_t, true);
} else if (in_t->isa_double()) {
cast = new CastDDNode(in, in_t, true);
} else if (in_t->isa_vect()) {
cast = new CastVVNode(in, in_t, true);
}
}
if (cast != NULL) {
cast->set_req(0, x_ctrl);
register_new_node(cast, x_ctrl);
x->replace_edge(in, cast);
break;
}
}
assert(cast != NULL, "must have added a cast to pin the node");
}
}
// The loop above redirected the uses of n to clones, so n itself is removed.
_igvn.remove_dead_node(n);
}
// Reset the round counter that ctrl_of_all_uses_out_of_loop() increments once per use.
_dom_lca_tags_round = 0;
}
}
}
// Compute the early control of n by walking its inputs transitively.
//
// The walk stops at CFG nodes (which contribute themselves) and at pinned
// nodes (which contribute their control input); the inputs of every other
// node are explored in turn. Among all contributed controls, the result is
// replaced whenever the current result dominates the new candidate, so the
// candidate lowest in the dominator tree wins.
//
// n      - node whose early control is wanted
// n_ctrl - current control of n; every candidate is asserted to dominate it
Node* PhaseIdealLoop::compute_early_ctrl(Node* n, Node* n_ctrl) {
  Node* earliest = NULL;
  ResourceMark rm;
  Unique_Node_List pending;
  pending.push(n);
  for (uint next = 0; next < pending.size(); next++) {
    Node* cur = pending.at(next);
    Node* candidate = NULL;
    if (cur->is_CFG()) {
      candidate = cur;
    } else if (cur->pinned()) {
      candidate = cur->in(0);
    } else {
      // Floating node: its placement is constrained by its inputs, so queue them.
      for (uint k = 0; k < cur->req(); k++) {
        Node* def = cur->in(k);
        if (def != NULL) {
          pending.push(def);
        }
      }
    }
    if (candidate == NULL) {
      continue;
    }
    assert(is_dominator(candidate, n_ctrl), "");
    if (earliest == NULL || is_dominator(earliest, candidate)) {
      earliest = candidate;
    }
  }
  assert(is_dominator(earliest, n_ctrl), "early control must dominate current control");
  return earliest;
}
bool PhaseIdealLoop::ctrl_of_all_uses_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop) {
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u = n->fast_out(i);
if (u->Opcode() == Op_Opaque1) {
return false; // Found loop limit, bugfix for 4677003
}
// We can't reuse tags in PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal() so make sure calls to
// get_late_ctrl_with_anti_dep() use their own tag
_dom_lca_tags_round++;
assert(_dom_lca_tags_round != 0, "shouldn't wrap around");
if (u->is_Phi()) {
for (uint j = 1; j < u->req(); ++j) {
if (u->in(j) == n && !ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, u->in(0)->in(j))) {
return false;
}
}
} else {
Node* ctrl = has_ctrl(u) ? get_ctrl(u) : u->in(0);
if (!ctrl_of_use_out_of_loop(n, n_ctrl, n_loop, ctrl)) {
return false;
}
}
}
return true;
}
// Decide whether a single use of n, controlled by ctrl, lies outside n_loop.
// For a load, ctrl is first replaced by the result of
// get_late_ctrl_with_anti_dep(n, n_ctrl, ctrl), so the answer is based on that
// adjusted control rather than on the use's own control.
// Returns false when the resulting control belongs to n_loop itself or to a
// loop that n_loop->is_member() reports as a member of n_loop; true otherwise.
bool PhaseIdealLoop::ctrl_of_use_out_of_loop(const Node* n, Node* n_ctrl, IdealLoopTree* n_loop, Node* ctrl) {
if (n->is_Load()) {
ctrl = get_late_ctrl_with_anti_dep(n->as_Load(), n_ctrl, ctrl);
}
IdealLoopTree *u_loop = get_loop(ctrl);
if (u_loop == n_loop) {
return false; // Found loop-varying use
}
if (n_loop->is_member(u_loop)) {
return false; // Found use in inner loop
} }
return true; return true;
} }

View file

@ -57,6 +57,9 @@ class CallNode;
class CallRuntimeNode; class CallRuntimeNode;
class CallNativeNode; class CallNativeNode;
class CallStaticJavaNode; class CallStaticJavaNode;
class CastFFNode;
class CastDDNode;
class CastVVNode;
class CastIINode; class CastIINode;
class CastLLNode; class CastLLNode;
class CatchNode; class CatchNode;
@ -691,6 +694,9 @@ public:
DEFINE_CLASS_ID(CastII, ConstraintCast, 0) DEFINE_CLASS_ID(CastII, ConstraintCast, 0)
DEFINE_CLASS_ID(CheckCastPP, ConstraintCast, 1) DEFINE_CLASS_ID(CheckCastPP, ConstraintCast, 1)
DEFINE_CLASS_ID(CastLL, ConstraintCast, 2) DEFINE_CLASS_ID(CastLL, ConstraintCast, 2)
DEFINE_CLASS_ID(CastFF, ConstraintCast, 3)
DEFINE_CLASS_ID(CastDD, ConstraintCast, 4)
DEFINE_CLASS_ID(CastVV, ConstraintCast, 5)
DEFINE_CLASS_ID(CMove, Type, 3) DEFINE_CLASS_ID(CMove, Type, 3)
DEFINE_CLASS_ID(SafePointScalarObject, Type, 4) DEFINE_CLASS_ID(SafePointScalarObject, Type, 4)
DEFINE_CLASS_ID(DecodeNarrowPtr, Type, 5) DEFINE_CLASS_ID(DecodeNarrowPtr, Type, 5)