8223051: support loops with long (64b) trip counts

Reviewed-by: vlivanov, thartmann, jrose
This commit is contained in:
Roland Westrelin 2020-10-19 11:30:13 +00:00
parent e9be2db7ac
commit e76de18956
11 changed files with 839 additions and 79 deletions

View file

@ -787,7 +787,13 @@
"Move predicates out of loops based on profiling data") \
\
product(bool, ExpandSubTypeCheckAtParseTime, false, DIAGNOSTIC, \
"Do not use subtype check macro node")
"Do not use subtype check macro node") \
\
/* Debug-only stress flag. The description is built from adjacent string */ \
/* literals, so every piece but the last needs a trailing space. */ \
develop(uintx, StressLongCountedLoop, 0, \
"if > 0, convert int counted loops to long counted loops " \
"to stress handling of long counted loops: run inner loop " \
"for at most jint_max / StressLongCountedLoop") \
range(0, max_juint) \
// end of C2_FLAGS

View file

@ -967,6 +967,46 @@ bool CallJavaNode::cmp( const Node &n ) const {
return CallNode::cmp(call) && _method == call._method &&
_override_symbolic_info == call._override_symbolic_info;
}
// Append the debug-info edges of 'sfpt' to this call and give this call its
// own copy of the safepoint's JVMState chain, with every offset shifted so it
// indexes this call's edges rather than the safepoint's.
void CallJavaNode::copy_call_debug_info(PhaseIterGVN* phase, SafePointNode *sfpt) {
  // Where the debug edges start on the source safepoint and on this call.
  uint src_dbg_begin;
  if (sfpt->is_Call()) {
    src_dbg_begin = sfpt->as_Call()->tf()->domain()->cnt();
  } else {
    src_dbg_begin = (uint)TypeFunc::Parms+1;
  }
  uint dst_dbg_begin = tf()->domain()->cnt();
  assert (dst_dbg_begin == req(), "argument count mismatch");
  int offset_delta = dst_dbg_begin - src_dbg_begin;
  Compile* C = phase->C;

  // The same SafePointScalarObject node can appear under several debug
  // edges; the Dict remembers the clone made for each so it is made once.
  Dict* cloned_sosn = new Dict(cmpkey,hashkey);
  for (uint idx = src_dbg_begin; idx < sfpt->req(); idx++) {
    Node* edge = sfpt->in(idx);
    if (edge != NULL && edge->is_SafePointScalarObject()) {
      bool is_fresh_clone;
      Node* copy = edge->as_SafePointScalarObject()->clone(cloned_sosn, is_fresh_clone);
      if (is_fresh_clone) {
        // First time this scalar object is seen: hook the clone to the root
        // as control and register it with IGVN.
        copy->set_req(0, C->root());
        copy = phase->transform(copy);
      }
      edge = copy;
    }
    add_req(edge);
  }

  // The JVMState may be shared with other nodes: work on a deep clone.
  JVMState* src_jvms = sfpt->jvms();
  if (src_jvms != NULL) {
    set_jvms(src_jvms->clone_deep(C));
  } else {
    set_jvms(NULL);
  }
  // Re-home every frame of the cloned chain on this node and rebase its
  // offsets.
  for (JVMState* frame = this->jvms(); frame != NULL; frame = frame->caller()) {
    frame->set_map(this);
    frame->set_locoff(frame->locoff()+offset_delta);
    frame->set_stkoff(frame->stkoff()+offset_delta);
    frame->set_monoff(frame->monoff()+offset_delta);
    frame->set_scloff(frame->scloff()+offset_delta);
    frame->set_endoff(frame->endoff()+offset_delta);
  }
}
#ifdef ASSERT
bool CallJavaNode::validate_symbolic_info() const {
if (method() == NULL) {
@ -1159,7 +1199,9 @@ Node* SafePointNode::Identity(PhaseGVN* phase) {
if( in(TypeFunc::Control)->is_SafePoint() )
return in(TypeFunc::Control);
if( in(0)->is_Proj() ) {
// Transforming long counted loops requires a safepoint node. Do not
// eliminate a safepoint until loop opts are over.
if (in(0)->is_Proj() && !phase->C->major_progress()) {
Node *n0 = in(0)->in(0);
// Check if he is a call projection (except Leaf Call)
if( n0->is_Catch() ) {
@ -1332,11 +1374,13 @@ uint SafePointScalarObjectNode::match_edge(uint idx) const {
}
SafePointScalarObjectNode*
SafePointScalarObjectNode::clone(Dict* sosn_map) const {
SafePointScalarObjectNode::clone(Dict* sosn_map, bool& new_node) const {
void* cached = (*sosn_map)[(void*)this];
if (cached != NULL) {
new_node = false;
return (SafePointScalarObjectNode*)cached;
}
new_node = true;
SafePointScalarObjectNode* res = (SafePointScalarObjectNode*)Node::clone();
sosn_map->Insert((void*)this, (void*)res);
return res;

View file

@ -527,7 +527,7 @@ public:
// corresponds appropriately to "this" in "new_call". Assumes that
// "sosn_map" is a map, specific to the translation of "s" to "new_call",
// mapping old SafePointScalarObjectNodes to new, to avoid multiple copies.
SafePointScalarObjectNode* clone(Dict* sosn_map) const;
SafePointScalarObjectNode* clone(Dict* sosn_map, bool& new_node) const;
#ifndef PRODUCT
virtual void dump_spec(outputStream *st) const;
@ -635,6 +635,8 @@ public:
bool is_call_to_arraycopystub() const;
virtual void copy_call_debug_info(PhaseIterGVN* phase, SafePointNode *sfpt) {}
#ifndef PRODUCT
virtual void dump_req(outputStream *st = tty) const;
virtual void dump_spec(outputStream *st) const;
@ -677,6 +679,7 @@ public:
bool is_method_handle_invoke() const { return _method_handle_invoke; }
void set_override_symbolic_info(bool f) { _override_symbolic_info = f; }
bool override_symbolic_info() const { return _override_symbolic_info; }
void copy_call_debug_info(PhaseIterGVN* phase, SafePointNode *sfpt);
DEBUG_ONLY( bool validate_symbolic_info() const; )

View file

@ -3426,6 +3426,7 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
}
break;
case Op_Loop:
assert(!n->as_Loop()->is_transformed_long_loop(), "should have been turned into a counted loop");
case Op_CountedLoop:
case Op_OuterStripMinedLoop:
if (n->as_Loop()->is_inner_loop()) {

View file

@ -449,9 +449,13 @@ const Type* ConvL2INode::Value(PhaseGVN* phase) const {
const Type *t = phase->type( in(1) );
if( t == Type::TOP ) return Type::TOP;
const TypeLong *tl = t->is_long();
if (tl->is_con())
if (tl->is_con()) {
// Easy case.
return TypeInt::make((jint)tl->get_con());
}
if (tl->_lo >= min_jint && tl->_hi <= max_jint) {
return TypeInt::make((jint)tl->_lo, (jint)tl->_hi, tl->_widen);
}
return bottom_type();
}

View file

@ -40,8 +40,11 @@
#include "opto/loopnode.hpp"
#include "opto/movenode.hpp"
#include "opto/mulnode.hpp"
#include "opto/opaquenode.hpp"
#include "opto/rootnode.hpp"
#include "opto/runtime.hpp"
#include "opto/superword.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/powerOfTwo.hpp"
//=============================================================================
@ -501,6 +504,729 @@ static int check_stride_overflow(jint stride_con, const TypeInt* limit_t) {
return 0;
}
// Classify whether 'limit + stride_con' can overflow the jlong range, given
// the type (value range) of the limit:
//   -1  overflow happens for every possible limit value,
//    1  overflow happens for some possible limit values,
//    0  overflow cannot happen.
static int check_stride_overflow(jlong stride_con, const TypeLong* limit_t) {
  const bool counting_up = stride_con > 0;
  if (counting_up) {
    // Largest limit that can still absorb one more positive stride.
    const jlong highest_safe_limit = max_jlong - stride_con;
    if (limit_t->_lo > highest_safe_limit) {
      return -1;
    }
    return (limit_t->_hi > highest_safe_limit) ? 1 : 0;
  }
  // Smallest limit that can still absorb one more non-positive stride.
  const jlong lowest_safe_limit = min_jlong - stride_con;
  if (limit_t->_hi < lowest_safe_limit) {
    return -1;
  }
  return (limit_t->_lo < lowest_safe_limit) ? 1 : 0;
}
// Tell whether the exit test 'bt' combined with stride 'stride_con' describes
// a loop that can be treated as counted. Returns false when the condition is
// inverted with respect to the stride direction, i.e. the induction variable
// would have to roll through MININT/MAXINT before the loop exits.
static bool condition_stride_ok(BoolTest::mask bt, jlong stride_con) {
  // An '==' exit test: bail out, but this loop trips at most twice!
  if (bt == BoolTest::eq) {
    return false;
  }
  // A '!=' exit test is only acceptable with a unit stride (odd strides could
  // step over the limit).
  if (bt == BoolTest::ne && stride_con != 1 && stride_con != -1) {
    return false;
  }
  // Count down loop tested with < or <= rolls through MAXINT.
  const bool less_test = (bt == BoolTest::le || bt == BoolTest::lt);
  if (less_test && stride_con < 0) {
    return false;
  }
  // Count up loop tested with > or >= rolls through MININT.
  const bool greater_test = (bt == BoolTest::ge || bt == BoolTest::gt);
  if (greater_test && stride_con > 0) {
    return false;
  }
  return true;
}
// Redirect every use of the long value 'iv_to_replace' to the expression
// outer_phi + (long)inner_iv. Both new nodes are given 'inner_head' as
// control.
void PhaseIdealLoop::long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head) {
  Node* widened_inner_iv = new ConvI2LNode(inner_iv, TypeLong::INT);
  register_new_node(widened_inner_iv, inner_head);
  Node* substitute = new AddLNode(outer_phi, widened_inner_iv);
  register_new_node(substitute, inner_head);

  // Walk the out edges from the last one down; each rewrite removes edges
  // from the list, so the cursor is moved back by the number removed.
  DUIterator_Last first_out, cursor = iv_to_replace->last_outs(first_out);
  while (cursor >= first_out) {
    Node* use = iv_to_replace->last_out(cursor);
#ifdef ASSERT
    // A use not dominated by the inner loop head has to be a Phi, and the
    // iv has to reach it along a path that is dominated by the head.
    if (!is_dominator(inner_head, ctrl_or_self(use))) {
      assert(use->is_Phi(), "should be a Phi");
      for (uint k = 1; k < use->req(); k++) {
        if (use->in(k) == iv_to_replace) {
          assert(is_dominator(inner_head, use->in(0)->in(k)), "iv use above loop?");
        }
      }
    }
#endif
    _igvn.rehash_node_delayed(use);
    int edges_rewired = use->replace_edge(iv_to_replace, substitute);
    cursor -= edges_rewired;
  }
}
// Insert an empty predicate for 'reason' on the entry path of 'inner_head':
// an If testing Conv2B(Opaque1(1)) whose true projection becomes the new
// entry control of inner_head and whose false projection leads to an
// uncommon trap call followed by a Halt. The trap's debug info, memory and
// i/o state are taken from 'sfpt'. The If and its true projection are
// registered in 'loop'; the trap path is registered in the root loop.
// Nothing is inserted if too many traps were already recorded for 'reason'.
void PhaseIdealLoop::add_empty_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt) {
if (!C->too_many_traps(reason)) {
// Predicate condition: constant 1 hidden behind an Opaque1 node.
Node *cont = _igvn.intcon(1);
Node* opq = new Opaque1Node(C, cont);
_igvn.register_new_node_with_optimizer(opq);
Node *bol = new Conv2BNode(opq);
_igvn.register_new_node_with_optimizer(bol);
set_subtree_ctrl(bol);
// The If hangs off the current entry control of the inner loop head.
IfNode* iff = new IfNode(inner_head->in(LoopNode::EntryControl), bol, PROB_MAX, COUNT_UNKNOWN);
register_control(iff, loop, inner_head->in(LoopNode::EntryControl));
Node* iffalse = new IfFalseNode(iff);
register_control(iffalse, _ltree_root, iff);
Node* iftrue = new IfTrueNode(iff);
register_control(iftrue, loop, iff);
// Record the opaque node with the compilation as a predicate opaque.
C->add_predicate_opaq(opq);
// Build the uncommon trap call taken on the false projection.
int trap_request = Deoptimization::make_trap_request(reason, Deoptimization::Action_maybe_recompile);
address call_addr = SharedRuntime::uncommon_trap_blob()->entry_point();
const TypePtr* no_memory_effects = NULL;
JVMState* jvms = sfpt->jvms();
CallNode* unc = new CallStaticJavaNode(OptoRuntime::uncommon_trap_Type(), call_addr, "uncommon_trap",
jvms->bci(), no_memory_effects);
// Memory and i/o state for the trap: the projections of sfpt if it is a
// call, otherwise the inputs of the safepoint itself.
Node* mem = NULL;
Node* i_o = NULL;
if (sfpt->is_Call()) {
mem = sfpt->proj_out(TypeFunc::Memory);
i_o = sfpt->proj_out(TypeFunc::I_O);
} else {
mem = sfpt->memory();
i_o = sfpt->i_o();
}
Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
register_new_node(frame, C->start());
Node *ret = new ParmNode(C->start(), TypeFunc::ReturnAdr);
register_new_node(ret, C->start());
unc->init_req(TypeFunc::Control, iffalse);
unc->init_req(TypeFunc::I_O, i_o);
unc->init_req(TypeFunc::Memory, mem); // may gc ptrs
unc->init_req(TypeFunc::FramePtr, frame);
unc->init_req(TypeFunc::ReturnAdr, ret);
unc->init_req(TypeFunc::Parms+0, _igvn.intcon(trap_request));
unc->set_cnt(PROB_UNLIKELY_MAG(4));
// Append the safepoint's debug info (and a private JVMState) to the call.
unc->copy_call_debug_info(&_igvn, sfpt);
// Make sure every data input of the call has a control assigned.
for (uint i = TypeFunc::Parms; i < unc->req(); i++) {
set_subtree_ctrl(unc->in(i));
}
register_control(unc, _ltree_root, iffalse);
Node* ctrl = new ProjNode(unc, TypeFunc::Control);
register_control(ctrl, _ltree_root, unc);
// The trap never returns: terminate the path with a Halt hooked to the root.
Node* halt = new HaltNode(ctrl, frame, "uncommon trap returned which should never happen" PRODUCT_ONLY(COMMA /*reachable*/false));
register_control(halt, _ltree_root, ctrl);
C->root()->add_req(halt);
// Finally splice the predicate in: the loop is now entered from the true
// projection, which also becomes its immediate dominator.
_igvn.replace_input_of(inner_head, LoopNode::EntryControl, iftrue);
set_idom(inner_head, iftrue, dom_depth(inner_head));
}
}
// Find a safepoint node that dominates the back edge. We need a
// SafePointNode so we can use its jvm state to create empty
// predicates.
//
// back_control: projection of the loop exit test that feeds the back edge
// x:            loop head
// loop:         loop tree entry for the loop headed by x
//
// Returns the safepoint, or NULL if there is none or if it cannot be used
// because the memory state at the safepoint differs from the memory state
// at the back edge.
SafePointNode* PhaseIdealLoop::find_safepoint(Node* back_control, Node* x, IdealLoopTree* loop) {
IfNode* exit_test = back_control->in(0)->as_If();
SafePointNode* safepoint = NULL;
// Easy case: the exit test is the only user of a safepoint right above it.
// That safepoint is accepted without any memory state check.
if (exit_test->in(0)->is_SafePoint() && exit_test->in(0)->outcnt() == 1) {
safepoint = exit_test->in(0)->as_SafePoint();
} else {
// Otherwise walk up the dominator chain from the back edge until a
// safepoint or the loop head is reached.
Node* c = back_control;
while (c != x && c->Opcode() != Op_SafePoint) {
c = idom(c);
}
if (c->Opcode() == Op_SafePoint) {
safepoint = c->as_SafePoint();
}
if (safepoint == NULL) {
return NULL;
}
Node* mem = safepoint->in(TypeFunc::Memory);
// We can only use that safepoint if there are no side effects
// between the backedge and the safepoint.
#ifdef ASSERT
// mm is used for bookkeeping: every memory slice that gets accounted for
// is reset to the base memory, so that once all slices are processed mm
// should fold to the base memory (checked at the end).
MergeMemNode* mm = NULL;
if (mem->is_MergeMem()) {
mm = mem->clone()->as_MergeMem();
for (MergeMemStream mms(mem->as_MergeMem()); mms.next_non_empty(); ) {
// Slices whose memory state is not in this loop need no checking.
if (mms.alias_idx() != Compile::AliasIdxBot && loop != get_loop(ctrl_or_self(mms.memory()))) {
mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory());
}
}
}
#endif
// For every memory Phi at the loop head, the state flowing around the back
// edge must be the very state the safepoint sees for that slice.
for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
Node* u = x->fast_out(i);
if (u->is_Phi() && u->bottom_type() == Type::MEMORY) {
Node* m = u->in(LoopNode::LoopBackControl);
if (u->adr_type() == TypePtr::BOTTOM) {
// Phi covering all of memory.
if (m->is_MergeMem() && mem->is_MergeMem()) {
// In debug builds the slices are always walked so that the mm
// bookkeeping gets updated even when m == mem.
if (m != mem DEBUG_ONLY(|| true)) {
for (MergeMemStream mms(m->as_MergeMem(), mem->as_MergeMem()); mms.next_non_empty2(); ) {
if (!mms.is_empty()) {
if (mms.memory() != mms.memory2()) {
return NULL;
}
#ifdef ASSERT
if (mms.alias_idx() != Compile::AliasIdxBot) {
mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory());
}
#endif
}
}
}
} else if (mem->is_MergeMem()) {
if (m != mem->as_MergeMem()->base_memory()) {
return NULL;
}
} else {
return NULL;
}
} else {
// Phi for a single memory slice.
if (mem->is_MergeMem()) {
if (m != mem->as_MergeMem()->memory_at(C->get_alias_index(u->adr_type()))) {
return NULL;
}
#ifdef ASSERT
mm->set_memory_at(C->get_alias_index(u->adr_type()), mem->as_MergeMem()->base_memory());
#endif
} else {
if (m != mem) {
return NULL;
}
}
}
}
}
#ifdef ASSERT
if (mm != NULL) {
assert (_igvn.transform(mm) == mem->as_MergeMem()->base_memory(), "all memory state should have been processed");
_igvn.remove_dead_node(mm);
}
#endif
}
return safepoint;
}
// If the loop has the shape of a counted loop but with a long
// induction variable, transform the loop into a loop nest: an inner
// loop that iterates for at most max int iterations with an integer
// induction variable and an outer loop that iterates over the full
// range of long values from the initial loop in (at most) max int
// steps. That is:
//
// x: for (long phi = init; phi < limit; phi += stride) {
// // phi := Phi(L, init, incr)
// // incr := AddL(phi, longcon(stride))
// // phi_incr := phi (test happens before increment)
// long incr = phi + stride;
// ... use phi and incr ...
// }
//
// OR:
//
// x: for (long phi = init; (phi += stride) < limit; ) {
// // phi := Phi(L, AddL(init, stride), incr)
// // incr := AddL(phi, longcon(stride))
// // phi_incr := NULL (test happens after increment)
// long incr = phi + stride;
// ... use phi and (phi + stride) ...
// }
//
// ==transform=>
//
// const ulong inner_iters_limit = INT_MAX - stride - 1; //near 0x7FFFFFF0
// assert(stride <= inner_iters_limit); // else abort transform
// assert((extralong)limit + stride <= LONG_MAX); // else deopt
// outer_head: for (long outer_phi = init;;) {
// // outer_phi := Phi(outer_head, init, AddL(outer_phi, I2L(inner_phi)))
// ulong inner_iters_max = (ulong) MAX(0, ((extralong)limit + stride - outer_phi));
// long inner_iters_actual = MIN(inner_iters_limit, inner_iters_max);
// assert(inner_iters_actual == (int)inner_iters_actual);
// int inner_phi, inner_incr;
// x: for (inner_phi = 0;; inner_phi = inner_incr) {
// // inner_phi := Phi(x, intcon(0), inner_incr)
// // inner_incr := AddI(inner_phi, intcon(stride))
// inner_incr = inner_phi + stride;
// if (inner_incr < inner_iters_actual) {
// ... use phi=>(outer_phi+inner_phi) and incr=>(outer_phi+inner_incr) ...
// continue;
// }
// else break;
// }
// if ((outer_phi+inner_phi) < limit) //OR (outer_phi+inner_incr) < limit
// continue;
// else break;
// }
//
// x:       head of the candidate loop
// loop:    loop tree entry for x
// old_new: scratch mapping; cleared and then filled by the peeling step
//
// Returns true if the loop was transformed into a loop nest, false if the
// loop does not qualify (in which case every bailout below happens before
// the graph is modified).
bool PhaseIdealLoop::is_long_counted_loop(Node* x, IdealLoopTree* loop, Node_List &old_new) {
// Only for inner loops
if (loop->_child != NULL) {
return false;
}
// Checks whether the loop has the shape of a counted loop
Node* back_control = loop_exit_control(x, loop);
if (back_control == NULL) {
return false;
}
BoolTest::mask bt = BoolTest::illegal;
float cl_prob = 0;
Node* incr = NULL;
Node* limit = NULL;
Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
if (cmp == NULL || cmp->Opcode() != Op_CmpL) {
return false; // Avoid pointer & float & 32-bit compares
}
Node* phi_incr = NULL;
incr = loop_iv_incr(incr, x, loop, phi_incr);
if (incr == NULL || incr->Opcode() != Op_AddL) {
return false;
}
Node* xphi = NULL;
Node* stride = loop_iv_stride(incr, loop, xphi);
if (stride == NULL) {
return false;
}
// Statistics only (non-product builds): one more candidate seen.
#ifndef PRODUCT
Atomic::inc(&_long_loop_candidates);
#endif
jlong stride_con = stride->get_long();
assert(stride_con != 0, "missed some peephole opt");
// We can't iterate for more than max int at a time.
if (stride_con != (jint)stride_con) {
return false;
}
// The number of iterations for the integer count loop: guarantee no
// overflow: max_jint - stride_con max. -1 so there's no need for a
// loop limit check if the exit test is <= or >=.
int iters_limit = max_jint - ABS(stride_con) - 1;
#ifdef ASSERT
// Stress mode: shrink the inner loop so that the outer loop actually runs.
if (StressLongCountedLoop > 0) {
iters_limit = iters_limit / StressLongCountedLoop;
}
#endif
// At least 2 iterations so counted loop construction doesn't fail
if (iters_limit/ABS(stride_con) < 2) {
return false;
}
PhiNode* phi = loop_iv_phi(xphi, phi_incr, x, loop);
if (phi == NULL || phi->in(LoopNode::LoopBackControl) != incr) {
return false;
}
// Safepoint on backedge not supported
if (x->in(LoopNode::LoopBackControl)->Opcode() == Op_SafePoint) {
return false;
}
// data nodes on back branch not supported
if (back_control->outcnt() > 1) {
return false;
}
if (!condition_stride_ok(bt, stride_con)) {
return false;
}
// We'll need to use the loop limit before the inner loop is entered
if (!is_dominator(get_ctrl(limit), x)) {
return false;
}
IfNode* exit_test = back_control->in(0)->as_If();
// We need a safepoint to insert empty predicates for the inner loop.
SafePointNode* safepoint = find_safepoint(back_control, x, loop);
if (safepoint == NULL) {
// If exit condition is ne, then a loop limit check is likely needed
if (bt == BoolTest::ne) {
return false;
}
} else if (C->too_many_traps(safepoint->jvms()->method(),
safepoint->jvms()->bci(),
Deoptimization::Reason_loop_limit_check)) {
// We must have transformed the loop already and a loop limit
// check must have failed.
return false;
}
// The projection of the exit test that is not the back edge, i.e. the one
// leaving the loop.
Node* exit_branch = exit_test->proj_out(back_control->Opcode() == Op_IfFalse);
Node* entry_control = x->in(LoopNode::EntryControl);
// if the loop exit test is on the IV before it is incremented: i <
// limit, we transform the exit test so it is performed on the exit
// test after it is incremented: i + stride < limit + stride. We
// need limit + stride to not overflow. See adjusted_limit below.
bool limit_check_required = false;
if (phi_incr != NULL) {
const TypeLong* limit_t = _igvn.type(limit)->is_long();
int sov = check_stride_overflow(stride_con, limit_t);
if (sov != 0) {
if (sov < 0) {
return false; // Bailout: integer overflow is certain.
}
// Check that inserting a predicate is indeed possible
if (find_predicate_insertion_point(x->in(LoopNode::EntryControl), Deoptimization::Reason_loop_limit_check) == NULL) {
return false;
}
limit_check_required = true;
}
}
// No bailout past this point: the graph is modified from here on.
// Clone the control flow of the loop to build an outer loop
Node* outer_back_branch = back_control->clone();
Node* outer_exit_test = exit_test->clone();
Node* inner_exit_branch = exit_branch->clone();
Node* outer_head = new LoopNode(entry_control, outer_back_branch);
IdealLoopTree* outer_ilt = insert_outer_loop(loop, outer_head->as_Loop(), outer_back_branch);
const bool body_populated = true;
register_control(outer_head, outer_ilt, entry_control, body_populated);
_igvn.register_new_node_with_optimizer(inner_exit_branch);
set_loop(inner_exit_branch, outer_ilt);
set_idom(inner_exit_branch, exit_test, dom_depth(exit_branch));
outer_exit_test->set_req(0, inner_exit_branch);
register_control(outer_exit_test, outer_ilt, inner_exit_branch, body_populated);
_igvn.replace_input_of(exit_branch, 0, outer_exit_test);
set_idom(exit_branch, outer_exit_test, dom_depth(exit_branch));
outer_back_branch->set_req(0, outer_exit_test);
register_control(outer_back_branch, outer_ilt, outer_exit_test, body_populated);
_igvn.replace_input_of(x, LoopNode::EntryControl, outer_head);
set_idom(x, outer_head, dom_depth(x));
// add an iv phi to the outer loop and use it to compute the inner
// loop iteration limit
Node* outer_phi = phi->clone();
outer_phi->set_req(0, outer_head);
register_new_node(outer_phi, outer_head);
Node* adjusted_limit = limit;
if (phi_incr != NULL) {
// If compare points directly to the phi we need to adjust the
// compare so that it points to the incr.
Node* long_stride = _igvn.longcon(stride_con);
set_ctrl(long_stride, C->root());
adjusted_limit = new AddLNode(limit, long_stride);
_igvn.register_new_node_with_optimizer(adjusted_limit);
}
// Remaining distance to the limit, clamped at zero; operand order depends
// on the direction of the loop.
Node* inner_iters_max = NULL;
if (stride_con > 0) {
inner_iters_max = MaxNode::max_diff_with_zero(adjusted_limit, outer_phi, TypeLong::LONG, _igvn);
} else {
inner_iters_max = MaxNode::max_diff_with_zero(outer_phi, adjusted_limit, TypeLong::LONG, _igvn);
}
Node* inner_iters_limit = _igvn.longcon(iters_limit);
// inner_iters_max may not fit in a signed integer (iterating from
// Long.MIN_VALUE to Long.MAX_VALUE for instance). Use an unsigned
// min.
Node* inner_iters_actual = MaxNode::unsigned_min(inner_iters_max, inner_iters_limit, TypeLong::make(0, iters_limit, Type::WidenMin), _igvn);
Node* inner_iters_actual_int = new ConvL2INode(inner_iters_actual);
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
Node* zero = _igvn.intcon(0);
set_ctrl(zero, C->root());
// The inner iv starts at 0: for a negative stride it counts down, so the
// inner limit is negated.
if (stride_con < 0) {
inner_iters_actual_int = new SubINode(zero, inner_iters_actual_int);
_igvn.register_new_node_with_optimizer(inner_iters_actual_int);
}
// Clone the iv data nodes as an integer iv
Node* int_stride = _igvn.intcon((int)stride_con);
set_ctrl(int_stride, C->root());
Node* inner_phi = new PhiNode(x->in(0), TypeInt::INT);
Node* inner_incr = new AddINode(inner_phi, int_stride);
// Keep the iv on the same side of the compare as in the long exit test:
// the test of the original Bool is reused as is for the new Bool.
Node* inner_cmp = NULL;
if (cmp->in(1) == incr || cmp->in(1) == phi) {
inner_cmp = new CmpINode(inner_incr, inner_iters_actual_int);
} else {
assert(cmp->in(2) == incr || cmp->in(2) == phi, "bad iv shape");
inner_cmp = new CmpINode(inner_iters_actual_int, inner_incr);
}
Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test);
inner_phi->set_req(LoopNode::EntryControl, zero);
inner_phi->set_req(LoopNode::LoopBackControl, inner_incr);
register_new_node(inner_phi, x);
register_new_node(inner_incr, x);
register_new_node(inner_cmp, x);
register_new_node(inner_bol, x);
// The inner exit test now tests the int iv; its clone (outer_exit_test)
// still uses the original long condition.
_igvn.replace_input_of(exit_test, 1, inner_bol);
// Add a predicate to guarantee limit adjustment doesn't overflow
if (limit_check_required) {
assert(phi_incr != NULL, "only when exit test must be transformed");
ProjNode *limit_check_proj = find_predicate_insertion_point(outer_head->in(LoopNode::EntryControl), Deoptimization::Reason_loop_limit_check);
assert(limit_check_proj != NULL, "was tested before");
IfNode* check_iff = limit_check_proj->in(0)->as_If();
Node* cmp_limit;
Node* bol;
if (stride_con > 0) {
cmp_limit = new CmpLNode(limit, _igvn.longcon(max_jlong - stride_con));
bol = new BoolNode(cmp_limit, BoolTest::le);
} else {
cmp_limit = new CmpLNode(limit, _igvn.longcon(min_jlong - stride_con));
bol = new BoolNode(cmp_limit, BoolTest::ge);
}
insert_loop_limit_check(limit_check_proj, cmp_limit, bol);
// Re-link the control flow so that the newly inserted check ends up right
// above outer_head, below the pre-existing predicate, then patch the
// dominator information accordingly.
Node* new_predicate = limit_check_proj->in(0)->in(0);
Node* above_predicate = new_predicate->in(0)->in(0);
Node* entry = outer_head->in(LoopNode::EntryControl);
_igvn.replace_input_of(limit_check_proj->in(0), 0, above_predicate);
_igvn.replace_input_of(new_predicate->in(0), 0, entry);
_igvn.replace_input_of(outer_head, LoopNode::EntryControl, new_predicate);
set_idom(new_predicate->in(0), entry, dom_depth(entry));
set_idom(new_predicate, new_predicate->in(0), dom_depth(entry));
Node* region = new_predicate->in(0)->as_If()->proj_out(new_predicate->Opcode() == Op_IfFalse)->unique_ctrl_out();
assert(region->is_Region(), "should be region merging predicates");
set_idom(region, entry, dom_depth(entry));
set_idom(limit_check_proj->in(0), above_predicate, dom_depth(above_predicate));
}
LoopNode* inner_head = x->as_Loop();
// Clone inner loop phis to outer loop
for (uint i = 0; i < inner_head->outcnt(); i++) {
Node* u = inner_head->raw_out(i);
if (u->is_Phi() && u != inner_phi && u != phi) {
assert(u->in(0) == inner_head, "inconsistent");
Node* clone = u->clone();
clone->set_req(0, outer_head);
register_new_node(clone, outer_head);
_igvn.replace_input_of(u, LoopNode::EntryControl, clone);
}
}
// Replace inner loop long iv phi as inner loop int iv phi + outer
// loop iv phi
long_loop_replace_long_iv(phi, inner_phi, outer_phi, inner_head);
// Replace inner loop long iv incr with inner loop int incr + outer
// loop iv phi
long_loop_replace_long_iv(incr, inner_incr, outer_phi, inner_head);
set_subtree_ctrl(inner_iters_actual_int);
// Summary of steps from initial loop to loop nest:
//
// == old IR nodes =>
//
// entry_control: {...}
// x:
// for (long phi = init;;) {
// // phi := Phi(x, init, incr)
// // incr := AddL(phi, longcon(stride))
// exit_test:
// if (phi < limit)
// back_control: fallthrough;
// else
// exit_branch: break;
// // test happens before increment => phi == phi_incr != NULL
// long incr = phi + stride;
// ... use phi and incr ...
// phi = incr;
// }
//
// == new IR nodes (just before final peel) =>
//
// entry_control: {...}
// long adjusted_limit = limit + stride; //because phi_incr != NULL
// assert(!limit_check_required || (extralong)limit + stride == adjusted_limit); // else deopt
// ulong inner_iters_limit = max_jint - ABS(stride) - 1; //near 0x7FFFFFF0
// outer_head:
// for (long outer_phi = init;;) {
// // outer_phi := phi->clone(), in(0):=outer_head, => Phi(outer_head, init, incr)
// // REPLACE phi => AddL(outer_phi, I2L(inner_phi))
// // REPLACE incr => AddL(outer_phi, I2L(inner_incr))
// // SO THAT outer_phi := Phi(outer_head, init, AddL(outer_phi, I2L(inner_incr)))
// ulong inner_iters_max = (ulong) MAX(0, ((extralong)adjusted_limit - outer_phi) * SGN(stride));
// int inner_iters_actual_int = (int) MIN(inner_iters_limit, inner_iters_max) * SGN(stride);
// inner_head: x: //in(1) := outer_head
// int inner_phi;
// for (inner_phi = 0;;) {
// // inner_phi := Phi(x, intcon(0), inner_phi + stride)
// int inner_incr = inner_phi + stride;
// bool inner_bol = (inner_incr < inner_iters_actual_int);
// exit_test: //exit_test->in(1) := inner_bol;
// if (inner_bol) // WAS (phi < limit)
// back_control: fallthrough;
// else
// inner_exit_branch: break; //exit_branch->clone()
// ... use phi=>(outer_phi+inner_phi) and incr=>(outer_phi+inner_incr) ...
// inner_phi = inner_phi + stride; // inner_incr
// }
// outer_exit_test: //exit_test->clone(), in(0):=inner_exit_branch
// if ((outer_phi+inner_phi) < limit) // WAS (phi < limit)
// outer_back_branch: fallthrough; //back_control->clone(), in(0):=outer_exit_test
// else
// exit_branch: break; //in(0) := outer_exit_test
// }
// Peel one iteration of the loop and use the safepoint at the end
// of the peeled iteration to insert empty predicates. If there is no
// well positioned safepoint, peel to guarantee a safepoint in the outer
// loop.
if (safepoint != NULL || !loop->_has_call) {
old_new.clear();
do_peeling(loop, old_new);
}
if (safepoint != NULL) {
// old_new maps the safepoint to its copy in the peeled iteration.
SafePointNode* cloned_sfpt = old_new[safepoint->_idx]->as_SafePoint();
if (UseLoopPredicate) {
add_empty_predicate(Deoptimization::Reason_predicate, inner_head, outer_ilt, cloned_sfpt);
}
if (UseProfiledLoopPredicate) {
add_empty_predicate(Deoptimization::Reason_profile_predicate, inner_head, outer_ilt, cloned_sfpt);
}
add_empty_predicate(Deoptimization::Reason_loop_limit_check, inner_head, outer_ilt, cloned_sfpt);
}
// Statistics only (non-product builds): one more loop nest created.
#ifndef PRODUCT
Atomic::inc(&_long_loop_nests);
#endif
// Flag the inner loop so later phases can check that it does become a
// counted loop.
inner_head->mark_transformed_long_loop();
return true;
}
#ifdef ASSERT
// convert an int counted loop to a long counted to stress handling of
// long counted loops
//
// cmp:  the CmpI of the loop exit test
// phi:  the iv Phi of the loop
// loop: loop tree entry of the loop
//
// Returns true if the iv computation was rewritten with long nodes, false
// if a CastII was found among the iv nodes (nothing is changed then).
bool PhaseIdealLoop::convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop) {
Unique_Node_List iv_nodes;
Node_List old_new;
iv_nodes.push(cmp);
bool failed = false;
// Step 1: starting from the compare, collect the nodes of the iv
// computation that are in the loop and create an (unwired) long
// counterpart for each of them. old_new maps an int node to its long node.
for (uint i = 0; i < iv_nodes.size() && !failed; i++) {
Node* n = iv_nodes.at(i);
switch(n->Opcode()) {
case Op_Phi: {
Node* clone = new PhiNode(n->in(0), TypeLong::LONG);
old_new.map(n->_idx, clone);
break;
}
case Op_CmpI: {
Node* clone = new CmpLNode(NULL, NULL);
old_new.map(n->_idx, clone);
break;
}
case Op_AddI: {
Node* clone = new AddLNode(NULL, NULL);
old_new.map(n->_idx, clone);
break;
}
case Op_CastII: {
// Not handled: give up on this loop.
failed = true;
break;
}
default:
DEBUG_ONLY(n->dump());
fatal("unexpected");
}
// Inputs whose control is in the loop are part of the iv computation too.
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
if (in == NULL) {
continue;
}
if (loop->is_member(get_loop(get_ctrl(in)))) {
iv_nodes.push(in);
}
}
}
if (failed) {
// Get rid of the long nodes created so far.
for (uint i = 0; i < iv_nodes.size(); i++) {
Node* n = iv_nodes.at(i);
Node* clone = old_new[n->_idx];
if (clone != NULL) {
_igvn.remove_dead_node(clone);
}
}
return false;
}
// Step 2: wire the inputs of the long nodes. An input with no long
// counterpart is an int value from outside the collected set: it is
// widened with a ConvI2L.
for (uint i = 0; i < iv_nodes.size(); i++) {
Node* n = iv_nodes.at(i);
Node* clone = old_new[n->_idx];
for (uint i = 1; i < n->req(); i++) {
Node* in = n->in(i);
if (in == NULL) {
continue;
}
Node* in_clone = old_new[in->_idx];
if (in_clone == NULL) {
assert(_igvn.type(in)->isa_int(), "");
in_clone = new ConvI2LNode(in);
_igvn.register_new_node_with_optimizer(in_clone);
set_subtree_ctrl(in_clone);
}
if (in_clone->in(0) == NULL) {
// The input temporarily gets top as control while it is wired in.
// NOTE(review): the reason for this is not visible here; presumably
// set_req() must not see a not yet wired long node as dead - confirm.
in_clone->set_req(0, C->top());
clone->set_req(i, in_clone);
in_clone->set_req(0, NULL);
} else {
clone->set_req(i, in_clone);
}
}
_igvn.register_new_node_with_optimizer(clone);
}
set_ctrl(old_new[phi->_idx], phi->in(0));
// Step 3: redirect the uses of the int nodes that are outside of the
// collected set. Uses of the compare get the long compare directly; uses
// of any other node get the long value narrowed back with a ConvL2I
// (created lazily, at most one per node).
for (uint i = 0; i < iv_nodes.size(); i++) {
Node* n = iv_nodes.at(i);
Node* clone = old_new[n->_idx];
set_subtree_ctrl(clone);
Node* m = n->Opcode() == Op_CmpI ? clone : NULL;
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* u = n->fast_out(i);
if (iv_nodes.member(u)) {
continue;
}
if (m == NULL) {
m = new ConvL2INode(clone);
_igvn.register_new_node_with_optimizer(m);
set_subtree_ctrl(m);
}
_igvn.rehash_node_delayed(u);
int nb = u->replace_edge(n, m);
// Out edges of n were removed: adjust the iterator and its bound.
--i, imax -= nb;
}
}
return true;
}
#endif
//------------------------------is_counted_loop--------------------------------
bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
PhaseGVN *gvn = &_igvn;
@ -514,7 +1240,6 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
float cl_prob = 0;
Node* incr = NULL;
Node* limit = NULL;
Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
if (cmp == NULL || cmp->Opcode() != Op_CmpI) {
return false; // Avoid pointer & float & 64-bit compares
@ -640,16 +1365,8 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
assert(trunc1 == NULL && trunc2 == NULL, "no truncation for int");
}
// If the condition is inverted and we will be rolling
// through MININT to MAXINT, then bail out.
if (bt == BoolTest::eq || // Bail out, but this loop trips at most twice!
// Odd stride
(bt == BoolTest::ne && stride_con != 1 && stride_con != -1) ||
// Count down loop rolls through MAXINT
((bt == BoolTest::le || bt == BoolTest::lt) && stride_con < 0) ||
// Count up loop rolls through MININT
((bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0)) {
return false; // Bail out
if (!condition_stride_ok(bt, stride_con)) {
return false;
}
const TypeInt* init_t = gvn->type(init_trip)->is_int();
@ -722,6 +1439,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
if (sov < 0) {
return false; // Bailout: integer overflow is certain.
}
assert(!x->as_Loop()->is_transformed_long_loop(), "long loop was transformed");
// Generate loop's limit check.
// Loop limit check predicate should be near the loop.
ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
@ -809,6 +1527,12 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
}
}
#ifdef ASSERT
if (!x->as_Loop()->is_transformed_long_loop() && StressLongCountedLoop > 0 && trunc1 == NULL && convert_to_long_loop(cmp, phi, loop)) {
return false;
}
#endif
if (phi_incr != NULL) {
// If compare points directly to the phi we need to adjust
// the compare so that it points to the incr. Limit have
@ -995,6 +1719,12 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
loop = outer_ilt;
}
#ifndef PRODUCT
if (x->as_Loop()->is_transformed_long_loop()) {
Atomic::inc(&_long_loop_counted_loops);
}
#endif
return true;
}
@ -2574,11 +3304,14 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
// Look for induction variables
phase->replace_parallel_iv(this);
} else if (_parent != NULL && !_irreducible) {
} else {
assert(!_head->is_Loop() || !_head->as_Loop()->is_transformed_long_loop(), "transformation to counted loop should not fail");
if (_parent != NULL && !_irreducible) {
// Not a counted loop. Keep one safepoint.
bool keep_one_sfpt = true;
remove_safepoints(phase, keep_one_sfpt);
}
}
// Recursively
assert(loop->_child != this || (loop->_head->as_Loop()->is_OuterStripMinedLoop() && _head->as_CountedLoop()->is_strip_mined()), "what kind of loop was added?");
@ -3223,7 +3956,12 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
return;
}
if (ReassociateInvariants) {
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* lpt = iter.current();
is_long_counted_loop(lpt->_head, lpt, worklist);
}
if (ReassociateInvariants && !C->major_progress()) {
// Reassociate invariants and prep for split_thru_phi
for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
IdealLoopTree* lpt = iter.current();
@ -3251,7 +3989,7 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
// Check for aggressive application of split-if and other transforms
// that require basic-block info (like cloning through Phi's)
if( SplitIfBlocks && do_split_ifs ) {
if (!C->major_progress() && SplitIfBlocks && do_split_ifs) {
visited.clear();
split_if_with_blocks( visited, nstack);
NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
@ -3369,8 +4107,11 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
//------------------------------print_statistics-------------------------------
// Definitions of the PhaseIdealLoop statistics counters; all start at zero and
// are reported by print_statistics().
int PhaseIdealLoop::_loop_invokes=0;// Count of PhaseIdealLoop invokes
int PhaseIdealLoop::_loop_work=0; // Sum of PhaseIdealLoop x unique
// Long-loop counters. _long_loop_counted_loops is bumped with Atomic::inc
// elsewhere in this file when a loop head reporting is_transformed_long_loop()
// is accepted as a counted loop.
volatile int PhaseIdealLoop::_long_loop_candidates=0; // Number of long loops seen
volatile int PhaseIdealLoop::_long_loop_nests=0; // Number of long loops successfully transformed to a nest
volatile int PhaseIdealLoop::_long_loop_counted_loops=0; // Number of long loops successfully transformed to a counted loop
// Print the accumulated PhaseIdealLoop statistics counters to tty.
// The long-loop figures are printed in the order
// counted loops / nests / candidates.
// NOTE(review): two print_cr calls are present; the first looks like the
// pre-change line and the second its replacement that appends the long-loop
// counters -- confirm that only one of them is meant to remain.
void PhaseIdealLoop::print_statistics() {
tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d", _loop_invokes, _loop_work);
tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d, long loops=%d/%d/%d", _loop_invokes, _loop_work, _long_loop_counted_loops, _long_loop_nests, _long_loop_candidates);
}
//------------------------------verify-----------------------------------------

View file

@ -76,7 +76,8 @@ protected:
IsMultiversioned=16384,
StripMined=32768,
SubwordLoop=65536,
ProfileTripFailed=131072};
ProfileTripFailed=131072,
TransformedLongLoop=262144};
char _unswitch_count;
enum { _unswitch_max=3 };
char _postloop_flags;
@ -101,6 +102,7 @@ public:
// Flag predicates: each one reports whether its bit is currently set in _loop_flags.
bool is_strip_mined() const { return (_loop_flags & StripMined) != 0; }
bool is_profile_trip_failed() const { return (_loop_flags & ProfileTripFailed) != 0; }
bool is_subword_loop() const { return (_loop_flags & SubwordLoop) != 0; }
bool is_transformed_long_loop() const { return (_loop_flags & TransformedLongLoop) != 0; }
// Flag setters: OR the named bit into _loop_flags, leaving the other bits untouched.
void mark_partial_peel_failed() { _loop_flags = _loop_flags | PartialPeelFailed; }
void mark_has_reductions() { _loop_flags = _loop_flags | HasReductions; }
@ -115,6 +117,7 @@ public:
// Drop the StripMined bit from _loop_flags; every other bit is preserved.
void clear_strip_mined() { _loop_flags = _loop_flags & ~StripMined; }
// Flag setters: OR the named bit into _loop_flags, leaving the other bits untouched.
void mark_profile_trip_failed() { _loop_flags = _loop_flags | ProfileTripFailed; }
void mark_subword_loop() { _loop_flags = _loop_flags | SubwordLoop; }
void mark_transformed_long_loop() { _loop_flags = _loop_flags | TransformedLongLoop; }
// Unswitching accessors; both widen their source value to int.
int unswitch_max() { return static_cast<int>(_unswitch_max); }
int unswitch_count() { return static_cast<int>(_unswitch_count); }
@ -1049,6 +1052,13 @@ public:
PhiNode* loop_iv_phi(Node* xphi, Node* phi_incr, Node* x, IdealLoopTree* loop);
// Returns true when the loop is accepted as a counted loop. 'loop' is taken by
// reference and can be reassigned by the callee (the implementation assigns
// an outer loop tree to it on one path before returning true).
bool is_counted_loop(Node* n, IdealLoopTree* &loop);
void long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head);
// Invoked once per loop-tree entry with that entry's _head as 'x', the entry
// itself as 'loop' and a worklist as 'old_new'.
// NOTE(review): the return value is ignored at that call site -- confirm intent.
bool is_long_counted_loop(Node* x, IdealLoopTree* loop, Node_List &old_new);
#ifdef ASSERT
// Debug-only stress helper: tried from counted-loop detection when
// StressLongCountedLoop > 0; a true result makes that detection return false.
bool convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop);
#endif
void add_empty_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt);
SafePointNode* find_safepoint(Node* back_control, Node* x, IdealLoopTree* loop);
IdealLoopTree* insert_outer_loop(IdealLoopTree* loop, LoopNode* outer_l, Node* outer_ift);
IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
IdealLoopTree* loop, float cl_prob, float le_fcnt,
@ -1471,6 +1481,9 @@ public:
static void print_statistics();
static int _loop_invokes; // Count of PhaseIdealLoop invokes
static int _loop_work; // Sum of PhaseIdealLoop x _unique
static volatile int _long_loop_candidates; // Number of long loops seen
static volatile int _long_loop_nests; // Number of long loops successfully transformed to a nest
static volatile int _long_loop_counted_loops; // Number of long loops successfully transformed to a counted loop
#endif
void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;

View file

@ -1038,7 +1038,9 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
// Do not clone the trip counter through on a CountedLoop
// (messes up the canonical shape).
if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
if ((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_transformed_long_loop())) && n->Opcode() == Op_AddI) {
return n;
}
// Check for having no control input; not pinned. Allow
// dominating control.

View file

@ -94,44 +94,6 @@ void PhaseMacroExpand::migrate_outs(Node *old, Node *target) {
assert(old->outcnt() == 0, "all uses must be deleted");
}
// Append the debug-info inputs of 'oldcall' to 'newcall' and give 'newcall' its
// own JVMState chain whose offsets are rebased for the new argument count.
void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcall) {
  // First debug-info input of each call, taken from its signature's domain count.
  const uint src_first = oldcall->tf()->domain()->cnt();
  const uint dst_first = newcall->tf()->domain()->cnt();
  // Amount by which every JVMState offset has to move.
  const int shift = dst_first - src_first;
  assert (dst_first == newcall->req(), "argument count mismatch");

  // The same SafePointScalarObject node can be referenced from several debug
  // slots; the Dict remembers which ones have already been cloned.
  Dict* clones = new Dict(cmpkey,hashkey);
  uint idx = src_first;
  while (idx < oldcall->req()) {
    Node* input = oldcall->in(idx);
    if (input != NULL && input->is_SafePointScalarObject()) {
      // Scalar-object descriptions are cloned rather than shared.
      SafePointScalarObjectNode* sosn = input->as_SafePointScalarObject();
      const uint unique_before = C->unique();
      Node* copy = sosn->clone(clones);
      if (C->unique() != unique_before) {
        // The node counter moved, so clone() really created a node:
        // reset its control edge and register it.
        copy->set_req(0, C->root());
        copy = transform_later(copy);
      }
      input = copy;
    }
    newcall->add_req(input);
    idx++;
  }

  // The JVMState may be shared, so work on a deep clone instead of the original.
  JVMState* fresh = NULL;
  if (oldcall->jvms() != NULL) {
    fresh = oldcall->jvms()->clone_deep(C);
  }
  newcall->set_jvms(fresh);

  // Walk the caller chain, retargeting each state at newcall and shifting offsets.
  JVMState* cur = newcall->jvms();
  while (cur != NULL) {
    cur->set_map(newcall);
    cur->set_locoff(cur->locoff() + shift);
    cur->set_stkoff(cur->stkoff() + shift);
    cur->set_monoff(cur->monoff() + shift);
    cur->set_scloff(cur->scloff() + shift);
    cur->set_endoff(cur->endoff() + shift);
    cur = cur->caller();
  }
}
Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) {
Node* cmp;
if (mask != 0) {
@ -184,7 +146,7 @@ CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* sl
if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0);
if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1);
if (parm2 != NULL) call->init_req(TypeFunc::Parms+2, parm2);
copy_call_debug_info(oldcall, call);
call->copy_call_debug_info(&_igvn, oldcall);
call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
_igvn.replace_node(oldcall, call);
transform_later(call);
@ -1473,7 +1435,7 @@ void PhaseMacroExpand::expand_allocate_common(
// Copy debug information and adjust JVMState information, then replace
// allocate node with the call
copy_call_debug_info((CallNode *) alloc, call);
call->copy_call_debug_info(&_igvn, alloc);
if (expand_fast_path) {
call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
} else {

View file

@ -958,7 +958,7 @@ MergeMemNode* PhaseMacroExpand::generate_slow_arraycopy(ArrayCopyNode *ac,
call->init_req(TypeFunc::Parms+2, dest);
call->init_req(TypeFunc::Parms+3, dest_offset);
call->init_req(TypeFunc::Parms+4, copy_length);
copy_call_debug_info(ac, call);
call->copy_call_debug_info(&_igvn, ac);
call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
_igvn.replace_node(ac, call);

View file

@ -2254,23 +2254,7 @@ void Parse::return_current(Node* value) {
//------------------------------add_safepoint----------------------------------
void Parse::add_safepoint() {
// See if we can avoid this safepoint. No need for a SafePoint immediately
// after a Call (except Leaf Call) or another SafePoint.
Node *proj = control();
uint parms = TypeFunc::Parms+1;
if( proj->is_Proj() ) {
Node *n0 = proj->in(0);
if( n0->is_Catch() ) {
n0 = n0->in(0)->in(0);
assert( n0->is_Call(), "expect a call here" );
}
if( n0->is_Call() ) {
if( n0->as_Call()->guaranteed_safepoint() )
return;
} else if( n0->is_SafePoint() && n0->req() >= parms ) {
return;
}
}
// Clear out dead values from the debug info.
kill_dead_locals();