8223051: support loops with long (64b) trip counts

Reviewed-by: vlivanov, thartmann, jrose
2025-09-18 01:54:47 +02:00 · 2020-10-19 11:30:13 +00:00 · 2020-10-19 11:30:13 +00:00 · e76de18956
commit e76de18956
parent e9be2db7ac
11 changed files with 839 additions and 79 deletions
--- a/src/hotspot/share/opto/c2_globals.hpp
+++ b/src/hotspot/share/opto/c2_globals.hpp
@ -787,7 +787,13 @@
          "Move predicates out of loops based on profiling data")           \
                                                                            \
  product(bool, ExpandSubTypeCheckAtParseTime, false, DIAGNOSTIC,           \
-          "Do not use subtype check macro node")
+          "Do not use subtype check macro node")                            \
+                                                                            \
+  develop(uintx, StressLongCountedLoop, 0,                                  \
+          "if > 0, convert int counted loops to long counted loops "        \
+          "to stress handling of long counted loops: run inner loop "       \
+          "for at most jint_max / StressLongCountedLoop")                   \
+          range(0, max_juint)                                               \

 // end of C2_FLAGS

--- a/src/hotspot/share/opto/callnode.cpp
+++ b/src/hotspot/share/opto/callnode.cpp
@ -967,6 +967,46 @@ bool CallJavaNode::cmp( const Node &n ) const {
  return CallNode::cmp(call) && _method == call._method &&
         _override_symbolic_info == call._override_symbolic_info;
 }
+
+void CallJavaNode::copy_call_debug_info(PhaseIterGVN* phase, SafePointNode *sfpt) {
+  // Append sfpt's debug-info inputs to this call and give this call a deep copy of sfpt's JVMState chain with shifted offsets.
+  uint old_dbg_start = sfpt->is_Call() ? sfpt->as_Call()->tf()->domain()->cnt() : (uint)TypeFunc::Parms+1;
+  uint new_dbg_start = tf()->domain()->cnt();
+  int jvms_adj  = new_dbg_start - old_dbg_start; // shift applied to every JVMState offset below
+  assert (new_dbg_start == req(), "argument count mismatch");
+  Compile* C = phase->C;
+
+  // A SafePointScalarObject node could be referenced several times in the debug info.
+  // Use a Dict to record cloned nodes so that each one is cloned only once.
+  Dict* sosn_map = new Dict(cmpkey,hashkey);
+  for (uint i = old_dbg_start; i < sfpt->req(); i++) {
+    Node* old_in = sfpt->in(i);
+    // Clone SafePointScalarObjectNodes; every other input is appended as is.
+    if (old_in != NULL && old_in->is_SafePointScalarObject()) {
+      SafePointScalarObjectNode* old_sosn = old_in->as_SafePointScalarObject();
+      bool new_node;
+      Node* new_in = old_sosn->clone(sosn_map, new_node);
+      if (new_node) { // First time this node is cloned (not a cached clone)?
+        new_in->set_req(0, C->root()); // reset control edge
+        new_in = phase->transform(new_in); // Register new node.
+      }
+      old_in = new_in;
+    }
+    add_req(old_in);
+  }
+
+  // JVMS may be shared so clone it before we modify it
+  set_jvms(sfpt->jvms() != NULL ? sfpt->jvms()->clone_deep(C) : NULL);
+  for (JVMState *jvms = this->jvms(); jvms != NULL; jvms = jvms->caller()) {
+    jvms->set_map(this); // every cloned JVMState now refers to this call
+    jvms->set_locoff(jvms->locoff()+jvms_adj);
+    jvms->set_stkoff(jvms->stkoff()+jvms_adj);
+    jvms->set_monoff(jvms->monoff()+jvms_adj);
+    jvms->set_scloff(jvms->scloff()+jvms_adj);
+    jvms->set_endoff(jvms->endoff()+jvms_adj);
+  }
+}
+
 #ifdef ASSERT
 bool CallJavaNode::validate_symbolic_info() const {
  if (method() == NULL) {
@ -1159,7 +1199,9 @@ Node* SafePointNode::Identity(PhaseGVN* phase) {
  if( in(TypeFunc::Control)->is_SafePoint() )
    return in(TypeFunc::Control);

-  if( in(0)->is_Proj() ) {
+  // Transforming long counted loops requires a safepoint node. Do not
+  // eliminate a safepoint until loop opts are over.
+  if (in(0)->is_Proj() && !phase->C->major_progress()) {
    Node *n0 = in(0)->in(0);
    // Check if he is a call projection (except Leaf Call)
    if( n0->is_Catch() ) {
@ -1332,11 +1374,13 @@ uint SafePointScalarObjectNode::match_edge(uint idx) const {
 }

 SafePointScalarObjectNode*
-SafePointScalarObjectNode::clone(Dict* sosn_map) const {
+SafePointScalarObjectNode::clone(Dict* sosn_map, bool& new_node) const {
  void* cached = (*sosn_map)[(void*)this];
  if (cached != NULL) {
+    new_node = false;
    return (SafePointScalarObjectNode*)cached;
  }
+  new_node = true;
  SafePointScalarObjectNode* res = (SafePointScalarObjectNode*)Node::clone();
  sosn_map->Insert((void*)this, (void*)res);
  return res;
--- a/src/hotspot/share/opto/callnode.hpp
+++ b/src/hotspot/share/opto/callnode.hpp
@ -527,7 +527,7 @@ public:
  // corresponds appropriately to "this" in "new_call".  Assumes that
  // "sosn_map" is a map, specific to the translation of "s" to "new_call",
  // mapping old SafePointScalarObjectNodes to new, to avoid multiple copies.
-  SafePointScalarObjectNode* clone(Dict* sosn_map) const;
+  SafePointScalarObjectNode* clone(Dict* sosn_map, bool& new_node) const;

 #ifndef PRODUCT
  virtual void              dump_spec(outputStream *st) const;
@ -635,6 +635,8 @@ public:

  bool is_call_to_arraycopystub() const;

+  virtual void copy_call_debug_info(PhaseIterGVN* phase, SafePointNode *sfpt) {}
+
 #ifndef PRODUCT
  virtual void        dump_req(outputStream *st = tty) const;
  virtual void        dump_spec(outputStream *st) const;
@ -677,6 +679,7 @@ public:
  bool  is_method_handle_invoke() const    { return _method_handle_invoke; }
  void  set_override_symbolic_info(bool f) { _override_symbolic_info = f; }
  bool  override_symbolic_info() const     { return _override_symbolic_info; }
+  void copy_call_debug_info(PhaseIterGVN* phase, SafePointNode *sfpt);

  DEBUG_ONLY( bool validate_symbolic_info() const; )

--- a/src/hotspot/share/opto/compile.cpp
+++ b/src/hotspot/share/opto/compile.cpp
@ -3426,6 +3426,7 @@ void Compile::final_graph_reshaping_main_switch(Node* n, Final_Reshape_Counts& f
    }
    break;
  case Op_Loop:
+    assert(!n->as_Loop()->is_transformed_long_loop(), "should have been turned into a counted loop");
  case Op_CountedLoop:
  case Op_OuterStripMinedLoop:
    if (n->as_Loop()->is_inner_loop()) {
--- a/src/hotspot/share/opto/convertnode.cpp
+++ b/src/hotspot/share/opto/convertnode.cpp
@ -449,9 +449,13 @@ const Type* ConvL2INode::Value(PhaseGVN* phase) const {
  const Type *t = phase->type( in(1) );
  if( t == Type::TOP ) return Type::TOP;
  const TypeLong *tl = t->is_long();
-  if (tl->is_con())
+  if (tl->is_con()) {
  // Easy case.
  return TypeInt::make((jint)tl->get_con());
+  }
+  if (tl->_lo >= min_jint && tl->_hi <= max_jint) {
+    return TypeInt::make((jint)tl->_lo, (jint)tl->_hi, tl->_widen);
+  }
  return bottom_type();
 }

--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -40,8 +40,11 @@
 #include "opto/loopnode.hpp"
 #include "opto/movenode.hpp"
 #include "opto/mulnode.hpp"
+#include "opto/opaquenode.hpp"
 #include "opto/rootnode.hpp"
+#include "opto/runtime.hpp"
 #include "opto/superword.hpp"
+#include "runtime/sharedRuntime.hpp"
 #include "utilities/powerOfTwo.hpp"

 //=============================================================================
@ -501,6 +504,729 @@ static int check_stride_overflow(jint stride_con, const TypeInt* limit_t) {
  return 0;
 }

+// Reports whether limit + stride_con can leave the jlong range, judged from
+// the _lo/_hi bounds recorded in limit_t:
+//   -1  even the most favorable bound overflows (callers treat this as a
+//       certain overflow),
+//    1  only part of the range overflows,
+//    0  no value in the range overflows.
+static int check_stride_overflow(jlong stride_con, const TypeLong* limit_t) {
+  if (stride_con > 0) {
+    // Counting up: the limit must not exceed max_jlong - stride_con.
+    const jlong highest_safe_limit = max_jlong - stride_con;
+    if (limit_t->_lo > highest_safe_limit) {
+      return -1;
+    }
+    return (limit_t->_hi > highest_safe_limit) ? 1 : 0;
+  }
+  // Counting down: the limit must not go below min_jlong - stride_con.
+  const jlong lowest_safe_limit = min_jlong - stride_con;
+  if (limit_t->_hi < lowest_safe_limit) {
+    return -1;
+  }
+  return (limit_t->_lo < lowest_safe_limit) ? 1 : 0;
+}
+
+// Returns true when an exit test of kind bt together with the constant
+// stride stride_con is acceptable for counted loop conversion, and false
+// when the caller must bail out. Shared by the int and long variants, so
+// "minimum"/"maximum" below refer to the induction variable's own type.
+static bool condition_stride_ok(BoolTest::mask bt, jlong stride_con) {
+  // Bail out on eq, but such a loop trips at most twice anyway.
+  if (bt == BoolTest::eq) {
+    return false;
+  }
+  // With ne only a stride of +1 or -1 is accepted (no odd strides).
+  if (bt == BoolTest::ne) {
+    return stride_con == 1 || stride_con == -1;
+  }
+  // A count-down loop tested with < or <= would roll through the extreme
+  // value of the range before the test could fail.
+  const bool less_test = (bt == BoolTest::le || bt == BoolTest::lt);
+  if (less_test && stride_con < 0) {
+    return false;
+  }
+  // Likewise for a count-up loop tested with > or >=.
+  const bool greater_test = (bt == BoolTest::ge || bt == BoolTest::gt);
+  if (greater_test && stride_con > 0) {
+    return false;
+  }
+  return true;
+}
+
+// Redirects every use of the long value iv_to_replace to the expression
+// AddL(outer_phi, ConvI2L(inner_iv)). Both new nodes are registered with
+// inner_head as their control.
+void PhaseIdealLoop::long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head) {
+  Node* widened_iv = new ConvI2LNode(inner_iv, TypeLong::INT);
+  register_new_node(widened_iv, inner_head);
+  Node* long_iv = new AddLNode(outer_phi, widened_iv);
+  register_new_node(long_iv, inner_head);
+  // Walk the uses from the last one. replace_edge() reports how many edges
+  // it rewired and the iterator is moved back by that count.
+  for (DUIterator_Last imin, i = iv_to_replace->last_outs(imin); i >= imin;) {
+    Node* use = iv_to_replace->last_out(i);
+#ifdef ASSERT
+    // A use whose control is not dominated by the inner loop head must be a
+    // Phi whose matching region input is itself dominated by the head.
+    if (!is_dominator(inner_head, ctrl_or_self(use))) {
+      assert(use->is_Phi(), "should be a Phi");
+      for (uint j = 1; j < use->req(); j++) {
+        if (use->in(j) == iv_to_replace) {
+          assert(is_dominator(inner_head, use->in(0)->in(j)), "iv use above loop?");
+        }
+      }
+    }
+#endif
+    _igvn.rehash_node_delayed(use);
+    int rewired = use->replace_edge(iv_to_replace, long_iv);
+    i -= rewired;
+  }
+}
+
+void PhaseIdealLoop::add_empty_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt) {
+  if (!C->too_many_traps(reason)) { // nothing is inserted when too_many_traps() reports this reason
+    Node *cont = _igvn.intcon(1);
+    Node* opq = new Opaque1Node(C, cont);
+    _igvn.register_new_node_with_optimizer(opq);
+    Node *bol = new Conv2BNode(opq);
+    _igvn.register_new_node_with_optimizer(bol);
+    set_subtree_ctrl(bol);
+    IfNode* iff = new IfNode(inner_head->in(LoopNode::EntryControl), bol, PROB_MAX, COUNT_UNKNOWN); // placed on the current entry of inner_head
+    register_control(iff, loop, inner_head->in(LoopNode::EntryControl));
+    Node* iffalse = new IfFalseNode(iff); // false projection leads to the uncommon trap built below
+    register_control(iffalse, _ltree_root, iff);
+    Node* iftrue = new IfTrueNode(iff); // true projection becomes the new loop entry (see end of function)
+    register_control(iftrue, loop, iff);
+    C->add_predicate_opaq(opq);
+
+    int trap_request = Deoptimization::make_trap_request(reason, Deoptimization::Action_maybe_recompile);
+    address call_addr = SharedRuntime::uncommon_trap_blob()->entry_point();
+    const TypePtr* no_memory_effects = NULL;
+    JVMState* jvms = sfpt->jvms();
+    CallNode* unc = new CallStaticJavaNode(OptoRuntime::uncommon_trap_Type(), call_addr, "uncommon_trap",
+                                           jvms->bci(), no_memory_effects);
+
+    Node* mem = NULL;
+    Node* i_o = NULL;
+    if (sfpt->is_Call()) { // a call's state is taken from its projections, a plain safepoint's from its inputs
+      mem = sfpt->proj_out(TypeFunc::Memory);
+      i_o = sfpt->proj_out(TypeFunc::I_O);
+    } else {
+      mem = sfpt->memory();
+      i_o = sfpt->i_o();
+    }
+
+    Node *frame = new ParmNode(C->start(), TypeFunc::FramePtr);
+    register_new_node(frame, C->start());
+    Node *ret = new ParmNode(C->start(), TypeFunc::ReturnAdr);
+    register_new_node(ret, C->start());
+
+    unc->init_req(TypeFunc::Control, iffalse);
+    unc->init_req(TypeFunc::I_O, i_o);
+    unc->init_req(TypeFunc::Memory, mem); // may gc ptrs
+    unc->init_req(TypeFunc::FramePtr, frame);
+    unc->init_req(TypeFunc::ReturnAdr, ret);
+    unc->init_req(TypeFunc::Parms+0, _igvn.intcon(trap_request));
+    unc->set_cnt(PROB_UNLIKELY_MAG(4));
+    unc->copy_call_debug_info(&_igvn, sfpt); // debug info and JVMState come from sfpt
+
+    for (uint i = TypeFunc::Parms; i < unc->req(); i++) {
+      set_subtree_ctrl(unc->in(i));
+    }
+    register_control(unc, _ltree_root, iffalse);
+
+    Node* ctrl = new ProjNode(unc, TypeFunc::Control);
+    register_control(ctrl, _ltree_root, unc);
+    Node* halt = new HaltNode(ctrl, frame, "uncommon trap returned which should never happen" PRODUCT_ONLY(COMMA /*reachable*/false));
+    register_control(halt, _ltree_root, ctrl);
+    C->root()->add_req(halt);
+
+    _igvn.replace_input_of(inner_head, LoopNode::EntryControl, iftrue); // the loop is now entered through the predicate
+    set_idom(inner_head, iftrue, dom_depth(inner_head));
+  }
+}
+
+// Find a safepoint node that dominates the back edge. We need a
+// SafePointNode so we can use its jvm state to create empty
+// predicates. Returns NULL when no usable safepoint is found.
+SafePointNode* PhaseIdealLoop::find_safepoint(Node* back_control, Node* x, IdealLoopTree* loop) {
+  IfNode* exit_test = back_control->in(0)->as_If();
+  SafePointNode* safepoint = NULL;
+  if (exit_test->in(0)->is_SafePoint() && exit_test->in(0)->outcnt() == 1) { // safepoint whose only use is the exit test
+    safepoint = exit_test->in(0)->as_SafePoint();
+  } else {
+    Node* c = back_control;
+    while (c != x && c->Opcode() != Op_SafePoint) { // walk up the dominator chain until the loop head x
+      c = idom(c);
+    }
+
+    if (c->Opcode() == Op_SafePoint) {
+      safepoint = c->as_SafePoint();
+    }
+
+    if (safepoint == NULL) {
+      return NULL;
+    }
+
+    Node* mem = safepoint->in(TypeFunc::Memory);
+
+    // We can only use that safepoint if there's no side effect
+    // between the backedge and the safepoint.
+
+#ifdef ASSERT
+    // mm is used for book keeping
+    MergeMemNode* mm = NULL;
+    if (mem->is_MergeMem()) {
+      mm = mem->clone()->as_MergeMem();
+      for (MergeMemStream mms(mem->as_MergeMem()); mms.next_non_empty(); ) {
+        if (mms.alias_idx() != Compile::AliasIdxBot && loop != get_loop(ctrl_or_self(mms.memory()))) {
+          mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory());
+        }
+      }
+    }
+#endif
+    for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
+      Node* u = x->fast_out(i);
+      if (u->is_Phi() && u->bottom_type() == Type::MEMORY) { // only the loop's memory Phis are examined
+        Node* m = u->in(LoopNode::LoopBackControl);
+        if (u->adr_type() == TypePtr::BOTTOM) {
+          if (m->is_MergeMem() && mem->is_MergeMem()) {
+            if (m != mem DEBUG_ONLY(|| true)) {
+              for (MergeMemStream mms(m->as_MergeMem(), mem->as_MergeMem()); mms.next_non_empty2(); ) {
+                if (!mms.is_empty()) {
+                  if (mms.memory() != mms.memory2()) { // back edge state differs from the safepoint's state
+                    return NULL;
+                  }
+#ifdef ASSERT
+                  if (mms.alias_idx() != Compile::AliasIdxBot) {
+                    mm->set_memory_at(mms.alias_idx(), mem->as_MergeMem()->base_memory());
+                  }
+#endif
+                }
+              }
+            }
+          } else if (mem->is_MergeMem()) {
+            if (m != mem->as_MergeMem()->base_memory()) {
+              return NULL;
+            }
+          } else {
+            return NULL;
+          }
+        } else {
+          if (mem->is_MergeMem()) {
+            if (m != mem->as_MergeMem()->memory_at(C->get_alias_index(u->adr_type()))) {
+              return NULL;
+            }
+#ifdef ASSERT
+            mm->set_memory_at(C->get_alias_index(u->adr_type()), mem->as_MergeMem()->base_memory());
+#endif
+          } else {
+            if (m != mem) {
+              return NULL;
+            }
+          }
+        }
+      }
+    }
+#ifdef ASSERT
+    if (mm != NULL) {
+      assert (_igvn.transform(mm) == mem->as_MergeMem()->base_memory(), "all memory state should have been processed");
+      _igvn.remove_dead_node(mm);
+    }
+#endif
+  }
+  return safepoint;
+}
+
+// If the loop has the shape of a counted loop but with a long
+// induction variable, transform the loop into a loop nest: an inner
+// loop that iterates for at most max int iterations with an integer
+// induction variable and an outer loop that iterates over the full
+// range of long values from the initial loop in (at most) max int
+// steps. That is:
+//
+// x: for (long phi = init; phi < limit; phi += stride) {
+//   // phi := Phi(L, init, incr)
+//   // incr := AddL(phi, longcon(stride))
+//   // phi_incr := phi (test happens before increment)
+//   long incr = phi + stride;
+//   ... use phi and incr ...
+// }
+//
+// OR:
+//
+// x: for (long phi = init; (phi += stride) < limit; ) {
+//   // phi := Phi(L, AddL(init, stride), incr)
+//   // incr := AddL(phi, longcon(stride))
+//   // phi_incr := NULL (test happens after increment)
+//   long incr = phi + stride;
+//   ... use phi and (phi + stride) ...
+// }
+//
+// ==transform=>
+//
+// const ulong inner_iters_limit = INT_MAX - stride - 1;  //near 0x7FFFFFF0
+// assert(stride <= inner_iters_limit);  // else abort transform
+// assert((extralong)limit + stride <= LONG_MAX);  // else deopt
+// outer_head: for (long outer_phi = init;;) {
+//   // outer_phi := Phi(outer_head, init, AddL(outer_phi, I2L(inner_phi)))
+//   ulong inner_iters_max = (ulong) MAX(0, ((extralong)limit + stride - outer_phi));
+//   long inner_iters_actual = MIN(inner_iters_limit, inner_iters_max);
+//   assert(inner_iters_actual == (int)inner_iters_actual);
+//   int inner_phi, inner_incr;
+//   x: for (inner_phi = 0;; inner_phi = inner_incr) {
+//     // inner_phi := Phi(x, intcon(0), inner_incr)
+//     // inner_incr := AddI(inner_phi, intcon(stride))
+//     inner_incr = inner_phi + stride;
+//     if (inner_incr < inner_iters_actual) {
+//       ... use phi=>(outer_phi+inner_phi) and incr=>(outer_phi+inner_incr) ...
+//       continue;
+//     }
+//     else break;
+//   }
+//   if ((outer_phi+inner_phi) < limit)  //OR (outer_phi+inner_incr) < limit
+//     continue;
+//   else break;
+// }
+bool PhaseIdealLoop::is_long_counted_loop(Node* x, IdealLoopTree* loop, Node_List &old_new) {
+  // Only for inner loops
+  if (loop->_child != NULL) {
+    return false;
+  }
+
+  // Checks whether the loop has the shape of a counted loop
+  Node* back_control = loop_exit_control(x, loop);
+  if (back_control == NULL) {
+    return false;
+  }
+
+  BoolTest::mask bt = BoolTest::illegal;
+  float cl_prob = 0;
+  Node* incr = NULL;
+  Node* limit = NULL;
+
+  Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
+  if (cmp == NULL || cmp->Opcode() != Op_CmpL) {
+    return false; // Avoid pointer & float & 32-bit compares
+  }
+
+  Node* phi_incr = NULL;
+  incr = loop_iv_incr(incr, x, loop, phi_incr);
+  if (incr == NULL || incr->Opcode() != Op_AddL) {
+    return false;
+  }
+
+  Node* xphi = NULL;
+  Node* stride = loop_iv_stride(incr, loop, xphi);
+
+  if (stride == NULL) {
+    return false;
+  }
+
+#ifndef PRODUCT
+  Atomic::inc(&_long_loop_candidates);
+#endif
+
+  jlong stride_con = stride->get_long();
+  assert(stride_con != 0, "missed some peephole opt");
+  // We can't iterate for more than max int at a time.
+  if (stride_con != (jint)stride_con) {
+    return false;
+  }
+  // Cap on the inner int loop's iteration range: at most max_jint -
+  // ABS(stride_con) so the int iv cannot overflow, minus 1 so there's
+  // no need for a loop limit check if the exit test is <= or >=.
+  int iters_limit = max_jint - ABS(stride_con) - 1;
+#ifdef ASSERT
+  if (StressLongCountedLoop > 0) {
+    iters_limit = iters_limit / StressLongCountedLoop;
+  }
+#endif
+  // At least 2 iterations so counted loop construction doesn't fail
+  if (iters_limit/ABS(stride_con) < 2) {
+    return false;
+  }
+
+  PhiNode* phi = loop_iv_phi(xphi, phi_incr, x, loop);
+
+  if (phi == NULL || phi->in(LoopNode::LoopBackControl) != incr) {
+    return false;
+  }
+
+  // Safepoint on backedge not supported
+  if (x->in(LoopNode::LoopBackControl)->Opcode() == Op_SafePoint) {
+    return false;
+  }
+
+  // data nodes on back branch not supported
+  if (back_control->outcnt() > 1) {
+    return false;
+  }
+
+  if (!condition_stride_ok(bt, stride_con)) {
+    return false;
+  }
+
+  // We'll need to use the loop limit before the inner loop is entered
+  if (!is_dominator(get_ctrl(limit), x)) {
+    return false;
+  }
+
+  IfNode* exit_test = back_control->in(0)->as_If();
+
+  // We need a safepoint to insert empty predicates for the inner loop.
+  SafePointNode* safepoint = find_safepoint(back_control, x, loop);
+  if (safepoint == NULL) {
+    // If exit condition is ne, then a loop limit check is likely needed
+    if (bt == BoolTest::ne) {
+      return false;
+    }
+  } else if (C->too_many_traps(safepoint->jvms()->method(),
+                        safepoint->jvms()->bci(),
+                        Deoptimization::Reason_loop_limit_check)) {
+    // We must have transformed the loop already and a loop limit
+    // check must have failed.
+    return false;
+  }
+
+  Node* exit_branch = exit_test->proj_out(back_control->Opcode() == Op_IfFalse);
+  Node* entry_control = x->in(LoopNode::EntryControl);
+
+  // If the loop exit test is on the IV before it is incremented: i <
+  // limit, we transform the exit test so it is performed on the IV
+  // after it is incremented: i + stride < limit + stride.  We
+  // need limit + stride to not overflow. See adjusted_limit below.
+  bool limit_check_required = false;
+  if (phi_incr != NULL) {
+    const TypeLong* limit_t = _igvn.type(limit)->is_long();
+    int sov = check_stride_overflow(stride_con, limit_t);
+    if (sov != 0) {
+      if (sov < 0) {
+        return false;  // Bailout: integer overflow is certain.
+      }
+      // Check that inserting a predicate is indeed possible
+      if (find_predicate_insertion_point(x->in(LoopNode::EntryControl), Deoptimization::Reason_loop_limit_check) == NULL) {
+        return false;
+      }
+      limit_check_required = true;
+    }
+  }
+
+  // Clone the control flow of the loop to build an outer loop
+  Node* outer_back_branch = back_control->clone();
+  Node* outer_exit_test = exit_test->clone();
+  Node* inner_exit_branch = exit_branch->clone();
+
+  Node* outer_head = new LoopNode(entry_control, outer_back_branch);
+  IdealLoopTree* outer_ilt = insert_outer_loop(loop, outer_head->as_Loop(), outer_back_branch);
+
+  const bool body_populated = true;
+  register_control(outer_head, outer_ilt, entry_control, body_populated);
+
+  _igvn.register_new_node_with_optimizer(inner_exit_branch);
+  set_loop(inner_exit_branch, outer_ilt);
+  set_idom(inner_exit_branch, exit_test, dom_depth(exit_branch));
+
+  outer_exit_test->set_req(0, inner_exit_branch);
+  register_control(outer_exit_test, outer_ilt, inner_exit_branch, body_populated);
+
+  _igvn.replace_input_of(exit_branch, 0, outer_exit_test);
+  set_idom(exit_branch, outer_exit_test, dom_depth(exit_branch));
+
+  outer_back_branch->set_req(0, outer_exit_test);
+  register_control(outer_back_branch, outer_ilt, outer_exit_test, body_populated);
+
+  _igvn.replace_input_of(x, LoopNode::EntryControl, outer_head);
+  set_idom(x, outer_head, dom_depth(x));
+
+  // add an iv phi to the outer loop and use it to compute the inner
+  // loop iteration limit
+  Node* outer_phi = phi->clone();
+  outer_phi->set_req(0, outer_head);
+  register_new_node(outer_phi, outer_head);
+
+  Node* adjusted_limit = limit;
+  if (phi_incr != NULL) {
+    // If compare points directly to the phi we need to adjust the
+    // compare so that it points to the incr.
+    Node* long_stride = _igvn.longcon(stride_con);
+    set_ctrl(long_stride, C->root());
+    adjusted_limit = new AddLNode(limit, long_stride);
+    _igvn.register_new_node_with_optimizer(adjusted_limit);
+  }
+  Node* inner_iters_max = NULL;
+  if (stride_con > 0) {
+    inner_iters_max = MaxNode::max_diff_with_zero(adjusted_limit, outer_phi, TypeLong::LONG, _igvn);
+  } else {
+    inner_iters_max = MaxNode::max_diff_with_zero(outer_phi, adjusted_limit, TypeLong::LONG, _igvn);
+  }
+
+  Node* inner_iters_limit = _igvn.longcon(iters_limit);
+  // inner_iters_max may not fit in a signed integer (iterating from
+  // Long.MIN_VALUE to Long.MAX_VALUE for instance). Use an unsigned
+  // min.
+  Node* inner_iters_actual = MaxNode::unsigned_min(inner_iters_max, inner_iters_limit, TypeLong::make(0, iters_limit, Type::WidenMin), _igvn);
+
+  Node* inner_iters_actual_int = new ConvL2INode(inner_iters_actual);
+  _igvn.register_new_node_with_optimizer(inner_iters_actual_int);
+
+  Node* zero = _igvn.intcon(0);
+  set_ctrl(zero, C->root());
+  if (stride_con < 0) { // counting down: the inner limit is negated (0 - iterations)
+    inner_iters_actual_int = new SubINode(zero, inner_iters_actual_int);
+    _igvn.register_new_node_with_optimizer(inner_iters_actual_int);
+  }
+
+  // Clone the iv data nodes as an integer iv
+  Node* int_stride = _igvn.intcon((int)stride_con);
+  set_ctrl(int_stride, C->root());
+  Node* inner_phi = new PhiNode(x->in(0), TypeInt::INT);
+  Node* inner_incr = new AddINode(inner_phi, int_stride);
+  Node* inner_cmp = NULL;
+  if (cmp->in(1) == incr || cmp->in(1) == phi) { // keep the iv on the same side of the compare as in the original test
+    inner_cmp = new CmpINode(inner_incr, inner_iters_actual_int);
+  }  else {
+    assert(cmp->in(2) == incr || cmp->in(2) == phi, "bad iv shape");
+    inner_cmp = new CmpINode(inner_iters_actual_int, inner_incr);
+  }
+  Node* inner_bol = new BoolNode(inner_cmp, exit_test->in(1)->as_Bool()->_test._test);
+  inner_phi->set_req(LoopNode::EntryControl, zero);
+  inner_phi->set_req(LoopNode::LoopBackControl, inner_incr);
+  register_new_node(inner_phi, x);
+  register_new_node(inner_incr, x);
+  register_new_node(inner_cmp, x);
+  register_new_node(inner_bol, x);
+
+  _igvn.replace_input_of(exit_test, 1, inner_bol);
+
+  // Add a predicate to guarantee limit adjustment doesn't overflow
+  if (limit_check_required) {
+    assert(phi_incr != NULL, "only when exit test must be transformed");
+    ProjNode *limit_check_proj = find_predicate_insertion_point(outer_head->in(LoopNode::EntryControl), Deoptimization::Reason_loop_limit_check);
+    assert(limit_check_proj != NULL, "was tested before");
+    IfNode* check_iff = limit_check_proj->in(0)->as_If();
+    Node* cmp_limit;
+    Node* bol;
+
+    if (stride_con > 0) {
+      cmp_limit = new CmpLNode(limit, _igvn.longcon(max_jlong - stride_con));
+      bol = new BoolNode(cmp_limit, BoolTest::le);
+    } else {
+      cmp_limit = new CmpLNode(limit, _igvn.longcon(min_jlong - stride_con));
+      bol = new BoolNode(cmp_limit, BoolTest::ge);
+    }
+
+    insert_loop_limit_check(limit_check_proj, cmp_limit, bol);
+    Node* new_predicate = limit_check_proj->in(0)->in(0);
+    Node* above_predicate = new_predicate->in(0)->in(0);
+    Node* entry = outer_head->in(LoopNode::EntryControl);
+    _igvn.replace_input_of(limit_check_proj->in(0), 0, above_predicate);
+    _igvn.replace_input_of(new_predicate->in(0), 0, entry);
+    _igvn.replace_input_of(outer_head, LoopNode::EntryControl, new_predicate);
+    set_idom(new_predicate->in(0), entry, dom_depth(entry));
+    set_idom(new_predicate, new_predicate->in(0), dom_depth(entry));
+    Node* region = new_predicate->in(0)->as_If()->proj_out(new_predicate->Opcode() == Op_IfFalse)->unique_ctrl_out();
+    assert(region->is_Region(), "should be region merging predicates");
+    set_idom(region, entry, dom_depth(entry));
+    set_idom(limit_check_proj->in(0), above_predicate, dom_depth(above_predicate));
+  }
+
+  LoopNode* inner_head = x->as_Loop();
+
+  // Clone inner loop phis to outer loop
+  for (uint i = 0; i < inner_head->outcnt(); i++) {
+    Node* u = inner_head->raw_out(i);
+    if (u->is_Phi() && u != inner_phi && u != phi) {
+      assert(u->in(0) == inner_head, "inconsistent");
+      Node* clone = u->clone();
+      clone->set_req(0, outer_head);
+      register_new_node(clone, outer_head);
+      _igvn.replace_input_of(u, LoopNode::EntryControl, clone);
+    }
+  }
+
+  // Replace inner loop long iv phi as inner loop int iv phi + outer
+  // loop iv phi
+  long_loop_replace_long_iv(phi, inner_phi, outer_phi, inner_head);
+
+  // Replace inner loop long iv incr with inner loop int incr + outer
+  // loop iv phi
+  long_loop_replace_long_iv(incr, inner_incr, outer_phi, inner_head);
+
+  set_subtree_ctrl(inner_iters_actual_int);
+
+  // Summary of steps from initial loop to loop nest:
+  //
+  // == old IR nodes =>
+  //
+  // entry_control: {...}
+  // x:
+  // for (long phi = init;;) {
+  //   // phi := Phi(x, init, incr)
+  //   // incr := AddL(phi, longcon(stride))
+  //   exit_test:
+  //   if (phi < limit)
+  //     back_control: fallthrough;
+  //   else
+  //     exit_branch: break;
+  //   // test happens before increment => phi == phi_incr != NULL
+  //   long incr = phi + stride;
+  //   ... use phi and incr ...
+  //   phi = incr;
+  // }
+  //
+  // == new IR nodes (just before final peel) =>
+  //
+  // entry_control: {...}
+  // long adjusted_limit = limit + stride;  //because phi_incr != NULL
+  // assert(!limit_check_required || (extralong)limit + stride == adjusted_limit);  // else deopt
+  // ulong inner_iters_limit = max_jint - ABS(stride) - 1;  //near 0x7FFFFFF0
+  // outer_head:
+  // for (long outer_phi = init;;) {
+  //   // outer_phi := phi->clone(), in(0):=outer_head, => Phi(outer_head, init, incr)
+  //   // REPLACE phi  => AddL(outer_phi, I2L(inner_phi))
+  //   // REPLACE incr => AddL(outer_phi, I2L(inner_incr))
+  //   // SO THAT outer_phi := Phi(outer_head, init, AddL(outer_phi, I2L(inner_incr)))
+  //   ulong inner_iters_max = (ulong) MAX(0, ((extralong)adjusted_limit - outer_phi) * SGN(stride));
+  //   int inner_iters_actual_int = (int) MIN(inner_iters_limit, inner_iters_max) * SGN(stride);
+  //   inner_head: x: //in(1) := outer_head
+  //   int inner_phi;
+  //   for (inner_phi = 0;;) {
+  //     // inner_phi := Phi(x, intcon(0), inner_phi + stride)
+  //     int inner_incr = inner_phi + stride;
+  //     bool inner_bol = (inner_incr < inner_iters_actual_int);
+  //     exit_test: //exit_test->in(1) := inner_bol;
+  //     if (inner_bol) // WAS (phi < limit)
+  //       back_control: fallthrough;
+  //     else
+  //       inner_exit_branch: break;  //exit_branch->clone()
+  //     ... use phi=>(outer_phi+inner_phi) and incr=>(outer_phi+inner_incr) ...
+  //     inner_phi = inner_phi + stride;  // inner_incr
+  //   }
+  //   outer_exit_test:  //exit_test->clone(), in(0):=inner_exit_branch
+  //   if ((outer_phi+inner_phi) < limit)  // WAS (phi < limit)
+  //     outer_back_branch: fallthrough;  //back_control->clone(), in(0):=outer_exit_test
+  //   else
+  //     exit_branch: break;  //in(0) := outer_exit_test
+  // }
+
+  // Peel one iteration of the loop and use the safepoint at the end
+  // of the peeled iteration to insert empty predicates. If there is no
+  // well positioned safepoint, still peel (unless the loop has a call)
+  // to guarantee a safepoint in the outer loop.
+  if (safepoint != NULL || !loop->_has_call) {
+    old_new.clear();
+    do_peeling(loop, old_new);
+  }
+
+  if (safepoint != NULL) {
+    SafePointNode* cloned_sfpt = old_new[safepoint->_idx]->as_SafePoint();
+
+    if (UseLoopPredicate) {
+      add_empty_predicate(Deoptimization::Reason_predicate, inner_head, outer_ilt, cloned_sfpt);
+    }
+    if (UseProfiledLoopPredicate) {
+      add_empty_predicate(Deoptimization::Reason_profile_predicate, inner_head, outer_ilt, cloned_sfpt);
+    }
+    add_empty_predicate(Deoptimization::Reason_loop_limit_check, inner_head, outer_ilt, cloned_sfpt);
+  }
+
+#ifndef PRODUCT
+  Atomic::inc(&_long_loop_nests);
+#endif
+
+  inner_head->mark_transformed_long_loop();
+
+  return true;
+}
+
+#ifdef ASSERT
+// Stress helper (ASSERT builds only): convert an int counted loop to a long
+// counted loop to stress handling of long counted loops.
+//
+// Starting from cmp, the loop-resident nodes reachable through inputs are
+// collected (Phi, CmpI and AddI are expected) and a long-typed twin is
+// created for each of them. The twins are then wired together, with any
+// other int input converted through ConvI2L, and finally the uses of the int
+// nodes outside the collected set are redirected to the twins (through
+// ConvL2I, except for uses of the compare, which get the CmpL directly).
+//
+// Returns false, after discarding the twins created so far, if a CastII is
+// encountered. Returns true once the conversion has been performed.
+bool PhaseIdealLoop::convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop) {
+  Unique_Node_List iv_nodes;
+  Node_List old_new;
+  iv_nodes.push(cmp);
+  bool failed = false;
+
+  // Phase 1: collect the iv nodes and create an (unwired) long twin for each.
+  // iv_nodes grows while it is being iterated.
+  for (uint i = 0; i < iv_nodes.size() && !failed; i++) {
+    Node* n = iv_nodes.at(i);
+    switch(n->Opcode()) {
+      case Op_Phi: {
+        Node* clone = new PhiNode(n->in(0), TypeLong::LONG);
+        old_new.map(n->_idx, clone);
+        break;
+      }
+      case Op_CmpI: {
+        Node* clone = new CmpLNode(NULL, NULL);
+        old_new.map(n->_idx, clone);
+        break;
+      }
+      case Op_AddI: {
+        Node* clone = new AddLNode(NULL, NULL);
+        old_new.map(n->_idx, clone);
+        break;
+      }
+      case Op_CastII: {
+        failed = true;
+        break;
+      }
+      default:
+        DEBUG_ONLY(n->dump());
+        fatal("unexpected");
+    }
+
+    // Queue the inputs that are members of the loop.
+    for (uint j = 1; j < n->req(); j++) {
+      Node* in = n->in(j);
+      if (in == NULL) {
+        continue;
+      }
+      if (loop->is_member(get_loop(get_ctrl(in)))) {
+        iv_nodes.push(in);
+      }
+    }
+  }
+
+  if (failed) {
+    // Give up: drop every twin created so far.
+    for (uint i = 0; i < iv_nodes.size(); i++) {
+      Node* n = iv_nodes.at(i);
+      Node* clone = old_new[n->_idx];
+      if (clone != NULL) {
+        _igvn.remove_dead_node(clone);
+      }
+    }
+    return false;
+  }
+
+  // Phase 2: wire the inputs of each twin and register it.
+  for (uint i = 0; i < iv_nodes.size(); i++) {
+    Node* n = iv_nodes.at(i);
+    Node* clone = old_new[n->_idx];
+    for (uint j = 1; j < n->req(); j++) {
+      Node* in = n->in(j);
+      if (in == NULL) {
+        continue;
+      }
+      Node* in_clone = old_new[in->_idx];
+      if (in_clone == NULL) {
+        // Input without a twin: widen the int value.
+        assert(_igvn.type(in)->isa_int(), "");
+        in_clone = new ConvI2LNode(in);
+        _igvn.register_new_node_with_optimizer(in_clone);
+        set_subtree_ctrl(in_clone);
+      }
+      if (in_clone->in(0) == NULL) {
+        // The control input of in_clone is temporarily set to top while the
+        // edge is added and then cleared again.
+        // NOTE(review): the reason for this is not visible here - confirm.
+        in_clone->set_req(0, C->top());
+        clone->set_req(j, in_clone);
+        in_clone->set_req(0, NULL);
+      } else {
+        clone->set_req(j, in_clone);
+      }
+    }
+    _igvn.register_new_node_with_optimizer(clone);
+  }
+  set_ctrl(old_new[phi->_idx], phi->in(0));
+
+  // Phase 3: redirect the uses outside of the iv nodes to the long twins.
+  for (uint i = 0; i < iv_nodes.size(); i++) {
+    Node* n = iv_nodes.at(i);
+    Node* clone = old_new[n->_idx];
+    set_subtree_ctrl(clone);
+    // Uses of the compare take the CmpL itself. Uses of any other node go
+    // through a ConvL2I that is created lazily, on the first such use.
+    Node* m = n->Opcode() == Op_CmpI ? clone : NULL;
+    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+      Node* u = n->fast_out(j);
+      if (iv_nodes.member(u)) {
+        continue;
+      }
+      if (m == NULL) {
+        m = new ConvL2INode(clone);
+        _igvn.register_new_node_with_optimizer(m);
+        set_subtree_ctrl(m);
+      }
+      _igvn.rehash_node_delayed(u);
+      int nb = u->replace_edge(n, m);
+      // Step back over the edges that were just replaced.
+      --j, jmax -= nb;
+    }
+  }
+  return true;
+}
+#endif
+
 //------------------------------is_counted_loop--------------------------------
 bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
  PhaseGVN *gvn = &_igvn;
@ -514,7 +1240,6 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
  float cl_prob = 0;
  Node* incr = NULL;
  Node* limit = NULL;
-
  Node* cmp = loop_exit_test(back_control, loop, incr, limit, bt, cl_prob);
  if (cmp == NULL || cmp->Opcode() != Op_CmpI) {
    return false; // Avoid pointer & float & 64-bit compares
@ -640,16 +1365,8 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
    assert(trunc1 == NULL && trunc2 == NULL, "no truncation for int");
  }

-  // If the condition is inverted and we will be rolling
-  // through MININT to MAXINT, then bail out.
-  if (bt == BoolTest::eq || // Bail out, but this loop trips at most twice!
-      // Odd stride
-      (bt == BoolTest::ne && stride_con != 1 && stride_con != -1) ||
-      // Count down loop rolls through MAXINT
-      ((bt == BoolTest::le || bt == BoolTest::lt) && stride_con < 0) ||
-      // Count up loop rolls through MININT
-      ((bt == BoolTest::ge || bt == BoolTest::gt) && stride_con > 0)) {
-    return false; // Bail out
+  if (!condition_stride_ok(bt, stride_con)) {
+    return false;
  }

  const TypeInt* init_t = gvn->type(init_trip)->is_int();
@ -722,6 +1439,7 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
    if (sov < 0) {
      return false;  // Bailout: integer overflow is certain.
    }
+    assert(!x->as_Loop()->is_transformed_long_loop(), "long loop was transformed");
    // Generate loop's limit check.
    // Loop limit check predicate should be near the loop.
    ProjNode *limit_check_proj = find_predicate_insertion_point(init_control, Deoptimization::Reason_loop_limit_check);
@ -809,6 +1527,12 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
    }
  }

+#ifdef ASSERT
+  if (!x->as_Loop()->is_transformed_long_loop() && StressLongCountedLoop > 0 && trunc1 == NULL && convert_to_long_loop(cmp, phi, loop)) {
+    return false;
+  }
+#endif
+
  if (phi_incr != NULL) {
    // If compare points directly to the phi we need to adjust
    // the compare so that it points to the incr. Limit have
@ -995,6 +1719,12 @@ bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
    loop = outer_ilt;
  }

+#ifndef PRODUCT
+  if (x->as_Loop()->is_transformed_long_loop()) {
+    Atomic::inc(&_long_loop_counted_loops);
+  }
+#endif
+
  return true;
 }

@ -2574,11 +3304,14 @@ void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
    // Look for induction variables
    phase->replace_parallel_iv(this);

-  } else if (_parent != NULL && !_irreducible) {
+  } else {
+    assert(!_head->is_Loop() || !_head->as_Loop()->is_transformed_long_loop(), "transformation to counted loop should not fail");
+    if (_parent != NULL && !_irreducible) {
    // Not a counted loop. Keep one safepoint.
    bool keep_one_sfpt = true;
    remove_safepoints(phase, keep_one_sfpt);
  }
+  }

  // Recursively
  assert(loop->_child != this || (loop->_head->as_Loop()->is_OuterStripMinedLoop() && _head->as_CountedLoop()->is_strip_mined()), "what kind of loop was added?");
@ -3223,7 +3956,12 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
    return;
  }

-  if (ReassociateInvariants) {
+  for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+    IdealLoopTree* lpt = iter.current();
+    is_long_counted_loop(lpt->_head, lpt, worklist);
+  }
+
+  if (ReassociateInvariants && !C->major_progress()) {
    // Reassociate invariants and prep for split_thru_phi
    for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
      IdealLoopTree* lpt = iter.current();
@ -3251,7 +3989,7 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {

  // Check for aggressive application of split-if and other transforms
  // that require basic-block info (like cloning through Phi's)
-  if( SplitIfBlocks && do_split_ifs ) {
+  if (!C->major_progress() && SplitIfBlocks && do_split_ifs) {
    visited.clear();
    split_if_with_blocks( visited, nstack);
    NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
@ -3369,8 +4107,11 @@ void PhaseIdealLoop::build_and_optimize(LoopOptsMode mode) {
 //------------------------------print_statistics-------------------------------
 int PhaseIdealLoop::_loop_invokes=0;// Count of PhaseIdealLoop invokes
 int PhaseIdealLoop::_loop_work=0; // Sum of PhaseIdealLoop x unique
+volatile int PhaseIdealLoop::_long_loop_candidates=0; // Number of long loops seen
+volatile int PhaseIdealLoop::_long_loop_nests=0; // Number of long loops successfully transformed to a nest
+volatile int PhaseIdealLoop::_long_loop_counted_loops=0; // Number of long loops successfully transformed to a counted loop
 void PhaseIdealLoop::print_statistics() {
-  tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d", _loop_invokes, _loop_work);
+  tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d, long loops=%d/%d/%d", _loop_invokes, _loop_work, _long_loop_counted_loops, _long_loop_nests, _long_loop_candidates); // long loops = counted loops / nests / candidates
 }

 //------------------------------verify-----------------------------------------
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -76,7 +76,8 @@ protected:
         IsMultiversioned=16384,
         StripMined=32768,
         SubwordLoop=65536,
-         ProfileTripFailed=131072};
+         ProfileTripFailed=131072,
+         TransformedLongLoop=262144};
  char _unswitch_count;
  enum { _unswitch_max=3 };
  char _postloop_flags;
@ -101,6 +102,7 @@ public:
  bool is_strip_mined() const { return _loop_flags & StripMined; }
  bool is_profile_trip_failed() const { return _loop_flags & ProfileTripFailed; }
  bool is_subword_loop() const { return _loop_flags & SubwordLoop; }
+  bool is_transformed_long_loop() const { return _loop_flags & TransformedLongLoop; } // true once mark_transformed_long_loop() has set the flag

  void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
  void mark_has_reductions() { _loop_flags |= HasReductions; }
@ -115,6 +117,7 @@ public:
  void clear_strip_mined() { _loop_flags &= ~StripMined; }
  void mark_profile_trip_failed() { _loop_flags |= ProfileTripFailed; }
  void mark_subword_loop() { _loop_flags |= SubwordLoop; }
+  void mark_transformed_long_loop() { _loop_flags |= TransformedLongLoop; } // sets the flag tested by is_transformed_long_loop()

  int unswitch_max() { return _unswitch_max; }
  int unswitch_count() { return _unswitch_count; }
@ -1049,6 +1052,13 @@ public:
  PhiNode* loop_iv_phi(Node* xphi, Node* phi_incr, Node* x, IdealLoopTree* loop);

  bool is_counted_loop(Node* n, IdealLoopTree* &loop);
+  void long_loop_replace_long_iv(Node* iv_to_replace, Node* inner_iv, Node* outer_phi, Node* inner_head);
+  bool is_long_counted_loop(Node* x, IdealLoopTree* loop, Node_List &old_new);
+#ifdef ASSERT
+  bool convert_to_long_loop(Node* cmp, Node* phi, IdealLoopTree* loop); // debug-only: called from is_counted_loop() when StressLongCountedLoop > 0; returns true if the loop was converted
+#endif // ASSERT
+  void add_empty_predicate(Deoptimization::DeoptReason reason, Node* inner_head, IdealLoopTree* loop, SafePointNode* sfpt);
+  SafePointNode* find_safepoint(Node* back_control, Node* x, IdealLoopTree* loop);
  IdealLoopTree* insert_outer_loop(IdealLoopTree* loop, LoopNode* outer_l, Node* outer_ift);
  IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
                                               IdealLoopTree* loop, float cl_prob, float le_fcnt,
@ -1471,6 +1481,9 @@ public:
  static void print_statistics();
  static int _loop_invokes;     // Count of PhaseIdealLoop invokes
  static int _loop_work;        // Sum of PhaseIdealLoop x _unique
+  static volatile int _long_loop_candidates;    // Number of long loops seen
+  static volatile int _long_loop_nests;         // Number of long loops successfully transformed to a nest
+  static volatile int _long_loop_counted_loops; // Number of long loops successfully transformed to a counted loop
 #endif

  void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;
--- a/src/hotspot/share/opto/loopopts.cpp
+++ b/src/hotspot/share/opto/loopopts.cpp
@ -1038,7 +1038,9 @@ Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {

  // Do not clone the trip counter through on a CountedLoop
  // (messes up the canonical shape).
-  if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
+  if ((n_blk->is_CountedLoop() || (n_blk->is_Loop() && n_blk->as_Loop()->is_transformed_long_loop())) && n->Opcode() == Op_AddI) {
+    return n;
+  }

  // Check for having no control input; not pinned.  Allow
  // dominating control.
--- a/src/hotspot/share/opto/macro.cpp
+++ b/src/hotspot/share/opto/macro.cpp
@ -94,44 +94,6 @@ void PhaseMacroExpand::migrate_outs(Node *old, Node *target) {
  assert(old->outcnt() == 0, "all uses must be deleted");
 }

-void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcall) {
-  // Copy debug information and adjust JVMState information
-  uint old_dbg_start = oldcall->tf()->domain()->cnt();
-  uint new_dbg_start = newcall->tf()->domain()->cnt();
-  int jvms_adj  = new_dbg_start - old_dbg_start;
-  assert (new_dbg_start == newcall->req(), "argument count mismatch");
-
-  // SafePointScalarObject node could be referenced several times in debug info.
-  // Use Dict to record cloned nodes.
-  Dict* sosn_map = new Dict(cmpkey,hashkey);
-  for (uint i = old_dbg_start; i < oldcall->req(); i++) {
-    Node* old_in = oldcall->in(i);
-    // Clone old SafePointScalarObjectNodes, adjusting their field contents.
-    if (old_in != NULL && old_in->is_SafePointScalarObject()) {
-      SafePointScalarObjectNode* old_sosn = old_in->as_SafePointScalarObject();
-      uint old_unique = C->unique();
-      Node* new_in = old_sosn->clone(sosn_map);
-      if (old_unique != C->unique()) { // New node?
-        new_in->set_req(0, C->root()); // reset control edge
-        new_in = transform_later(new_in); // Register new node.
-      }
-      old_in = new_in;
-    }
-    newcall->add_req(old_in);
-  }
-
-  // JVMS may be shared so clone it before we modify it
-  newcall->set_jvms(oldcall->jvms() != NULL ? oldcall->jvms()->clone_deep(C) : NULL);
-  for (JVMState *jvms = newcall->jvms(); jvms != NULL; jvms = jvms->caller()) {
-    jvms->set_map(newcall);
-    jvms->set_locoff(jvms->locoff()+jvms_adj);
-    jvms->set_stkoff(jvms->stkoff()+jvms_adj);
-    jvms->set_monoff(jvms->monoff()+jvms_adj);
-    jvms->set_scloff(jvms->scloff()+jvms_adj);
-    jvms->set_endoff(jvms->endoff()+jvms_adj);
-  }
-}
-
 Node* PhaseMacroExpand::opt_bits_test(Node* ctrl, Node* region, int edge, Node* word, int mask, int bits, bool return_fast_path) {
  Node* cmp;
  if (mask != 0) {
@ -184,7 +146,7 @@ CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* sl
  if (parm0 != NULL)  call->init_req(TypeFunc::Parms+0, parm0);
  if (parm1 != NULL)  call->init_req(TypeFunc::Parms+1, parm1);
  if (parm2 != NULL)  call->init_req(TypeFunc::Parms+2, parm2);
-  copy_call_debug_info(oldcall, call);
+  call->copy_call_debug_info(&_igvn, oldcall);
  call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
  _igvn.replace_node(oldcall, call);
  transform_later(call);
@ -1473,7 +1435,7 @@ void PhaseMacroExpand::expand_allocate_common(

  // Copy debug information and adjust JVMState information, then replace
  // allocate node with the call
-  copy_call_debug_info((CallNode *) alloc,  call);
+  call->copy_call_debug_info(&_igvn, alloc);
  if (expand_fast_path) {
    call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
  } else {
--- a/src/hotspot/share/opto/macroArrayCopy.cpp
+++ b/src/hotspot/share/opto/macroArrayCopy.cpp
@ -958,7 +958,7 @@ MergeMemNode* PhaseMacroExpand::generate_slow_arraycopy(ArrayCopyNode *ac,
  call->init_req(TypeFunc::Parms+2, dest);
  call->init_req(TypeFunc::Parms+3, dest_offset);
  call->init_req(TypeFunc::Parms+4, copy_length);
-  copy_call_debug_info(ac, call);
+  call->copy_call_debug_info(&_igvn, ac);

  call->set_cnt(PROB_UNLIKELY_MAG(4));  // Same effect as RC_UNCOMMON.
  _igvn.replace_node(ac, call);
--- a/src/hotspot/share/opto/parse1.cpp
+++ b/src/hotspot/share/opto/parse1.cpp
@ -2254,23 +2254,7 @@ void Parse::return_current(Node* value) {

 //------------------------------add_safepoint----------------------------------
 void Parse::add_safepoint() {
-  // See if we can avoid this safepoint.  No need for a SafePoint immediately
-  // after a Call (except Leaf Call) or another SafePoint.
-  Node *proj = control();
  uint parms = TypeFunc::Parms+1;
-  if( proj->is_Proj() ) {
-    Node *n0 = proj->in(0);
-    if( n0->is_Catch() ) {
-      n0 = n0->in(0)->in(0);
-      assert( n0->is_Call(), "expect a call here" );
-    }
-    if( n0->is_Call() ) {
-      if( n0->as_Call()->guaranteed_safepoint() )
-        return;
-    } else if( n0->is_SafePoint() && n0->req() >= parms ) {
-      return;
-    }
-  }

  // Clear out dead values from the debug info.
  kill_dead_locals();