8202377: Modularize C2 GC barriers

Reviewed-by: neliasso, roland
This commit is contained in:
Erik Österlund 2018-05-18 14:51:06 +02:00
parent 2aa9d028c7
commit 53ec88908c
31 changed files with 2648 additions and 1832 deletions

View file

@ -0,0 +1,772 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#include "gc/g1/g1BarrierSet.hpp"
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "opto/type.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
// Signature of the G1 SATB pre-barrier runtime stub: it takes the original
// (pre-write) field value and the current thread, and returns nothing.
const TypeFunc *G1BarrierSetC2::g1_wb_pre_Type() {
  // Domain: (original field value, thread).
  const Type** domain_fields = TypeTuple::fields(2);
  domain_fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
  domain_fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL;  // thread
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+2, domain_fields);

  // Range: empty -- the stub has no return value.
  const Type** range_fields = TypeTuple::fields(0);
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+0, range_fields);

  return TypeFunc::make(domain, range);
}
// Signature of the G1 post-barrier runtime stub: it takes the card address
// and the current thread, and returns nothing.
const TypeFunc *G1BarrierSetC2::g1_wb_post_Type() {
  const Type **fields = TypeTuple::fields(2);
  fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL;  // Card addr
  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL;  // thread
  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);

  // Create result type (range): empty, the stub has no return value.
  // Spelled TypeFunc::Parms+0 for consistency with g1_wb_pre_Type().
  fields = TypeTuple::fields(0);
  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);

  return TypeFunc::make(domain, range);
}
#define __ ideal.
/*
 * Determine if the G1 pre-barrier can be removed. The pre-barrier is
 * required by SATB to make sure all objects live at the start of the
 * marking are kept alive: every reference update needs to record any
 * previous reference stored before writing.
 *
 * If the previous value is NULL there is no need to save the old value.
 * References that are NULL are filtered during runtime by the barrier
 * code to avoid unnecessary queuing.
 *
 * However in the case of newly allocated objects it might be possible to
 * prove that the reference about to be overwritten is NULL during compile
 * time and avoid adding the barrier code completely.
 *
 * The compiler needs to determine that the object in which a field is about
 * to be written is newly allocated, and that no prior store to the same field
 * has happened since the allocation.
 *
 * Returns true if the pre-barrier can be removed
 */
bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
PhaseTransform* phase,
Node* adr,
BasicType bt,
uint adr_idx) const {
// Decompose the address into (base, offset) and find the allocation (if
// any) that produced the base.
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
if (offset == Type::OffsetBot) {
return false; // cannot unalias unless there are precise offsets
}
if (alloc == NULL) {
return false; // No allocation found
}
intptr_t size_in_bytes = type2aelembytes(bt);
// Walk backwards through the memory graph from the current memory state,
// skipping stores that are provably independent of (base, offset). The
// walk is bounded (50 steps) to keep compile time in check.
Node* mem = kit->memory(adr_idx); // start searching here...
for (int cnt = 0; cnt < 50; cnt++) {
if (mem->is_Store()) {
Node* st_adr = mem->in(MemNode::Address);
intptr_t st_offset = 0;
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
if (st_base == NULL) {
break; // inscrutable pointer
}
// We have found a store with the same base and offset as ours, so a
// prior value may have been written; bail out.
if (st_base == base && st_offset == offset) {
break;
}
if (st_offset != offset && st_offset != Type::OffsetBot) {
const int MAX_STORE = BytesPerLong;
if (st_offset >= offset + size_in_bytes ||
st_offset <= offset - MAX_STORE ||
st_offset <= offset - mem->as_Store()->memory_size()) {
// Success: The offsets are provably independent.
// (You may ask, why not just test st_offset != offset and be done?
// The answer is that stores of different sizes can co-exist
// in the same sequence of RawMem effects. We sometimes initialize
// a whole 'tile' of array elements with a single jint or jlong.)
mem = mem->in(MemNode::Memory);
continue; // advance through independent store memory
}
}
if (st_base != base
&& MemNode::detect_ptr_independence(base, alloc, st_base,
AllocateNode::Ideal_allocation(st_base, phase),
phase)) {
// Success: The bases are provably independent.
mem = mem->in(MemNode::Memory);
continue; // advance through independent store memory
}
} else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
// We reached the initialization of some allocation.
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
// Make sure that we are looking at the same allocation site.
// The alloc variable is guaranteed to not be null here from earlier check.
if (alloc == st_alloc) {
// Check that the initialization is storing NULL so that no previous store
// has been moved up and directly write a reference
Node* captured_store = st_init->find_captured_store(offset,
type2aelembytes(T_OBJECT),
phase);
if (captured_store == NULL || captured_store == st_init->zero_memory()) {
// The field still holds its initial zero (NULL) value, so the
// pre-barrier has nothing to record and can be removed.
return true;
}
}
}
// Unless there is an explicit 'continue', we must bail out here,
// because 'mem' is an inscrutable memory state (e.g., a call).
break;
}
return false;
}
// G1 pre/post barriers
//
// Emit the SATB pre-barrier as IdealKit IR. If concurrent marking is active,
// the previous value of the field (loaded here when do_load is true,
// otherwise supplied by the caller as pre_val) is logged into the
// thread-local SATB queue; when the queue buffer is full, a leaf call into
// the g1_wb_pre runtime stub drains it.
void G1BarrierSetC2::pre_barrier(GraphKit* kit,
bool do_load,
Node* ctl,
Node* obj,
Node* adr,
uint alias_idx,
Node* val,
const TypeOopPtr* val_type,
Node* pre_val,
BasicType bt) const {
// Some sanity checks
// Note: val is unused in this routine.
if (do_load) {
// We need to generate the load of the previous value
assert(obj != NULL, "must have a base");
assert(adr != NULL, "where are loading from?");
assert(pre_val == NULL, "loaded already?");
assert(val_type != NULL, "need a type");
// Skip the barrier entirely when the previous value is provably NULL
// (freshly allocated object with no intervening store).
if (use_ReduceInitialCardMarks()
&& g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) {
return;
}
} else {
// In this case both val_type and alias_idx are unused.
assert(pre_val != NULL, "must be loaded already");
// Nothing to be done if pre_val is null.
if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
}
assert(bt == T_OBJECT, "or we shouldn't be here");
IdealKit ideal(kit, true);
Node* tls = __ thread(); // ThreadLocalStorage
Node* no_base = __ top();
Node* zero = __ ConI(0);
Node* zeroX = __ ConX(0);
float likely = PROB_LIKELY(0.999);
float unlikely = PROB_UNLIKELY(0.999);
// The "active" flag may be stored as a byte or an int depending on the
// platform; load it with the matching basic type.
BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width");
// Offsets into the thread
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
// Now the actual pointers into the thread
Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
// Now some of the values
Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
// if (marking != 0), i.e. only when concurrent marking is active
__ if_then(marking, BoolTest::ne, zero, unlikely); {
BasicType index_bt = TypeX_X->basic_type();
assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size.");
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
if (do_load) {
// load original value
// alias_idx correct??
pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx);
}
// if (pre_val != NULL) -- NULL previous values need not be recorded
__ if_then(pre_val, BoolTest::ne, kit->null()); {
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
// is the queue for this thread full? (index == 0 means full: the queue
// fills from the end of the buffer towards the front)
__ if_then(index, BoolTest::ne, zeroX, likely); {
// decrement the index
Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
// Now get the buffer location we will log the previous value into and store it
Node *log_addr = __ AddP(no_base, buffer, next_index);
__ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
// update the index
__ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
} __ else_(); {
// logging buffer is full, call the runtime
const TypeFunc *tf = g1_wb_pre_Type();
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
} __ end_if(); // (!index)
} __ end_if(); // (pre_val != NULL)
} __ end_if(); // (!marking)
// Final sync IdealKit and GraphKit.
kit->final_sync(ideal);
}
/*
* G1, like any GC with a Young Generation, requires a way to keep track of
* references from the Old Generation to the Young Generation to make sure
* all live objects are found. G1 also needs to keep track of object
* references between different regions to enable evacuation of old regions,
* which is done as part of mixed collections. References are tracked in
* remembered sets, which are continuously updated as references are written,
* with the help of the post-barrier.
*
* To reduce the number of updates to the remembered set, the post-barrier
* filters out updates to fields in objects located in the Young Generation,
* updates within the same region as the reference, writes of NULL, and
* writes whose card is already marked as dirty by an earlier write.
*
* Under certain circumstances it is possible to avoid generating the
* post-barrier completely if it is possible during compile time to prove
* the object is newly allocated and that no safepoint exists between the
* allocation and the store.
*
* In the case of slow allocation the allocation code must handle the barrier
* as part of the allocation in the case the allocated object is not located
* in the nursery, this would happen for humongous objects. This is similar to
* how CMS is required to handle this case, see the comments for the method
* CollectedHeap::new_deferred_store_barrier and OptoRuntime::new_deferred_store_barrier.
* A deferred card mark is required for these objects and handled in the above
* mentioned methods.
*
* Returns true if the post barrier can be removed
*/
// Decide whether the G1 post-barrier for 'store' can be elided. This is the
// case when the destination address is provably inside an object whose
// allocation's initialization 'store' still belongs to (the store's control
// input is a projection of that same allocation's InitializeNode), so no
// safepoint can separate the allocation from the store.
bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit,
                                                PhaseTransform* phase, Node* store,
                                                Node* adr) const {
  // Decompose the address and locate the allocation that produced its base.
  intptr_t adr_offset = 0;
  Node* adr_base = AddPNode::Ideal_base_and_offset(adr, phase, adr_offset);
  AllocateNode* allocation = AllocateNode::Ideal_allocation(adr_base, phase);

  if (adr_offset == Type::OffsetBot || allocation == NULL) {
    // Either the offset is imprecise (cannot unalias), or no allocation
    // was found for the base.
    return false;
  }

  // Start from the store's control input: a projection of an InitializeNode
  // means the store is part of some allocation's initialization.
  Node* ctrl = store->in(MemNode::Control);
  if (!ctrl->is_Proj() || !ctrl->in(0)->is_Initialize()) {
    return false;
  }
  InitializeNode* init = ctrl->in(0)->as_Initialize();

  // The barrier is removable only when it is the very same allocation site.
  return allocation == init->allocation();
}
//
// Update the card table and add card address to the queue
//
// Dirties the card covering the store (storeCM keeps this ordered with the
// oop store itself) and logs the card address into the thread-local dirty
// card queue, falling back to the g1_wb_post runtime stub when the queue
// buffer is full (index == 0).
void G1BarrierSetC2::g1_mark_card(GraphKit* kit,
IdealKit& ideal,
Node* card_adr,
Node* oop_store,
uint oop_alias_idx,
Node* index,
Node* index_adr,
Node* buffer,
const TypeFunc* tf) const {
Node* zero = __ ConI(0);
Node* zeroX = __ ConX(0);
Node* no_base = __ top();
BasicType card_bt = T_BYTE;
// Smash zero into card. MUST BE ORDERED WRT TO STORE
__ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
// Now do the queue work
__ if_then(index, BoolTest::ne, zeroX); {
// Queue has room: decrement the index and log the card address there.
Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
Node* log_addr = __ AddP(no_base, buffer, next_index);
// Order, see storeCM.
__ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
__ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
} __ else_(); {
// Queue buffer is full: hand the card to the runtime.
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
} __ end_if();
}
// Emit the G1 post-barrier. Filters out stores that provably need no
// remembered-set update (NULL stores, same-region stores, young cards,
// already-dirty cards) and otherwise dirties the card and enqueues it via
// g1_mark_card().
void G1BarrierSetC2::post_barrier(GraphKit* kit,
Node* ctl,
Node* oop_store,
Node* obj,
Node* adr,
uint alias_idx,
Node* val,
BasicType bt,
bool use_precise) const {
// If we are writing a NULL then we need no post barrier
if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
// Must be NULL
const Type* t = val->bottom_type();
assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
// No post barrier if writing NULL
return;
}
if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) {
// We can skip marks on a freshly-allocated object in Eden.
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
// That routine informs GC to take appropriate compensating steps,
// upon a slow-path allocation, so as to make this card-mark
// elision safe.
return;
}
if (use_ReduceInitialCardMarks()
&& g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) {
return;
}
if (!use_precise) {
// All card marks for a (non-array) instance are in one place:
adr = obj;
}
// (Else it's an array (or unknown), and we want more precise card marks.)
assert(adr != NULL, "");
IdealKit ideal(kit, true);
Node* tls = __ thread(); // ThreadLocalStorage
Node* no_base = __ top();
float unlikely = PROB_UNLIKELY(0.999);
Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val());
Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val());
Node* zeroX = __ ConX(0);
const TypeFunc *tf = g1_wb_post_Type();
// Offsets into the thread
const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
// Pointers into the thread
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
// Now some values
// Use ctrl to avoid hoisting these values past a safepoint, which could
// potentially reset these fields in the JavaThread.
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
// Convert the store obj pointer to an int prior to doing math on it
// Must use ctrl to prevent "integerized oop" existing across safepoint
Node* cast = __ CastPX(__ ctrl(), adr);
// Divide pointer by card size
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
// Combine card table base and card offset
Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset );
// If we know the value being stored does it cross regions?
if (val != NULL) {
// Does the store cause us to cross regions?
// Should be able to do an unsigned compare of region_size instead of
// and extra shift. Do we have an unsigned compare??
// Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
// if (xor_res == 0) same region so skip
__ if_then(xor_res, BoolTest::ne, zeroX); {
// No barrier if we are storing a NULL
__ if_then(val, BoolTest::ne, kit->null(), unlikely); {
// Ok must mark the card if not already dirty
// load the original value of the card
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val, BoolTest::ne, young_card); {
// The membar keeps the card check ordered with the oop store before
// the card is re-checked and possibly dirtied.
kit->sync_kit(ideal);
kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
__ sync_kit(kit);
Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val_reload, BoolTest::ne, dirty_card); {
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
} __ end_if();
} __ end_if();
} __ end_if();
} else {
// The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
// We don't need a barrier here if the destination is a newly allocated object
// in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
// are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val, BoolTest::ne, young_card); {
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
}
// Final sync IdealKit and GraphKit.
kit->final_sync(ideal);
}
// Helper that guards and inserts a pre-barrier.
void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
Node* pre_val, bool need_mem_bar) const {
// We could be accessing the referent field of a reference object. If so, when G1
// is enabled, we need to log the value in the referent field in an SATB buffer.
// This routine performs some compile time filters and generates suitable
// runtime filters that guard the pre-barrier code.
// Also add memory barrier for non volatile load from the referent field
// to prevent commoning of loads across safepoint.
// Some compile time checks.
// If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
const TypeX* otype = offset->find_intptr_t_type();
if (otype != NULL && otype->is_con() &&
otype->get_con() != java_lang_ref_Reference::referent_offset) {
// Constant offset but not the reference_offset so just return
return;
}
// We only need to generate the runtime guards for instances.
const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
if (btype != NULL) {
if (btype->isa_aryptr()) {
// Array type so nothing to do
return;
}
const TypeInstPtr* itype = btype->isa_instptr();
if (itype != NULL) {
// Can the klass of base_oop be statically determined to be
// _not_ a sub-class of Reference and _not_ Object?
ciKlass* klass = itype->klass();
if ( klass->is_loaded() &&
!klass->is_subtype_of(kit->env()->Reference_klass()) &&
!kit->env()->Object_klass()->is_subtype_of(klass)) {
return;
}
}
}
// The compile time filters did not reject base_oop/offset so
// we need to generate the following runtime filters
//
// if (offset == java_lang_ref_Reference::_reference_offset) {
// if (instance_of(base, java.lang.ref.Reference)) {
// pre_barrier(_, pre_val, ...);
// }
// }
float likely = PROB_LIKELY( 0.999);
float unlikely = PROB_UNLIKELY(0.999);
IdealKit ideal(kit);
Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
__ if_then(offset, BoolTest::eq, referent_off, unlikely); {
// Update graphKit memory and control from IdealKit.
kit->sync_kit(ideal);
Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass()));
Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con);
// Update IdealKit memory and control from graphKit.
__ sync_kit(kit);
Node* one = __ ConI(1);
// is_instof == 0 if base_oop == NULL
__ if_then(is_instof, BoolTest::eq, one, unlikely); {
// Update graphKit from IdeakKit.
kit->sync_kit(ideal);
// Use the pre-barrier to record the value in the referent field
pre_barrier(kit, false /* do_load */,
__ ctrl(),
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
pre_val /* pre_val */,
T_OBJECT);
if (need_mem_bar) {
// Add memory barrier to prevent commoning reads from this field
// across safepoint since GC can change its value.
kit->insert_mem_bar(Op_MemBarCPUOrder);
}
// Update IdealKit from graphKit.
__ sync_kit(kit);
} __ end_if(); // _ref_type != ref_none
} __ end_if(); // offset == referent_offset
// Final sync IdealKit and GraphKit.
kit->final_sync(ideal);
}
#undef __
// Resolve an oop load: delegate to the card-table implementation, then add
// the SATB read barrier for weak/unknown reference loads that may observe
// the referent of a java.lang.ref.Reference.
Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
DecoratorSet decorators = access.decorators();
GraphKit* kit = access.kit();
Node* adr = access.addr().node();
Node* obj = access.base();
// Decorator flags that decide whether a read barrier is needed.
bool mismatched = (decorators & C2_MISMATCHED) != 0;
bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
bool on_heap = (decorators & IN_HEAP) != 0;
bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
bool is_unordered = (decorators & MO_UNORDERED) != 0;
bool need_cpu_mem_bar = !is_unordered || mismatched || !on_heap;
Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : kit->top();
Node* load = CardTableBarrierSetC2::load_at_resolved(access, val_type);
// If we are reading the value of the referent field of a Reference
// object (either by using Unsafe directly or through reflection)
// then, if G1 is enabled, we need to record the referent in an
// SATB log buffer using the pre-barrier mechanism.
// Also we need to add memory barrier to prevent commoning reads
// from this field across safepoint since GC can change its value.
bool need_read_barrier = on_heap && (on_weak ||
(unknown && offset != kit->top() && obj != kit->top()));
if (!access.is_oop() || !need_read_barrier) {
return load;
}
if (on_weak) {
// Known-weak reference load (e.g. Reference.get() intrinsic):
// Use the pre-barrier to record the value in the referent field
pre_barrier(kit, false /* do_load */,
kit->control(),
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
load /* pre_val */, T_OBJECT);
// Add memory barrier to prevent commoning reads from this field
// across safepoint since GC can change its value.
kit->insert_mem_bar(Op_MemBarCPUOrder);
} else if (unknown) {
// Reference strength unknown at compile time: emit runtime-guarded
// pre-barrier via insert_pre_barrier().
// We do not require a mem bar inside pre_barrier if need_mem_bar
// is set: the barriers would be emitted by us.
insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar);
}
return load;
}
// A node belongs to a G1 barrier if the shared card-table logic says so, or
// if it is a leaf call into one of the two G1 write-barrier runtime stubs
// ("g1_wb_pre" / "g1_wb_post").
bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const {
  bool is_barrier = CardTableBarrierSetC2::is_gc_barrier_node(node);
  if (!is_barrier && node->Opcode() == Op_CallLeaf) {
    const char* name = node->as_CallLeaf()->_name;
    is_barrier = (name != NULL) &&
                 (strcmp(name, "g1_wb_pre") == 0 ||
                  strcmp(name, "g1_wb_post") == 0);
  }
  return is_barrier;
}
// Eliminate the expanded G1 pre/post barrier attached to a CastP2X node
// (reached when the corresponding allocation was scalar-replaced). The
// barrier's guarding Cmp nodes are replaced with constant-false so the whole
// barrier collapses during IGVN.
void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
// It could be only one user, URShift node, in Object.clone() intrinsic
// but the new allocation is passed to arraycopy stub and it could not
// be scalar replaced. So we don't check the case.
// An other case of only one user (Xor) is when the value check for NULL
// in G1 post barrier is folded after CCP so the code which used URShift
// is removed.
// Take Region node before eliminating post barrier since it also
// eliminates CastP2X node when it has only one user.
Node* this_region = node->in(0);
assert(this_region != NULL, "");
// Remove G1 post barrier.
// Search for CastP2X->Xor->URShift->Cmp path which
// checks if the store done to a different from the value's region.
// And replace Cmp with #0 (false) to collapse G1 post barrier.
Node* xorx = node->find_out_with(Op_XorX);
if (xorx != NULL) {
Node* shift = xorx->unique_out();
Node* cmpx = shift->unique_out();
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
"missing region check in G1 post barrier");
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
// Remove G1 pre barrier.
// Search "if (marking != 0)" check and set it to "false".
// There is no G1 pre barrier if previous stored value is NULL
// (for example, after initialization).
if (this_region->is_Region() && this_region->req() == 3) {
// Find the IfFalse input of the region: that branch carries the
// "marking inactive" path of the pre-barrier.
int ind = 1;
if (!this_region->in(ind)->is_IfFalse()) {
ind = 2;
}
if (this_region->in(ind)->is_IfFalse() &&
this_region->in(ind)->in(0)->Opcode() == Op_If) {
Node* bol = this_region->in(ind)->in(0)->in(1);
assert(bol->is_Bool(), "");
cmpx = bol->in(1);
// Match the exact pre-barrier guard shape: CmpI(LoadB/LoadI of the
// thread-local SATB "active" flag at marking_offset, 0) with ne test.
if (bol->as_Bool()->_test._test == BoolTest::ne &&
cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
cmpx->in(1)->is_Load()) {
Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
}
}
}
}
} else {
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
// This is a G1 post barrier emitted by the Object.clone() intrinsic.
// Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
// is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
Node* shift = node->find_out_with(Op_URShiftX);
assert(shift != NULL, "missing G1 post barrier");
Node* addp = shift->unique_out();
Node* load = addp->find_out_with(Op_LoadB);
assert(load != NULL, "missing G1 post barrier");
Node* cmpx = load->unique_out();
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
"missing card value check in G1 post barrier");
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
// There is no G1 pre barrier in this case
}
// Now CastP2X can be removed since it is used only on dead path
// which currently still alive until igvn optimize it.
assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, "");
macro->replace_node(node, macro->top());
}
// Given a control node 'c', step over an expanded G1 post barrier (the
// nested region diamond ending in a g1_wb_post leaf call, emitted when
// !ReduceInitialCardMarks, e.g. by the Object.clone() intrinsic) and return
// the control above the barrier; otherwise return 'c' unchanged.
Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const {
if (!use_ReduceInitialCardMarks() &&
c != NULL && c->is_Region() && c->req() == 3) {
for (uint i = 1; i < c->req(); i++) {
if (c->in(i) != NULL && c->in(i)->is_Region() &&
c->in(i)->req() == 3) {
Node* r = c->in(i);
for (uint j = 1; j < r->req(); j++) {
// A projection of a leaf call into the g1_wb_post stub identifies
// the inner region as the merge point of the barrier's slow path.
if (r->in(j) != NULL && r->in(j)->is_Proj() &&
r->in(j)->in(0) != NULL &&
r->in(j)->in(0)->Opcode() == Op_CallLeaf &&
r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post)) {
Node* call = r->in(j)->in(0);
// Take the other input of the outer region and climb two control
// inputs to get above the barrier's diamond.
c = c->in(i == 1 ? 2 : 1);
if (c != NULL) {
c = c->in(0);
if (c != NULL) {
c = c->in(0);
// Sanity-check that we landed at the expected control above
// the call's projection chain (when that chain is complete).
assert(call->in(0) == NULL ||
call->in(0)->in(0) == NULL ||
call->in(0)->in(0)->in(0) == NULL ||
call->in(0)->in(0)->in(0)->in(0) == NULL ||
call->in(0)->in(0)->in(0)->in(0)->in(0) == NULL ||
c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape");
return c;
}
}
}
}
}
}
}
return c;
}

View file

@ -0,0 +1,93 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP
#define SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
class PhaseTransform;
class Type;
class TypeFunc;
// C2 (server compiler) support for the G1 write barriers: emits the SATB
// pre-barrier and the remembered-set post-barrier as ideal-graph IR, and
// knows how to recognize, step over and eliminate those barriers during
// macro expansion.
class G1BarrierSetC2: public CardTableBarrierSetC2 {
protected:
// Emit the SATB pre-barrier: records the previous value of an oop field
// in the thread-local SATB queue while concurrent marking is active.
virtual void pre_barrier(GraphKit* kit,
bool do_load,
Node* ctl,
Node* obj,
Node* adr,
uint adr_idx,
Node* val,
const TypeOopPtr* val_type,
Node* pre_val,
BasicType bt) const;
// Emit the post-barrier: dirties the card for a cross-region oop store and
// enqueues the card address in the thread-local dirty card queue.
virtual void post_barrier(GraphKit* kit,
Node* ctl,
Node* store,
Node* obj,
Node* adr,
uint adr_idx,
Node* val,
BasicType bt,
bool use_precise) const;
// Compile-time elision checks: return true when the corresponding barrier
// is provably unnecessary (e.g. a store into a just-allocated object with
// no safepoint in between).
bool g1_can_remove_pre_barrier(GraphKit* kit,
PhaseTransform* phase,
Node* adr,
BasicType bt,
uint adr_idx) const;
bool g1_can_remove_post_barrier(GraphKit* kit,
PhaseTransform* phase, Node* store,
Node* adr) const;
// Dirty the card and enqueue its address, calling the g1_wb_post runtime
// stub when the dirty card queue buffer is full.
void g1_mark_card(GraphKit* kit,
IdealKit& ideal,
Node* card_adr,
Node* oop_store,
uint oop_alias_idx,
Node* index,
Node* index_adr,
Node* buffer,
const TypeFunc* tf) const;
// Helper for unsafe accesses, that may or may not be on the referent field.
// Generates the guards that check whether the result of
// Unsafe.getObject should be recorded in an SATB log buffer.
void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const;
// Call signatures of the two G1 write-barrier runtime stubs.
static const TypeFunc* g1_wb_pre_Type();
static const TypeFunc* g1_wb_post_Type();
// Load an oop, adding the SATB read barrier for weak/unknown references.
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
public:
virtual bool is_gc_barrier_node(Node* node) const;
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
virtual Node* step_over_gc_barrier(Node* c) const;
};
#endif // SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP

View file

@ -34,14 +34,19 @@
#include "oops/access.inline.hpp" #include "oops/access.inline.hpp"
#include "oops/compressedOops.inline.hpp" #include "oops/compressedOops.inline.hpp"
#include "oops/oop.inline.hpp" #include "oops/oop.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/mutexLocker.hpp" #include "runtime/mutexLocker.hpp"
#include "runtime/thread.inline.hpp" #include "runtime/thread.inline.hpp"
#include "utilities/macros.hpp" #include "utilities/macros.hpp"
#ifdef COMPILER1 #ifdef COMPILER1
#include "gc/g1/c1/g1BarrierSetC1.hpp" #include "gc/g1/c1/g1BarrierSetC1.hpp"
#endif #endif
#ifdef COMPILER2
#include "gc/g1/c2/g1BarrierSetC2.hpp"
#endif
class G1BarrierSetC1; class G1BarrierSetC1;
class G1BarrierSetC2;
SATBMarkQueueSet G1BarrierSet::_satb_mark_queue_set; SATBMarkQueueSet G1BarrierSet::_satb_mark_queue_set;
DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set; DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set;
@ -49,6 +54,7 @@ DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set;
G1BarrierSet::G1BarrierSet(G1CardTable* card_table) : G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
CardTableBarrierSet(make_barrier_set_assembler<G1BarrierSetAssembler>(), CardTableBarrierSet(make_barrier_set_assembler<G1BarrierSetAssembler>(),
make_barrier_set_c1<G1BarrierSetC1>(), make_barrier_set_c1<G1BarrierSetC1>(),
make_barrier_set_c2<G1BarrierSetC2>(),
card_table, card_table,
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)) {} BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)) {}

View file

@ -35,6 +35,7 @@
class BarrierSetAssembler; class BarrierSetAssembler;
class BarrierSetC1; class BarrierSetC1;
class BarrierSetC2;
class JavaThread; class JavaThread;
// This class provides the interface between a barrier implementation and // This class provides the interface between a barrier implementation and
@ -70,6 +71,7 @@ private:
FakeRtti _fake_rtti; FakeRtti _fake_rtti;
BarrierSetAssembler* _barrier_set_assembler; BarrierSetAssembler* _barrier_set_assembler;
BarrierSetC1* _barrier_set_c1; BarrierSetC1* _barrier_set_c1;
BarrierSetC2* _barrier_set_c2;
public: public:
// Metafunction mapping a class derived from BarrierSet to the // Metafunction mapping a class derived from BarrierSet to the
@ -92,10 +94,12 @@ public:
protected: protected:
BarrierSet(BarrierSetAssembler* barrier_set_assembler, BarrierSet(BarrierSetAssembler* barrier_set_assembler,
BarrierSetC1* barrier_set_c1, BarrierSetC1* barrier_set_c1,
BarrierSetC2* barrier_set_c2,
const FakeRtti& fake_rtti) : const FakeRtti& fake_rtti) :
_fake_rtti(fake_rtti), _fake_rtti(fake_rtti),
_barrier_set_assembler(barrier_set_assembler), _barrier_set_assembler(barrier_set_assembler),
_barrier_set_c1(barrier_set_c1) {} _barrier_set_c1(barrier_set_c1),
_barrier_set_c2(barrier_set_c2) {}
~BarrierSet() { } ~BarrierSet() { }
template <class BarrierSetAssemblerT> template <class BarrierSetAssemblerT>
@ -108,6 +112,11 @@ protected:
return COMPILER1_PRESENT(new BarrierSetC1T()) NOT_COMPILER1(NULL); return COMPILER1_PRESENT(new BarrierSetC1T()) NOT_COMPILER1(NULL);
} }
template <class BarrierSetC2T>
BarrierSetC2* make_barrier_set_c2() {
return COMPILER2_PRESENT(new BarrierSetC2T()) NOT_COMPILER2(NULL);
}
public: public:
// Support for optimizing compilers to call the barrier set on slow path allocations // Support for optimizing compilers to call the barrier set on slow path allocations
// that did not enter a TLAB. Used for e.g. ReduceInitialCardMarks. // that did not enter a TLAB. Used for e.g. ReduceInitialCardMarks.
@ -138,6 +147,11 @@ public:
return _barrier_set_c1; return _barrier_set_c1;
} }
BarrierSetC2* barrier_set_c2() {
assert(_barrier_set_c2 != NULL, "should be set");
return _barrier_set_c2;
}
// The AccessBarrier of a BarrierSet subclass is called by the Access API // The AccessBarrier of a BarrierSet subclass is called by the Access API
// (cf. oops/access.hpp) to perform decorated accesses. GC implementations // (cf. oops/access.hpp) to perform decorated accesses. GC implementations
// may override these default access operations by declaring an // may override these default access operations by declaring an

View file

@ -0,0 +1,588 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/narrowptrnode.hpp"
#include "utilities/macros.hpp"
// Hook that lets a GC barrier set adjust the address of an access before the
// access is emitted (the address wrapper in the C2Access may be updated in
// place). By default this is a no-op.
void BarrierSetC2::resolve_address(C2Access& access) const { }
// Returns the opaque per-compilation barrier-set state owned by the GraphKit
// (created via BarrierSetC2::create_barrier_state). Callers cast it with
// barrier_set_state_as<T>().
void* C2Access::barrier_set_state() const {
  return _kit->barrier_set_state();
}
// Decide whether this access must be bracketed by CPU membars
// (Op_MemBarCPUOrder). Atomic (read-modify-write) accesses always need them;
// anonymous (Unsafe) accesses need them unless a unique alias category can be
// determined for the reference.
bool C2Access::needs_cpu_membar() const {
  const bool is_mismatched = (_decorators & C2_MISMATCHED) != 0;
  const bool is_unordered  = (_decorators & MO_UNORDERED) != 0;
  const bool is_anonymous  = (_decorators & C2_UNSAFE_ACCESS) != 0;
  const bool is_on_heap    = (_decorators & IN_HEAP) != 0;
  const bool is_write      = (_decorators & C2_WRITE_ACCESS) != 0;
  const bool is_read       = (_decorators & C2_READ_ACCESS) != 0;

  if (is_read && is_write) {
    // Atomics always need to be wrapped in CPU membars
    return true;
  }

  if (!is_anonymous) {
    return false;
  }

  // We will need memory barriers unless we can determine a unique
  // alias category for this reference. (Note: If for some reason
  // the barriers get omitted and the unsafe reference begins to "pollute"
  // the alias analysis of the rest of the graph, either Compile::can_alias
  // or Compile::must_alias will throw a diagnostic assert.)
  return !is_on_heap || !is_unordered || (is_mismatched && !_addr.type()->isa_aryptr());
}
// Raw (barrier-free) store implementation. Rounds T_DOUBLE values, derives the
// memory ordering from the access decorators and emits the store through the
// GraphKit. The raw store node is recorded in the access (set_raw_access) so
// GC subclasses can refer to it, and is also returned.
Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  DecoratorSet decorators = access.decorators();
  GraphKit* kit = access.kit();
  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  // Any ordering stronger than MO_UNORDERED requires an atomic store.
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool in_root = (decorators & IN_ROOT) != 0;
  assert(!in_root, "not supported yet");
  if (access.type() == T_DOUBLE) {
    // Round double stores per the platform's FP-rounding rules.
    Node* new_val = kit->dstore_rounding(val.node());
    val.set_node(new_val);
  }
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), access.type(),
                                     access.addr().type(), mo, requires_atomic_access, unaligned, mismatched);
  access.set_raw_access(store);
  return store;
}
// Raw (barrier-free) load implementation. Translates the access decorators
// into LoadNode properties (memory ordering, control dependency, pinning,
// atomicity, alignment, mismatch) and emits the load through the GraphKit.
// The raw load node is recorded in the access and returned.
Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
  DecoratorSet decorators = access.decorators();
  GraphKit* kit = access.kit();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  bool mismatched = (decorators & C2_MISMATCHED) != 0;
  // Any ordering stronger than MO_UNORDERED requires an atomic load.
  bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
  bool unaligned = (decorators & C2_UNALIGNED) != 0;
  bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
  bool pinned = (decorators & C2_PINNED_LOAD) != 0;
  bool in_root = (decorators & IN_ROOT) != 0;
  assert(!in_root, "not supported yet");
  MemNode::MemOrd mo = access.mem_node_mo();
  LoadNode::ControlDependency dep = pinned ? LoadNode::Pinned : LoadNode::DependsOnlyOnTest;
  // Only attach control when the load must not float above its guard.
  Node* control = control_dependent ? kit->control() : NULL;
  Node* load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
                              dep, requires_atomic_access, unaligned, mismatched);
  access.set_raw_access(load);
  return load;
}
// RAII helper that brackets an access with the memory barriers its decorators
// require. The constructor emits the leading barriers (release / IRIW
// volatile / CPU-order membars) before the access is parsed; the destructor
// emits the trailing barriers (acquire / volatile / CPU-order membars) after
// it. The exact membar choices mirror the Java memory model rules for
// volatile and atomic accesses; the emission order is significant.
class C2AccessFence: public StackObj {
  C2Access& _access;

public:
  // Emit the pre-access barriers.
  C2AccessFence(C2Access& access) :
    _access(access) {
    GraphKit* kit = access.kit();
    DecoratorSet decorators = access.decorators();
    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;
    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_release = (decorators & MO_RELEASE) != 0;
    if (is_atomic) {
      // Memory-model-wise, a LoadStore acts like a little synchronized
      // block, so needs barriers on each side. These don't translate
      // into actual barriers on most machines, but we still need rest of
      // compiler to respect ordering.
      if (is_release) {
        kit->insert_mem_bar(Op_MemBarRelease);
      } else if (is_volatile) {
        if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
          kit->insert_mem_bar(Op_MemBarVolatile);
        } else {
          kit->insert_mem_bar(Op_MemBarRelease);
        }
      }
    } else if (is_write) {
      // If reference is volatile, prevent following memory ops from
      // floating down past the volatile write. Also prevents commoning
      // another volatile read.
      if (is_volatile || is_release) {
        kit->insert_mem_bar(Op_MemBarRelease);
      }
    } else {
      // Memory barrier to prevent normal and 'unsafe' accesses from
      // bypassing each other. Happens after null checks, so the
      // exception paths do not take memory state from the memory barrier,
      // so there's no problems making a strong assert about mixing users
      // of safe & unsafe memory.
      if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
        kit->insert_mem_bar(Op_MemBarVolatile);
      }
    }
    if (access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }
    if (is_atomic) {
      // 4984716: MemBars must be inserted before this
      // memory node in order to avoid a false
      // dependency which will confuse the scheduler.
      access.set_memory();
    }
  }

  // Emit the post-access barriers.
  ~C2AccessFence() {
    GraphKit* kit = _access.kit();
    DecoratorSet decorators = _access.decorators();
    bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
    bool is_read = (decorators & C2_READ_ACCESS) != 0;
    bool is_atomic = is_read && is_write;
    bool is_volatile = (decorators & MO_SEQ_CST) != 0;
    bool is_acquire = (decorators & MO_ACQUIRE) != 0;
    // If reference is volatile, prevent following volatiles ops from
    // floating up before the volatile access.
    if (_access.needs_cpu_membar()) {
      kit->insert_mem_bar(Op_MemBarCPUOrder);
    }
    if (is_atomic) {
      if (is_acquire || is_volatile) {
        kit->insert_mem_bar(Op_MemBarAcquire);
      }
    } else if (is_write) {
      // If not multiple copy atomic, we do the MemBarVolatile before the load.
      if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
        kit->insert_mem_bar(Op_MemBarVolatile); // Use fat membar
      }
    } else {
      if (is_volatile || is_acquire) {
        // Pin the acquire membar on the raw access node itself.
        kit->insert_mem_bar(Op_MemBarAcquire, _access.raw_access());
      }
    }
  }
};
// Entry point for stores through the Access API: wrap the access in the
// required memory barriers (C2AccessFence), let the barrier set resolve the
// address, then dispatch to the (possibly overridden) resolved store.
Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return store_at_resolved(access, val);
}
// Entry point for loads through the Access API: wrap the access in the
// required memory barriers (C2AccessFence), let the barrier set resolve the
// address, then dispatch to the (possibly overridden) resolved load.
Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return load_at_resolved(access, val_type);
}
// Map the MO_* decorators of this access onto the MemNode memory-ordering
// constant the emitted memory node should carry. The strongest ordering is
// checked first; the test order is significant.
MemNode::MemOrd C2Access::mem_node_mo() const {
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  if ((_decorators & MO_SEQ_CST) != 0) {
    if (is_write && is_read) {
      // For atomic operations
      return MemNode::seqcst;
    } else if (is_write) {
      return MemNode::release;
    } else {
      assert(is_read, "what else?");
      return MemNode::acquire;
    }
  } else if ((_decorators & MO_RELEASE) != 0) {
    return MemNode::release;
  } else if ((_decorators & MO_ACQUIRE) != 0) {
    return MemNode::acquire;
  } else if (is_write) {
    // Volatile fields need releasing stores.
    // Non-volatile fields also need releasing stores if they hold an
    // object reference, because the object reference might point to
    // a freshly created object.
    // Conservatively release stores of object references.
    return StoreNode::release_if_reference(_type);
  } else {
    return MemNode::unordered;
  }
}
// Normalize the decorator set for this access (run once from the C2Access
// constructor): force MO_RELAXED under AlwaysAtomicAccesses, apply the shared
// decorator fixup, and pin anonymous (Unsafe) loads unless the address can be
// proven to be a valid in-bounds instance field access.
void C2Access::fixup_decorators() {
  bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
  bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
  bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
  bool is_read = (_decorators & C2_READ_ACCESS) != 0;
  bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
  if (AlwaysAtomicAccesses && is_unordered) {
    _decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
    _decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccess
  }
  _decorators = AccessInternal::decorator_fixup(_decorators);
  if (is_read && !is_write && anonymous) {
    // To be valid, unsafe loads may depend on other conditions than
    // the one that guards them: pin the Load node
    _decorators |= C2_CONTROL_DEPENDENT_LOAD;
    _decorators |= C2_PINNED_LOAD;
    const TypePtr* adr_type = _addr.type();
    Node* adr = _addr.node();
    if (!needs_cpu_membar() && adr_type->isa_instptr()) {
      assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
      intptr_t offset = Type::OffsetBot;
      AddPNode::Ideal_base_and_offset(adr, &_kit->gvn(), offset);
      if (offset >= 0) {
        int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->klass()->layout_helper());
        if (offset < s) {
          // Guaranteed to be a valid access, no need to pin it
          _decorators ^= C2_CONTROL_DEPENDENT_LOAD;
          _decorators ^= C2_PINNED_LOAD;
        }
      }
    }
  }
}
//--------------------------- atomic operations---------------------------------

// Attach an SCMemProj to a freshly created LoadStore node and install it as
// the new memory state for the access' alias category, unless pinning has
// been explicitly disabled on the access.
static void pin_atomic_op(C2AtomicAccess& access) {
  if (!access.needs_pinning()) {
    return;
  }
  // SCMemProjNodes represent the memory state of a LoadStore. Their
  // main role is to prevent LoadStore nodes from being optimized away
  // when their results aren't used.
  GraphKit* kit = access.kit();
  Node* load_store = access.raw_access();
  assert(load_store != NULL, "must pin atomic op");
  Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
  kit->set_memory(proj, access.alias_idx());
}
// Capture the current memory state of this access' alias category from the
// GraphKit's memory graph.
void C2AtomicAccess::set_memory() {
  _memory = _kit->memory(_alias_idx);
}
// Raw implementation of a value-returning compare-and-exchange. Emits the
// CompareAndExchange node matching the access type (with narrow-oop
// encode/decode on LP64 compressed-oops heaps), records the raw node in the
// access and pins it via an SCMemProj. Returns the node producing the old
// value (decoded back to a full oop when it was emitted in narrow form).
Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                   Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = NULL;
  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      // Compressed oops: encode both values and perform a narrow cmpxchg.
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
    } else
#endif
    {
      load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo));
    }
  } else {
    switch (access.type()) {
      case T_BYTE: {
        load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
        break;
      }
      case T_SHORT: {
        load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
        break;
      }
      case T_INT: {
        load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
        break;
      }
      case T_LONG: {
        load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
  access.set_raw_access(load_store);
  pin_atomic_op(access);
#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    // The narrow cmpxchg produced a narrow old value; decode it.
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif
  return load_store;
}
// Raw implementation of a boolean-returning compare-and-swap. Chooses the
// weak or strong CompareAndSwap node per the C2_WEAK_CMPXCHG decorator, with
// narrow-oop encoding on LP64 compressed-oops heaps, records the raw node in
// the access and pins it. Returns the node producing the success flag.
Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                    Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  DecoratorSet decorators = access.decorators();
  MemNode::MemOrd mo = access.mem_node_mo();
  Node* mem = access.memory();
  // Weak CAS may fail spuriously but never succeeds incorrectly.
  bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
  Node* load_store = NULL;
  Node* adr = access.addr().node();
  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      // Compressed oops: encode both values and perform a narrow CAS.
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
      if (is_weak_cas) {
        load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
      } else {
        load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
      }
    } else
#endif
    {
      if (is_weak_cas) {
        load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
      } else {
        load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
      }
    }
  } else {
    switch(access.type()) {
      case T_BYTE: {
        if (is_weak_cas) {
          load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo));
        } else {
          load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo));
        }
        break;
      }
      case T_SHORT: {
        if (is_weak_cas) {
          load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo));
        } else {
          load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo));
        }
        break;
      }
      case T_INT: {
        if (is_weak_cas) {
          load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo));
        } else {
          load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo));
        }
        break;
      }
      case T_LONG: {
        if (is_weak_cas) {
          load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo));
        } else {
          load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo));
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
  access.set_raw_access(load_store);
  pin_atomic_op(access);
  return load_store;
}
// Raw implementation of an atomic exchange (swap). Emits the GetAndSet node
// matching the access type (with narrow-oop encode/decode on LP64
// compressed-oops heaps), records the raw node in the access and pins it.
// Returns the node producing the previous value.
Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();
  Node* load_store = NULL;
  if (access.is_oop()) {
#ifdef _LP64
    if (adr->bottom_type()->is_ptr_to_narrowoop()) {
      // Compressed oops: encode the new value and perform a narrow swap.
      Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
      load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
    } else
#endif
    {
      load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr()));
    }
  } else {
    switch (access.type()) {
      case T_BYTE:
        load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type));
        break;
      case T_SHORT:
        load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type));
        break;
      case T_INT:
        load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type));
        break;
      case T_LONG:
        load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type));
        break;
      default:
        ShouldNotReachHere();
    }
  }
  access.set_raw_access(load_store);
  pin_atomic_op(access);
#ifdef _LP64
  if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
    // The narrow swap produced a narrow old value; decode it.
    return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
  }
#endif
  return load_store;
}
// Raw implementation of an atomic fetch-and-add. Only the integral types
// below are supported (oops cannot be added to). Emits the GetAndAdd node
// matching the access type, records the raw node in the access, pins it via
// an SCMemProj, and returns the node producing the previous value.
Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  Node* ctrl = kit->control();
  Node* mem = access.memory();
  Node* adr = access.addr().node();
  const TypePtr* adr_type = access.addr().type();

  Node* result = NULL;
  switch (access.type()) {
  case T_BYTE:
    result = kit->gvn().transform(new GetAndAddBNode(ctrl, mem, adr, new_val, adr_type));
    break;
  case T_SHORT:
    result = kit->gvn().transform(new GetAndAddSNode(ctrl, mem, adr, new_val, adr_type));
    break;
  case T_INT:
    result = kit->gvn().transform(new GetAndAddINode(ctrl, mem, adr, new_val, adr_type));
    break;
  case T_LONG:
    result = kit->gvn().transform(new GetAndAddLNode(ctrl, mem, adr, new_val, adr_type));
    break;
  default:
    ShouldNotReachHere();
  }

  access.set_raw_access(result);
  pin_atomic_op(access);
  return result;
}
// Entry point for value-returning cmpxchg through the Access API: wrap in
// barriers, resolve the address, then dispatch to the resolved implementation.
Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicAccess& access, Node* expected_val,
                                          Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
}
// Entry point for boolean-returning cmpxchg through the Access API: wrap in
// barriers, resolve the address, then dispatch to the resolved implementation.
Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicAccess& access, Node* expected_val,
                                           Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
}
// Entry point for atomic exchange through the Access API: wrap in barriers,
// resolve the address, then dispatch to the resolved implementation.
Node* BarrierSetC2::atomic_xchg_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_xchg_at_resolved(access, new_val, value_type);
}
// Entry point for atomic fetch-and-add through the Access API: wrap in
// barriers, resolve the address, then dispatch to the resolved implementation.
Node* BarrierSetC2::atomic_add_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
  C2AccessFence fence(access);
  resolve_address(access);
  return atomic_add_at_resolved(access, new_val, value_type);
}
// Emit a basic (barrier-free) object/array clone as a raw 8-byte-word copy
// using an ArrayCopyNode in "clonebasic" mode. The header is excluded from
// the copy, adjusting the start offset so it stays 8-byte aligned under
// compressed class pointers. GC subclasses may override this to add barriers.
void BarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
  // Exclude the header but include array length to copy by 8 bytes words.
  // Can't use base_offset_in_bytes(bt) since basic type is unknown.
  int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
                            instanceOopDesc::base_offset_in_bytes();
  // base_off:
  // 8  - 32-bit VM
  // 12 - 64-bit VM, compressed klass
  // 16 - 64-bit VM, normal klass
  if (base_off % BytesPerLong != 0) {
    assert(UseCompressedClassPointers, "");
    if (is_array) {
      // Exclude length to copy by 8 bytes words.
      base_off += sizeof(int);
    } else {
      // Include klass to copy by 8 bytes words.
      base_off = instanceOopDesc::klass_offset_in_bytes();
    }
    assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
  }
  Node* src_base = kit->basic_plus_adr(src,  base_off);
  Node* dst_base = kit->basic_plus_adr(dst,  base_off);
  // Compute the length also, if needed:
  Node* countx = size;
  // Number of 8-byte words to copy = (size - base_off) >> LogBytesPerLong.
  countx = kit->gvn().transform(new SubXNode(countx, kit->MakeConX(base_off)));
  countx = kit->gvn().transform(new URShiftXNode(countx, kit->intcon(LogBytesPerLong) ));
  const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
  ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, NULL, dst_base, NULL, countx, false, false);
  ac->set_clonebasic();
  Node* n = kit->gvn().transform(ac);
  if (n == ac) {
    // The ArrayCopyNode survived transformation: wire up its runtime-call outputs.
    kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
  } else {
    // The node was folded away; adopt whatever memory state replaced it.
    kit->set_all_memory(n);
  }
}

View file

@ -0,0 +1,217 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
#define SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
#include "memory/allocation.hpp"
#include "oops/accessDecorators.hpp"
#include "opto/loopnode.hpp"
#include "opto/memnode.hpp"
#include "utilities/globalDefinitions.hpp"
// C2-specific access decorators, allocated above DECORATOR_LAST so they do
// not collide with the shared decorators in oops/accessDecorators.hpp.

// The access is mismatched: the value of the access is not equivalent to the
// value pointed to by the address.
const DecoratorSet C2_MISMATCHED             = DECORATOR_LAST << 1;
// The access may not be aligned to its natural size.
const DecoratorSet C2_UNALIGNED              = DECORATOR_LAST << 2;
// The atomic cmpxchg is weak, meaning that spurious false negatives are allowed,
// but never false positives.
const DecoratorSet C2_WEAK_CMPXCHG           = DECORATOR_LAST << 3;
// This denotes that a load has a control dependency.
const DecoratorSet C2_CONTROL_DEPENDENT_LOAD = DECORATOR_LAST << 4;
// This denotes a load that must be pinned.
const DecoratorSet C2_PINNED_LOAD            = DECORATOR_LAST << 5;
// This denotes that the access is produced from the sun.misc.Unsafe intrinsics.
const DecoratorSet C2_UNSAFE_ACCESS          = DECORATOR_LAST << 6;
// This denotes that the access mutates state.
const DecoratorSet C2_WRITE_ACCESS           = DECORATOR_LAST << 7;
// This denotes that the access reads state.
const DecoratorSet C2_READ_ACCESS            = DECORATOR_LAST << 8;
class GraphKit;
class IdealKit;
class Node;
class Type;
class TypePtr;
class PhaseMacroExpand;
// This class wraps a node and a type: the value side of an access (what is
// stored or loaded), as opposed to its address.
class C2AccessValue: public StackObj {
protected:
  Node* _node;       // ideal node carrying the value
  const Type* _type; // static type of the value

public:
  C2AccessValue(Node* node, const Type* type) :
    _node(node),
    _type(type) {}

  Node* node() const        { return _node; }
  const Type* type() const  { return _type; }

  // Replace the wrapped node (e.g. after rounding or encoding the value).
  void set_node(Node* node) { _node = node; }
};
// This class wraps a node and a pointer type: the address side of an access.
class C2AccessValuePtr: public C2AccessValue {
  int _alias_idx;

public:
  // NOTE(review): nothing in this class ever assigns _alias_idx, so it is
  // explicitly initialized here; previously it was left uninitialized and
  // alias_idx() returned an indeterminate value (undefined behavior).
  C2AccessValuePtr(Node* node, const TypePtr* type) :
    C2AccessValue(node, reinterpret_cast<const Type*>(type)),
    _alias_idx(0) {}

  const TypePtr* type() const { return reinterpret_cast<const TypePtr*>(_type); }
  int alias_idx() const       { return _alias_idx; }
};
// This class wraps a bunch of context parameters that are passed around in the
// BarrierSetC2 backend hierarchy, for loads and stores, to reduce boiler plate.
class C2Access: public StackObj {
protected:
  GraphKit*         _kit;        // graph construction context for this parse
  DecoratorSet      _decorators; // semantic properties of the access
  BasicType         _type;       // basic type of the accessed value
  Node*             _base;       // base object of the access (if any)
  C2AccessValuePtr& _addr;       // address being accessed
  Node*             _raw_access; // the raw memory node, once emitted

  // Normalize the decorators (run once from the constructor).
  void fixup_decorators();
  // Opaque per-compilation barrier-set state owned by the GraphKit.
  void* barrier_set_state() const;

public:
  C2Access(GraphKit* kit, DecoratorSet decorators,
           BasicType type, Node* base, C2AccessValuePtr& addr) :
    _kit(kit),
    _decorators(decorators),
    _type(type),
    _base(base),
    _addr(addr),
    _raw_access(NULL)
  {
    fixup_decorators();
  }

  GraphKit* kit() const           { return _kit; }
  DecoratorSet decorators() const { return _decorators; }
  Node* base() const              { return _base; }
  C2AccessValuePtr& addr() const  { return _addr; }
  BasicType type() const          { return _type; }
  bool is_oop() const             { return _type == T_OBJECT || _type == T_ARRAY; }
  bool is_raw() const             { return (_decorators & AS_RAW) != 0; }
  Node* raw_access() const        { return _raw_access; }

  void set_raw_access(Node* raw_access) { _raw_access = raw_access; }
  virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses.

  // Memory ordering the emitted memory node should carry.
  MemNode::MemOrd mem_node_mo() const;
  // Whether the access must be bracketed by CPU membars.
  bool needs_cpu_membar() const;

  template <typename T>
  T barrier_set_state_as() const {
    return reinterpret_cast<T>(barrier_set_state());
  }
};
// This class wraps a bunch of context parameters that are passed around in the
// BarrierSetC2 backend hierarchy, for atomic accesses, to reduce boiler plate.
class C2AtomicAccess: public C2Access {
  Node* _memory;        // memory state of the alias category, captured by set_memory()
  uint  _alias_idx;     // alias category of the accessed slice
  bool  _needs_pinning; // whether the LoadStore node must be pinned with an SCMemProj

public:
  C2AtomicAccess(GraphKit* kit, DecoratorSet decorators, BasicType type,
                 Node* base, C2AccessValuePtr& addr, uint alias_idx) :
    C2Access(kit, decorators, type, base, addr),
    _memory(NULL),
    _alias_idx(alias_idx),
    _needs_pinning(true) {}

  // Set the memory node based on the current memory slice.
  virtual void set_memory();

  Node* memory() const       { return _memory; }
  uint alias_idx() const     { return _alias_idx; }
  bool needs_pinning() const { return _needs_pinning; }

  void set_needs_pinning(bool value)    { _needs_pinning = value; }
};
// This is the top-level class for the backend of the Access API in C2.
// The top-level class is responsible for performing raw accesses. The
// various GC barrier sets inherit from the BarrierSetC2 class to sprinkle
// barriers into the accesses.
class BarrierSetC2: public CHeapObj<mtGC> {
protected:
  // Hooks a GC subclass overrides to modify the resolved (post-fence,
  // post-address-resolution) form of each access.
  virtual void resolve_address(C2Access& access) const;
  virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const;
  virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
  virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                               Node* new_val, const Type* val_type) const;
  virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                Node* new_val, const Type* value_type) const;
  virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const;
  virtual Node* atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const;

public:
  // This is the entry-point for the backend to perform accesses through the Access API.
  virtual Node* store_at(C2Access& access, C2AccessValue& val) const;
  virtual Node* load_at(C2Access& access, const Type* val_type) const;

  virtual Node* atomic_cmpxchg_val_at(C2AtomicAccess& access, Node* expected_val,
                                      Node* new_val, const Type* val_type) const;
  virtual Node* atomic_cmpxchg_bool_at(C2AtomicAccess& access, Node* expected_val,
                                       Node* new_val, const Type* val_type) const;
  virtual Node* atomic_xchg_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const;
  virtual Node* atomic_add_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const;

  // Emit a (possibly barriered) object/array clone.
  virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;

  // These are general helper methods used by C2
  virtual bool array_copy_requires_gc_barriers(BasicType type) const { return false; }

  // Support for GC barriers emitted during parsing
  virtual bool is_gc_barrier_node(Node* node) const { return false; }
  virtual Node* step_over_gc_barrier(Node* c) const { return c; }

  // Support for macro expanded GC barriers
  virtual void register_potential_barrier_node(Node* node) const { }
  virtual void unregister_potential_barrier_node(Node* node) const { }
  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
  virtual void enqueue_useful_gc_barrier(Unique_Node_List &worklist, Node* node) const {}
  virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful) const {}
  virtual void add_users_to_worklist(Unique_Node_List* worklist) const {}

  // Allow barrier sets to have shared state that is preserved across a compilation unit.
  // This could for example comprise macro nodes to be expanded during macro expansion.
  virtual void* create_barrier_state(Arena* comp_arena) const { return NULL; }
  // If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be
  // expanded later, then now is the time to do so.
  virtual bool expand_macro_nodes(PhaseMacroExpand* macro) const { return false; }
  virtual void verify_gc_barriers(bool post_parse) const {}
};
#endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP

View file

@ -0,0 +1,191 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "ci/ciUtilities.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/macro.hpp"
#include "utilities/macros.hpp"
#define __ ideal.
// Produce a constant node holding the base address of the card table, or the
// null node when no card table address is available.
Node* CardTableBarrierSetC2::byte_map_base_node(GraphKit* kit) const {
  // Get base of card map
  jbyte* base = ci_card_table_address();
  if (base == NULL) {
    return kit->null();
  }
  return kit->makecon(TypeRawPtr::make((address)base));
}
// vanilla/CMS post barrier
// Insert a write-barrier store. This is to let generational GC work; we have
// to flag all oop-stores before the next GC point.
//
// Parameters:
//   ctl         - control input for the emitted nodes
//   oop_store   - the oop store this barrier covers (anchors the CM store)
//   obj         - base object being written into
//   adr         - exact address written (ignored when !use_precise)
//   adr_idx     - alias index of the written field
//   val         - value being stored (used only to detect constant nulls)
//   use_precise - mark the card of the exact address (arrays/unknown)
//                 rather than the card of the object header
void CardTableBarrierSetC2::post_barrier(GraphKit* kit,
                                         Node* ctl,
                                         Node* oop_store,
                                         Node* obj,
                                         Node* adr,
                                         uint adr_idx,
                                         Node* val,
                                         BasicType bt,
                                         bool use_precise) const {
  CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
  CardTable* ct = ctbs->card_table();
  // No store check needed if we're storing a NULL or an old object
  // (latter case is probably a string constant). The concurrent
  // mark sweep garbage collector, however, needs to have all nonNull
  // oop updates flagged via card-marks.
  if (val != NULL && val->is_Con()) {
    // must be either an oop or NULL
    const Type* t = val->bottom_type();
    if (t == TypePtr::NULL_PTR || t == Type::TOP)
      // stores of null never (?) need barriers
      return;
  }

  if (use_ReduceInitialCardMarks()
      && obj == kit->just_allocated_object(kit->control())) {
    // We can skip marks on a freshly-allocated object in Eden.
    // Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
    // That routine informs GC to take appropriate compensating steps,
    // upon a slow-path allocation, so as to make this card-mark
    // elision safe.
    return;
  }

  if (!use_precise) {
    // All card marks for a (non-array) instance are in one place:
    adr = obj;
  }
  // (Else it's an array (or unknown), and we want more precise card marks.)
  assert(adr != NULL, "");

  IdealKit ideal(kit, true);

  // Convert the pointer to an int prior to doing math on it
  Node* cast = __ CastPX(__ ctrl(), adr);

  // Divide by card size
  Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );

  // Combine card table base and card offset
  Node* card_adr = __ AddP(__ top(), byte_map_base_node(kit), card_offset );

  // Get the alias_index for raw card-mark memory
  int adr_type = Compile::AliasIdxRaw;
  Node* zero = __ ConI(0); // Dirty card value

  if (UseCondCardMark) {
    if (ct->scanned_concurrently()) {
      // Order the card-mark load below after the covered oop store.
      kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
      __ sync_kit(kit);
    }
    // The classic GC reference write barrier is typically implemented
    // as a store into the global card mark table. Unfortunately
    // unconditional stores can result in false sharing and excessive
    // coherence traffic as well as false transactional aborts.
    // UseCondCardMark enables MP "polite" conditional card mark
    // stores. In theory we could relax the load from ctrl() to
    // no_ctrl, but that doesn't buy much latitude.
    Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type);
    __ if_then(card_val, BoolTest::ne, zero);
  }

  // Smash zero into card
  if(!ct->scanned_concurrently()) {
    __ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered);
  } else {
    // Specialized path for CM store barrier: StoreCM keeps the card mark
    // anchored to the covered oop store (oop_store / adr_idx).
    __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, T_BYTE, adr_type);
  }

  if (UseCondCardMark) {
    // Close the "card already dirty?" guard opened above.
    __ end_if();
  }

  // Final sync IdealKit and GraphKit.
  kit->final_sync(ideal);
}
// Emit an object clone, then, if necessary, some card marks afterwards.
// (Non-arrays only, and only when initial card marks are not elided.)
void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
  // Delegate the actual memory copy to the generic implementation.
  BarrierSetC2::clone(kit, src, dst, size, is_array);

  // No marks needed for arrays, or when ReduceInitialCardMarks applies.
  if (is_array || use_ReduceInitialCardMarks()) {
    return;
  }

  // Put in store barrier for any and all oops we are sticking
  // into this object. (We could avoid this if we could prove
  // that the object type contains no oop fields at all.)
  // Neither a specific field nor a specific value is known, so emit an
  // imprecise mark (use_precise == false) against the clone destination.
  post_barrier(kit, kit->control(),
               kit->memory(TypeRawPtr::BOTTOM),
               dst,
               NULL /* adr: no particular field */,
               Compile::AliasIdxRaw,
               NULL /* val: no particular value */,
               T_OBJECT,
               false /* use_precise */);
}
// Whether card marks on freshly allocated objects may be elided,
// as controlled by the ReduceInitialCardMarks flag.
bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const {
  return ReduceInitialCardMarks;
}
// In addition to whatever the ModRef layer classifies as a barrier node,
// StoreCM (card-mark store) nodes belong to this barrier set's barriers.
bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const {
  return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM;
}
// Dismantle the card-mark subgraph hanging off a CastP2X node
// (the shape built by post_barrier: CastP2X -> shift -> AddP -> memops).
// Card-mark stores are spliced out by rerouting users to their memory
// input; a UseCondCardMark guard load is folded to constant zero so the
// "card already dirty?" test disappears as well.
void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
  assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
  Node *shift = node->unique_out();  // card offset computation
  Node *addp = shift->unique_out();  // card address computation
  // Iterate from the back: replace_node mutates the out-edge list.
  for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
    Node *mem = addp->last_out(j);
    if (UseCondCardMark && mem->is_Load()) {
      assert(mem->Opcode() == Op_LoadB, "unexpected code shape");
      // The load is checking if the card has been written so
      // replace it with zero to fold the test.
      macro->replace_node(mem, macro->intcon(0));
      continue;
    }
    assert(mem->is_Store(), "store required");
    // Bypass the card-mark store: users now see its memory input directly.
    macro->replace_node(mem, mem->in(MemNode::Memory));
  }
}
// An array copy requires card-mark barriers unless initial card marks are
// being elided (ReduceInitialCardMarks). The element type is irrelevant
// for card tables, so the parameter is unused here.
bool CardTableBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const {
  return !use_ReduceInitialCardMarks();
}

View file

@ -0,0 +1,53 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP
#define SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP
#include "gc/shared/c2/modRefBarrierSetC2.hpp"
// C2 support for card-table based barrier sets. Refines ModRefBarrierSetC2
// with a post barrier that dirties the card covering the updated address
// (or the object header, for imprecise marks).
class CardTableBarrierSetC2: public ModRefBarrierSetC2 {
protected:
  // Emit the card-dirtying store after an oop store; 'use_precise'
  // selects per-address (array/unknown) vs per-object card marking.
  virtual void post_barrier(GraphKit* kit,
                            Node* ctl,
                            Node* store,
                            Node* obj,
                            Node* adr,
                            uint adr_idx,
                            Node* val,
                            BasicType bt,
                            bool use_precise) const;

  // Constant node for the card table base address (or null).
  Node* byte_map_base_node(GraphKit* kit) const;

public:
  // Clone an object; adds an imprecise card mark for non-arrays.
  virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;
  // StoreCM nodes count as GC barrier nodes for this barrier set.
  virtual bool is_gc_barrier_node(Node* node) const;
  // Remove the card-mark subgraph rooted at a CastP2X node.
  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
  // Barriers are needed for oop array copies unless initial card
  // marks are elided.
  virtual bool array_copy_requires_gc_barriers(BasicType type) const;

  // Mirrors the ReduceInitialCardMarks flag.
  bool use_ReduceInitialCardMarks() const;
};
#endif // SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP

View file

@ -0,0 +1,135 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "opto/arraycopynode.hpp"
#include "opto/graphKit.hpp"
#include "opto/idealKit.hpp"
#include "opto/narrowptrnode.hpp"
#include "gc/shared/c2/modRefBarrierSetC2.hpp"
#include "utilities/macros.hpp"
// Wrap a resolved oop store in pre/post write barriers. Non-oop values
// and off-heap stores (unless the reference kind is unknown and must be
// handled conservatively) go straight to the generic implementation.
Node* ModRefBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
  GraphKit* kit = access.kit();
  const DecoratorSet decorators = access.decorators();

  const bool in_heap     = (decorators & IN_HEAP) != 0;
  const bool in_array    = (decorators & IN_HEAP_ARRAY) != 0;
  const bool unknown_ref = (decorators & ON_UNKNOWN_OOP_REF) != 0;

  if (!access.is_oop() || (!in_heap && !unknown_ref)) {
    // No barriers required; plain store.
    return BarrierSetC2::store_at_resolved(access, val);
  }

  const TypePtr* adr_type = access.addr().type();
  Node* adr = access.addr().node();

  // Array elements and unknown references get precise post barriers.
  const bool use_precise = in_array || unknown_ref;

  uint adr_idx = kit->C->get_alias_index(adr_type);
  assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );

  // Pre barrier loads the old field value itself (do_load == true).
  pre_barrier(kit, true /* do_load */, kit->control(), access.base(), adr, adr_idx, val.node(),
              static_cast<const TypeOopPtr*>(val.type()), NULL /* pre_val */, access.type());
  Node* store = BarrierSetC2::store_at_resolved(access, val);
  post_barrier(kit, kit->control(), access.raw_access(), access.base(), adr, adr_idx, val.node(),
               access.type(), use_precise);
  return store;
}
// CAS returning the old value: for oop fields, emit a pre barrier using
// the expected (old) value and an unconditional precise post barrier on
// the new value.
Node* ModRefBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                         Node* new_val, const Type* value_type) const {
  if (!access.is_oop()) {
    return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
  }
  GraphKit* kit = access.kit();

  // No field load is needed: the expected value serves as pre_val.
  pre_barrier(kit, false /* do_load */,
              kit->control(), NULL, NULL, max_juint, NULL, NULL,
              expected_val /* pre_val */, T_OBJECT);

  Node* old_val = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);

  post_barrier(kit, kit->control(), access.raw_access(), access.base(),
               access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true);
  return old_val;
}
// CAS returning success/failure: for oop fields, emit a pre barrier using
// the expected value, perform the CAS, and emit the post barrier only on
// the success branch (conditional on the boolean CAS result).
Node* ModRefBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                          Node* new_val, const Type* value_type) const {
  GraphKit* kit = access.kit();
  if (!access.is_oop()) {
    return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
  }

  // No field load is needed: the expected value serves as pre_val.
  pre_barrier(kit, false /* do_load */,
              kit->control(), NULL, NULL, max_juint, NULL, NULL,
              expected_val /* pre_val */, T_OBJECT);

  Node* load_store = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);

  // Emit the post barrier only when the actual store happened. This makes sense
  // to check only for LS_cmp_* that can fail to set the value.
  // LS_cmp_exchange does not produce any branches by default, so there is no
  // boolean result to piggyback on. TODO: When we merge CompareAndSwap with
  // CompareAndExchange and move branches here, it would make sense to conditionalize
  // post_barriers for LS_cmp_exchange as well.
  //
  // CAS success path is marked more likely since we anticipate this is a performance
  // critical path, while CAS failure path can use the penalty for going through unlikely
  // path as backoff. Which is still better than doing a store barrier there.
  IdealKit ideal(kit);
  ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
    kit->sync_kit(ideal);
    post_barrier(kit, ideal.ctrl(), access.raw_access(), access.base(),
                 access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true);
    ideal.sync_kit(kit);
  } ideal.end_if();
  kit->final_sync(ideal);

  return load_store;
}
// Atomic exchange: perform the swap first, then (for oops) emit the pre
// barrier on the returned old value and a precise post barrier on the
// newly installed value.
Node* ModRefBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
  Node* old_val = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
  if (!access.is_oop()) {
    return old_val;
  }
  GraphKit* kit = access.kit();

  // Don't need to load pre_val. The old value is returned by load_store.
  // The pre_barrier can execute after the xchg as long as no safepoint
  // gets inserted between them.
  pre_barrier(kit, false /* do_load */,
              kit->control(), NULL, NULL, max_juint, NULL, NULL,
              old_val /* pre_val */, T_OBJECT);

  post_barrier(kit, kit->control(), access.raw_access(), access.base(), access.addr().node(),
               access.alias_idx(), new_val, T_OBJECT, true);
  return old_val;
}

View file

@ -0,0 +1,64 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP
#define SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP
#include "gc/shared/c2/barrierSetC2.hpp"
class TypeOopPtr;
// Shared C2 support for barrier sets that track reference modifications
// ("mod-ref" barriers). Sandwiches the resolved access entry points
// between virtual pre_barrier/post_barrier hooks; both hooks default to
// no-ops and are refined by subclasses (e.g. CardTableBarrierSetC2).
class ModRefBarrierSetC2: public BarrierSetC2 {
protected:
  // Barrier emitted before an oop store/RMW. When do_load is true the old
  // value is loaded from (obj, adr, adr_idx); otherwise the caller passes
  // the previous value directly as pre_val (see the .cpp callers).
  virtual void pre_barrier(GraphKit* kit,
                           bool do_load,
                           Node* ctl,
                           Node* obj,
                           Node* adr,
                           uint adr_idx,
                           Node* val,
                           const TypeOopPtr* val_type,
                           Node* pre_val,
                           BasicType bt) const {}

  // Barrier emitted after an oop store/RMW; use_precise selects
  // per-address vs per-object marking.
  virtual void post_barrier(GraphKit* kit,
                            Node* ctl,
                            Node* store,
                            Node* obj,
                            Node* adr,
                            uint adr_idx,
                            Node* val,
                            BasicType bt,
                            bool use_precise) const {}

  // Resolved-access entry points that wrap the underlying operation
  // with the pre/post barriers above when the accessed value is an oop.
  virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const;
  virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                               Node* new_val, const Type* value_type) const;
  virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                Node* new_val, const Type* value_type) const;
  virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const;
};
#endif // SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP

View file

@ -37,8 +37,12 @@
#ifdef COMPILER1 #ifdef COMPILER1
#include "gc/shared/c1/cardTableBarrierSetC1.hpp" #include "gc/shared/c1/cardTableBarrierSetC1.hpp"
#endif #endif
#ifdef COMPILER2
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
#endif
class CardTableBarrierSetC1; class CardTableBarrierSetC1;
class CardTableBarrierSetC2;
// This kind of "BarrierSet" allows a "CollectedHeap" to detect and // This kind of "BarrierSet" allows a "CollectedHeap" to detect and
// enumerate ref fields that have been modified (since the last // enumerate ref fields that have been modified (since the last
@ -46,10 +50,12 @@ class CardTableBarrierSetC1;
CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler, CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler,
BarrierSetC1* barrier_set_c1, BarrierSetC1* barrier_set_c1,
BarrierSetC2* barrier_set_c2,
CardTable* card_table, CardTable* card_table,
const BarrierSet::FakeRtti& fake_rtti) : const BarrierSet::FakeRtti& fake_rtti) :
ModRefBarrierSet(barrier_set_assembler, ModRefBarrierSet(barrier_set_assembler,
barrier_set_c1, barrier_set_c1,
barrier_set_c2,
fake_rtti.add_tag(BarrierSet::CardTableBarrierSet)), fake_rtti.add_tag(BarrierSet::CardTableBarrierSet)),
_defer_initial_card_mark(false), _defer_initial_card_mark(false),
_card_table(card_table) _card_table(card_table)
@ -58,6 +64,7 @@ CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assemb
CardTableBarrierSet::CardTableBarrierSet(CardTable* card_table) : CardTableBarrierSet::CardTableBarrierSet(CardTable* card_table) :
ModRefBarrierSet(make_barrier_set_assembler<CardTableBarrierSetAssembler>(), ModRefBarrierSet(make_barrier_set_assembler<CardTableBarrierSetAssembler>(),
make_barrier_set_c1<CardTableBarrierSetC1>(), make_barrier_set_c1<CardTableBarrierSetC1>(),
make_barrier_set_c2<CardTableBarrierSetC2>(),
BarrierSet::FakeRtti(BarrierSet::CardTableBarrierSet)), BarrierSet::FakeRtti(BarrierSet::CardTableBarrierSet)),
_defer_initial_card_mark(false), _defer_initial_card_mark(false),
_card_table(card_table) _card_table(card_table)
@ -155,7 +162,7 @@ void CardTableBarrierSet::initialize_deferred_card_mark_barriers() {
// Used for ReduceInitialCardMarks (when COMPILER2 or JVMCI is used); // Used for ReduceInitialCardMarks (when COMPILER2 or JVMCI is used);
// otherwise remains unused. // otherwise remains unused.
#if COMPILER2_OR_JVMCI #if COMPILER2_OR_JVMCI
_defer_initial_card_mark = is_server_compilation_mode_vm() && ReduceInitialCardMarks && can_elide_tlab_store_barriers() _defer_initial_card_mark = is_server_compilation_mode_vm() && ReduceInitialCardMarks
&& (DeferInitialCardMark || card_mark_must_follow_store()); && (DeferInitialCardMark || card_mark_must_follow_store());
#else #else
assert(_defer_initial_card_mark == false, "Who would set it?"); assert(_defer_initial_card_mark == false, "Who would set it?");

View file

@ -54,6 +54,7 @@ class CardTableBarrierSet: public ModRefBarrierSet {
CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler, CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler,
BarrierSetC1* barrier_set_c1, BarrierSetC1* barrier_set_c1,
BarrierSetC2* barrier_set_c2,
CardTable* card_table, CardTable* card_table,
const BarrierSet::FakeRtti& fake_rtti); const BarrierSet::FakeRtti& fake_rtti);
@ -89,23 +90,6 @@ class CardTableBarrierSet: public ModRefBarrierSet {
// remembered set. // remembered set.
void flush_deferred_card_mark_barrier(JavaThread* thread); void flush_deferred_card_mark_barrier(JavaThread* thread);
// Can a compiler initialize a new object without store barriers?
// This permission only extends from the creation of a new object
// via a TLAB up to the first subsequent safepoint. If such permission
// is granted for this heap type, the compiler promises to call
// defer_store_barrier() below on any slow path allocation of
// a new object for which such initializing store barriers will
// have been elided. G1, like CMS, allows this, but should be
// ready to provide a compensating write barrier as necessary
// if that storage came out of a non-young region. The efficiency
// of this implementation depends crucially on being able to
// answer very efficiently in constant time whether a piece of
// storage in the heap comes from a young region or not.
// See ReduceInitialCardMarks.
virtual bool can_elide_tlab_store_barriers() const {
return true;
}
// If a compiler is eliding store barriers for TLAB-allocated objects, // If a compiler is eliding store barriers for TLAB-allocated objects,
// we will be informed of a slow-path allocation by a call // we will be informed of a slow-path allocation by a call
// to on_slowpath_allocation_exit() below. Such a call precedes the // to on_slowpath_allocation_exit() below. Such a call precedes the

View file

@ -34,9 +34,11 @@ class ModRefBarrierSet: public BarrierSet {
protected: protected:
ModRefBarrierSet(BarrierSetAssembler* barrier_set_assembler, ModRefBarrierSet(BarrierSetAssembler* barrier_set_assembler,
BarrierSetC1* barrier_set_c1, BarrierSetC1* barrier_set_c1,
BarrierSetC2* barrier_set_c2,
const BarrierSet::FakeRtti& fake_rtti) const BarrierSet::FakeRtti& fake_rtti)
: BarrierSet(barrier_set_assembler, : BarrierSet(barrier_set_assembler,
barrier_set_c1, barrier_set_c1,
barrier_set_c2,
fake_rtti.add_tag(BarrierSet::ModRef)) { } fake_rtti.add_tag(BarrierSet::ModRef)) { }
~ModRefBarrierSet() { } ~ModRefBarrierSet() { }

View file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -23,9 +23,13 @@
*/ */
#include "precompiled.hpp" #include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
#include "opto/arraycopynode.hpp" #include "opto/arraycopynode.hpp"
#include "opto/graphKit.hpp" #include "opto/graphKit.hpp"
#include "runtime/sharedRuntime.hpp" #include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled, bool has_negative_length_guard) ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled, bool has_negative_length_guard)
: CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM), : CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM),
@ -252,7 +256,9 @@ bool ArrayCopyNode::prepare_array_copy(PhaseGVN *phase, bool can_reshape,
return false; return false;
} }
if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() || !GraphKit::use_ReduceInitialCardMarks())) { BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() ||
bs->array_copy_requires_gc_barriers(T_OBJECT))) {
// It's an object array copy but we can't emit the card marking // It's an object array copy but we can't emit the card marking
// that is needed // that is needed
return false; return false;
@ -434,9 +440,10 @@ bool ArrayCopyNode::finish_transform(PhaseGVN *phase, bool can_reshape,
if (is_clonebasic()) { if (is_clonebasic()) {
Node* out_mem = proj_out(TypeFunc::Memory); Node* out_mem = proj_out(TypeFunc::Memory);
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
if (out_mem->outcnt() != 1 || !out_mem->raw_out(0)->is_MergeMem() || if (out_mem->outcnt() != 1 || !out_mem->raw_out(0)->is_MergeMem() ||
out_mem->raw_out(0)->outcnt() != 1 || !out_mem->raw_out(0)->raw_out(0)->is_MemBar()) { out_mem->raw_out(0)->outcnt() != 1 || !out_mem->raw_out(0)->raw_out(0)->is_MemBar()) {
assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking"); assert(bs->array_copy_requires_gc_barriers(T_OBJECT), "can only happen with card marking");
return false; return false;
} }
@ -643,49 +650,13 @@ bool ArrayCopyNode::may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTra
return false; return false;
} }
static Node* step_over_gc_barrier(Node* c) {
#if INCLUDE_G1GC
if (UseG1GC && !GraphKit::use_ReduceInitialCardMarks() &&
c != NULL && c->is_Region() && c->req() == 3) {
for (uint i = 1; i < c->req(); i++) {
if (c->in(i) != NULL && c->in(i)->is_Region() &&
c->in(i)->req() == 3) {
Node* r = c->in(i);
for (uint j = 1; j < r->req(); j++) {
if (r->in(j) != NULL && r->in(j)->is_Proj() &&
r->in(j)->in(0) != NULL &&
r->in(j)->in(0)->Opcode() == Op_CallLeaf &&
r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post)) {
Node* call = r->in(j)->in(0);
c = c->in(i == 1 ? 2 : 1);
if (c != NULL) {
c = c->in(0);
if (c != NULL) {
c = c->in(0);
assert(call->in(0) == NULL ||
call->in(0)->in(0) == NULL ||
call->in(0)->in(0)->in(0) == NULL ||
call->in(0)->in(0)->in(0)->in(0) == NULL ||
call->in(0)->in(0)->in(0)->in(0)->in(0) == NULL ||
c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape");
return c;
}
}
}
}
}
}
}
#endif // INCLUDE_G1GC
return c;
}
bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase, ArrayCopyNode*& ac) { bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase, ArrayCopyNode*& ac) {
Node* c = mb->in(0); Node* c = mb->in(0);
// step over g1 gc barrier if we're at a clone with ReduceInitialCardMarks off BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
c = step_over_gc_barrier(c); // step over g1 gc barrier if we're at e.g. a clone with ReduceInitialCardMarks off
c = bs->step_over_gc_barrier(c);
CallNode* call = NULL; CallNode* call = NULL;
if (c != NULL && c->is_Region()) { if (c != NULL && c->is_Region()) {
@ -701,7 +672,11 @@ bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTra
} }
} else if (may_modify_helper(t_oop, c->in(0), phase, call)) { } else if (may_modify_helper(t_oop, c->in(0), phase, call)) {
ac = call->isa_ArrayCopy(); ac = call->isa_ArrayCopy();
assert(c == mb->in(0) || (ac != NULL && ac->is_clonebasic() && !GraphKit::use_ReduceInitialCardMarks()), "only for clone"); #ifdef ASSERT
bool use_ReduceInitialCardMarks = BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
static_cast<CardTableBarrierSetC2*>(bs)->use_ReduceInitialCardMarks();
assert(c == mb->in(0) || (ac != NULL && ac->is_clonebasic() && !use_ReduceInitialCardMarks), "only for clone");
#endif
return true; return true;
} }
@ -749,4 +724,3 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransf
} }
return false; return false;
} }

View file

@ -33,6 +33,8 @@
#include "compiler/compileLog.hpp" #include "compiler/compileLog.hpp"
#include "compiler/disassembler.hpp" #include "compiler/disassembler.hpp"
#include "compiler/oopMap.hpp" #include "compiler/oopMap.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
#include "opto/addnode.hpp" #include "opto/addnode.hpp"
#include "opto/block.hpp" #include "opto/block.hpp"
@ -414,6 +416,8 @@ void Compile::remove_useless_nodes(Unique_Node_List &useful) {
remove_opaque4_node(opaq); remove_opaque4_node(opaq);
} }
} }
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->eliminate_useless_gc_barriers(useful);
// clean up the late inline lists // clean up the late inline lists
remove_useless_late_inlines(&_string_late_inlines, useful); remove_useless_late_inlines(&_string_late_inlines, useful);
remove_useless_late_inlines(&_boxing_late_inlines, useful); remove_useless_late_inlines(&_boxing_late_inlines, useful);
@ -637,6 +641,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
_stub_function(NULL), _stub_function(NULL),
_stub_entry_point(NULL), _stub_entry_point(NULL),
_method(target), _method(target),
_barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state(comp_arena())),
_entry_bci(osr_bci), _entry_bci(osr_bci),
_initial_gvn(NULL), _initial_gvn(NULL),
_for_igvn(NULL), _for_igvn(NULL),
@ -772,17 +777,12 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
StartNode* s = new StartNode(root(), tf()->domain()); StartNode* s = new StartNode(root(), tf()->domain());
initial_gvn()->set_type_bottom(s); initial_gvn()->set_type_bottom(s);
init_start(s); init_start(s);
if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) { if (method()->intrinsic_id() == vmIntrinsics::_Reference_get) {
// With java.lang.ref.reference.get() we must go through the // With java.lang.ref.reference.get() we must go through the
// intrinsic when G1 is enabled - even when get() is the root // intrinsic - even when get() is the root
// method of the compile - so that, if necessary, the value in // method of the compile - so that, if necessary, the value in
// the referent field of the reference object gets recorded by // the referent field of the reference object gets recorded by
// the pre-barrier code. // the pre-barrier code.
// Specifically, if G1 is enabled, the value in the referent
// field is recorded by the G1 SATB pre barrier. This will
// result in the referent being marked live and the reference
// object removed from the list of discovered references during
// reference processing.
cg = find_intrinsic(method(), false); cg = find_intrinsic(method(), false);
} }
if (cg == NULL) { if (cg == NULL) {
@ -2334,6 +2334,9 @@ void Compile::Optimize() {
if (failing()) return; if (failing()) return;
} }
} }
if (failing()) return;
// Ensure that major progress is now clear // Ensure that major progress is now clear
C->clear_major_progress(); C->clear_major_progress();
@ -2350,6 +2353,11 @@ void Compile::Optimize() {
igvn.optimize(); igvn.optimize();
} }
#ifdef ASSERT
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->verify_gc_barriers(false);
#endif
{ {
TracePhase tp("macroExpand", &timers[_t_macroExpand]); TracePhase tp("macroExpand", &timers[_t_macroExpand]);
PhaseMacroExpand mex(igvn); PhaseMacroExpand mex(igvn);

View file

@ -359,6 +359,9 @@ class Compile : public Phase {
const char* _stub_name; // Name of stub or adapter being compiled, or NULL const char* _stub_name; // Name of stub or adapter being compiled, or NULL
address _stub_entry_point; // Compile code entry for generated stub, or NULL address _stub_entry_point; // Compile code entry for generated stub, or NULL
// For GC
void* _barrier_set_state;
// Control of this compilation. // Control of this compilation.
int _num_loop_opts; // Number of iterations for doing loop optimiztions int _num_loop_opts; // Number of iterations for doing loop optimiztions
int _max_inline_size; // Max inline size for this compilation int _max_inline_size; // Max inline size for this compilation
@ -530,6 +533,8 @@ class Compile : public Phase {
public: public:
void* barrier_set_state() const { return _barrier_set_state; }
outputStream* print_inlining_stream() const { outputStream* print_inlining_stream() const {
assert(print_inlining() || print_intrinsics(), "PrintInlining off?"); assert(print_inlining() || print_intrinsics(), "PrintInlining off?");
return _print_inlining_stream; return _print_inlining_stream;
@ -1349,7 +1354,6 @@ class Compile : public Phase {
// supporting clone_map // supporting clone_map
CloneMap& clone_map(); CloneMap& clone_map();
void set_clone_map(Dict* d); void set_clone_map(Dict* d);
}; };
#endif // SHARE_VM_OPTO_COMPILE_HPP #endif // SHARE_VM_OPTO_COMPILE_HPP

View file

@ -25,6 +25,7 @@
#include "precompiled.hpp" #include "precompiled.hpp"
#include "ci/bcEscapeAnalyzer.hpp" #include "ci/bcEscapeAnalyzer.hpp"
#include "compiler/compileLog.hpp" #include "compiler/compileLog.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "libadt/vectset.hpp" #include "libadt/vectset.hpp"
#include "memory/allocation.hpp" #include "memory/allocation.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
@ -980,10 +981,9 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
arg_has_oops && (i > TypeFunc::Parms); arg_has_oops && (i > TypeFunc::Parms);
#ifdef ASSERT #ifdef ASSERT
if (!(is_arraycopy || if (!(is_arraycopy ||
BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(call) ||
(call->as_CallLeaf()->_name != NULL && (call->as_CallLeaf()->_name != NULL &&
(strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || (strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 ||
strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 ||
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
@ -3285,9 +3285,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
(op == Op_StrCompressedCopy || op == Op_StrInflatedCopy)) { (op == Op_StrCompressedCopy || op == Op_StrInflatedCopy)) {
// They overwrite memory edge corresponding to destination array, // They overwrite memory edge corresponding to destination array,
memnode_worklist.append_if_missing(use); memnode_worklist.append_if_missing(use);
} else if (!(op == Op_StoreCM || } else if (!(BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(use) ||
(op == Op_CallLeaf && use->as_CallLeaf()->_name != NULL &&
strcmp(use->as_CallLeaf()->_name, "g1_wb_pre") == 0) ||
op == Op_AryEq || op == Op_StrComp || op == Op_HasNegatives || op == Op_AryEq || op == Op_StrComp || op == Op_HasNegatives ||
op == Op_StrCompressedCopy || op == Op_StrInflatedCopy || op == Op_StrCompressedCopy || op == Op_StrInflatedCopy ||
op == Op_StrEquals || op == Op_StrIndexOf || op == Op_StrIndexOfChar)) { op == Op_StrEquals || op == Op_StrIndexOf || op == Op_StrIndexOfChar)) {

View file

@ -26,9 +26,7 @@
#include "ci/ciUtilities.hpp" #include "ci/ciUtilities.hpp"
#include "compiler/compileLog.hpp" #include "compiler/compileLog.hpp"
#include "gc/shared/barrierSet.hpp" #include "gc/shared/barrierSet.hpp"
#include "gc/shared/cardTable.hpp" #include "gc/shared/c2/barrierSetC2.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interpreter.hpp" #include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
#include "opto/addnode.hpp" #include "opto/addnode.hpp"
@ -45,18 +43,14 @@
#include "opto/runtime.hpp" #include "opto/runtime.hpp"
#include "runtime/deoptimization.hpp" #include "runtime/deoptimization.hpp"
#include "runtime/sharedRuntime.hpp" #include "runtime/sharedRuntime.hpp"
#if INCLUDE_G1GC
#include "gc/g1/g1CardTable.hpp"
#include "gc/g1/g1ThreadLocalData.hpp"
#include "gc/g1/heapRegion.hpp"
#endif // INCLUDE_G1GC
//----------------------------GraphKit----------------------------------------- //----------------------------GraphKit-----------------------------------------
// Main utility constructor. // Main utility constructor.
GraphKit::GraphKit(JVMState* jvms) GraphKit::GraphKit(JVMState* jvms)
: Phase(Phase::Parser), : Phase(Phase::Parser),
_env(C->env()), _env(C->env()),
_gvn(*C->initial_gvn()) _gvn(*C->initial_gvn()),
_barrier_set(BarrierSet::barrier_set()->barrier_set_c2())
{ {
_exceptions = jvms->map()->next_exception(); _exceptions = jvms->map()->next_exception();
if (_exceptions != NULL) jvms->map()->set_next_exception(NULL); if (_exceptions != NULL) jvms->map()->set_next_exception(NULL);
@ -67,7 +61,8 @@ GraphKit::GraphKit(JVMState* jvms)
GraphKit::GraphKit() GraphKit::GraphKit()
: Phase(Phase::Parser), : Phase(Phase::Parser),
_env(C->env()), _env(C->env()),
_gvn(*C->initial_gvn()) _gvn(*C->initial_gvn()),
_barrier_set(BarrierSet::barrier_set()->barrier_set_c2())
{ {
_exceptions = NULL; _exceptions = NULL;
set_map(NULL); set_map(NULL);
@ -610,8 +605,7 @@ void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) {
Node *adr = basic_plus_adr(ex_node, ex_node, offset); Node *adr = basic_plus_adr(ex_node, ex_node, offset);
const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass()); const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass());
// Conservatively release stores of object references. Node *store = access_store_at(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, IN_HEAP);
Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, MemNode::release);
add_exception_state(make_exception_state(ex_node)); add_exception_state(make_exception_state(ex_node));
return; return;
@ -1550,145 +1544,142 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
return st; return st;
} }
Node* GraphKit::access_store_at(Node* ctl,
void GraphKit::pre_barrier(bool do_load, Node* obj,
Node* ctl, Node* adr,
Node* obj, const TypePtr* adr_type,
Node* adr, Node* val,
uint adr_idx, const Type* val_type,
Node* val, BasicType bt,
const TypeOopPtr* val_type, DecoratorSet decorators) {
Node* pre_val,
BasicType bt) {
BarrierSet* bs = BarrierSet::barrier_set();
set_control(ctl);
switch (bs->kind()) {
#if INCLUDE_G1GC
case BarrierSet::G1BarrierSet:
g1_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt);
break;
#endif
case BarrierSet::CardTableBarrierSet:
break;
default :
ShouldNotReachHere();
}
}
bool GraphKit::can_move_pre_barrier() const {
BarrierSet* bs = BarrierSet::barrier_set();
switch (bs->kind()) {
#if INCLUDE_G1GC
case BarrierSet::G1BarrierSet:
return true; // Can move it if no safepoint
#endif
case BarrierSet::CardTableBarrierSet:
return true; // There is no pre-barrier
default :
ShouldNotReachHere();
}
return false;
}
void GraphKit::post_barrier(Node* ctl,
Node* store,
Node* obj,
Node* adr,
uint adr_idx,
Node* val,
BasicType bt,
bool use_precise) {
BarrierSet* bs = BarrierSet::barrier_set();
set_control(ctl);
switch (bs->kind()) {
#if INCLUDE_G1GC
case BarrierSet::G1BarrierSet:
g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
break;
#endif
case BarrierSet::CardTableBarrierSet:
write_barrier_post(store, obj, adr, adr_idx, val, use_precise);
break;
default :
ShouldNotReachHere();
}
}
Node* GraphKit::store_oop(Node* ctl,
Node* obj,
Node* adr,
const TypePtr* adr_type,
Node* val,
const TypeOopPtr* val_type,
BasicType bt,
bool use_precise,
MemNode::MemOrd mo,
bool mismatched) {
// Transformation of a value which could be NULL pointer (CastPP #NULL) // Transformation of a value which could be NULL pointer (CastPP #NULL)
// could be delayed during Parse (for example, in adjust_map_after_if()). // could be delayed during Parse (for example, in adjust_map_after_if()).
// Execute transformation here to avoid barrier generation in such case. // Execute transformation here to avoid barrier generation in such case.
if (_gvn.type(val) == TypePtr::NULL_PTR) if (_gvn.type(val) == TypePtr::NULL_PTR) {
val = _gvn.makecon(TypePtr::NULL_PTR); val = _gvn.makecon(TypePtr::NULL_PTR);
}
set_control(ctl); set_control(ctl);
if (stopped()) return top(); // Dead path ? if (stopped()) {
return top(); // Dead path ?
}
assert(bt == T_OBJECT, "sanity");
assert(val != NULL, "not dead path"); assert(val != NULL, "not dead path");
uint adr_idx = C->get_alias_index(adr_type);
assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
pre_barrier(true /* do_load */, C2AccessValuePtr addr(adr, adr_type);
control(), obj, adr, adr_idx, val, val_type, C2AccessValue value(val, val_type);
NULL /* pre_val */, C2Access access(this, decorators | C2_WRITE_ACCESS, bt, obj, addr);
bt); if (access.is_raw()) {
return _barrier_set->BarrierSetC2::store_at(access, value);
Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, mismatched); } else {
post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise); return _barrier_set->store_at(access, value);
return store; }
} }
// Could be an array or object we don't know at compile time (unsafe ref.) Node* GraphKit::access_load_at(Node* obj, // containing obj
Node* GraphKit::store_oop_to_unknown(Node* ctl, Node* adr, // actual adress to store val at
Node* obj, // containing obj const TypePtr* adr_type,
Node* adr, // actual adress to store val at const Type* val_type,
const TypePtr* adr_type, BasicType bt,
Node* val, DecoratorSet decorators) {
BasicType bt, if (stopped()) {
MemNode::MemOrd mo, return top(); // Dead path ?
bool mismatched) {
Compile::AliasType* at = C->alias_type(adr_type);
const TypeOopPtr* val_type = NULL;
if (adr_type->isa_instptr()) {
if (at->field() != NULL) {
// known field. This code is a copy of the do_put_xxx logic.
ciField* field = at->field();
if (!field->type()->is_loaded()) {
val_type = TypeInstPtr::BOTTOM;
} else {
val_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
}
}
} else if (adr_type->isa_aryptr()) {
val_type = adr_type->is_aryptr()->elem()->make_oopptr();
} }
if (val_type == NULL) {
val_type = TypeInstPtr::BOTTOM; C2AccessValuePtr addr(adr, adr_type);
C2Access access(this, decorators | C2_READ_ACCESS, bt, obj, addr);
if (access.is_raw()) {
return _barrier_set->BarrierSetC2::load_at(access, val_type);
} else {
return _barrier_set->load_at(access, val_type);
} }
return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched);
} }
Node* GraphKit::access_atomic_cmpxchg_val_at(Node* ctl,
Node* obj,
Node* adr,
const TypePtr* adr_type,
int alias_idx,
Node* expected_val,
Node* new_val,
const Type* value_type,
BasicType bt,
DecoratorSet decorators) {
set_control(ctl);
C2AccessValuePtr addr(adr, adr_type);
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS,
bt, obj, addr, alias_idx);
if (access.is_raw()) {
return _barrier_set->BarrierSetC2::atomic_cmpxchg_val_at(access, expected_val, new_val, value_type);
} else {
return _barrier_set->atomic_cmpxchg_val_at(access, expected_val, new_val, value_type);
}
}
Node* GraphKit::access_atomic_cmpxchg_bool_at(Node* ctl,
Node* obj,
Node* adr,
const TypePtr* adr_type,
int alias_idx,
Node* expected_val,
Node* new_val,
const Type* value_type,
BasicType bt,
DecoratorSet decorators) {
set_control(ctl);
C2AccessValuePtr addr(adr, adr_type);
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS,
bt, obj, addr, alias_idx);
if (access.is_raw()) {
return _barrier_set->BarrierSetC2::atomic_cmpxchg_bool_at(access, expected_val, new_val, value_type);
} else {
return _barrier_set->atomic_cmpxchg_bool_at(access, expected_val, new_val, value_type);
}
}
Node* GraphKit::access_atomic_xchg_at(Node* ctl,
Node* obj,
Node* adr,
const TypePtr* adr_type,
int alias_idx,
Node* new_val,
const Type* value_type,
BasicType bt,
DecoratorSet decorators) {
set_control(ctl);
C2AccessValuePtr addr(adr, adr_type);
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS,
bt, obj, addr, alias_idx);
if (access.is_raw()) {
return _barrier_set->BarrierSetC2::atomic_xchg_at(access, new_val, value_type);
} else {
return _barrier_set->atomic_xchg_at(access, new_val, value_type);
}
}
Node* GraphKit::access_atomic_add_at(Node* ctl,
Node* obj,
Node* adr,
const TypePtr* adr_type,
int alias_idx,
Node* new_val,
const Type* value_type,
BasicType bt,
DecoratorSet decorators) {
set_control(ctl);
C2AccessValuePtr addr(adr, adr_type);
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS, bt, obj, addr, alias_idx);
if (access.is_raw()) {
return _barrier_set->BarrierSetC2::atomic_add_at(access, new_val, value_type);
} else {
return _barrier_set->atomic_add_at(access, new_val, value_type);
}
}
void GraphKit::access_clone(Node* ctl, Node* src, Node* dst, Node* size, bool is_array) {
set_control(ctl);
return _barrier_set->clone(this, src, dst, size, is_array);
}
//-------------------------array_element_address------------------------- //-------------------------array_element_address-------------------------
Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt, Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt,
@ -3817,20 +3808,10 @@ void GraphKit::add_predicate(int nargs) {
add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs); add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs);
} }
//----------------------------- store barriers ----------------------------
#define __ ideal.
bool GraphKit::use_ReduceInitialCardMarks() {
BarrierSet *bs = BarrierSet::barrier_set();
return bs->is_a(BarrierSet::CardTableBarrierSet)
&& barrier_set_cast<CardTableBarrierSet>(bs)->can_elide_tlab_store_barriers()
&& ReduceInitialCardMarks;
}
void GraphKit::sync_kit(IdealKit& ideal) { void GraphKit::sync_kit(IdealKit& ideal) {
set_all_memory(__ merged_memory()); set_all_memory(ideal.merged_memory());
set_i_o(__ i_o()); set_i_o(ideal.i_o());
set_control(__ ctrl()); set_control(ideal.ctrl());
} }
void GraphKit::final_sync(IdealKit& ideal) { void GraphKit::final_sync(IdealKit& ideal) {
@ -3838,541 +3819,6 @@ void GraphKit::final_sync(IdealKit& ideal) {
sync_kit(ideal); sync_kit(ideal);
} }
Node* GraphKit::byte_map_base_node() {
// Get base of card map
jbyte* card_table_base = ci_card_table_address();
if (card_table_base != NULL) {
return makecon(TypeRawPtr::make((address)card_table_base));
} else {
return null();
}
}
// vanilla/CMS post barrier
// Insert a write-barrier store. This is to let generational GC work; we have
// to flag all oop-stores before the next GC point.
void GraphKit::write_barrier_post(Node* oop_store,
Node* obj,
Node* adr,
uint adr_idx,
Node* val,
bool use_precise) {
// No store check needed if we're storing a NULL or an old object
// (latter case is probably a string constant). The concurrent
// mark sweep garbage collector, however, needs to have all nonNull
// oop updates flagged via card-marks.
if (val != NULL && val->is_Con()) {
// must be either an oop or NULL
const Type* t = val->bottom_type();
if (t == TypePtr::NULL_PTR || t == Type::TOP)
// stores of null never (?) need barriers
return;
}
if (use_ReduceInitialCardMarks()
&& obj == just_allocated_object(control())) {
// We can skip marks on a freshly-allocated object in Eden.
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
// That routine informs GC to take appropriate compensating steps,
// upon a slow-path allocation, so as to make this card-mark
// elision safe.
return;
}
if (!use_precise) {
// All card marks for a (non-array) instance are in one place:
adr = obj;
}
// (Else it's an array (or unknown), and we want more precise card marks.)
assert(adr != NULL, "");
IdealKit ideal(this, true);
// Convert the pointer to an int prior to doing math on it
Node* cast = __ CastPX(__ ctrl(), adr);
// Divide by card size
assert(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet),
"Only one we handle so far.");
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
// Combine card table base and card offset
Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
// Get the alias_index for raw card-mark memory
int adr_type = Compile::AliasIdxRaw;
Node* zero = __ ConI(0); // Dirty card value
BasicType bt = T_BYTE;
if (UseConcMarkSweepGC && UseCondCardMark) {
insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier
__ sync_kit(this);
}
if (UseCondCardMark) {
// The classic GC reference write barrier is typically implemented
// as a store into the global card mark table. Unfortunately
// unconditional stores can result in false sharing and excessive
// coherence traffic as well as false transactional aborts.
// UseCondCardMark enables MP "polite" conditional card mark
// stores. In theory we could relax the load from ctrl() to
// no_ctrl, but that doesn't buy much latitude.
Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type);
__ if_then(card_val, BoolTest::ne, zero);
}
// Smash zero into card
if( !UseConcMarkSweepGC ) {
__ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered);
} else {
// Specialized path for CM store barrier
__ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
}
if (UseCondCardMark) {
__ end_if();
}
// Final sync IdealKit and GraphKit.
final_sync(ideal);
}
#if INCLUDE_G1GC
/*
* Determine if the G1 pre-barrier can be removed. The pre-barrier is
* required by SATB to make sure all objects live at the start of the
* marking are kept alive, all reference updates need to any previous
* reference stored before writing.
*
* If the previous value is NULL there is no need to save the old value.
* References that are NULL are filtered during runtime by the barrier
* code to avoid unnecessary queuing.
*
* However in the case of newly allocated objects it might be possible to
* prove that the reference about to be overwritten is NULL during compile
* time and avoid adding the barrier code completely.
*
* The compiler needs to determine that the object in which a field is about
* to be written is newly allocated, and that no prior store to the same field
* has happened since the allocation.
*
* Returns true if the pre-barrier can be removed
*/
bool GraphKit::g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr,
BasicType bt, uint adr_idx) {
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
if (offset == Type::OffsetBot) {
return false; // cannot unalias unless there are precise offsets
}
if (alloc == NULL) {
return false; // No allocation found
}
intptr_t size_in_bytes = type2aelembytes(bt);
Node* mem = memory(adr_idx); // start searching here...
for (int cnt = 0; cnt < 50; cnt++) {
if (mem->is_Store()) {
Node* st_adr = mem->in(MemNode::Address);
intptr_t st_offset = 0;
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
if (st_base == NULL) {
break; // inscrutable pointer
}
// Break we have found a store with same base and offset as ours so break
if (st_base == base && st_offset == offset) {
break;
}
if (st_offset != offset && st_offset != Type::OffsetBot) {
const int MAX_STORE = BytesPerLong;
if (st_offset >= offset + size_in_bytes ||
st_offset <= offset - MAX_STORE ||
st_offset <= offset - mem->as_Store()->memory_size()) {
// Success: The offsets are provably independent.
// (You may ask, why not just test st_offset != offset and be done?
// The answer is that stores of different sizes can co-exist
// in the same sequence of RawMem effects. We sometimes initialize
// a whole 'tile' of array elements with a single jint or jlong.)
mem = mem->in(MemNode::Memory);
continue; // advance through independent store memory
}
}
if (st_base != base
&& MemNode::detect_ptr_independence(base, alloc, st_base,
AllocateNode::Ideal_allocation(st_base, phase),
phase)) {
// Success: The bases are provably independent.
mem = mem->in(MemNode::Memory);
continue; // advance through independent store memory
}
} else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
// Make sure that we are looking at the same allocation site.
// The alloc variable is guaranteed to not be null here from earlier check.
if (alloc == st_alloc) {
// Check that the initialization is storing NULL so that no previous store
// has been moved up and directly write a reference
Node* captured_store = st_init->find_captured_store(offset,
type2aelembytes(T_OBJECT),
phase);
if (captured_store == NULL || captured_store == st_init->zero_memory()) {
return true;
}
}
}
// Unless there is an explicit 'continue', we must bail out here,
// because 'mem' is an inscrutable memory state (e.g., a call).
break;
}
return false;
}
// G1 pre/post barriers
void GraphKit::g1_write_barrier_pre(bool do_load,
Node* obj,
Node* adr,
uint alias_idx,
Node* val,
const TypeOopPtr* val_type,
Node* pre_val,
BasicType bt) {
// Some sanity checks
// Note: val is unused in this routine.
if (do_load) {
// We need to generate the load of the previous value
assert(obj != NULL, "must have a base");
assert(adr != NULL, "where are loading from?");
assert(pre_val == NULL, "loaded already?");
assert(val_type != NULL, "need a type");
if (use_ReduceInitialCardMarks()
&& g1_can_remove_pre_barrier(&_gvn, adr, bt, alias_idx)) {
return;
}
} else {
// In this case both val_type and alias_idx are unused.
assert(pre_val != NULL, "must be loaded already");
// Nothing to be done if pre_val is null.
if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
}
assert(bt == T_OBJECT, "or we shouldn't be here");
IdealKit ideal(this, true);
Node* tls = __ thread(); // ThreadLocalStorage
Node* no_ctrl = NULL;
Node* no_base = __ top();
Node* zero = __ ConI(0);
Node* zeroX = __ ConX(0);
float likely = PROB_LIKELY(0.999);
float unlikely = PROB_UNLIKELY(0.999);
BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width");
// Offsets into the thread
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
// Now the actual pointers into the thread
Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
// Now some of the values
Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
// if (!marking)
__ if_then(marking, BoolTest::ne, zero, unlikely); {
BasicType index_bt = TypeX_X->basic_type();
assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size.");
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
if (do_load) {
// load original value
// alias_idx correct??
pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx);
}
// if (pre_val != NULL)
__ if_then(pre_val, BoolTest::ne, null()); {
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
// is the queue for this thread full?
__ if_then(index, BoolTest::ne, zeroX, likely); {
// decrement the index
Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
// Now get the buffer location we will log the previous value into and store it
Node *log_addr = __ AddP(no_base, buffer, next_index);
__ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
// update the index
__ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
} __ else_(); {
// logging buffer is full, call the runtime
const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
} __ end_if(); // (!index)
} __ end_if(); // (pre_val != NULL)
} __ end_if(); // (!marking)
// Final sync IdealKit and GraphKit.
final_sync(ideal);
}
/*
* G1 similar to any GC with a Young Generation requires a way to keep track of
* references from Old Generation to Young Generation to make sure all live
* objects are found. G1 also requires to keep track of object references
* between different regions to enable evacuation of old regions, which is done
* as part of mixed collections. References are tracked in remembered sets and
* is continuously updated as reference are written to with the help of the
* post-barrier.
*
* To reduce the number of updates to the remembered set the post-barrier
* filters updates to fields in objects located in the Young Generation,
* the same region as the reference, when the NULL is being written or
* if the card is already marked as dirty by an earlier write.
*
* Under certain circumstances it is possible to avoid generating the
* post-barrier completely if it is possible during compile time to prove
* the object is newly allocated and that no safepoint exists between the
* allocation and the store.
*
* In the case of slow allocation the allocation code must handle the barrier
* as part of the allocation in the case the allocated object is not located
* in the nursery, this would happen for humongous objects. This is similar to
* how CMS is required to handle this case, see the comments for the method
* CardTableBarrierSet::on_allocation_slowpath_exit and OptoRuntime::new_deferred_store_barrier.
* A deferred card mark is required for these objects and handled in the above
* mentioned methods.
*
* Returns true if the post barrier can be removed
*/
bool GraphKit::g1_can_remove_post_barrier(PhaseTransform* phase, Node* store,
Node* adr) {
intptr_t offset = 0;
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
if (offset == Type::OffsetBot) {
return false; // cannot unalias unless there are precise offsets
}
if (alloc == NULL) {
return false; // No allocation found
}
// Start search from Store node
Node* mem = store->in(MemNode::Control);
if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
InitializeNode* st_init = mem->in(0)->as_Initialize();
AllocateNode* st_alloc = st_init->allocation();
// Make sure we are looking at the same allocation
if (alloc == st_alloc) {
return true;
}
}
return false;
}
//
// Update the card table and add card address to the queue
//
void GraphKit::g1_mark_card(IdealKit& ideal,
Node* card_adr,
Node* oop_store,
uint oop_alias_idx,
Node* index,
Node* index_adr,
Node* buffer,
const TypeFunc* tf) {
Node* zero = __ ConI(0);
Node* zeroX = __ ConX(0);
Node* no_base = __ top();
BasicType card_bt = T_BYTE;
// Smash zero into card. MUST BE ORDERED WRT TO STORE
__ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
// Now do the queue work
__ if_then(index, BoolTest::ne, zeroX); {
Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
Node* log_addr = __ AddP(no_base, buffer, next_index);
// Order, see storeCM.
__ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
__ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
} __ else_(); {
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
} __ end_if();
}
void GraphKit::g1_write_barrier_post(Node* oop_store,
Node* obj,
Node* adr,
uint alias_idx,
Node* val,
BasicType bt,
bool use_precise) {
// If we are writing a NULL then we need no post barrier
if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
// Must be NULL
const Type* t = val->bottom_type();
assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
// No post barrier if writing NULLx
return;
}
if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) {
// We can skip marks on a freshly-allocated object in Eden.
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
// That routine informs GC to take appropriate compensating steps,
// upon a slow-path allocation, so as to make this card-mark
// elision safe.
return;
}
if (use_ReduceInitialCardMarks()
&& g1_can_remove_post_barrier(&_gvn, oop_store, adr)) {
return;
}
if (!use_precise) {
// All card marks for a (non-array) instance are in one place:
adr = obj;
}
// (Else it's an array (or unknown), and we want more precise card marks.)
assert(adr != NULL, "");
IdealKit ideal(this, true);
Node* tls = __ thread(); // ThreadLocalStorage
Node* no_base = __ top();
float likely = PROB_LIKELY(0.999);
float unlikely = PROB_UNLIKELY(0.999);
Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val());
Node* dirty_card = __ ConI((jint)CardTable::dirty_card_val());
Node* zeroX = __ ConX(0);
// Get the alias_index for raw card-mark memory
const TypePtr* card_type = TypeRawPtr::BOTTOM;
const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
// Offsets into the thread
const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
// Pointers into the thread
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
// Now some values
// Use ctrl to avoid hoisting these values past a safepoint, which could
// potentially reset these fields in the JavaThread.
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
// Convert the store obj pointer to an int prior to doing math on it
// Must use ctrl to prevent "integerized oop" existing across safepoint
Node* cast = __ CastPX(__ ctrl(), adr);
// Divide pointer by card size
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
// Combine card table base and card offset
Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
// If we know the value being stored does it cross regions?
if (val != NULL) {
// Does the store cause us to cross regions?
// Should be able to do an unsigned compare of region_size instead of
// and extra shift. Do we have an unsigned compare??
// Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
// if (xor_res == 0) same region so skip
__ if_then(xor_res, BoolTest::ne, zeroX); {
// No barrier if we are storing a NULL
__ if_then(val, BoolTest::ne, null(), unlikely); {
// Ok must mark the card if not already dirty
// load the original value of the card
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val, BoolTest::ne, young_card); {
sync_kit(ideal);
// Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
insert_mem_bar(Op_MemBarVolatile, oop_store);
__ sync_kit(this);
Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val_reload, BoolTest::ne, dirty_card); {
g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
} __ end_if();
} __ end_if();
} __ end_if();
} else {
// The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
// We don't need a barrier here if the destination is a newly allocated object
// in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
// are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
__ if_then(card_val, BoolTest::ne, young_card); {
g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
} __ end_if();
}
// Final sync IdealKit and GraphKit.
final_sync(ideal);
}
#undef __
#endif // INCLUDE_G1GC
Node* GraphKit::load_String_length(Node* ctrl, Node* str) { Node* GraphKit::load_String_length(Node* ctrl, Node* str) {
Node* len = load_array_length(load_String_value(ctrl, str)); Node* len = load_array_length(load_String_value(ctrl, str));
Node* coder = load_String_coder(ctrl, str); Node* coder = load_String_coder(ctrl, str);
@ -4388,9 +3834,9 @@ Node* GraphKit::load_String_value(Node* ctrl, Node* str) {
const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull, const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull,
TypeAry::make(TypeInt::BYTE, TypeInt::POS), TypeAry::make(TypeInt::BYTE, TypeInt::POS),
ciTypeArrayKlass::make(T_BYTE), true, 0); ciTypeArrayKlass::make(T_BYTE), true, 0);
int value_field_idx = C->get_alias_index(value_field_type); Node* p = basic_plus_adr(str, str, value_offset);
Node* load = make_load(ctrl, basic_plus_adr(str, str, value_offset), Node* load = access_load_at(str, p, value_field_type, value_type, T_OBJECT,
value_type, T_OBJECT, value_field_idx, MemNode::unordered); IN_HEAP | C2_CONTROL_DEPENDENT_LOAD);
// String.value field is known to be @Stable. // String.value field is known to be @Stable.
if (UseImplicitStableValues) { if (UseImplicitStableValues) {
load = cast_array_to_stable(load, value_type); load = cast_array_to_stable(load, value_type);
@ -4416,8 +3862,8 @@ void GraphKit::store_String_value(Node* ctrl, Node* str, Node* value) {
const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(), const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
false, NULL, 0); false, NULL, 0);
const TypePtr* value_field_type = string_type->add_offset(value_offset); const TypePtr* value_field_type = string_type->add_offset(value_offset);
store_oop_to_object(ctrl, str, basic_plus_adr(str, value_offset), value_field_type, access_store_at(ctrl, str, basic_plus_adr(str, value_offset), value_field_type,
value, TypeAryPtr::BYTES, T_OBJECT, MemNode::unordered); value, TypeAryPtr::BYTES, T_OBJECT, IN_HEAP);
} }
void GraphKit::store_String_coder(Node* ctrl, Node* str, Node* value) { void GraphKit::store_String_coder(Node* ctrl, Node* str, Node* value) {

View file

@ -27,6 +27,7 @@
#include "ci/ciEnv.hpp" #include "ci/ciEnv.hpp"
#include "ci/ciMethodData.hpp" #include "ci/ciMethodData.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "opto/addnode.hpp" #include "opto/addnode.hpp"
#include "opto/callnode.hpp" #include "opto/callnode.hpp"
#include "opto/cfgnode.hpp" #include "opto/cfgnode.hpp"
@ -38,6 +39,7 @@
#include "opto/type.hpp" #include "opto/type.hpp"
#include "runtime/deoptimization.hpp" #include "runtime/deoptimization.hpp"
class BarrierSetC2;
class FastLockNode; class FastLockNode;
class FastUnlockNode; class FastUnlockNode;
class IdealKit; class IdealKit;
@ -63,6 +65,7 @@ class GraphKit : public Phase {
SafePointNode* _exceptions;// Parser map(s) for exception state(s) SafePointNode* _exceptions;// Parser map(s) for exception state(s)
int _bci; // JVM Bytecode Pointer int _bci; // JVM Bytecode Pointer
ciMethod* _method; // JVM Current Method ciMethod* _method; // JVM Current Method
BarrierSetC2* _barrier_set;
private: private:
int _sp; // JVM Expression Stack Pointer; don't modify directly! int _sp; // JVM Expression Stack Pointer; don't modify directly!
@ -86,8 +89,9 @@ class GraphKit : public Phase {
virtual Parse* is_Parse() const { return NULL; } virtual Parse* is_Parse() const { return NULL; }
virtual LibraryCallKit* is_LibraryCallKit() const { return NULL; } virtual LibraryCallKit* is_LibraryCallKit() const { return NULL; }
ciEnv* env() const { return _env; } ciEnv* env() const { return _env; }
PhaseGVN& gvn() const { return _gvn; } PhaseGVN& gvn() const { return _gvn; }
void* barrier_set_state() const { return C->barrier_set_state(); }
void record_for_igvn(Node* n) const { C->record_for_igvn(n); } // delegate to Compile void record_for_igvn(Node* n) const { C->record_for_igvn(n); } // delegate to Compile
@ -103,9 +107,6 @@ class GraphKit : public Phase {
Node* zerocon(BasicType bt) const { return _gvn.zerocon(bt); } Node* zerocon(BasicType bt) const { return _gvn.zerocon(bt); }
// (See also macro MakeConX in type.hpp, which uses intcon or longcon.) // (See also macro MakeConX in type.hpp, which uses intcon or longcon.)
// Helper for byte_map_base
Node* byte_map_base_node();
jint find_int_con(Node* n, jint value_if_unknown) { jint find_int_con(Node* n, jint value_if_unknown) {
return _gvn.find_int_con(n, value_if_unknown); return _gvn.find_int_con(n, value_if_unknown);
} }
@ -569,70 +570,67 @@ class GraphKit : public Phase {
bool unaligned = false, bool unaligned = false,
bool mismatched = false); bool mismatched = false);
// Perform decorated accesses
// All in one pre-barrier, store, post_barrier Node* access_store_at(Node* ctl,
// Insert a write-barrier'd store. This is to let generational GC Node* obj, // containing obj
// work; we have to flag all oop-stores before the next GC point. Node* adr, // actual adress to store val at
// const TypePtr* adr_type,
// It comes in 3 flavors of store to an object, array, or unknown. Node* val,
// We use precise card marks for arrays to avoid scanning the entire const Type* val_type,
// array. We use imprecise for object. We use precise for unknown BasicType bt,
// since we don't know if we have an array or and object or even DecoratorSet decorators);
// where the object starts.
//
// If val==NULL, it is taken to be a completely unknown value. QQQ
Node* store_oop(Node* ctl, Node* access_load_at(Node* obj, // containing obj
Node* obj, // containing obj Node* adr, // actual adress to store val at
Node* adr, // actual adress to store val at const TypePtr* adr_type,
const TypePtr* adr_type, const Type* val_type,
Node* val, BasicType bt,
const TypeOopPtr* val_type, DecoratorSet decorators);
BasicType bt,
bool use_precise,
MemNode::MemOrd mo,
bool mismatched = false);
Node* store_oop_to_object(Node* ctl, Node* access_atomic_cmpxchg_val_at(Node* ctl,
Node* obj, // containing obj Node* obj,
Node* adr, // actual adress to store val at Node* adr,
const TypePtr* adr_type, const TypePtr* adr_type,
Node* val, int alias_idx,
const TypeOopPtr* val_type, Node* expected_val,
BasicType bt, Node* new_val,
MemNode::MemOrd mo) { const Type* value_type,
return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false, mo); BasicType bt,
} DecoratorSet decorators);
Node* store_oop_to_array(Node* ctl, Node* access_atomic_cmpxchg_bool_at(Node* ctl,
Node* obj, // containing obj Node* obj,
Node* adr, // actual adress to store val at Node* adr,
const TypePtr* adr_type, const TypePtr* adr_type,
Node* val, int alias_idx,
const TypeOopPtr* val_type, Node* expected_val,
BasicType bt, Node* new_val,
MemNode::MemOrd mo) { const Type* value_type,
return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo); BasicType bt,
} DecoratorSet decorators);
// Could be an array or object we don't know at compile time (unsafe ref.) Node* access_atomic_xchg_at(Node* ctl,
Node* store_oop_to_unknown(Node* ctl, Node* obj,
Node* obj, // containing obj Node* adr,
Node* adr, // actual adress to store val at const TypePtr* adr_type,
int alias_idx,
Node* new_val,
const Type* value_type,
BasicType bt,
DecoratorSet decorators);
Node* access_atomic_add_at(Node* ctl,
Node* obj,
Node* adr,
const TypePtr* adr_type, const TypePtr* adr_type,
Node* val, int alias_idx,
Node* new_val,
const Type* value_type,
BasicType bt, BasicType bt,
MemNode::MemOrd mo, DecoratorSet decorators);
bool mismatched = false);
// For the few case where the barriers need special help void access_clone(Node* ctl, Node* src, Node* dst, Node* size, bool is_array);
void pre_barrier(bool do_load, Node* ctl,
Node* obj, Node* adr, uint adr_idx, Node* val, const TypeOopPtr* val_type,
Node* pre_val,
BasicType bt);
void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
Node* val, BasicType bt, bool use_precise);
// Return addressing for an array element. // Return addressing for an array element.
Node* array_element_address(Node* ary, Node* idx, BasicType elembt, Node* array_element_address(Node* ary, Node* idx, BasicType elembt,
@ -754,49 +752,10 @@ class GraphKit : public Phase {
// Returns the object (if any) which was created the moment before. // Returns the object (if any) which was created the moment before.
Node* just_allocated_object(Node* current_control); Node* just_allocated_object(Node* current_control);
static bool use_ReduceInitialCardMarks();
// Sync Ideal and Graph kits. // Sync Ideal and Graph kits.
void sync_kit(IdealKit& ideal); void sync_kit(IdealKit& ideal);
void final_sync(IdealKit& ideal); void final_sync(IdealKit& ideal);
// vanilla/CMS post barrier
void write_barrier_post(Node *store, Node* obj,
Node* adr, uint adr_idx, Node* val, bool use_precise);
// Allow reordering of pre-barrier with oop store and/or post-barrier.
// Used for load_store operations which loads old value.
bool can_move_pre_barrier() const;
#if INCLUDE_G1GC
// G1 pre/post barriers
void g1_write_barrier_pre(bool do_load,
Node* obj,
Node* adr,
uint alias_idx,
Node* val,
const TypeOopPtr* val_type,
Node* pre_val,
BasicType bt);
void g1_write_barrier_post(Node* store,
Node* obj,
Node* adr,
uint alias_idx,
Node* val,
BasicType bt,
bool use_precise);
// Helper function for g1
private:
void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store, uint oop_alias_idx,
Node* index, Node* index_adr,
Node* buffer, const TypeFunc* tf);
bool g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr, BasicType bt, uint adr_idx);
bool g1_can_remove_post_barrier(PhaseTransform* phase, Node* store, Node* adr);
#endif // INCLUDE_G1GC
public: public:
// Helper function to round double arguments before a call // Helper function to round double arguments before a call
void round_double_arguments(ciMethod* dest_method); void round_double_arguments(ciMethod* dest_method);

View file

@ -244,12 +244,9 @@ class LibraryCallKit : public GraphKit {
// This returns Type::AnyPtr, RawPtr, or OopPtr. // This returns Type::AnyPtr, RawPtr, or OopPtr.
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type); int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false); Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false);
// Helper for inline_unsafe_access.
// Generates the guards that check whether the result of
// Unsafe.getObject should be recorded in an SATB log buffer.
void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind; typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
DecoratorSet mo_decorator_for_access_kind(AccessKind kind);
bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned); bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
static bool klass_needs_init_guard(Node* kls); static bool klass_needs_init_guard(Node* kls);
bool inline_unsafe_allocate(); bool inline_unsafe_allocate();
@ -269,7 +266,7 @@ class LibraryCallKit : public GraphKit {
bool inline_array_copyOf(bool is_copyOfRange); bool inline_array_copyOf(bool is_copyOfRange);
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae); bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
bool inline_preconditions_checkIndex(); bool inline_preconditions_checkIndex();
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark); void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array);
bool inline_native_clone(bool is_virtual); bool inline_native_clone(bool is_virtual);
bool inline_native_Reflection_getCallerClass(); bool inline_native_Reflection_getCallerClass();
// Helper function for inlining native object hash method // Helper function for inlining native object hash method
@ -285,8 +282,6 @@ class LibraryCallKit : public GraphKit {
uint new_idx); uint new_idx);
typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind; typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
MemNode::MemOrd access_kind_to_memord_LS(AccessKind access_kind, bool is_store);
MemNode::MemOrd access_kind_to_memord(AccessKind access_kind);
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind); bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
bool inline_unsafe_fence(vmIntrinsics::ID id); bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_onspinwait(); bool inline_onspinwait();
@ -2224,106 +2219,6 @@ bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
//----------------------------inline_unsafe_access---------------------------- //----------------------------inline_unsafe_access----------------------------
// Helper that guards and inserts a pre-barrier.
void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
Node* pre_val, bool need_mem_bar) {
// We could be accessing the referent field of a reference object. If so, when G1
// is enabled, we need to log the value in the referent field in an SATB buffer.
// This routine performs some compile time filters and generates suitable
// runtime filters that guard the pre-barrier code.
// Also add memory barrier for non volatile load from the referent field
// to prevent commoning of loads across safepoint.
if (!UseG1GC && !need_mem_bar)
return;
// Some compile time checks.
// If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
const TypeX* otype = offset->find_intptr_t_type();
if (otype != NULL && otype->is_con() &&
otype->get_con() != java_lang_ref_Reference::referent_offset) {
// Constant offset but not the reference_offset so just return
return;
}
// We only need to generate the runtime guards for instances.
const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
if (btype != NULL) {
if (btype->isa_aryptr()) {
// Array type so nothing to do
return;
}
const TypeInstPtr* itype = btype->isa_instptr();
if (itype != NULL) {
// Can the klass of base_oop be statically determined to be
// _not_ a sub-class of Reference and _not_ Object?
ciKlass* klass = itype->klass();
if ( klass->is_loaded() &&
!klass->is_subtype_of(env()->Reference_klass()) &&
!env()->Object_klass()->is_subtype_of(klass)) {
return;
}
}
}
// The compile time filters did not reject base_oop/offset so
// we need to generate the following runtime filters
//
// if (offset == java_lang_ref_Reference::_reference_offset) {
// if (instance_of(base, java.lang.ref.Reference)) {
// pre_barrier(_, pre_val, ...);
// }
// }
float likely = PROB_LIKELY( 0.999);
float unlikely = PROB_UNLIKELY(0.999);
IdealKit ideal(this);
#define __ ideal.
Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
__ if_then(offset, BoolTest::eq, referent_off, unlikely); {
// Update graphKit memory and control from IdealKit.
sync_kit(ideal);
Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
// Update IdealKit memory and control from graphKit.
__ sync_kit(this);
Node* one = __ ConI(1);
// is_instof == 0 if base_oop == NULL
__ if_then(is_instof, BoolTest::eq, one, unlikely); {
// Update graphKit from IdeakKit.
sync_kit(ideal);
// Use the pre-barrier to record the value in the referent field
pre_barrier(false /* do_load */,
__ ctrl(),
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
pre_val /* pre_val */,
T_OBJECT);
if (need_mem_bar) {
// Add memory barrier to prevent commoning reads from this field
// across safepoint since GC can change its value.
insert_mem_bar(Op_MemBarCPUOrder);
}
// Update IdealKit from graphKit.
__ sync_kit(this);
} __ end_if(); // _ref_type != ref_none
} __ end_if(); // offset == referent_offset
// Final sync IdealKit and GraphKit.
final_sync(ideal);
#undef __
}
const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type) { const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type) {
// Attempt to infer a sharper value type from the offset and base type. // Attempt to infer a sharper value type from the offset and base type.
ciKlass* sharpened_klass = NULL; ciKlass* sharpened_klass = NULL;
@ -2362,12 +2257,39 @@ const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_
return NULL; return NULL;
} }
DecoratorSet LibraryCallKit::mo_decorator_for_access_kind(AccessKind kind) {
switch (kind) {
case Relaxed:
return MO_UNORDERED;
case Opaque:
return MO_RELAXED;
case Acquire:
return MO_ACQUIRE;
case Release:
return MO_RELEASE;
case Volatile:
return MO_SEQ_CST;
default:
ShouldNotReachHere();
return 0;
}
}
bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, const AccessKind kind, const bool unaligned) { bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, const AccessKind kind, const bool unaligned) {
if (callee()->is_static()) return false; // caller must have the capability! if (callee()->is_static()) return false; // caller must have the capability!
DecoratorSet decorators = C2_UNSAFE_ACCESS;
guarantee(!is_store || kind != Acquire, "Acquire accesses can be produced only for loads"); guarantee(!is_store || kind != Acquire, "Acquire accesses can be produced only for loads");
guarantee( is_store || kind != Release, "Release accesses can be produced only for stores"); guarantee( is_store || kind != Release, "Release accesses can be produced only for stores");
assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type"); assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type");
if (type == T_OBJECT || type == T_ARRAY) {
decorators |= ON_UNKNOWN_OOP_REF;
}
if (unaligned) {
decorators |= C2_UNALIGNED;
}
#ifndef PRODUCT #ifndef PRODUCT
{ {
ResourceMark rm; ResourceMark rm;
@ -2426,6 +2348,10 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
// Can base be NULL? Otherwise, always on-heap access. // Can base be NULL? Otherwise, always on-heap access.
bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop)); bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop));
if (!can_access_non_heap) {
decorators |= IN_HEAP;
}
val = is_store ? argument(4) : NULL; val = is_store ? argument(4) : NULL;
const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
@ -2463,60 +2389,15 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched"); assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched");
if (mismatched) {
decorators |= C2_MISMATCHED;
}
// First guess at the value type. // First guess at the value type.
const Type *value_type = Type::get_const_basic_type(type); const Type *value_type = Type::get_const_basic_type(type);
// We will need memory barriers unless we can determine a unique
// alias category for this reference. (Note: If for some reason
// the barriers get omitted and the unsafe reference begins to "pollute"
// the alias analysis of the rest of the graph, either Compile::can_alias
// or Compile::must_alias will throw a diagnostic assert.)
bool need_mem_bar = false;
switch (kind) {
case Relaxed:
need_mem_bar = (mismatched && !adr_type->isa_aryptr()) || can_access_non_heap;
break;
case Opaque:
// Opaque uses CPUOrder membars for protection against code movement.
case Acquire:
case Release:
case Volatile:
need_mem_bar = true;
break;
default:
ShouldNotReachHere();
}
// Some accesses require access atomicity for all types, notably longs and doubles.
// When AlwaysAtomicAccesses is enabled, all accesses are atomic.
bool requires_atomic_access = false;
switch (kind) {
case Relaxed:
requires_atomic_access = AlwaysAtomicAccesses;
break;
case Opaque:
// Opaque accesses are atomic.
case Acquire:
case Release:
case Volatile:
requires_atomic_access = true;
break;
default:
ShouldNotReachHere();
}
// Figure out the memory ordering. // Figure out the memory ordering.
// Acquire/Release/Volatile accesses require marking the loads/stores with MemOrd decorators |= mo_decorator_for_access_kind(kind);
MemNode::MemOrd mo = access_kind_to_memord_LS(kind, is_store);
// If we are reading the value of the referent field of a Reference
// object (either by using Unsafe directly or through reflection)
// then, if G1 is enabled, we need to record the referent in an
// SATB log buffer using the pre-barrier mechanism.
// Also we need to add memory barrier to prevent commoning reads
// from this field across safepoint since GC can change its value.
bool need_read_barrier = !is_store &&
offset != top() && heap_base_oop != top();
if (!is_store && type == T_OBJECT) { if (!is_store && type == T_OBJECT) {
const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type); const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
@ -2534,39 +2415,6 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
// and it is not possible to fully distinguish unintended nulls // and it is not possible to fully distinguish unintended nulls
// from intended ones in this API. // from intended ones in this API.
// We need to emit leading and trailing CPU membars (see below) in
// addition to memory membars for special access modes. This is a little
// too strong, but avoids the need to insert per-alias-type
// volatile membars (for stores; compare Parse::do_put_xxx), which
// we cannot do effectively here because we probably only have a
// rough approximation of type.
switch(kind) {
case Relaxed:
case Opaque:
case Acquire:
break;
case Release:
case Volatile:
if (is_store) {
insert_mem_bar(Op_MemBarRelease);
} else {
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
insert_mem_bar(Op_MemBarVolatile);
}
}
break;
default:
ShouldNotReachHere();
}
// Memory barrier to prevent normal and 'unsafe' accesses from
// bypassing each other. Happens after null checks, so the
// exception paths do not take memory state from the memory barrier,
// so there's no problems making a strong assert about mixing users
// of safe & unsafe memory.
if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
if (!is_store) { if (!is_store) {
Node* p = NULL; Node* p = NULL;
// Try to constant fold a load from a constant field // Try to constant fold a load from a constant field
@ -2575,37 +2423,17 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
// final or stable field // final or stable field
p = make_constant_from_field(field, heap_base_oop); p = make_constant_from_field(field, heap_base_oop);
} }
if (p == NULL) {
// To be valid, unsafe loads may depend on other conditions than if (p == NULL) { // Could not constant fold the load
// the one that guards them: pin the Load node p = access_load_at(heap_base_oop, adr, adr_type, value_type, type, decorators);
LoadNode::ControlDependency dep = LoadNode::Pinned; // Normalize the value returned by getBoolean in the following cases
Node* ctrl = control(); if (type == T_BOOLEAN &&
// non volatile loads may be able to float (mismatched ||
if (!need_mem_bar && adr_type->isa_instptr()) { heap_base_oop == top() || // - heap_base_oop is NULL or
assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null"); (can_access_non_heap && field == NULL)) // - heap_base_oop is potentially NULL
intptr_t offset = Type::OffsetBot; // and the unsafe access is made to large offset
AddPNode::Ideal_base_and_offset(adr, &_gvn, offset); // (i.e., larger than the maximum offset necessary for any
if (offset >= 0) { // field access)
int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->klass()->layout_helper());
if (offset < s) {
// Guaranteed to be a valid access, no need to pin it
dep = LoadNode::DependsOnlyOnTest;
ctrl = NULL;
}
}
}
p = make_load(ctrl, adr, value_type, type, adr_type, mo, dep, requires_atomic_access, unaligned, mismatched);
// load value
switch (type) {
case T_BOOLEAN:
{
// Normalize the value returned by getBoolean in the following cases
if (mismatched ||
heap_base_oop == top() || // - heap_base_oop is NULL or
(can_access_non_heap && alias_type->field() == NULL) // - heap_base_oop is potentially NULL
// and the unsafe access is made to large offset
// (i.e., larger than the maximum offset necessary for any
// field access)
) { ) {
IdealKit ideal = IdealKit(this); IdealKit ideal = IdealKit(this);
#define __ ideal. #define __ ideal.
@ -2618,81 +2446,26 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
final_sync(ideal); final_sync(ideal);
p = __ value(normalized_result); p = __ value(normalized_result);
#undef __ #undef __
}
}
case T_CHAR:
case T_BYTE:
case T_SHORT:
case T_INT:
case T_LONG:
case T_FLOAT:
case T_DOUBLE:
break;
case T_OBJECT:
if (need_read_barrier) {
// We do not require a mem bar inside pre_barrier if need_mem_bar
// is set: the barriers would be emitted by us.
insert_pre_barrier(heap_base_oop, offset, p, !need_mem_bar);
}
break;
case T_ADDRESS:
// Cast to an int type.
p = _gvn.transform(new CastP2XNode(NULL, p));
p = ConvX2UL(p);
break;
default:
fatal("unexpected type %d: %s", type, type2name(type));
break;
} }
} }
if (type == T_ADDRESS) {
p = gvn().transform(new CastP2XNode(NULL, p));
p = ConvX2UL(p);
}
// The load node has the control of the preceding MemBarCPUOrder. All // The load node has the control of the preceding MemBarCPUOrder. All
// following nodes will have the control of the MemBarCPUOrder inserted at // following nodes will have the control of the MemBarCPUOrder inserted at
// the end of this method. So, pushing the load onto the stack at a later // the end of this method. So, pushing the load onto the stack at a later
// point is fine. // point is fine.
set_result(p); set_result(p);
} else { } else {
// place effect of store into memory if (bt == T_ADDRESS) {
switch (type) {
case T_DOUBLE:
val = dstore_rounding(val);
break;
case T_ADDRESS:
// Repackage the long as a pointer. // Repackage the long as a pointer.
val = ConvL2X(val); val = ConvL2X(val);
val = _gvn.transform(new CastX2PNode(val)); val = gvn().transform(new CastX2PNode(val));
break;
default:
break;
}
if (type == T_OBJECT) {
store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
} else {
store_to_memory(control(), adr, val, type, adr_type, mo, requires_atomic_access, unaligned, mismatched);
} }
access_store_at(control(), heap_base_oop, adr, adr_type, val, value_type, type, decorators);
} }
switch(kind) {
case Relaxed:
case Opaque:
case Release:
break;
case Acquire:
case Volatile:
if (!is_store) {
insert_mem_bar(Op_MemBarAcquire);
} else {
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
insert_mem_bar(Op_MemBarVolatile);
}
}
break;
default:
ShouldNotReachHere();
}
if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
return true; return true;
} }
@ -2757,6 +2530,9 @@ bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadSt
if (callee()->is_static()) return false; // caller must have the capability! if (callee()->is_static()) return false; // caller must have the capability!
DecoratorSet decorators = C2_UNSAFE_ACCESS;
decorators |= mo_decorator_for_access_kind(access_kind);
#ifndef PRODUCT #ifndef PRODUCT
BasicType rtype; BasicType rtype;
{ {
@ -2888,318 +2664,54 @@ bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadSt
int alias_idx = C->get_alias_index(adr_type); int alias_idx = C->get_alias_index(adr_type);
// Memory-model-wise, a LoadStore acts like a little synchronized if (type == T_OBJECT || type == T_ARRAY) {
// block, so needs barriers on each side. These don't translate decorators |= IN_HEAP | ON_UNKNOWN_OOP_REF;
// into actual barriers on most machines, but we still need rest of
// compiler to respect ordering.
switch (access_kind) {
case Relaxed:
case Acquire:
break;
case Release:
insert_mem_bar(Op_MemBarRelease);
break;
case Volatile:
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
insert_mem_bar(Op_MemBarVolatile);
} else {
insert_mem_bar(Op_MemBarRelease);
}
break;
default:
ShouldNotReachHere();
}
insert_mem_bar(Op_MemBarCPUOrder);
// Figure out the memory ordering.
MemNode::MemOrd mo = access_kind_to_memord(access_kind);
// 4984716: MemBars must be inserted before this
// memory node in order to avoid a false
// dependency which will confuse the scheduler.
Node *mem = memory(alias_idx);
// For now, we handle only those cases that actually exist: ints,
// longs, and Object. Adding others should be straightforward.
Node* load_store = NULL;
switch(type) {
case T_BYTE:
switch(kind) {
case LS_get_add:
load_store = _gvn.transform(new GetAndAddBNode(control(), mem, adr, newval, adr_type));
break;
case LS_get_set:
load_store = _gvn.transform(new GetAndSetBNode(control(), mem, adr, newval, adr_type));
break;
case LS_cmp_swap_weak:
load_store = _gvn.transform(new WeakCompareAndSwapBNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_swap:
load_store = _gvn.transform(new CompareAndSwapBNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_exchange:
load_store = _gvn.transform(new CompareAndExchangeBNode(control(), mem, adr, newval, oldval, adr_type, mo));
break;
default:
ShouldNotReachHere();
}
break;
case T_SHORT:
switch(kind) {
case LS_get_add:
load_store = _gvn.transform(new GetAndAddSNode(control(), mem, adr, newval, adr_type));
break;
case LS_get_set:
load_store = _gvn.transform(new GetAndSetSNode(control(), mem, adr, newval, adr_type));
break;
case LS_cmp_swap_weak:
load_store = _gvn.transform(new WeakCompareAndSwapSNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_swap:
load_store = _gvn.transform(new CompareAndSwapSNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_exchange:
load_store = _gvn.transform(new CompareAndExchangeSNode(control(), mem, adr, newval, oldval, adr_type, mo));
break;
default:
ShouldNotReachHere();
}
break;
case T_INT:
switch(kind) {
case LS_get_add:
load_store = _gvn.transform(new GetAndAddINode(control(), mem, adr, newval, adr_type));
break;
case LS_get_set:
load_store = _gvn.transform(new GetAndSetINode(control(), mem, adr, newval, adr_type));
break;
case LS_cmp_swap_weak:
load_store = _gvn.transform(new WeakCompareAndSwapINode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_swap:
load_store = _gvn.transform(new CompareAndSwapINode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_exchange:
load_store = _gvn.transform(new CompareAndExchangeINode(control(), mem, adr, newval, oldval, adr_type, mo));
break;
default:
ShouldNotReachHere();
}
break;
case T_LONG:
switch(kind) {
case LS_get_add:
load_store = _gvn.transform(new GetAndAddLNode(control(), mem, adr, newval, adr_type));
break;
case LS_get_set:
load_store = _gvn.transform(new GetAndSetLNode(control(), mem, adr, newval, adr_type));
break;
case LS_cmp_swap_weak:
load_store = _gvn.transform(new WeakCompareAndSwapLNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_swap:
load_store = _gvn.transform(new CompareAndSwapLNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_exchange:
load_store = _gvn.transform(new CompareAndExchangeLNode(control(), mem, adr, newval, oldval, adr_type, mo));
break;
default:
ShouldNotReachHere();
}
break;
case T_OBJECT:
// Transformation of a value which could be NULL pointer (CastPP #NULL) // Transformation of a value which could be NULL pointer (CastPP #NULL)
// could be delayed during Parse (for example, in adjust_map_after_if()). // could be delayed during Parse (for example, in adjust_map_after_if()).
// Execute transformation here to avoid barrier generation in such case. // Execute transformation here to avoid barrier generation in such case.
if (_gvn.type(newval) == TypePtr::NULL_PTR) if (_gvn.type(newval) == TypePtr::NULL_PTR)
newval = _gvn.makecon(TypePtr::NULL_PTR); newval = _gvn.makecon(TypePtr::NULL_PTR);
// Reference stores need a store barrier. if (oldval != NULL && _gvn.type(oldval) == TypePtr::NULL_PTR) {
switch(kind) { // Refine the value to a null constant, when it is known to be null
case LS_get_set: { oldval = _gvn.makecon(TypePtr::NULL_PTR);
// If pre-barrier must execute before the oop store, old value will require do_load here.
if (!can_move_pre_barrier()) {
pre_barrier(true /* do_load*/,
control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
NULL /* pre_val*/,
T_OBJECT);
} // Else move pre_barrier to use load_store value, see below.
break;
}
case LS_cmp_swap_weak:
case LS_cmp_swap:
case LS_cmp_exchange: {
// Same as for newval above:
if (_gvn.type(oldval) == TypePtr::NULL_PTR) {
oldval = _gvn.makecon(TypePtr::NULL_PTR);
}
// The only known value which might get overwritten is oldval.
pre_barrier(false /* do_load */,
control(), NULL, NULL, max_juint, NULL, NULL,
oldval /* pre_val */,
T_OBJECT);
break;
}
default:
ShouldNotReachHere();
}
#ifdef _LP64
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
Node *newval_enc = _gvn.transform(new EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
switch(kind) {
case LS_get_set:
load_store = _gvn.transform(new GetAndSetNNode(control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
break;
case LS_cmp_swap_weak: {
Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
load_store = _gvn.transform(new WeakCompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo));
break;
}
case LS_cmp_swap: {
Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
load_store = _gvn.transform(new CompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo));
break;
}
case LS_cmp_exchange: {
Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
load_store = _gvn.transform(new CompareAndExchangeNNode(control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
break;
}
default:
ShouldNotReachHere();
}
} else
#endif
switch (kind) {
case LS_get_set:
load_store = _gvn.transform(new GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr()));
break;
case LS_cmp_swap_weak:
load_store = _gvn.transform(new WeakCompareAndSwapPNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_swap:
load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval, mo));
break;
case LS_cmp_exchange:
load_store = _gvn.transform(new CompareAndExchangePNode(control(), mem, adr, newval, oldval, adr_type, value_type->is_oopptr(), mo));
break;
default:
ShouldNotReachHere();
}
// Emit the post barrier only when the actual store happened. This makes sense
// to check only for LS_cmp_* that can fail to set the value.
// LS_cmp_exchange does not produce any branches by default, so there is no
// boolean result to piggyback on. TODO: When we merge CompareAndSwap with
// CompareAndExchange and move branches here, it would make sense to conditionalize
// post_barriers for LS_cmp_exchange as well.
//
// CAS success path is marked more likely since we anticipate this is a performance
// critical path, while CAS failure path can use the penalty for going through unlikely
// path as backoff. Which is still better than doing a store barrier there.
switch (kind) {
case LS_get_set:
case LS_cmp_exchange: {
post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
break;
}
case LS_cmp_swap_weak:
case LS_cmp_swap: {
IdealKit ideal(this);
ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
sync_kit(ideal);
post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
ideal.sync_kit(this);
} ideal.end_if();
final_sync(ideal);
break;
}
default:
ShouldNotReachHere();
}
break;
default:
fatal("unexpected type %d: %s", type, type2name(type));
break;
}
// SCMemProjNodes represent the memory state of a LoadStore. Their
// main role is to prevent LoadStore nodes from being optimized away
// when their results aren't used.
Node* proj = _gvn.transform(new SCMemProjNode(load_store));
set_memory(proj, alias_idx);
if (type == T_OBJECT && (kind == LS_get_set || kind == LS_cmp_exchange)) {
#ifdef _LP64
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
load_store = _gvn.transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
}
#endif
if (can_move_pre_barrier() && kind == LS_get_set) {
// Don't need to load pre_val. The old value is returned by load_store.
// The pre_barrier can execute after the xchg as long as no safepoint
// gets inserted between them.
pre_barrier(false /* do_load */,
control(), NULL, NULL, max_juint, NULL, NULL,
load_store /* pre_val */,
T_OBJECT);
} }
} }
// Add the trailing membar surrounding the access Node* result = NULL;
insert_mem_bar(Op_MemBarCPUOrder); switch (kind) {
case LS_cmp_exchange: {
switch (access_kind) { result = access_atomic_cmpxchg_val_at(control(), base, adr, adr_type, alias_idx,
case Relaxed: oldval, newval, value_type, type, decorators);
case Release:
break; // do nothing
case Acquire:
case Volatile:
insert_mem_bar(Op_MemBarAcquire);
// !support_IRIW_for_not_multiple_copy_atomic_cpu handled in platform code
break; break;
}
case LS_cmp_swap_weak:
decorators |= C2_WEAK_CMPXCHG;
case LS_cmp_swap: {
result = access_atomic_cmpxchg_bool_at(control(), base, adr, adr_type, alias_idx,
oldval, newval, value_type, type, decorators);
break;
}
case LS_get_set: {
result = access_atomic_xchg_at(control(), base, adr, adr_type, alias_idx,
newval, value_type, type, decorators);
break;
}
case LS_get_add: {
result = access_atomic_add_at(control(), base, adr, adr_type, alias_idx,
newval, value_type, type, decorators);
break;
}
default: default:
ShouldNotReachHere(); ShouldNotReachHere();
} }
assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); assert(type2size[result->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
set_result(load_store); set_result(result);
return true; return true;
} }
MemNode::MemOrd LibraryCallKit::access_kind_to_memord_LS(AccessKind kind, bool is_store) {
  // Select the MemNode memory ordering for an atomic load-store access.
  // Opaque/Relaxed accesses are unordered; Volatile picks release for
  // stores and acquire for loads. Any other kind is a programming error.
  MemNode::MemOrd order = MemNode::unset;
  if (kind == Opaque || kind == Relaxed) {
    order = MemNode::unordered;
  } else if (kind == Acquire) {
    order = MemNode::acquire;
  } else if (kind == Release) {
    order = MemNode::release;
  } else if (kind == Volatile) {
    order = is_store ? MemNode::release : MemNode::acquire;
  } else {
    ShouldNotReachHere();
  }
  guarantee(order != MemNode::unset, "Should select memory ordering");
  return order;
}
MemNode::MemOrd LibraryCallKit::access_kind_to_memord(AccessKind kind) {
  // Select the MemNode memory ordering for a plain (non load-store) access.
  // Opaque/Relaxed map to unordered and Volatile maps to sequentially
  // consistent; any other kind is a programming error.
  MemNode::MemOrd order = MemNode::unset;
  if (kind == Opaque || kind == Relaxed) {
    order = MemNode::unordered;
  } else if (kind == Acquire) {
    order = MemNode::acquire;
  } else if (kind == Release) {
    order = MemNode::release;
  } else if (kind == Volatile) {
    order = MemNode::seqcst;
  } else {
    ShouldNotReachHere();
  }
  guarantee(order != MemNode::unset, "Should select memory ordering");
  return order;
}
bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) { bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
// Regardless of form, don't allow previous ld/st to move down, // Regardless of form, don't allow previous ld/st to move down,
// then issue acquire, release, or volatile mem_bar. // then issue acquire, release, or volatile mem_bar.
@ -4636,7 +4148,7 @@ bool LibraryCallKit::inline_unsafe_copyMemory() {
//------------------------clone_coping----------------------------------- //------------------------clone_coping-----------------------------------
// Helper function for inline_native_clone. // Helper function for inline_native_clone.
void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark) { void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array) {
assert(obj_size != NULL, ""); assert(obj_size != NULL, "");
Node* raw_obj = alloc_obj->in(1); Node* raw_obj = alloc_obj->in(1);
assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), ""); assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
@ -4656,66 +4168,9 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b
// Copy the fastest available way. // Copy the fastest available way.
// TODO: generate fields copies for small objects instead. // TODO: generate fields copies for small objects instead.
Node* src = obj;
Node* dest = alloc_obj;
Node* size = _gvn.transform(obj_size); Node* size = _gvn.transform(obj_size);
// Exclude the header but include array length to copy by 8 bytes words. access_clone(control(), obj, alloc_obj, size, is_array);
// Can't use base_offset_in_bytes(bt) since basic type is unknown.
int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
instanceOopDesc::base_offset_in_bytes();
// base_off:
// 8 - 32-bit VM
// 12 - 64-bit VM, compressed klass
// 16 - 64-bit VM, normal klass
if (base_off % BytesPerLong != 0) {
assert(UseCompressedClassPointers, "");
if (is_array) {
// Exclude length to copy by 8 bytes words.
base_off += sizeof(int);
} else {
// Include klass to copy by 8 bytes words.
base_off = instanceOopDesc::klass_offset_in_bytes();
}
assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
}
src = basic_plus_adr(src, base_off);
dest = basic_plus_adr(dest, base_off);
// Compute the length also, if needed:
Node* countx = size;
countx = _gvn.transform(new SubXNode(countx, MakeConX(base_off)));
countx = _gvn.transform(new URShiftXNode(countx, intcon(LogBytesPerLong) ));
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false, false);
ac->set_clonebasic();
Node* n = _gvn.transform(ac);
if (n == ac) {
set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
} else {
set_all_memory(n);
}
// If necessary, emit some card marks afterwards. (Non-arrays only.)
if (card_mark) {
assert(!is_array, "");
// Put in store barrier for any and all oops we are sticking
// into this object. (We could avoid this if we could prove
// that the object type contains no oop fields at all.)
Node* no_particular_value = NULL;
Node* no_particular_field = NULL;
int raw_adr_idx = Compile::AliasIdxRaw;
post_barrier(control(),
memory(raw_adr_type),
alloc_obj,
no_particular_field,
raw_adr_idx,
no_particular_value,
T_OBJECT,
false);
}
// Do not let reads from the cloned object float above the arraycopy. // Do not let reads from the cloned object float above the arraycopy.
if (alloc != NULL) { if (alloc != NULL) {
@ -4805,9 +4260,6 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
PhiNode* result_mem = new PhiNode(result_reg, Type::MEMORY, TypePtr::BOTTOM); PhiNode* result_mem = new PhiNode(result_reg, Type::MEMORY, TypePtr::BOTTOM);
record_for_igvn(result_reg); record_for_igvn(result_reg);
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
int raw_adr_idx = Compile::AliasIdxRaw;
Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL); Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
if (array_ctl != NULL) { if (array_ctl != NULL) {
// It's an array. // It's an array.
@ -4817,9 +4269,10 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
Node* obj_size = NULL; Node* obj_size = NULL;
Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push
if (!use_ReduceInitialCardMarks()) { BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
if (bs->array_copy_requires_gc_barriers(T_OBJECT)) {
// If it is an oop array, it requires very special treatment, // If it is an oop array, it requires very special treatment,
// because card marking is required on each card of the array. // because gc barriers are required when accessing the array.
Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL); Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
if (is_obja != NULL) { if (is_obja != NULL) {
PreserveJVMState pjvms2(this); PreserveJVMState pjvms2(this);
@ -4838,7 +4291,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
result_mem ->set_req(_objArray_path, reset_memory()); result_mem ->set_req(_objArray_path, reset_memory());
} }
} }
// Otherwise, there are no card marks to worry about. // Otherwise, there are no barriers to worry about.
// (We can dispense with card marks if we know the allocation // (We can dispense with card marks if we know the allocation
// comes out of eden (TLAB)... In fact, ReduceInitialCardMarks // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
// causes the non-eden paths to take compensating steps to // causes the non-eden paths to take compensating steps to
@ -4847,7 +4300,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
// the object.) // the object.)
if (!stopped()) { if (!stopped()) {
copy_to_clone(obj, alloc_obj, obj_size, true, false); copy_to_clone(obj, alloc_obj, obj_size, true);
// Present the results of the copy. // Present the results of the copy.
result_reg->init_req(_array_path, control()); result_reg->init_req(_array_path, control());
@ -4893,7 +4346,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
// exception state between multiple Object.clone versions (reexecute=true vs reexecute=false). // exception state between multiple Object.clone versions (reexecute=true vs reexecute=false).
Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size, /*deoptimize_on_exception=*/true); Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size, /*deoptimize_on_exception=*/true);
copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks()); copy_to_clone(obj, alloc_obj, obj_size, false);
// Present the results of the slow call. // Present the results of the slow call.
result_reg->init_req(_instance_path, control()); result_reg->init_req(_instance_path, control());
@ -6100,21 +5553,23 @@ bool LibraryCallKit::inline_reference_get() {
Node* reference_obj = null_check_receiver(); Node* reference_obj = null_check_receiver();
if (stopped()) return true; if (stopped()) return true;
const TypeInstPtr* tinst = _gvn.type(reference_obj)->isa_instptr();
assert(tinst != NULL, "obj is null");
assert(tinst->klass()->is_loaded(), "obj is not loaded");
ciInstanceKlass* referenceKlass = tinst->klass()->as_instance_klass();
ciField* field = referenceKlass->get_field_by_name(ciSymbol::make("referent"),
ciSymbol::make("Ljava/lang/Object;"),
false);
assert (field != NULL, "undefined field");
Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset);
const TypePtr* adr_type = C->alias_type(field)->adr_type();
ciInstanceKlass* klass = env()->Object_klass(); ciInstanceKlass* klass = env()->Object_klass();
const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass); const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
Node* no_ctrl = NULL; DecoratorSet decorators = IN_HEAP | ON_WEAK_OOP_REF;
Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT, MemNode::unordered); Node* result = access_load_at(reference_obj, adr, adr_type, object_type, T_OBJECT, decorators);
// Use the pre-barrier to record the value in the referent field
pre_barrier(false /* do_load */,
control(),
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
result /* pre_val */,
T_OBJECT);
// Add memory barrier to prevent commoning reads from this field // Add memory barrier to prevent commoning reads from this field
// across safepoint since GC can change its value. // across safepoint since GC can change its value.
insert_mem_bar(Op_MemBarCPUOrder); insert_mem_bar(Op_MemBarCPUOrder);
@ -6167,20 +5622,13 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * field
type = Type::get_const_basic_type(bt); type = Type::get_const_basic_type(bt);
} }
if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_vol) { DecoratorSet decorators = IN_HEAP;
insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier
}
// Build the load.
MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
Node* loadedField = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, is_vol);
// If reference is volatile, prevent following memory ops from
// floating up past the volatile read. Also prevents commoning
// another volatile read.
if (is_vol) { if (is_vol) {
// Memory barrier includes bogus read of value to force load BEFORE membar decorators |= MO_SEQ_CST;
insert_mem_bar(Op_MemBarAcquire, loadedField);
} }
return loadedField;
return access_load_at(fromObj, adr, adr_type, type, bt, decorators);
} }
Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,

View file

@ -684,10 +684,12 @@ class PhaseIdealLoop : public PhaseTransform {
// Mark as post visited // Mark as post visited
void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; } void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; }
public:
// Set/get control node out. Set lower bit to distinguish from IdealLoopTree // Set/get control node out. Set lower bit to distinguish from IdealLoopTree
// Returns true if "n" is a data node, false if it's a control node. // Returns true if "n" is a data node, false if it's a control node.
bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; } bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; }
private:
// clear out dead code after build_loop_late // clear out dead code after build_loop_late
Node_List _deadlist; Node_List _deadlist;
@ -736,6 +738,8 @@ class PhaseIdealLoop : public PhaseTransform {
public: public:
PhaseIterGVN &igvn() const { return _igvn; }
static bool is_canonical_loop_entry(CountedLoopNode* cl); static bool is_canonical_loop_entry(CountedLoopNode* cl);
bool has_node( Node* n ) const { bool has_node( Node* n ) const {
@ -789,7 +793,6 @@ public:
} }
} }
private:
Node *get_ctrl_no_update_helper(Node *i) const { Node *get_ctrl_no_update_helper(Node *i) const {
assert(has_ctrl(i), "should be control, not loop"); assert(has_ctrl(i), "should be control, not loop");
return (Node*)(((intptr_t)_nodes[i->_idx]) & ~1); return (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
@ -822,7 +825,6 @@ private:
// the 'old_node' with 'new_node'. Kill old-node. Add a reference // the 'old_node' with 'new_node'. Kill old-node. Add a reference
// from old_node to new_node to support the lazy update. Reference // from old_node to new_node to support the lazy update. Reference
// replaces loop reference, since that is not needed for dead node. // replaces loop reference, since that is not needed for dead node.
public:
void lazy_update(Node *old_node, Node *new_node) { void lazy_update(Node *old_node, Node *new_node) {
assert(old_node != new_node, "no cycles please"); assert(old_node != new_node, "no cycles please");
// Re-use the side array slot for this node to provide the // Re-use the side array slot for this node to provide the
@ -856,6 +858,7 @@ private:
uint *_dom_depth; // Used for fast LCA test uint *_dom_depth; // Used for fast LCA test
GrowableArray<uint>* _dom_stk; // For recomputation of dom depth GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
public:
Node* idom_no_update(Node* d) const { Node* idom_no_update(Node* d) const {
return idom_no_update(d->_idx); return idom_no_update(d->_idx);
} }
@ -911,7 +914,6 @@ private:
// build the loop tree and perform any requested optimizations // build the loop tree and perform any requested optimizations
void build_and_optimize(bool do_split_if, bool skip_loop_opts); void build_and_optimize(bool do_split_if, bool skip_loop_opts);
public:
// Dominators for the sea of nodes // Dominators for the sea of nodes
void Dominators(); void Dominators();
Node *dom_lca( Node *n1, Node *n2 ) const { Node *dom_lca( Node *n1, Node *n2 ) const {
@ -968,6 +970,8 @@ public:
return (IdealLoopTree*)_nodes[n->_idx]; return (IdealLoopTree*)_nodes[n->_idx];
} }
IdealLoopTree *ltree_root() const { return _ltree_root; }
// Is 'n' a (nested) member of 'loop'? // Is 'n' a (nested) member of 'loop'?
int is_member( const IdealLoopTree *loop, Node *n ) const { int is_member( const IdealLoopTree *loop, Node *n ) const {
return loop->is_member(get_loop(n)); } return loop->is_member(get_loop(n)); }

View file

@ -23,6 +23,8 @@
*/ */
#include "precompiled.hpp" #include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
#include "opto/addnode.hpp" #include "opto/addnode.hpp"

View file

@ -227,108 +227,9 @@ void PhaseMacroExpand::extract_call_projections(CallNode *call) {
} }
// Eliminate a card mark sequence. p2x is a ConvP2XNode void PhaseMacroExpand::eliminate_gc_barrier(Node* p2x) {
void PhaseMacroExpand::eliminate_card_mark(Node* p2x) { BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2();
assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required"); bs->eliminate_gc_barrier(this, p2x);
if (!UseG1GC) {
// vanilla/CMS post barrier
Node *shift = p2x->unique_out();
Node *addp = shift->unique_out();
for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
Node *mem = addp->last_out(j);
if (UseCondCardMark && mem->is_Load()) {
assert(mem->Opcode() == Op_LoadB, "unexpected code shape");
// The load is checking if the card has been written so
// replace it with zero to fold the test.
_igvn.replace_node(mem, intcon(0));
continue;
}
assert(mem->is_Store(), "store required");
_igvn.replace_node(mem, mem->in(MemNode::Memory));
}
}
#if INCLUDE_G1GC
else {
// G1 pre/post barriers
assert(p2x->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
// It could be only one user, URShift node, in Object.clone() intrinsic
// but the new allocation is passed to arraycopy stub and it could not
// be scalar replaced. So we don't check the case.
// An other case of only one user (Xor) is when the value check for NULL
// in G1 post barrier is folded after CCP so the code which used URShift
// is removed.
// Take Region node before eliminating post barrier since it also
// eliminates CastP2X node when it has only one user.
Node* this_region = p2x->in(0);
assert(this_region != NULL, "");
// Remove G1 post barrier.
// Search for CastP2X->Xor->URShift->Cmp path which
// checks if the store done to a different from the value's region.
// And replace Cmp with #0 (false) to collapse G1 post barrier.
Node* xorx = p2x->find_out_with(Op_XorX);
if (xorx != NULL) {
Node* shift = xorx->unique_out();
Node* cmpx = shift->unique_out();
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
"missing region check in G1 post barrier");
_igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
// Remove G1 pre barrier.
// Search "if (marking != 0)" check and set it to "false".
// There is no G1 pre barrier if previous stored value is NULL
// (for example, after initialization).
if (this_region->is_Region() && this_region->req() == 3) {
int ind = 1;
if (!this_region->in(ind)->is_IfFalse()) {
ind = 2;
}
if (this_region->in(ind)->is_IfFalse() &&
this_region->in(ind)->in(0)->Opcode() == Op_If) {
Node* bol = this_region->in(ind)->in(0)->in(1);
assert(bol->is_Bool(), "");
cmpx = bol->in(1);
if (bol->as_Bool()->_test._test == BoolTest::ne &&
cmpx->is_Cmp() && cmpx->in(2) == intcon(0) &&
cmpx->in(1)->is_Load()) {
Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
if (adr->is_AddP() && adr->in(AddPNode::Base) == top() &&
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
adr->in(AddPNode::Offset) == MakeConX(marking_offset)) {
_igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
}
}
}
}
} else {
assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking");
// This is a G1 post barrier emitted by the Object.clone() intrinsic.
// Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
// is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
Node* shift = p2x->find_out_with(Op_URShiftX);
assert(shift != NULL, "missing G1 post barrier");
Node* addp = shift->unique_out();
Node* load = addp->find_out_with(Op_LoadB);
assert(load != NULL, "missing G1 post barrier");
Node* cmpx = load->unique_out();
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
"missing card value check in G1 post barrier");
_igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
// There is no G1 pre barrier in this case
}
// Now CastP2X can be removed since it is used only on dead path
// which currently still alive until igvn optimize it.
assert(p2x->outcnt() == 0 || p2x->unique_out()->Opcode() == Op_URShiftX, "");
_igvn.replace_node(p2x, top());
}
#endif // INCLUDE_G1GC
} }
// Search for a memory operation for the specified memory slice. // Search for a memory operation for the specified memory slice.
@ -1029,7 +930,7 @@ void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) {
disconnect_projections(membar_after->as_MemBar(), _igvn); disconnect_projections(membar_after->as_MemBar(), _igvn);
} }
} else { } else {
eliminate_card_mark(n); eliminate_gc_barrier(n);
} }
k -= (oc2 - use->outcnt()); k -= (oc2 - use->outcnt());
} }
@ -1062,7 +963,7 @@ void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) {
_igvn._worklist.push(ac); _igvn._worklist.push(ac);
} else { } else {
eliminate_card_mark(use); eliminate_gc_barrier(use);
} }
j -= (oc1 - res->outcnt()); j -= (oc1 - res->outcnt());
} }
@ -2801,5 +2702,6 @@ bool PhaseMacroExpand::expand_macro_nodes() {
_igvn.set_delay_transform(false); _igvn.set_delay_transform(false);
_igvn.optimize(); _igvn.optimize();
if (C->failing()) return true; if (C->failing()) return true;
return false; BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
return bs->expand_macro_nodes(this);
} }

View file

@ -37,11 +37,8 @@ class PhaseMacroExpand : public Phase {
private: private:
PhaseIterGVN &_igvn; PhaseIterGVN &_igvn;
public:
// Helper methods roughly modeled after GraphKit: // Helper methods roughly modeled after GraphKit:
Node* top() const { return C->top(); }
Node* intcon(jint con) const { return _igvn.intcon(con); }
Node* longcon(jlong con) const { return _igvn.longcon(con); }
Node* makecon(const Type *t) const { return _igvn.makecon(t); }
Node* basic_plus_adr(Node* base, int offset) { Node* basic_plus_adr(Node* base, int offset) {
return (offset == 0)? base: basic_plus_adr(base, MakeConX(offset)); return (offset == 0)? base: basic_plus_adr(base, MakeConX(offset));
} }
@ -66,6 +63,7 @@ private:
Node* make_store(Node* ctl, Node* mem, Node* base, int offset, Node* make_store(Node* ctl, Node* mem, Node* base, int offset,
Node* value, BasicType bt); Node* value, BasicType bt);
private:
// projections extracted from a call node // projections extracted from a call node
ProjNode *_fallthroughproj; ProjNode *_fallthroughproj;
ProjNode *_fallthroughcatchproj; ProjNode *_fallthroughcatchproj;
@ -94,7 +92,7 @@ private:
bool scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints_done); bool scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints_done);
void process_users_of_allocation(CallNode *alloc); void process_users_of_allocation(CallNode *alloc);
void eliminate_card_mark(Node *cm); void eliminate_gc_barrier(Node *p2x);
void mark_eliminated_box(Node* box, Node* obj); void mark_eliminated_box(Node* box, Node* obj);
void mark_eliminated_locking_nodes(AbstractLockNode *alock); void mark_eliminated_locking_nodes(AbstractLockNode *alock);
bool eliminate_locking_node(AbstractLockNode *alock); bool eliminate_locking_node(AbstractLockNode *alock);
@ -209,6 +207,14 @@ public:
void eliminate_macro_nodes(); void eliminate_macro_nodes();
bool expand_macro_nodes(); bool expand_macro_nodes();
PhaseIterGVN &igvn() const { return _igvn; }
// Members accessed from BarrierSetC2
void replace_node(Node* source, Node* target) { _igvn.replace_node(source, target); }
Node* intcon(jint con) const { return _igvn.intcon(con); }
Node* longcon(jlong con) const { return _igvn.longcon(con); }
Node* makecon(const Type *t) const { return _igvn.makecon(t); }
Node* top() const { return C->top(); }
}; };
#endif // SHARE_VM_OPTO_MACRO_HPP #endif // SHARE_VM_OPTO_MACRO_HPP

View file

@ -550,9 +550,9 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
} }
// At this point we know we do not need type checks on oop stores. // At this point we know we do not need type checks on oop stores.
// Let's see if we need card marks: BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
if (alloc != NULL && GraphKit::use_ReduceInitialCardMarks()) { if (alloc != NULL && !bs->array_copy_requires_gc_barriers(copy_type)) {
// If we do not need card marks, copy using the jint or jlong stub. // If we do not need gc barriers, copy using the jint or jlong stub.
copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT); copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT);
assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type), assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type),
"sizes agree"); "sizes agree");

View file

@ -23,6 +23,8 @@
*/ */
#include "precompiled.hpp" #include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "libadt/vectset.hpp" #include "libadt/vectset.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
@ -37,6 +39,7 @@
#include "opto/regmask.hpp" #include "opto/regmask.hpp"
#include "opto/type.hpp" #include "opto/type.hpp"
#include "utilities/copy.hpp" #include "utilities/copy.hpp"
#include "utilities/macros.hpp"
class RegMask; class RegMask;
// #include "phase.hpp" // #include "phase.hpp"
@ -499,6 +502,8 @@ Node *Node::clone() const {
C->add_macro_node(n); C->add_macro_node(n);
if (is_expensive()) if (is_expensive())
C->add_expensive_node(n); C->add_expensive_node(n);
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->register_potential_barrier_node(n);
// If the cloned node is a range check dependent CastII, add it to the list. // If the cloned node is a range check dependent CastII, add it to the list.
CastIINode* cast = n->isa_CastII(); CastIINode* cast = n->isa_CastII();
if (cast != NULL && cast->has_range_check()) { if (cast != NULL && cast->has_range_check()) {
@ -622,6 +627,8 @@ void Node::destruct() {
if (is_SafePoint()) { if (is_SafePoint()) {
as_SafePoint()->delete_replaced_nodes(); as_SafePoint()->delete_replaced_nodes();
} }
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->unregister_potential_barrier_node(this);
#ifdef ASSERT #ifdef ASSERT
// We will not actually delete the storage, but we'll make the node unusable. // We will not actually delete the storage, but we'll make the node unusable.
*(address*)this = badAddress; // smash the C++ vtbl, probably *(address*)this = badAddress; // smash the C++ vtbl, probably
@ -1361,6 +1368,8 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
if (dead->Opcode() == Op_Opaque4) { if (dead->Opcode() == Op_Opaque4) {
igvn->C->remove_range_check_cast(dead); igvn->C->remove_range_check_cast(dead);
} }
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->unregister_potential_barrier_node(dead);
igvn->C->record_dead_node(dead->_idx); igvn->C->record_dead_node(dead->_idx);
// Kill all inputs to the dead guy // Kill all inputs to the dead guy
for (uint i=0; i < dead->req(); i++) { for (uint i=0; i < dead->req(); i++) {
@ -1379,6 +1388,8 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
// The restriction (outcnt() <= 2) is the same as in set_req_X() // The restriction (outcnt() <= 2) is the same as in set_req_X()
// and remove_globally_dead_node(). // and remove_globally_dead_node().
igvn->add_users_to_worklist( n ); igvn->add_users_to_worklist( n );
} else {
BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(igvn->_worklist, n);
} }
} }
} }

View file

@ -51,29 +51,60 @@ extern int explicit_null_checks_inserted,
#endif #endif
//---------------------------------array_load---------------------------------- //---------------------------------array_load----------------------------------
void Parse::array_load(BasicType elem_type) { void Parse::array_load(BasicType bt) {
const Type* elem = Type::TOP; const Type* elemtype = Type::TOP;
Node* adr = array_addressing(elem_type, 0, &elem); bool big_val = bt == T_DOUBLE || bt == T_LONG;
Node* adr = array_addressing(bt, 0, &elemtype);
if (stopped()) return; // guaranteed null or range check if (stopped()) return; // guaranteed null or range check
dec_sp(2); // Pop array and index
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); pop(); // index (already used)
Node* ld = make_load(control(), adr, elem, elem_type, adr_type, MemNode::unordered); Node* array = pop(); // the array itself
push(ld);
if (elemtype == TypeInt::BOOL) {
bt = T_BOOLEAN;
} else if (bt == T_OBJECT) {
elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr();
}
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(bt);
Node* ld = access_load_at(array, adr, adr_type, elemtype, bt,
IN_HEAP | IN_HEAP_ARRAY | C2_CONTROL_DEPENDENT_LOAD);
if (big_val) {
push_pair(ld);
} else {
push(ld);
}
} }
//--------------------------------array_store---------------------------------- //--------------------------------array_store----------------------------------
void Parse::array_store(BasicType elem_type) { void Parse::array_store(BasicType bt) {
const Type* elem = Type::TOP; const Type* elemtype = Type::TOP;
Node* adr = array_addressing(elem_type, 1, &elem); bool big_val = bt == T_DOUBLE || bt == T_LONG;
Node* adr = array_addressing(bt, big_val ? 2 : 1, &elemtype);
if (stopped()) return; // guaranteed null or range check if (stopped()) return; // guaranteed null or range check
Node* val = pop(); if (bt == T_OBJECT) {
dec_sp(2); // Pop array and index array_store_check();
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
if (elem == TypeInt::BOOL) {
elem_type = T_BOOLEAN;
} }
store_to_memory(control(), adr, val, elem_type, adr_type, StoreNode::release_if_reference(elem_type)); Node* val; // Oop to store
if (big_val) {
val = pop_pair();
} else {
val = pop();
}
pop(); // index (already used)
Node* array = pop(); // the array itself
if (elemtype == TypeInt::BOOL) {
bt = T_BOOLEAN;
} else if (bt == T_OBJECT) {
elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr();
}
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(bt);
access_store_at(control(), array, adr, adr_type, val, elemtype, bt, MO_UNORDERED | IN_HEAP | IN_HEAP_ARRAY);
} }
@ -2141,61 +2172,23 @@ void Parse::do_one_bytecode() {
break; break;
} }
case Bytecodes::_baload: array_load(T_BYTE); break; case Bytecodes::_baload: array_load(T_BYTE); break;
case Bytecodes::_caload: array_load(T_CHAR); break; case Bytecodes::_caload: array_load(T_CHAR); break;
case Bytecodes::_iaload: array_load(T_INT); break; case Bytecodes::_iaload: array_load(T_INT); break;
case Bytecodes::_saload: array_load(T_SHORT); break; case Bytecodes::_saload: array_load(T_SHORT); break;
case Bytecodes::_faload: array_load(T_FLOAT); break; case Bytecodes::_faload: array_load(T_FLOAT); break;
case Bytecodes::_aaload: array_load(T_OBJECT); break; case Bytecodes::_aaload: array_load(T_OBJECT); break;
case Bytecodes::_laload: { case Bytecodes::_laload: array_load(T_LONG); break;
a = array_addressing(T_LONG, 0); case Bytecodes::_daload: array_load(T_DOUBLE); break;
if (stopped()) return; // guaranteed null or range check case Bytecodes::_bastore: array_store(T_BYTE); break;
dec_sp(2); // Pop array and index case Bytecodes::_castore: array_store(T_CHAR); break;
push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS, MemNode::unordered)); case Bytecodes::_iastore: array_store(T_INT); break;
break; case Bytecodes::_sastore: array_store(T_SHORT); break;
} case Bytecodes::_fastore: array_store(T_FLOAT); break;
case Bytecodes::_daload: { case Bytecodes::_aastore: array_store(T_OBJECT); break;
a = array_addressing(T_DOUBLE, 0); case Bytecodes::_lastore: array_store(T_LONG); break;
if (stopped()) return; // guaranteed null or range check case Bytecodes::_dastore: array_store(T_DOUBLE); break;
dec_sp(2); // Pop array and index
push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered));
break;
}
case Bytecodes::_bastore: array_store(T_BYTE); break;
case Bytecodes::_castore: array_store(T_CHAR); break;
case Bytecodes::_iastore: array_store(T_INT); break;
case Bytecodes::_sastore: array_store(T_SHORT); break;
case Bytecodes::_fastore: array_store(T_FLOAT); break;
case Bytecodes::_aastore: {
d = array_addressing(T_OBJECT, 1);
if (stopped()) return; // guaranteed null or range check
array_store_check();
c = pop(); // Oop to store
b = pop(); // index (already used)
a = pop(); // the array itself
const TypeOopPtr* elemtype = _gvn.type(a)->is_aryptr()->elem()->make_oopptr();
const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT,
StoreNode::release_if_reference(T_OBJECT));
break;
}
case Bytecodes::_lastore: {
a = array_addressing(T_LONG, 2);
if (stopped()) return; // guaranteed null or range check
c = pop_pair();
dec_sp(2); // Pop array and index
store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS, MemNode::unordered);
break;
}
case Bytecodes::_dastore: {
a = array_addressing(T_DOUBLE, 2);
if (stopped()) return; // guaranteed null or range check
c = pop_pair();
dec_sp(2); // Pop array and index
c = dstore_rounding(c);
store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered);
break;
}
case Bytecodes::_getfield: case Bytecodes::_getfield:
do_getfield(); do_getfield();
break; break;

View file

@ -177,7 +177,12 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
bool must_assert_null = false; bool must_assert_null = false;
if( bt == T_OBJECT ) { DecoratorSet decorators = IN_HEAP;
decorators |= is_vol ? MO_SEQ_CST : MO_UNORDERED;
bool is_obj = bt == T_OBJECT || bt == T_ARRAY;
if (is_obj) {
if (!field->type()->is_loaded()) { if (!field->type()->is_loaded()) {
type = TypeInstPtr::BOTTOM; type = TypeInstPtr::BOTTOM;
must_assert_null = true; must_assert_null = true;
@ -198,14 +203,8 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
} else { } else {
type = Type::get_const_basic_type(bt); type = Type::get_const_basic_type(bt);
} }
if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) {
insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier Node* ld = access_load_at(obj, adr, adr_type, type, bt, decorators);
}
// Build the load.
//
MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
bool needs_atomic_access = is_vol || AlwaysAtomicAccesses;
Node* ld = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, needs_atomic_access);
// Adjust Java stack // Adjust Java stack
if (type2size[bt] == 1) if (type2size[bt] == 1)
@ -236,22 +235,10 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
null_assert(peek()); null_assert(peek());
set_bci(iter().cur_bci()); // put it back set_bci(iter().cur_bci()); // put it back
} }
// If reference is volatile, prevent following memory ops from
// floating up past the volatile read. Also prevents commoning
// another volatile read.
if (field->is_volatile()) {
// Memory barrier includes bogus read of value to force load BEFORE membar
insert_mem_bar(Op_MemBarAcquire, ld);
}
} }
void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) { void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) {
bool is_vol = field->is_volatile(); bool is_vol = field->is_volatile();
// If reference is volatile, prevent following memory ops from
// floating down past the volatile write. Also prevents commoning
// another volatile read.
if (is_vol) insert_mem_bar(Op_MemBarRelease);
// Compute address and memory type. // Compute address and memory type.
int offset = field->offset_in_bytes(); int offset = field->offset_in_bytes();
@ -260,71 +247,50 @@ void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) {
BasicType bt = field->layout_type(); BasicType bt = field->layout_type();
// Value to be stored // Value to be stored
Node* val = type2size[bt] == 1 ? pop() : pop_pair(); Node* val = type2size[bt] == 1 ? pop() : pop_pair();
// Round doubles before storing
if (bt == T_DOUBLE) val = dstore_rounding(val);
// Conservatively release stores of object references. DecoratorSet decorators = IN_HEAP;
const MemNode::MemOrd mo = decorators |= is_vol ? MO_SEQ_CST : MO_UNORDERED;
is_vol ?
// Volatile fields need releasing stores. bool is_obj = bt == T_OBJECT || bt == T_ARRAY;
MemNode::release :
// Non-volatile fields also need releasing stores if they hold an
// object reference, because the object reference might point to
// a freshly created object.
StoreNode::release_if_reference(bt);
// Store the value. // Store the value.
Node* store; const Type* field_type;
if (bt == T_OBJECT) { if (!field->type()->is_loaded()) {
const TypeOopPtr* field_type; field_type = TypeInstPtr::BOTTOM;
if (!field->type()->is_loaded()) {
field_type = TypeInstPtr::BOTTOM;
} else {
field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
}
store = store_oop_to_object(control(), obj, adr, adr_type, val, field_type, bt, mo);
} else { } else {
bool needs_atomic_access = is_vol || AlwaysAtomicAccesses; if (is_obj) {
store = store_to_memory(control(), adr, val, bt, adr_type, mo, needs_atomic_access); field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
} } else {
field_type = Type::BOTTOM;
// If reference is volatile, prevent following volatiles ops from
// floating up before the volatile write.
if (is_vol) {
// If not multiple copy atomic, we do the MemBarVolatile before the load.
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
insert_mem_bar(Op_MemBarVolatile); // Use fat membar
} }
}
access_store_at(control(), obj, adr, adr_type, val, field_type, bt, decorators);
if (is_field) {
// Remember we wrote a volatile field. // Remember we wrote a volatile field.
// For not multiple copy atomic cpu (ppc64) a barrier should be issued // For not multiple copy atomic cpu (ppc64) a barrier should be issued
// in constructors which have such stores. See do_exits() in parse1.cpp. // in constructors which have such stores. See do_exits() in parse1.cpp.
if (is_field) { if (is_vol) {
set_wrote_volatile(true); set_wrote_volatile(true);
} }
}
if (is_field) {
set_wrote_fields(true); set_wrote_fields(true);
}
// If the field is final, the rules of Java say we are in <init> or <clinit>. // If the field is final, the rules of Java say we are in <init> or <clinit>.
// Note the presence of writes to final non-static fields, so that we // Note the presence of writes to final non-static fields, so that we
// can insert a memory barrier later on to keep the writes from floating // can insert a memory barrier later on to keep the writes from floating
// out of the constructor. // out of the constructor.
// Any method can write a @Stable field; insert memory barriers after those also. // Any method can write a @Stable field; insert memory barriers after those also.
if (is_field && (field->is_final() || field->is_stable())) {
if (field->is_final()) { if (field->is_final()) {
set_wrote_final(true); set_wrote_final(true);
if (AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
// Preserve allocation ptr to create precedent edge to it in membar
// generated on exit from constructor.
// Can't bind stable with its allocation, only record allocation for final field.
set_alloc_with_final(obj);
}
} }
if (field->is_stable()) { if (field->is_stable()) {
set_wrote_stable(true); set_wrote_stable(true);
}
// Preserve allocation ptr to create precedent edge to it in membar
// generated on exit from constructor.
// Can't bind stable with its allocation, only record allocation for final field.
if (field->is_final() && AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
set_alloc_with_final(obj);
} }
} }
} }
@ -385,7 +351,7 @@ Node* Parse::expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, in
Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs); Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs);
intptr_t offset = header + ((intptr_t)i << LogBytesPerHeapOop); intptr_t offset = header + ((intptr_t)i << LogBytesPerHeapOop);
Node* eaddr = basic_plus_adr(array, offset); Node* eaddr = basic_plus_adr(array, offset);
store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, MemNode::unordered); access_store_at(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, IN_HEAP | IN_HEAP_ARRAY);
} }
} }
return array; return array;

View file

@ -23,6 +23,8 @@
*/ */
#include "precompiled.hpp" #include "precompiled.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/c2/barrierSetC2.hpp"
#include "memory/allocation.inline.hpp" #include "memory/allocation.inline.hpp"
#include "memory/resourceArea.hpp" #include "memory/resourceArea.hpp"
#include "opto/block.hpp" #include "opto/block.hpp"
@ -36,6 +38,7 @@
#include "opto/phaseX.hpp" #include "opto/phaseX.hpp"
#include "opto/regalloc.hpp" #include "opto/regalloc.hpp"
#include "opto/rootnode.hpp" #include "opto/rootnode.hpp"
#include "utilities/macros.hpp"
//============================================================================= //=============================================================================
#define NODE_HASH_MINIMUM_SIZE 255 #define NODE_HASH_MINIMUM_SIZE 255
@ -939,6 +942,9 @@ PhaseIterGVN::PhaseIterGVN( PhaseGVN *gvn ) : PhaseGVN(gvn),
n->is_Mem() ) n->is_Mem() )
add_users_to_worklist(n); add_users_to_worklist(n);
} }
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->add_users_to_worklist(&_worklist);
} }
/** /**
@ -1369,6 +1375,8 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
} }
assert(!(i < imax), "sanity"); assert(!(i < imax), "sanity");
} }
} else {
BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(_worklist, in);
} }
if (ReduceFieldZeroing && dead->is_Load() && i == MemNode::Memory && if (ReduceFieldZeroing && dead->is_Load() && i == MemNode::Memory &&
in->is_Proj() && in->in(0) != NULL && in->in(0)->is_Initialize()) { in->is_Proj() && in->in(0) != NULL && in->in(0)->is_Initialize()) {
@ -1424,6 +1432,8 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
if (dead->Opcode() == Op_Opaque4) { if (dead->Opcode() == Op_Opaque4) {
C->remove_opaque4_node(dead); C->remove_opaque4_node(dead);
} }
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
bs->unregister_potential_barrier_node(dead);
} }
} // while (_stack.is_nonempty()) } // while (_stack.is_nonempty())
} }

View file

@ -95,8 +95,6 @@ address OptoRuntime::_multianewarray3_Java = NULL;
address OptoRuntime::_multianewarray4_Java = NULL; address OptoRuntime::_multianewarray4_Java = NULL;
address OptoRuntime::_multianewarray5_Java = NULL; address OptoRuntime::_multianewarray5_Java = NULL;
address OptoRuntime::_multianewarrayN_Java = NULL; address OptoRuntime::_multianewarrayN_Java = NULL;
address OptoRuntime::_g1_wb_pre_Java = NULL;
address OptoRuntime::_g1_wb_post_Java = NULL;
address OptoRuntime::_vtable_must_compile_Java = NULL; address OptoRuntime::_vtable_must_compile_Java = NULL;
address OptoRuntime::_complete_monitor_locking_Java = NULL; address OptoRuntime::_complete_monitor_locking_Java = NULL;
address OptoRuntime::_monitor_notify_Java = NULL; address OptoRuntime::_monitor_notify_Java = NULL;
@ -141,10 +139,6 @@ bool OptoRuntime::generate(ciEnv* env) {
gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false); gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false); gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false);
gen(env, _multianewarrayN_Java , multianewarrayN_Type , multianewarrayN_C , 0 , true , false, false); gen(env, _multianewarrayN_Java , multianewarrayN_Type , multianewarrayN_C , 0 , true , false, false);
#if INCLUDE_G1GC
gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false);
gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false);
#endif // INCLUDE_G1GC
gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false); gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
gen(env, _monitor_notify_Java , monitor_notify_Type , monitor_notify_C , 0 , false, false, false); gen(env, _monitor_notify_Java , monitor_notify_Type , monitor_notify_C , 0 , false, false, false);
gen(env, _monitor_notifyAll_Java , monitor_notify_Type , monitor_notifyAll_C , 0 , false, false, false); gen(env, _monitor_notifyAll_Java , monitor_notify_Type , monitor_notifyAll_C , 0 , false, false, false);
@ -544,33 +538,6 @@ const TypeFunc *OptoRuntime::multianewarrayN_Type() {
return TypeFunc::make(domain, range); return TypeFunc::make(domain, range);
} }
const TypeFunc *OptoRuntime::g1_wb_pre_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::g1_wb_post_Type() {
const Type **fields = TypeTuple::fields(2);
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
// create result type (range)
fields = TypeTuple::fields(0);
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
return TypeFunc::make(domain, range);
}
const TypeFunc *OptoRuntime::uncommon_trap_Type() { const TypeFunc *OptoRuntime::uncommon_trap_Type() {
// create input type (domain) // create input type (domain)
const Type **fields = TypeTuple::fields(1); const Type **fields = TypeTuple::fields(1);

View file

@ -141,8 +141,6 @@ class OptoRuntime : public AllStatic {
static address _multianewarray4_Java; static address _multianewarray4_Java;
static address _multianewarray5_Java; static address _multianewarray5_Java;
static address _multianewarrayN_Java; static address _multianewarrayN_Java;
static address _g1_wb_pre_Java;
static address _g1_wb_post_Java;
static address _vtable_must_compile_Java; static address _vtable_must_compile_Java;
static address _complete_monitor_locking_Java; static address _complete_monitor_locking_Java;
static address _rethrow_Java; static address _rethrow_Java;
@ -170,8 +168,6 @@ class OptoRuntime : public AllStatic {
static void multianewarray4_C(Klass* klass, int len1, int len2, int len3, int len4, JavaThread *thread); static void multianewarray4_C(Klass* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
static void multianewarray5_C(Klass* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread); static void multianewarray5_C(Klass* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
static void multianewarrayN_C(Klass* klass, arrayOopDesc* dims, JavaThread *thread); static void multianewarrayN_C(Klass* klass, arrayOopDesc* dims, JavaThread *thread);
static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread);
static void g1_wb_post_C(void* card_addr, JavaThread* thread);
public: public:
// Slow-path Locking and Unlocking // Slow-path Locking and Unlocking
@ -223,8 +219,6 @@ private:
static address multianewarray4_Java() { return _multianewarray4_Java; } static address multianewarray4_Java() { return _multianewarray4_Java; }
static address multianewarray5_Java() { return _multianewarray5_Java; } static address multianewarray5_Java() { return _multianewarray5_Java; }
static address multianewarrayN_Java() { return _multianewarrayN_Java; } static address multianewarrayN_Java() { return _multianewarrayN_Java; }
static address g1_wb_pre_Java() { return _g1_wb_pre_Java; }
static address g1_wb_post_Java() { return _g1_wb_post_Java; }
static address vtable_must_compile_stub() { return _vtable_must_compile_Java; } static address vtable_must_compile_stub() { return _vtable_must_compile_Java; }
static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; } static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; }
static address monitor_notify_Java() { return _monitor_notify_Java; } static address monitor_notify_Java() { return _monitor_notify_Java; }
@ -257,8 +251,6 @@ private:
static const TypeFunc* multianewarray4_Type(); // multianewarray static const TypeFunc* multianewarray4_Type(); // multianewarray
static const TypeFunc* multianewarray5_Type(); // multianewarray static const TypeFunc* multianewarray5_Type(); // multianewarray
static const TypeFunc* multianewarrayN_Type(); // multianewarray static const TypeFunc* multianewarrayN_Type(); // multianewarray
static const TypeFunc* g1_wb_pre_Type();
static const TypeFunc* g1_wb_post_Type();
static const TypeFunc* complete_monitor_enter_Type(); static const TypeFunc* complete_monitor_enter_Type();
static const TypeFunc* complete_monitor_exit_Type(); static const TypeFunc* complete_monitor_exit_Type();
static const TypeFunc* monitor_notify_Type(); static const TypeFunc* monitor_notify_Type();