Mirror of https://github.com/openjdk/jdk.git, synced 2025-09-18 01:54:47 +02:00
8202377: Modularize C2 GC barriers
Reviewed-by: neliasso, roland
parent 2aa9d028c7
commit 53ec88908c
31 changed files with 2648 additions and 1832 deletions
src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp  (new file, 772 lines)
@@ -0,0 +1,772 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "gc/g1/c2/g1BarrierSetC2.hpp"
|
||||
#include "gc/g1/g1BarrierSet.hpp"
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1ThreadLocalData.hpp"
|
||||
#include "gc/g1/heapRegion.hpp"
|
||||
#include "opto/arraycopynode.hpp"
|
||||
#include "opto/graphKit.hpp"
|
||||
#include "opto/idealKit.hpp"
|
||||
#include "opto/macro.hpp"
|
||||
#include "opto/type.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
const TypeFunc *G1BarrierSetC2::g1_wb_pre_Type() {
|
||||
const Type **fields = TypeTuple::fields(2);
|
||||
fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
|
||||
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
|
||||
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
|
||||
|
||||
// create result type (range)
|
||||
fields = TypeTuple::fields(0);
|
||||
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
|
||||
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
const TypeFunc *G1BarrierSetC2::g1_wb_post_Type() {
|
||||
const Type **fields = TypeTuple::fields(2);
|
||||
fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Card addr
|
||||
fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
|
||||
const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
|
||||
|
||||
// create result type (range)
|
||||
fields = TypeTuple::fields(0);
|
||||
const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
|
||||
|
||||
return TypeFunc::make(domain, range);
|
||||
}
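For reference, the two TypeFunc builders above describe the calling convention of the G1 write-barrier slow paths that are invoked via make_leaf_call() further down. A sketch of the corresponding runtime entry points (signatures inferred from the domains built here, not part of this diff):

// Hypothetical declarations matching g1_wb_pre_Type() / g1_wb_post_Type():
// one oop or card-address argument plus the current thread, no return value.
void g1_wb_pre(oopDesc* orig, JavaThread* thread);    // logs 'orig' in the SATB queue
void g1_wb_post(void* card_addr, JavaThread* thread); // logs 'card_addr' in the dirty card queue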
|
||||
|
||||
#define __ ideal.
|
||||
/*
 * Determine if the G1 pre-barrier can be removed. The pre-barrier is
 * required by SATB to make sure all objects live at the start of the
 * marking are kept alive; to achieve this, every reference update must
 * log the previous reference value before the new value is written.
 *
 * If the previous value is NULL there is no need to save the old value.
 * References that are NULL are filtered during runtime by the barrier
 * code to avoid unnecessary queuing.
 *
 * However, in the case of newly allocated objects it might be possible to
 * prove at compile time that the reference about to be overwritten is NULL
 * and avoid adding the barrier code completely.
 *
 * The compiler needs to determine that the object in which a field is about
 * to be written is newly allocated, and that no prior store to the same field
 * has happened since the allocation.
 *
 * Returns true if the pre-barrier can be removed.
 */
|
||||
bool G1BarrierSetC2::g1_can_remove_pre_barrier(GraphKit* kit,
|
||||
PhaseTransform* phase,
|
||||
Node* adr,
|
||||
BasicType bt,
|
||||
uint adr_idx) const {
|
||||
intptr_t offset = 0;
|
||||
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
|
||||
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
|
||||
|
||||
if (offset == Type::OffsetBot) {
|
||||
return false; // cannot unalias unless there are precise offsets
|
||||
}
|
||||
|
||||
if (alloc == NULL) {
|
||||
return false; // No allocation found
|
||||
}
|
||||
|
||||
intptr_t size_in_bytes = type2aelembytes(bt);
|
||||
|
||||
Node* mem = kit->memory(adr_idx); // start searching here...
|
||||
|
||||
for (int cnt = 0; cnt < 50; cnt++) {
|
||||
|
||||
if (mem->is_Store()) {
|
||||
|
||||
Node* st_adr = mem->in(MemNode::Address);
|
||||
intptr_t st_offset = 0;
|
||||
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
|
||||
|
||||
if (st_base == NULL) {
|
||||
break; // inscrutable pointer
|
||||
}
|
||||
|
||||
// We have found a store with the same base and offset as ours, so stop searching.
|
||||
if (st_base == base && st_offset == offset) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (st_offset != offset && st_offset != Type::OffsetBot) {
|
||||
const int MAX_STORE = BytesPerLong;
|
||||
if (st_offset >= offset + size_in_bytes ||
|
||||
st_offset <= offset - MAX_STORE ||
|
||||
st_offset <= offset - mem->as_Store()->memory_size()) {
|
||||
// Success: The offsets are provably independent.
|
||||
// (You may ask, why not just test st_offset != offset and be done?
|
||||
// The answer is that stores of different sizes can co-exist
|
||||
// in the same sequence of RawMem effects. We sometimes initialize
|
||||
// a whole 'tile' of array elements with a single jint or jlong.)
|
||||
mem = mem->in(MemNode::Memory);
|
||||
continue; // advance through independent store memory
|
||||
}
|
||||
}
|
||||
|
||||
if (st_base != base
|
||||
&& MemNode::detect_ptr_independence(base, alloc, st_base,
|
||||
AllocateNode::Ideal_allocation(st_base, phase),
|
||||
phase)) {
|
||||
// Success: The bases are provably independent.
|
||||
mem = mem->in(MemNode::Memory);
|
||||
continue; // advance through independent store memory
|
||||
}
|
||||
} else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
|
||||
|
||||
InitializeNode* st_init = mem->in(0)->as_Initialize();
|
||||
AllocateNode* st_alloc = st_init->allocation();
|
||||
|
||||
// Make sure that we are looking at the same allocation site.
|
||||
// The alloc variable is guaranteed to not be null here from earlier check.
|
||||
if (alloc == st_alloc) {
|
||||
// Check that the initialization stores NULL, so that no previous store
// has been moved up into the initialization to write a reference directly.
|
||||
Node* captured_store = st_init->find_captured_store(offset,
|
||||
type2aelembytes(T_OBJECT),
|
||||
phase);
|
||||
if (captured_store == NULL || captured_store == st_init->zero_memory()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unless there is an explicit 'continue', we must bail out here,
|
||||
// because 'mem' is an inscrutable memory state (e.g., a call).
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// G1 pre/post barriers
|
||||
void G1BarrierSetC2::pre_barrier(GraphKit* kit,
|
||||
bool do_load,
|
||||
Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint alias_idx,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
Node* pre_val,
|
||||
BasicType bt) const {
|
||||
// Some sanity checks
|
||||
// Note: val is unused in this routine.
|
||||
|
||||
if (do_load) {
|
||||
// We need to generate the load of the previous value
|
||||
assert(obj != NULL, "must have a base");
|
||||
assert(adr != NULL, "where are we loading from?");
|
||||
assert(pre_val == NULL, "loaded already?");
|
||||
assert(val_type != NULL, "need a type");
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& g1_can_remove_pre_barrier(kit, &kit->gvn(), adr, bt, alias_idx)) {
|
||||
return;
|
||||
}
|
||||
|
||||
} else {
|
||||
// In this case both val_type and alias_idx are unused.
|
||||
assert(pre_val != NULL, "must be loaded already");
|
||||
// Nothing to be done if pre_val is null.
|
||||
if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
|
||||
assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
|
||||
}
|
||||
assert(bt == T_OBJECT, "or we shouldn't be here");
|
||||
|
||||
IdealKit ideal(kit, true);
|
||||
|
||||
Node* tls = __ thread(); // ThreadLocalStorage
|
||||
|
||||
Node* no_base = __ top();
|
||||
Node* zero = __ ConI(0);
|
||||
Node* zeroX = __ ConX(0);
|
||||
|
||||
float likely = PROB_LIKELY(0.999);
|
||||
float unlikely = PROB_UNLIKELY(0.999);
|
||||
|
||||
BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
|
||||
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width");
|
||||
|
||||
// Offsets into the thread
|
||||
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
|
||||
const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
|
||||
const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
|
||||
|
||||
// Now the actual pointers into the thread
|
||||
Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
|
||||
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
|
||||
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
|
||||
|
||||
// Now some of the values
|
||||
Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
|
||||
|
||||
// if (!marking)
|
||||
__ if_then(marking, BoolTest::ne, zero, unlikely); {
|
||||
BasicType index_bt = TypeX_X->basic_type();
|
||||
assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size.");
|
||||
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
|
||||
|
||||
if (do_load) {
|
||||
// load original value
|
||||
// alias_idx correct??
|
||||
pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx);
|
||||
}
|
||||
|
||||
// if (pre_val != NULL)
|
||||
__ if_then(pre_val, BoolTest::ne, kit->null()); {
|
||||
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
|
||||
|
||||
// is the queue for this thread full?
|
||||
__ if_then(index, BoolTest::ne, zeroX, likely); {
|
||||
|
||||
// decrement the index
|
||||
Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
|
||||
|
||||
// Now get the buffer location we will log the previous value into and store it
|
||||
Node *log_addr = __ AddP(no_base, buffer, next_index);
|
||||
__ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
|
||||
// update the index
|
||||
__ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
|
||||
|
||||
} __ else_(); {
|
||||
|
||||
// logging buffer is full, call the runtime
|
||||
const TypeFunc *tf = g1_wb_pre_Type();
|
||||
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
|
||||
} __ end_if(); // (!index)
|
||||
} __ end_if(); // (pre_val != NULL)
|
||||
} __ end_if(); // (!marking)
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
kit->final_sync(ideal);
|
||||
}
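The function above emits this logic as Ideal graph nodes; as a reading aid, here is a minimal C++ sketch of the equivalent runtime behaviour (an editorial illustration with hypothetical type and function names, not code from this commit):

#include <cstddef>

// Toy stand-in for the per-thread SATB queue fields accessed via G1ThreadLocalData.
struct ToyThread {
  bool   satb_active;   // satb_mark_queue_active_offset()
  size_t satb_index;    // byte index into the buffer, counts down to 0
  void** satb_buffer;   // satb_mark_queue_buffer_offset()
};

// Stand-in for the SharedRuntime::g1_wb_pre leaf call.
inline void runtime_enqueue_pre(void* pre_val, ToyThread* t) { (void)pre_val; (void)t; /* hand off to the shared SATB queue (omitted) */ }

inline void satb_pre_barrier(void** field, ToyThread* t) {
  if (!t->satb_active) return;         // marking not active: no logging needed
  void* pre_val = *field;              // load the value about to be overwritten
  if (pre_val == nullptr) return;      // NULL previous values are filtered out
  if (t->satb_index != 0) {            // fast path: room left in the thread-local buffer
    t->satb_index -= sizeof(void*);
    t->satb_buffer[t->satb_index / sizeof(void*)] = pre_val;
  } else {
    runtime_enqueue_pre(pre_val, t);   // slow path: leaf call into the runtime
  }
}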
|
||||
|
||||
/*
 * G1, like any GC with a Young Generation, requires a way to keep track of
 * references from the Old Generation to the Young Generation to make sure all
 * live objects are found. G1 also needs to keep track of object references
 * between different regions to enable evacuation of old regions, which is done
 * as part of mixed collections. References are tracked in remembered sets,
 * which are continuously updated as references are written, with the help of
 * the post-barrier.
 *
 * To reduce the number of updates to the remembered set, the post-barrier
 * filters out updates to fields in objects located in the Young Generation,
 * updates where the field and the new reference are in the same region,
 * updates that write NULL, and updates to cards already marked as dirty by an
 * earlier write.
 *
 * Under certain circumstances it is possible to avoid generating the
 * post-barrier completely, if it can be proven at compile time that the
 * object is newly allocated and that no safepoint exists between the
 * allocation and the store.
 *
 * In the case of a slow-path allocation, the allocation code must handle the
 * barrier as part of the allocation when the allocated object is not located
 * in the nursery; this happens for humongous objects. This is similar to how
 * CMS is required to handle this case, see the comments for the methods
 * CollectedHeap::new_deferred_store_barrier and OptoRuntime::new_deferred_store_barrier.
 * A deferred card mark is required for these objects and is handled in the
 * above-mentioned methods.
 *
 * Returns true if the post-barrier can be removed.
 */
|
||||
bool G1BarrierSetC2::g1_can_remove_post_barrier(GraphKit* kit,
|
||||
PhaseTransform* phase, Node* store,
|
||||
Node* adr) const {
|
||||
intptr_t offset = 0;
|
||||
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
|
||||
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
|
||||
|
||||
if (offset == Type::OffsetBot) {
|
||||
return false; // cannot unalias unless there are precise offsets
|
||||
}
|
||||
|
||||
if (alloc == NULL) {
|
||||
return false; // No allocation found
|
||||
}
|
||||
|
||||
// Start search from Store node
|
||||
Node* mem = store->in(MemNode::Control);
|
||||
if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
|
||||
|
||||
InitializeNode* st_init = mem->in(0)->as_Initialize();
|
||||
AllocateNode* st_alloc = st_init->allocation();
|
||||
|
||||
// Make sure we are looking at the same allocation
|
||||
if (alloc == st_alloc) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Update the card table and add card address to the queue
|
||||
//
|
||||
void G1BarrierSetC2::g1_mark_card(GraphKit* kit,
|
||||
IdealKit& ideal,
|
||||
Node* card_adr,
|
||||
Node* oop_store,
|
||||
uint oop_alias_idx,
|
||||
Node* index,
|
||||
Node* index_adr,
|
||||
Node* buffer,
|
||||
const TypeFunc* tf) const {
|
||||
Node* zero = __ ConI(0);
|
||||
Node* zeroX = __ ConX(0);
|
||||
Node* no_base = __ top();
|
||||
BasicType card_bt = T_BYTE;
|
||||
// Smash zero into card. MUST BE ORDERED WRT THE OOP STORE
|
||||
__ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
|
||||
|
||||
// Now do the queue work
|
||||
__ if_then(index, BoolTest::ne, zeroX); {
|
||||
|
||||
Node* next_index = kit->gvn().transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
|
||||
Node* log_addr = __ AddP(no_base, buffer, next_index);
|
||||
|
||||
// Order, see storeCM.
|
||||
__ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
|
||||
__ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
|
||||
|
||||
} __ else_(); {
|
||||
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
|
||||
} __ end_if();
|
||||
|
||||
}
|
||||
|
||||
void G1BarrierSetC2::post_barrier(GraphKit* kit,
|
||||
Node* ctl,
|
||||
Node* oop_store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint alias_idx,
|
||||
Node* val,
|
||||
BasicType bt,
|
||||
bool use_precise) const {
|
||||
// If we are writing a NULL then we need no post barrier
|
||||
|
||||
if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
|
||||
// Must be NULL
|
||||
const Type* t = val->bottom_type();
|
||||
assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
|
||||
// No post barrier if writing NULL
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks() && obj == kit->just_allocated_object(kit->control())) {
|
||||
// We can skip marks on a freshly-allocated object in Eden.
|
||||
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
|
||||
// That routine informs GC to take appropriate compensating steps,
|
||||
// upon a slow-path allocation, so as to make this card-mark
|
||||
// elision safe.
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& g1_can_remove_post_barrier(kit, &kit->gvn(), oop_store, adr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!use_precise) {
|
||||
// All card marks for a (non-array) instance are in one place:
|
||||
adr = obj;
|
||||
}
|
||||
// (Else it's an array (or unknown), and we want more precise card marks.)
|
||||
assert(adr != NULL, "");
|
||||
|
||||
IdealKit ideal(kit, true);
|
||||
|
||||
Node* tls = __ thread(); // ThreadLocalStorage
|
||||
|
||||
Node* no_base = __ top();
|
||||
float unlikely = PROB_UNLIKELY(0.999);
|
||||
Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val());
|
||||
Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val());
|
||||
Node* zeroX = __ ConX(0);
|
||||
|
||||
const TypeFunc *tf = g1_wb_post_Type();
|
||||
|
||||
// Offsets into the thread
|
||||
const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
|
||||
const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
|
||||
|
||||
// Pointers into the thread
|
||||
|
||||
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
|
||||
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
|
||||
|
||||
// Now some values
|
||||
// Use ctrl to avoid hoisting these values past a safepoint, which could
|
||||
// potentially reset these fields in the JavaThread.
|
||||
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
|
||||
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
|
||||
|
||||
// Convert the store obj pointer to an int prior to doing math on it
|
||||
// Must use ctrl to prevent "integerized oop" existing across safepoint
|
||||
Node* cast = __ CastPX(__ ctrl(), adr);
|
||||
|
||||
// Divide pointer by card size
|
||||
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
|
||||
|
||||
// Combine card table base and card offset
|
||||
Node* card_adr = __ AddP(no_base, byte_map_base_node(kit), card_offset );
|
||||
|
||||
// If we know the value being stored, check whether the store crosses regions
|
||||
|
||||
if (val != NULL) {
|
||||
// Does the store cause us to cross regions?
|
||||
|
||||
// Should be able to do an unsigned compare of region_size instead of
// an extra shift. Do we have an unsigned compare?
|
||||
// Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
|
||||
Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
|
||||
|
||||
// if (xor_res == 0) same region so skip
|
||||
__ if_then(xor_res, BoolTest::ne, zeroX); {
|
||||
|
||||
// No barrier if we are storing a NULL
|
||||
__ if_then(val, BoolTest::ne, kit->null(), unlikely); {
|
||||
|
||||
// Ok must mark the card if not already dirty
|
||||
|
||||
// load the original value of the card
|
||||
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
|
||||
|
||||
__ if_then(card_val, BoolTest::ne, young_card); {
|
||||
kit->sync_kit(ideal);
|
||||
kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
|
||||
__ sync_kit(kit);
|
||||
|
||||
Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
|
||||
__ if_then(card_val_reload, BoolTest::ne, dirty_card); {
|
||||
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
|
||||
} __ end_if();
|
||||
} __ end_if();
|
||||
} __ end_if();
|
||||
} __ end_if();
|
||||
} else {
|
||||
// The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
|
||||
// We don't need a barrier here if the destination is a newly allocated object
|
||||
// in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
|
||||
// are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
|
||||
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
|
||||
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
|
||||
__ if_then(card_val, BoolTest::ne, young_card); {
|
||||
g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
|
||||
} __ end_if();
|
||||
}
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
kit->final_sync(ideal);
|
||||
}
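For readers who prefer straight-line code over the IdealKit graph built above, a minimal sketch of the runtime logic the post-barrier corresponds to (an editorial illustration; all constants and helper names below are hypothetical stand-ins for the real HeapRegion/G1CardTable values, not code from this commit):

#include <cstdint>

const int     kLogRegionSize = 20;  // stands in for HeapRegion::LogOfHRGrainBytes
const int     kCardShift     = 9;   // stands in for CardTable::card_shift
const uint8_t kYoungCard     = 2;   // stands in for G1CardTable::g1_young_card_val()
const uint8_t kDirtyCard     = 0;   // stands in for G1CardTable::dirty_card_val()

inline void enqueue_dirty_card(uint8_t* card) { (void)card; /* push onto the dirty card queue (omitted) */ }

inline void g1_post_barrier(void* field, void* new_val, uint8_t* card_table_base) {
  uintptr_t f = (uintptr_t)field;
  uintptr_t v = (uintptr_t)new_val;
  if (((f ^ v) >> kLogRegionSize) == 0) return;  // same region: no remembered-set update needed
  if (new_val == nullptr) return;                // storing NULL: nothing to remember
  uint8_t* card = card_table_base + (f >> kCardShift);
  if (*card == kYoungCard) return;               // young regions need no remembered-set entries
  // (the real barrier re-reads the card after a StoreLoad barrier, the MemBarVolatile above)
  if (*card == kDirtyCard) return;               // already queued by an earlier write
  *card = kDirtyCard;                            // g1_mark_card(): dirty the card ...
  enqueue_dirty_card(card);                      // ... and log its address for refinement
}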
|
||||
|
||||
// Helper that guards and inserts a pre-barrier.
|
||||
void G1BarrierSetC2::insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset,
|
||||
Node* pre_val, bool need_mem_bar) const {
|
||||
// We could be accessing the referent field of a reference object. If so, when G1
|
||||
// is enabled, we need to log the value in the referent field in an SATB buffer.
|
||||
// This routine performs some compile time filters and generates suitable
|
||||
// runtime filters that guard the pre-barrier code.
|
||||
// Also add a memory barrier for non-volatile loads from the referent field
// to prevent commoning of loads across a safepoint.
|
||||
|
||||
// Some compile time checks.
|
||||
|
||||
// If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
|
||||
const TypeX* otype = offset->find_intptr_t_type();
|
||||
if (otype != NULL && otype->is_con() &&
|
||||
otype->get_con() != java_lang_ref_Reference::referent_offset) {
|
||||
// Constant offset but not the reference_offset so just return
|
||||
return;
|
||||
}
|
||||
|
||||
// We only need to generate the runtime guards for instances.
|
||||
const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
|
||||
if (btype != NULL) {
|
||||
if (btype->isa_aryptr()) {
|
||||
// Array type so nothing to do
|
||||
return;
|
||||
}
|
||||
|
||||
const TypeInstPtr* itype = btype->isa_instptr();
|
||||
if (itype != NULL) {
|
||||
// Can the klass of base_oop be statically determined to be
|
||||
// _not_ a sub-class of Reference and _not_ Object?
|
||||
ciKlass* klass = itype->klass();
|
||||
if ( klass->is_loaded() &&
|
||||
!klass->is_subtype_of(kit->env()->Reference_klass()) &&
|
||||
!kit->env()->Object_klass()->is_subtype_of(klass)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The compile time filters did not reject base_oop/offset so
|
||||
// we need to generate the following runtime filters
|
||||
//
|
||||
// if (offset == java_lang_ref_Reference::_reference_offset) {
|
||||
// if (instance_of(base, java.lang.ref.Reference)) {
|
||||
// pre_barrier(_, pre_val, ...);
|
||||
// }
|
||||
// }
|
||||
|
||||
float likely = PROB_LIKELY( 0.999);
|
||||
float unlikely = PROB_UNLIKELY(0.999);
|
||||
|
||||
IdealKit ideal(kit);
|
||||
|
||||
Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
|
||||
|
||||
__ if_then(offset, BoolTest::eq, referent_off, unlikely); {
|
||||
// Update graphKit memory and control from IdealKit.
|
||||
kit->sync_kit(ideal);
|
||||
|
||||
Node* ref_klass_con = kit->makecon(TypeKlassPtr::make(kit->env()->Reference_klass()));
|
||||
Node* is_instof = kit->gen_instanceof(base_oop, ref_klass_con);
|
||||
|
||||
// Update IdealKit memory and control from graphKit.
|
||||
__ sync_kit(kit);
|
||||
|
||||
Node* one = __ ConI(1);
|
||||
// is_instof == 0 if base_oop == NULL
|
||||
__ if_then(is_instof, BoolTest::eq, one, unlikely); {
|
||||
|
||||
// Update graphKit from IdealKit.
|
||||
kit->sync_kit(ideal);
|
||||
|
||||
// Use the pre-barrier to record the value in the referent field
|
||||
pre_barrier(kit, false /* do_load */,
|
||||
__ ctrl(),
|
||||
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
|
||||
pre_val /* pre_val */,
|
||||
T_OBJECT);
|
||||
if (need_mem_bar) {
|
||||
// Add memory barrier to prevent commoning reads from this field
|
||||
// across safepoint since GC can change its value.
|
||||
kit->insert_mem_bar(Op_MemBarCPUOrder);
|
||||
}
|
||||
// Update IdealKit from graphKit.
|
||||
__ sync_kit(kit);
|
||||
|
||||
} __ end_if(); // _ref_type != ref_none
|
||||
} __ end_if(); // offset == referent_offset
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
kit->final_sync(ideal);
|
||||
}
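The runtime filter generated above can be summarised as ordinary code; a small editorial sketch (not part of this commit, with hypothetical parameter names standing in for java_lang_ref_Reference::referent_offset and the gen_instanceof() check):

inline void unsafe_oop_load_filter(long offset, void* loaded_value,
                                   long referent_offset, bool base_is_reference) {
  if (offset == referent_offset) {   // the compile-time filters could not exclude this offset
    if (base_is_reference) {         // runtime instanceof java.lang.ref.Reference
      // SATB-log 'loaded_value' with the pre-barrier (see the sketch after pre_barrier())
      (void)loaded_value;
    }
  }
}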
|
||||
|
||||
#undef __
|
||||
|
||||
Node* G1BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
|
||||
DecoratorSet decorators = access.decorators();
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
Node* adr = access.addr().node();
|
||||
Node* obj = access.base();
|
||||
|
||||
bool mismatched = (decorators & C2_MISMATCHED) != 0;
|
||||
bool unknown = (decorators & ON_UNKNOWN_OOP_REF) != 0;
|
||||
bool on_heap = (decorators & IN_HEAP) != 0;
|
||||
bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0;
|
||||
bool is_unordered = (decorators & MO_UNORDERED) != 0;
|
||||
bool need_cpu_mem_bar = !is_unordered || mismatched || !on_heap;
|
||||
|
||||
Node* offset = adr->is_AddP() ? adr->in(AddPNode::Offset) : kit->top();
|
||||
Node* load = CardTableBarrierSetC2::load_at_resolved(access, val_type);
|
||||
|
||||
// If we are reading the value of the referent field of a Reference
|
||||
// object (either by using Unsafe directly or through reflection)
|
||||
// then, if G1 is enabled, we need to record the referent in an
|
||||
// SATB log buffer using the pre-barrier mechanism.
|
||||
// Also we need to add memory barrier to prevent commoning reads
|
||||
// from this field across safepoint since GC can change its value.
|
||||
bool need_read_barrier = on_heap && (on_weak ||
|
||||
(unknown && offset != kit->top() && obj != kit->top()));
|
||||
|
||||
if (!access.is_oop() || !need_read_barrier) {
|
||||
return load;
|
||||
}
|
||||
|
||||
if (on_weak) {
|
||||
// Use the pre-barrier to record the value in the referent field
|
||||
pre_barrier(kit, false /* do_load */,
|
||||
kit->control(),
|
||||
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
|
||||
load /* pre_val */, T_OBJECT);
|
||||
// Add memory barrier to prevent commoning reads from this field
|
||||
// across safepoint since GC can change its value.
|
||||
kit->insert_mem_bar(Op_MemBarCPUOrder);
|
||||
} else if (unknown) {
|
||||
// We do not require a mem bar inside pre_barrier if need_mem_bar
|
||||
// is set: the barriers would be emitted by us.
|
||||
insert_pre_barrier(kit, obj, offset, load, !need_cpu_mem_bar);
|
||||
}
|
||||
|
||||
return load;
|
||||
}
|
||||
|
||||
bool G1BarrierSetC2::is_gc_barrier_node(Node* node) const {
|
||||
if (CardTableBarrierSetC2::is_gc_barrier_node(node)) {
|
||||
return true;
|
||||
}
|
||||
if (node->Opcode() != Op_CallLeaf) {
|
||||
return false;
|
||||
}
|
||||
CallLeafNode *call = node->as_CallLeaf();
|
||||
if (call->_name == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return strcmp(call->_name, "g1_wb_pre") == 0 || strcmp(call->_name, "g1_wb_post") == 0;
|
||||
}
|
||||
|
||||
void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
|
||||
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
|
||||
assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
|
||||
// There could be only one user, the URShift node, in the Object.clone() intrinsic,
// but then the new allocation is passed to the arraycopy stub and it cannot
// be scalar replaced. So we don't check for that case.
|
||||
|
||||
// Another case with only one user (Xor) is when the check of the value for NULL
// in the G1 post barrier is folded after CCP, so the code which used the URShift
// is removed.
|
||||
|
||||
// Take Region node before eliminating post barrier since it also
|
||||
// eliminates CastP2X node when it has only one user.
|
||||
Node* this_region = node->in(0);
|
||||
assert(this_region != NULL, "");
|
||||
|
||||
// Remove G1 post barrier.
|
||||
|
||||
// Search for the CastP2X->Xor->URShift->Cmp path which
// checks if the store was done to a region different from the value's region,
// and replace the Cmp with #0 (false) to collapse the G1 post barrier.
|
||||
Node* xorx = node->find_out_with(Op_XorX);
|
||||
if (xorx != NULL) {
|
||||
Node* shift = xorx->unique_out();
|
||||
Node* cmpx = shift->unique_out();
|
||||
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
|
||||
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
|
||||
"missing region check in G1 post barrier");
|
||||
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
|
||||
|
||||
// Remove G1 pre barrier.
|
||||
|
||||
// Search "if (marking != 0)" check and set it to "false".
|
||||
// There is no G1 pre barrier if previous stored value is NULL
|
||||
// (for example, after initialization).
|
||||
if (this_region->is_Region() && this_region->req() == 3) {
|
||||
int ind = 1;
|
||||
if (!this_region->in(ind)->is_IfFalse()) {
|
||||
ind = 2;
|
||||
}
|
||||
if (this_region->in(ind)->is_IfFalse() &&
|
||||
this_region->in(ind)->in(0)->Opcode() == Op_If) {
|
||||
Node* bol = this_region->in(ind)->in(0)->in(1);
|
||||
assert(bol->is_Bool(), "");
|
||||
cmpx = bol->in(1);
|
||||
if (bol->as_Bool()->_test._test == BoolTest::ne &&
|
||||
cmpx->is_Cmp() && cmpx->in(2) == macro->intcon(0) &&
|
||||
cmpx->in(1)->is_Load()) {
|
||||
Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
|
||||
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
|
||||
if (adr->is_AddP() && adr->in(AddPNode::Base) == macro->top() &&
|
||||
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
|
||||
adr->in(AddPNode::Offset) == macro->MakeConX(marking_offset)) {
|
||||
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
|
||||
// This is a G1 post barrier emitted by the Object.clone() intrinsic.
|
||||
// Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
|
||||
// is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
|
||||
Node* shift = node->find_out_with(Op_URShiftX);
|
||||
assert(shift != NULL, "missing G1 post barrier");
|
||||
Node* addp = shift->unique_out();
|
||||
Node* load = addp->find_out_with(Op_LoadB);
|
||||
assert(load != NULL, "missing G1 post barrier");
|
||||
Node* cmpx = load->unique_out();
|
||||
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
|
||||
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
|
||||
"missing card value check in G1 post barrier");
|
||||
macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ));
|
||||
// There is no G1 pre barrier in this case
|
||||
}
|
||||
// Now CastP2X can be removed since it is used only on a dead path,
// which currently stays alive until IGVN optimizes it away.
|
||||
assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, "");
|
||||
macro->replace_node(node, macro->top());
|
||||
}
|
||||
|
||||
Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const {
|
||||
if (!use_ReduceInitialCardMarks() &&
|
||||
c != NULL && c->is_Region() && c->req() == 3) {
|
||||
for (uint i = 1; i < c->req(); i++) {
|
||||
if (c->in(i) != NULL && c->in(i)->is_Region() &&
|
||||
c->in(i)->req() == 3) {
|
||||
Node* r = c->in(i);
|
||||
for (uint j = 1; j < r->req(); j++) {
|
||||
if (r->in(j) != NULL && r->in(j)->is_Proj() &&
|
||||
r->in(j)->in(0) != NULL &&
|
||||
r->in(j)->in(0)->Opcode() == Op_CallLeaf &&
|
||||
r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post)) {
|
||||
Node* call = r->in(j)->in(0);
|
||||
c = c->in(i == 1 ? 2 : 1);
|
||||
if (c != NULL) {
|
||||
c = c->in(0);
|
||||
if (c != NULL) {
|
||||
c = c->in(0);
|
||||
assert(call->in(0) == NULL ||
|
||||
call->in(0)->in(0) == NULL ||
|
||||
call->in(0)->in(0)->in(0) == NULL ||
|
||||
call->in(0)->in(0)->in(0)->in(0) == NULL ||
|
||||
call->in(0)->in(0)->in(0)->in(0)->in(0) == NULL ||
|
||||
c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape");
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return c;
|
||||
}
|
src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp  (new file, 93 lines)
@@ -0,0 +1,93 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP
|
||||
#define SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP
|
||||
|
||||
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
|
||||
|
||||
class PhaseTransform;
|
||||
class Type;
|
||||
class TypeFunc;
|
||||
|
||||
class G1BarrierSetC2: public CardTableBarrierSetC2 {
|
||||
protected:
|
||||
virtual void pre_barrier(GraphKit* kit,
|
||||
bool do_load,
|
||||
Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint adr_idx,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
Node* pre_val,
|
||||
BasicType bt) const;
|
||||
|
||||
virtual void post_barrier(GraphKit* kit,
|
||||
Node* ctl,
|
||||
Node* store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint adr_idx,
|
||||
Node* val,
|
||||
BasicType bt,
|
||||
bool use_precise) const;
|
||||
|
||||
bool g1_can_remove_pre_barrier(GraphKit* kit,
|
||||
PhaseTransform* phase,
|
||||
Node* adr,
|
||||
BasicType bt,
|
||||
uint adr_idx) const;
|
||||
|
||||
bool g1_can_remove_post_barrier(GraphKit* kit,
|
||||
PhaseTransform* phase, Node* store,
|
||||
Node* adr) const;
|
||||
|
||||
void g1_mark_card(GraphKit* kit,
|
||||
IdealKit& ideal,
|
||||
Node* card_adr,
|
||||
Node* oop_store,
|
||||
uint oop_alias_idx,
|
||||
Node* index,
|
||||
Node* index_adr,
|
||||
Node* buffer,
|
||||
const TypeFunc* tf) const;
|
||||
|
||||
// Helper for unsafe accesses, that may or may not be on the referent field.
|
||||
// Generates the guards that check whether the result of
|
||||
// Unsafe.getObject should be recorded in an SATB log buffer.
|
||||
void insert_pre_barrier(GraphKit* kit, Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar) const;
|
||||
|
||||
static const TypeFunc* g1_wb_pre_Type();
|
||||
static const TypeFunc* g1_wb_post_Type();
|
||||
|
||||
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
|
||||
|
||||
public:
|
||||
virtual bool is_gc_barrier_node(Node* node) const;
|
||||
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
|
||||
virtual Node* step_over_gc_barrier(Node* c) const;
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_SHARED_C2_G1BARRIERSETC2_HPP
|
|
@@ -34,14 +34,19 @@
|
|||
#include "oops/access.inline.hpp"
|
||||
#include "oops/compressedOops.inline.hpp"
|
||||
#include "oops/oop.inline.hpp"
|
||||
#include "runtime/interfaceSupport.inline.hpp"
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/thread.inline.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#ifdef COMPILER1
|
||||
#include "gc/g1/c1/g1BarrierSetC1.hpp"
|
||||
#endif
|
||||
#ifdef COMPILER2
|
||||
#include "gc/g1/c2/g1BarrierSetC2.hpp"
|
||||
#endif
|
||||
|
||||
class G1BarrierSetC1;
|
||||
class G1BarrierSetC2;
|
||||
|
||||
SATBMarkQueueSet G1BarrierSet::_satb_mark_queue_set;
|
||||
DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set;
|
||||
|
@@ -49,6 +54,7 @@ DirtyCardQueueSet G1BarrierSet::_dirty_card_queue_set;
|
|||
G1BarrierSet::G1BarrierSet(G1CardTable* card_table) :
|
||||
CardTableBarrierSet(make_barrier_set_assembler<G1BarrierSetAssembler>(),
|
||||
make_barrier_set_c1<G1BarrierSetC1>(),
|
||||
make_barrier_set_c2<G1BarrierSetC2>(),
|
||||
card_table,
|
||||
BarrierSet::FakeRtti(BarrierSet::G1BarrierSet)) {}
|
||||
|
||||
|
|
|
@@ -35,6 +35,7 @@
|
|||
|
||||
class BarrierSetAssembler;
|
||||
class BarrierSetC1;
|
||||
class BarrierSetC2;
|
||||
class JavaThread;
|
||||
|
||||
// This class provides the interface between a barrier implementation and
|
||||
|
@@ -70,6 +71,7 @@ private:
|
|||
FakeRtti _fake_rtti;
|
||||
BarrierSetAssembler* _barrier_set_assembler;
|
||||
BarrierSetC1* _barrier_set_c1;
|
||||
BarrierSetC2* _barrier_set_c2;
|
||||
|
||||
public:
|
||||
// Metafunction mapping a class derived from BarrierSet to the
|
||||
|
@@ -92,10 +94,12 @@ public:
|
|||
protected:
|
||||
BarrierSet(BarrierSetAssembler* barrier_set_assembler,
|
||||
BarrierSetC1* barrier_set_c1,
|
||||
BarrierSetC2* barrier_set_c2,
|
||||
const FakeRtti& fake_rtti) :
|
||||
_fake_rtti(fake_rtti),
|
||||
_barrier_set_assembler(barrier_set_assembler),
|
||||
_barrier_set_c1(barrier_set_c1) {}
|
||||
_barrier_set_c1(barrier_set_c1),
|
||||
_barrier_set_c2(barrier_set_c2) {}
|
||||
~BarrierSet() { }
|
||||
|
||||
template <class BarrierSetAssemblerT>
|
||||
|
@@ -108,6 +112,11 @@ protected:
|
|||
return COMPILER1_PRESENT(new BarrierSetC1T()) NOT_COMPILER1(NULL);
|
||||
}
|
||||
|
||||
template <class BarrierSetC2T>
|
||||
BarrierSetC2* make_barrier_set_c2() {
|
||||
return COMPILER2_PRESENT(new BarrierSetC2T()) NOT_COMPILER2(NULL);
|
||||
}
|
||||
|
||||
public:
|
||||
// Support for optimizing compilers to call the barrier set on slow path allocations
|
||||
// that did not enter a TLAB. Used for e.g. ReduceInitialCardMarks.
|
||||
|
@@ -138,6 +147,11 @@ public:
|
|||
return _barrier_set_c1;
|
||||
}
|
||||
|
||||
BarrierSetC2* barrier_set_c2() {
|
||||
assert(_barrier_set_c2 != NULL, "should be set");
|
||||
return _barrier_set_c2;
|
||||
}
|
||||
|
||||
// The AccessBarrier of a BarrierSet subclass is called by the Access API
|
||||
// (cf. oops/access.hpp) to perform decorated accesses. GC implementations
|
||||
// may override these default access operations by declaring an
|
||||
|
|
src/hotspot/share/gc/shared/c2/barrierSetC2.cpp  (new file, 588 lines)
@@ -0,0 +1,588 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "opto/arraycopynode.hpp"
|
||||
#include "opto/graphKit.hpp"
|
||||
#include "opto/idealKit.hpp"
|
||||
#include "opto/narrowptrnode.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
// By default this is a no-op.
|
||||
void BarrierSetC2::resolve_address(C2Access& access) const { }
|
||||
|
||||
void* C2Access::barrier_set_state() const {
|
||||
return _kit->barrier_set_state();
|
||||
}
|
||||
|
||||
bool C2Access::needs_cpu_membar() const {
|
||||
bool mismatched = (_decorators & C2_MISMATCHED) != 0;
|
||||
bool is_unordered = (_decorators & MO_UNORDERED) != 0;
|
||||
bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
|
||||
bool on_heap = (_decorators & IN_HEAP) != 0;
|
||||
|
||||
bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
|
||||
bool is_read = (_decorators & C2_READ_ACCESS) != 0;
|
||||
bool is_atomic = is_read && is_write;
|
||||
|
||||
if (is_atomic) {
|
||||
// Atomics always need to be wrapped in CPU membars
|
||||
return true;
|
||||
}
|
||||
|
||||
if (anonymous) {
|
||||
// We will need memory barriers unless we can determine a unique
|
||||
// alias category for this reference. (Note: If for some reason
|
||||
// the barriers get omitted and the unsafe reference begins to "pollute"
|
||||
// the alias analysis of the rest of the graph, either Compile::can_alias
|
||||
// or Compile::must_alias will throw a diagnostic assert.)
|
||||
if (!on_heap || !is_unordered || (mismatched && !_addr.type()->isa_aryptr())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
|
||||
DecoratorSet decorators = access.decorators();
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
bool mismatched = (decorators & C2_MISMATCHED) != 0;
|
||||
bool unaligned = (decorators & C2_UNALIGNED) != 0;
|
||||
bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
|
||||
|
||||
bool in_root = (decorators & IN_ROOT) != 0;
|
||||
assert(!in_root, "not supported yet");
|
||||
|
||||
if (access.type() == T_DOUBLE) {
|
||||
Node* new_val = kit->dstore_rounding(val.node());
|
||||
val.set_node(new_val);
|
||||
}
|
||||
|
||||
MemNode::MemOrd mo = access.mem_node_mo();
|
||||
|
||||
Node* store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), access.type(),
|
||||
access.addr().type(), mo, requires_atomic_access, unaligned, mismatched);
|
||||
access.set_raw_access(store);
|
||||
return store;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::load_at_resolved(C2Access& access, const Type* val_type) const {
|
||||
DecoratorSet decorators = access.decorators();
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
Node* adr = access.addr().node();
|
||||
const TypePtr* adr_type = access.addr().type();
|
||||
|
||||
bool mismatched = (decorators & C2_MISMATCHED) != 0;
|
||||
bool requires_atomic_access = (decorators & MO_UNORDERED) == 0;
|
||||
bool unaligned = (decorators & C2_UNALIGNED) != 0;
|
||||
bool control_dependent = (decorators & C2_CONTROL_DEPENDENT_LOAD) != 0;
|
||||
bool pinned = (decorators & C2_PINNED_LOAD) != 0;
|
||||
|
||||
bool in_root = (decorators & IN_ROOT) != 0;
|
||||
assert(!in_root, "not supported yet");
|
||||
|
||||
MemNode::MemOrd mo = access.mem_node_mo();
|
||||
LoadNode::ControlDependency dep = pinned ? LoadNode::Pinned : LoadNode::DependsOnlyOnTest;
|
||||
Node* control = control_dependent ? kit->control() : NULL;
|
||||
|
||||
Node* load = kit->make_load(control, adr, val_type, access.type(), adr_type, mo,
|
||||
dep, requires_atomic_access, unaligned, mismatched);
|
||||
access.set_raw_access(load);
|
||||
|
||||
return load;
|
||||
}
|
||||
|
||||
class C2AccessFence: public StackObj {
|
||||
C2Access& _access;
|
||||
|
||||
public:
|
||||
C2AccessFence(C2Access& access) :
|
||||
_access(access) {
|
||||
GraphKit* kit = access.kit();
|
||||
DecoratorSet decorators = access.decorators();
|
||||
|
||||
bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
|
||||
bool is_read = (decorators & C2_READ_ACCESS) != 0;
|
||||
bool is_atomic = is_read && is_write;
|
||||
|
||||
bool is_volatile = (decorators & MO_SEQ_CST) != 0;
|
||||
bool is_release = (decorators & MO_RELEASE) != 0;
|
||||
|
||||
if (is_atomic) {
|
||||
// Memory-model-wise, a LoadStore acts like a little synchronized
|
||||
// block, so needs barriers on each side. These don't translate
|
||||
// into actual barriers on most machines, but we still need the rest of
// the compiler to respect ordering.
|
||||
if (is_release) {
|
||||
kit->insert_mem_bar(Op_MemBarRelease);
|
||||
} else if (is_volatile) {
|
||||
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
kit->insert_mem_bar(Op_MemBarVolatile);
|
||||
} else {
|
||||
kit->insert_mem_bar(Op_MemBarRelease);
|
||||
}
|
||||
}
|
||||
} else if (is_write) {
|
||||
// If reference is volatile, prevent following memory ops from
|
||||
// floating down past the volatile write. Also prevents commoning
|
||||
// another volatile read.
|
||||
if (is_volatile || is_release) {
|
||||
kit->insert_mem_bar(Op_MemBarRelease);
|
||||
}
|
||||
} else {
|
||||
// Memory barrier to prevent normal and 'unsafe' accesses from
|
||||
// bypassing each other. Happens after null checks, so the
|
||||
// exception paths do not take memory state from the memory barrier,
|
||||
// so there's no problem making a strong assert about mixing users
|
||||
// of safe & unsafe memory.
|
||||
if (is_volatile && support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
kit->insert_mem_bar(Op_MemBarVolatile);
|
||||
}
|
||||
}
|
||||
|
||||
if (access.needs_cpu_membar()) {
|
||||
kit->insert_mem_bar(Op_MemBarCPUOrder);
|
||||
}
|
||||
|
||||
if (is_atomic) {
|
||||
// 4984716: MemBars must be inserted before this
|
||||
// memory node in order to avoid a false
|
||||
// dependency which will confuse the scheduler.
|
||||
access.set_memory();
|
||||
}
|
||||
}
|
||||
|
||||
~C2AccessFence() {
|
||||
GraphKit* kit = _access.kit();
|
||||
DecoratorSet decorators = _access.decorators();
|
||||
|
||||
bool is_write = (decorators & C2_WRITE_ACCESS) != 0;
|
||||
bool is_read = (decorators & C2_READ_ACCESS) != 0;
|
||||
bool is_atomic = is_read && is_write;
|
||||
|
||||
bool is_volatile = (decorators & MO_SEQ_CST) != 0;
|
||||
bool is_acquire = (decorators & MO_ACQUIRE) != 0;
|
||||
|
||||
// If reference is volatile, prevent following volatile ops from
|
||||
// floating up before the volatile access.
|
||||
if (_access.needs_cpu_membar()) {
|
||||
kit->insert_mem_bar(Op_MemBarCPUOrder);
|
||||
}
|
||||
|
||||
if (is_atomic) {
|
||||
if (is_acquire || is_volatile) {
|
||||
kit->insert_mem_bar(Op_MemBarAcquire);
|
||||
}
|
||||
} else if (is_write) {
|
||||
// If not multiple copy atomic, we do the MemBarVolatile before the load.
|
||||
if (is_volatile && !support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
kit->insert_mem_bar(Op_MemBarVolatile); // Use fat membar
|
||||
}
|
||||
} else {
|
||||
if (is_volatile || is_acquire) {
|
||||
kit->insert_mem_bar(Op_MemBarAcquire, _access.raw_access());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
Node* BarrierSetC2::store_at(C2Access& access, C2AccessValue& val) const {
|
||||
C2AccessFence fence(access);
|
||||
resolve_address(access);
|
||||
return store_at_resolved(access, val);
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::load_at(C2Access& access, const Type* val_type) const {
|
||||
C2AccessFence fence(access);
|
||||
resolve_address(access);
|
||||
return load_at_resolved(access, val_type);
|
||||
}
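The store_at/load_at wrappers above are the heart of the modularization this commit introduces: a concrete GC only overrides the *_resolved hooks and inherits the fencing and address resolution. A hypothetical backend (editorial sketch, not part of this diff; G1BarrierSetC2::load_at_resolved in the first file is the real example):

class MyGCBarrierSetC2 : public BarrierSetC2 {
public:
  virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const {
    Node* load = BarrierSetC2::load_at_resolved(access, val_type); // the plain memory access
    // ... emit GC-specific barrier nodes around 'load' here, as G1BarrierSetC2 does ...
    return load;
  }
};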
|
||||
|
||||
MemNode::MemOrd C2Access::mem_node_mo() const {
|
||||
bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
|
||||
bool is_read = (_decorators & C2_READ_ACCESS) != 0;
|
||||
if ((_decorators & MO_SEQ_CST) != 0) {
|
||||
if (is_write && is_read) {
|
||||
// For atomic operations
|
||||
return MemNode::seqcst;
|
||||
} else if (is_write) {
|
||||
return MemNode::release;
|
||||
} else {
|
||||
assert(is_read, "what else?");
|
||||
return MemNode::acquire;
|
||||
}
|
||||
} else if ((_decorators & MO_RELEASE) != 0) {
|
||||
return MemNode::release;
|
||||
} else if ((_decorators & MO_ACQUIRE) != 0) {
|
||||
return MemNode::acquire;
|
||||
} else if (is_write) {
|
||||
// Volatile fields need releasing stores.
|
||||
// Non-volatile fields also need releasing stores if they hold an
|
||||
// object reference, because the object reference might point to
|
||||
// a freshly created object.
|
||||
// Conservatively release stores of object references.
|
||||
return StoreNode::release_if_reference(_type);
|
||||
} else {
|
||||
return MemNode::unordered;
|
||||
}
|
||||
}
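As an editorial reading aid (not part of this commit), here is what mem_node_mo() resolves to for a few typical accesses, assuming the decorators set by the access layer:

// plain int field store                          -> MemNode::unordered
// plain Object field store                       -> MemNode::release   (release_if_reference)
// volatile field load   (MO_SEQ_CST, read only)  -> MemNode::acquire
// volatile field store  (MO_SEQ_CST, write only) -> MemNode::release
// atomic read-modify-write (read + write, MO_SEQ_CST) -> MemNode::seqcst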
|
||||
|
||||
void C2Access::fixup_decorators() {
|
||||
bool default_mo = (_decorators & MO_DECORATOR_MASK) == 0;
|
||||
bool is_unordered = (_decorators & MO_UNORDERED) != 0 || default_mo;
|
||||
bool anonymous = (_decorators & C2_UNSAFE_ACCESS) != 0;
|
||||
|
||||
bool is_read = (_decorators & C2_READ_ACCESS) != 0;
|
||||
bool is_write = (_decorators & C2_WRITE_ACCESS) != 0;
|
||||
|
||||
if (AlwaysAtomicAccesses && is_unordered) {
|
||||
_decorators &= ~MO_DECORATOR_MASK; // clear the MO bits
|
||||
_decorators |= MO_RELAXED; // Force the MO_RELAXED decorator with AlwaysAtomicAccess
|
||||
}
|
||||
|
||||
_decorators = AccessInternal::decorator_fixup(_decorators);
|
||||
|
||||
if (is_read && !is_write && anonymous) {
|
||||
// To be valid, unsafe loads may depend on other conditions than
|
||||
// the one that guards them: pin the Load node
|
||||
_decorators |= C2_CONTROL_DEPENDENT_LOAD;
|
||||
_decorators |= C2_PINNED_LOAD;
|
||||
const TypePtr* adr_type = _addr.type();
|
||||
Node* adr = _addr.node();
|
||||
if (!needs_cpu_membar() && adr_type->isa_instptr()) {
|
||||
assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
|
||||
intptr_t offset = Type::OffsetBot;
|
||||
AddPNode::Ideal_base_and_offset(adr, &_kit->gvn(), offset);
|
||||
if (offset >= 0) {
|
||||
int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->klass()->layout_helper());
|
||||
if (offset < s) {
|
||||
// Guaranteed to be a valid access, no need to pin it
|
||||
_decorators ^= C2_CONTROL_DEPENDENT_LOAD;
|
||||
_decorators ^= C2_PINNED_LOAD;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//--------------------------- atomic operations---------------------------------
|
||||
|
||||
static void pin_atomic_op(C2AtomicAccess& access) {
|
||||
if (!access.needs_pinning()) {
|
||||
return;
|
||||
}
|
||||
// SCMemProjNodes represent the memory state of a LoadStore. Their
|
||||
// main role is to prevent LoadStore nodes from being optimized away
|
||||
// when their results aren't used.
|
||||
GraphKit* kit = access.kit();
|
||||
Node* load_store = access.raw_access();
|
||||
assert(load_store != NULL, "must pin atomic op");
|
||||
Node* proj = kit->gvn().transform(new SCMemProjNode(load_store));
|
||||
kit->set_memory(proj, access.alias_idx());
|
||||
}
|
||||
|
||||
void C2AtomicAccess::set_memory() {
|
||||
Node *mem = _kit->memory(_alias_idx);
|
||||
_memory = mem;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const {
|
||||
GraphKit* kit = access.kit();
|
||||
MemNode::MemOrd mo = access.mem_node_mo();
|
||||
Node* mem = access.memory();
|
||||
|
||||
Node* adr = access.addr().node();
|
||||
const TypePtr* adr_type = access.addr().type();
|
||||
|
||||
Node* load_store = NULL;
|
||||
|
||||
if (access.is_oop()) {
|
||||
#ifdef _LP64
|
||||
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
|
||||
Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
|
||||
load_store = kit->gvn().transform(new CompareAndExchangeNNode(kit->control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
load_store = kit->gvn().transform(new CompareAndExchangePNode(kit->control(), mem, adr, new_val, expected_val, adr_type, value_type->is_oopptr(), mo));
|
||||
}
|
||||
} else {
|
||||
switch (access.type()) {
|
||||
case T_BYTE: {
|
||||
load_store = kit->gvn().transform(new CompareAndExchangeBNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
|
||||
break;
|
||||
}
|
||||
case T_SHORT: {
|
||||
load_store = kit->gvn().transform(new CompareAndExchangeSNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
|
||||
break;
|
||||
}
|
||||
case T_INT: {
|
||||
load_store = kit->gvn().transform(new CompareAndExchangeINode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
|
||||
break;
|
||||
}
|
||||
case T_LONG: {
|
||||
load_store = kit->gvn().transform(new CompareAndExchangeLNode(kit->control(), mem, adr, new_val, expected_val, adr_type, mo));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
access.set_raw_access(load_store);
|
||||
pin_atomic_op(access);
|
||||
|
||||
#ifdef _LP64
|
||||
if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
|
||||
}
|
||||
#endif
|
||||
|
||||
return load_store;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const {
|
||||
GraphKit* kit = access.kit();
|
||||
DecoratorSet decorators = access.decorators();
|
||||
MemNode::MemOrd mo = access.mem_node_mo();
|
||||
Node* mem = access.memory();
|
||||
bool is_weak_cas = (decorators & C2_WEAK_CMPXCHG) != 0;
|
||||
Node* load_store = NULL;
|
||||
Node* adr = access.addr().node();
|
||||
|
||||
if (access.is_oop()) {
|
||||
#ifdef _LP64
|
||||
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
|
||||
Node *oldval_enc = kit->gvn().transform(new EncodePNode(expected_val, expected_val->bottom_type()->make_narrowoop()));
|
||||
if (is_weak_cas) {
|
||||
load_store = kit->gvn().transform(new WeakCompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
|
||||
} else {
|
||||
load_store = kit->gvn().transform(new CompareAndSwapNNode(kit->control(), mem, adr, newval_enc, oldval_enc, mo));
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
if (is_weak_cas) {
|
||||
load_store = kit->gvn().transform(new WeakCompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
} else {
|
||||
load_store = kit->gvn().transform(new CompareAndSwapPNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
switch(access.type()) {
|
||||
case T_BYTE: {
|
||||
if (is_weak_cas) {
|
||||
load_store = kit->gvn().transform(new WeakCompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
} else {
|
||||
load_store = kit->gvn().transform(new CompareAndSwapBNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_SHORT: {
|
||||
if (is_weak_cas) {
|
||||
load_store = kit->gvn().transform(new WeakCompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
} else {
|
||||
load_store = kit->gvn().transform(new CompareAndSwapSNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_INT: {
|
||||
if (is_weak_cas) {
|
||||
load_store = kit->gvn().transform(new WeakCompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
} else {
|
||||
load_store = kit->gvn().transform(new CompareAndSwapINode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
}
|
||||
break;
|
||||
}
|
||||
case T_LONG: {
|
||||
if (is_weak_cas) {
|
||||
load_store = kit->gvn().transform(new WeakCompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
} else {
|
||||
load_store = kit->gvn().transform(new CompareAndSwapLNode(kit->control(), mem, adr, new_val, expected_val, mo));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
access.set_raw_access(load_store);
|
||||
pin_atomic_op(access);
|
||||
|
||||
return load_store;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
|
||||
GraphKit* kit = access.kit();
|
||||
Node* mem = access.memory();
|
||||
Node* adr = access.addr().node();
|
||||
const TypePtr* adr_type = access.addr().type();
|
||||
Node* load_store = NULL;
|
||||
|
||||
if (access.is_oop()) {
|
||||
#ifdef _LP64
|
||||
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
Node *newval_enc = kit->gvn().transform(new EncodePNode(new_val, new_val->bottom_type()->make_narrowoop()));
|
||||
load_store = kit->gvn().transform(new GetAndSetNNode(kit->control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
load_store = kit->gvn().transform(new GetAndSetPNode(kit->control(), mem, adr, new_val, adr_type, value_type->is_oopptr()));
|
||||
}
|
||||
} else {
|
||||
switch (access.type()) {
|
||||
case T_BYTE:
|
||||
load_store = kit->gvn().transform(new GetAndSetBNode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
case T_SHORT:
|
||||
load_store = kit->gvn().transform(new GetAndSetSNode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
case T_INT:
|
||||
load_store = kit->gvn().transform(new GetAndSetINode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
case T_LONG:
|
||||
load_store = kit->gvn().transform(new GetAndSetLNode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
access.set_raw_access(load_store);
|
||||
pin_atomic_op(access);
|
||||
|
||||
#ifdef _LP64
|
||||
if (access.is_oop() && adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
return kit->gvn().transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
|
||||
}
|
||||
#endif
|
||||
|
||||
return load_store;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
|
||||
Node* load_store = NULL;
|
||||
GraphKit* kit = access.kit();
|
||||
Node* adr = access.addr().node();
|
||||
const TypePtr* adr_type = access.addr().type();
|
||||
Node* mem = access.memory();
|
||||
|
||||
switch(access.type()) {
|
||||
case T_BYTE:
|
||||
load_store = kit->gvn().transform(new GetAndAddBNode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
case T_SHORT:
|
||||
load_store = kit->gvn().transform(new GetAndAddSNode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
case T_INT:
|
||||
load_store = kit->gvn().transform(new GetAndAddINode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
case T_LONG:
|
||||
load_store = kit->gvn().transform(new GetAndAddLNode(kit->control(), mem, adr, new_val, adr_type));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
access.set_raw_access(load_store);
|
||||
pin_atomic_op(access);
|
||||
|
||||
return load_store;
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_cmpxchg_val_at(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const {
|
||||
C2AccessFence fence(access);
|
||||
resolve_address(access);
|
||||
return atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_cmpxchg_bool_at(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const {
|
||||
C2AccessFence fence(access);
|
||||
resolve_address(access);
|
||||
return atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_xchg_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
|
||||
C2AccessFence fence(access);
|
||||
resolve_address(access);
|
||||
return atomic_xchg_at_resolved(access, new_val, value_type);
|
||||
}
|
||||
|
||||
Node* BarrierSetC2::atomic_add_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
|
||||
C2AccessFence fence(access);
|
||||
resolve_address(access);
|
||||
return atomic_add_at_resolved(access, new_val, value_type);
|
||||
}
|
||||
|
||||
void BarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
|
||||
// Exclude the header but include the array length, so we copy in 8-byte words.
// Can't use base_offset_in_bytes(bt) since the basic type is unknown.
|
||||
int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
|
||||
instanceOopDesc::base_offset_in_bytes();
|
||||
// base_off:
|
||||
// 8 - 32-bit VM
|
||||
// 12 - 64-bit VM, compressed klass
|
||||
// 16 - 64-bit VM, normal klass
|
||||
if (base_off % BytesPerLong != 0) {
|
||||
assert(UseCompressedClassPointers, "");
|
||||
if (is_array) {
|
||||
// Exclude the length so we copy in 8-byte words.
|
||||
base_off += sizeof(int);
|
||||
} else {
|
||||
// Include the klass so we copy in 8-byte words.
|
||||
base_off = instanceOopDesc::klass_offset_in_bytes();
|
||||
}
|
||||
assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
|
||||
}
|
||||
Node* src_base = kit->basic_plus_adr(src, base_off);
|
||||
Node* dst_base = kit->basic_plus_adr(dst, base_off);
|
||||
|
||||
// Compute the length also, if needed:
|
||||
Node* countx = size;
|
||||
countx = kit->gvn().transform(new SubXNode(countx, kit->MakeConX(base_off)));
|
||||
countx = kit->gvn().transform(new URShiftXNode(countx, kit->intcon(LogBytesPerLong) ));
|
||||
|
||||
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
|
||||
|
||||
ArrayCopyNode* ac = ArrayCopyNode::make(kit, false, src_base, NULL, dst_base, NULL, countx, false, false);
|
||||
ac->set_clonebasic();
|
||||
Node* n = kit->gvn().transform(ac);
|
||||
if (n == ac) {
|
||||
kit->set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
|
||||
} else {
|
||||
kit->set_all_memory(n);
|
||||
}
|
||||
}
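A worked example of the size computation above (editor's illustration, not part of the patch), assuming a 64-bit VM with compressed class pointers and a plain 32-byte instance:
// Editor's illustration only (values assumed, not from the patch):
//   base_off = instanceOopDesc::base_offset_in_bytes();   // 12, not 8-byte aligned
//   base_off = instanceOopDesc::klass_offset_in_bytes();   // realigned to 8, klass is copied too
//   countx   = (size - base_off) >> LogBytesPerLong;       // (32 - 8) >> 3 = 3 eight-byte words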
217 src/hotspot/share/gc/shared/c2/barrierSetC2.hpp Normal file
@ -0,0 +1,217 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
|
||||
#define SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
|
||||
|
||||
#include "memory/allocation.hpp"
|
||||
#include "oops/accessDecorators.hpp"
|
||||
#include "opto/loopnode.hpp"
|
||||
#include "opto/memnode.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
|
||||
// This means the access is mismatched: the value of the access is not
// equivalent to the value pointed to by the address.
|
||||
const DecoratorSet C2_MISMATCHED = DECORATOR_LAST << 1;
|
||||
// The access may not be aligned to its natural size.
|
||||
const DecoratorSet C2_UNALIGNED = DECORATOR_LAST << 2;
|
||||
// The atomic cmpxchg is weak, meaning that spurious false negatives are allowed,
|
||||
// but never false positives.
|
||||
const DecoratorSet C2_WEAK_CMPXCHG = DECORATOR_LAST << 3;
|
||||
// This denotes that a load has a control dependency.
const DecoratorSet C2_CONTROL_DEPENDENT_LOAD = DECORATOR_LAST << 4;
// This denotes a load that must be pinned.
const DecoratorSet C2_PINNED_LOAD = DECORATOR_LAST << 5;
|
||||
// This denotes that the access is produced from the sun.misc.Unsafe intrinsics.
|
||||
const DecoratorSet C2_UNSAFE_ACCESS = DECORATOR_LAST << 6;
|
||||
// This denotes that the access mutates state.
|
||||
const DecoratorSet C2_WRITE_ACCESS = DECORATOR_LAST << 7;
|
||||
// This denotes that the access reads state.
|
||||
const DecoratorSet C2_READ_ACCESS = DECORATOR_LAST << 8;
|
||||
|
||||
class GraphKit;
|
||||
class IdealKit;
|
||||
class Node;
|
||||
class Type;
|
||||
class TypePtr;
|
||||
class PhaseMacroExpand;
|
||||
|
||||
// This class wraps a node and a type.
|
||||
class C2AccessValue: public StackObj {
|
||||
protected:
|
||||
Node* _node;
|
||||
const Type* _type;
|
||||
|
||||
public:
|
||||
C2AccessValue(Node* node, const Type* type) :
|
||||
_node(node),
|
||||
_type(type) {}
|
||||
|
||||
Node* node() const { return _node; }
|
||||
const Type* type() const { return _type; }
|
||||
|
||||
void set_node(Node* node) { _node = node; }
|
||||
};
|
||||
|
||||
// This class wraps a node and a pointer type.
|
||||
class C2AccessValuePtr: public C2AccessValue {
|
||||
int _alias_idx;
|
||||
|
||||
public:
|
||||
C2AccessValuePtr(Node* node, const TypePtr* type) :
|
||||
C2AccessValue(node, reinterpret_cast<const Type*>(type)) {}
|
||||
|
||||
const TypePtr* type() const { return reinterpret_cast<const TypePtr*>(_type); }
|
||||
int alias_idx() const { return _alias_idx; }
|
||||
};
|
||||
|
||||
// This class wraps a bunch of context parameters that are passed around in the
// BarrierSetC2 backend hierarchy, for loads and stores, to reduce boilerplate.
|
||||
class C2Access: public StackObj {
|
||||
protected:
|
||||
GraphKit* _kit;
|
||||
DecoratorSet _decorators;
|
||||
BasicType _type;
|
||||
Node* _base;
|
||||
C2AccessValuePtr& _addr;
|
||||
Node* _raw_access;
|
||||
|
||||
void fixup_decorators();
|
||||
void* barrier_set_state() const;
|
||||
|
||||
public:
|
||||
C2Access(GraphKit* kit, DecoratorSet decorators,
|
||||
BasicType type, Node* base, C2AccessValuePtr& addr) :
|
||||
_kit(kit),
|
||||
_decorators(decorators),
|
||||
_type(type),
|
||||
_base(base),
|
||||
_addr(addr),
|
||||
_raw_access(NULL)
|
||||
{
|
||||
fixup_decorators();
|
||||
}
|
||||
|
||||
GraphKit* kit() const { return _kit; }
|
||||
DecoratorSet decorators() const { return _decorators; }
|
||||
Node* base() const { return _base; }
|
||||
C2AccessValuePtr& addr() const { return _addr; }
|
||||
BasicType type() const { return _type; }
|
||||
bool is_oop() const { return _type == T_OBJECT || _type == T_ARRAY; }
|
||||
bool is_raw() const { return (_decorators & AS_RAW) != 0; }
|
||||
Node* raw_access() const { return _raw_access; }
|
||||
|
||||
void set_raw_access(Node* raw_access) { _raw_access = raw_access; }
|
||||
virtual void set_memory() {} // no-op for normal accesses, but not for atomic accesses.
|
||||
|
||||
MemNode::MemOrd mem_node_mo() const;
|
||||
bool needs_cpu_membar() const;
|
||||
|
||||
template <typename T>
|
||||
T barrier_set_state_as() const {
|
||||
return reinterpret_cast<T>(barrier_set_state());
|
||||
}
|
||||
};
|
||||
|
||||
// This class wraps a bunch of context parameters that are passed around in the
// BarrierSetC2 backend hierarchy, for atomic accesses, to reduce boilerplate.
|
||||
class C2AtomicAccess: public C2Access {
|
||||
Node* _memory;
|
||||
uint _alias_idx;
|
||||
bool _needs_pinning;
|
||||
|
||||
public:
|
||||
C2AtomicAccess(GraphKit* kit, DecoratorSet decorators, BasicType type,
|
||||
Node* base, C2AccessValuePtr& addr, uint alias_idx) :
|
||||
C2Access(kit, decorators, type, base, addr),
|
||||
_memory(NULL),
|
||||
_alias_idx(alias_idx),
|
||||
_needs_pinning(true) {}
|
||||
|
||||
// Set the memory node based on the current memory slice.
|
||||
virtual void set_memory();
|
||||
|
||||
Node* memory() const { return _memory; }
|
||||
uint alias_idx() const { return _alias_idx; }
|
||||
bool needs_pinning() const { return _needs_pinning; }
|
||||
|
||||
void set_needs_pinning(bool value) { _needs_pinning = value; }
|
||||
};
|
||||
|
||||
// This is the top-level class for the backend of the Access API in C2.
|
||||
// The top-level class is responsible for performing raw accesses. The
|
||||
// various GC barrier sets inherit from the BarrierSetC2 class to sprinkle
|
||||
// barriers into the accesses.
|
||||
class BarrierSetC2: public CHeapObj<mtGC> {
|
||||
protected:
|
||||
virtual void resolve_address(C2Access& access) const;
|
||||
virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const;
|
||||
virtual Node* load_at_resolved(C2Access& access, const Type* val_type) const;
|
||||
|
||||
virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* val_type) const;
|
||||
virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const;
|
||||
virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const;
|
||||
virtual Node* atomic_add_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* val_type) const;
|
||||
|
||||
public:
|
||||
// This is the entry-point for the backend to perform accesses through the Access API.
|
||||
virtual Node* store_at(C2Access& access, C2AccessValue& val) const;
|
||||
virtual Node* load_at(C2Access& access, const Type* val_type) const;
|
||||
|
||||
virtual Node* atomic_cmpxchg_val_at(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* val_type) const;
|
||||
virtual Node* atomic_cmpxchg_bool_at(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* val_type) const;
|
||||
virtual Node* atomic_xchg_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const;
|
||||
virtual Node* atomic_add_at(C2AtomicAccess& access, Node* new_val, const Type* value_type) const;
|
||||
|
||||
virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;
|
||||
|
||||
// These are general helper methods used by C2
|
||||
virtual bool array_copy_requires_gc_barriers(BasicType type) const { return false; }
|
||||
|
||||
// Support for GC barriers emitted during parsing
|
||||
virtual bool is_gc_barrier_node(Node* node) const { return false; }
|
||||
virtual Node* step_over_gc_barrier(Node* c) const { return c; }
|
||||
|
||||
// Support for macro expanded GC barriers
|
||||
virtual void register_potential_barrier_node(Node* node) const { }
|
||||
virtual void unregister_potential_barrier_node(Node* node) const { }
|
||||
virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { }
|
||||
virtual void enqueue_useful_gc_barrier(Unique_Node_List &worklist, Node* node) const {}
|
||||
virtual void eliminate_useless_gc_barriers(Unique_Node_List &useful) const {}
|
||||
virtual void add_users_to_worklist(Unique_Node_List* worklist) const {}
|
||||
|
||||
// Allow barrier sets to have shared state that is preserved across a compilation unit.
|
||||
// This could for example comprise macro nodes to be expanded during macro expansion.
|
||||
virtual void* create_barrier_state(Arena* comp_arena) const { return NULL; }
|
||||
// If the BarrierSetC2 state has kept macro nodes in its compilation unit state to be
|
||||
// expanded later, then now is the time to do so.
|
||||
virtual bool expand_macro_nodes(PhaseMacroExpand* macro) const { return false; }
|
||||
virtual void verify_gc_barriers(bool post_parse) const {}
|
||||
};
|
||||
|
||||
#endif // SHARE_GC_SHARED_C2_BARRIERSETC2_HPP
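For orientation, a minimal sketch (editor's addition, not part of the patch) of how a GC backend is expected to plug into the class above; ExampleBarrierSetC2 and the barrier comments are hypothetical:
class ExampleBarrierSetC2: public BarrierSetC2 {
protected:
  // Wrap the raw store emitted by the superclass with GC-specific barrier IR.
  virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const {
    // ... emit pre-write barrier IR here, using access.kit() ...
    Node* store = BarrierSetC2::store_at_resolved(access, val); // raw store
    // ... emit post-write barrier IR here, keyed off access.raw_access() ...
    return store;
  }
};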
191 src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp Normal file
@ -0,0 +1,191 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "ci/ciUtilities.hpp"
|
||||
#include "gc/shared/cardTable.hpp"
|
||||
#include "gc/shared/cardTableBarrierSet.hpp"
|
||||
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
|
||||
#include "opto/arraycopynode.hpp"
|
||||
#include "opto/graphKit.hpp"
|
||||
#include "opto/idealKit.hpp"
|
||||
#include "opto/macro.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
#define __ ideal.
|
||||
|
||||
Node* CardTableBarrierSetC2::byte_map_base_node(GraphKit* kit) const {
|
||||
// Get base of card map
|
||||
jbyte* card_table_base = ci_card_table_address();
|
||||
if (card_table_base != NULL) {
|
||||
return kit->makecon(TypeRawPtr::make((address)card_table_base));
|
||||
} else {
|
||||
return kit->null();
|
||||
}
|
||||
}
|
||||
|
||||
// vanilla/CMS post barrier
|
||||
// Insert a write-barrier store. This is to let generational GC work; we have
|
||||
// to flag all oop-stores before the next GC point.
|
||||
void CardTableBarrierSetC2::post_barrier(GraphKit* kit,
|
||||
Node* ctl,
|
||||
Node* oop_store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint adr_idx,
|
||||
Node* val,
|
||||
BasicType bt,
|
||||
bool use_precise) const {
|
||||
CardTableBarrierSet* ctbs = barrier_set_cast<CardTableBarrierSet>(BarrierSet::barrier_set());
|
||||
CardTable* ct = ctbs->card_table();
|
||||
// No store check is needed if we're storing NULL or an old object
// (the latter case is probably a string constant). The concurrent
// mark sweep garbage collector, however, needs to have all non-NULL
// oop updates flagged via card-marks.
|
||||
if (val != NULL && val->is_Con()) {
|
||||
// must be either an oop or NULL
|
||||
const Type* t = val->bottom_type();
|
||||
if (t == TypePtr::NULL_PTR || t == Type::TOP)
|
||||
// stores of null never (?) need barriers
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& obj == kit->just_allocated_object(kit->control())) {
|
||||
// We can skip marks on a freshly-allocated object in Eden.
|
||||
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
|
||||
// That routine informs GC to take appropriate compensating steps,
|
||||
// upon a slow-path allocation, so as to make this card-mark
|
||||
// elision safe.
|
||||
return;
|
||||
}
|
||||
|
||||
if (!use_precise) {
|
||||
// All card marks for a (non-array) instance are in one place:
|
||||
adr = obj;
|
||||
}
|
||||
// (Else it's an array (or unknown), and we want more precise card marks.)
|
||||
assert(adr != NULL, "");
|
||||
|
||||
IdealKit ideal(kit, true);
|
||||
|
||||
// Convert the pointer to an int prior to doing math on it
|
||||
Node* cast = __ CastPX(__ ctrl(), adr);
|
||||
|
||||
// Divide by card size
|
||||
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
|
||||
|
||||
// Combine card table base and card offset
|
||||
Node* card_adr = __ AddP(__ top(), byte_map_base_node(kit), card_offset );
|
||||
|
||||
// Get the alias_index for raw card-mark memory
|
||||
int adr_type = Compile::AliasIdxRaw;
|
||||
Node* zero = __ ConI(0); // Dirty card value
|
||||
|
||||
if (UseCondCardMark) {
|
||||
if (ct->scanned_concurrently()) {
|
||||
kit->insert_mem_bar(Op_MemBarVolatile, oop_store);
|
||||
__ sync_kit(kit);
|
||||
}
|
||||
// The classic GC reference write barrier is typically implemented
|
||||
// as a store into the global card mark table. Unfortunately
|
||||
// unconditional stores can result in false sharing and excessive
|
||||
// coherence traffic as well as false transactional aborts.
|
||||
// UseCondCardMark enables MP "polite" conditional card mark
|
||||
// stores. In theory we could relax the load from ctrl() to
|
||||
// no_ctrl, but that doesn't buy much latitude.
|
||||
Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type);
|
||||
__ if_then(card_val, BoolTest::ne, zero);
|
||||
}
|
||||
|
||||
// Smash zero into card
|
||||
if(!ct->scanned_concurrently()) {
|
||||
__ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered);
|
||||
} else {
|
||||
// Specialized path for CM store barrier
|
||||
__ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, T_BYTE, adr_type);
|
||||
}
|
||||
|
||||
if (UseCondCardMark) {
|
||||
__ end_if();
|
||||
}
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
kit->final_sync(ideal);
|
||||
}
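For readability, the IdealKit sequence above amounts to the classic card-mark store; the helper below is an editor's restatement in plain C++ (conceptual_card_mark is hypothetical), assuming a byte-per-card table:
// Editor's sketch, not part of the patch: the logical effect of the emitted IR.
static void conceptual_card_mark(volatile jbyte* byte_map_base, uintptr_t field_addr) {
  volatile jbyte* card = byte_map_base + (field_addr >> CardTable::card_shift);
  if (!UseCondCardMark || *card != 0) {   // optional "polite" conditional mark
    *card = 0;                            // 0 is the dirty-card value stored above
  }
}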
|
||||
|
||||
void CardTableBarrierSetC2::clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const {
|
||||
BarrierSetC2::clone(kit, src, dst, size, is_array);
|
||||
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
|
||||
|
||||
// If necessary, emit some card marks afterwards. (Non-arrays only.)
|
||||
bool card_mark = !is_array && !use_ReduceInitialCardMarks();
|
||||
if (card_mark) {
|
||||
assert(!is_array, "");
|
||||
// Put in store barrier for any and all oops we are sticking
|
||||
// into this object. (We could avoid this if we could prove
|
||||
// that the object type contains no oop fields at all.)
|
||||
Node* no_particular_value = NULL;
|
||||
Node* no_particular_field = NULL;
|
||||
int raw_adr_idx = Compile::AliasIdxRaw;
|
||||
post_barrier(kit, kit->control(),
|
||||
kit->memory(raw_adr_type),
|
||||
dst,
|
||||
no_particular_field,
|
||||
raw_adr_idx,
|
||||
no_particular_value,
|
||||
T_OBJECT,
|
||||
false);
|
||||
}
|
||||
}
|
||||
|
||||
bool CardTableBarrierSetC2::use_ReduceInitialCardMarks() const {
|
||||
return ReduceInitialCardMarks;
|
||||
}
|
||||
|
||||
bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const {
|
||||
return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM;
|
||||
}
|
||||
|
||||
void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const {
|
||||
assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required");
|
||||
Node *shift = node->unique_out();
|
||||
Node *addp = shift->unique_out();
|
||||
for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
|
||||
Node *mem = addp->last_out(j);
|
||||
if (UseCondCardMark && mem->is_Load()) {
|
||||
assert(mem->Opcode() == Op_LoadB, "unexpected code shape");
|
||||
// The load is checking if the card has been written so
|
||||
// replace it with zero to fold the test.
|
||||
macro->replace_node(mem, macro->intcon(0));
|
||||
continue;
|
||||
}
|
||||
assert(mem->is_Store(), "store required");
|
||||
macro->replace_node(mem, mem->in(MemNode::Memory));
|
||||
}
|
||||
}
|
||||
|
||||
bool CardTableBarrierSetC2::array_copy_requires_gc_barriers(BasicType type) const {
|
||||
return !use_ReduceInitialCardMarks();
|
||||
}
53 src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp Normal file
@ -0,0 +1,53 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/

#ifndef SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP
#define SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP

#include "gc/shared/c2/modRefBarrierSetC2.hpp"

class CardTableBarrierSetC2: public ModRefBarrierSetC2 {
protected:
  virtual void post_barrier(GraphKit* kit,
                            Node* ctl,
                            Node* store,
                            Node* obj,
                            Node* adr,
                            uint adr_idx,
                            Node* val,
                            BasicType bt,
                            bool use_precise) const;

  Node* byte_map_base_node(GraphKit* kit) const;

public:
  virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const;
  virtual bool is_gc_barrier_node(Node* node) const;
  virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const;
  virtual bool array_copy_requires_gc_barriers(BasicType type) const;

  bool use_ReduceInitialCardMarks() const;
};

#endif // SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP
135 src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.cpp Normal file
@ -0,0 +1,135 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "opto/arraycopynode.hpp"
|
||||
#include "opto/graphKit.hpp"
|
||||
#include "opto/idealKit.hpp"
|
||||
#include "opto/narrowptrnode.hpp"
|
||||
#include "gc/shared/c2/modRefBarrierSetC2.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
Node* ModRefBarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) const {
|
||||
DecoratorSet decorators = access.decorators();
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
const TypePtr* adr_type = access.addr().type();
|
||||
Node* adr = access.addr().node();
|
||||
|
||||
bool on_array = (decorators & IN_HEAP_ARRAY) != 0;
|
||||
bool anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0;
|
||||
bool on_heap = (decorators & IN_HEAP) != 0;
|
||||
bool use_precise = on_array || anonymous;
|
||||
|
||||
if (!access.is_oop() || (!on_heap && !anonymous)) {
|
||||
return BarrierSetC2::store_at_resolved(access, val);
|
||||
}
|
||||
|
||||
uint adr_idx = kit->C->get_alias_index(adr_type);
|
||||
assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
|
||||
|
||||
pre_barrier(kit, true /* do_load */, kit->control(), access.base(), adr, adr_idx, val.node(),
|
||||
static_cast<const TypeOopPtr*>(val.type()), NULL /* pre_val */, access.type());
|
||||
Node* store = BarrierSetC2::store_at_resolved(access, val);
|
||||
post_barrier(kit, kit->control(), access.raw_access(), access.base(), adr, adr_idx, val.node(),
|
||||
access.type(), use_precise);
|
||||
|
||||
return store;
|
||||
}
|
||||
|
||||
Node* ModRefBarrierSetC2::atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const {
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
if (!access.is_oop()) {
|
||||
return BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
|
||||
}
|
||||
|
||||
pre_barrier(kit, false /* do_load */,
|
||||
kit->control(), NULL, NULL, max_juint, NULL, NULL,
|
||||
expected_val /* pre_val */, T_OBJECT);
|
||||
|
||||
Node* result = BarrierSetC2::atomic_cmpxchg_val_at_resolved(access, expected_val, new_val, value_type);
|
||||
|
||||
post_barrier(kit, kit->control(), access.raw_access(), access.base(),
|
||||
access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
Node* ModRefBarrierSetC2::atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
|
||||
Node* new_val, const Type* value_type) const {
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
if (!access.is_oop()) {
|
||||
return BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
|
||||
}
|
||||
|
||||
pre_barrier(kit, false /* do_load */,
|
||||
kit->control(), NULL, NULL, max_juint, NULL, NULL,
|
||||
expected_val /* pre_val */, T_OBJECT);
|
||||
|
||||
Node* load_store = BarrierSetC2::atomic_cmpxchg_bool_at_resolved(access, expected_val, new_val, value_type);
|
||||
|
||||
// Emit the post barrier only when the actual store happened. This check only
// makes sense for LS_cmp_* operations, which can fail to set the value.
|
||||
// LS_cmp_exchange does not produce any branches by default, so there is no
|
||||
// boolean result to piggyback on. TODO: When we merge CompareAndSwap with
|
||||
// CompareAndExchange and move branches here, it would make sense to conditionalize
|
||||
// post_barriers for LS_cmp_exchange as well.
|
||||
//
|
||||
// CAS success path is marked more likely since we anticipate this is a performance
|
||||
// critical path, while CAS failure path can use the penalty for going through unlikely
|
||||
// path as backoff. Which is still better than doing a store barrier there.
|
||||
IdealKit ideal(kit);
|
||||
ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
|
||||
kit->sync_kit(ideal);
|
||||
post_barrier(kit, ideal.ctrl(), access.raw_access(), access.base(),
|
||||
access.addr().node(), access.alias_idx(), new_val, T_OBJECT, true);
|
||||
ideal.sync_kit(kit);
|
||||
} ideal.end_if();
|
||||
kit->final_sync(ideal);
|
||||
|
||||
return load_store;
|
||||
}
|
||||
|
||||
Node* ModRefBarrierSetC2::atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const {
|
||||
GraphKit* kit = access.kit();
|
||||
|
||||
Node* result = BarrierSetC2::atomic_xchg_at_resolved(access, new_val, value_type);
|
||||
if (!access.is_oop()) {
|
||||
return result;
|
||||
}
|
||||
|
||||
// Don't need to load pre_val. The old value is returned by load_store.
|
||||
// The pre_barrier can execute after the xchg as long as no safepoint
|
||||
// gets inserted between them.
|
||||
pre_barrier(kit, false /* do_load */,
|
||||
kit->control(), NULL, NULL, max_juint, NULL, NULL,
|
||||
result /* pre_val */, T_OBJECT);
|
||||
post_barrier(kit, kit->control(), access.raw_access(), access.base(), access.addr().node(),
|
||||
access.alias_idx(), new_val, T_OBJECT, true);
|
||||
|
||||
return result;
|
||||
}
64 src/hotspot/share/gc/shared/c2/modRefBarrierSetC2.hpp Normal file
@ -0,0 +1,64 @@
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/

#ifndef SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP
#define SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP

#include "gc/shared/c2/barrierSetC2.hpp"

class TypeOopPtr;

class ModRefBarrierSetC2: public BarrierSetC2 {
protected:
  virtual void pre_barrier(GraphKit* kit,
                           bool do_load,
                           Node* ctl,
                           Node* obj,
                           Node* adr,
                           uint adr_idx,
                           Node* val,
                           const TypeOopPtr* val_type,
                           Node* pre_val,
                           BasicType bt) const {}

  virtual void post_barrier(GraphKit* kit,
                            Node* ctl,
                            Node* store,
                            Node* obj,
                            Node* adr,
                            uint adr_idx,
                            Node* val,
                            BasicType bt,
                            bool use_precise) const {}

  virtual Node* store_at_resolved(C2Access& access, C2AccessValue& val) const;

  virtual Node* atomic_cmpxchg_val_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                               Node* new_val, const Type* value_type) const;
  virtual Node* atomic_cmpxchg_bool_at_resolved(C2AtomicAccess& access, Node* expected_val,
                                                Node* new_val, const Type* value_type) const;
  virtual Node* atomic_xchg_at_resolved(C2AtomicAccess& access, Node* new_val, const Type* value_type) const;
};

#endif // SHARE_GC_SHARED_C2_MODREFBARRIERSETC2_HPP
|
|
@ -37,8 +37,12 @@
|
|||
#ifdef COMPILER1
|
||||
#include "gc/shared/c1/cardTableBarrierSetC1.hpp"
|
||||
#endif
|
||||
#ifdef COMPILER2
|
||||
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
|
||||
#endif
|
||||
|
||||
class CardTableBarrierSetC1;
|
||||
class CardTableBarrierSetC2;
|
||||
|
||||
// This kind of "BarrierSet" allows a "CollectedHeap" to detect and
|
||||
// enumerate ref fields that have been modified (since the last
|
||||
|
@ -46,10 +50,12 @@ class CardTableBarrierSetC1;
|
|||
|
||||
CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler,
|
||||
BarrierSetC1* barrier_set_c1,
|
||||
BarrierSetC2* barrier_set_c2,
|
||||
CardTable* card_table,
|
||||
const BarrierSet::FakeRtti& fake_rtti) :
|
||||
ModRefBarrierSet(barrier_set_assembler,
|
||||
barrier_set_c1,
|
||||
barrier_set_c2,
|
||||
fake_rtti.add_tag(BarrierSet::CardTableBarrierSet)),
|
||||
_defer_initial_card_mark(false),
|
||||
_card_table(card_table)
|
||||
|
@ -58,6 +64,7 @@ CardTableBarrierSet::CardTableBarrierSet(BarrierSetAssembler* barrier_set_assemb
|
|||
CardTableBarrierSet::CardTableBarrierSet(CardTable* card_table) :
|
||||
ModRefBarrierSet(make_barrier_set_assembler<CardTableBarrierSetAssembler>(),
|
||||
make_barrier_set_c1<CardTableBarrierSetC1>(),
|
||||
make_barrier_set_c2<CardTableBarrierSetC2>(),
|
||||
BarrierSet::FakeRtti(BarrierSet::CardTableBarrierSet)),
|
||||
_defer_initial_card_mark(false),
|
||||
_card_table(card_table)
|
||||
|
@ -155,7 +162,7 @@ void CardTableBarrierSet::initialize_deferred_card_mark_barriers() {
|
|||
// Used for ReduceInitialCardMarks (when COMPILER2 or JVMCI is used);
|
||||
// otherwise remains unused.
|
||||
#if COMPILER2_OR_JVMCI
|
||||
_defer_initial_card_mark = is_server_compilation_mode_vm() && ReduceInitialCardMarks && can_elide_tlab_store_barriers()
|
||||
_defer_initial_card_mark = is_server_compilation_mode_vm() && ReduceInitialCardMarks
|
||||
&& (DeferInitialCardMark || card_mark_must_follow_store());
|
||||
#else
|
||||
assert(_defer_initial_card_mark == false, "Who would set it?");
|
||||
|
|
|
@ -54,6 +54,7 @@ class CardTableBarrierSet: public ModRefBarrierSet {
|
|||
|
||||
CardTableBarrierSet(BarrierSetAssembler* barrier_set_assembler,
|
||||
BarrierSetC1* barrier_set_c1,
|
||||
BarrierSetC2* barrier_set_c2,
|
||||
CardTable* card_table,
|
||||
const BarrierSet::FakeRtti& fake_rtti);
|
||||
|
||||
|
@ -89,23 +90,6 @@ class CardTableBarrierSet: public ModRefBarrierSet {
|
|||
// remembered set.
|
||||
void flush_deferred_card_mark_barrier(JavaThread* thread);
|
||||
|
||||
// Can a compiler initialize a new object without store barriers?
|
||||
// This permission only extends from the creation of a new object
|
||||
// via a TLAB up to the first subsequent safepoint. If such permission
|
||||
// is granted for this heap type, the compiler promises to call
|
||||
// defer_store_barrier() below on any slow path allocation of
|
||||
// a new object for which such initializing store barriers will
|
||||
// have been elided. G1, like CMS, allows this, but should be
|
||||
// ready to provide a compensating write barrier as necessary
|
||||
// if that storage came out of a non-young region. The efficiency
|
||||
// of this implementation depends crucially on being able to
|
||||
// answer very efficiently in constant time whether a piece of
|
||||
// storage in the heap comes from a young region or not.
|
||||
// See ReduceInitialCardMarks.
|
||||
virtual bool can_elide_tlab_store_barriers() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// If a compiler is eliding store barriers for TLAB-allocated objects,
|
||||
// we will be informed of a slow-path allocation by a call
|
||||
// to on_slowpath_allocation_exit() below. Such a call precedes the
|
||||
|
|
|
@ -34,9 +34,11 @@ class ModRefBarrierSet: public BarrierSet {
|
|||
protected:
|
||||
ModRefBarrierSet(BarrierSetAssembler* barrier_set_assembler,
|
||||
BarrierSetC1* barrier_set_c1,
|
||||
BarrierSetC2* barrier_set_c2,
|
||||
const BarrierSet::FakeRtti& fake_rtti)
|
||||
: BarrierSet(barrier_set_assembler,
|
||||
barrier_set_c1,
|
||||
barrier_set_c2,
|
||||
fake_rtti.add_tag(BarrierSet::ModRef)) { }
|
||||
~ModRefBarrierSet() { }
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -23,9 +23,13 @@
|
|||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "gc/shared/barrierSet.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "gc/shared/c2/cardTableBarrierSetC2.hpp"
|
||||
#include "opto/arraycopynode.hpp"
|
||||
#include "opto/graphKit.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
ArrayCopyNode::ArrayCopyNode(Compile* C, bool alloc_tightly_coupled, bool has_negative_length_guard)
|
||||
: CallNode(arraycopy_type(), NULL, TypeRawPtr::BOTTOM),
|
||||
|
@ -252,7 +256,9 @@ bool ArrayCopyNode::prepare_array_copy(PhaseGVN *phase, bool can_reshape,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() || !GraphKit::use_ReduceInitialCardMarks())) {
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
if (dest_elem == T_OBJECT && (!is_alloc_tightly_coupled() ||
|
||||
bs->array_copy_requires_gc_barriers(T_OBJECT))) {
|
||||
// It's an object array copy but we can't emit the card marking
|
||||
// that is needed
|
||||
return false;
|
||||
|
@ -434,9 +440,10 @@ bool ArrayCopyNode::finish_transform(PhaseGVN *phase, bool can_reshape,
|
|||
if (is_clonebasic()) {
|
||||
Node* out_mem = proj_out(TypeFunc::Memory);
|
||||
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
if (out_mem->outcnt() != 1 || !out_mem->raw_out(0)->is_MergeMem() ||
|
||||
out_mem->raw_out(0)->outcnt() != 1 || !out_mem->raw_out(0)->raw_out(0)->is_MemBar()) {
|
||||
assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking");
|
||||
assert(bs->array_copy_requires_gc_barriers(T_OBJECT), "can only happen with card marking");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -643,49 +650,13 @@ bool ArrayCopyNode::may_modify_helper(const TypeOopPtr *t_oop, Node* n, PhaseTra
|
|||
return false;
|
||||
}
|
||||
|
||||
static Node* step_over_gc_barrier(Node* c) {
|
||||
#if INCLUDE_G1GC
|
||||
if (UseG1GC && !GraphKit::use_ReduceInitialCardMarks() &&
|
||||
c != NULL && c->is_Region() && c->req() == 3) {
|
||||
for (uint i = 1; i < c->req(); i++) {
|
||||
if (c->in(i) != NULL && c->in(i)->is_Region() &&
|
||||
c->in(i)->req() == 3) {
|
||||
Node* r = c->in(i);
|
||||
for (uint j = 1; j < r->req(); j++) {
|
||||
if (r->in(j) != NULL && r->in(j)->is_Proj() &&
|
||||
r->in(j)->in(0) != NULL &&
|
||||
r->in(j)->in(0)->Opcode() == Op_CallLeaf &&
|
||||
r->in(j)->in(0)->as_Call()->entry_point() == CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post)) {
|
||||
Node* call = r->in(j)->in(0);
|
||||
c = c->in(i == 1 ? 2 : 1);
|
||||
if (c != NULL) {
|
||||
c = c->in(0);
|
||||
if (c != NULL) {
|
||||
c = c->in(0);
|
||||
assert(call->in(0) == NULL ||
|
||||
call->in(0)->in(0) == NULL ||
|
||||
call->in(0)->in(0)->in(0) == NULL ||
|
||||
call->in(0)->in(0)->in(0)->in(0) == NULL ||
|
||||
call->in(0)->in(0)->in(0)->in(0)->in(0) == NULL ||
|
||||
c == call->in(0)->in(0)->in(0)->in(0)->in(0), "bad barrier shape");
|
||||
return c;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // INCLUDE_G1GC
|
||||
return c;
|
||||
}
|
||||
|
||||
bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTransform *phase, ArrayCopyNode*& ac) {
|
||||
|
||||
Node* c = mb->in(0);
|
||||
|
||||
// step over g1 gc barrier if we're at a clone with ReduceInitialCardMarks off
|
||||
c = step_over_gc_barrier(c);
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
// step over g1 gc barrier if we're at e.g. a clone with ReduceInitialCardMarks off
|
||||
c = bs->step_over_gc_barrier(c);
|
||||
|
||||
CallNode* call = NULL;
|
||||
if (c != NULL && c->is_Region()) {
|
||||
|
@ -701,7 +672,11 @@ bool ArrayCopyNode::may_modify(const TypeOopPtr *t_oop, MemBarNode* mb, PhaseTra
|
|||
}
|
||||
} else if (may_modify_helper(t_oop, c->in(0), phase, call)) {
|
||||
ac = call->isa_ArrayCopy();
|
||||
assert(c == mb->in(0) || (ac != NULL && ac->is_clonebasic() && !GraphKit::use_ReduceInitialCardMarks()), "only for clone");
|
||||
#ifdef ASSERT
|
||||
bool use_ReduceInitialCardMarks = BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet) &&
|
||||
static_cast<CardTableBarrierSetC2*>(bs)->use_ReduceInitialCardMarks();
|
||||
assert(c == mb->in(0) || (ac != NULL && ac->is_clonebasic() && !use_ReduceInitialCardMarks), "only for clone");
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -749,4 +724,3 @@ bool ArrayCopyNode::modifies(intptr_t offset_lo, intptr_t offset_hi, PhaseTransf
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,8 @@
|
|||
#include "compiler/compileLog.hpp"
|
||||
#include "compiler/disassembler.hpp"
|
||||
#include "compiler/oopMap.hpp"
|
||||
#include "gc/shared/barrierSet.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
#include "opto/block.hpp"
|
||||
|
@ -414,6 +416,8 @@ void Compile::remove_useless_nodes(Unique_Node_List &useful) {
|
|||
remove_opaque4_node(opaq);
|
||||
}
|
||||
}
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->eliminate_useless_gc_barriers(useful);
|
||||
// clean up the late inline lists
|
||||
remove_useless_late_inlines(&_string_late_inlines, useful);
|
||||
remove_useless_late_inlines(&_boxing_late_inlines, useful);
|
||||
|
@ -637,6 +641,7 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
|
|||
_stub_function(NULL),
|
||||
_stub_entry_point(NULL),
|
||||
_method(target),
|
||||
_barrier_set_state(BarrierSet::barrier_set()->barrier_set_c2()->create_barrier_state(comp_arena())),
|
||||
_entry_bci(osr_bci),
|
||||
_initial_gvn(NULL),
|
||||
_for_igvn(NULL),
|
||||
|
@ -772,17 +777,12 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr
|
|||
StartNode* s = new StartNode(root(), tf()->domain());
|
||||
initial_gvn()->set_type_bottom(s);
|
||||
init_start(s);
|
||||
if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
|
||||
if (method()->intrinsic_id() == vmIntrinsics::_Reference_get) {
|
||||
// With java.lang.ref.reference.get() we must go through the
|
||||
// intrinsic when G1 is enabled - even when get() is the root
|
||||
// intrinsic - even when get() is the root
|
||||
// method of the compile - so that, if necessary, the value in
|
||||
// the referent field of the reference object gets recorded by
|
||||
// the pre-barrier code.
|
||||
// Specifically, if G1 is enabled, the value in the referent
|
||||
// field is recorded by the G1 SATB pre barrier. This will
|
||||
// result in the referent being marked live and the reference
|
||||
// object removed from the list of discovered references during
|
||||
// reference processing.
|
||||
cg = find_intrinsic(method(), false);
|
||||
}
|
||||
if (cg == NULL) {
|
||||
|
@ -2334,6 +2334,9 @@ void Compile::Optimize() {
|
|||
if (failing()) return;
|
||||
}
|
||||
}
|
||||
|
||||
if (failing()) return;
|
||||
|
||||
// Ensure that major progress is now clear
|
||||
C->clear_major_progress();
|
||||
|
||||
|
@ -2350,6 +2353,11 @@ void Compile::Optimize() {
|
|||
igvn.optimize();
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->verify_gc_barriers(false);
|
||||
#endif
|
||||
|
||||
{
|
||||
TracePhase tp("macroExpand", &timers[_t_macroExpand]);
|
||||
PhaseMacroExpand mex(igvn);
|
||||
|
|
|
@ -359,6 +359,9 @@ class Compile : public Phase {
|
|||
const char* _stub_name; // Name of stub or adapter being compiled, or NULL
|
||||
address _stub_entry_point; // Compile code entry for generated stub, or NULL
|
||||
|
||||
// For GC
|
||||
void* _barrier_set_state;
|
||||
|
||||
// Control of this compilation.
|
||||
int _num_loop_opts; // Number of iterations for doing loop optimizations
|
||||
int _max_inline_size; // Max inline size for this compilation
|
||||
|
@ -530,6 +533,8 @@ class Compile : public Phase {
|
|||
|
||||
public:
|
||||
|
||||
void* barrier_set_state() const { return _barrier_set_state; }
|
||||
|
||||
outputStream* print_inlining_stream() const {
|
||||
assert(print_inlining() || print_intrinsics(), "PrintInlining off?");
|
||||
return _print_inlining_stream;
|
||||
|
@ -1349,7 +1354,6 @@ class Compile : public Phase {
|
|||
// supporting clone_map
|
||||
CloneMap& clone_map();
|
||||
void set_clone_map(Dict* d);
|
||||
|
||||
};
|
||||
|
||||
#endif // SHARE_VM_OPTO_COMPILE_HPP
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include "precompiled.hpp"
|
||||
#include "ci/bcEscapeAnalyzer.hpp"
|
||||
#include "compiler/compileLog.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "libadt/vectset.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
|
@ -980,10 +981,9 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
|||
arg_has_oops && (i > TypeFunc::Parms);
|
||||
#ifdef ASSERT
|
||||
if (!(is_arraycopy ||
|
||||
BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(call) ||
|
||||
(call->as_CallLeaf()->_name != NULL &&
|
||||
(strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
|
||||
(strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "updateBytesAdler32") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
|
||||
|
@ -3285,9 +3285,7 @@ void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist,
|
|||
(op == Op_StrCompressedCopy || op == Op_StrInflatedCopy)) {
|
||||
// They overwrite memory edge corresponding to destination array,
|
||||
memnode_worklist.append_if_missing(use);
|
||||
} else if (!(op == Op_StoreCM ||
|
||||
(op == Op_CallLeaf && use->as_CallLeaf()->_name != NULL &&
|
||||
strcmp(use->as_CallLeaf()->_name, "g1_wb_pre") == 0) ||
|
||||
} else if (!(BarrierSet::barrier_set()->barrier_set_c2()->is_gc_barrier_node(use) ||
|
||||
op == Op_AryEq || op == Op_StrComp || op == Op_HasNegatives ||
|
||||
op == Op_StrCompressedCopy || op == Op_StrInflatedCopy ||
|
||||
op == Op_StrEquals || op == Op_StrIndexOf || op == Op_StrIndexOfChar)) {
|
||||
|
|
|
@ -26,9 +26,7 @@
|
|||
#include "ci/ciUtilities.hpp"
|
||||
#include "compiler/compileLog.hpp"
|
||||
#include "gc/shared/barrierSet.hpp"
|
||||
#include "gc/shared/cardTable.hpp"
|
||||
#include "gc/shared/cardTableBarrierSet.hpp"
|
||||
#include "gc/shared/collectedHeap.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "interpreter/interpreter.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
|
@ -45,18 +43,14 @@
|
|||
#include "opto/runtime.hpp"
|
||||
#include "runtime/deoptimization.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
#if INCLUDE_G1GC
|
||||
#include "gc/g1/g1CardTable.hpp"
|
||||
#include "gc/g1/g1ThreadLocalData.hpp"
|
||||
#include "gc/g1/heapRegion.hpp"
|
||||
#endif // INCLUDE_G1GC
|
||||
|
||||
//----------------------------GraphKit-----------------------------------------
|
||||
// Main utility constructor.
|
||||
GraphKit::GraphKit(JVMState* jvms)
|
||||
: Phase(Phase::Parser),
|
||||
_env(C->env()),
|
||||
_gvn(*C->initial_gvn())
|
||||
_gvn(*C->initial_gvn()),
|
||||
_barrier_set(BarrierSet::barrier_set()->barrier_set_c2())
|
||||
{
|
||||
_exceptions = jvms->map()->next_exception();
|
||||
if (_exceptions != NULL) jvms->map()->set_next_exception(NULL);
|
||||
|
@ -67,7 +61,8 @@ GraphKit::GraphKit(JVMState* jvms)
|
|||
GraphKit::GraphKit()
|
||||
: Phase(Phase::Parser),
|
||||
_env(C->env()),
|
||||
_gvn(*C->initial_gvn())
|
||||
_gvn(*C->initial_gvn()),
|
||||
_barrier_set(BarrierSet::barrier_set()->barrier_set_c2())
|
||||
{
|
||||
_exceptions = NULL;
|
||||
set_map(NULL);
|
||||
|
@ -610,8 +605,7 @@ void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) {
|
|||
|
||||
Node *adr = basic_plus_adr(ex_node, ex_node, offset);
|
||||
const TypeOopPtr* val_type = TypeOopPtr::make_from_klass(env()->String_klass());
|
||||
// Conservatively release stores of object references.
|
||||
Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, MemNode::release);
|
||||
Node *store = access_store_at(control(), ex_node, adr, adr_typ, null(), val_type, T_OBJECT, IN_HEAP);
|
||||
|
||||
add_exception_state(make_exception_state(ex_node));
|
||||
return;
|
||||
|
@ -1550,145 +1544,142 @@ Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
|
|||
return st;
|
||||
}
|
||||
|
||||
|
||||
void GraphKit::pre_barrier(bool do_load,
|
||||
Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint adr_idx,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
Node* pre_val,
|
||||
BasicType bt) {
|
||||
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
set_control(ctl);
|
||||
switch (bs->kind()) {
|
||||
|
||||
#if INCLUDE_G1GC
|
||||
case BarrierSet::G1BarrierSet:
|
||||
g1_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case BarrierSet::CardTableBarrierSet:
|
||||
break;
|
||||
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
bool GraphKit::can_move_pre_barrier() const {
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
switch (bs->kind()) {
|
||||
|
||||
#if INCLUDE_G1GC
|
||||
case BarrierSet::G1BarrierSet:
|
||||
return true; // Can move it if no safepoint
|
||||
#endif
|
||||
|
||||
case BarrierSet::CardTableBarrierSet:
|
||||
return true; // There is no pre-barrier
|
||||
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void GraphKit::post_barrier(Node* ctl,
|
||||
Node* store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint adr_idx,
|
||||
Node* val,
|
||||
BasicType bt,
|
||||
bool use_precise) {
|
||||
BarrierSet* bs = BarrierSet::barrier_set();
|
||||
set_control(ctl);
|
||||
switch (bs->kind()) {
|
||||
#if INCLUDE_G1GC
|
||||
case BarrierSet::G1BarrierSet:
|
||||
g1_write_barrier_post(store, obj, adr, adr_idx, val, bt, use_precise);
|
||||
break;
|
||||
#endif
|
||||
|
||||
case BarrierSet::CardTableBarrierSet:
|
||||
write_barrier_post(store, obj, adr, adr_idx, val, use_precise);
|
||||
break;
|
||||
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
Node* GraphKit::store_oop(Node* ctl,
|
||||
Node* GraphKit::access_store_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
const Type* val_type,
|
||||
BasicType bt,
|
||||
bool use_precise,
|
||||
MemNode::MemOrd mo,
|
||||
bool mismatched) {
|
||||
DecoratorSet decorators) {
|
||||
// Transformation of a value which could be NULL pointer (CastPP #NULL)
|
||||
// could be delayed during Parse (for example, in adjust_map_after_if()).
|
||||
// Execute transformation here to avoid barrier generation in such case.
|
||||
if (_gvn.type(val) == TypePtr::NULL_PTR)
|
||||
if (_gvn.type(val) == TypePtr::NULL_PTR) {
|
||||
val = _gvn.makecon(TypePtr::NULL_PTR);
|
||||
}
|
||||
|
||||
set_control(ctl);
|
||||
if (stopped()) return top(); // Dead path ?
|
||||
|
||||
assert(bt == T_OBJECT, "sanity");
|
||||
assert(val != NULL, "not dead path");
|
||||
uint adr_idx = C->get_alias_index(adr_type);
|
||||
assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
|
||||
|
||||
pre_barrier(true /* do_load */,
|
||||
control(), obj, adr, adr_idx, val, val_type,
|
||||
NULL /* pre_val */,
|
||||
bt);
|
||||
|
||||
Node* store = store_to_memory(control(), adr, val, bt, adr_idx, mo, mismatched);
|
||||
post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
|
||||
return store;
|
||||
if (stopped()) {
|
||||
return top(); // Dead path ?
|
||||
}
|
||||
|
||||
// Could be an array or object we don't know at compile time (unsafe ref.)
|
||||
Node* GraphKit::store_oop_to_unknown(Node* ctl,
|
||||
Node* obj, // containing obj
|
||||
assert(val != NULL, "not dead path");
|
||||
|
||||
C2AccessValuePtr addr(adr, adr_type);
|
||||
C2AccessValue value(val, val_type);
|
||||
C2Access access(this, decorators | C2_WRITE_ACCESS, bt, obj, addr);
|
||||
if (access.is_raw()) {
|
||||
return _barrier_set->BarrierSetC2::store_at(access, value);
|
||||
} else {
|
||||
return _barrier_set->store_at(access, value);
|
||||
}
|
||||
}
|
||||
|
||||
Node* GraphKit::access_load_at(Node* obj, // containing obj
|
||||
Node* adr, // actual address to store val at
|
||||
const TypePtr* adr_type,
|
||||
Node* val,
|
||||
const Type* val_type,
|
||||
BasicType bt,
|
||||
MemNode::MemOrd mo,
|
||||
bool mismatched) {
|
||||
Compile::AliasType* at = C->alias_type(adr_type);
|
||||
const TypeOopPtr* val_type = NULL;
|
||||
if (adr_type->isa_instptr()) {
|
||||
if (at->field() != NULL) {
|
||||
// known field. This code is a copy of the do_put_xxx logic.
|
||||
ciField* field = at->field();
|
||||
if (!field->type()->is_loaded()) {
|
||||
val_type = TypeInstPtr::BOTTOM;
|
||||
} else {
|
||||
val_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
|
||||
}
|
||||
}
|
||||
} else if (adr_type->isa_aryptr()) {
|
||||
val_type = adr_type->is_aryptr()->elem()->make_oopptr();
|
||||
}
|
||||
if (val_type == NULL) {
|
||||
val_type = TypeInstPtr::BOTTOM;
|
||||
}
|
||||
return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo, mismatched);
|
||||
DecoratorSet decorators) {
|
||||
if (stopped()) {
|
||||
return top(); // Dead path ?
|
||||
}
|
||||
|
||||
C2AccessValuePtr addr(adr, adr_type);
|
||||
C2Access access(this, decorators | C2_READ_ACCESS, bt, obj, addr);
|
||||
if (access.is_raw()) {
|
||||
return _barrier_set->BarrierSetC2::load_at(access, val_type);
|
||||
} else {
|
||||
return _barrier_set->load_at(access, val_type);
|
||||
}
|
||||
}
|
||||
|
||||
Node* GraphKit::access_atomic_cmpxchg_val_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
int alias_idx,
|
||||
Node* expected_val,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
DecoratorSet decorators) {
|
||||
set_control(ctl);
|
||||
C2AccessValuePtr addr(adr, adr_type);
|
||||
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS,
|
||||
bt, obj, addr, alias_idx);
|
||||
if (access.is_raw()) {
|
||||
return _barrier_set->BarrierSetC2::atomic_cmpxchg_val_at(access, expected_val, new_val, value_type);
|
||||
} else {
|
||||
return _barrier_set->atomic_cmpxchg_val_at(access, expected_val, new_val, value_type);
|
||||
}
|
||||
}
|
||||
|
||||
Node* GraphKit::access_atomic_cmpxchg_bool_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
int alias_idx,
|
||||
Node* expected_val,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
DecoratorSet decorators) {
|
||||
set_control(ctl);
|
||||
C2AccessValuePtr addr(adr, adr_type);
|
||||
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS,
|
||||
bt, obj, addr, alias_idx);
|
||||
if (access.is_raw()) {
|
||||
return _barrier_set->BarrierSetC2::atomic_cmpxchg_bool_at(access, expected_val, new_val, value_type);
|
||||
} else {
|
||||
return _barrier_set->atomic_cmpxchg_bool_at(access, expected_val, new_val, value_type);
|
||||
}
|
||||
}
|
||||
|
||||
Node* GraphKit::access_atomic_xchg_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
int alias_idx,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
DecoratorSet decorators) {
|
||||
set_control(ctl);
|
||||
C2AccessValuePtr addr(adr, adr_type);
|
||||
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS,
|
||||
bt, obj, addr, alias_idx);
|
||||
if (access.is_raw()) {
|
||||
return _barrier_set->BarrierSetC2::atomic_xchg_at(access, new_val, value_type);
|
||||
} else {
|
||||
return _barrier_set->atomic_xchg_at(access, new_val, value_type);
|
||||
}
|
||||
}
|
||||
|
||||
Node* GraphKit::access_atomic_add_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
int alias_idx,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
DecoratorSet decorators) {
|
||||
set_control(ctl);
|
||||
C2AccessValuePtr addr(adr, adr_type);
|
||||
C2AtomicAccess access(this, decorators | C2_READ_ACCESS | C2_WRITE_ACCESS, bt, obj, addr, alias_idx);
|
||||
if (access.is_raw()) {
|
||||
return _barrier_set->BarrierSetC2::atomic_add_at(access, new_val, value_type);
|
||||
} else {
|
||||
return _barrier_set->atomic_add_at(access, new_val, value_type);
|
||||
}
|
||||
}
|
||||
|
||||
void GraphKit::access_clone(Node* ctl, Node* src, Node* dst, Node* size, bool is_array) {
|
||||
set_control(ctl);
|
||||
return _barrier_set->clone(this, src, dst, size, is_array);
|
||||
}
|
||||
|
||||
//-------------------------array_element_address-------------------------
|
||||
Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt,
|
||||
|
@ -3817,20 +3808,10 @@ void GraphKit::add_predicate(int nargs) {
|
|||
add_predicate_impl(Deoptimization::Reason_loop_limit_check, nargs);
|
||||
}
|
||||
|
||||
//----------------------------- store barriers ----------------------------
|
||||
#define __ ideal.
|
||||
|
||||
bool GraphKit::use_ReduceInitialCardMarks() {
|
||||
BarrierSet *bs = BarrierSet::barrier_set();
|
||||
return bs->is_a(BarrierSet::CardTableBarrierSet)
|
||||
&& barrier_set_cast<CardTableBarrierSet>(bs)->can_elide_tlab_store_barriers()
|
||||
&& ReduceInitialCardMarks;
|
||||
}
|
||||
|
||||
void GraphKit::sync_kit(IdealKit& ideal) {
|
||||
set_all_memory(__ merged_memory());
|
||||
set_i_o(__ i_o());
|
||||
set_control(__ ctrl());
|
||||
set_all_memory(ideal.merged_memory());
|
||||
set_i_o(ideal.i_o());
|
||||
set_control(ideal.ctrl());
|
||||
}
|
||||
|
||||
void GraphKit::final_sync(IdealKit& ideal) {
|
||||
|
@ -3838,541 +3819,6 @@ void GraphKit::final_sync(IdealKit& ideal) {
|
|||
sync_kit(ideal);
|
||||
}
|
||||
|
||||
Node* GraphKit::byte_map_base_node() {
|
||||
// Get base of card map
|
||||
jbyte* card_table_base = ci_card_table_address();
|
||||
if (card_table_base != NULL) {
|
||||
return makecon(TypeRawPtr::make((address)card_table_base));
|
||||
} else {
|
||||
return null();
|
||||
}
|
||||
}
|
||||
|
||||
// vanilla/CMS post barrier
|
||||
// Insert a write-barrier store. This is to let generational GC work; we have
|
||||
// to flag all oop-stores before the next GC point.
|
||||
void GraphKit::write_barrier_post(Node* oop_store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint adr_idx,
|
||||
Node* val,
|
||||
bool use_precise) {
|
||||
// No store check needed if we're storing a NULL or an old object
|
||||
// (latter case is probably a string constant). The concurrent
|
||||
// mark sweep garbage collector, however, needs to have all non-NULL
|
||||
// oop updates flagged via card-marks.
|
||||
if (val != NULL && val->is_Con()) {
|
||||
// must be either an oop or NULL
|
||||
const Type* t = val->bottom_type();
|
||||
if (t == TypePtr::NULL_PTR || t == Type::TOP)
|
||||
// stores of null never (?) need barriers
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& obj == just_allocated_object(control())) {
|
||||
// We can skip marks on a freshly-allocated object in Eden.
|
||||
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
|
||||
// That routine informs GC to take appropriate compensating steps,
|
||||
// upon a slow-path allocation, so as to make this card-mark
|
||||
// elision safe.
|
||||
return;
|
||||
}
|
||||
|
||||
if (!use_precise) {
|
||||
// All card marks for a (non-array) instance are in one place:
|
||||
adr = obj;
|
||||
}
|
||||
// (Else it's an array (or unknown), and we want more precise card marks.)
|
||||
assert(adr != NULL, "");
|
||||
|
||||
IdealKit ideal(this, true);
|
||||
|
||||
// Convert the pointer to an int prior to doing math on it
|
||||
Node* cast = __ CastPX(__ ctrl(), adr);
|
||||
|
||||
// Divide by card size
|
||||
assert(BarrierSet::barrier_set()->is_a(BarrierSet::CardTableBarrierSet),
|
||||
"Only one we handle so far.");
|
||||
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
|
||||
|
||||
// Combine card table base and card offset
|
||||
Node* card_adr = __ AddP(__ top(), byte_map_base_node(), card_offset );
|
||||
|
||||
// Get the alias_index for raw card-mark memory
|
||||
int adr_type = Compile::AliasIdxRaw;
|
||||
Node* zero = __ ConI(0); // Dirty card value
|
||||
BasicType bt = T_BYTE;
|
||||
|
||||
if (UseConcMarkSweepGC && UseCondCardMark) {
|
||||
insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier
|
||||
__ sync_kit(this);
|
||||
}
|
||||
|
||||
if (UseCondCardMark) {
|
||||
// The classic GC reference write barrier is typically implemented
|
||||
// as a store into the global card mark table. Unfortunately
|
||||
// unconditional stores can result in false sharing and excessive
|
||||
// coherence traffic as well as false transactional aborts.
|
||||
// UseCondCardMark enables MP "polite" conditional card mark
|
||||
// stores. In theory we could relax the load from ctrl() to
|
||||
// no_ctrl, but that doesn't buy much latitude.
|
||||
Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, bt, adr_type);
|
||||
__ if_then(card_val, BoolTest::ne, zero);
|
||||
}
|
||||
|
||||
// Smash zero into card
|
||||
if( !UseConcMarkSweepGC ) {
|
||||
__ store(__ ctrl(), card_adr, zero, bt, adr_type, MemNode::unordered);
|
||||
} else {
|
||||
// Specialized path for CM store barrier
|
||||
__ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, bt, adr_type);
|
||||
}
|
||||
|
||||
if (UseCondCardMark) {
|
||||
__ end_if();
|
||||
}
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
final_sync(ideal);
|
||||
}
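
For reference, the card-table arithmetic that the IdealKit sequence above emits (CastPX, URShiftX by card_shift, AddP with byte_map_base, conditional store of the dirty value) corresponds to the standalone C++ sketch below. The card size and the dirty value are illustrative assumptions, not values taken from this patch; the real ones come from CardTable.

#include <cstdint>

// Minimal model of the post-barrier card mark (assumed 512-byte cards and
// dirty == 0; both constants are placeholders for illustration).
static const int     kCardShift = 9;   // log2(assumed card size)
static const uint8_t kDirtyCard = 0;

inline void card_mark(volatile uint8_t* byte_map_base, uintptr_t field_addr,
                      bool use_cond_card_mark) {
  // card address = card table base + (field address >> card_shift)
  volatile uint8_t* card = byte_map_base + (field_addr >> kCardShift);
  if (use_cond_card_mark && *card == kDirtyCard) {
    return;                            // already dirty, skip the write
  }
  *card = kDirtyCard;                  // smash the dirty value into the card
}
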
|
||||
|
||||
#if INCLUDE_G1GC
|
||||
|
||||
/*
|
||||
* Determine if the G1 pre-barrier can be removed. The pre-barrier is
|
||||
* required by SATB to make sure all objects live at the start of the
|
||||
* marking are kept alive, all reference updates need to any previous
|
||||
* reference stored before writing.
|
||||
*
|
||||
* If the previous value is NULL there is no need to save the old value.
|
||||
* References that are NULL are filtered during runtime by the barrier
|
||||
* code to avoid unnecessary queuing.
|
||||
*
|
||||
* However in the case of newly allocated objects it might be possible to
|
||||
* prove that the reference about to be overwritten is NULL during compile
|
||||
* time and avoid adding the barrier code completely.
|
||||
*
|
||||
* The compiler needs to determine that the object in which a field is about
|
||||
* to be written is newly allocated, and that no prior store to the same field
|
||||
* has happened since the allocation.
|
||||
*
|
||||
* Returns true if the pre-barrier can be removed
|
||||
*/
|
||||
bool GraphKit::g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr,
|
||||
BasicType bt, uint adr_idx) {
|
||||
intptr_t offset = 0;
|
||||
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
|
||||
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
|
||||
|
||||
if (offset == Type::OffsetBot) {
|
||||
return false; // cannot unalias unless there are precise offsets
|
||||
}
|
||||
|
||||
if (alloc == NULL) {
|
||||
return false; // No allocation found
|
||||
}
|
||||
|
||||
intptr_t size_in_bytes = type2aelembytes(bt);
|
||||
|
||||
Node* mem = memory(adr_idx); // start searching here...
|
||||
|
||||
for (int cnt = 0; cnt < 50; cnt++) {
|
||||
|
||||
if (mem->is_Store()) {
|
||||
|
||||
Node* st_adr = mem->in(MemNode::Address);
|
||||
intptr_t st_offset = 0;
|
||||
Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
|
||||
|
||||
if (st_base == NULL) {
|
||||
break; // inscrutable pointer
|
||||
}
|
||||
|
||||
// We have found a store with the same base and offset as ours, so break.
|
||||
if (st_base == base && st_offset == offset) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (st_offset != offset && st_offset != Type::OffsetBot) {
|
||||
const int MAX_STORE = BytesPerLong;
|
||||
if (st_offset >= offset + size_in_bytes ||
|
||||
st_offset <= offset - MAX_STORE ||
|
||||
st_offset <= offset - mem->as_Store()->memory_size()) {
|
||||
// Success: The offsets are provably independent.
|
||||
// (You may ask, why not just test st_offset != offset and be done?
|
||||
// The answer is that stores of different sizes can co-exist
|
||||
// in the same sequence of RawMem effects. We sometimes initialize
|
||||
// a whole 'tile' of array elements with a single jint or jlong.)
|
||||
mem = mem->in(MemNode::Memory);
|
||||
continue; // advance through independent store memory
|
||||
}
|
||||
}
|
||||
|
||||
if (st_base != base
|
||||
&& MemNode::detect_ptr_independence(base, alloc, st_base,
|
||||
AllocateNode::Ideal_allocation(st_base, phase),
|
||||
phase)) {
|
||||
// Success: The bases are provably independent.
|
||||
mem = mem->in(MemNode::Memory);
|
||||
continue; // advance through independent store memory
|
||||
}
|
||||
} else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
|
||||
|
||||
InitializeNode* st_init = mem->in(0)->as_Initialize();
|
||||
AllocateNode* st_alloc = st_init->allocation();
|
||||
|
||||
// Make sure that we are looking at the same allocation site.
|
||||
// The alloc variable is guaranteed to not be null here from earlier check.
|
||||
if (alloc == st_alloc) {
|
||||
// Check that the initialization is storing NULL so that no previous store
// has been moved up to directly write a reference
|
||||
Node* captured_store = st_init->find_captured_store(offset,
|
||||
type2aelembytes(T_OBJECT),
|
||||
phase);
|
||||
if (captured_store == NULL || captured_store == st_init->zero_memory()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Unless there is an explicit 'continue', we must bail out here,
|
||||
// because 'mem' is an inscrutable memory state (e.g., a call).
|
||||
break;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
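
In source terms, the removable case is a store into a field of an object that was just allocated, with no intervening store to that field; the previous field value is then provably NULL and the SATB pre-barrier can be dropped. A rough, purely illustrative C++ analogue of the pattern (types and names are made up):

struct Node { Node* next; };

Node* prepend(Node* head) {
  Node* n = new Node();  // fields of the freshly allocated object start out NULL
  n->next = head;        // the previous value of n->next is provably NULL and
                         // nothing was stored to it in between, so the SATB
                         // pre-barrier for this store could be elided
  return n;
}
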
|
||||
|
||||
// G1 pre/post barriers
|
||||
void GraphKit::g1_write_barrier_pre(bool do_load,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint alias_idx,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
Node* pre_val,
|
||||
BasicType bt) {
|
||||
|
||||
// Some sanity checks
|
||||
// Note: val is unused in this routine.
|
||||
|
||||
if (do_load) {
|
||||
// We need to generate the load of the previous value
|
||||
assert(obj != NULL, "must have a base");
|
||||
assert(adr != NULL, "where are we loading from?");
|
||||
assert(pre_val == NULL, "loaded already?");
|
||||
assert(val_type != NULL, "need a type");
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& g1_can_remove_pre_barrier(&_gvn, adr, bt, alias_idx)) {
|
||||
return;
|
||||
}
|
||||
|
||||
} else {
|
||||
// In this case both val_type and alias_idx are unused.
|
||||
assert(pre_val != NULL, "must be loaded already");
|
||||
// Nothing to be done if pre_val is null.
|
||||
if (pre_val->bottom_type() == TypePtr::NULL_PTR) return;
|
||||
assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
|
||||
}
|
||||
assert(bt == T_OBJECT, "or we shouldn't be here");
|
||||
|
||||
IdealKit ideal(this, true);
|
||||
|
||||
Node* tls = __ thread(); // ThreadLocalStorage
|
||||
|
||||
Node* no_ctrl = NULL;
|
||||
Node* no_base = __ top();
|
||||
Node* zero = __ ConI(0);
|
||||
Node* zeroX = __ ConX(0);
|
||||
|
||||
float likely = PROB_LIKELY(0.999);
|
||||
float unlikely = PROB_UNLIKELY(0.999);
|
||||
|
||||
BasicType active_type = in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 ? T_INT : T_BYTE;
|
||||
assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 4 || in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "flag width");
|
||||
|
||||
// Offsets into the thread
|
||||
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
|
||||
const int index_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset());
|
||||
const int buffer_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset());
|
||||
|
||||
// Now the actual pointers into the thread
|
||||
Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
|
||||
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
|
||||
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
|
||||
|
||||
// Now some of the values
|
||||
Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
|
||||
|
||||
// if (!marking)
|
||||
__ if_then(marking, BoolTest::ne, zero, unlikely); {
|
||||
BasicType index_bt = TypeX_X->basic_type();
|
||||
assert(sizeof(size_t) == type2aelembytes(index_bt), "Loading G1 SATBMarkQueue::_index with wrong size.");
|
||||
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, index_bt, Compile::AliasIdxRaw);
|
||||
|
||||
if (do_load) {
|
||||
// load original value
|
||||
// alias_idx correct??
|
||||
pre_val = __ load(__ ctrl(), adr, val_type, bt, alias_idx);
|
||||
}
|
||||
|
||||
// if (pre_val != NULL)
|
||||
__ if_then(pre_val, BoolTest::ne, null()); {
|
||||
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
|
||||
|
||||
// is the queue for this thread full?
|
||||
__ if_then(index, BoolTest::ne, zeroX, likely); {
|
||||
|
||||
// decrement the index
|
||||
Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
|
||||
|
||||
// Now get the buffer location we will log the previous value into and store it
|
||||
Node *log_addr = __ AddP(no_base, buffer, next_index);
|
||||
__ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw, MemNode::unordered);
|
||||
// update the index
|
||||
__ store(__ ctrl(), index_adr, next_index, index_bt, Compile::AliasIdxRaw, MemNode::unordered);
|
||||
|
||||
} __ else_(); {
|
||||
|
||||
// logging buffer is full, call the runtime
|
||||
const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
|
||||
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
|
||||
} __ end_if(); // (!index)
|
||||
} __ end_if(); // (pre_val != NULL)
|
||||
} __ end_if(); // (!marking)
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
final_sync(ideal);
|
||||
}
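
The IdealKit graph built above corresponds, step for step, to the following runtime logic: only log when concurrent marking is active, never log NULL, and fall back to a runtime call when the per-thread SATB buffer is full. A standalone C++ sketch of that control flow; the queue layout and the slow-path signature are simplified assumptions:

#include <cstddef>

// Simplified per-thread SATB queue state (illustrative layout only).
struct SatbQueue {
  bool   active;   // is concurrent marking in progress?
  void** buffer;   // log buffer, filled from the end towards index 0
  size_t index;    // byte offset of the next free slot; 0 means full
};

typedef void (*SlowPath)(void* pre_val, void* thread);

inline void satb_pre_barrier(SatbQueue* q, void** field, void* thread,
                             SlowPath g1_wb_pre) {
  if (!q->active) return;              // if (!marking) skip entirely
  void* pre_val = *field;              // load the value about to be overwritten
  if (pre_val == nullptr) return;      // NULL previous values are never logged
  if (q->index != 0) {                 // room left in the buffer?
    q->index -= sizeof(void*);         // decrement the index...
    *reinterpret_cast<void**>(
        reinterpret_cast<char*>(q->buffer) + q->index) = pre_val;  // ...and log
  } else {
    g1_wb_pre(pre_val, thread);        // buffer full: leaf call into the runtime
  }
}
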
|
||||
|
||||
/*
|
||||
* G1, like any GC with a Young Generation, requires a way to keep track of
* references from the Old Generation to the Young Generation to make sure all
* live objects are found. G1 also needs to keep track of object references
* between different regions to enable evacuation of old regions, which is done
* as part of mixed collections. References are tracked in remembered sets,
* which are continuously updated as references are written, with the help of
* the post-barrier.
*
* To reduce the number of updates to the remembered set, the post-barrier
* filters out updates to fields in objects located in the Young Generation,
* updates within the same region as the reference, writes of NULL, and
* writes to cards already marked dirty by an earlier write.
|
||||
*
|
||||
* Under certain circumstances it is possible to avoid generating the
|
||||
* post-barrier completely if it is possible during compile time to prove
|
||||
* the object is newly allocated and that no safepoint exists between the
|
||||
* allocation and the store.
|
||||
*
|
||||
* In the case of slow allocation, the allocation code must handle the barrier
* as part of the allocation when the allocated object is not located in the
* nursery; this happens for humongous objects. This is similar to how CMS
* handles this case; see the comments for
* CardTableBarrierSet::on_allocation_slowpath_exit and
* OptoRuntime::new_deferred_store_barrier. A deferred card mark is required
* for these objects and is handled in the above-mentioned methods.
|
||||
*
|
||||
* Returns true if the post barrier can be removed
|
||||
*/
|
||||
bool GraphKit::g1_can_remove_post_barrier(PhaseTransform* phase, Node* store,
|
||||
Node* adr) {
|
||||
intptr_t offset = 0;
|
||||
Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
|
||||
AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
|
||||
|
||||
if (offset == Type::OffsetBot) {
|
||||
return false; // cannot unalias unless there are precise offsets
|
||||
}
|
||||
|
||||
if (alloc == NULL) {
|
||||
return false; // No allocation found
|
||||
}
|
||||
|
||||
// Start search from Store node
|
||||
Node* mem = store->in(MemNode::Control);
|
||||
if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
|
||||
|
||||
InitializeNode* st_init = mem->in(0)->as_Initialize();
|
||||
AllocateNode* st_alloc = st_init->allocation();
|
||||
|
||||
// Make sure we are looking at the same allocation
|
||||
if (alloc == st_alloc) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
//
|
||||
// Update the card table and add card address to the queue
|
||||
//
|
||||
void GraphKit::g1_mark_card(IdealKit& ideal,
|
||||
Node* card_adr,
|
||||
Node* oop_store,
|
||||
uint oop_alias_idx,
|
||||
Node* index,
|
||||
Node* index_adr,
|
||||
Node* buffer,
|
||||
const TypeFunc* tf) {
|
||||
|
||||
Node* zero = __ ConI(0);
|
||||
Node* zeroX = __ ConX(0);
|
||||
Node* no_base = __ top();
|
||||
BasicType card_bt = T_BYTE;
|
||||
// Smash zero into card. MUST BE ORDERED WRT TO STORE
|
||||
__ storeCM(__ ctrl(), card_adr, zero, oop_store, oop_alias_idx, card_bt, Compile::AliasIdxRaw);
|
||||
|
||||
// Now do the queue work
|
||||
__ if_then(index, BoolTest::ne, zeroX); {
|
||||
|
||||
Node* next_index = _gvn.transform(new SubXNode(index, __ ConX(sizeof(intptr_t))));
|
||||
Node* log_addr = __ AddP(no_base, buffer, next_index);
|
||||
|
||||
// Order, see storeCM.
|
||||
__ store(__ ctrl(), log_addr, card_adr, T_ADDRESS, Compile::AliasIdxRaw, MemNode::unordered);
|
||||
__ store(__ ctrl(), index_adr, next_index, TypeX_X->basic_type(), Compile::AliasIdxRaw, MemNode::unordered);
|
||||
|
||||
} __ else_(); {
|
||||
__ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), "g1_wb_post", card_adr, __ thread());
|
||||
} __ end_if();
|
||||
|
||||
}
|
||||
|
||||
void GraphKit::g1_write_barrier_post(Node* oop_store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint alias_idx,
|
||||
Node* val,
|
||||
BasicType bt,
|
||||
bool use_precise) {
|
||||
// If we are writing a NULL then we need no post barrier
|
||||
|
||||
if (val != NULL && val->is_Con() && val->bottom_type() == TypePtr::NULL_PTR) {
|
||||
// Must be NULL
|
||||
const Type* t = val->bottom_type();
|
||||
assert(t == Type::TOP || t == TypePtr::NULL_PTR, "must be NULL");
|
||||
// No post barrier if writing NULL
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks() && obj == just_allocated_object(control())) {
|
||||
// We can skip marks on a freshly-allocated object in Eden.
|
||||
// Keep this code in sync with new_deferred_store_barrier() in runtime.cpp.
|
||||
// That routine informs GC to take appropriate compensating steps,
|
||||
// upon a slow-path allocation, so as to make this card-mark
|
||||
// elision safe.
|
||||
return;
|
||||
}
|
||||
|
||||
if (use_ReduceInitialCardMarks()
|
||||
&& g1_can_remove_post_barrier(&_gvn, oop_store, adr)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (!use_precise) {
|
||||
// All card marks for a (non-array) instance are in one place:
|
||||
adr = obj;
|
||||
}
|
||||
// (Else it's an array (or unknown), and we want more precise card marks.)
|
||||
assert(adr != NULL, "");
|
||||
|
||||
IdealKit ideal(this, true);
|
||||
|
||||
Node* tls = __ thread(); // ThreadLocalStorage
|
||||
|
||||
Node* no_base = __ top();
|
||||
float likely = PROB_LIKELY(0.999);
|
||||
float unlikely = PROB_UNLIKELY(0.999);
|
||||
Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val());
|
||||
Node* dirty_card = __ ConI((jint)CardTable::dirty_card_val());
|
||||
Node* zeroX = __ ConX(0);
|
||||
|
||||
// Get the alias_index for raw card-mark memory
|
||||
const TypePtr* card_type = TypeRawPtr::BOTTOM;
|
||||
|
||||
const TypeFunc *tf = OptoRuntime::g1_wb_post_Type();
|
||||
|
||||
// Offsets into the thread
|
||||
const int index_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset());
|
||||
const int buffer_offset = in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset());
|
||||
|
||||
// Pointers into the thread
|
||||
|
||||
Node* buffer_adr = __ AddP(no_base, tls, __ ConX(buffer_offset));
|
||||
Node* index_adr = __ AddP(no_base, tls, __ ConX(index_offset));
|
||||
|
||||
// Now some values
|
||||
// Use ctrl to avoid hoisting these values past a safepoint, which could
|
||||
// potentially reset these fields in the JavaThread.
|
||||
Node* index = __ load(__ ctrl(), index_adr, TypeX_X, TypeX_X->basic_type(), Compile::AliasIdxRaw);
|
||||
Node* buffer = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
|
||||
|
||||
// Convert the store obj pointer to an int prior to doing math on it
|
||||
// Must use ctrl to prevent "integerized oop" existing across safepoint
|
||||
Node* cast = __ CastPX(__ ctrl(), adr);
|
||||
|
||||
// Divide pointer by card size
|
||||
Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) );
|
||||
|
||||
// Combine card table base and card offset
|
||||
Node* card_adr = __ AddP(no_base, byte_map_base_node(), card_offset );
|
||||
|
||||
// If we know the value being stored, check whether the store crosses regions.
|
||||
|
||||
if (val != NULL) {
|
||||
// Does the store cause us to cross regions?
|
||||
|
||||
// Should be able to do an unsigned compare of region_size instead of
|
||||
// an extra shift. Do we have an unsigned compare?
|
||||
// Node* region_size = __ ConI(1 << HeapRegion::LogOfHRGrainBytes);
|
||||
Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes));
|
||||
|
||||
// if (xor_res == 0) same region so skip
|
||||
__ if_then(xor_res, BoolTest::ne, zeroX); {
|
||||
|
||||
// No barrier if we are storing a NULL
|
||||
__ if_then(val, BoolTest::ne, null(), unlikely); {
|
||||
|
||||
// Ok must mark the card if not already dirty
|
||||
|
||||
// load the original value of the card
|
||||
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
|
||||
|
||||
__ if_then(card_val, BoolTest::ne, young_card); {
|
||||
sync_kit(ideal);
|
||||
// Use Op_MemBarVolatile to achieve the effect of a StoreLoad barrier.
|
||||
insert_mem_bar(Op_MemBarVolatile, oop_store);
|
||||
__ sync_kit(this);
|
||||
|
||||
Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
|
||||
__ if_then(card_val_reload, BoolTest::ne, dirty_card); {
|
||||
g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
|
||||
} __ end_if();
|
||||
} __ end_if();
|
||||
} __ end_if();
|
||||
} __ end_if();
|
||||
} else {
|
||||
// The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks.
|
||||
// We don't need a barrier here if the destination is a newly allocated object
|
||||
// in Eden. Otherwise, GC verification breaks because we assume that cards in Eden
|
||||
// are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()).
|
||||
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking");
|
||||
Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw);
|
||||
__ if_then(card_val, BoolTest::ne, young_card); {
|
||||
g1_mark_card(ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf);
|
||||
} __ end_if();
|
||||
}
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
final_sync(ideal);
|
||||
}
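
Putting the filters above together, the generated post-barrier behaves like the following standalone C++ sketch: skip same-region stores and NULL stores, skip young cards, re-check for dirty after a StoreLoad fence, then dirty the card and enqueue its address, or call the runtime when the dirty-card queue is full. Region size, card size and card values are illustrative assumptions, not values taken from this patch:

#include <atomic>
#include <cstddef>
#include <cstdint>

static const int     kLogRegionBytes = 20;  // assumed 1 MB heap regions
static const int     kCardShift      = 9;   // assumed 512-byte cards
static const uint8_t kYoungCard      = 2;   // placeholder for g1_young_card_val()
static const uint8_t kDirtyCard      = 0;   // placeholder for dirty_card_val()

struct DirtyCardQueue { uint8_t** buffer; size_t index; };  // index in bytes

typedef void (*EnqueueSlowPath)(uint8_t* card_adr, void* thread);

inline void g1_post_barrier(uint8_t* byte_map_base, DirtyCardQueue* q,
                            uintptr_t field_adr, uintptr_t new_val,
                            void* thread, EnqueueSlowPath g1_wb_post) {
  if (((field_adr ^ new_val) >> kLogRegionBytes) == 0) return; // same region
  if (new_val == 0) return;                                    // storing NULL
  uint8_t* card = byte_map_base + (field_adr >> kCardShift);
  if (*card == kYoungCard) return;                             // young region
  std::atomic_thread_fence(std::memory_order_seq_cst);         // StoreLoad
  if (*card == kDirtyCard) return;                             // already dirty
  *card = kDirtyCard;                                          // mark the card
  if (q->index != 0) {                                         // enqueue card
    q->index -= sizeof(void*);
    *reinterpret_cast<uint8_t**>(
        reinterpret_cast<char*>(q->buffer) + q->index) = card;
  } else {
    g1_wb_post(card, thread);                                  // queue full
  }
}
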
|
||||
#undef __
|
||||
|
||||
#endif // INCLUDE_G1GC
|
||||
|
||||
Node* GraphKit::load_String_length(Node* ctrl, Node* str) {
|
||||
Node* len = load_array_length(load_String_value(ctrl, str));
|
||||
Node* coder = load_String_coder(ctrl, str);
|
||||
|
@ -4388,9 +3834,9 @@ Node* GraphKit::load_String_value(Node* ctrl, Node* str) {
|
|||
const TypeAryPtr* value_type = TypeAryPtr::make(TypePtr::NotNull,
|
||||
TypeAry::make(TypeInt::BYTE, TypeInt::POS),
|
||||
ciTypeArrayKlass::make(T_BYTE), true, 0);
|
||||
int value_field_idx = C->get_alias_index(value_field_type);
|
||||
Node* load = make_load(ctrl, basic_plus_adr(str, str, value_offset),
|
||||
value_type, T_OBJECT, value_field_idx, MemNode::unordered);
|
||||
Node* p = basic_plus_adr(str, str, value_offset);
|
||||
Node* load = access_load_at(str, p, value_field_type, value_type, T_OBJECT,
|
||||
IN_HEAP | C2_CONTROL_DEPENDENT_LOAD);
|
||||
// String.value field is known to be @Stable.
|
||||
if (UseImplicitStableValues) {
|
||||
load = cast_array_to_stable(load, value_type);
|
||||
|
@ -4416,8 +3862,8 @@ void GraphKit::store_String_value(Node* ctrl, Node* str, Node* value) {
|
|||
const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::NotNull, C->env()->String_klass(),
|
||||
false, NULL, 0);
|
||||
const TypePtr* value_field_type = string_type->add_offset(value_offset);
|
||||
store_oop_to_object(ctrl, str, basic_plus_adr(str, value_offset), value_field_type,
|
||||
value, TypeAryPtr::BYTES, T_OBJECT, MemNode::unordered);
|
||||
access_store_at(ctrl, str, basic_plus_adr(str, value_offset), value_field_type,
|
||||
value, TypeAryPtr::BYTES, T_OBJECT, IN_HEAP);
|
||||
}
|
||||
|
||||
void GraphKit::store_String_coder(Node* ctrl, Node* str, Node* value) {
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
|
||||
#include "ci/ciEnv.hpp"
|
||||
#include "ci/ciMethodData.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
#include "opto/callnode.hpp"
|
||||
#include "opto/cfgnode.hpp"
|
||||
|
@ -38,6 +39,7 @@
|
|||
#include "opto/type.hpp"
|
||||
#include "runtime/deoptimization.hpp"
|
||||
|
||||
class BarrierSetC2;
|
||||
class FastLockNode;
|
||||
class FastUnlockNode;
|
||||
class IdealKit;
|
||||
|
@ -63,6 +65,7 @@ class GraphKit : public Phase {
|
|||
SafePointNode* _exceptions;// Parser map(s) for exception state(s)
|
||||
int _bci; // JVM Bytecode Pointer
|
||||
ciMethod* _method; // JVM Current Method
|
||||
BarrierSetC2* _barrier_set;
|
||||
|
||||
private:
|
||||
int _sp; // JVM Expression Stack Pointer; don't modify directly!
|
||||
|
@ -88,6 +91,7 @@ class GraphKit : public Phase {
|
|||
|
||||
ciEnv* env() const { return _env; }
|
||||
PhaseGVN& gvn() const { return _gvn; }
|
||||
void* barrier_set_state() const { return C->barrier_set_state(); }
|
||||
|
||||
void record_for_igvn(Node* n) const { C->record_for_igvn(n); } // delegate to Compile
|
||||
|
||||
|
@ -103,9 +107,6 @@ class GraphKit : public Phase {
|
|||
Node* zerocon(BasicType bt) const { return _gvn.zerocon(bt); }
|
||||
// (See also macro MakeConX in type.hpp, which uses intcon or longcon.)
|
||||
|
||||
// Helper for byte_map_base
|
||||
Node* byte_map_base_node();
|
||||
|
||||
jint find_int_con(Node* n, jint value_if_unknown) {
|
||||
return _gvn.find_int_con(n, value_if_unknown);
|
||||
}
|
||||
|
@ -569,70 +570,67 @@ class GraphKit : public Phase {
|
|||
bool unaligned = false,
|
||||
bool mismatched = false);
|
||||
|
||||
// Perform decorated accesses
|
||||
|
||||
// All in one pre-barrier, store, post_barrier
|
||||
// Insert a write-barrier'd store. This is to let generational GC
|
||||
// work; we have to flag all oop-stores before the next GC point.
|
||||
//
|
||||
// It comes in 3 flavors of store to an object, array, or unknown.
|
||||
// We use precise card marks for arrays to avoid scanning the entire
|
||||
// array. We use imprecise for object. We use precise for unknown
|
||||
// since we don't know if we have an array or an object or even
|
||||
// where the object starts.
|
||||
//
|
||||
// If val==NULL, it is taken to be a completely unknown value. QQQ
|
||||
|
||||
Node* store_oop(Node* ctl,
|
||||
Node* access_store_at(Node* ctl,
|
||||
Node* obj, // containing obj
|
||||
Node* adr, // actual address to store val at
|
||||
const TypePtr* adr_type,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
const Type* val_type,
|
||||
BasicType bt,
|
||||
bool use_precise,
|
||||
MemNode::MemOrd mo,
|
||||
bool mismatched = false);
|
||||
DecoratorSet decorators);
|
||||
|
||||
Node* store_oop_to_object(Node* ctl,
|
||||
Node* obj, // containing obj
|
||||
Node* access_load_at(Node* obj, // containing obj
|
||||
Node* adr, // actual address to store val at
|
||||
const TypePtr* adr_type,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
const Type* val_type,
|
||||
BasicType bt,
|
||||
MemNode::MemOrd mo) {
|
||||
return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, false, mo);
|
||||
}
|
||||
DecoratorSet decorators);
|
||||
|
||||
Node* store_oop_to_array(Node* ctl,
|
||||
Node* obj, // containing obj
|
||||
Node* adr, // actual address to store val at
|
||||
Node* access_atomic_cmpxchg_val_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
int alias_idx,
|
||||
Node* expected_val,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
MemNode::MemOrd mo) {
|
||||
return store_oop(ctl, obj, adr, adr_type, val, val_type, bt, true, mo);
|
||||
}
|
||||
DecoratorSet decorators);
|
||||
|
||||
// Could be an array or object we don't know at compile time (unsafe ref.)
|
||||
Node* store_oop_to_unknown(Node* ctl,
|
||||
Node* obj, // containing obj
|
||||
Node* adr, // actual address to store val at
|
||||
Node* access_atomic_cmpxchg_bool_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
Node* val,
|
||||
int alias_idx,
|
||||
Node* expected_val,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
MemNode::MemOrd mo,
|
||||
bool mismatched = false);
|
||||
DecoratorSet decorators);
|
||||
|
||||
// For the few case where the barriers need special help
|
||||
void pre_barrier(bool do_load, Node* ctl,
|
||||
Node* obj, Node* adr, uint adr_idx, Node* val, const TypeOopPtr* val_type,
|
||||
Node* pre_val,
|
||||
BasicType bt);
|
||||
Node* access_atomic_xchg_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
int alias_idx,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
DecoratorSet decorators);
|
||||
|
||||
void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
|
||||
Node* val, BasicType bt, bool use_precise);
|
||||
Node* access_atomic_add_at(Node* ctl,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
const TypePtr* adr_type,
|
||||
int alias_idx,
|
||||
Node* new_val,
|
||||
const Type* value_type,
|
||||
BasicType bt,
|
||||
DecoratorSet decorators);
|
||||
|
||||
void access_clone(Node* ctl, Node* src, Node* dst, Node* size, bool is_array);
|
||||
|
||||
// Return addressing for an array element.
|
||||
Node* array_element_address(Node* ary, Node* idx, BasicType elembt,
|
||||
|
@ -754,49 +752,10 @@ class GraphKit : public Phase {
|
|||
// Returns the object (if any) which was created the moment before.
|
||||
Node* just_allocated_object(Node* current_control);
|
||||
|
||||
static bool use_ReduceInitialCardMarks();
|
||||
|
||||
// Sync Ideal and Graph kits.
|
||||
void sync_kit(IdealKit& ideal);
|
||||
void final_sync(IdealKit& ideal);
|
||||
|
||||
// vanilla/CMS post barrier
|
||||
void write_barrier_post(Node *store, Node* obj,
|
||||
Node* adr, uint adr_idx, Node* val, bool use_precise);
|
||||
|
||||
// Allow reordering of pre-barrier with oop store and/or post-barrier.
|
||||
// Used for load_store operations which loads old value.
|
||||
bool can_move_pre_barrier() const;
|
||||
|
||||
#if INCLUDE_G1GC
|
||||
// G1 pre/post barriers
|
||||
void g1_write_barrier_pre(bool do_load,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint alias_idx,
|
||||
Node* val,
|
||||
const TypeOopPtr* val_type,
|
||||
Node* pre_val,
|
||||
BasicType bt);
|
||||
|
||||
void g1_write_barrier_post(Node* store,
|
||||
Node* obj,
|
||||
Node* adr,
|
||||
uint alias_idx,
|
||||
Node* val,
|
||||
BasicType bt,
|
||||
bool use_precise);
|
||||
// Helper function for g1
|
||||
private:
|
||||
void g1_mark_card(IdealKit& ideal, Node* card_adr, Node* store, uint oop_alias_idx,
|
||||
Node* index, Node* index_adr,
|
||||
Node* buffer, const TypeFunc* tf);
|
||||
|
||||
bool g1_can_remove_pre_barrier(PhaseTransform* phase, Node* adr, BasicType bt, uint adr_idx);
|
||||
|
||||
bool g1_can_remove_post_barrier(PhaseTransform* phase, Node* store, Node* adr);
|
||||
#endif // INCLUDE_G1GC
|
||||
|
||||
public:
|
||||
// Helper function to round double arguments before a call
|
||||
void round_double_arguments(ciMethod* dest_method);
|
||||
|
|
|
@ -244,12 +244,9 @@ class LibraryCallKit : public GraphKit {
|
|||
// This returns Type::AnyPtr, RawPtr, or OopPtr.
|
||||
int classify_unsafe_addr(Node* &base, Node* &offset, BasicType type);
|
||||
Node* make_unsafe_address(Node*& base, Node* offset, BasicType type = T_ILLEGAL, bool can_cast = false);
|
||||
// Helper for inline_unsafe_access.
|
||||
// Generates the guards that check whether the result of
|
||||
// Unsafe.getObject should be recorded in an SATB log buffer.
|
||||
void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar);
|
||||
|
||||
typedef enum { Relaxed, Opaque, Volatile, Acquire, Release } AccessKind;
|
||||
DecoratorSet mo_decorator_for_access_kind(AccessKind kind);
|
||||
bool inline_unsafe_access(bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
|
||||
static bool klass_needs_init_guard(Node* kls);
|
||||
bool inline_unsafe_allocate();
|
||||
|
@ -269,7 +266,7 @@ class LibraryCallKit : public GraphKit {
|
|||
bool inline_array_copyOf(bool is_copyOfRange);
|
||||
bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
|
||||
bool inline_preconditions_checkIndex();
|
||||
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
|
||||
void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array);
|
||||
bool inline_native_clone(bool is_virtual);
|
||||
bool inline_native_Reflection_getCallerClass();
|
||||
// Helper function for inlining native object hash method
|
||||
|
@ -285,8 +282,6 @@ class LibraryCallKit : public GraphKit {
|
|||
uint new_idx);
|
||||
|
||||
typedef enum { LS_get_add, LS_get_set, LS_cmp_swap, LS_cmp_swap_weak, LS_cmp_exchange } LoadStoreKind;
|
||||
MemNode::MemOrd access_kind_to_memord_LS(AccessKind access_kind, bool is_store);
|
||||
MemNode::MemOrd access_kind_to_memord(AccessKind access_kind);
|
||||
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
|
||||
bool inline_unsafe_fence(vmIntrinsics::ID id);
|
||||
bool inline_onspinwait();
|
||||
|
@ -2224,106 +2219,6 @@ bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) {
|
|||
|
||||
//----------------------------inline_unsafe_access----------------------------
|
||||
|
||||
// Helper that guards and inserts a pre-barrier.
|
||||
void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset,
|
||||
Node* pre_val, bool need_mem_bar) {
|
||||
// We could be accessing the referent field of a reference object. If so, when G1
|
||||
// is enabled, we need to log the value in the referent field in an SATB buffer.
|
||||
// This routine performs some compile time filters and generates suitable
|
||||
// runtime filters that guard the pre-barrier code.
|
||||
// Also add memory barrier for non volatile load from the referent field
|
||||
// to prevent commoning of loads across safepoint.
|
||||
if (!UseG1GC && !need_mem_bar)
|
||||
return;
|
||||
|
||||
// Some compile time checks.
|
||||
|
||||
// If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
|
||||
const TypeX* otype = offset->find_intptr_t_type();
|
||||
if (otype != NULL && otype->is_con() &&
|
||||
otype->get_con() != java_lang_ref_Reference::referent_offset) {
|
||||
// Constant offset but not the reference_offset so just return
|
||||
return;
|
||||
}
|
||||
|
||||
// We only need to generate the runtime guards for instances.
|
||||
const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
|
||||
if (btype != NULL) {
|
||||
if (btype->isa_aryptr()) {
|
||||
// Array type so nothing to do
|
||||
return;
|
||||
}
|
||||
|
||||
const TypeInstPtr* itype = btype->isa_instptr();
|
||||
if (itype != NULL) {
|
||||
// Can the klass of base_oop be statically determined to be
|
||||
// _not_ a sub-class of Reference and _not_ Object?
|
||||
ciKlass* klass = itype->klass();
|
||||
if ( klass->is_loaded() &&
|
||||
!klass->is_subtype_of(env()->Reference_klass()) &&
|
||||
!env()->Object_klass()->is_subtype_of(klass)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The compile time filters did not reject base_oop/offset so
|
||||
// we need to generate the following runtime filters
|
||||
//
|
||||
// if (offset == java_lang_ref_Reference::_reference_offset) {
|
||||
// if (instance_of(base, java.lang.ref.Reference)) {
|
||||
// pre_barrier(_, pre_val, ...);
|
||||
// }
|
||||
// }
|
||||
|
||||
float likely = PROB_LIKELY( 0.999);
|
||||
float unlikely = PROB_UNLIKELY(0.999);
|
||||
|
||||
IdealKit ideal(this);
|
||||
#define __ ideal.
|
||||
|
||||
Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
|
||||
|
||||
__ if_then(offset, BoolTest::eq, referent_off, unlikely); {
|
||||
// Update graphKit memory and control from IdealKit.
|
||||
sync_kit(ideal);
|
||||
|
||||
Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
|
||||
Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
|
||||
|
||||
// Update IdealKit memory and control from graphKit.
|
||||
__ sync_kit(this);
|
||||
|
||||
Node* one = __ ConI(1);
|
||||
// is_instof == 0 if base_oop == NULL
|
||||
__ if_then(is_instof, BoolTest::eq, one, unlikely); {
|
||||
|
||||
// Update graphKit from IdealKit.
|
||||
sync_kit(ideal);
|
||||
|
||||
// Use the pre-barrier to record the value in the referent field
|
||||
pre_barrier(false /* do_load */,
|
||||
__ ctrl(),
|
||||
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
|
||||
pre_val /* pre_val */,
|
||||
T_OBJECT);
|
||||
if (need_mem_bar) {
|
||||
// Add memory barrier to prevent commoning reads from this field
|
||||
// across safepoint since GC can change its value.
|
||||
insert_mem_bar(Op_MemBarCPUOrder);
|
||||
}
|
||||
// Update IdealKit from graphKit.
|
||||
__ sync_kit(this);
|
||||
|
||||
} __ end_if(); // _ref_type != ref_none
|
||||
} __ end_if(); // offset == referent_offset
|
||||
|
||||
// Final sync IdealKit and GraphKit.
|
||||
final_sync(ideal);
|
||||
#undef __
|
||||
}
|
||||
|
||||
|
||||
const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type) {
|
||||
// Attempt to infer a sharper value type from the offset and base type.
|
||||
ciKlass* sharpened_klass = NULL;
|
||||
|
@ -2362,12 +2257,39 @@ const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_
|
|||
return NULL;
|
||||
}
|
||||
|
||||
DecoratorSet LibraryCallKit::mo_decorator_for_access_kind(AccessKind kind) {
|
||||
switch (kind) {
|
||||
case Relaxed:
|
||||
return MO_UNORDERED;
|
||||
case Opaque:
|
||||
return MO_RELAXED;
|
||||
case Acquire:
|
||||
return MO_ACQUIRE;
|
||||
case Release:
|
||||
return MO_RELEASE;
|
||||
case Volatile:
|
||||
return MO_SEQ_CST;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, const AccessKind kind, const bool unaligned) {
|
||||
if (callee()->is_static()) return false; // caller must have the capability!
|
||||
DecoratorSet decorators = C2_UNSAFE_ACCESS;
|
||||
guarantee(!is_store || kind != Acquire, "Acquire accesses can be produced only for loads");
|
||||
guarantee( is_store || kind != Release, "Release accesses can be produced only for stores");
|
||||
assert(type != T_OBJECT || !unaligned, "unaligned access not supported with object type");
|
||||
|
||||
if (type == T_OBJECT || type == T_ARRAY) {
|
||||
decorators |= ON_UNKNOWN_OOP_REF;
|
||||
}
|
||||
|
||||
if (unaligned) {
|
||||
decorators |= C2_UNALIGNED;
|
||||
}
|
||||
|
||||
#ifndef PRODUCT
|
||||
{
|
||||
ResourceMark rm;
|
||||
|
@ -2426,6 +2348,10 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
|
|||
// Can base be NULL? Otherwise, always on-heap access.
|
||||
bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop));
|
||||
|
||||
if (!can_access_non_heap) {
|
||||
decorators |= IN_HEAP;
|
||||
}
|
||||
|
||||
val = is_store ? argument(4) : NULL;
|
||||
|
||||
const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
|
||||
|
@ -2463,60 +2389,15 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
|
|||
|
||||
assert(!mismatched || alias_type->adr_type()->is_oopptr(), "off-heap access can't be mismatched");
|
||||
|
||||
if (mismatched) {
|
||||
decorators |= C2_MISMATCHED;
|
||||
}
|
||||
|
||||
// First guess at the value type.
|
||||
const Type *value_type = Type::get_const_basic_type(type);
|
||||
|
||||
// We will need memory barriers unless we can determine a unique
|
||||
// alias category for this reference. (Note: If for some reason
|
||||
// the barriers get omitted and the unsafe reference begins to "pollute"
|
||||
// the alias analysis of the rest of the graph, either Compile::can_alias
|
||||
// or Compile::must_alias will throw a diagnostic assert.)
|
||||
bool need_mem_bar = false;
|
||||
switch (kind) {
|
||||
case Relaxed:
|
||||
need_mem_bar = (mismatched && !adr_type->isa_aryptr()) || can_access_non_heap;
|
||||
break;
|
||||
case Opaque:
|
||||
// Opaque uses CPUOrder membars for protection against code movement.
|
||||
case Acquire:
|
||||
case Release:
|
||||
case Volatile:
|
||||
need_mem_bar = true;
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Some accesses require access atomicity for all types, notably longs and doubles.
|
||||
// When AlwaysAtomicAccesses is enabled, all accesses are atomic.
|
||||
bool requires_atomic_access = false;
|
||||
switch (kind) {
|
||||
case Relaxed:
|
||||
requires_atomic_access = AlwaysAtomicAccesses;
|
||||
break;
|
||||
case Opaque:
|
||||
// Opaque accesses are atomic.
|
||||
case Acquire:
|
||||
case Release:
|
||||
case Volatile:
|
||||
requires_atomic_access = true;
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Figure out the memory ordering.
|
||||
// Acquire/Release/Volatile accesses require marking the loads/stores with MemOrd
|
||||
MemNode::MemOrd mo = access_kind_to_memord_LS(kind, is_store);
|
||||
|
||||
// If we are reading the value of the referent field of a Reference
|
||||
// object (either by using Unsafe directly or through reflection)
|
||||
// then, if G1 is enabled, we need to record the referent in an
|
||||
// SATB log buffer using the pre-barrier mechanism.
|
||||
// Also we need to add memory barrier to prevent commoning reads
|
||||
// from this field across safepoint since GC can change its value.
|
||||
bool need_read_barrier = !is_store &&
|
||||
offset != top() && heap_base_oop != top();
|
||||
decorators |= mo_decorator_for_access_kind(kind);
|
||||
|
||||
if (!is_store && type == T_OBJECT) {
|
||||
const TypeOopPtr* tjp = sharpen_unsafe_type(alias_type, adr_type);
|
||||
|
@ -2534,39 +2415,6 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
|
|||
// and it is not possible to fully distinguish unintended nulls
|
||||
// from intended ones in this API.
|
||||
|
||||
// We need to emit leading and trailing CPU membars (see below) in
|
||||
// addition to memory membars for special access modes. This is a little
|
||||
// too strong, but avoids the need to insert per-alias-type
|
||||
// volatile membars (for stores; compare Parse::do_put_xxx), which
|
||||
// we cannot do effectively here because we probably only have a
|
||||
// rough approximation of type.
|
||||
|
||||
switch(kind) {
|
||||
case Relaxed:
|
||||
case Opaque:
|
||||
case Acquire:
|
||||
break;
|
||||
case Release:
|
||||
case Volatile:
|
||||
if (is_store) {
|
||||
insert_mem_bar(Op_MemBarRelease);
|
||||
} else {
|
||||
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
insert_mem_bar(Op_MemBarVolatile);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Memory barrier to prevent normal and 'unsafe' accesses from
|
||||
// bypassing each other. Happens after null checks, so the
|
||||
// exception paths do not take memory state from the memory barrier,
|
||||
// so there's no problems making a strong assert about mixing users
|
||||
// of safe & unsafe memory.
|
||||
if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
|
||||
|
||||
if (!is_store) {
|
||||
Node* p = NULL;
|
||||
// Try to constant fold a load from a constant field
|
||||
|
@ -2575,34 +2423,14 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
|
|||
// final or stable field
|
||||
p = make_constant_from_field(field, heap_base_oop);
|
||||
}
|
||||
if (p == NULL) {
|
||||
// To be valid, unsafe loads may depend on other conditions than
|
||||
// the one that guards them: pin the Load node
|
||||
LoadNode::ControlDependency dep = LoadNode::Pinned;
|
||||
Node* ctrl = control();
|
||||
// non volatile loads may be able to float
|
||||
if (!need_mem_bar && adr_type->isa_instptr()) {
|
||||
assert(adr_type->meet(TypePtr::NULL_PTR) != adr_type->remove_speculative(), "should be not null");
|
||||
intptr_t offset = Type::OffsetBot;
|
||||
AddPNode::Ideal_base_and_offset(adr, &_gvn, offset);
|
||||
if (offset >= 0) {
|
||||
int s = Klass::layout_helper_size_in_bytes(adr_type->isa_instptr()->klass()->layout_helper());
|
||||
if (offset < s) {
|
||||
// Guaranteed to be a valid access, no need to pin it
|
||||
dep = LoadNode::DependsOnlyOnTest;
|
||||
ctrl = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
p = make_load(ctrl, adr, value_type, type, adr_type, mo, dep, requires_atomic_access, unaligned, mismatched);
|
||||
// load value
|
||||
switch (type) {
|
||||
case T_BOOLEAN:
|
||||
{
|
||||
|
||||
if (p == NULL) { // Could not constant fold the load
|
||||
p = access_load_at(heap_base_oop, adr, adr_type, value_type, type, decorators);
|
||||
// Normalize the value returned by getBoolean in the following cases
|
||||
if (mismatched ||
|
||||
if (type == T_BOOLEAN &&
|
||||
(mismatched ||
|
||||
heap_base_oop == top() || // - heap_base_oop is NULL or
|
||||
(can_access_non_heap && alias_type->field() == NULL) // - heap_base_oop is potentially NULL
|
||||
(can_access_non_heap && field == NULL)) // - heap_base_oop is potentially NULL
|
||||
// and the unsafe access is made to large offset
|
||||
// (i.e., larger than the maximum offset necessary for any
|
||||
// field access)
|
||||
|
@ -2620,30 +2448,9 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
|
|||
#undef __
|
||||
}
|
||||
}
|
||||
case T_CHAR:
|
||||
case T_BYTE:
|
||||
case T_SHORT:
|
||||
case T_INT:
|
||||
case T_LONG:
|
||||
case T_FLOAT:
|
||||
case T_DOUBLE:
|
||||
break;
|
||||
case T_OBJECT:
|
||||
if (need_read_barrier) {
|
||||
// We do not require a mem bar inside pre_barrier if need_mem_bar
|
||||
// is set: the barriers would be emitted by us.
|
||||
insert_pre_barrier(heap_base_oop, offset, p, !need_mem_bar);
|
||||
}
|
||||
break;
|
||||
case T_ADDRESS:
|
||||
// Cast to an int type.
|
||||
p = _gvn.transform(new CastP2XNode(NULL, p));
|
||||
if (type == T_ADDRESS) {
|
||||
p = gvn().transform(new CastP2XNode(NULL, p));
|
||||
p = ConvX2UL(p);
|
||||
break;
|
||||
default:
|
||||
fatal("unexpected type %d: %s", type, type2name(type));
|
||||
break;
|
||||
}
|
||||
}
|
||||
// The load node has the control of the preceding MemBarCPUOrder. All
|
||||
// following nodes will have the control of the MemBarCPUOrder inserted at
|
||||
|
@ -2651,47 +2458,13 @@ bool LibraryCallKit::inline_unsafe_access(bool is_store, const BasicType type, c
|
|||
// point is fine.
|
||||
set_result(p);
|
||||
} else {
|
||||
// place effect of store into memory
|
||||
switch (type) {
|
||||
case T_DOUBLE:
|
||||
val = dstore_rounding(val);
|
||||
break;
|
||||
case T_ADDRESS:
|
||||
if (bt == T_ADDRESS) {
|
||||
// Repackage the long as a pointer.
|
||||
val = ConvL2X(val);
|
||||
val = _gvn.transform(new CastX2PNode(val));
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
val = gvn().transform(new CastX2PNode(val));
|
||||
}
|
||||
|
||||
if (type == T_OBJECT) {
|
||||
store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, type, mo, mismatched);
|
||||
} else {
|
||||
store_to_memory(control(), adr, val, type, adr_type, mo, requires_atomic_access, unaligned, mismatched);
|
||||
access_store_at(control(), heap_base_oop, adr, adr_type, val, value_type, type, decorators);
|
||||
}
|
||||
}
|
||||
|
||||
switch(kind) {
|
||||
case Relaxed:
|
||||
case Opaque:
|
||||
case Release:
|
||||
break;
|
||||
case Acquire:
|
||||
case Volatile:
|
||||
if (!is_store) {
|
||||
insert_mem_bar(Op_MemBarAcquire);
|
||||
} else {
|
||||
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
insert_mem_bar(Op_MemBarVolatile);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -2757,6 +2530,9 @@ bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadSt
|
|||
|
||||
if (callee()->is_static()) return false; // caller must have the capability!
|
||||
|
||||
DecoratorSet decorators = C2_UNSAFE_ACCESS;
|
||||
decorators |= mo_decorator_for_access_kind(access_kind);
|
||||
|
||||
#ifndef PRODUCT
|
||||
BasicType rtype;
|
||||
{
|
||||
|
@ -2888,318 +2664,54 @@ bool LibraryCallKit::inline_unsafe_load_store(const BasicType type, const LoadSt
|
|||
|
||||
int alias_idx = C->get_alias_index(adr_type);
|
||||
|
||||
// Memory-model-wise, a LoadStore acts like a little synchronized
|
||||
// block, so needs barriers on each side. These don't translate
|
||||
// into actual barriers on most machines, but we still need rest of
|
||||
// compiler to respect ordering.
|
||||
if (type == T_OBJECT || type == T_ARRAY) {
|
||||
decorators |= IN_HEAP | ON_UNKNOWN_OOP_REF;
|
||||
|
||||
switch (access_kind) {
|
||||
case Relaxed:
|
||||
case Acquire:
|
||||
break;
|
||||
case Release:
|
||||
insert_mem_bar(Op_MemBarRelease);
|
||||
break;
|
||||
case Volatile:
|
||||
if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
insert_mem_bar(Op_MemBarVolatile);
|
||||
} else {
|
||||
insert_mem_bar(Op_MemBarRelease);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
insert_mem_bar(Op_MemBarCPUOrder);
|
||||
|
||||
// Figure out the memory ordering.
|
||||
MemNode::MemOrd mo = access_kind_to_memord(access_kind);
|
||||
|
||||
// 4984716: MemBars must be inserted before this
|
||||
// memory node in order to avoid a false
|
||||
// dependency which will confuse the scheduler.
|
||||
Node *mem = memory(alias_idx);
|
||||
|
||||
// For now, we handle only those cases that actually exist: ints,
|
||||
// longs, and Object. Adding others should be straightforward.
|
||||
Node* load_store = NULL;
|
||||
switch(type) {
|
||||
case T_BYTE:
|
||||
switch(kind) {
|
||||
case LS_get_add:
|
||||
load_store = _gvn.transform(new GetAndAddBNode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_get_set:
|
||||
load_store = _gvn.transform(new GetAndSetBNode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_cmp_swap_weak:
|
||||
load_store = _gvn.transform(new WeakCompareAndSwapBNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_swap:
|
||||
load_store = _gvn.transform(new CompareAndSwapBNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_exchange:
|
||||
load_store = _gvn.transform(new CompareAndExchangeBNode(control(), mem, adr, newval, oldval, adr_type, mo));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
break;
|
||||
case T_SHORT:
|
||||
switch(kind) {
|
||||
case LS_get_add:
|
||||
load_store = _gvn.transform(new GetAndAddSNode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_get_set:
|
||||
load_store = _gvn.transform(new GetAndSetSNode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_cmp_swap_weak:
|
||||
load_store = _gvn.transform(new WeakCompareAndSwapSNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_swap:
|
||||
load_store = _gvn.transform(new CompareAndSwapSNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_exchange:
|
||||
load_store = _gvn.transform(new CompareAndExchangeSNode(control(), mem, adr, newval, oldval, adr_type, mo));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
break;
|
||||
case T_INT:
|
||||
switch(kind) {
|
||||
case LS_get_add:
|
||||
load_store = _gvn.transform(new GetAndAddINode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_get_set:
|
||||
load_store = _gvn.transform(new GetAndSetINode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_cmp_swap_weak:
|
||||
load_store = _gvn.transform(new WeakCompareAndSwapINode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_swap:
|
||||
load_store = _gvn.transform(new CompareAndSwapINode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_exchange:
|
||||
load_store = _gvn.transform(new CompareAndExchangeINode(control(), mem, adr, newval, oldval, adr_type, mo));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
break;
|
||||
case T_LONG:
|
||||
switch(kind) {
|
||||
case LS_get_add:
|
||||
load_store = _gvn.transform(new GetAndAddLNode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_get_set:
|
||||
load_store = _gvn.transform(new GetAndSetLNode(control(), mem, adr, newval, adr_type));
|
||||
break;
|
||||
case LS_cmp_swap_weak:
|
||||
load_store = _gvn.transform(new WeakCompareAndSwapLNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_swap:
|
||||
load_store = _gvn.transform(new CompareAndSwapLNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_exchange:
|
||||
load_store = _gvn.transform(new CompareAndExchangeLNode(control(), mem, adr, newval, oldval, adr_type, mo));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
break;
|
||||
case T_OBJECT:
|
||||
// Transformation of a value which could be NULL pointer (CastPP #NULL)
|
||||
// could be delayed during Parse (for example, in adjust_map_after_if()).
|
||||
// Execute transformation here to avoid barrier generation in such case.
|
||||
if (_gvn.type(newval) == TypePtr::NULL_PTR)
|
||||
newval = _gvn.makecon(TypePtr::NULL_PTR);
|
||||
|
||||
// Reference stores need a store barrier.
|
||||
switch(kind) {
|
||||
case LS_get_set: {
|
||||
// If pre-barrier must execute before the oop store, old value will require do_load here.
|
||||
if (!can_move_pre_barrier()) {
|
||||
pre_barrier(true /* do_load*/,
|
||||
control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
|
||||
NULL /* pre_val*/,
|
||||
T_OBJECT);
|
||||
} // Else move pre_barrier to use load_store value, see below.
|
||||
break;
|
||||
}
|
||||
case LS_cmp_swap_weak:
|
||||
case LS_cmp_swap:
|
||||
case LS_cmp_exchange: {
|
||||
// Same as for newval above:
|
||||
if (_gvn.type(oldval) == TypePtr::NULL_PTR) {
|
||||
if (oldval != NULL && _gvn.type(oldval) == TypePtr::NULL_PTR) {
|
||||
// Refine the value to a null constant, when it is known to be null
|
||||
oldval = _gvn.makecon(TypePtr::NULL_PTR);
|
||||
}
|
||||
// The only known value which might get overwritten is oldval.
|
||||
pre_barrier(false /* do_load */,
|
||||
control(), NULL, NULL, max_juint, NULL, NULL,
|
||||
oldval /* pre_val */,
|
||||
T_OBJECT);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
Node *newval_enc = _gvn.transform(new EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
|
||||
|
||||
Node* result = NULL;
|
||||
switch (kind) {
|
||||
case LS_get_set:
|
||||
load_store = _gvn.transform(new GetAndSetNNode(control(), mem, adr, newval_enc, adr_type, value_type->make_narrowoop()));
|
||||
break;
|
||||
case LS_cmp_swap_weak: {
|
||||
Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
|
||||
load_store = _gvn.transform(new WeakCompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo));
|
||||
break;
|
||||
}
|
||||
case LS_cmp_swap: {
|
||||
Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
|
||||
load_store = _gvn.transform(new CompareAndSwapNNode(control(), mem, adr, newval_enc, oldval_enc, mo));
|
||||
break;
|
||||
}
|
||||
case LS_cmp_exchange: {
|
||||
Node *oldval_enc = _gvn.transform(new EncodePNode(oldval, oldval->bottom_type()->make_narrowoop()));
|
||||
load_store = _gvn.transform(new CompareAndExchangeNNode(control(), mem, adr, newval_enc, oldval_enc, adr_type, value_type->make_narrowoop(), mo));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
switch (kind) {
|
||||
case LS_get_set:
|
||||
load_store = _gvn.transform(new GetAndSetPNode(control(), mem, adr, newval, adr_type, value_type->is_oopptr()));
|
||||
break;
|
||||
case LS_cmp_swap_weak:
|
||||
load_store = _gvn.transform(new WeakCompareAndSwapPNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_swap:
|
||||
load_store = _gvn.transform(new CompareAndSwapPNode(control(), mem, adr, newval, oldval, mo));
|
||||
break;
|
||||
case LS_cmp_exchange:
|
||||
load_store = _gvn.transform(new CompareAndExchangePNode(control(), mem, adr, newval, oldval, adr_type, value_type->is_oopptr(), mo));
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Emit the post barrier only when the actual store happened. This makes sense
|
||||
// to check only for LS_cmp_* that can fail to set the value.
|
||||
// LS_cmp_exchange does not produce any branches by default, so there is no
|
||||
// boolean result to piggyback on. TODO: When we merge CompareAndSwap with
|
||||
// CompareAndExchange and move branches here, it would make sense to conditionalize
|
||||
// post_barriers for LS_cmp_exchange as well.
|
||||
//
|
||||
// CAS success path is marked more likely since we anticipate this is a performance
|
||||
// critical path, while CAS failure path can use the penalty for going through unlikely
|
||||
// path as backoff. Which is still better than doing a store barrier there.
|
||||
switch (kind) {
|
||||
case LS_get_set:
|
||||
case LS_cmp_exchange: {
|
||||
post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
|
||||
result = access_atomic_cmpxchg_val_at(control(), base, adr, adr_type, alias_idx,
|
||||
oldval, newval, value_type, type, decorators);
|
||||
break;
|
||||
}
|
||||
case LS_cmp_swap_weak:
|
||||
decorators |= C2_WEAK_CMPXCHG;
|
||||
case LS_cmp_swap: {
|
||||
IdealKit ideal(this);
|
||||
ideal.if_then(load_store, BoolTest::ne, ideal.ConI(0), PROB_STATIC_FREQUENT); {
|
||||
sync_kit(ideal);
|
||||
post_barrier(ideal.ctrl(), load_store, base, adr, alias_idx, newval, T_OBJECT, true);
|
||||
ideal.sync_kit(this);
|
||||
} ideal.end_if();
|
||||
final_sync(ideal);
|
||||
result = access_atomic_cmpxchg_bool_at(control(), base, adr, adr_type, alias_idx,
|
||||
oldval, newval, value_type, type, decorators);
|
||||
break;
|
||||
}
|
||||
case LS_get_set: {
|
||||
result = access_atomic_xchg_at(control(), base, adr, adr_type, alias_idx,
|
||||
newval, value_type, type, decorators);
|
||||
break;
|
||||
}
|
||||
case LS_get_add: {
|
||||
result = access_atomic_add_at(control(), base, adr, adr_type, alias_idx,
|
||||
newval, value_type, type, decorators);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
break;
|
||||
default:
|
||||
fatal("unexpected type %d: %s", type, type2name(type));
|
||||
break;
|
||||
}
|
||||
|
||||
// SCMemProjNodes represent the memory state of a LoadStore. Their
|
||||
// main role is to prevent LoadStore nodes from being optimized away
|
||||
// when their results aren't used.
|
||||
Node* proj = _gvn.transform(new SCMemProjNode(load_store));
|
||||
set_memory(proj, alias_idx);
|
||||
|
||||
if (type == T_OBJECT && (kind == LS_get_set || kind == LS_cmp_exchange)) {
|
||||
#ifdef _LP64
|
||||
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
load_store = _gvn.transform(new DecodeNNode(load_store, load_store->get_ptr_type()));
|
||||
}
|
||||
#endif
|
||||
if (can_move_pre_barrier() && kind == LS_get_set) {
|
||||
// Don't need to load pre_val. The old value is returned by load_store.
|
||||
// The pre_barrier can execute after the xchg as long as no safepoint
|
||||
// gets inserted between them.
|
||||
pre_barrier(false /* do_load */,
|
||||
control(), NULL, NULL, max_juint, NULL, NULL,
|
||||
load_store /* pre_val */,
|
||||
T_OBJECT);
|
||||
}
|
||||
}
|
||||
|
||||
// Add the trailing membar surrounding the access
|
||||
insert_mem_bar(Op_MemBarCPUOrder);
|
||||
|
||||
switch (access_kind) {
|
||||
case Relaxed:
|
||||
case Release:
|
||||
break; // do nothing
|
||||
case Acquire:
|
||||
case Volatile:
|
||||
insert_mem_bar(Op_MemBarAcquire);
|
||||
// !support_IRIW_for_not_multiple_copy_atomic_cpu handled in platform code
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
||||
assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
|
||||
set_result(load_store);
|
||||
assert(type2size[result->bottom_type()->basic_type()] == type2size[rtype], "result type should match");
|
||||
set_result(result);
|
||||
return true;
|
||||
}
|
||||
|
||||
MemNode::MemOrd LibraryCallKit::access_kind_to_memord_LS(AccessKind kind, bool is_store) {
  MemNode::MemOrd mo = MemNode::unset;
  switch(kind) {
    case Opaque:
    case Relaxed:  mo = MemNode::unordered; break;
    case Acquire:  mo = MemNode::acquire;   break;
    case Release:  mo = MemNode::release;   break;
    case Volatile: mo = is_store ? MemNode::release : MemNode::acquire; break;
    default:
      ShouldNotReachHere();
  }
  guarantee(mo != MemNode::unset, "Should select memory ordering");
  return mo;
}

MemNode::MemOrd LibraryCallKit::access_kind_to_memord(AccessKind kind) {
  MemNode::MemOrd mo = MemNode::unset;
  switch(kind) {
    case Opaque:
    case Relaxed:  mo = MemNode::unordered; break;
    case Acquire:  mo = MemNode::acquire;   break;
    case Release:  mo = MemNode::release;   break;
    case Volatile: mo = MemNode::seqcst;    break;
    default:
      ShouldNotReachHere();
  }
  guarantee(mo != MemNode::unset, "Should select memory ordering");
  return mo;
}
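
For orientation, the mapping just above has the same shape as the C++11 memory model's orderings: Opaque and Relaxed are unordered, Acquire and Release map directly, and Volatile is the strongest ordering. A minimal standalone sketch of that mapping, using made-up stand-ins for AccessKind and MemNode::MemOrd (illustrative C++ only, not HotSpot code):

    #include <atomic>
    #include <cassert>

    // Hypothetical stand-in for LibraryCallKit::AccessKind.
    enum class AccessKind { Opaque, Relaxed, Acquire, Release, Volatile };

    // Mirrors access_kind_to_memord(): Opaque/Relaxed are unordered (relaxed),
    // Acquire/Release map directly, Volatile is seq_cst, like MemNode::seqcst above.
    std::memory_order to_memory_order(AccessKind kind) {
      switch (kind) {
        case AccessKind::Opaque:
        case AccessKind::Relaxed:  return std::memory_order_relaxed;
        case AccessKind::Acquire:  return std::memory_order_acquire;
        case AccessKind::Release:  return std::memory_order_release;
        case AccessKind::Volatile: return std::memory_order_seq_cst;
      }
      assert(false && "unknown access kind");
      return std::memory_order_seq_cst;
    }

    int main() {
      std::atomic<int> x{0};
      x.store(1, to_memory_order(AccessKind::Volatile));   // seq_cst store
      return x.load(to_memory_order(AccessKind::Acquire)); // acquire load
    }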
|
||||
|
||||
bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
|
||||
// Regardless of form, don't allow previous ld/st to move down,
|
||||
// then issue acquire, release, or volatile mem_bar.
|
||||
|
@ -4636,7 +4148,7 @@ bool LibraryCallKit::inline_unsafe_copyMemory() {
|
|||
|
||||
//------------------------clone_coping-----------------------------------
|
||||
// Helper function for inline_native_clone.
|
||||
void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark) {
|
||||
void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array) {
|
||||
assert(obj_size != NULL, "");
|
||||
Node* raw_obj = alloc_obj->in(1);
|
||||
assert(alloc_obj->is_CheckCastPP() && raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
|
||||
|
@ -4656,66 +4168,9 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b
|
|||
|
||||
// Copy the fastest available way.
|
||||
// TODO: generate fields copies for small objects instead.
|
||||
Node* src = obj;
|
||||
Node* dest = alloc_obj;
|
||||
Node* size = _gvn.transform(obj_size);
|
||||
|
||||
// Exclude the header but include array length to copy by 8 bytes words.
|
||||
// Can't use base_offset_in_bytes(bt) since basic type is unknown.
|
||||
int base_off = is_array ? arrayOopDesc::length_offset_in_bytes() :
|
||||
instanceOopDesc::base_offset_in_bytes();
|
||||
// base_off:
|
||||
// 8 - 32-bit VM
|
||||
// 12 - 64-bit VM, compressed klass
|
||||
// 16 - 64-bit VM, normal klass
|
||||
if (base_off % BytesPerLong != 0) {
|
||||
assert(UseCompressedClassPointers, "");
|
||||
if (is_array) {
|
||||
// Exclude length to copy by 8 bytes words.
|
||||
base_off += sizeof(int);
|
||||
} else {
|
||||
// Include klass to copy by 8 bytes words.
|
||||
base_off = instanceOopDesc::klass_offset_in_bytes();
|
||||
}
|
||||
assert(base_off % BytesPerLong == 0, "expect 8 bytes alignment");
|
||||
}
|
||||
src = basic_plus_adr(src, base_off);
|
||||
dest = basic_plus_adr(dest, base_off);
|
||||
|
||||
// Compute the length also, if needed:
|
||||
Node* countx = size;
|
||||
countx = _gvn.transform(new SubXNode(countx, MakeConX(base_off)));
|
||||
countx = _gvn.transform(new URShiftXNode(countx, intcon(LogBytesPerLong) ));
|
||||
|
||||
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
|
||||
|
||||
ArrayCopyNode* ac = ArrayCopyNode::make(this, false, src, NULL, dest, NULL, countx, false, false);
|
||||
ac->set_clonebasic();
|
||||
Node* n = _gvn.transform(ac);
|
||||
if (n == ac) {
|
||||
set_predefined_output_for_runtime_call(ac, ac->in(TypeFunc::Memory), raw_adr_type);
|
||||
} else {
|
||||
set_all_memory(n);
|
||||
}
|
||||
|
||||
// If necessary, emit some card marks afterwards. (Non-arrays only.)
|
||||
if (card_mark) {
|
||||
assert(!is_array, "");
|
||||
// Put in store barrier for any and all oops we are sticking
|
||||
// into this object. (We could avoid this if we could prove
|
||||
// that the object type contains no oop fields at all.)
|
||||
Node* no_particular_value = NULL;
|
||||
Node* no_particular_field = NULL;
|
||||
int raw_adr_idx = Compile::AliasIdxRaw;
|
||||
post_barrier(control(),
|
||||
memory(raw_adr_type),
|
||||
alloc_obj,
|
||||
no_particular_field,
|
||||
raw_adr_idx,
|
||||
no_particular_value,
|
||||
T_OBJECT,
|
||||
false);
|
||||
}
|
||||
access_clone(control(), obj, alloc_obj, size, is_array);
|
||||
|
||||
// Do not let reads from the cloned object float above the arraycopy.
|
||||
if (alloc != NULL) {
|
||||
|
@ -4805,9 +4260,6 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
|
|||
PhiNode* result_mem = new PhiNode(result_reg, Type::MEMORY, TypePtr::BOTTOM);
|
||||
record_for_igvn(result_reg);
|
||||
|
||||
const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
|
||||
int raw_adr_idx = Compile::AliasIdxRaw;
|
||||
|
||||
Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
|
||||
if (array_ctl != NULL) {
|
||||
// It's an array.
|
||||
|
@ -4817,9 +4269,10 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
|
|||
Node* obj_size = NULL;
|
||||
Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push
|
||||
|
||||
if (!use_ReduceInitialCardMarks()) {
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
if (bs->array_copy_requires_gc_barriers(T_OBJECT)) {
|
||||
// If it is an oop array, it requires very special treatment,
|
||||
// because card marking is required on each card of the array.
|
||||
// because gc barriers are required when accessing the array.
|
||||
Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
|
||||
if (is_obja != NULL) {
|
||||
PreserveJVMState pjvms2(this);
|
||||
|
@ -4838,7 +4291,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
|
|||
result_mem ->set_req(_objArray_path, reset_memory());
|
||||
}
|
||||
}
|
||||
// Otherwise, there are no card marks to worry about.
|
||||
// Otherwise, there are no barriers to worry about.
|
||||
// (We can dispense with card marks if we know the allocation
|
||||
// comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
|
||||
// causes the non-eden paths to take compensating steps to
|
||||
|
@ -4847,7 +4300,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
|
|||
// the object.)
|
||||
|
||||
if (!stopped()) {
|
||||
copy_to_clone(obj, alloc_obj, obj_size, true, false);
|
||||
copy_to_clone(obj, alloc_obj, obj_size, true);
|
||||
|
||||
// Present the results of the copy.
|
||||
result_reg->init_req(_array_path, control());
|
||||
|
@ -4893,7 +4346,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) {
|
|||
// exception state between multiple Object.clone versions (reexecute=true vs reexecute=false).
|
||||
Node* alloc_obj = new_instance(obj_klass, NULL, &obj_size, /*deoptimize_on_exception=*/true);
|
||||
|
||||
copy_to_clone(obj, alloc_obj, obj_size, false, !use_ReduceInitialCardMarks());
|
||||
copy_to_clone(obj, alloc_obj, obj_size, false);
|
||||
|
||||
// Present the results of the slow call.
|
||||
result_reg->init_req(_instance_path, control());
|
||||
|
@ -6100,21 +5553,23 @@ bool LibraryCallKit::inline_reference_get() {
|
|||
Node* reference_obj = null_check_receiver();
|
||||
if (stopped()) return true;
|
||||
|
||||
const TypeInstPtr* tinst = _gvn.type(reference_obj)->isa_instptr();
|
||||
assert(tinst != NULL, "obj is null");
|
||||
assert(tinst->klass()->is_loaded(), "obj is not loaded");
|
||||
ciInstanceKlass* referenceKlass = tinst->klass()->as_instance_klass();
|
||||
ciField* field = referenceKlass->get_field_by_name(ciSymbol::make("referent"),
|
||||
ciSymbol::make("Ljava/lang/Object;"),
|
||||
false);
|
||||
assert (field != NULL, "undefined field");
|
||||
|
||||
Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset);
|
||||
const TypePtr* adr_type = C->alias_type(field)->adr_type();
|
||||
|
||||
ciInstanceKlass* klass = env()->Object_klass();
|
||||
const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
|
||||
|
||||
Node* no_ctrl = NULL;
|
||||
Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT, MemNode::unordered);
|
||||
|
||||
// Use the pre-barrier to record the value in the referent field
|
||||
pre_barrier(false /* do_load */,
|
||||
control(),
|
||||
NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
|
||||
result /* pre_val */,
|
||||
T_OBJECT);
|
||||
|
||||
DecoratorSet decorators = IN_HEAP | ON_WEAK_OOP_REF;
|
||||
Node* result = access_load_at(reference_obj, adr, adr_type, object_type, T_OBJECT, decorators);
|
||||
// Add memory barrier to prevent commoning reads from this field
|
||||
// across safepoint since GC can change its value.
|
||||
insert_mem_bar(Op_MemBarCPUOrder);
|
||||
|
@ -6167,20 +5622,13 @@ Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * field
|
|||
type = Type::get_const_basic_type(bt);
|
||||
}
|
||||
|
||||
if (support_IRIW_for_not_multiple_copy_atomic_cpu && is_vol) {
|
||||
insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier
|
||||
}
|
||||
// Build the load.
|
||||
MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
|
||||
Node* loadedField = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, is_vol);
|
||||
// If reference is volatile, prevent following memory ops from
|
||||
// floating up past the volatile read. Also prevents commoning
|
||||
// another volatile read.
|
||||
DecoratorSet decorators = IN_HEAP;
|
||||
|
||||
if (is_vol) {
|
||||
// Memory barrier includes bogus read of value to force load BEFORE membar
|
||||
insert_mem_bar(Op_MemBarAcquire, loadedField);
|
||||
decorators |= MO_SEQ_CST;
|
||||
}
|
||||
return loadedField;
|
||||
|
||||
return access_load_at(fromObj, adr, adr_type, type, bt, decorators);
|
||||
}
|
||||
|
||||
Node * LibraryCallKit::field_address_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
|
||||
|
|
|
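
Stepping back from the library_call.cpp hunks above before the loopnode.hpp changes: the rewritten intrinsics no longer emit pre/post barriers by hand. They build a DecoratorSet (IN_HEAP, IN_HEAP_ARRAY, ON_WEAK_OOP_REF, MO_SEQ_CST, C2_WEAK_CMPXCHG, and so on) and pass it to access_load_at / access_store_at, leaving the active barrier set to interpret the flags. A rough standalone sketch of that flag-combination idea, with invented bit values rather than the real DecoratorSet definitions:

    #include <cstdint>
    #include <cstdio>

    // Illustrative stand-ins for the decorator flags used above (bit values are made up).
    using DecoratorSet = std::uint64_t;
    const DecoratorSet IN_HEAP         = 1u << 0;
    const DecoratorSet IN_HEAP_ARRAY   = 1u << 1;
    const DecoratorSet ON_WEAK_OOP_REF = 1u << 2;
    const DecoratorSet MO_SEQ_CST      = 1u << 3;
    const DecoratorSet C2_WEAK_CMPXCHG = 1u << 4;

    // A barrier-set backend can inspect the combined flags and decide what to emit.
    void describe_access(DecoratorSet d) {
      std::printf("heap=%d array=%d weak-ref=%d seq_cst=%d weak-cas=%d\n",
                  (int)((d & IN_HEAP) != 0), (int)((d & IN_HEAP_ARRAY) != 0),
                  (int)((d & ON_WEAK_OOP_REF) != 0), (int)((d & MO_SEQ_CST) != 0),
                  (int)((d & C2_WEAK_CMPXCHG) != 0));
    }

    int main() {
      // The kind of combination the parser builds for a volatile oop array store.
      describe_access(IN_HEAP | IN_HEAP_ARRAY | MO_SEQ_CST);
      return 0;
    }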
@ -684,10 +684,12 @@ class PhaseIdealLoop : public PhaseTransform {
|
|||
// Mark as post visited
|
||||
void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; }
|
||||
|
||||
public:
|
||||
// Set/get control node out. Set lower bit to distinguish from IdealLoopTree
|
||||
// Returns true if "n" is a data node, false if it's a control node.
|
||||
bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; }
|
||||
|
||||
private:
|
||||
// clear out dead code after build_loop_late
|
||||
Node_List _deadlist;
|
||||
|
||||
|
@ -736,6 +738,8 @@ class PhaseIdealLoop : public PhaseTransform {
|
|||
|
||||
public:
|
||||
|
||||
PhaseIterGVN &igvn() const { return _igvn; }
|
||||
|
||||
static bool is_canonical_loop_entry(CountedLoopNode* cl);
|
||||
|
||||
bool has_node( Node* n ) const {
|
||||
|
@ -789,7 +793,6 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
private:
|
||||
Node *get_ctrl_no_update_helper(Node *i) const {
|
||||
assert(has_ctrl(i), "should be control, not loop");
|
||||
return (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
|
||||
|
@ -822,7 +825,6 @@ private:
|
|||
// the 'old_node' with 'new_node'. Kill old-node. Add a reference
|
||||
// from old_node to new_node to support the lazy update. Reference
|
||||
// replaces loop reference, since that is not needed for dead node.
|
||||
public:
|
||||
void lazy_update(Node *old_node, Node *new_node) {
|
||||
assert(old_node != new_node, "no cycles please");
|
||||
// Re-use the side array slot for this node to provide the
|
||||
|
@ -856,6 +858,7 @@ private:
|
|||
uint *_dom_depth; // Used for fast LCA test
|
||||
GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
|
||||
|
||||
public:
|
||||
Node* idom_no_update(Node* d) const {
|
||||
return idom_no_update(d->_idx);
|
||||
}
|
||||
|
@ -911,7 +914,6 @@ private:
|
|||
// build the loop tree and perform any requested optimizations
|
||||
void build_and_optimize(bool do_split_if, bool skip_loop_opts);
|
||||
|
||||
public:
|
||||
// Dominators for the sea of nodes
|
||||
void Dominators();
|
||||
Node *dom_lca( Node *n1, Node *n2 ) const {
|
||||
|
@ -968,6 +970,8 @@ public:
|
|||
return (IdealLoopTree*)_nodes[n->_idx];
|
||||
}
|
||||
|
||||
IdealLoopTree *ltree_root() const { return _ltree_root; }
|
||||
|
||||
// Is 'n' a (nested) member of 'loop'?
|
||||
int is_member( const IdealLoopTree *loop, Node *n ) const {
|
||||
return loop->is_member(get_loop(n)); }
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "gc/shared/barrierSet.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "opto/addnode.hpp"
|
||||
|
|
|
@ -227,108 +227,9 @@ void PhaseMacroExpand::extract_call_projections(CallNode *call) {
|
|||
|
||||
}
|
||||
|
||||
// Eliminate a card mark sequence. p2x is a ConvP2XNode
|
||||
void PhaseMacroExpand::eliminate_card_mark(Node* p2x) {
|
||||
assert(p2x->Opcode() == Op_CastP2X, "ConvP2XNode required");
|
||||
if (!UseG1GC) {
|
||||
// vanilla/CMS post barrier
|
||||
Node *shift = p2x->unique_out();
|
||||
Node *addp = shift->unique_out();
|
||||
for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) {
|
||||
Node *mem = addp->last_out(j);
|
||||
if (UseCondCardMark && mem->is_Load()) {
|
||||
assert(mem->Opcode() == Op_LoadB, "unexpected code shape");
|
||||
// The load is checking if the card has been written so
|
||||
// replace it with zero to fold the test.
|
||||
_igvn.replace_node(mem, intcon(0));
|
||||
continue;
|
||||
}
|
||||
assert(mem->is_Store(), "store required");
|
||||
_igvn.replace_node(mem, mem->in(MemNode::Memory));
|
||||
}
|
||||
}
|
||||
#if INCLUDE_G1GC
|
||||
else {
|
||||
// G1 pre/post barriers
|
||||
assert(p2x->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes");
|
||||
// It could be only one user, URShift node, in Object.clone() intrinsic
|
||||
// but the new allocation is passed to arraycopy stub and it could not
|
||||
// be scalar replaced. So we don't check the case.
|
||||
|
||||
// An other case of only one user (Xor) is when the value check for NULL
|
||||
// in G1 post barrier is folded after CCP so the code which used URShift
|
||||
// is removed.
|
||||
|
||||
// Take Region node before eliminating post barrier since it also
|
||||
// eliminates CastP2X node when it has only one user.
|
||||
Node* this_region = p2x->in(0);
|
||||
assert(this_region != NULL, "");
|
||||
|
||||
// Remove G1 post barrier.
|
||||
|
||||
// Search for CastP2X->Xor->URShift->Cmp path which
|
||||
// checks if the store done to a different from the value's region.
|
||||
// And replace Cmp with #0 (false) to collapse G1 post barrier.
|
||||
Node* xorx = p2x->find_out_with(Op_XorX);
|
||||
if (xorx != NULL) {
|
||||
Node* shift = xorx->unique_out();
|
||||
Node* cmpx = shift->unique_out();
|
||||
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
|
||||
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
|
||||
"missing region check in G1 post barrier");
|
||||
_igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
|
||||
|
||||
// Remove G1 pre barrier.
|
||||
|
||||
// Search "if (marking != 0)" check and set it to "false".
|
||||
// There is no G1 pre barrier if previous stored value is NULL
|
||||
// (for example, after initialization).
|
||||
if (this_region->is_Region() && this_region->req() == 3) {
|
||||
int ind = 1;
|
||||
if (!this_region->in(ind)->is_IfFalse()) {
|
||||
ind = 2;
|
||||
}
|
||||
if (this_region->in(ind)->is_IfFalse() &&
|
||||
this_region->in(ind)->in(0)->Opcode() == Op_If) {
|
||||
Node* bol = this_region->in(ind)->in(0)->in(1);
|
||||
assert(bol->is_Bool(), "");
|
||||
cmpx = bol->in(1);
|
||||
if (bol->as_Bool()->_test._test == BoolTest::ne &&
|
||||
cmpx->is_Cmp() && cmpx->in(2) == intcon(0) &&
|
||||
cmpx->in(1)->is_Load()) {
|
||||
Node* adr = cmpx->in(1)->as_Load()->in(MemNode::Address);
|
||||
const int marking_offset = in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset());
|
||||
if (adr->is_AddP() && adr->in(AddPNode::Base) == top() &&
|
||||
adr->in(AddPNode::Address)->Opcode() == Op_ThreadLocal &&
|
||||
adr->in(AddPNode::Offset) == MakeConX(marking_offset)) {
|
||||
_igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
assert(!GraphKit::use_ReduceInitialCardMarks(), "can only happen with card marking");
|
||||
// This is a G1 post barrier emitted by the Object.clone() intrinsic.
|
||||
// Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card
|
||||
// is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier.
|
||||
Node* shift = p2x->find_out_with(Op_URShiftX);
|
||||
assert(shift != NULL, "missing G1 post barrier");
|
||||
Node* addp = shift->unique_out();
|
||||
Node* load = addp->find_out_with(Op_LoadB);
|
||||
assert(load != NULL, "missing G1 post barrier");
|
||||
Node* cmpx = load->unique_out();
|
||||
assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() &&
|
||||
cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne,
|
||||
"missing card value check in G1 post barrier");
|
||||
_igvn.replace_node(cmpx, makecon(TypeInt::CC_EQ));
|
||||
// There is no G1 pre barrier in this case
|
||||
}
|
||||
// Now CastP2X can be removed since it is used only on dead path
|
||||
// which currently still alive until igvn optimize it.
|
||||
assert(p2x->outcnt() == 0 || p2x->unique_out()->Opcode() == Op_URShiftX, "");
|
||||
_igvn.replace_node(p2x, top());
|
||||
}
|
||||
#endif // INCLUDE_G1GC

void PhaseMacroExpand::eliminate_gc_barrier(Node* p2x) {
  BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2();
  bs->eliminate_gc_barrier(this, p2x);
}
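
The two-line body above is the heart of the modularization in macro expansion: instead of branching on UseG1GC, PhaseMacroExpand asks the current BarrierSetC2 to eliminate whatever barrier it emitted. A minimal standalone model of that delegation, with assumed class names rather than the real HotSpot hierarchy:

    #include <iostream>
    #include <memory>

    // Assumed stand-in for the BarrierSetC2 interface: GC-neutral compiler code
    // calls the hook; each collector supplies its own implementation.
    class BarrierSetC2Model {
     public:
      virtual ~BarrierSetC2Model() = default;
      // Called by macro expansion when an allocation's barrier can be removed.
      virtual void eliminate_gc_barrier() const { /* no barrier: nothing to do */ }
    };

    class G1BarrierSetC2Model : public BarrierSetC2Model {
     public:
      void eliminate_gc_barrier() const override {
        std::cout << "collapse SATB pre-barrier and card-table post-barrier\n";
      }
    };

    int main() {
      // In HotSpot the concrete barrier set is chosen by the selected GC; here we pick a G1-like one.
      std::unique_ptr<BarrierSetC2Model> bs = std::make_unique<G1BarrierSetC2Model>();
      bs->eliminate_gc_barrier();  // macro.cpp-style call site: no GC-specific branches here
      return 0;
    }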
|
||||
|
||||
// Search for a memory operation for the specified memory slice.
|
||||
|
@ -1029,7 +930,7 @@ void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) {
|
|||
disconnect_projections(membar_after->as_MemBar(), _igvn);
|
||||
}
|
||||
} else {
|
||||
eliminate_card_mark(n);
|
||||
eliminate_gc_barrier(n);
|
||||
}
|
||||
k -= (oc2 - use->outcnt());
|
||||
}
|
||||
|
@ -1062,7 +963,7 @@ void PhaseMacroExpand::process_users_of_allocation(CallNode *alloc) {
|
|||
|
||||
_igvn._worklist.push(ac);
|
||||
} else {
|
||||
eliminate_card_mark(use);
|
||||
eliminate_gc_barrier(use);
|
||||
}
|
||||
j -= (oc1 - res->outcnt());
|
||||
}
|
||||
|
@ -2801,5 +2702,6 @@ bool PhaseMacroExpand::expand_macro_nodes() {
|
|||
_igvn.set_delay_transform(false);
|
||||
_igvn.optimize();
|
||||
if (C->failing()) return true;
|
||||
return false;
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
return bs->expand_macro_nodes(this);
|
||||
}
|
||||
|
|
|
@ -37,11 +37,8 @@ class PhaseMacroExpand : public Phase {
|
|||
private:
|
||||
PhaseIterGVN &_igvn;
|
||||
|
||||
public:
|
||||
// Helper methods roughly modeled after GraphKit:
|
||||
Node* top() const { return C->top(); }
|
||||
Node* intcon(jint con) const { return _igvn.intcon(con); }
|
||||
Node* longcon(jlong con) const { return _igvn.longcon(con); }
|
||||
Node* makecon(const Type *t) const { return _igvn.makecon(t); }
|
||||
Node* basic_plus_adr(Node* base, int offset) {
|
||||
return (offset == 0)? base: basic_plus_adr(base, MakeConX(offset));
|
||||
}
|
||||
|
@ -66,6 +63,7 @@ private:
|
|||
Node* make_store(Node* ctl, Node* mem, Node* base, int offset,
|
||||
Node* value, BasicType bt);
|
||||
|
||||
private:
|
||||
// projections extracted from a call node
|
||||
ProjNode *_fallthroughproj;
|
||||
ProjNode *_fallthroughcatchproj;
|
||||
|
@ -94,7 +92,7 @@ private:
|
|||
bool scalar_replacement(AllocateNode *alloc, GrowableArray <SafePointNode *>& safepoints_done);
|
||||
void process_users_of_allocation(CallNode *alloc);
|
||||
|
||||
void eliminate_card_mark(Node *cm);
|
||||
void eliminate_gc_barrier(Node *p2x);
|
||||
void mark_eliminated_box(Node* box, Node* obj);
|
||||
void mark_eliminated_locking_nodes(AbstractLockNode *alock);
|
||||
bool eliminate_locking_node(AbstractLockNode *alock);
|
||||
|
@ -209,6 +207,14 @@ public:
|
|||
void eliminate_macro_nodes();
|
||||
bool expand_macro_nodes();
|
||||
|
||||
PhaseIterGVN &igvn() const { return _igvn; }
|
||||
|
||||
// Members accessed from BarrierSetC2
|
||||
void replace_node(Node* source, Node* target) { _igvn.replace_node(source, target); }
|
||||
Node* intcon(jint con) const { return _igvn.intcon(con); }
|
||||
Node* longcon(jlong con) const { return _igvn.longcon(con); }
|
||||
Node* makecon(const Type *t) const { return _igvn.makecon(t); }
|
||||
Node* top() const { return C->top(); }
|
||||
};
|
||||
|
||||
#endif // SHARE_VM_OPTO_MACRO_HPP
|
||||
|
|
|
@ -550,9 +550,9 @@ Node* PhaseMacroExpand::generate_arraycopy(ArrayCopyNode *ac, AllocateArrayNode*
|
|||
}
|
||||
// At this point we know we do not need type checks on oop stores.
|
||||
|
||||
// Let's see if we need card marks:
|
||||
if (alloc != NULL && GraphKit::use_ReduceInitialCardMarks()) {
|
||||
// If we do not need card marks, copy using the jint or jlong stub.
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
if (alloc != NULL && !bs->array_copy_requires_gc_barriers(copy_type)) {
|
||||
// If we do not need gc barriers, copy using the jint or jlong stub.
|
||||
copy_type = LP64_ONLY(UseCompressedOops ? T_INT : T_LONG) NOT_LP64(T_INT);
|
||||
assert(type2aelembytes(basic_elem_type) == type2aelembytes(copy_type),
|
||||
"sizes agree");
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "gc/shared/barrierSet.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "libadt/vectset.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
|
@ -37,6 +39,7 @@
|
|||
#include "opto/regmask.hpp"
|
||||
#include "opto/type.hpp"
|
||||
#include "utilities/copy.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
class RegMask;
|
||||
// #include "phase.hpp"
|
||||
|
@ -499,6 +502,8 @@ Node *Node::clone() const {
|
|||
C->add_macro_node(n);
|
||||
if (is_expensive())
|
||||
C->add_expensive_node(n);
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->register_potential_barrier_node(n);
|
||||
// If the cloned node is a range check dependent CastII, add it to the list.
|
||||
CastIINode* cast = n->isa_CastII();
|
||||
if (cast != NULL && cast->has_range_check()) {
|
||||
|
@ -622,6 +627,8 @@ void Node::destruct() {
|
|||
if (is_SafePoint()) {
|
||||
as_SafePoint()->delete_replaced_nodes();
|
||||
}
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->unregister_potential_barrier_node(this);
|
||||
#ifdef ASSERT
|
||||
// We will not actually delete the storage, but we'll make the node unusable.
|
||||
*(address*)this = badAddress; // smash the C++ vtbl, probably
|
||||
|
@ -1361,6 +1368,8 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
|
|||
if (dead->Opcode() == Op_Opaque4) {
|
||||
igvn->C->remove_range_check_cast(dead);
|
||||
}
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->unregister_potential_barrier_node(dead);
|
||||
igvn->C->record_dead_node(dead->_idx);
|
||||
// Kill all inputs to the dead guy
|
||||
for (uint i=0; i < dead->req(); i++) {
|
||||
|
@ -1379,6 +1388,8 @@ static void kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
|
|||
// The restriction (outcnt() <= 2) is the same as in set_req_X()
|
||||
// and remove_globally_dead_node().
|
||||
igvn->add_users_to_worklist( n );
|
||||
} else {
|
||||
BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(igvn->_worklist, n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -51,29 +51,60 @@ extern int explicit_null_checks_inserted,
|
|||
#endif
|
||||
|
||||
//---------------------------------array_load----------------------------------
|
||||
void Parse::array_load(BasicType elem_type) {
|
||||
const Type* elem = Type::TOP;
|
||||
Node* adr = array_addressing(elem_type, 0, &elem);
|
||||
void Parse::array_load(BasicType bt) {
|
||||
const Type* elemtype = Type::TOP;
|
||||
bool big_val = bt == T_DOUBLE || bt == T_LONG;
|
||||
Node* adr = array_addressing(bt, 0, &elemtype);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
dec_sp(2); // Pop array and index
|
||||
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
|
||||
Node* ld = make_load(control(), adr, elem, elem_type, adr_type, MemNode::unordered);
|
||||
|
||||
pop(); // index (already used)
|
||||
Node* array = pop(); // the array itself
|
||||
|
||||
if (elemtype == TypeInt::BOOL) {
|
||||
bt = T_BOOLEAN;
|
||||
} else if (bt == T_OBJECT) {
|
||||
elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr();
|
||||
}
|
||||
|
||||
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(bt);
|
||||
|
||||
Node* ld = access_load_at(array, adr, adr_type, elemtype, bt,
|
||||
IN_HEAP | IN_HEAP_ARRAY | C2_CONTROL_DEPENDENT_LOAD);
|
||||
if (big_val) {
|
||||
push_pair(ld);
|
||||
} else {
|
||||
push(ld);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//--------------------------------array_store----------------------------------
|
||||
void Parse::array_store(BasicType elem_type) {
|
||||
const Type* elem = Type::TOP;
|
||||
Node* adr = array_addressing(elem_type, 1, &elem);
|
||||
void Parse::array_store(BasicType bt) {
|
||||
const Type* elemtype = Type::TOP;
|
||||
bool big_val = bt == T_DOUBLE || bt == T_LONG;
|
||||
Node* adr = array_addressing(bt, big_val ? 2 : 1, &elemtype);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
Node* val = pop();
|
||||
dec_sp(2); // Pop array and index
|
||||
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
|
||||
if (elem == TypeInt::BOOL) {
|
||||
elem_type = T_BOOLEAN;
|
||||
if (bt == T_OBJECT) {
|
||||
array_store_check();
|
||||
}
|
||||
store_to_memory(control(), adr, val, elem_type, adr_type, StoreNode::release_if_reference(elem_type));
|
||||
Node* val; // Oop to store
|
||||
if (big_val) {
|
||||
val = pop_pair();
|
||||
} else {
|
||||
val = pop();
|
||||
}
|
||||
pop(); // index (already used)
|
||||
Node* array = pop(); // the array itself
|
||||
|
||||
if (elemtype == TypeInt::BOOL) {
|
||||
bt = T_BOOLEAN;
|
||||
} else if (bt == T_OBJECT) {
|
||||
elemtype = _gvn.type(array)->is_aryptr()->elem()->make_oopptr();
|
||||
}
|
||||
|
||||
const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(bt);
|
||||
|
||||
access_store_at(control(), array, adr, adr_type, val, elemtype, bt, MO_UNORDERED | IN_HEAP | IN_HEAP_ARRAY);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2147,55 +2178,17 @@ void Parse::do_one_bytecode() {
|
|||
case Bytecodes::_saload: array_load(T_SHORT); break;
|
||||
case Bytecodes::_faload: array_load(T_FLOAT); break;
|
||||
case Bytecodes::_aaload: array_load(T_OBJECT); break;
|
||||
case Bytecodes::_laload: {
|
||||
a = array_addressing(T_LONG, 0);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
dec_sp(2); // Pop array and index
|
||||
push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS, MemNode::unordered));
|
||||
break;
|
||||
}
|
||||
case Bytecodes::_daload: {
|
||||
a = array_addressing(T_DOUBLE, 0);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
dec_sp(2); // Pop array and index
|
||||
push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered));
|
||||
break;
|
||||
}
|
||||
case Bytecodes::_laload: array_load(T_LONG); break;
|
||||
case Bytecodes::_daload: array_load(T_DOUBLE); break;
|
||||
case Bytecodes::_bastore: array_store(T_BYTE); break;
|
||||
case Bytecodes::_castore: array_store(T_CHAR); break;
|
||||
case Bytecodes::_iastore: array_store(T_INT); break;
|
||||
case Bytecodes::_sastore: array_store(T_SHORT); break;
|
||||
case Bytecodes::_fastore: array_store(T_FLOAT); break;
|
||||
case Bytecodes::_aastore: {
|
||||
d = array_addressing(T_OBJECT, 1);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
array_store_check();
|
||||
c = pop(); // Oop to store
|
||||
b = pop(); // index (already used)
|
||||
a = pop(); // the array itself
|
||||
const TypeOopPtr* elemtype = _gvn.type(a)->is_aryptr()->elem()->make_oopptr();
|
||||
const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
|
||||
Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT,
|
||||
StoreNode::release_if_reference(T_OBJECT));
|
||||
break;
|
||||
}
|
||||
case Bytecodes::_lastore: {
|
||||
a = array_addressing(T_LONG, 2);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
c = pop_pair();
|
||||
dec_sp(2); // Pop array and index
|
||||
store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS, MemNode::unordered);
|
||||
break;
|
||||
}
|
||||
case Bytecodes::_dastore: {
|
||||
a = array_addressing(T_DOUBLE, 2);
|
||||
if (stopped()) return; // guaranteed null or range check
|
||||
c = pop_pair();
|
||||
dec_sp(2); // Pop array and index
|
||||
c = dstore_rounding(c);
|
||||
store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES, MemNode::unordered);
|
||||
break;
|
||||
}
|
||||
case Bytecodes::_aastore: array_store(T_OBJECT); break;
|
||||
case Bytecodes::_lastore: array_store(T_LONG); break;
|
||||
case Bytecodes::_dastore: array_store(T_DOUBLE); break;
|
||||
|
||||
case Bytecodes::_getfield:
|
||||
do_getfield();
|
||||
break;
|
||||
|
|
|
@ -177,7 +177,12 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
|
|||
|
||||
bool must_assert_null = false;
|
||||
|
||||
if( bt == T_OBJECT ) {
|
||||
DecoratorSet decorators = IN_HEAP;
|
||||
decorators |= is_vol ? MO_SEQ_CST : MO_UNORDERED;
|
||||
|
||||
bool is_obj = bt == T_OBJECT || bt == T_ARRAY;
|
||||
|
||||
if (is_obj) {
|
||||
if (!field->type()->is_loaded()) {
|
||||
type = TypeInstPtr::BOTTOM;
|
||||
must_assert_null = true;
|
||||
|
@ -198,14 +203,8 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
|
|||
} else {
|
||||
type = Type::get_const_basic_type(bt);
|
||||
}
|
||||
if (support_IRIW_for_not_multiple_copy_atomic_cpu && field->is_volatile()) {
|
||||
insert_mem_bar(Op_MemBarVolatile); // StoreLoad barrier
|
||||
}
|
||||
// Build the load.
|
||||
//
|
||||
MemNode::MemOrd mo = is_vol ? MemNode::acquire : MemNode::unordered;
|
||||
bool needs_atomic_access = is_vol || AlwaysAtomicAccesses;
|
||||
Node* ld = make_load(NULL, adr, type, bt, adr_type, mo, LoadNode::DependsOnlyOnTest, needs_atomic_access);
|
||||
|
||||
Node* ld = access_load_at(obj, adr, adr_type, type, bt, decorators);
|
||||
|
||||
// Adjust Java stack
|
||||
if (type2size[bt] == 1)
|
||||
|
@ -236,22 +235,10 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
|
|||
null_assert(peek());
|
||||
set_bci(iter().cur_bci()); // put it back
|
||||
}
|
||||
|
||||
// If reference is volatile, prevent following memory ops from
|
||||
// floating up past the volatile read. Also prevents commoning
|
||||
// another volatile read.
|
||||
if (field->is_volatile()) {
|
||||
// Memory barrier includes bogus read of value to force load BEFORE membar
|
||||
insert_mem_bar(Op_MemBarAcquire, ld);
|
||||
}
|
||||
}
|
||||
|
||||
void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) {
|
||||
bool is_vol = field->is_volatile();
|
||||
// If reference is volatile, prevent following memory ops from
|
||||
// floating down past the volatile write. Also prevents commoning
|
||||
// another volatile read.
|
||||
if (is_vol) insert_mem_bar(Op_MemBarRelease);
|
||||
|
||||
// Compute address and memory type.
|
||||
int offset = field->offset_in_bytes();
|
||||
|
@ -260,73 +247,52 @@ void Parse::do_put_xxx(Node* obj, ciField* field, bool is_field) {
|
|||
BasicType bt = field->layout_type();
|
||||
// Value to be stored
|
||||
Node* val = type2size[bt] == 1 ? pop() : pop_pair();
|
||||
// Round doubles before storing
|
||||
if (bt == T_DOUBLE) val = dstore_rounding(val);
|
||||
|
||||
// Conservatively release stores of object references.
|
||||
const MemNode::MemOrd mo =
|
||||
is_vol ?
|
||||
// Volatile fields need releasing stores.
|
||||
MemNode::release :
|
||||
// Non-volatile fields also need releasing stores if they hold an
|
||||
// object reference, because the object reference might point to
|
||||
// a freshly created object.
|
||||
StoreNode::release_if_reference(bt);
|
||||
DecoratorSet decorators = IN_HEAP;
|
||||
decorators |= is_vol ? MO_SEQ_CST : MO_UNORDERED;
|
||||
|
||||
bool is_obj = bt == T_OBJECT || bt == T_ARRAY;
|
||||
|
||||
// Store the value.
|
||||
Node* store;
|
||||
if (bt == T_OBJECT) {
|
||||
const TypeOopPtr* field_type;
|
||||
const Type* field_type;
|
||||
if (!field->type()->is_loaded()) {
|
||||
field_type = TypeInstPtr::BOTTOM;
|
||||
} else {
|
||||
if (is_obj) {
|
||||
field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
|
||||
}
|
||||
store = store_oop_to_object(control(), obj, adr, adr_type, val, field_type, bt, mo);
|
||||
} else {
|
||||
bool needs_atomic_access = is_vol || AlwaysAtomicAccesses;
|
||||
store = store_to_memory(control(), adr, val, bt, adr_type, mo, needs_atomic_access);
|
||||
field_type = Type::BOTTOM;
|
||||
}
|
||||
}
|
||||
access_store_at(control(), obj, adr, adr_type, val, field_type, bt, decorators);
|
||||
|
||||
// If reference is volatile, prevent following volatiles ops from
|
||||
// floating up before the volatile write.
|
||||
if (is_vol) {
|
||||
// If not multiple copy atomic, we do the MemBarVolatile before the load.
|
||||
if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
|
||||
insert_mem_bar(Op_MemBarVolatile); // Use fat membar
|
||||
}
|
||||
if (is_field) {
|
||||
// Remember we wrote a volatile field.
|
||||
// For not multiple copy atomic cpu (ppc64) a barrier should be issued
|
||||
// in constructors which have such stores. See do_exits() in parse1.cpp.
|
||||
if (is_field) {
|
||||
if (is_vol) {
|
||||
set_wrote_volatile(true);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_field) {
|
||||
set_wrote_fields(true);
|
||||
}
|
||||
|
||||
// If the field is final, the rules of Java say we are in <init> or <clinit>.
|
||||
// Note the presence of writes to final non-static fields, so that we
|
||||
// can insert a memory barrier later on to keep the writes from floating
|
||||
// out of the constructor.
|
||||
// Any method can write a @Stable field; insert memory barriers after those also.
|
||||
if (is_field && (field->is_final() || field->is_stable())) {
|
||||
if (field->is_final()) {
|
||||
set_wrote_final(true);
|
||||
}
|
||||
if (field->is_stable()) {
|
||||
set_wrote_stable(true);
|
||||
}
|
||||
|
||||
if (AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
|
||||
// Preserve allocation ptr to create precedent edge to it in membar
|
||||
// generated on exit from constructor.
|
||||
// Can't bind stable with its allocation, only record allocation for final field.
|
||||
if (field->is_final() && AllocateNode::Ideal_allocation(obj, &_gvn) != NULL) {
|
||||
set_alloc_with_final(obj);
|
||||
}
|
||||
}
|
||||
if (field->is_stable()) {
|
||||
set_wrote_stable(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
|
@ -385,7 +351,7 @@ Node* Parse::expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, in
|
|||
Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1, nargs);
|
||||
intptr_t offset = header + ((intptr_t)i << LogBytesPerHeapOop);
|
||||
Node* eaddr = basic_plus_adr(array, offset);
|
||||
store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, MemNode::unordered);
|
||||
access_store_at(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT, IN_HEAP | IN_HEAP_ARRAY);
|
||||
}
|
||||
}
|
||||
return array;
|
||||
|
|
|
@ -23,6 +23,8 @@
|
|||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "gc/shared/barrierSet.hpp"
|
||||
#include "gc/shared/c2/barrierSetC2.hpp"
|
||||
#include "memory/allocation.inline.hpp"
|
||||
#include "memory/resourceArea.hpp"
|
||||
#include "opto/block.hpp"
|
||||
|
@ -36,6 +38,7 @@
|
|||
#include "opto/phaseX.hpp"
|
||||
#include "opto/regalloc.hpp"
|
||||
#include "opto/rootnode.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
//=============================================================================
|
||||
#define NODE_HASH_MINIMUM_SIZE 255
|
||||
|
@ -939,6 +942,9 @@ PhaseIterGVN::PhaseIterGVN( PhaseGVN *gvn ) : PhaseGVN(gvn),
|
|||
n->is_Mem() )
|
||||
add_users_to_worklist(n);
|
||||
}
|
||||
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->add_users_to_worklist(&_worklist);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1369,6 +1375,8 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
|
|||
}
|
||||
assert(!(i < imax), "sanity");
|
||||
}
|
||||
} else {
|
||||
BarrierSet::barrier_set()->barrier_set_c2()->enqueue_useful_gc_barrier(_worklist, in);
|
||||
}
|
||||
if (ReduceFieldZeroing && dead->is_Load() && i == MemNode::Memory &&
|
||||
in->is_Proj() && in->in(0) != NULL && in->in(0)->is_Initialize()) {
|
||||
|
@ -1424,6 +1432,8 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
|
|||
if (dead->Opcode() == Op_Opaque4) {
|
||||
C->remove_opaque4_node(dead);
|
||||
}
|
||||
BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2();
|
||||
bs->unregister_potential_barrier_node(dead);
|
||||
}
|
||||
} // while (_stack.is_nonempty())
|
||||
}
|
||||
|
|
|
@ -95,8 +95,6 @@ address OptoRuntime::_multianewarray3_Java = NULL;
|
|||
address OptoRuntime::_multianewarray4_Java = NULL;
|
||||
address OptoRuntime::_multianewarray5_Java = NULL;
|
||||
address OptoRuntime::_multianewarrayN_Java = NULL;
|
||||
address OptoRuntime::_g1_wb_pre_Java = NULL;
|
||||
address OptoRuntime::_g1_wb_post_Java = NULL;
|
||||
address OptoRuntime::_vtable_must_compile_Java = NULL;
|
||||
address OptoRuntime::_complete_monitor_locking_Java = NULL;
|
||||
address OptoRuntime::_monitor_notify_Java = NULL;
|
||||
|
@ -141,10 +139,6 @@ bool OptoRuntime::generate(ciEnv* env) {
|
|||
gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
|
||||
gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false);
|
||||
gen(env, _multianewarrayN_Java , multianewarrayN_Type , multianewarrayN_C , 0 , true , false, false);
|
||||
#if INCLUDE_G1GC
|
||||
gen(env, _g1_wb_pre_Java , g1_wb_pre_Type , SharedRuntime::g1_wb_pre , 0 , false, false, false);
|
||||
gen(env, _g1_wb_post_Java , g1_wb_post_Type , SharedRuntime::g1_wb_post , 0 , false, false, false);
|
||||
#endif // INCLUDE_G1GC
|
||||
gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C, 0, false, false, false);
|
||||
gen(env, _monitor_notify_Java , monitor_notify_Type , monitor_notify_C , 0 , false, false, false);
|
||||
gen(env, _monitor_notifyAll_Java , monitor_notify_Type , monitor_notifyAll_C , 0 , false, false, false);
|
||||
|
@ -544,33 +538,6 @@ const TypeFunc *OptoRuntime::multianewarrayN_Type() {
|
|||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
const TypeFunc *OptoRuntime::g1_wb_pre_Type() {
  const Type **fields = TypeTuple::fields(2);
  fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // original field value
  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // thread
  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);

  // create result type (range)
  fields = TypeTuple::fields(0);
  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);

  return TypeFunc::make(domain, range);
}

const TypeFunc *OptoRuntime::g1_wb_post_Type() {

  const Type **fields = TypeTuple::fields(2);
  fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL;  // Card addr
  fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL;  // thread
  const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);

  // create result type (range)
  fields = TypeTuple::fields(0);
  const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);

  return TypeFunc::make(domain, range);
}
|
||||
|
||||
const TypeFunc *OptoRuntime::uncommon_trap_Type() {
|
||||
// create input type (domain)
|
||||
const Type **fields = TypeTuple::fields(1);
|
||||
|
|
|
@ -141,8 +141,6 @@ class OptoRuntime : public AllStatic {
|
|||
static address _multianewarray4_Java;
|
||||
static address _multianewarray5_Java;
|
||||
static address _multianewarrayN_Java;
|
||||
static address _g1_wb_pre_Java;
|
||||
static address _g1_wb_post_Java;
|
||||
static address _vtable_must_compile_Java;
|
||||
static address _complete_monitor_locking_Java;
|
||||
static address _rethrow_Java;
|
||||
|
@ -170,8 +168,6 @@ class OptoRuntime : public AllStatic {
|
|||
static void multianewarray4_C(Klass* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
|
||||
static void multianewarray5_C(Klass* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
|
||||
static void multianewarrayN_C(Klass* klass, arrayOopDesc* dims, JavaThread *thread);
|
||||
static void g1_wb_pre_C(oopDesc* orig, JavaThread* thread);
|
||||
static void g1_wb_post_C(void* card_addr, JavaThread* thread);
|
||||
|
||||
public:
|
||||
// Slow-path Locking and Unlocking
|
||||
|
@ -223,8 +219,6 @@ private:
|
|||
static address multianewarray4_Java() { return _multianewarray4_Java; }
|
||||
static address multianewarray5_Java() { return _multianewarray5_Java; }
|
||||
static address multianewarrayN_Java() { return _multianewarrayN_Java; }
|
||||
static address g1_wb_pre_Java() { return _g1_wb_pre_Java; }
|
||||
static address g1_wb_post_Java() { return _g1_wb_post_Java; }
|
||||
static address vtable_must_compile_stub() { return _vtable_must_compile_Java; }
|
||||
static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; }
|
||||
static address monitor_notify_Java() { return _monitor_notify_Java; }
|
||||
|
@ -257,8 +251,6 @@ private:
|
|||
static const TypeFunc* multianewarray4_Type(); // multianewarray
|
||||
static const TypeFunc* multianewarray5_Type(); // multianewarray
|
||||
static const TypeFunc* multianewarrayN_Type(); // multianewarray
|
||||
static const TypeFunc* g1_wb_pre_Type();
|
||||
static const TypeFunc* g1_wb_post_Type();
|
||||
static const TypeFunc* complete_monitor_enter_Type();
|
||||
static const TypeFunc* complete_monitor_exit_Type();
|
||||
static const TypeFunc* monitor_notify_Type();
|
||||
|
|