Merge

2025-09-23 04:24:49 +02:00 · 2008-06-27 18:19:29 -04:00 · 2008-06-27 18:19:29 -04:00 · 289ca864cd
commit 289ca864cd
parent 2c9fd9172b 510a97ebed
98 changed files with 2105 additions and 605 deletions
--- a/hotspot/src/share/tools/MakeDeps/Database.java
+++ b/hotspot/src/share/tools/MakeDeps/Database.java
@ -36,6 +36,7 @@ public class Database {
  private FileList outerFiles;
  private FileList indivIncludes;
  private FileList grandInclude; // the results for the grand include file
+  private HashMap<String,String> platformDepFiles;
  private long threshold;
  private int nOuterFiles;
  private int nPrecompiledFiles;
@ -57,6 +58,7 @@ public class Database {
    outerFiles      = new FileList("outerFiles", plat);
    indivIncludes   = new FileList("IndivIncludes", plat);
    grandInclude    = new FileList(plat.getGIFileTemplate().nameOfList(), plat);
+    platformDepFiles = new HashMap<String,String>();

    threshold = t;
    nOuterFiles = 0;
@ -209,6 +211,10 @@ public class Database {
              FileList p = allFiles.listForFile(includer);
              p.setPlatformDependentInclude(pdName.dirPreStemSuff());

+              // Record the implicit include of this file so that the
+              // dependencies for precompiled headers can mention it.
+              platformDepFiles.put(newIncluder, includer);
+
              // Add an implicit dependency on platform
              // specific file for the generic file

@ -408,6 +414,12 @@ public class Database {
      for (Iterator iter = grandInclude.iterator(); iter.hasNext(); ) {
        FileList list = (FileList) iter.next();
        gd.println(list.getName() + " \\");
+        String platformDep = platformDepFiles.get(list.getName());
+        if (platformDep != null) {
+            // make sure changes to the platform dependent file will
+            // cause regeneration of the pch file.
+            gd.println(platformDep + " \\");
+        }
      }
      gd.println();
      gd.println();
--- a/hotspot/src/share/vm/adlc/formssel.cpp
+++ b/hotspot/src/share/vm/adlc/formssel.cpp
@ -729,6 +729,7 @@ bool InstructForm::captures_bottom_type() const {
        !strcmp(_matrule->_rChild->_opType,"DecodeN")    ||
        !strcmp(_matrule->_rChild->_opType,"EncodeP")    ||
        !strcmp(_matrule->_rChild->_opType,"LoadN")      ||
+        !strcmp(_matrule->_rChild->_opType,"LoadNKlass") ||
        !strcmp(_matrule->_rChild->_opType,"CreateEx")   ||  // type of exception
        !strcmp(_matrule->_rChild->_opType,"CheckCastPP")) ) return true;
  else if ( is_ideal_load() == Form::idealP )                return true;
--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp
+++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp
@ -283,6 +283,7 @@
  template(cache_field_name,                          "cache")                                    \
  template(value_name,                                "value")                                    \
  template(frontCacheEnabled_name,                    "frontCacheEnabled")                        \
+  template(stringCacheEnabled_name,                   "stringCacheEnabled")                       \
                                                                                                  \
  /* non-intrinsic name/signature pairs: */                                                       \
  template(register_method_name,                      "register")                                 \
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp
@ -805,28 +805,30 @@ size_t CompactibleFreeListSpace::block_size(const HeapWord* p) const {
  // This must be volatile, or else there is a danger that the compiler
  // will compile the code below into a sometimes-infinite loop, by keeping
  // the value read the first time in a register.
-  oop o = (oop)p;
-  volatile oop* second_word_addr = o->klass_addr();
  while (true) {
-    klassOop k = (klassOop)(*second_word_addr);
    // We must do this until we get a consistent view of the object.
-    if (FreeChunk::secondWordIndicatesFreeChunk((intptr_t)k)) {
-      FreeChunk* fc = (FreeChunk*)p;
-      volatile size_t* sz_addr = (volatile size_t*)(fc->size_addr());
-      size_t res = (*sz_addr);
-      klassOop k2 = (klassOop)(*second_word_addr);  // Read to confirm.
-      if (k == k2) {
+    if (FreeChunk::indicatesFreeChunk(p)) {
+      volatile FreeChunk* fc = (volatile FreeChunk*)p;
+      size_t res = fc->size();
+      // If the object is still a free chunk, return the size, else it
+      // has been allocated so try again.
+      if (FreeChunk::indicatesFreeChunk(p)) {
+        assert(res != 0, "Block size should not be 0");
+        return res;
+      }
+    } else {
+      // must read from what 'p' points to in each loop.
+      klassOop k = ((volatile oopDesc*)p)->klass_or_null();
+      if (k != NULL) {
+        assert(k->is_oop(true /* ignore mark word */), "Should really be klass oop.");
+        oop o = (oop)p;
+        assert(o->is_parsable(), "Should be parsable");
+        assert(o->is_oop(true /* ignore mark word */), "Should be an oop.");
+        size_t res = o->size_given_klass(k->klass_part());
+        res = adjustObjectSize(res);
        assert(res != 0, "Block size should not be 0");
        return res;
      }
-    } else if (k != NULL) {
-      assert(k->is_oop(true /* ignore mark word */), "Should really be klass oop.");
-      assert(o->is_parsable(), "Should be parsable");
-      assert(o->is_oop(true /* ignore mark word */), "Should be an oop.");
-      size_t res = o->size_given_klass(k->klass_part());
-      res = adjustObjectSize(res);
-      assert(res != 0, "Block size should not be 0");
-      return res;
    }
  }
 }
@ -845,31 +847,31 @@ const {
  // This must be volatile, or else there is a danger that the compiler
  // will compile the code below into a sometimes-infinite loop, by keeping
  // the value read the first time in a register.
-  oop o = (oop)p;
-  volatile oop* second_word_addr = o->klass_addr();
  DEBUG_ONLY(uint loops = 0;)
  while (true) {
-    klassOop k = (klassOop)(*second_word_addr);
    // We must do this until we get a consistent view of the object.
-    if (FreeChunk::secondWordIndicatesFreeChunk((intptr_t)k)) {
-      FreeChunk* fc = (FreeChunk*)p;
-      volatile size_t* sz_addr = (volatile size_t*)(fc->size_addr());
-      size_t res = (*sz_addr);
-      klassOop k2 = (klassOop)(*second_word_addr);  // Read to confirm.
-      if (k == k2) {
+    if (FreeChunk::indicatesFreeChunk(p)) {
+      volatile FreeChunk* fc = (volatile FreeChunk*)p;
+      size_t res = fc->size();
+      if (FreeChunk::indicatesFreeChunk(p)) {
        assert(res != 0, "Block size should not be 0");
        assert(loops == 0, "Should be 0");
        return res;
      }
-    } else if (k != NULL && o->is_parsable()) {
-      assert(k->is_oop(), "Should really be klass oop.");
-      assert(o->is_oop(), "Should be an oop");
-      size_t res = o->size_given_klass(k->klass_part());
-      res = adjustObjectSize(res);
-      assert(res != 0, "Block size should not be 0");
-      return res;
    } else {
-      return c->block_size_if_printezis_bits(p);
+      // must read from what 'p' points to in each loop.
+      klassOop k = ((volatile oopDesc*)p)->klass_or_null();
+      if (k != NULL && ((oopDesc*)p)->is_parsable()) {
+        assert(k->is_oop(), "Should really be klass oop.");
+        oop o = (oop)p;
+        assert(o->is_oop(), "Should be an oop");
+        size_t res = o->size_given_klass(k->klass_part());
+        res = adjustObjectSize(res);
+        assert(res != 0, "Block size should not be 0");
+        return res;
+      } else {
+        return c->block_size_if_printezis_bits(p);
+      }
    }
    assert(loops == 0, "Can loop at most once");
    DEBUG_ONLY(loops++;)
@ -907,9 +909,8 @@ bool CompactibleFreeListSpace::block_is_obj(const HeapWord* p) const {
  // and those objects (if garbage) may have been modified to hold
  // live range information.
  // assert(ParallelGCThreads > 0 || _bt.block_start(p) == p, "Should be a block boundary");
-  klassOop k = oop(p)->klass();
-  intptr_t ki = (intptr_t)k;
-  if (FreeChunk::secondWordIndicatesFreeChunk(ki)) return false;
+  if (FreeChunk::indicatesFreeChunk(p)) return false;
+  klassOop k = oop(p)->klass_or_null();
  if (k != NULL) {
    // Ignore mark word because it may have been used to
    // chain together promoted objects (the last one
@ -1027,7 +1028,7 @@ HeapWord* CompactibleFreeListSpace::allocate(size_t size) {
    FreeChunk* fc = (FreeChunk*)res;
    fc->markNotFree();
    assert(!fc->isFree(), "shouldn't be marked free");
-    assert(oop(fc)->klass() == NULL, "should look uninitialized");
+    assert(oop(fc)->klass_or_null() == NULL, "should look uninitialized");
    // Verify that the block offset table shows this to
    // be a single block, but not one which is unallocated.
    _bt.verify_single_block(res, size);
@ -2593,7 +2594,7 @@ HeapWord* CFLS_LAB::alloc(size_t word_sz) {
  }
  res->markNotFree();
  assert(!res->isFree(), "shouldn't be marked free");
-  assert(oop(res)->klass() == NULL, "should look uninitialized");
+  assert(oop(res)->klass_or_null() == NULL, "should look uninitialized");
  // mangle a just allocated object with a distinct pattern.
  debug_only(res->mangleAllocated(word_sz));
  return (HeapWord*)res;
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
@ -190,7 +190,8 @@ ConcurrentMarkSweepGeneration::ConcurrentMarkSweepGeneration(
  // depends on this property.
  debug_only(
    FreeChunk* junk = NULL;
-    assert(junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
+    assert(UseCompressedOops ||
+           junk->prev_addr() == (void*)(oop(junk)->klass_addr()),
           "Offset of FreeChunk::_prev within FreeChunk must match"
           "  that of OopDesc::_klass within OopDesc");
  )
@ -1039,7 +1040,7 @@ void CMSCollector::direct_allocated(HeapWord* start, size_t size) {
                                      // mark end of object
  }
  // check that oop looks uninitialized
-  assert(oop(start)->klass() == NULL, "_klass should be NULL");
+  assert(oop(start)->klass_or_null() == NULL, "_klass should be NULL");
 }

 void CMSCollector::promoted(bool par, HeapWord* start,
@ -1309,17 +1310,25 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
     }
  }
  oop obj = oop(obj_ptr);
-  assert(obj->klass() == NULL, "Object should be uninitialized here.");
+  assert(obj->klass_or_null() == NULL, "Object should be uninitialized here.");
  // Otherwise, copy the object.  Here we must be careful to insert the
  // klass pointer last, since this marks the block as an allocated object.
+  // Except with compressed oops it's the mark word.
  HeapWord* old_ptr = (HeapWord*)old;
  if (word_sz > (size_t)oopDesc::header_size()) {
    Copy::aligned_disjoint_words(old_ptr + oopDesc::header_size(),
                                 obj_ptr + oopDesc::header_size(),
                                 word_sz - oopDesc::header_size());
  }
+
+  if (UseCompressedOops) {
+    // Copy gap missed by (aligned) header size calculation above
+    obj->set_klass_gap(old->klass_gap());
+  }
+
  // Restore the mark word copied above.
  obj->set_mark(m);
+
  // Now we can track the promoted object, if necessary.  We take care
  // To delay the transition from uninitialized to full object
  // (i.e., insertion of klass pointer) until after, so that it
@ -1327,7 +1336,8 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
  if (promoInfo->tracking()) {
    promoInfo->track((PromotedObject*)obj, old->klass());
  }
-  // Finally, install the klass pointer.
+
+  // Finally, install the klass pointer (this should be volatile).
  obj->set_klass(old->klass());

  assert(old->is_oop(), "Will dereference klass ptr below");
@ -6165,7 +6175,7 @@ size_t CMSCollector::block_size_if_printezis_bits(HeapWord* addr) const {
 HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const {
  size_t sz = 0;
  oop p = (oop)addr;
-  if (p->klass() != NULL && p->is_parsable()) {
+  if (p->klass_or_null() != NULL && p->is_parsable()) {
    sz = CompactibleFreeListSpace::adjustObjectSize(p->size());
  } else {
    sz = block_size_using_printezis_bits(addr);
@ -6602,7 +6612,7 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
  }
  if (_bitMap->isMarked(addr)) {
    // it's marked; is it potentially uninitialized?
-    if (p->klass() != NULL) {
+    if (p->klass_or_null() != NULL) {
      if (CMSPermGenPrecleaningEnabled && !p->is_parsable()) {
        // Signal precleaning to redirty the card since
        // the klass pointer is already installed.
@ -6615,11 +6625,8 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
        if (p->is_objArray()) {
          // objArrays are precisely marked; restrict scanning
          // to dirty cards only.
-          size = p->oop_iterate(_scanningClosure, mr);
-          assert(size == CompactibleFreeListSpace::adjustObjectSize(size),
-                 "adjustObjectSize should be the identity for array sizes, "
-                 "which are necessarily larger than minimum object size of "
-                 "two heap words");
+          size = CompactibleFreeListSpace::adjustObjectSize(
+                   p->oop_iterate(_scanningClosure, mr));
        } else {
          // A non-array may have been imprecisely marked; we need
          // to scan object in its entirety.
@ -6653,7 +6660,7 @@ size_t ScanMarkedObjectsAgainCarefullyClosure::do_object_careful_m(
    }
  } else {
    // Either a not yet marked object or an uninitialized object
-    if (p->klass() == NULL || !p->is_parsable()) {
+    if (p->klass_or_null() == NULL || !p->is_parsable()) {
      // An uninitialized object, skip to the next card, since
      // we may not be able to read its P-bits yet.
      assert(size == 0, "Initial value");
@ -6710,7 +6717,7 @@ size_t SurvivorSpacePrecleanClosure::do_object_careful(oop p) {
  HeapWord* addr = (HeapWord*)p;
  DEBUG_ONLY(_collector->verify_work_stacks_empty();)
  assert(!_span.contains(addr), "we are scanning the survivor spaces");
-  assert(p->klass() != NULL, "object should be initializd");
+  assert(p->klass_or_null() != NULL, "object should be initializd");
  assert(p->is_parsable(), "must be parsable.");
  // an initialized object; ignore mark word in verification below
  // since we are running concurrent with mutators
@ -6868,7 +6875,7 @@ void MarkFromRootsClosure::do_bit(size_t offset) {
    assert(_skipBits == 0, "tautology");
    _skipBits = 2;  // skip next two marked bits ("Printezis-marks")
    oop p = oop(addr);
-    if (p->klass() == NULL || !p->is_parsable()) {
+    if (p->klass_or_null() == NULL || !p->is_parsable()) {
      DEBUG_ONLY(if (!_verifying) {)
        // We re-dirty the cards on which this object lies and increase
        // the _threshold so that we'll come back to scan this object
@ -6890,7 +6897,7 @@ void MarkFromRootsClosure::do_bit(size_t offset) {
          if (_threshold < end_card_addr) {
            _threshold = end_card_addr;
          }
-          if (p->klass() != NULL) {
+          if (p->klass_or_null() != NULL) {
            // Redirty the range of cards...
            _mut->mark_range(redirty_range);
          } // ...else the setting of klass will dirty the card anyway.
@ -7048,7 +7055,7 @@ void Par_MarkFromRootsClosure::do_bit(size_t offset) {
    assert(_skip_bits == 0, "tautology");
    _skip_bits = 2;  // skip next two marked bits ("Printezis-marks")
    oop p = oop(addr);
-    if (p->klass() == NULL || !p->is_parsable()) {
+    if (p->klass_or_null() == NULL || !p->is_parsable()) {
      // in the case of Clean-on-Enter optimization, redirty card
      // and avoid clearing card by increasing  the threshold.
      return;
@ -8023,7 +8030,7 @@ size_t SweepClosure::doLiveChunk(FreeChunk* fc) {
           "alignment problem");

    #ifdef DEBUG
-      if (oop(addr)->klass() != NULL &&
+      if (oop(addr)->klass_or_null() != NULL &&
          (   !_collector->should_unload_classes()
           || oop(addr)->is_parsable())) {
        // Ignore mark word because we are running concurrent with mutators
@ -8036,7 +8043,7 @@ size_t SweepClosure::doLiveChunk(FreeChunk* fc) {

  } else {
    // This should be an initialized object that's alive.
-    assert(oop(addr)->klass() != NULL &&
+    assert(oop(addr)->klass_or_null() != NULL &&
           (!_collector->should_unload_classes()
            || oop(addr)->is_parsable()),
           "Should be an initialized object");
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp
@ -22,88 +22,6 @@
 *
 */

-//
-// Free block maintenance for Concurrent Mark Sweep Generation
-//
-// The main data structure for free blocks are
-// . an indexed array of small free blocks, and
-// . a dictionary of large free blocks
-//
-
-// No virtuals in FreeChunk (don't want any vtables).
-
-// A FreeChunk is merely a chunk that can be in a doubly linked list
-// and has a size field. NOTE: FreeChunks are distinguished from allocated
-// objects in two ways (by the sweeper). The second word (prev) has the
-// LSB set to indicate a free chunk; allocated objects' klass() pointers
-// don't have their LSB set. The corresponding bit in the CMSBitMap is
-// set when the chunk is allocated. There are also blocks that "look free"
-// but are not part of the free list and should not be coalesced into larger
-// free blocks. These free blocks have their two LSB's set.
-
-class FreeChunk VALUE_OBJ_CLASS_SPEC {
-  friend class VMStructs;
-  FreeChunk* _next;
-  FreeChunk* _prev;
-  size_t     _size;
-
- public:
-  NOT_PRODUCT(static const size_t header_size();)
-  // Returns "true" if the "wrd", which is required to be the second word
-  // of a block, indicates that the block represents a free chunk.
-  static bool secondWordIndicatesFreeChunk(intptr_t wrd) {
-    return (wrd & 0x1) == 0x1;
-  }
-  bool isFree()       const {
-    return secondWordIndicatesFreeChunk((intptr_t)_prev);
-  }
-  bool cantCoalesce() const { return (((intptr_t)_prev) & 0x3) == 0x3; }
-  FreeChunk* next()   const { return _next; }
-  FreeChunk* prev()   const { return (FreeChunk*)(((intptr_t)_prev) & ~(0x3)); }
-  debug_only(void* prev_addr() const { return (void*)&_prev; })
-
-  void linkAfter(FreeChunk* ptr) {
-    linkNext(ptr);
-    if (ptr != NULL) ptr->linkPrev(this);
-  }
-  void linkAfterNonNull(FreeChunk* ptr) {
-    assert(ptr != NULL, "precondition violation");
-    linkNext(ptr);
-    ptr->linkPrev(this);
-  }
-  void linkNext(FreeChunk* ptr) { _next = ptr; }
-  void linkPrev(FreeChunk* ptr) { _prev = (FreeChunk*)((intptr_t)ptr | 0x1); }
-  void clearPrev()              { _prev = NULL; }
-  void clearNext()              { _next = NULL; }
-  void dontCoalesce()      {
-    // the block should be free
-    assert(isFree(), "Should look like a free block");
-    _prev = (FreeChunk*)(((intptr_t)_prev) | 0x2);
-  }
-  void markFree()    { _prev = (FreeChunk*)((intptr_t)_prev | 0x1);    }
-  void markNotFree() { _prev = NULL; }
-
-  size_t size()           const { return _size; }
-  void setSize(size_t size)     { _size = size; }
-
-  // For volatile reads:
-  size_t* size_addr()           { return &_size; }
-
-  // Return the address past the end of this chunk
-  HeapWord* end() const { return ((HeapWord*) this) + _size; }
-
-  // debugging
-  void verify()             const PRODUCT_RETURN;
-  void verifyList()         const PRODUCT_RETURN;
-  void mangleAllocated(size_t size) PRODUCT_RETURN;
-  void mangleFreed(size_t size)     PRODUCT_RETURN;
-};
-
-// Alignment helpers etc.
-#define numQuanta(x,y) ((x+y-1)/y)
-enum AlignmentConstants {
-  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment
-};

 // A FreeBlockDictionary is an abstract superclass that will allow
 // a number of alternative implementations in the future.
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp
@ -47,15 +47,15 @@ void FreeChunk::mangleAllocated(size_t size) {
  Copy::fill_to_words(addr + hdr, size - hdr, baadbabeHeapWord);
 }

-void FreeChunk::mangleFreed(size_t size) {
+void FreeChunk::mangleFreed(size_t sz) {
  assert(baadbabeHeapWord != deadbeefHeapWord, "Need distinct patterns");
  // mangle all but the header of a just-freed block of storage
  // just prior to passing it to the storage dictionary
-  assert(size >= MinChunkSize, "smallest size of object");
-  assert(size == _size, "just checking");
+  assert(sz >= MinChunkSize, "smallest size of object");
+  assert(sz == size(), "just checking");
  HeapWord* addr = (HeapWord*)this;
  size_t hdr = header_size();
-  Copy::fill_to_words(addr + hdr, size - hdr, deadbeefHeapWord);
+  Copy::fill_to_words(addr + hdr, sz - hdr, deadbeefHeapWord);
 }

 void FreeChunk::verifyList() const {
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp
@ -0,0 +1,137 @@
+/*
+ * Copyright 2001-2005 Sun Microsystems, Inc.  All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Free block maintenance for Concurrent Mark Sweep Generation
+//
+// The main data structure for free blocks are
+// . an indexed array of small free blocks, and
+// . a dictionary of large free blocks
+//
+
+// No virtuals in FreeChunk (don't want any vtables).
+
+// A FreeChunk is merely a chunk that can be in a doubly linked list
+// and has a size field. NOTE: FreeChunks are distinguished from allocated
+// objects in two ways (by the sweeper), depending on whether the VM is 32 or
+// 64 bits.
+// In 32 bits or 64 bits without CompressedOops, the second word (prev) has the
+// LSB set to indicate a free chunk; allocated objects' klass() pointers
+// don't have their LSB set. The corresponding bit in the CMSBitMap is
+// set when the chunk is allocated. There are also blocks that "look free"
+// but are not part of the free list and should not be coalesced into larger
+// free blocks. These free blocks have their two LSB's set.
+
+class FreeChunk VALUE_OBJ_CLASS_SPEC {
+  friend class VMStructs;
+  // For 64 bit compressed oops, the markOop encodes both the size and the
+  // indication that this is a FreeChunk and not an object.
+  volatile size_t   _size;
+  FreeChunk* _prev;
+  FreeChunk* _next;
+
+  markOop mark()     const volatile { return (markOop)_size; }
+  void set_mark(markOop m)          { _size = (size_t)m; }
+
+ public:
+  NOT_PRODUCT(static const size_t header_size();)
+
+  // Returns "true" if the address indicates that the block represents
+  // a free chunk.
+  static bool indicatesFreeChunk(const HeapWord* addr) {
+    // Force volatile read from addr because value might change between
+    // calls.  We really want the read of _mark and _prev from this pointer
+    // to be volatile but making the fields volatile causes all sorts of
+    // compilation errors.
+    return ((volatile FreeChunk*)addr)->isFree();
+  }
+
+  bool isFree() const volatile {
+    LP64_ONLY(if (UseCompressedOops) return mark()->is_cms_free_chunk(); else)
+    return (((intptr_t)_prev) & 0x1) == 0x1;
+  }
+  bool cantCoalesce() const {
+    assert(isFree(), "can't get coalesce bit on not free");
+    return (((intptr_t)_prev) & 0x2) == 0x2;
+  }
+  void dontCoalesce() {
+    // the block should be free
+    assert(isFree(), "Should look like a free block");
+    _prev = (FreeChunk*)(((intptr_t)_prev) | 0x2);
+  }
+  FreeChunk* prev() const {
+    return (FreeChunk*)(((intptr_t)_prev) & ~(0x3));
+  }
+
+  debug_only(void* prev_addr() const { return (void*)&_prev; })
+
+  size_t size() const volatile {
+    LP64_ONLY(if (UseCompressedOops) return mark()->get_size(); else )
+    return _size;
+  }
+  void setSize(size_t sz) {
+    LP64_ONLY(if (UseCompressedOops) set_mark(markOopDesc::set_size_and_free(sz)); else )
+    _size = sz;
+  }
+
+  FreeChunk* next()   const { return _next; }
+
+  void linkAfter(FreeChunk* ptr) {
+    linkNext(ptr);
+    if (ptr != NULL) ptr->linkPrev(this);
+  }
+  void linkAfterNonNull(FreeChunk* ptr) {
+    assert(ptr != NULL, "precondition violation");
+    linkNext(ptr);
+    ptr->linkPrev(this);
+  }
+  void linkNext(FreeChunk* ptr) { _next = ptr; }
+  void linkPrev(FreeChunk* ptr) {
+     LP64_ONLY(if (UseCompressedOops) _prev = ptr; else)
+     _prev = (FreeChunk*)((intptr_t)ptr | 0x1);
+  }
+  void clearPrev()              { _prev = NULL; }
+  void clearNext()              { _next = NULL; }
+  void markNotFree() {
+   LP64_ONLY(if (UseCompressedOops) set_mark(markOopDesc::prototype());)
+   // Also set _prev to null
+   _prev = NULL;
+  }
+
+  // Return the address past the end of this chunk
+  HeapWord* end() const { return ((HeapWord*) this) + size(); }
+
+  // debugging
+  void verify()             const PRODUCT_RETURN;
+  void verifyList()         const PRODUCT_RETURN;
+  void mangleAllocated(size_t size) PRODUCT_RETURN;
+  void mangleFreed(size_t size)     PRODUCT_RETURN;
+};
+
+// Alignment helpers etc.
+#define numQuanta(x,y) ((x+y-1)/y)
+enum AlignmentConstants {
+  MinChunkSize = numQuanta(sizeof(FreeChunk), MinObjAlignmentInBytes) * MinObjAlignment
+};
+
--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp
+++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp
@ -23,6 +23,7 @@
 */

 #define VM_STRUCTS_CMS(nonstatic_field, \
+                   volatile_nonstatic_field, \
                   static_field) \
  nonstatic_field(CompactibleFreeListSpace,    _collector,                                    CMSCollector*)                         \
  nonstatic_field(CompactibleFreeListSpace,    _bt,                                           BlockOffsetArrayNonContigSpace)        \
@ -36,9 +37,9 @@
  nonstatic_field(CMSCollector,                _markBitMap,                                   CMSBitMap)                             \
  nonstatic_field(ConcurrentMarkSweepGeneration, _cmsSpace,                                   CompactibleFreeListSpace*)             \
     static_field(ConcurrentMarkSweepThread,   _collector,                                    CMSCollector*)                         \
+  volatile_nonstatic_field(FreeChunk,          _size,                                         size_t)                                \
  nonstatic_field(FreeChunk,                   _next,                                         FreeChunk*)                            \
  nonstatic_field(FreeChunk,                   _prev,                                         FreeChunk*)                            \
-  nonstatic_field(FreeChunk,                   _size,                                         size_t)                                \
  nonstatic_field(LinearAllocBlock,            _word_size,                                    size_t)                                \
  nonstatic_field(FreeList,                    _size,                                         size_t)                                \
  nonstatic_field(FreeList,                    _count,                                        ssize_t)                               \
--- a/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
+++ b/hotspot/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
@ -206,6 +206,7 @@ freeBlockDictionary.cpp                 thread_<os_family>.inline.hpp

 freeBlockDictionary.hpp                 allocation.hpp
 freeBlockDictionary.hpp                 debug.hpp
+freeBlockDictionary.hpp                 freeChunk.hpp
 freeBlockDictionary.hpp                 globalDefinitions.hpp
 freeBlockDictionary.hpp                 memRegion.hpp
 freeBlockDictionary.hpp                 mutex.hpp
@ -214,6 +215,14 @@ freeBlockDictionary.hpp                 ostream.hpp
 freeChunk.cpp                           copy.hpp
 freeChunk.cpp                           freeBlockDictionary.hpp

+freeChunk.hpp                           allocation.hpp
+freeChunk.hpp                           debug.hpp
+freeChunk.hpp                           globalDefinitions.hpp
+freeChunk.hpp                           markOop.hpp
+freeChunk.hpp                           memRegion.hpp
+freeChunk.hpp                           mutex.hpp
+freeChunk.hpp                           ostream.hpp
+
 freeList.cpp                            freeBlockDictionary.hpp
 freeList.cpp                            freeList.hpp
 freeList.cpp                            globals.hpp
--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
+++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp
@ -1004,6 +1004,9 @@ void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values)

  DEBUG_ONLY(mark_bitmap()->verify_clear();)
  DEBUG_ONLY(summary_data().verify_clear();)
+
+  // Have worker threads release resources the next time they run a task.
+  gc_task_manager()->release_all_resources();
 }

 void PSParallelCompact::post_compact()
@ -1949,12 +1952,6 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
  TimeStamp compaction_start;
  TimeStamp collection_exit;

-  // "serial_CM" is needed until the parallel implementation
-  // of the move and update is done.
-  ParCompactionManager* serial_CM = new ParCompactionManager();
-  // Don't initialize more than once.
-  // serial_CM->initialize(&summary_data(), mark_bitmap());
-
  ParallelScavengeHeap* heap = gc_heap();
  GCCause::Cause gc_cause = heap->gc_cause();
  PSYoungGen* young_gen = heap->young_gen();
@ -1969,6 +1966,10 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
  PreGCValues pre_gc_values;
  pre_compact(&pre_gc_values);

+  // Get the compaction manager reserved for the VM thread.
+  ParCompactionManager* const vmthread_cm =
+    ParCompactionManager::manager_array(gc_task_manager()->workers());
+
  // Place after pre_compact() where the number of invocations is incremented.
  AdaptiveSizePolicyOutput(size_policy, heap->total_collections());

@ -2008,7 +2009,7 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
    bool marked_for_unloading = false;

    marking_start.update();
-    marking_phase(serial_CM, maximum_heap_compaction);
+    marking_phase(vmthread_cm, maximum_heap_compaction);

 #ifndef PRODUCT
    if (TraceParallelOldGCMarkingPhase) {
@ -2039,7 +2040,7 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
 #endif

    bool max_on_system_gc = UseMaximumCompactionOnSystemGC && is_system_gc;
-    summary_phase(serial_CM, maximum_heap_compaction || max_on_system_gc);
+    summary_phase(vmthread_cm, maximum_heap_compaction || max_on_system_gc);

 #ifdef ASSERT
    if (VerifyParallelOldWithMarkSweep &&
@ -2067,13 +2068,13 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
      // code can use the the forwarding pointers to
      // check the new pointer calculation.  The restore_marks()
      // has to be done before the real compact.
-      serial_CM->set_action(ParCompactionManager::VerifyUpdate);
-      compact_perm(serial_CM);
-      compact_serial(serial_CM);
-      serial_CM->set_action(ParCompactionManager::ResetObjects);
-      compact_perm(serial_CM);
-      compact_serial(serial_CM);
-      serial_CM->set_action(ParCompactionManager::UpdateAndCopy);
+      vmthread_cm->set_action(ParCompactionManager::VerifyUpdate);
+      compact_perm(vmthread_cm);
+      compact_serial(vmthread_cm);
+      vmthread_cm->set_action(ParCompactionManager::ResetObjects);
+      compact_perm(vmthread_cm);
+      compact_serial(vmthread_cm);
+      vmthread_cm->set_action(ParCompactionManager::UpdateAndCopy);

      // For debugging only
      PSMarkSweep::restore_marks();
@ -2084,16 +2085,14 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
    compaction_start.update();
    // Does the perm gen always have to be done serially because
    // klasses are used in the update of an object?
-    compact_perm(serial_CM);
+    compact_perm(vmthread_cm);

    if (UseParallelOldGCCompacting) {
      compact();
    } else {
-      compact_serial(serial_CM);
+      compact_serial(vmthread_cm);
    }

-    delete serial_CM;
-
    // Reset the mark bitmap, summary data, and do other bookkeeping.  Must be
    // done before resizing.
    post_compact();
--- a/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.cpp
@ -66,7 +66,7 @@ void ImmutableSpace::print() const {

 #endif

-void ImmutableSpace::verify(bool allow_dirty) const {
+void ImmutableSpace::verify(bool allow_dirty) {
  HeapWord* p = bottom();
  HeapWord* t = end();
  HeapWord* prev_p = NULL;
--- a/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/immutableSpace.hpp
@ -59,5 +59,5 @@ class ImmutableSpace: public CHeapObj {
  // Debugging
  virtual void print() const            PRODUCT_RETURN;
  virtual void print_short() const      PRODUCT_RETURN;
-  virtual void verify(bool allow_dirty) const;
+  virtual void verify(bool allow_dirty);
 };
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.cpp
@ -599,12 +599,28 @@ void MutableNUMASpace::initialize(MemRegion mr, bool clear_space) {
 // Mark the the holes in chunks below the top() as invalid.
 void MutableNUMASpace::set_top(HeapWord* value) {
  bool found_top = false;
-  for (int i = 0; i < lgrp_spaces()->length(); i++) {
+  for (int i = 0; i < lgrp_spaces()->length();) {
    LGRPSpace *ls = lgrp_spaces()->at(i);
    MutableSpace *s = ls->space();
    HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());

    if (s->contains(value)) {
+      // Check if setting the chunk's top to a given value would create a hole less than
+      // a minimal object; assuming that's not the last chunk in which case we don't care.
+      if (i < lgrp_spaces()->length() - 1) {
+        size_t remainder = pointer_delta(s->end(), value);
+        const size_t minimal_object_size = oopDesc::header_size();
+        if (remainder < minimal_object_size && remainder > 0) {
+          // Add a filler object of a minimal size, it will cross the chunk boundary.
+          SharedHeap::fill_region_with_object(MemRegion(value, minimal_object_size));
+          value += minimal_object_size;
+          assert(!s->contains(value), "Should be in the next chunk");
+          // Restart the loop from the same chunk, since the value has moved
+          // to the next one.
+          continue;
+        }
+      }
+
      if (!os::numa_has_static_binding() && top < value && top < s->end()) {
        ls->add_invalid_region(MemRegion(top, value));
      }
@ -620,6 +636,7 @@ void MutableNUMASpace::set_top(HeapWord* value) {
          s->set_top(s->end());
        }
    }
+    i++;
  }
  MutableSpace::set_top(value);
 }
@ -700,12 +717,14 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
  MutableSpace *s = lgrp_spaces()->at(i)->space();
  HeapWord *p = s->cas_allocate(size);
  if (p != NULL) {
-    size_t remainder = pointer_delta(s->end(), p);
+    size_t remainder = pointer_delta(s->end(), p + size);
    if (remainder < (size_t)oopDesc::header_size() && remainder > 0) {
      if (s->cas_deallocate(p, size)) {
        // We were the last to allocate and created a fragment less than
        // a minimal object.
        p = NULL;
+      } else {
+        guarantee(false, "Deallocation should always succeed");
      }
    }
  }
@ -761,10 +780,12 @@ void MutableNUMASpace::print_on(outputStream* st) const {
  }
 }

-void MutableNUMASpace::verify(bool allow_dirty) const {
- for (int i = 0; i < lgrp_spaces()->length(); i++) {
-    lgrp_spaces()->at(i)->space()->verify(allow_dirty);
-  }
+void MutableNUMASpace::verify(bool allow_dirty) {
+  // This can be called after setting an arbitary value to the space's top,
+  // so an object can cross the chunk boundary. We ensure the parsablity
+  // of the space and just walk the objects in linear fashion.
+  ensure_parsability();
+  MutableSpace::verify(allow_dirty);
 }

 // Scan pages and gather stats about page placement and size.
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableNUMASpace.hpp
@ -192,7 +192,7 @@ class MutableNUMASpace : public MutableSpace {
  // Debugging
  virtual void print_on(outputStream* st) const;
  virtual void print_short_on(outputStream* st) const;
-  virtual void verify(bool allow_dirty) const;
+  virtual void verify(bool allow_dirty);

  virtual void set_top(HeapWord* value);
 };
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableSpace.cpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableSpace.cpp
@ -118,7 +118,7 @@ void MutableSpace::print_on(outputStream* st) const {
                 bottom(), top(), end());
 }

-void MutableSpace::verify(bool allow_dirty) const {
+void MutableSpace::verify(bool allow_dirty) {
  HeapWord* p = bottom();
  HeapWord* t = top();
  HeapWord* prev_p = NULL;
--- a/hotspot/src/share/vm/gc_implementation/shared/mutableSpace.hpp
+++ b/hotspot/src/share/vm/gc_implementation/shared/mutableSpace.hpp
@ -98,5 +98,5 @@ class MutableSpace: public ImmutableSpace {
  virtual void print_on(outputStream* st) const;
  virtual void print_short() const;
  virtual void print_short_on(outputStream* st) const;
-  virtual void verify(bool allow_dirty) const;
+  virtual void verify(bool allow_dirty);
 };
--- a/hotspot/src/share/vm/memory/cardTableModRefBS.cpp
+++ b/hotspot/src/share/vm/memory/cardTableModRefBS.cpp
@ -196,6 +196,8 @@ void CardTableModRefBS::resize_covered_region(MemRegion new_region) {
  assert(_whole_heap.contains(new_region),
           "attempt to cover area not in reserved area");
  debug_only(verify_guard();)
+  // collided is true if the expansion would push into another committed region
+  debug_only(bool collided = false;)
  int const ind = find_covering_region_by_base(new_region.start());
  MemRegion const old_region = _covered[ind];
  assert(old_region.start() == new_region.start(), "just checking");
@ -211,12 +213,36 @@ void CardTableModRefBS::resize_covered_region(MemRegion new_region) {
    }
    // Align the end up to a page size (starts are already aligned).
    jbyte* const new_end = byte_after(new_region.last());
-    HeapWord* const new_end_aligned =
+    HeapWord* new_end_aligned =
      (HeapWord*) align_size_up((uintptr_t)new_end, _page_size);
    assert(new_end_aligned >= (HeapWord*) new_end,
           "align up, but less");
+    int ri = 0;
+    for (ri = 0; ri < _cur_covered_regions; ri++) {
+      if (ri != ind) {
+        if (_committed[ri].contains(new_end_aligned)) {
+          assert((new_end_aligned >= _committed[ri].start()) &&
+                 (_committed[ri].start() > _committed[ind].start()),
+                 "New end of committed region is inconsistent");
+          new_end_aligned = _committed[ri].start();
+          assert(new_end_aligned > _committed[ind].start(),
+            "New end of committed region is before start");
+          debug_only(collided = true;)
+          // Should only collide with 1 region
+          break;
+        }
+      }
+    }
+#ifdef ASSERT
+    for (++ri; ri < _cur_covered_regions; ri++) {
+      assert(!_committed[ri].contains(new_end_aligned),
+        "New end of committed region is in a second committed region");
+    }
+#endif
    // The guard page is always committed and should not be committed over.
-    HeapWord* const new_end_for_commit = MIN2(new_end_aligned, _guard_region.start());
+    HeapWord* const new_end_for_commit = MIN2(new_end_aligned,
+                                              _guard_region.start());
+
    if (new_end_for_commit > cur_committed.end()) {
      // Must commit new pages.
      MemRegion const new_committed =
@ -239,9 +265,11 @@ void CardTableModRefBS::resize_covered_region(MemRegion new_region) {
      if (!uncommit_region.is_empty()) {
        if (!os::uncommit_memory((char*)uncommit_region.start(),
                                 uncommit_region.byte_size())) {
-          // Do better than this for Merlin
-          vm_exit_out_of_memory(uncommit_region.byte_size(),
-            "card table contraction");
+          assert(false, "Card table contraction failed");
+          // The call failed so don't change the end of the
+          // committed region.  This is better than taking the
+          // VM down.
+          new_end_aligned = _committed[ind].end();
        }
      }
    }
@ -257,8 +285,25 @@ void CardTableModRefBS::resize_covered_region(MemRegion new_region) {
    }
    assert(index_for(new_region.last()) < (int) _guard_index,
      "The guard card will be overwritten");
-    jbyte* const end = byte_after(new_region.last());
+    // This line commented out cleans the newly expanded region and
+    // not the aligned up expanded region.
+    // jbyte* const end = byte_after(new_region.last());
+    jbyte* const end = (jbyte*) new_end_for_commit;
+    assert((end >= byte_after(new_region.last())) || collided,
+      "Expect to be beyond new region unless impacting another region");
    // do nothing if we resized downward.
+#ifdef ASSERT
+    for (int ri = 0; ri < _cur_covered_regions; ri++) {
+      if (ri != ind) {
+        // The end of the new committed region should not
+        // be in any existing region unless it matches
+        // the start of the next region.
+        assert(!_committed[ri].contains(end) ||
+               (_committed[ri].start() == (HeapWord*) end),
+               "Overlapping committed regions");
+      }
+    }
+#endif
    if (entry < end) {
      memset(entry, clean_card, pointer_delta(end, entry, sizeof(jbyte)));
    }
--- a/hotspot/src/share/vm/oops/markOop.hpp
+++ b/hotspot/src/share/vm/oops/markOop.hpp
@ -29,8 +29,10 @@
 //
 // Bit-format of an object header (most significant first):
 //
-//
-//  unused:0/25 hash:25/31 age:4 biased_lock:1 lock:2 = 32/64 bits
+//  32 bits: unused:0  hash:25 age:4 biased_lock:1 lock:2
+//  64 bits: unused:24 hash:31 cms:2 age:4 biased_lock:1 lock:2
+//           unused:20 size:35 cms:2 age:4 biased_lock:1 lock:2 (if cms
+//                                                               free chunk)
 //
 //  - hash contains the identity hash value: largest value is
 //    31 bits, see os::random().  Also, 64-bit vm's require
@ -91,6 +93,7 @@ class markOopDesc: public oopDesc {
         biased_lock_bits         = 1,
         max_hash_bits            = BitsPerWord - age_bits - lock_bits - biased_lock_bits,
         hash_bits                = max_hash_bits > 31 ? 31 : max_hash_bits,
+         cms_bits                 = LP64_ONLY(1) NOT_LP64(0),
         epoch_bits               = 2
  };

@ -106,7 +109,8 @@ class markOopDesc: public oopDesc {
  enum { lock_shift               = 0,
         biased_lock_shift        = lock_bits,
         age_shift                = lock_bits + biased_lock_bits,
-         hash_shift               = lock_bits + biased_lock_bits + age_bits,
+         cms_shift                = age_shift + age_bits,
+         hash_shift               = cms_shift + cms_bits,
         epoch_shift              = hash_shift
  };

@ -118,7 +122,9 @@ class markOopDesc: public oopDesc {
         age_mask                 = right_n_bits(age_bits),
         age_mask_in_place        = age_mask << age_shift,
         epoch_mask               = right_n_bits(epoch_bits),
-         epoch_mask_in_place      = epoch_mask << epoch_shift
+         epoch_mask_in_place      = epoch_mask << epoch_shift,
+         cms_mask                 = right_n_bits(cms_bits),
+         cms_mask_in_place        = cms_mask << cms_shift
 #ifndef _WIN64
         ,hash_mask               = right_n_bits(hash_bits),
         hash_mask_in_place       = (address_word)hash_mask << hash_shift
@ -360,4 +366,40 @@ class markOopDesc: public oopDesc {

  // see the definition in markOop.cpp for the gory details
  bool should_not_be_cached() const;
+
+  // These markOops indicate cms free chunk blocks and not objects.
+  // In 64 bit, the markOop is set to distinguish them from oops.
+  // These are defined in 32 bit mode for vmStructs.
+  const static uintptr_t cms_free_chunk_pattern  = 0x1;
+
+  // Constants for the size field.
+  enum { size_shift                = cms_shift + cms_bits,
+         size_bits                 = 35    // need for compressed oops 32G
+       };
+  // These values are too big for Win64
+  const static uintptr_t size_mask = LP64_ONLY(right_n_bits(size_bits))
+                                     NOT_LP64(0);
+  const static uintptr_t size_mask_in_place =
+                                     (address_word)size_mask << size_shift;
+
+#ifdef _LP64
+  static markOop cms_free_prototype() {
+    return markOop(((intptr_t)prototype() & ~cms_mask_in_place) |
+                   ((cms_free_chunk_pattern & cms_mask) << cms_shift));
+  }
+  uintptr_t cms_encoding() const {
+    return mask_bits(value() >> cms_shift, cms_mask);
+  }
+  bool is_cms_free_chunk() const {
+    return is_neutral() &&
+           (cms_encoding() & cms_free_chunk_pattern) == cms_free_chunk_pattern;
+  }
+
+  size_t get_size() const       { return (size_t)(value() >> size_shift); }
+  static markOop set_size_and_free(size_t size) {
+    assert((size & ~size_mask) == 0, "shouldn't overflow size field");
+    return markOop(((intptr_t)cms_free_prototype() & ~size_mask_in_place) |
+                   (((intptr_t)size & size_mask) << size_shift));
+  }
+#endif // _LP64
 };
--- a/hotspot/src/share/vm/oops/methodDataOop.hpp
+++ b/hotspot/src/share/vm/oops/methodDataOop.hpp
@ -158,7 +158,6 @@ public:
    assert(ProfileTraps, "used only under +ProfileTraps");
    uint old_flags = (_header._struct._flags & flag_mask);
    _header._struct._flags = (new_state << trap_shift) | old_flags;
-    assert(trap_state() == new_state, "sanity");
  }

  u1 flags() {
--- a/hotspot/src/share/vm/opto/connode.cpp
+++ b/hotspot/src/share/vm/opto/connode.cpp
@ -565,10 +565,12 @@ Node* DecodeNNode::Identity(PhaseTransform* phase) {
 }

 const Type *DecodeNNode::Value( PhaseTransform *phase ) const {
-  if (phase->type( in(1) ) == TypeNarrowOop::NULL_PTR) {
-    return TypePtr::NULL_PTR;
-  }
-  return bottom_type();
+  const Type *t = phase->type( in(1) );
+  if (t == Type::TOP) return Type::TOP;
+  if (t == TypeNarrowOop::NULL_PTR) return TypePtr::NULL_PTR;
+
+  assert(t->isa_narrowoop(), "only  narrowoop here");
+  return t->is_narrowoop()->make_oopptr();
 }

 Node* DecodeNNode::decode(PhaseTransform* phase, Node* value) {
@ -599,10 +601,12 @@ Node* EncodePNode::Identity(PhaseTransform* phase) {
 }

 const Type *EncodePNode::Value( PhaseTransform *phase ) const {
-  if (phase->type( in(1) ) == TypePtr::NULL_PTR) {
-    return TypeNarrowOop::NULL_PTR;
-  }
-  return bottom_type();
+  const Type *t = phase->type( in(1) );
+  if (t == Type::TOP) return Type::TOP;
+  if (t == TypePtr::NULL_PTR) return TypeNarrowOop::NULL_PTR;
+
+  assert(t->isa_oopptr(), "only oopptr here");
+  return t->is_oopptr()->make_narrowoop();
 }

 Node* EncodePNode::encode(PhaseTransform* phase, Node* value) {
--- a/hotspot/src/share/vm/opto/connode.hpp
+++ b/hotspot/src/share/vm/opto/connode.hpp
@ -549,10 +549,18 @@ class Opaque1Node : public Node {
  virtual uint hash() const ;                  // { return NO_HASH; }
  virtual uint cmp( const Node &n ) const;
 public:
-  Opaque1Node( Node *n ) : Node(0,n) {}
+  Opaque1Node( Compile* C, Node *n ) : Node(0,n) {
+    // Put it on the Macro nodes list to removed during macro nodes expansion.
+    init_flags(Flag_is_macro);
+    C->add_macro_node(this);
+  }
  // Special version for the pre-loop to hold the original loop limit
  // which is consumed by range check elimination.
-  Opaque1Node( Node *n, Node* orig_limit ) : Node(0,n,orig_limit) {}
+  Opaque1Node( Compile* C, Node *n, Node* orig_limit ) : Node(0,n,orig_limit) {
+    // Put it on the Macro nodes list to removed during macro nodes expansion.
+    init_flags(Flag_is_macro);
+    C->add_macro_node(this);
+  }
  Node* original_loop_limit() { return req()==3 ? in(2) : NULL; }
  virtual int Opcode() const;
  virtual const Type *bottom_type() const { return TypeInt::INT; }
@ -572,7 +580,11 @@ class Opaque2Node : public Node {
  virtual uint hash() const ;                  // { return NO_HASH; }
  virtual uint cmp( const Node &n ) const;
 public:
-  Opaque2Node( Node *n ) : Node(0,n) {}
+  Opaque2Node( Compile* C, Node *n ) : Node(0,n) {
+    // Put it on the Macro nodes list to removed during macro nodes expansion.
+    init_flags(Flag_is_macro);
+    C->add_macro_node(this);
+  }
  virtual int Opcode() const;
  virtual const Type *bottom_type() const { return TypeInt::INT; }
 };
--- a/hotspot/src/share/vm/opto/gcm.cpp
+++ b/hotspot/src/share/vm/opto/gcm.cpp
@ -307,7 +307,6 @@ static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,

    // Test and set the visited bit.
    if (mid->raise_LCA_visited() == mark)  continue;  // already visited
-    mid->set_raise_LCA_visited(mark);

    // Don't process the current LCA, otherwise the search may terminate early
    if (mid != LCA && mid->raise_LCA_mark() == mark) {
@ -317,6 +316,8 @@ static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
      assert(early->dominates(LCA), "early is high enough");
      // Resume searching at that point, skipping intermediate levels.
      worklist.push(LCA);
+      if (LCA == mid)
+        continue; // Don't mark as visited to avoid early termination.
    } else {
      // Keep searching through this block's predecessors.
      for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
@ -324,6 +325,7 @@ static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
        worklist.push(mid_parent);
      }
    }
+    mid->set_raise_LCA_visited(mark);
  }
  return LCA;
 }
--- a/hotspot/src/share/vm/opto/loopTransform.cpp
+++ b/hotspot/src/share/vm/opto/loopTransform.cpp
@ -690,7 +690,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
  // (the main-loop trip-counter exit value) because we will be changing
  // the exit value (via unrolling) so we cannot constant-fold away the zero
  // trip guard until all unrolling is done.
-  Node *zer_opaq = new (C, 2) Opaque1Node(incr);
+  Node *zer_opaq = new (C, 2) Opaque1Node(C, incr);
  Node *zer_cmp  = new (C, 3) CmpINode( zer_opaq, limit );
  Node *zer_bol  = new (C, 2) BoolNode( zer_cmp, b_test );
  register_new_node( zer_opaq, new_main_exit );
@ -760,7 +760,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_
  // pre-loop, the main-loop may not execute at all.  Later in life this
  // zero-trip guard will become the minimum-trip guard when we unroll
  // the main-loop.
-  Node *min_opaq = new (C, 2) Opaque1Node(limit);
+  Node *min_opaq = new (C, 2) Opaque1Node(C, limit);
  Node *min_cmp  = new (C, 3) CmpINode( pre_incr, min_opaq );
  Node *min_bol  = new (C, 2) BoolNode( min_cmp, b_test );
  register_new_node( min_opaq, new_pre_exit );
@ -810,7 +810,7 @@ void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_

  // Save the original loop limit in this Opaque1 node for
  // use by range check elimination.
-  Node *pre_opaq  = new (C, 3) Opaque1Node(pre_limit, limit);
+  Node *pre_opaq  = new (C, 3) Opaque1Node(C, pre_limit, limit);

  register_new_node( pre_limit, pre_head->in(0) );
  register_new_node( pre_opaq , pre_head->in(0) );
--- a/hotspot/src/share/vm/opto/loopUnswitch.cpp
+++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp
@ -205,7 +205,7 @@ ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,

  Node *cont      = _igvn.intcon(1);
  set_ctrl(cont, C->root());
-  Node* opq       = new (C, 2) Opaque1Node(cont);
+  Node* opq       = new (C, 2) Opaque1Node(C, cont);
  register_node(opq, outer_loop, entry, dom_depth(entry));
  Node *bol       = new (C, 2) Conv2BNode(opq);
  register_node(bol, outer_loop, entry, dom_depth(entry));
--- a/hotspot/src/share/vm/opto/loopopts.cpp
+++ b/hotspot/src/share/vm/opto/loopopts.cpp
@ -2685,7 +2685,7 @@ void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
      if( !cle->stride_is_con() ) continue;
      // Hit!  Refactor use to use the post-incremented tripcounter.
      // Compute a post-increment tripcounter.
-      Node *opaq = new (C, 2) Opaque2Node( cle->incr() );
+      Node *opaq = new (C, 2) Opaque2Node( C, cle->incr() );
      register_new_node( opaq, u_ctrl );
      Node *neg_stride = _igvn.intcon(-cle->stride_con());
      set_ctrl(neg_stride, C->root());
--- a/hotspot/src/share/vm/opto/machnode.cpp
+++ b/hotspot/src/share/vm/opto/machnode.cpp
@ -262,14 +262,16 @@ const Node* MachNode::get_base_and_disp(intptr_t &offset, const TypePtr* &adr_ty
    // Now we have collected every part of the ADLC MEMORY_INTER.
    // See if it adds up to a base + offset.
    if (index != NULL) {
-      if (!index->is_Con()) {
-        const TypeNarrowOop* narrowoop = index->bottom_type()->isa_narrowoop();
-        if (narrowoop != NULL) {
-          // Memory references through narrow oops have a
-          // funny base so grab the type from the index.
-          adr_type = narrowoop->make_oopptr();
-          return NULL;
-        }
+      const TypeNarrowOop* narrowoop = index->bottom_type()->isa_narrowoop();
+      if (narrowoop != NULL) { // EncodeN, LoadN, LoadConN, LoadNKlass.
+        // Memory references through narrow oops have a
+        // funny base so grab the type from the index:
+        // [R12 + narrow_oop_reg<<3 + offset]
+        assert(base == NULL, "Memory references through narrow oops have no base");
+        offset = disp;
+        adr_type = narrowoop->make_oopptr()->add_offset(offset);
+        return NULL;
+      } else if (!index->is_Con()) {
        disp = Type::OffsetBot;
      } else if (disp != Type::OffsetBot) {
        const TypeX* ti = index->bottom_type()->isa_intptr_t();
--- a/hotspot/src/share/vm/opto/macro.cpp
+++ b/hotspot/src/share/vm/opto/macro.cpp
@ -1674,7 +1674,14 @@ bool PhaseMacroExpand::expand_macro_nodes() {
        success = eliminate_locking_node(n->as_AbstractLock());
        break;
      default:
-        assert(false, "unknown node type in macro list");
+        if (n->Opcode() == Op_Opaque1 || n->Opcode() == Op_Opaque2) {
+          _igvn.add_users_to_worklist(n);
+          _igvn.hash_delete(n);
+          _igvn.subsume_node(n, n->in(1));
+          success = true;
+        } else {
+          assert(false, "unknown node type in macro list");
+        }
      }
      assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
      progress = progress || success;
--- a/hotspot/src/share/vm/opto/matcher.cpp
+++ b/hotspot/src/share/vm/opto/matcher.cpp
@ -82,6 +82,7 @@ Matcher::Matcher( Node_List &proj_list ) :
  idealreg2debugmask[Op_RegF] = NULL;
  idealreg2debugmask[Op_RegD] = NULL;
  idealreg2debugmask[Op_RegP] = NULL;
+  debug_only(_mem_node = NULL;)   // Ideal memory node consumed by mach node
 }

 //------------------------------warp_incoming_stk_arg------------------------
@ -1153,7 +1154,10 @@ MachNode *Matcher::match_tree( const Node *n ) {

  // StoreNodes require their Memory input to match any LoadNodes
  Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
-
+#ifdef ASSERT
+  Node* save_mem_node = _mem_node;
+  _mem_node = n->is_Store() ? (Node*)n : NULL;
+#endif
  // State object for root node of match tree
  // Allocate it on _states_arena - stack allocation can cause stack overflow.
  State *s = new (&_states_arena) State;
@ -1205,6 +1209,7 @@ MachNode *Matcher::match_tree( const Node *n ) {
    }
  }

+  debug_only( _mem_node = save_mem_node; )
  return m;
 }

@ -1445,8 +1450,30 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
  }

  // If a Memory was used, insert a Memory edge
-  if( mem != (Node*)1 )
+  if( mem != (Node*)1 ) {
    mach->ins_req(MemNode::Memory,mem);
+#ifdef ASSERT
+    // Verify adr type after matching memory operation
+    const MachOper* oper = mach->memory_operand();
+    if (oper != NULL && oper != (MachOper*)-1 &&
+        mach->adr_type() != TypeRawPtr::BOTTOM) { // non-direct addressing mode
+      // It has a unique memory operand.  Find corresponding ideal mem node.
+      Node* m = NULL;
+      if (leaf->is_Mem()) {
+        m = leaf;
+      } else {
+        m = _mem_node;
+        assert(m != NULL && m->is_Mem(), "expecting memory node");
+      }
+      if (m->adr_type() != mach->adr_type()) {
+        m->dump();
+        tty->print_cr("mach:");
+        mach->dump(1);
+      }
+      assert(m->adr_type() == mach->adr_type(), "matcher should not change adr type");
+    }
+#endif
+  }

  // If the _leaf is an AddP, insert the base edge
  if( leaf->is_AddP() )
@ -1510,7 +1537,9 @@ void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *m
    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
    Node *mem1 = (Node*)1;
+    debug_only(Node *save_mem_node = _mem_node;)
    mach->add_req( ReduceInst(s, newrule, mem1) );
+    debug_only(_mem_node = save_mem_node;)
  }
  return;
 }
@ -1520,6 +1549,7 @@ uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mac
  if( s->_leaf->is_Load() ) {
    Node *mem2 = s->_leaf->in(MemNode::Memory);
    assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
+    debug_only( if( mem == (Node*)1 ) _mem_node = s->_leaf;)
    mem = mem2;
  }
  if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
@ -1563,7 +1593,9 @@ uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mac
        //             --> ReduceInst( newrule )
        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
        Node *mem1 = (Node*)1;
+        debug_only(Node *save_mem_node = _mem_node;)
        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
+        debug_only(_mem_node = save_mem_node;)
      }
    }
    assert( mach->_opnds[num_opnds-1], "" );
@ -1594,6 +1626,7 @@ void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
  if( s->_leaf->is_Load() ) {
    assert( mem == (Node*)1, "multiple Memories being matched at once?" );
    mem = s->_leaf->in(MemNode::Memory);
+    debug_only(_mem_node = s->_leaf;)
  }
  if( s->_leaf->in(0) && s->_leaf->req() > 1) {
    if( !mach->in(0) )
@ -1618,7 +1651,9 @@ void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
      // Reduce the instruction, and add a direct pointer from this
      // machine instruction to the newly reduced one.
      Node *mem1 = (Node*)1;
+      debug_only(Node *save_mem_node = _mem_node;)
      mach->add_req( ReduceInst( kid, newrule, mem1 ) );
+      debug_only(_mem_node = save_mem_node;)
    }
  }
 }
--- a/hotspot/src/share/vm/opto/matcher.hpp
+++ b/hotspot/src/share/vm/opto/matcher.hpp
@ -104,6 +104,8 @@ class Matcher : public PhaseTransform {
 #ifdef ASSERT
  // Make sure only new nodes are reachable from this node
  void verify_new_nodes_only(Node* root);
+
+  Node* _mem_node;   // Ideal memory node consumed by mach node
 #endif

 public:
--- a/hotspot/src/share/vm/opto/memnode.cpp
+++ b/hotspot/src/share/vm/opto/memnode.cpp
@ -253,11 +253,17 @@ bool MemNode::all_controls_dominate(Node* dom, Node* sub) {
  if (dom == NULL || dom->is_top() || sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code

-  // Check 'dom'.
+  // Check 'dom'. Skip Proj and CatchProj nodes.
  dom = dom->find_exact_control(dom);
  if (dom == NULL || dom->is_top())
    return false; // Conservative answer for dead code

+  if (dom == sub) {
+    // For the case when, for example, 'sub' is Initialize and the original
+    // 'dom' is Proj node of the 'sub'.
+    return false;
+  }
+
  if (dom->is_Con() || dom->is_Start() || dom->is_Root() || dom == sub)
    return true;

@ -271,6 +277,7 @@ bool MemNode::all_controls_dominate(Node* dom, Node* sub) {
         sub->is_Region(), "expecting only these nodes");

  // Get control edge of 'sub'.
+  Node* orig_sub = sub;
  sub = sub->find_exact_control(sub->in(0));
  if (sub == NULL || sub->is_top())
    return false; // Conservative answer for dead code
@ -296,14 +303,16 @@ bool MemNode::all_controls_dominate(Node* dom, Node* sub) {

    for (uint next = 0; next < dom_list.size(); next++) {
      Node* n = dom_list.at(next);
+      if (n == orig_sub)
+        return false; // One of dom's inputs dominated by sub.
      if (!n->is_CFG() && n->pinned()) {
        // Check only own control edge for pinned non-control nodes.
        n = n->find_exact_control(n->in(0));
        if (n == NULL || n->is_top())
          return false; // Conservative answer for dead code
        assert(n->is_CFG(), "expecting control");
-      }
-      if (n->is_Con() || n->is_Start() || n->is_Root()) {
+        dom_list.push(n);
+      } else if (n->is_Con() || n->is_Start() || n->is_Root()) {
        only_dominating_controls = true;
      } else if (n->is_CFG()) {
        if (n->dominates(sub, nlist))
--- a/hotspot/src/share/vm/opto/node.cpp
+++ b/hotspot/src/share/vm/opto/node.cpp
@ -1039,6 +1039,9 @@ Node* Node::find_exact_control(Node* ctrl) {
 //--------------------------dominates------------------------------------------
 // Helper function for MemNode::all_controls_dominate().
 // Check if 'this' control node dominates or equal to 'sub' control node.
+// We already know that if any path back to Root or Start reaches 'this',
+// then all paths so, so this is a simple search for one example,
+// not an exhaustive search for a counterexample.
 bool Node::dominates(Node* sub, Node_List &nlist) {
  assert(this->is_CFG(), "expecting control");
  assert(sub != NULL && sub->is_CFG(), "expecting control");
@ -1047,110 +1050,115 @@ bool Node::dominates(Node* sub, Node_List &nlist) {
  int iterations_without_region_limit = DominatorSearchLimit;

  Node* orig_sub = sub;
+  Node* dom      = this;
+  bool  met_dom  = false;
  nlist.clear();
-  bool this_dominates = false;
-  bool result = false; // Conservative answer

-  while (sub != NULL) {        // walk 'sub' up the chain to 'this'
-    if (sub == this) {
+  // Walk 'sub' backward up the chain to 'dom', watching for regions.
+  // After seeing 'dom', continue up to Root or Start.
+  // If we hit a region (backward split point), it may be a loop head.
+  // Keep going through one of the region's inputs.  If we reach the
+  // same region again, go through a different input.  Eventually we
+  // will either exit through the loop head, or give up.
+  // (If we get confused, break out and return a conservative 'false'.)
+  while (sub != NULL) {
+    if (sub->is_top())  break; // Conservative answer for dead code.
+    if (sub == dom) {
      if (nlist.size() == 0) {
        // No Region nodes except loops were visited before and the EntryControl
        // path was taken for loops: it did not walk in a cycle.
-        result = true;
-        break;
-      } else if (this_dominates) {
-        result = false;          // already met before: walk in a cycle
-        break;
+        return true;
+      } else if (met_dom) {
+        break;          // already met before: walk in a cycle
      } else {
        // Region nodes were visited. Continue walk up to Start or Root
        // to make sure that it did not walk in a cycle.
-        this_dominates = true; // first time meet
+        met_dom = true; // first time meet
        iterations_without_region_limit = DominatorSearchLimit; // Reset
     }
    }
    if (sub->is_Start() || sub->is_Root()) {
-      result = this_dominates;
-      break;
+      // Success if we met 'dom' along a path to Start or Root.
+      // We assume there are no alternative paths that avoid 'dom'.
+      // (This assumption is up to the caller to ensure!)
+      return met_dom;
    }
-    Node* up = sub->find_exact_control(sub->in(0));
-    if (up == NULL || up->is_top()) {
-      result = false; // Conservative answer for dead code
-      break;
-    }
-    if (sub == up && (sub->is_Loop() || sub->is_Region() && sub->req() != 3)) {
-      // Take first valid path on the way up to 'this'.
+    Node* up = sub->in(0);
+    // Normalize simple pass-through regions and projections:
+    up = sub->find_exact_control(up);
+    // If sub == up, we found a self-loop.  Try to push past it.
+    if (sub == up && sub->is_Loop()) {
+      // Take loop entry path on the way up to 'dom'.
      up = sub->in(1); // in(LoopNode::EntryControl);
+    } else if (sub == up && sub->is_Region() && sub->req() != 3) {
+      // Always take in(1) path on the way up to 'dom' for clone regions
+      // (with only one input) or regions which merge > 2 paths
+      // (usually used to merge fast/slow paths).
+      up = sub->in(1);
    } else if (sub == up && sub->is_Region()) {
-      assert(sub->req() == 3, "sanity");
+      // Try both paths for Regions with 2 input paths (it may be a loop head).
+      // It could give conservative 'false' answer without information
+      // which region's input is the entry path.
      iterations_without_region_limit = DominatorSearchLimit; // Reset

-      // Try both paths for such Regions.
-      // It is not accurate without regions dominating information.
-      // With such information the other path should be checked for
-      // the most dominating Region which was visited before.
      bool region_was_visited_before = false;
-      uint i = 1;
-      uint size = nlist.size();
-      if (size == 0) {
-        // No such Region nodes were visited before.
-        // Take first valid path on the way up to 'this'.
-      } else {
-        // Was this Region node visited before?
-        intptr_t ni;
-        int j = size - 1;
-        for (; j >= 0; j--) {
-          ni = (intptr_t)nlist.at(j);
-          if ((Node*)(ni & ~1) == sub) {
-            if ((ni & 1) != 0) {
-              break; // Visited 2 paths. Give up.
-            } else {
-              // The Region node was visited before only once.
-              nlist.remove(j);
-              region_was_visited_before = true;
-              for (; i < sub->req(); i++) {
-                Node* in = sub->in(i);
-                if (in != NULL && !in->is_top() && in != sub) {
-                  break;
-                }
-              }
-              i++; // Take other path.
-              break;
-            }
+      // Was this Region node visited before?
+      // If so, we have reached it because we accidentally took a
+      // loop-back edge from 'sub' back into the body of the loop,
+      // and worked our way up again to the loop header 'sub'.
+      // So, take the first unexplored path on the way up to 'dom'.
+      for (int j = nlist.size() - 1; j >= 0; j--) {
+        intptr_t ni = (intptr_t)nlist.at(j);
+        Node* visited = (Node*)(ni & ~1);
+        bool  visited_twice_already = ((ni & 1) != 0);
+        if (visited == sub) {
+          if (visited_twice_already) {
+            // Visited 2 paths, but still stuck in loop body.  Give up.
+            return false;
          }
-        }
-        if (j >= 0 && (ni & 1) != 0) {
-          result = false; // Visited 2 paths. Give up.
+          // The Region node was visited before only once.
+          // (We will repush with the low bit set, below.)
+          nlist.remove(j);
+          // We will find a new edge and re-insert.
+          region_was_visited_before = true;
          break;
        }
-        // The Region node was not visited before.
      }
-      for (; i < sub->req(); i++) {
+
+      // Find an incoming edge which has not been seen yet; walk through it.
+      assert(up == sub, "");
+      uint skip = region_was_visited_before ? 1 : 0;
+      for (uint i = 1; i < sub->req(); i++) {
        Node* in = sub->in(i);
        if (in != NULL && !in->is_top() && in != sub) {
-          break;
-        }
-      }
-      if (i < sub->req()) {
-        up = sub->in(i);
-        if (region_was_visited_before && sub != up) {
-          // Set 0 bit to indicate that both paths were taken.
-          nlist.push((Node*)((intptr_t)sub + 1));
-        } else {
-          nlist.push(sub);
+          if (skip == 0) {
+            up = in;
+            break;
+          }
+          --skip;               // skip this nontrivial input
        }
      }
+
+      // Set 0 bit to indicate that both paths were taken.
+      nlist.push((Node*)((intptr_t)sub + (region_was_visited_before ? 1 : 0)));
    }
-    if (sub == up) {
-      result = false;    // some kind of tight cycle
-      break;
+
+    if (up == sub) {
+      break;    // some kind of tight cycle
+    }
+    if (up == orig_sub && met_dom) {
+      // returned back after visiting 'dom'
+      break;    // some kind of cycle
    }
    if (--iterations_without_region_limit < 0) {
-      result = false;    // dead cycle
-      break;
+      break;    // dead cycle
    }
    sub = up;
  }
-  return result;
+
+  // Did not meet Root or Start node in pred. chain.
+  // Conservative answer for dead code.
+  return false;
 }

 //------------------------------remove_dead_region-----------------------------
--- a/hotspot/src/share/vm/opto/subnode.cpp
+++ b/hotspot/src/share/vm/opto/subnode.cpp
@ -45,10 +45,13 @@ Node *SubNode::Identity( PhaseTransform *phase ) {
    return in(2)->in(2);
  }

-  // Convert "(X+Y) - Y" into X
+  // Convert "(X+Y) - Y" into X and "(X+Y) - X" into Y
  if( in(1)->Opcode() == Op_AddI ) {
    if( phase->eqv(in(1)->in(2),in(2)) )
      return in(1)->in(1);
+    if (phase->eqv(in(1)->in(1),in(2)))
+      return in(1)->in(2);
+
    // Also catch: "(X + Opaque2(Y)) - Y".  In this case, 'Y' is a loop-varying
    // trip counter and X is likely to be loop-invariant (that's how O2 Nodes
    // are originally used, although the optimizer sometimes jiggers things).
--- a/hotspot/src/share/vm/runtime/arguments.cpp
+++ b/hotspot/src/share/vm/runtime/arguments.cpp
@ -1174,7 +1174,7 @@ void Arguments::set_ergonomics_flags() {
  // field offset to determine free list chunk markers.
  // Check that UseCompressedOops can be set with the max heap size allocated
  // by ergonomics.
-  if (!UseConcMarkSweepGC && MaxHeapSize <= max_heap_for_compressed_oops()) {
+  if (MaxHeapSize <= max_heap_for_compressed_oops()) {
    if (FLAG_IS_DEFAULT(UseCompressedOops)) {
      // Leave compressed oops off by default. Uncomment
      // the following line to return it to default status.
--- a/hotspot/src/share/vm/runtime/globals.hpp
+++ b/hotspot/src/share/vm/runtime/globals.hpp
@ -2246,6 +2246,9 @@ class CommandLineFlags {
  product(bool, AggressiveOpts, false,                                      \
          "Enable aggressive optimizations - see arguments.cpp")            \
                                                                            \
+  product(bool, UseStringCache, false,                                      \
+          "Enable String cache capabilities on String.java")                \
+                                                                            \
  /* statistics */                                                          \
  develop(bool, UseVTune, false,                                            \
          "enable support for Intel's VTune profiler")                      \
--- a/hotspot/src/share/vm/runtime/thread.cpp
+++ b/hotspot/src/share/vm/runtime/thread.cpp
@ -2926,21 +2926,42 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
    }

    if (AggressiveOpts) {
-      // Forcibly initialize java/util/HashMap and mutate the private
-      // static final "frontCacheEnabled" field before we start creating instances
+      {
+        // Forcibly initialize java/util/HashMap and mutate the private
+        // static final "frontCacheEnabled" field before we start creating instances
 #ifdef ASSERT
-      klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0);
-      assert(tmp_k == NULL, "java/util/HashMap should not be loaded yet");
+        klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0);
+        assert(tmp_k == NULL, "java/util/HashMap should not be loaded yet");
 #endif
-      klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0);
-      KlassHandle k = KlassHandle(THREAD, k_o);
-      guarantee(k.not_null(), "Must find java/util/HashMap");
-      instanceKlassHandle ik = instanceKlassHandle(THREAD, k());
-      ik->initialize(CHECK_0);
-      fieldDescriptor fd;
-      // Possible we might not find this field; if so, don't break
-      if (ik->find_local_field(vmSymbols::frontCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) {
-        k()->bool_field_put(fd.offset(), true);
+        klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_util_HashMap(), Handle(), Handle(), CHECK_0);
+        KlassHandle k = KlassHandle(THREAD, k_o);
+        guarantee(k.not_null(), "Must find java/util/HashMap");
+        instanceKlassHandle ik = instanceKlassHandle(THREAD, k());
+        ik->initialize(CHECK_0);
+        fieldDescriptor fd;
+        // Possible we might not find this field; if so, don't break
+        if (ik->find_local_field(vmSymbols::frontCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) {
+          k()->bool_field_put(fd.offset(), true);
+        }
+      }
+
+      if (UseStringCache) {
+        // Forcibly initialize java/lang/String and mutate the private
+        // static final "stringCacheEnabled" field before we start creating instances
+#ifdef ASSERT
+        klassOop tmp_k = SystemDictionary::find(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0);
+        assert(tmp_k == NULL, "java/lang/String should not be loaded yet");
+#endif
+        klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0);
+        KlassHandle k = KlassHandle(THREAD, k_o);
+        guarantee(k.not_null(), "Must find java/lang/String");
+        instanceKlassHandle ik = instanceKlassHandle(THREAD, k());
+        ik->initialize(CHECK_0);
+        fieldDescriptor fd;
+        // Possible we might not find this field; if so, don't break
+        if (ik->find_local_field(vmSymbols::stringCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) {
+          k()->bool_field_put(fd.offset(), true);
+        }
      }
    }

--- a/hotspot/src/share/vm/runtime/vmStructs.cpp
+++ b/hotspot/src/share/vm/runtime/vmStructs.cpp
@ -1695,7 +1695,12 @@ static inline uint64_t cast_uint64_t(size_t x)
  declare_constant(markOopDesc::no_hash)                                  \
  declare_constant(markOopDesc::no_hash_in_place)                         \
  declare_constant(markOopDesc::no_lock_in_place)                         \
-  declare_constant(markOopDesc::max_age)
+  declare_constant(markOopDesc::max_age)                                  \
+                                                                          \
+  /* Constants in markOop used by CMS. */                                 \
+  declare_constant(markOopDesc::cms_shift)                                \
+  declare_constant(markOopDesc::cms_mask)                                 \
+  declare_constant(markOopDesc::size_shift)                               \

  /* NOTE that we do not use the last_entry() macro here; it is used   */
  /* in vmStructs_<os>_<cpu>.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and */
@ -1959,6 +1964,7 @@ VMStructEntry VMStructs::localHotSpotVMStructs[] = {
                        GENERATE_STATIC_VM_STRUCT_ENTRY)

  VM_STRUCTS_CMS(GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
+                 GENERATE_NONSTATIC_VM_STRUCT_ENTRY, \
                 GENERATE_STATIC_VM_STRUCT_ENTRY)
 #endif // SERIALGC

@ -2100,6 +2106,7 @@ VMStructs::init() {
             CHECK_STATIC_VM_STRUCT_ENTRY);

  VM_STRUCTS_CMS(CHECK_NONSTATIC_VM_STRUCT_ENTRY,
+             CHECK_VOLATILE_NONSTATIC_VM_STRUCT_ENTRY,
             CHECK_STATIC_VM_STRUCT_ENTRY);
 #endif // SERIALGC

@ -2204,6 +2211,7 @@ VMStructs::init() {
  debug_only(VM_STRUCTS_PARALLELGC(ENSURE_FIELD_TYPE_PRESENT, \
                                   ENSURE_FIELD_TYPE_PRESENT));
  debug_only(VM_STRUCTS_CMS(ENSURE_FIELD_TYPE_PRESENT, \
+                            ENSURE_FIELD_TYPE_PRESENT, \
                            ENSURE_FIELD_TYPE_PRESENT));
 #endif // SERIALGC
  debug_only(VM_STRUCTS_CPU(ENSURE_FIELD_TYPE_PRESENT, \