Andrey Petrusenko 2008-09-17 16:49:18 +04:00
commit b6aabd98fc
214 changed files with 36267 additions and 1310 deletions

View file

@@ -64,6 +64,7 @@ Include_DBs/GC = $(VM)/includeDB_gc \
$(VM)/gc_implementation/includeDB_gc_parallelScavenge \
$(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
$(VM)/gc_implementation/includeDB_gc_parNew \
$(VM)/gc_implementation/includeDB_gc_g1 \
$(VM)/gc_implementation/includeDB_gc_serial \
$(VM)/gc_implementation/includeDB_gc_shared

View file

@@ -54,6 +54,7 @@ Include_DBs/GC = $(VM)/includeDB_gc \
$(VM)/gc_implementation/includeDB_gc_parallelScavenge \
$(VM)/gc_implementation/includeDB_gc_concurrentMarkSweep \
$(VM)/gc_implementation/includeDB_gc_parNew \
$(VM)/gc_implementation/includeDB_gc_g1 \
$(VM)/gc_implementation/includeDB_gc_serial \
$(VM)/gc_implementation/includeDB_gc_shared

View file

@@ -50,7 +50,8 @@ IncludeDBs_gc= $(WorkSpace)/src/share/vm/includeDB_gc_parallel \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parallelScavenge \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_shared \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_parNew \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep \
$(WorkSpace)/src/share/vm/gc_implementation/includeDB_gc_g1
IncludeDBs_core=$(IncludeDBs_base) $(IncludeDBs_gc) \
$(WorkSpace)/src/share/vm/includeDB_features

View file

@@ -64,6 +64,7 @@ MakeDepsIncludesPRIVATE=\
-relativeInclude src\share\vm\gc_implementation\shared \
-relativeInclude src\share\vm\gc_implementation\parNew \
-relativeInclude src\share\vm\gc_implementation\concurrentMarkSweep \
-relativeInclude src\share\vm\gc_implementation\g1 \
-relativeInclude src\share\vm\gc_interface \
-relativeInclude src\share\vm\asm \
-relativeInclude src\share\vm\memory \
@@ -115,6 +116,7 @@ MakeDepsIDEOptions=\
-additionalFile includeDB_gc_parallel \
-additionalFile includeDB_gc_parallelScavenge \
-additionalFile includeDB_gc_concurrentMarkSweep \
-additionalFile includeDB_gc_g1 \
-additionalFile includeDB_gc_parNew \
-additionalFile includeDB_gc_shared \
-additionalFile includeDB_gc_serial \

View file

@@ -117,6 +117,7 @@ CPP_INCLUDE_DIRS=\
/I "$(WorkSpace)\src\share\vm\gc_implementation\shared"\
/I "$(WorkSpace)\src\share\vm\gc_implementation\parNew"\
/I "$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep"\
/I "$(WorkSpace)\src\share\vm\gc_implementation\g1"\
/I "$(WorkSpace)\src\share\vm\gc_interface"\
/I "$(WorkSpace)\src\share\vm\asm" \
/I "$(WorkSpace)\src\share\vm\memory" \
@@ -146,6 +147,7 @@ VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parallelScavenge
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/shared
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/parNew
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/concurrentMarkSweep
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_implementation/g1
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc_interface
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm
VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
@@ -222,6 +224,9 @@ bytecodeInterpreterWithChecks.obj: ..\generated\jvmtifiles\bytecodeInterpreterWi
{$(WorkSpace)\src\share\vm\gc_implementation\concurrentMarkSweep}.cpp.obj::
$(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
{$(WorkSpace)\src\share\vm\gc_implementation\g1}.cpp.obj::
$(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<
{$(WorkSpace)\src\share\vm\gc_interface}.cpp.obj::
$(CPP) $(CPP_FLAGS) $(CPP_USE_PCH) /c $<

View file

@@ -130,6 +130,20 @@ int AbstractAssembler::code_fill_byte() {
return 0x00; // illegal instruction 0x00000000
}
Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) {
switch (in) {
case rc_z: return equal;
case rc_lez: return lessEqual;
case rc_lz: return less;
case rc_nz: return notEqual;
case rc_gz: return greater;
case rc_gez: return greaterEqual;
default:
ShouldNotReachHere();
}
return equal;
}
// Generate a bunch 'o stuff (including v9's
#ifndef PRODUCT
void Assembler::test_v9() {
@@ -1213,31 +1227,19 @@ void MacroAssembler::set_vm_result(Register oop_result) {
}
void MacroAssembler::store_check(Register tmp, Register obj) {
// Use two shifts to clear out those low order two bits! (Cannot opt. into 1.)
/* $$$ This stuff needs to go into one of the BarrierSet generator
functions. (The particular barrier sets will have to be friends of
MacroAssembler, I guess.) */
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
void MacroAssembler::card_table_write(jbyte* byte_map_base,
Register tmp, Register obj) {
#ifdef _LP64
srlx(obj, CardTableModRefBS::card_shift, obj);
#else
srl(obj, CardTableModRefBS::card_shift, obj);
#endif
assert( tmp != obj, "need separate temp reg");
Address rs(tmp, (address)ct->byte_map_base);
Address rs(tmp, (address)byte_map_base);
load_address(rs);
stb(G0, rs.base(), obj);
}
void MacroAssembler::store_check(Register tmp, Register obj, Register offset) {
store_check(tmp, obj);
}
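For orientation, a minimal C++ sketch of the store that card_table_write emits, under the usual CardTableModRefBS layout: byte_map_base is pre-biased so indexing it by (address >> card_shift) lands on the card covering the address, and 0 is the dirty value, matching the stb(G0, ...) above. The function and parameter names are illustrative, not HotSpot API.

#include <stdint.h>
// Illustrative only: the card-table write performed by the generated code above.
inline void conceptual_card_table_write(signed char* byte_map_base,
                                        uintptr_t    store_addr,
                                        int          card_shift) {
  byte_map_base[store_addr >> card_shift] = 0;   // dirty the card covering store_addr
}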
// %%% Note: The following six instructions have been moved,
// unchanged, from assembler_sparc.inline.hpp.
// They will be refactored at a later date.
@@ -1663,11 +1665,21 @@ void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * fil
if (reg == G0) return; // always NULL, which is always an oop
char buffer[16];
char buffer[64];
#ifdef COMPILER1
if (CommentedAssembly) {
snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset());
block_comment(buffer);
}
#endif
int len = strlen(file) + strlen(msg) + 1 + 4;
sprintf(buffer, "%d", line);
int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer);
len += strlen(buffer);
sprintf(buffer, " at offset %d ", offset());
len += strlen(buffer);
char * real_msg = new char[len];
sprintf(real_msg, "%s (%s:%d)", msg, file, line);
sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line);
// Call indirectly to solve generation ordering problem
Address a(O7, (address)StubRoutines::verify_oop_subroutine_entry_address());
@@ -2059,6 +2071,27 @@ void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) {
#endif
}
void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
Register s1, address d,
relocInfo::relocType rt ) {
if (VM_Version::v9_instructions_work()) {
bpr(rc, a, p, s1, d, rt);
} else {
tst(s1);
br(reg_cond_to_cc_cond(rc), a, p, d, rt);
}
}
void MacroAssembler::br_on_reg_cond( RCondition rc, bool a, Predict p,
Register s1, Label& L ) {
if (VM_Version::v9_instructions_work()) {
bpr(rc, a, p, s1, L);
} else {
tst(s1);
br(reg_cond_to_cc_cond(rc), a, p, L);
}
}
// instruction sequences factored across compiler & interpreter
@@ -3241,68 +3274,74 @@ void MacroAssembler::eden_allocate(
assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
// get eden boundaries
// note: we need both top & top_addr!
const Register top_addr = t1;
const Register end = t2;
CollectedHeap* ch = Universe::heap();
set((intx)ch->top_addr(), top_addr);
intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
ld_ptr(top_addr, delta, end);
ld_ptr(top_addr, 0, obj);
// try to allocate
Label retry;
bind(retry);
#ifdef ASSERT
// make sure eden top is properly aligned
{
Label L;
btst(MinObjAlignmentInBytesMask, obj);
br(Assembler::zero, false, Assembler::pt, L);
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
// No allocation in the shared eden.
br(Assembler::always, false, Assembler::pt, slow_case);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
#endif // ASSERT
const Register free = end;
sub(end, obj, free); // compute amount of free space
if (var_size_in_bytes->is_valid()) {
// size is unknown at compile time
cmp(free, var_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, var_size_in_bytes, end);
} else {
// size is known at compile time
cmp(free, con_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, con_size_in_bytes, end);
}
// Compare obj with the value at top_addr; if still equal, swap the value of
// end with the value at top_addr. If not equal, read the value at top_addr
// into end.
casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
// if someone beat us on the allocation, try again, otherwise continue
cmp(obj, end);
brx(Assembler::notEqual, false, Assembler::pn, retry);
delayed()->mov(end, obj); // nop if successful since obj == end
// get eden boundaries
// note: we need both top & top_addr!
const Register top_addr = t1;
const Register end = t2;
CollectedHeap* ch = Universe::heap();
set((intx)ch->top_addr(), top_addr);
intx delta = (intx)ch->end_addr() - (intx)ch->top_addr();
ld_ptr(top_addr, delta, end);
ld_ptr(top_addr, 0, obj);
// try to allocate
Label retry;
bind(retry);
#ifdef ASSERT
// make sure eden top is properly aligned
{
Label L;
btst(MinObjAlignmentInBytesMask, obj);
br(Assembler::zero, false, Assembler::pt, L);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
#endif // ASSERT
const Register free = end;
sub(end, obj, free); // compute amount of free space
if (var_size_in_bytes->is_valid()) {
// size is unknown at compile time
cmp(free, var_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, var_size_in_bytes, end);
} else {
// size is known at compile time
cmp(free, con_size_in_bytes);
br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case
delayed()->add(obj, con_size_in_bytes, end);
}
// Compare obj with the value at top_addr; if still equal, swap the value of
// end with the value at top_addr. If not equal, read the value at top_addr
// into end.
casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr());
// if someone beat us on the allocation, try again, otherwise continue
cmp(obj, end);
brx(Assembler::notEqual, false, Assembler::pn, retry);
delayed()->mov(end, obj); // nop if successful since obj == end
#ifdef ASSERT
// make sure eden top is properly aligned
{
Label L;
const Register top_addr = t1;
// make sure eden top is properly aligned
{
Label L;
const Register top_addr = t1;
set((intx)ch->top_addr(), top_addr);
ld_ptr(top_addr, 0, top_addr);
btst(MinObjAlignmentInBytesMask, top_addr);
br(Assembler::zero, false, Assembler::pt, L);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
set((intx)ch->top_addr(), top_addr);
ld_ptr(top_addr, 0, top_addr);
btst(MinObjAlignmentInBytesMask, top_addr);
br(Assembler::zero, false, Assembler::pt, L);
delayed()->nop();
stop("eden top is not properly aligned");
bind(L);
}
#endif // ASSERT
}
}
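As a reading aid, a hedged C++ sketch of the fast path this code (and the x86 eden_allocate later in the patch) implements: bump-pointer allocation in a shared eden, retried with compare-and-swap. The function is illustrative, not HotSpot's CollectedHeap API.

#include <stddef.h>
// Illustrative only: allocate size_in_bytes at the eden top, or return NULL for the slow case.
static char* conceptual_eden_allocate(char* volatile* top_addr,
                                      char* const*    end_addr,
                                      size_t          size_in_bytes) {
  for (;;) {
    char* obj = *top_addr;                                   // ld_ptr(top_addr, 0, obj)
    char* end = *end_addr;                                   // ld_ptr(top_addr, delta, end)
    if ((size_t)(end - obj) < size_in_bytes) return NULL;    // not enough free space: slow case
    char* new_top = obj + size_in_bytes;
    // casx_under_lock on SPARC, lock cmpxchg on x86: only one racing thread wins the bump.
    if (__sync_val_compare_and_swap(top_addr, obj, new_top) == obj) {
      return obj;                                            // we installed new_top; obj is ours
    }
    // someone beat us on the allocation; try again
  }
}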
@@ -3554,6 +3593,468 @@ void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp,
}
}
///////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
static uint num_stores = 0;
static uint num_null_pre_stores = 0;
static void count_null_pre_vals(void* pre_val) {
num_stores++;
if (pre_val == NULL) num_null_pre_stores++;
if ((num_stores % 1000000) == 0) {
tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
num_stores, num_null_pre_stores,
100.0*(float)num_null_pre_stores/(float)num_stores);
}
}
static address satb_log_enqueue_with_frame = 0;
static u_char* satb_log_enqueue_with_frame_end = 0;
static address satb_log_enqueue_frameless = 0;
static u_char* satb_log_enqueue_frameless_end = 0;
static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
// The calls to this don't work. We'd need to do a fair amount of work to
// make it work.
static void check_index(int ind) {
assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
"Invariants.")
}
static void generate_satb_log_enqueue(bool with_frame) {
BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
MacroAssembler masm(&buf);
address start = masm.pc();
Register pre_val;
Label refill, restart;
if (with_frame) {
masm.save_frame(0);
pre_val = I0; // Was O0 before the save.
} else {
pre_val = O0;
}
int satb_q_index_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index());
int satb_q_buf_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf());
assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) &&
in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t),
"check sizes in assembly below");
masm.bind(restart);
masm.ld_ptr(G2_thread, satb_q_index_byte_offset, L0);
masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn, L0, refill);
// If the branch is taken, no harm in executing this in the delay slot.
masm.delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, L1);
masm.sub(L0, oopSize, L0);
masm.st_ptr(pre_val, L1, L0); // [_buf + index] := I0
if (!with_frame) {
// Use return-from-leaf
masm.retl();
masm.delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset);
} else {
// Not delayed.
masm.st_ptr(L0, G2_thread, satb_q_index_byte_offset);
}
if (with_frame) {
masm.ret();
masm.delayed()->restore();
}
masm.bind(refill);
address handle_zero =
CAST_FROM_FN_PTR(address,
&SATBMarkQueueSet::handle_zero_index_for_thread);
// This should be rare enough that we can afford to save all the
// scratch registers that the calling context might be using.
masm.mov(G1_scratch, L0);
masm.mov(G3_scratch, L1);
masm.mov(G4, L2);
// We need the value of O0 above (for the write into the buffer), so we
// save and restore it.
masm.mov(O0, L3);
// Since the call will overwrite O7, we save and restore that, as well.
masm.mov(O7, L4);
masm.call_VM_leaf(L5, handle_zero, G2_thread);
masm.mov(L0, G1_scratch);
masm.mov(L1, G3_scratch);
masm.mov(L2, G4);
masm.mov(L3, O0);
masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
masm.delayed()->mov(L4, O7);
if (with_frame) {
satb_log_enqueue_with_frame = start;
satb_log_enqueue_with_frame_end = masm.pc();
} else {
satb_log_enqueue_frameless = start;
satb_log_enqueue_frameless_end = masm.pc();
}
}
static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) {
if (with_frame) {
if (satb_log_enqueue_with_frame == 0) {
generate_satb_log_enqueue(with_frame);
assert(satb_log_enqueue_with_frame != 0, "postcondition.");
if (G1SATBPrintStubs) {
tty->print_cr("Generated with-frame satb enqueue:");
Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
satb_log_enqueue_with_frame_end,
tty);
}
}
} else {
if (satb_log_enqueue_frameless == 0) {
generate_satb_log_enqueue(with_frame);
assert(satb_log_enqueue_frameless != 0, "postcondition.");
if (G1SATBPrintStubs) {
tty->print_cr("Generated frameless satb enqueue:");
Disassembler::decode((u_char*)satb_log_enqueue_frameless,
satb_log_enqueue_frameless_end,
tty);
}
}
}
}
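For readers tracing the generated stub, a hedged C++ sketch of the queue operation it performs; in the PtrQueue scheme the index counts down in bytes toward zero, and a zero index means the buffer must be handed to SATBMarkQueueSet::handle_zero_index_for_thread. The function and parameter names are illustrative.

#include <stddef.h>
// Illustrative only: what one pass through the SATB enqueue stub does.
static void conceptual_satb_enqueue(void*   pre_val,     // the value being logged (O0/I0)
                                    size_t* index_addr,  // thread's satb queue index, in bytes
                                    char**  buf_addr) {  // thread's satb queue buffer
  size_t index = *index_addr;                   // ld_ptr(G2_thread, satb_q_index_byte_offset, L0)
  if (index == 0) {
    // 'refill' path: handle_zero_index_for_thread installs a fresh buffer,
    // then the stub branches back to 'restart' and tries again.
    return;
  }
  index -= sizeof(void*);                       // sub(L0, oopSize, L0)
  *(void**)(*buf_addr + index) = pre_val;       // st_ptr(pre_val, L1, L0)
  *index_addr = index;                          // st_ptr(L0, G2_thread, satb_q_index_byte_offset)
}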
void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
assert(offset == 0 || index == noreg, "choose one");
if (G1DisablePreBarrier) return;
// satb_log_barrier(tmp, obj, offset, preserve_o_regs);
Label filtered;
// satb_log_barrier_work0(tmp, filtered);
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
ld(G2,
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()),
tmp);
} else {
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
"Assumption");
ldsb(G2,
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()),
tmp);
}
// Check on whether to annul.
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
delayed() -> nop();
// satb_log_barrier_work1(tmp, offset);
if (index == noreg) {
if (Assembler::is_simm13(offset)) {
ld_ptr(obj, offset, tmp);
} else {
set(offset, tmp);
ld_ptr(obj, tmp, tmp);
}
} else {
ld_ptr(obj, index, tmp);
}
// satb_log_barrier_work2(obj, tmp, offset);
// satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
const Register pre_val = tmp;
if (G1SATBBarrierPrintNullPreVals) {
save_frame(0);
mov(pre_val, O0);
// Save G-regs that target may use.
mov(G1, L1);
mov(G2, L2);
mov(G3, L3);
mov(G4, L4);
mov(G5, L5);
call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
delayed()->nop();
// Restore G-regs that target may have used.
mov(L1, G1);
mov(L2, G2);
mov(L3, G3);
mov(L4, G4);
mov(L5, G5);
restore(G0, G0, G0);
}
// Check on whether to annul.
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
delayed() -> nop();
// OK, it's not filtered, so we'll need to call enqueue. In the normal
// case, pre_val will be a scratch G-reg, but there's some cases in which
// it's an O-reg. In the first case, do a normal call. In the latter,
// do a save here and call the frameless version.
guarantee(pre_val->is_global() || pre_val->is_out(),
"Or we need to think harder.");
if (pre_val->is_global() && !preserve_o_regs) {
generate_satb_log_enqueue_if_necessary(true); // with frame.
call(satb_log_enqueue_with_frame);
delayed()->mov(pre_val, O0);
} else {
generate_satb_log_enqueue_if_necessary(false); // with frameless.
save_frame(0);
call(satb_log_enqueue_frameless);
delayed()->mov(pre_val->after_save(), O0);
restore();
}
bind(filtered);
}
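To summarize the control flow of g1_write_barrier_pre, a hedged sketch in plain C++; the signature is made up, and the enqueue step stands for the generated stubs above.

// Illustrative only: the SATB pre-barrier filters, then logs the value about to be
// overwritten so concurrent marking still sees the snapshot-at-the-beginning.
typedef void* oop_t;   // stand-in for HotSpot's oop
static void conceptual_g1_pre_barrier(bool   satb_active,   // the queue's "active" flag
                                      oop_t* field_addr,    // obj + offset (or obj + index)
                                      void (*enqueue)(oop_t)) {
  if (!satb_active) return;       // marking not in progress: filtered
  oop_t pre_val = *field_addr;    // the ld_ptr of the old field value above
  if (pre_val == 0) return;       // overwriting NULL: nothing to snapshot
  enqueue(pre_val);               // satb_log_enqueue_with_frame / _frameless
}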
static jint num_ct_writes = 0;
static jint num_ct_writes_filtered_in_hr = 0;
static jint num_ct_writes_filtered_null = 0;
static jint num_ct_writes_filtered_pop = 0;
static G1CollectedHeap* g1 = NULL;
static Thread* count_ct_writes(void* filter_val, void* new_val) {
Atomic::inc(&num_ct_writes);
if (filter_val == NULL) {
Atomic::inc(&num_ct_writes_filtered_in_hr);
} else if (new_val == NULL) {
Atomic::inc(&num_ct_writes_filtered_null);
} else {
if (g1 == NULL) {
g1 = G1CollectedHeap::heap();
}
if ((HeapWord*)new_val < g1->popular_object_boundary()) {
Atomic::inc(&num_ct_writes_filtered_pop);
}
}
if ((num_ct_writes % 1000000) == 0) {
jint num_ct_writes_filtered =
num_ct_writes_filtered_in_hr +
num_ct_writes_filtered_null +
num_ct_writes_filtered_pop;
tty->print_cr("%d potential CT writes: %5.2f%% filtered\n"
" (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).",
num_ct_writes,
100.0*(float)num_ct_writes_filtered/(float)num_ct_writes,
100.0*(float)num_ct_writes_filtered_in_hr/
(float)num_ct_writes,
100.0*(float)num_ct_writes_filtered_null/
(float)num_ct_writes,
100.0*(float)num_ct_writes_filtered_pop/
(float)num_ct_writes);
}
return Thread::current();
}
static address dirty_card_log_enqueue = 0;
static u_char* dirty_card_log_enqueue_end = 0;
// This gets to assume that o0 contains the object address.
static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) {
BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2);
CodeBuffer buf(bb->instructions_begin(), bb->instructions_size());
MacroAssembler masm(&buf);
address start = masm.pc();
Label not_already_dirty, restart, refill;
#ifdef _LP64
masm.srlx(O0, CardTableModRefBS::card_shift, O0);
#else
masm.srl(O0, CardTableModRefBS::card_shift, O0);
#endif
Address rs(O1, (address)byte_map_base);
masm.load_address(rs); // O1 := <card table base>
masm.ldub(O0, O1, O2); // O2 := [O0 + O1]
masm.br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
O2, not_already_dirty);
// Get O1 + O2 into a reg by itself -- useful in the take-the-branch
// case, harmless if not.
masm.delayed()->add(O0, O1, O3);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
masm.retl();
masm.delayed()->nop();
// Not dirty.
masm.bind(not_already_dirty);
// First, dirty it.
masm.stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty).
int dirty_card_q_index_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index());
int dirty_card_q_buf_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf());
masm.bind(restart);
masm.ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0);
masm.br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
L0, refill);
// If the branch is taken, no harm in executing this in the delay slot.
masm.delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1);
masm.sub(L0, oopSize, L0);
masm.st_ptr(O3, L1, L0); // [_buf + index] := I0
// Use return-from-leaf
masm.retl();
masm.delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset);
masm.bind(refill);
address handle_zero =
CAST_FROM_FN_PTR(address,
&DirtyCardQueueSet::handle_zero_index_for_thread);
// This should be rare enough that we can afford to save all the
// scratch registers that the calling context might be using.
masm.mov(G1_scratch, L3);
masm.mov(G3_scratch, L5);
// We need the value of O3 above (for the write into the buffer), so we
// save and restore it.
masm.mov(O3, L6);
// Since the call will overwrite O7, we save and restore that, as well.
masm.mov(O7, L4);
masm.call_VM_leaf(L7_thread_cache, handle_zero, G2_thread);
masm.mov(L3, G1_scratch);
masm.mov(L5, G3_scratch);
masm.mov(L6, O3);
masm.br(Assembler::always, /*annul*/false, Assembler::pt, restart);
masm.delayed()->mov(L4, O7);
dirty_card_log_enqueue = start;
dirty_card_log_enqueue_end = masm.pc();
// XXX Should have a guarantee here about not going off the end!
// Does it already do so? Do an experiment...
}
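In the same spirit, a hedged C++ sketch of the dirty-card enqueue stub; 0 is the dirty card value, matching the stb(G0, ...) above, and all names here are illustrative.

#include <stddef.h>
#include <stdint.h>
// Illustrative only: dirty the card for store_addr and log its address,
// unless the card is already dirty.
static void conceptual_dirty_card_enqueue(signed char* byte_map_base,
                                          uintptr_t    store_addr,
                                          int          card_shift,
                                          size_t*      index_addr,  // dirty card queue index, bytes
                                          char**       buf_addr) {  // dirty card queue buffer
  signed char* card_addr = byte_map_base + (store_addr >> card_shift);
  if (*card_addr == 0) return;                 // already dirty: the return-from-leaf above
  *card_addr = 0;                              // "First, dirty it."
  size_t index = *index_addr;
  if (index == 0) {
    // 'refill': DirtyCardQueueSet::handle_zero_index_for_thread installs a
    // new buffer and the stub restarts.
    return;
  }
  index -= sizeof(void*);
  *(signed char**)(*buf_addr + index) = card_addr;   // [_buf + index] := card address
  *index_addr = index;
}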
static inline void
generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) {
if (dirty_card_log_enqueue == 0) {
generate_dirty_card_log_enqueue(byte_map_base);
assert(dirty_card_log_enqueue != 0, "postcondition.");
if (G1SATBPrintStubs) {
tty->print_cr("Generated dirty_card enqueue:");
Disassembler::decode((u_char*)dirty_card_log_enqueue,
dirty_card_log_enqueue_end,
tty);
}
}
}
void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
Label filtered;
MacroAssembler* post_filter_masm = this;
if (new_val == G0) return;
if (G1DisablePostBarrier) return;
G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::G1SATBCT ||
bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier");
if (G1RSBarrierRegionFilter) {
xor3(store_addr, new_val, tmp);
#ifdef _LP64
srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#else
srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
#endif
if (G1PrintCTFilterStats) {
guarantee(tmp->is_global(), "Or stats won't work...");
// This is a sleazy hack: I'm temporarily hijacking G2, which I
// promise to restore.
mov(new_val, G2);
save_frame(0);
mov(tmp, O0);
mov(G2, O1);
// Save G-regs that target may use.
mov(G1, L1);
mov(G2, L2);
mov(G3, L3);
mov(G4, L4);
mov(G5, L5);
call(CAST_FROM_FN_PTR(address, &count_ct_writes));
delayed()->nop();
mov(O0, G2);
// Restore G-regs that target may have used.
mov(L1, G1);
mov(L3, G3);
mov(L4, G4);
mov(L5, G5);
restore(G0, G0, G0);
}
// XXX Should I predict this taken or not? Does it matter?
br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
delayed()->nop();
}
// Now we decide how to generate the card table write. If we're
// enqueueing, we call out to a generated function. Otherwise, we do it
// inline here.
if (G1RSBarrierUseQueue) {
// If the "store_addr" register is an "in" or "local" register, move it to
// a scratch reg so we can pass it as an argument.
bool use_scr = !(store_addr->is_global() || store_addr->is_out());
// Pick a scratch register different from "tmp".
Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
// Make sure we use up the delay slot!
if (use_scr) {
post_filter_masm->mov(store_addr, scr);
} else {
post_filter_masm->nop();
}
generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
save_frame(0);
call(dirty_card_log_enqueue);
if (use_scr) {
delayed()->mov(scr, O0);
} else {
delayed()->mov(store_addr->after_save(), O0);
}
restore();
} else {
#ifdef _LP64
post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
#else
post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
#endif
assert( tmp != store_addr, "need separate temp reg");
Address rs(tmp, (address)bs->byte_map_base);
load_address(rs);
stb(G0, rs.base(), store_addr);
}
bind(filtered);
}
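Putting the filters together, a hedged sketch of when the post barrier has any work to do; it combines the constant-NULL check above, the runtime NULL check done in the x86 version later in the patch, and the region filter (LogOfHRGrainBytes is the log2 of the G1 heap-region size). Names are illustrative.

#include <stdint.h>
// Illustrative only: returns true when a card must be dirtied/enqueued.
static bool conceptual_g1_needs_post_barrier(uintptr_t store_addr,
                                             uintptr_t new_val,
                                             int       log_of_hr_grain_bytes) {
  if (new_val == 0) return false;   // storing NULL: no remembered-set entry needed
  // xor3 + srlx above: the result is zero iff both addresses lie in the same region
  if (((store_addr ^ new_val) >> log_of_hr_grain_bytes) == 0) return false;
  return true;                      // cross-region store of a non-NULL oop
}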
#endif // SERIALGC
///////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) {
// If we're writing constant NULL, we can skip the write barrier.
if (new_val == G0) return;
CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef ||
bs->kind() == BarrierSet::CardTableExtension, "wrong barrier");
card_table_write(bs->byte_map_base, tmp, store_addr);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {
// The number of bytes in this code is used by
// MachCallDynamicJavaNode::ret_addr_offset()

View file

@@ -1439,7 +1439,11 @@ public:
// pp 214
void save( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | rs2(s2) ); }
void save( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
void save( Register s1, int simm13a, Register d ) {
// make sure frame is at least large enough for the register save area
assert(-simm13a >= 16 * wordSize, "frame too small");
emit_long( op(arith_op) | rd(d) | op3(save_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) );
}
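A small illustrative computation behind the 16 in this assert, assuming the standard SPARC register-window layout; the constant names are made up for the sketch.

// The frame created by SAVE must at least cover the register window save area,
// i.e. the 8 local and 8 in registers that a window-overflow spill writes there.
static const int window_save_registers = 8 /* %l0-%l7 */ + 8 /* %i0-%i7 */;
static const int min_frame_bytes       = window_save_registers * (int)sizeof(void*); // == 16 * wordSize
// which is exactly the bound checked above: -simm13a >= 16 * wordSize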
void restore( Register s1 = G0, Register s2 = G0, Register d = G0 ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | rs2(s2) ); }
void restore( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(restore_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
@@ -1594,6 +1598,11 @@ public:
inline void wrasi( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(3, 29, 25)); }
inline void wrfprs( Register d) { v9_only(); emit_long( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
// For a given register condition, return the appropriate condition code
// Condition (the one you would use to get the same effect after "tst" on
// the target register.)
Assembler::Condition reg_cond_to_cc_cond(RCondition in);
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
@@ -1630,6 +1639,8 @@ class RegistersForDebugging : public StackObj {
// restore global registers in case C code disturbed them
static void restore_registers(MacroAssembler* a, Register r);
};
@@ -1722,6 +1733,12 @@ class MacroAssembler: public Assembler {
void br_null ( Register s1, bool a, Predict p, Label& L );
void br_notnull( Register s1, bool a, Predict p, Label& L );
// These versions will do the most efficient thing on v8 and v9. Perhaps
// this is what the routine above was meant to do, but it didn't (and
// didn't cover both target address kinds.)
void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt = relocInfo::none );
void br_on_reg_cond( RCondition c, bool a, Predict p, Register s1, Label& L);
inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
@@ -2056,9 +2073,23 @@ class MacroAssembler: public Assembler {
#endif // ASSERT
public:
// Stores
void store_check(Register tmp, Register obj); // store check for obj - register is destroyed afterwards
void store_check(Register tmp, Register obj, Register offset); // store check for obj - register is destroyed afterwards
// Write to card table for - register is destroyed afterwards.
void card_table_write(jbyte* byte_map_base, Register tmp, Register obj);
void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
#ifndef SERIALGC
// Array store and offset
void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs);
void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
// May do filtering, depending on the boolean arguments.
void g1_card_table_write(jbyte* byte_map_base,
Register tmp, Register obj, Register new_val,
bool region_filter, bool null_filter);
#endif // SERIALGC
// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
void push_fTOS();

View file

@@ -404,4 +404,55 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
}
///////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(pre_val()->is_register(), "Precondition.");
Register pre_val_reg = pre_val()->as_register();
ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
pre_val_reg, _continuation);
__ delayed()->nop();
__ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
__ delayed()->mov(pre_val_reg, G4);
__ br(Assembler::always, false, Assembler::pt, _continuation);
__ delayed()->nop();
}
jbyte* G1PostBarrierStub::_byte_map_base = NULL;
jbyte* G1PostBarrierStub::byte_map_base_slow() {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->is_a(BarrierSet::G1SATBCTLogging),
"Must be if we're using this.");
return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
}
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(addr()->is_register(), "Precondition.");
assert(new_val()->is_register(), "Precondition.");
Register addr_reg = addr()->as_pointer_register();
Register new_val_reg = new_val()->as_register();
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
new_val_reg, _continuation);
__ delayed()->nop();
__ call(Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id));
__ delayed()->mov(addr_reg, G4);
__ br(Assembler::always, false, Assembler::pt, _continuation);
__ delayed()->nop();
}
#endif // SERIALGC
///////////////////////////////////////////////////////////////////////////////////
#undef __

View file

@@ -2093,7 +2093,11 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
// the known type isn't loaded since the code sanity checks
// in debug mode and the type isn't required when we know the exact type
// also check that the type is an array type.
if (op->expected_type() == NULL) {
// We also, for now, always call the stub if the barrier set requires a
// write_ref_pre barrier (which the stub does, but none of the optimized
// cases currently does).
if (op->expected_type() == NULL ||
Universe::heap()->barrier_set()->has_write_ref_pre_barrier()) {
__ mov(src, O0);
__ mov(src_pos, O1);
__ mov(dst, O2);

View file

@@ -365,6 +365,10 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
__ store_check(value.result(), array.result(), tmp1, tmp2, tmp3, store_check_info);
}
if (obj_store) {
// Needs GC write barriers.
pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
}
__ move(value.result(), array_addr, null_check_info);
if (obj_store) {
// Is this precise?
@@ -663,6 +667,10 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
__ add(obj.result(), offset.result(), addr);
if (type == objectType) { // Write-barrier needed for Object fields.
pre_barrier(obj.result(), false, NULL);
}
if (type == objectType)
__ cas_obj(addr, cmp.result(), val.result(), t1, t2);
else if (type == intType)
@@ -677,7 +685,11 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
LIR_Opr result = rlock_result(x);
__ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0), result);
if (type == objectType) { // Write-barrier needed for Object fields.
#ifdef PRECISE_CARDMARK
post_barrier(addr, val.result());
#else
post_barrier(obj.result(), val.result());
#endif // PRECISE_CARDMARK
}
}
@@ -1154,6 +1166,10 @@ void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
addr = new LIR_Address(base_op, index_op, type);
}
if (is_obj) {
pre_barrier(LIR_OprFact::address(addr), false, NULL);
// _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
}
__ move(data, addr);
if (is_obj) {
// This address is precise

View file

@@ -832,6 +832,163 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
#ifndef SERIALGC
case g1_pre_barrier_slow_id:
{ // G4: previous value of memory
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->kind() != BarrierSet::G1SATBCTLogging) {
__ save_frame(0);
__ set((int)id, O1);
__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
__ should_not_reach_here();
break;
}
__ set_info("g1_pre_barrier_slow_id", dont_gc_arguments);
Register pre_val = G4;
Register tmp = G1_scratch;
Register tmp2 = G3_scratch;
Label refill, restart;
bool with_frame = false; // I don't know if we can do with-frame.
int satb_q_index_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index());
int satb_q_buf_byte_offset =
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf());
__ bind(restart);
__ ld_ptr(G2_thread, satb_q_index_byte_offset, tmp);
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false,
Assembler::pn, tmp, refill);
// If the branch is taken, no harm in executing this in the delay slot.
__ delayed()->ld_ptr(G2_thread, satb_q_buf_byte_offset, tmp2);
__ sub(tmp, oopSize, tmp);
__ st_ptr(pre_val, tmp2, tmp); // [_buf + index] := <address_of_card>
// Use return-from-leaf
__ retl();
__ delayed()->st_ptr(tmp, G2_thread, satb_q_index_byte_offset);
__ bind(refill);
__ save_frame(0);
__ mov(pre_val, L0);
__ mov(tmp, L1);
__ mov(tmp2, L2);
__ call_VM_leaf(L7_thread_cache,
CAST_FROM_FN_PTR(address,
SATBMarkQueueSet::handle_zero_index_for_thread),
G2_thread);
__ mov(L0, pre_val);
__ mov(L1, tmp);
__ mov(L2, tmp2);
__ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
__ delayed()->restore();
}
break;
case g1_post_barrier_slow_id:
{
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->kind() != BarrierSet::G1SATBCTLogging) {
__ save_frame(0);
__ set((int)id, O1);
__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), I0);
__ should_not_reach_here();
break;
}
__ set_info("g1_post_barrier_slow_id", dont_gc_arguments);
Register addr = G4;
Register cardtable = G5;
Register tmp = G1_scratch;
Register tmp2 = G3_scratch;
jbyte* byte_map_base = ((CardTableModRefBS*)bs)->byte_map_base;
Label not_already_dirty, restart, refill;
#ifdef _LP64
__ srlx(addr, CardTableModRefBS::card_shift, addr);
#else
__ srl(addr, CardTableModRefBS::card_shift, addr);
#endif
Address rs(cardtable, (address)byte_map_base);
__ load_address(rs); // cardtable := <card table base>
__ ldub(addr, cardtable, tmp); // tmp := [addr + cardtable]
__ br_on_reg_cond(Assembler::rc_nz, /*annul*/false, Assembler::pt,
tmp, not_already_dirty);
// Get cardtable + tmp into a reg by itself -- useful in the take-the-branch
// case, harmless if not.
__ delayed()->add(addr, cardtable, tmp2);
// We didn't take the branch, so we're already dirty: return.
// Use return-from-leaf
__ retl();
__ delayed()->nop();
// Not dirty.
__ bind(not_already_dirty);
// First, dirty it.
__ stb(G0, tmp2, 0); // [cardPtr] := 0 (i.e., dirty).
Register tmp3 = cardtable;
Register tmp4 = tmp;
// these registers are now dead
addr = cardtable = tmp = noreg;
int dirty_card_q_index_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index());
int dirty_card_q_buf_byte_offset =
in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf());
__ bind(restart);
__ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, tmp3);
__ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pn,
tmp3, refill);
// If the branch is taken, no harm in executing this in the delay slot.
__ delayed()->ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, tmp4);
__ sub(tmp3, oopSize, tmp3);
__ st_ptr(tmp2, tmp4, tmp3); // [_buf + index] := <address_of_card>
// Use return-from-leaf
__ retl();
__ delayed()->st_ptr(tmp3, G2_thread, dirty_card_q_index_byte_offset);
__ bind(refill);
__ save_frame(0);
__ mov(tmp2, L0);
__ mov(tmp3, L1);
__ mov(tmp4, L2);
__ call_VM_leaf(L7_thread_cache,
CAST_FROM_FN_PTR(address,
DirtyCardQueueSet::handle_zero_index_for_thread),
G2_thread);
__ mov(L0, tmp2);
__ mov(L1, tmp3);
__ mov(L2, tmp4);
__ br(Assembler::always, /*annul*/false, Assembler::pt, restart);
__ delayed()->restore();
}
break;
#endif // !SERIALGC
default:
{ __ set_info("unimplemented entry", dont_gc_arguments);
__ save_frame(0);

View file

@@ -1110,30 +1110,31 @@ class StubGenerator: public StubCodeGenerator {
// The input registers are overwritten.
//
void gen_write_ref_array_pre_barrier(Register addr, Register count) {
#if 0 // G1 only
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->has_write_ref_pre_barrier()) {
assert(bs->has_write_ref_array_pre_opt(),
"Else unsupported barrier set.");
assert(addr->is_global() && count->is_global(),
"If not, then we have to fix this code to handle more "
"general cases.");
// Get some new fresh output registers.
__ save_frame(0);
// Save the necessary global regs... will be used after.
__ mov(addr, L0);
__ mov(count, L1);
__ mov(addr, O0);
if (addr->is_global()) {
__ mov(addr, L0);
}
if (count->is_global()) {
__ mov(count, L1);
}
__ mov(addr->after_save(), O0);
// Get the count into O1
__ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
__ delayed()->mov(count, O1);
__ mov(L0, addr);
__ mov(L1, count);
__ delayed()->mov(count->after_save(), O1);
if (addr->is_global()) {
__ mov(L0, addr);
}
if (count->is_global()) {
__ mov(L1, count);
}
__ restore();
}
#endif // 0
}
//
// Generate post-write barrier for array.
@@ -1150,22 +1151,17 @@ class StubGenerator: public StubCodeGenerator {
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
#if 0 // G1 - only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
assert(addr->is_global() && count->is_global(),
"If not, then we have to fix this code to handle more "
"general cases.");
// Get some new fresh output registers.
__ save_frame(0);
__ mov(addr, O0);
__ mov(addr->after_save(), O0);
__ call(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
__ delayed()->mov(count, O1);
__ delayed()->mov(count->after_save(), O1);
__ restore();
}
break;
#endif // 0 G1 - only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
@@ -2412,8 +2408,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
gen_write_ref_array_pre_barrier(G1, G5);
gen_write_ref_array_pre_barrier(O1, O2);
#ifdef ASSERT
// We sometimes save a frame (see partial_subtype_check below).

View file

@@ -28,6 +28,79 @@
#ifndef CC_INTERP
#define __ _masm->
// Misc helpers
// Do an oop store like *(base + index + offset) = val
// index can be noreg,
static void do_oop_store(InterpreterMacroAssembler* _masm,
Register base,
Register index,
int offset,
Register val,
Register tmp,
BarrierSet::Name barrier,
bool precise) {
assert(tmp != val && tmp != base && tmp != index, "register collision");
assert(index == noreg || offset == 0, "only one offset");
switch (barrier) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true);
if (index == noreg ) {
assert(Assembler::is_simm13(offset), "fix this code");
__ store_heap_oop(val, base, offset);
} else {
__ store_heap_oop(val, base, index);
}
// No need for post barrier if storing NULL
if (val != G0) {
if (precise) {
if (index == noreg) {
__ add(base, offset, base);
} else {
__ add(base, index, base);
}
}
__ g1_write_barrier_post(base, val, tmp);
}
}
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
if (index == noreg ) {
assert(Assembler::is_simm13(offset), "fix this code");
__ store_heap_oop(val, base, offset);
} else {
__ store_heap_oop(val, base, index);
}
// No need for post barrier if storing NULL
if (val != G0) {
if (precise) {
if (index == noreg) {
__ add(base, offset, base);
} else {
__ add(base, index, base);
}
}
__ card_write_barrier_post(base, val, tmp);
}
}
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
ShouldNotReachHere();
break;
default :
ShouldNotReachHere();
}
}
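For reference, a call matching how aastore below uses this helper, annotated with the formal parameter names from the declaration above.

do_oop_store(_masm,
             O1,                                            // base
             noreg,                                         // index (none, so offset is used)
             arrayOopDesc::base_offset_in_bytes(T_OBJECT),  // offset
             Otos_i,                                        // val
             G3_scratch,                                    // tmp
             _bs->kind(),                                   // barrier
             true);                                         // precise (array stores dirty the exact card)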
//----------------------------------------------------------------------------------------------------
// Platform-dependent initialization
@@ -758,6 +831,8 @@ void TemplateTable::aastore() {
// O4: array element klass
// O5: value klass
// Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Generate a fast subtype check. Branch to store_ok if no
// failure. Throw if failure.
__ gen_subtype_check( O5, O4, G3_scratch, G4_scratch, G1_scratch, store_ok );
@@ -767,18 +842,14 @@ void TemplateTable::aastore() {
// Store is OK.
__ bind(store_ok);
__ store_heap_oop(Otos_i, O1, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Quote from rememberedSet.hpp: For objArrays, the precise card
// corresponding to the pointer store is dirtied so we don't need to
// scavenge the entire array.
Address element(O1, 0, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ add(element, O1); // address the element precisely
__ store_check(G3_scratch, O1);
do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Otos_i, G3_scratch, _bs->kind(), true);
__ ba(false,done);
__ delayed()->inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value)
__ bind(is_null);
__ store_heap_oop(Otos_i, element);
do_oop_store(_masm, O1, noreg, arrayOopDesc::base_offset_in_bytes(T_OBJECT), G0, G4_scratch, _bs->kind(), true);
__ profile_null_seen(G3_scratch);
__ inc(Lesp, 3* Interpreter::stackElementSize()); // adj sp (pops array, index and value)
__ bind(done);
@@ -2449,8 +2520,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
// atos
__ pop_ptr();
__ verify_oop(Otos_i);
__ store_heap_oop(Otos_i, Rclass, Roffset);
__ store_check(G1_scratch, Rclass, Roffset);
do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
__ ba(false, checkVolatile);
__ delayed()->tst(Lscratch);
@@ -2491,8 +2563,9 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
__ pop_ptr();
pop_and_check_object(Rclass);
__ verify_oop(Otos_i);
__ store_heap_oop(Otos_i, Rclass, Roffset);
__ store_check(G1_scratch, Rclass, Roffset);
do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch);
__ ba(false, checkVolatile);
__ delayed()->tst(Lscratch);
@@ -2646,8 +2719,7 @@ void TemplateTable::fast_storefield(TosState state) {
__ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
break;
case Bytecodes::_fast_aputfield:
__ store_heap_oop(Otos_i, Rclass, Roffset);
__ store_check(G1_scratch, Rclass, Roffset);
do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
break;
default:
ShouldNotReachHere();

View file

@@ -5935,26 +5935,30 @@ void MacroAssembler::eden_allocate(Register obj,
Label& slow_case) {
assert(obj == rax, "obj must be in rax, for cmpxchg");
assert_different_registers(obj, var_size_in_bytes, t1);
Register end = t1;
Label retry;
bind(retry);
ExternalAddress heap_top((address) Universe::heap()->top_addr());
movptr(obj, heap_top);
if (var_size_in_bytes == noreg) {
lea(end, Address(obj, con_size_in_bytes));
if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) {
jmp(slow_case);
} else {
lea(end, Address(obj, var_size_in_bytes, Address::times_1));
Register end = t1;
Label retry;
bind(retry);
ExternalAddress heap_top((address) Universe::heap()->top_addr());
movptr(obj, heap_top);
if (var_size_in_bytes == noreg) {
lea(end, Address(obj, con_size_in_bytes));
} else {
lea(end, Address(obj, var_size_in_bytes, Address::times_1));
}
// if end < obj then we wrapped around => object too long => slow case
cmpptr(end, obj);
jcc(Assembler::below, slow_case);
cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
jcc(Assembler::above, slow_case);
// Compare obj with the top addr, and if still equal, store the new top addr in
// end at the address of the top addr pointer. Sets ZF if was equal, and clears
// it otherwise. Use lock prefix for atomicity on MPs.
locked_cmpxchgptr(end, heap_top);
jcc(Assembler::notEqual, retry);
}
// if end < obj then we wrapped around => object too long => slow case
cmpptr(end, obj);
jcc(Assembler::below, slow_case);
cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr()));
jcc(Assembler::above, slow_case);
// Compare obj with the top addr, and if still equal, store the new top addr in
// end at the address of the top addr pointer. Sets ZF if was equal, and clears
// it otherwise. Use lock prefix for atomicity on MPs.
locked_cmpxchgptr(end, heap_top);
jcc(Assembler::notEqual, retry);
}
void MacroAssembler::enter() {
@@ -6491,6 +6495,179 @@ void MacroAssembler::sign_extend_short(Register reg) {
}
}
//////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void MacroAssembler::g1_write_barrier_pre(Register obj,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2,
bool tosca_live) {
LP64_ONLY(Register thread = r15_thread;)
Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()));
Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf()));
Label done;
Label runtime;
// if (!marking_in_progress) goto done;
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
cmpl(in_progress, 0);
} else {
assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
cmpb(in_progress, 0);
}
jcc(Assembler::equal, done);
// if (x.f == NULL) goto done;
cmpptr(Address(obj, 0), NULL_WORD);
jcc(Assembler::equal, done);
// Can we store original value in the thread's buffer?
LP64_ONLY(movslq(tmp, index);)
movptr(tmp2, Address(obj, 0));
#ifdef _LP64
cmpq(tmp, 0);
#else
cmpl(index, 0);
#endif
jcc(Assembler::equal, runtime);
#ifdef _LP64
subq(tmp, wordSize);
movl(index, tmp);
addq(tmp, buffer);
#else
subl(index, wordSize);
movl(tmp, buffer);
addl(tmp, index);
#endif
movptr(Address(tmp, 0), tmp2);
jmp(done);
bind(runtime);
// save the live input values
if(tosca_live) push(rax);
push(obj);
#ifdef _LP64
movq(c_rarg0, Address(obj, 0));
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), c_rarg0, r15_thread);
#else
push(thread);
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
pop(thread);
#endif
pop(obj);
if(tosca_live) pop(rax);
bind(done);
}
void MacroAssembler::g1_write_barrier_post(Register store_addr,
Register new_val,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2) {
LP64_ONLY(Register thread = r15_thread;)
Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf()));
BarrierSet* bs = Universe::heap()->barrier_set();
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
Label done;
Label runtime;
// Does store cross heap regions?
movptr(tmp, store_addr);
xorptr(tmp, new_val);
shrptr(tmp, HeapRegion::LogOfHRGrainBytes);
jcc(Assembler::equal, done);
// crosses regions, storing NULL?
cmpptr(new_val, (int32_t) NULL_WORD);
jcc(Assembler::equal, done);
// storing region crossing non-NULL, is card already dirty?
ExternalAddress cardtable((address) ct->byte_map_base);
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
const Register card_addr = tmp;
movq(card_addr, store_addr);
shrq(card_addr, CardTableModRefBS::card_shift);
lea(tmp2, cardtable);
// get the address of the card
addq(card_addr, tmp2);
#else
const Register card_index = tmp;
movl(card_index, store_addr);
shrl(card_index, CardTableModRefBS::card_shift);
Address index(noreg, card_index, Address::times_1);
const Register card_addr = tmp;
lea(card_addr, as_Address(ArrayAddress(cardtable, index)));
#endif
cmpb(Address(card_addr, 0), 0);
jcc(Assembler::equal, done);
// storing a region crossing, non-NULL oop, card is clean.
// dirty card and log.
movb(Address(card_addr, 0), 0);
cmpl(queue_index, 0);
jcc(Assembler::equal, runtime);
subl(queue_index, wordSize);
movptr(tmp2, buffer);
#ifdef _LP64
movslq(rscratch1, queue_index);
addq(tmp2, rscratch1);
movq(Address(tmp2, 0), card_addr);
#else
addl(tmp2, queue_index);
movl(Address(tmp2, 0), card_index);
#endif
jmp(done);
bind(runtime);
// save the live input values
push(store_addr);
push(new_val);
#ifdef _LP64
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread);
#else
push(thread);
call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
pop(thread);
#endif
pop(new_val);
pop(store_addr);
bind(done);
}
#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////
void MacroAssembler::store_check(Register obj) {
// Does a store check for the oop in register obj. The content of
// register obj is destroyed afterwards.

View file

@@ -227,9 +227,11 @@ class Address VALUE_OBJ_CLASS_SPEC {
#endif // ASSERT
// accessors
bool uses(Register reg) const {
return _base == reg || _index == reg;
}
bool uses(Register reg) const { return _base == reg || _index == reg; }
Register base() const { return _base; }
Register index() const { return _index; }
ScaleFactor scale() const { return _scale; }
int disp() const { return _disp; }
// Convert the raw encoding form into the form expected by the constructor for
// Address. An index of 4 (rsp) corresponds to having no index, so convert
@@ -1310,7 +1312,8 @@ private:
// on arguments should also go in here.
class MacroAssembler: public Assembler {
friend class LIR_Assembler;
friend class LIR_Assembler;
friend class Runtime1; // as_Address()
protected:
Address as_Address(AddressLiteral adr);
@@ -1453,6 +1456,7 @@ class MacroAssembler: public Assembler {
// The pointer will be loaded into the thread register.
void get_thread(Register thread);
// Support for VM calls
//
// It is imperative that all calls into the VM are handled via the call_VM macros.
@@ -1527,6 +1531,22 @@ class MacroAssembler: public Assembler {
void store_check(Register obj); // store check for obj - register is destroyed afterwards
void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed)
void g1_write_barrier_pre(Register obj,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2,
bool tosca_live);
void g1_write_barrier_post(Register store_addr,
Register new_val,
#ifndef _LP64
Register thread,
#endif
Register tmp,
Register tmp2);
// split store_check(Register obj) to enhance instruction interleaving
void store_check_part_1(Register obj);
void store_check_part_2(Register obj);

View file

@@ -456,5 +456,50 @@ void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
__ jmp(_continuation);
}
/////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
// At this point we know that marking is in progress
__ bind(_entry);
assert(pre_val()->is_register(), "Precondition.");
Register pre_val_reg = pre_val()->as_register();
ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false);
__ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
__ jcc(Assembler::equal, _continuation);
ce->store_parameter(pre_val()->as_register(), 0);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
__ jmp(_continuation);
}
jbyte* G1PostBarrierStub::_byte_map_base = NULL;
jbyte* G1PostBarrierStub::byte_map_base_slow() {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->is_a(BarrierSet::G1SATBCTLogging),
"Must be if we're using this.");
return ((G1SATBCardTableModRefBS*)bs)->byte_map_base;
}
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(addr()->is_register(), "Precondition.");
assert(new_val()->is_register(), "Precondition.");
Register new_val_reg = new_val()->as_register();
__ cmpptr(new_val_reg, (int32_t) NULL_WORD);
__ jcc(Assembler::equal, _continuation);
ce->store_parameter(addr()->as_register(), 0);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id)));
__ jmp(_continuation);
}
#endif // SERIALGC
/////////////////////////////////////////////////////////////////////////////
#undef __

View file

@@ -302,6 +302,8 @@ void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
}
if (obj_store) {
// Needs GC write barriers.
pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
__ move(value.result(), array_addr, null_check_info);
// Seems to be a precise
post_barrier(LIR_OprFact::address(array_addr), value.result());
@@ -756,7 +758,10 @@ void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
__ move(obj.result(), addr);
__ add(addr, offset.result(), addr);
if (type == objectType) { // Write-barrier needed for Object fields.
// Do the pre-write barrier, if any.
pre_barrier(addr, false, NULL);
}
LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience
if (type == objectType)
@@ -1286,6 +1291,8 @@ void LIRGenerator::put_Object_unsafe(LIR_Opr src, LIR_Opr offset, LIR_Opr data,
LIR_Address* addr = new LIR_Address(src, offset, type);
bool is_obj = (type == T_ARRAY || type == T_OBJECT);
if (is_obj) {
// Do the pre-write barrier, if any.
pre_barrier(LIR_OprFact::address(addr), false, NULL);
__ move(data, addr);
assert(src->is_register(), "must be register");
// Seems to be a precise address

View file

@@ -1583,6 +1583,166 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
}
break;
#ifndef SERIALGC
case g1_pre_barrier_slow_id:
{
StubFrame f(sasm, "g1_pre_barrier", dont_gc_arguments);
// arg0 : previous value of memory
BarrierSet* bs = Universe::heap()->barrier_set();
if (bs->kind() != BarrierSet::G1SATBCTLogging) {
__ movptr(rax, (int)id);
__ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), rax);
__ should_not_reach_here();
break;
}
__ push(rax);
__ push(rdx);
const Register pre_val = rax;
const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
const Register tmp = rdx;
NOT_LP64(__ get_thread(thread);)
Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()));
Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_buf()));
Label done;
Label runtime;
// Can we store original value in the thread's buffer?
LP64_ONLY(__ movslq(tmp, queue_index);)
#ifdef _LP64
__ cmpq(tmp, 0);
#else
__ cmpl(queue_index, 0);
#endif
__ jcc(Assembler::equal, runtime);
#ifdef _LP64
__ subq(tmp, wordSize);
__ movl(queue_index, tmp);
__ addq(tmp, buffer);
#else
__ subl(queue_index, wordSize);
__ movl(tmp, buffer);
__ addl(tmp, queue_index);
#endif
// prev_val (rax)
f.load_argument(0, pre_val);
__ movptr(Address(tmp, 0), pre_val);
__ jmp(done);
__ bind(runtime);
// load the pre-value
__ push(rcx);
f.load_argument(0, rcx);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), rcx, thread);
__ pop(rcx);
__ bind(done);
__ pop(rdx);
__ pop(rax);
}
break;
case g1_post_barrier_slow_id:
{
StubFrame f(sasm, "g1_post_barrier", dont_gc_arguments);
// arg0: store_address
Address store_addr(rbp, 2*BytesPerWord);
BarrierSet* bs = Universe::heap()->barrier_set();
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
Label done;
Label runtime;
// At this point we know new_value is non-NULL and the new_value crosses regions.
// Must check to see if card is already dirty
const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread);
Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_index()));
Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
PtrQueue::byte_offset_of_buf()));
__ push(rax);
__ push(rdx);
NOT_LP64(__ get_thread(thread);)
ExternalAddress cardtable((address)ct->byte_map_base);
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
const Register card_addr = rdx;
#ifdef _LP64
const Register tmp = rscratch1;
f.load_argument(0, card_addr);
__ shrq(card_addr, CardTableModRefBS::card_shift);
__ lea(tmp, cardtable);
// get the address of the card
__ addq(card_addr, tmp);
#else
const Register card_index = rdx;
f.load_argument(0, card_index);
__ shrl(card_index, CardTableModRefBS::card_shift);
Address index(noreg, card_index, Address::times_1);
__ leal(card_addr, __ as_Address(ArrayAddress(cardtable, index)));
#endif
__ cmpb(Address(card_addr, 0), 0);
__ jcc(Assembler::equal, done);
// storing region crossing non-NULL, card is clean.
// dirty card and log.
__ movb(Address(card_addr, 0), 0);
__ cmpl(queue_index, 0);
__ jcc(Assembler::equal, runtime);
__ subl(queue_index, wordSize);
const Register buffer_addr = rbx;
__ push(rbx);
__ movptr(buffer_addr, buffer);
#ifdef _LP64
__ movslq(rscratch1, queue_index);
__ addptr(buffer_addr, rscratch1);
#else
__ addptr(buffer_addr, queue_index);
#endif
__ movptr(Address(buffer_addr, 0), card_addr);
__ pop(rbx);
__ jmp(done);
__ bind(runtime);
NOT_LP64(__ push(rcx);)
__ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread);
NOT_LP64(__ pop(rcx);)
__ bind(done);
__ pop(rdx);
__ pop(rax);
}
break;
#endif // !SERIALGC
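// Illustrative C++ sketch (not VM code, not part of this patch): the two G1 stubs
// above implement the same per-thread queue protocol on 32- and 64-bit. Both
// queues count their index down in bytes; index == 0 means the buffer is full and
// the stub hands off to the runtime (the SharedRuntime::g1_wb_pre / g1_wb_post
// calls above). PtrQueueSketch and the helpers below are made-up stand-ins;
// jbyte and CardTableModRefBS::card_shift are the names used by the stub itself.
struct PtrQueueSketch {
  size_t index;   // byte offset of the next free slot, counts down to 0
  char*  buf;     // thread-local buffer of pointer-sized slots
};

static bool try_enqueue(PtrQueueSketch* q, void* value) {
  if (q->index == 0) return false;         // full: caller must call the runtime
  q->index -= sizeof(void*);
  *(void**)(q->buf + q->index) = value;
  return true;
}

static void g1_pre_barrier_slow_sketch(void* pre_val, PtrQueueSketch* satb_q) {
  if (!try_enqueue(satb_q, pre_val)) {
    // buffer full: the stub calls SharedRuntime::g1_wb_pre(pre_val, thread)
  }
}

static void g1_post_barrier_slow_sketch(void* store_addr, jbyte* byte_map_base,
                                        PtrQueueSketch* dirty_q) {
  jbyte* card = byte_map_base + ((uintptr_t)store_addr >> CardTableModRefBS::card_shift);
  if (*card == 0) return;                  // 0 means dirty: already logged, done
  *card = 0;                               // dirty the card
  if (!try_enqueue(dirty_q, (void*)card)) {
    // buffer full: the stub calls SharedRuntime::g1_wb_post(card, thread)
  }
}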
default:
{ StubFrame f(sasm, "unimplemented entry", dont_gc_arguments);
__ movptr(rax, (int)id);

View file

@ -44,8 +44,13 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
// Note: No need to save/restore bcp & locals (r13 & r14) pointer
// since these are callee saved registers and no blocking/
// GC can happen in leaf calls.
// Further Note: DO NOT save/restore bcp/locals. If a caller has
// already saved them so that it can use esi/edi as temporaries
// then a save/restore here will DESTROY the copy the caller
// saved! There used to be a save_bcp() that only happened in
// the ASSERT path (with no restore_bcp), which caused bizarre
// failures when the JVM was built with ASSERTs.
#ifdef ASSERT
save_bcp();
{
Label L;
cmpptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD);
@ -58,24 +63,9 @@ void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point,
// super call
MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments);
// interpreter specific
#ifdef ASSERT
{
Label L;
cmpptr(r13, Address(rbp, frame::interpreter_frame_bcx_offset * wordSize));
jcc(Assembler::equal, L);
stop("InterpreterMacroAssembler::call_VM_leaf_base:"
" r13 not callee saved?");
bind(L);
}
{
Label L;
cmpptr(r14, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
jcc(Assembler::equal, L);
stop("InterpreterMacroAssembler::call_VM_leaf_base:"
" r14 not callee saved?");
bind(L);
}
#endif
// Used to ASSERT that r13/r14 were equal to frame's bcp/locals
// but since they may not have been saved (and we don't want to
// save them here -- see note above) the assert is invalid.
}
void InterpreterMacroAssembler::call_VM_base(Register oop_result,

View file

@ -712,7 +712,6 @@ class StubGenerator: public StubCodeGenerator {
// end - element count
void gen_write_ref_array_pre_barrier(Register start, Register count) {
assert_different_registers(start, count);
#if 0 // G1 only
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
@ -721,8 +720,8 @@ class StubGenerator: public StubCodeGenerator {
__ pusha(); // push registers
__ push(count);
__ push(start);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre));
__ addl(esp, wordSize * 2);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
__ addptr(rsp, 2*wordSize);
__ popa();
}
break;
@ -734,7 +733,6 @@ class StubGenerator: public StubCodeGenerator {
ShouldNotReachHere();
}
#endif // 0 - G1 only
}
@ -750,20 +748,18 @@ class StubGenerator: public StubCodeGenerator {
BarrierSet* bs = Universe::heap()->barrier_set();
assert_different_registers(start, count);
switch (bs->kind()) {
#if 0 // G1 only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ pusha(); // push registers
__ push(count);
__ push(start);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
__ addl(esp, wordSize * 2);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
__ addptr(rsp, 2*wordSize);
__ popa();
}
break;
#endif // 0 G1 only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
@ -1378,9 +1374,9 @@ class StubGenerator: public StubCodeGenerator {
Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes());
// Copy from low to high addresses, indexed from the end of each array.
gen_write_ref_array_pre_barrier(to, count);
__ lea(end_from, end_from_addr);
__ lea(end_to, end_to_addr);
gen_write_ref_array_pre_barrier(to, count);
assert(length == count, ""); // else fix next line:
__ negptr(count); // negate and test the length
__ jccb(Assembler::notZero, L_load_element);

View file

@ -1153,18 +1153,26 @@ class StubGenerator: public StubCodeGenerator {
// Destroy no registers!
//
void gen_write_ref_array_pre_barrier(Register addr, Register count) {
#if 0 // G1 - only
assert_different_registers(addr, c_rarg1);
assert_different_registers(count, c_rarg0);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
__ pusha(); // push registers
__ movptr(c_rarg0, addr);
__ movptr(c_rarg1, count);
__ call(RuntimeAddress(BarrierSet::static_write_ref_array_pre));
if (count == c_rarg0) {
if (addr == c_rarg1) {
// exactly backwards!!
__ xchgptr(c_rarg1, c_rarg0);
} else {
__ movptr(c_rarg1, count);
__ movptr(c_rarg0, addr);
}
} else {
__ movptr(c_rarg0, addr);
__ movptr(c_rarg1, count);
}
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre)));
__ popa();
}
break;
@ -1172,11 +1180,10 @@ class StubGenerator: public StubCodeGenerator {
case BarrierSet::CardTableExtension:
case BarrierSet::ModRef:
break;
default :
default:
ShouldNotReachHere();
}
#endif // 0 G1 - only
}
//
@ -1193,7 +1200,6 @@ class StubGenerator: public StubCodeGenerator {
assert_different_registers(start, end, scratch);
BarrierSet* bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
#if 0 // G1 - only
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
@ -1206,11 +1212,10 @@ class StubGenerator: public StubCodeGenerator {
__ shrptr(scratch, LogBytesPerWord);
__ mov(c_rarg0, start);
__ mov(c_rarg1, scratch);
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post));
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post)));
__ popa();
}
break;
#endif // 0 G1 - only
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
@ -1239,8 +1244,12 @@ class StubGenerator: public StubCodeGenerator {
__ decrement(count);
__ jcc(Assembler::greaterEqual, L_loop);
}
}
}
break;
default:
ShouldNotReachHere();
}
}
// Copy big chunks forward
//
@ -2282,7 +2291,7 @@ class StubGenerator: public StubCodeGenerator {
// and report their number to the caller.
assert_different_registers(rax, r14_length, count, to, end_to, rcx);
__ lea(end_to, to_element_addr);
gen_write_ref_array_post_barrier(to, end_to, rcx);
gen_write_ref_array_post_barrier(to, end_to, rscratch1);
__ movptr(rax, r14_length); // original oops
__ addptr(rax, count); // K = (original - remaining) oops
__ notptr(rax); // report (-1^K) to caller
@ -2291,7 +2300,7 @@ class StubGenerator: public StubCodeGenerator {
// Come here on success only.
__ BIND(L_do_card_marks);
__ addptr(end_to, -wordSize); // make an inclusive end pointer
gen_write_ref_array_post_barrier(to, end_to, rcx);
gen_write_ref_array_post_barrier(to, end_to, rscratch1);
__ xorptr(rax, rax); // return 0 on success
// Common exit point (success or failure).

View file

@ -107,6 +107,78 @@ static Assembler::Condition j_not(TemplateTable::Condition cc) {
//----------------------------------------------------------------------------------------------------
// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL
static void do_oop_store(InterpreterMacroAssembler* _masm,
Address obj,
Register val,
BarrierSet::Name barrier,
bool precise) {
assert(val == noreg || val == rax, "parameter is just for looks");
switch (barrier) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
// flatten object address if needed
// We do it regardless of precise because we need the registers
if (obj.index() == noreg && obj.disp() == 0) {
if (obj.base() != rdx) {
__ movl(rdx, obj.base());
}
} else {
__ leal(rdx, obj);
}
__ get_thread(rcx);
__ save_bcp();
__ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
// Do the actual store
// noreg means NULL
if (val == noreg) {
__ movl(Address(rdx, 0), NULL_WORD);
// No post barrier for NULL
} else {
__ movl(Address(rdx, 0), val);
__ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
}
__ restore_bcp();
}
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
if (val == noreg) {
__ movl(obj, NULL_WORD);
} else {
__ movl(obj, val);
// flatten object address if needed
if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
__ store_check(obj.base());
} else {
__ leal(rdx, obj);
__ store_check(rdx);
}
}
}
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
if (val == noreg) {
__ movl(obj, NULL_WORD);
} else {
__ movl(obj, val);
}
break;
default :
ShouldNotReachHere();
}
}
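// Illustrative C++ sketch (not VM code): whatever the barrier kind, do_oop_store
// above preserves one ordering -- any pre-write barrier runs before the store,
// and the post-write barrier is emitted only when the stored value is known to
// be non-NULL (val == noreg encodes a NULL store). The helper names below are
// placeholders; oop is the VM's own type.
static void pre_write_barrier(oop* field)               { /* collector-specific, e.g. G1 SATB log */ }
static void post_write_barrier(oop* field, oop new_val) { /* collector-specific, e.g. card mark */ }

static void oop_field_store_sketch(oop* field, oop new_val) {
  pre_write_barrier(field);             // runs before the store, records the old value if needed
  *field = new_val;                     // the actual store
  if (new_val != NULL) {                // NULL stores get no post barrier
    post_write_barrier(field, new_val); // dirty the card / log the card for the new reference
  }
}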
Address TemplateTable::at_bcp(int offset) {
assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
return Address(rsi, offset);
@ -876,6 +948,8 @@ void TemplateTable::aastore() {
__ movptr(rax, at_tos()); // Value
__ movl(rcx, at_tos_p1()); // Index
__ movptr(rdx, at_tos_p2()); // Array
Address element_address(rdx, rcx, Address::times_4, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
index_check_without_pop(rdx, rcx); // kills rbx,
// do array store check - check for NULL value first
__ testptr(rax, rax);
@ -887,7 +961,7 @@ void TemplateTable::aastore() {
__ movptr(rax, Address(rdx, oopDesc::klass_offset_in_bytes()));
__ movptr(rax, Address(rax, sizeof(oopDesc) + objArrayKlass::element_klass_offset_in_bytes()));
// Compress array+index*wordSize+12 into a single register. Frees ECX.
__ lea(rdx, Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ lea(rdx, element_address);
// Generate subtype check. Blows ECX. Resets EDI to locals.
// Superklass in EAX. Subklass in EBX.
@ -899,15 +973,20 @@ void TemplateTable::aastore() {
// Come here on success
__ bind(ok_is_subtype);
__ movptr(rax, at_rsp()); // Value
__ movptr(Address(rdx, 0), rax);
__ store_check(rdx);
__ jmpb(done);
// Get the value to store
__ movptr(rax, at_rsp());
// and store it with appropriate barrier
do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
__ jmp(done);
// Have a NULL in EAX, EDX=array, ECX=index. Store NULL at ary[idx]
__ bind(is_null);
__ profile_null_seen(rbx);
__ movptr(Address(rdx, rcx, Address::times_ptr, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), rax);
// Store NULL, (noreg means NULL to do_oop_store)
do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
// Pop stack arguments
__ bind(done);
@ -1515,7 +1594,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// compute return address as bci in rax,
__ lea(rax, at_bcp((is_wide ? 5 : 3) - in_bytes(constMethodOopDesc::codes_offset())));
__ subptr(rax, Address(rcx, methodOopDesc::const_offset()));
// Adjust the bcp in ESI by the displacement in EDX
// Adjust the bcp in RSI by the displacement in EDX
__ addptr(rsi, rdx);
// Push return address
__ push_i(rax);
@ -1526,7 +1605,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Normal (non-jsr) branch handling
// Adjust the bcp in ESI by the displacement in EDX
// Adjust the bcp in RSI by the displacement in EDX
__ addptr(rsi, rdx);
assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters");
@ -2439,11 +2518,12 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
__ pop(atos);
if (!is_static) pop_and_check_object(obj);
__ movptr(lo, rax );
__ store_check(obj, lo); // Need to mark card
do_oop_store(_masm, lo, rax, _bs->kind(), false);
if (!is_static) {
patch_bytecode(Bytecodes::_fast_aputfield, rcx, rbx);
}
__ jmp(Done);
__ bind(notObj);
@ -2664,7 +2744,10 @@ void TemplateTable::fast_storefield(TosState state) {
break;
case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
case Bytecodes::_fast_aputfield: {
do_oop_store(_masm, lo, rax, _bs->kind(), false);
break;
}
default:
ShouldNotReachHere();
}
@ -2672,7 +2755,8 @@ void TemplateTable::fast_storefield(TosState state) {
Label done;
volatile_barrier(Assembler::Membar_mask_bits(Assembler::StoreLoad |
Assembler::StoreStore));
__ jmpb(done);
// Barriers are so large that short branch doesn't reach!
__ jmp(done);
// Same code as above, but don't need rdx to test for volatile.
__ bind(notVolatile);
@ -2694,7 +2778,10 @@ void TemplateTable::fast_storefield(TosState state) {
break;
case Bytecodes::_fast_fputfield: __ fstp_s(lo); break;
case Bytecodes::_fast_dputfield: __ fstp_d(lo); break;
case Bytecodes::_fast_aputfield: __ movptr(lo, rax); __ store_check(rcx, lo); break;
case Bytecodes::_fast_aputfield: {
do_oop_store(_masm, lo, rax, _bs->kind(), false);
break;
}
default:
ShouldNotReachHere();
}
@ -3054,8 +3141,6 @@ void TemplateTable::_new() {
Label initialize_object; // including clearing the fields
Label allocate_shared;
ExternalAddress heap_top((address)Universe::heap()->top_addr());
__ get_cpool_and_tags(rcx, rax);
// get instanceKlass
__ movptr(rcx, Address(rcx, rdx, Address::times_ptr, sizeof(constantPoolOopDesc)));
@ -3112,6 +3197,8 @@ void TemplateTable::_new() {
if (allow_shared_alloc) {
__ bind(allocate_shared);
ExternalAddress heap_top((address)Universe::heap()->top_addr());
Label retry;
__ bind(retry);
__ movptr(rax, heap_top);
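// Illustrative C++ sketch (not VM code): the retry label above exists because
// shared-eden allocation races with other threads on the heap top; the stub
// redoes the bump-pointer computation until its compare-and-exchange on the heap
// top succeeds. Universe::heap()->top_addr()/end_addr() are the addresses used
// above; the function name and the use of the usual Atomic::cmpxchg_ptr helper
// are assumptions made for the sketch.
static HeapWord* shared_eden_allocate_sketch(size_t word_size) {
  HeapWord** top_addr = Universe::heap()->top_addr();
  while (true) {
    HeapWord* old_top = *top_addr;
    HeapWord* new_top = old_top + word_size;
    if (new_top > *Universe::heap()->end_addr()) {
      return NULL;                       // out of shared space: take the slow path
    }
    // Publish the new top atomically; if another thread won the race, retry.
    if (Atomic::cmpxchg_ptr(new_top, top_addr, old_top) == (void*) old_top) {
      return old_top;                    // this thread owns [old_top, new_top)
    }
  }
}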

View file

@ -115,6 +115,69 @@ static Assembler::Condition j_not(TemplateTable::Condition cc) {
// Miscellaneous helper routines
// Store an oop (or NULL) at the address described by obj.
// If val == noreg this means store a NULL
static void do_oop_store(InterpreterMacroAssembler* _masm,
Address obj,
Register val,
BarrierSet::Name barrier,
bool precise) {
assert(val == noreg || val == rax, "parameter is just for looks");
switch (barrier) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
{
// flatten object address if needed
if (obj.index() == noreg && obj.disp() == 0) {
if (obj.base() != rdx) {
__ movq(rdx, obj.base());
}
} else {
__ leaq(rdx, obj);
}
__ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
if (val == noreg) {
__ store_heap_oop(Address(rdx, 0), NULL_WORD);
} else {
__ store_heap_oop(Address(rdx, 0), val);
__ g1_write_barrier_post(rdx, val, r8, rbx);
}
}
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
{
if (val == noreg) {
__ store_heap_oop(obj, NULL_WORD);
} else {
__ store_heap_oop(obj, val);
// flatten object address if needed
if (!precise || (obj.index() == noreg && obj.disp() == 0)) {
__ store_check(obj.base());
} else {
__ leaq(rdx, obj);
__ store_check(rdx);
}
}
}
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
if (val == noreg) {
__ store_heap_oop(obj, NULL_WORD);
} else {
__ store_heap_oop(obj, val);
}
break;
default :
ShouldNotReachHere();
}
}
Address TemplateTable::at_bcp(int offset) {
assert(_desc->uses_bcp(), "inconsistent uses_bcp information");
@ -560,8 +623,8 @@ void TemplateTable::aaload() {
// rdx: array
index_check(rdx, rax); // kills rbx
__ load_heap_oop(rax, Address(rdx, rax,
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
}
void TemplateTable::baload() {
@ -866,6 +929,11 @@ void TemplateTable::aastore() {
__ movptr(rax, at_tos()); // value
__ movl(rcx, at_tos_p1()); // index
__ movptr(rdx, at_tos_p2()); // array
Address element_address(rdx, rcx,
UseCompressedOops? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT));
index_check(rdx, rcx); // kills rbx
// do array store check - check for NULL value first
__ testptr(rax, rax);
@ -879,9 +947,7 @@ void TemplateTable::aastore() {
sizeof(oopDesc) +
objArrayKlass::element_klass_offset_in_bytes()));
// Compress array + index*oopSize + 12 into a single register. Frees rcx.
__ lea(rdx, Address(rdx, rcx,
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
__ lea(rdx, element_address);
// Generate subtype check. Blows rcx, rdi
// Superklass in rax. Subklass in rbx.
@ -893,18 +959,19 @@ void TemplateTable::aastore() {
// Come here on success
__ bind(ok_is_subtype);
__ movptr(rax, at_tos()); // Value
__ store_heap_oop(Address(rdx, 0), rax);
__ store_check(rdx);
// Get the value we will store
__ movptr(rax, at_tos());
// Now store using the appropriate barrier
do_oop_store(_masm, Address(rdx, 0), rax, _bs->kind(), true);
__ jmp(done);
// Have a NULL in rax, rdx=array, ecx=index. Store NULL at ary[idx]
__ bind(is_null);
__ profile_null_seen(rbx);
__ store_heap_oop(Address(rdx, rcx,
UseCompressedOops ? Address::times_4 : Address::times_8,
arrayOopDesc::base_offset_in_bytes(T_OBJECT)),
rax);
// Store a NULL
do_oop_store(_masm, element_address, noreg, _bs->kind(), true);
// Pop stack arguments
__ bind(done);
@ -2396,8 +2463,10 @@ void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
// atos
__ pop(atos);
if (!is_static) pop_and_check_object(obj);
__ store_heap_oop(field, rax);
__ store_check(obj, field); // Need to mark card
// Store into the field
do_oop_store(_masm, field, rax, _bs->kind(), false);
if (!is_static) {
patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx);
}
@ -2584,8 +2653,7 @@ void TemplateTable::fast_storefield(TosState state) {
// access field
switch (bytecode()) {
case Bytecodes::_fast_aputfield:
__ store_heap_oop(field, rax);
__ store_check(rcx, field);
do_oop_store(_masm, field, rax, _bs->kind(), false);
break;
case Bytecodes::_fast_lputfield:
__ movq(field, rax);
@ -3044,8 +3112,6 @@ void TemplateTable::_new() {
Label initialize_header;
Label initialize_object; // including clearing the fields
Label allocate_shared;
ExternalAddress top((address)Universe::heap()->top_addr());
ExternalAddress end((address)Universe::heap()->end_addr());
__ get_cpool_and_tags(rsi, rax);
// get instanceKlass
@ -3106,6 +3172,9 @@ void TemplateTable::_new() {
if (allow_shared_alloc) {
__ bind(allocate_shared);
ExternalAddress top((address)Universe::heap()->top_addr());
ExternalAddress end((address)Universe::heap()->end_addr());
const Register RtopAddr = rscratch1;
const Register RendAddr = rscratch2;

View file

@ -1261,6 +1261,17 @@ jlong os::elapsed_frequency() {
return (1000 * 1000);
}
// For now, we say that linux does not support vtime. I have no idea
// whether it can actually be made to (DLD, 9/13/05).
bool os::supports_vtime() { return false; }
bool os::enable_vtime() { return false; }
bool os::vtime_enabled() { return false; }
double os::elapsedVTime() {
// better than nothing, but not much
return elapsedTime();
}
jlong os::javaTimeMillis() {
timeval time;
int status = gettimeofday(&time, NULL);

View file

@ -1691,6 +1691,40 @@ bool os::getTimesSecs(double* process_real_time,
}
}
bool os::supports_vtime() { return true; }
bool os::enable_vtime() {
int fd = open("/proc/self/ctl", O_WRONLY);
if (fd == -1)
return false;
long cmd[] = { PCSET, PR_MSACCT };
int res = write(fd, cmd, sizeof(long) * 2);
close(fd);
if (res != sizeof(long) * 2)
return false;
return true;
}
bool os::vtime_enabled() {
int fd = open("/proc/self/status", O_RDONLY);
if (fd == -1)
return false;
pstatus_t status;
int res = read(fd, (void*) &status, sizeof(pstatus_t));
close(fd);
if (res != sizeof(pstatus_t))
return false;
return status.pr_flags & PR_MSACCT;
}
double os::elapsedVTime() {
return (double)gethrvtime() / (double)hrtime_hz;
}
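// Illustrative note (not part of this patch): the three entry points form a small
// protocol -- supports_vtime() says whether per-thread virtual time exists at all,
// enable_vtime() switches the accounting on (here via PCSET/PR_MSACCT written to
// /proc/self/ctl), and elapsedVTime() is the reading; on platforms without support
// it falls back to elapsedTime(). The caller below is hypothetical.
static double sample_vtime_sketch() {
  if (os::supports_vtime() && !os::vtime_enabled()) {
    os::enable_vtime();          // best effort; may fail and leave vtime disabled
  }
  return os::elapsedVTime();     // virtual time if enabled, else wall-clock time
}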
// Used internally for comparisons only
// getTimeMillis guaranteed to not move backwards on Solaris
jlong getTimeMillis() {
@ -2688,7 +2722,7 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) {
return bottom;
}
// Detect the topology change. Typically happens during CPU pluggin-unplugging.
// Detect the topology change. Typically happens during CPU plugging-unplugging.
bool os::numa_topology_changed() {
int is_stale = Solaris::lgrp_cookie_stale(Solaris::lgrp_cookie());
if (is_stale != -1 && is_stale) {

View file

@ -737,6 +737,17 @@ FILETIME java_to_windows_time(jlong l) {
return result;
}
// For now, we say that Windows does not support vtime. I have no idea
// whether it can actually be made to (DLD, 9/13/05).
bool os::supports_vtime() { return false; }
bool os::enable_vtime() { return false; }
bool os::vtime_enabled() { return false; }
double os::elapsedVTime() {
// better than nothing, but not much
return elapsedTime();
}
jlong os::javaTimeMillis() {
if (UseFakeTimers) {
return fake_time++;

View file

@ -3768,6 +3768,10 @@ bool MatchRule::is_chain_rule(FormDict &globals) const {
int MatchRule::is_ideal_copy() const {
if( _rChild ) {
const char *opType = _rChild->_opType;
#if 1
if( strcmp(opType,"CastIP")==0 )
return 1;
#else
if( strcmp(opType,"CastII")==0 )
return 1;
// Do not treat *CastPP this way, because it
@ -3787,6 +3791,7 @@ int MatchRule::is_ideal_copy() const {
// return 1;
//if( strcmp(opType,"CastP2X")==0 )
// return 1;
#endif
}
if( is_chain_rule(_AD.globalNames()) &&
_lChild && strncmp(_lChild->_opType,"stackSlot",9)==0 )

View file

@ -482,3 +482,81 @@ class ArrayCopyStub: public CodeStub {
virtual void print_name(outputStream* out) const { out->print("ArrayCopyStub"); }
#endif // PRODUCT
};
//////////////////////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
// Code stubs for Garbage-First barriers.
class G1PreBarrierStub: public CodeStub {
private:
LIR_Opr _addr;
LIR_Opr _pre_val;
LIR_PatchCode _patch_code;
CodeEmitInfo* _info;
public:
// pre_val (a temporary register) must be a register;
// addr (the address of the field to be read) must be a LIR_Address
G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) :
_addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info)
{
assert(_pre_val->is_register(), "should be temporary register");
assert(_addr->is_address(), "should be the address of the field");
}
LIR_Opr addr() const { return _addr; }
LIR_Opr pre_val() const { return _pre_val; }
LIR_PatchCode patch_code() const { return _patch_code; }
CodeEmitInfo* info() const { return _info; }
virtual void emit_code(LIR_Assembler* e);
virtual void visit(LIR_OpVisitState* visitor) {
// don't pass in the code emit info since it's processed in the fast
// path
if (_info != NULL)
visitor->do_slow_case(_info);
else
visitor->do_slow_case();
visitor->do_input(_addr);
visitor->do_temp(_pre_val);
}
#ifndef PRODUCT
virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); }
#endif // PRODUCT
};
class G1PostBarrierStub: public CodeStub {
private:
LIR_Opr _addr;
LIR_Opr _new_val;
static jbyte* _byte_map_base;
static jbyte* byte_map_base_slow();
static jbyte* byte_map_base() {
if (_byte_map_base == NULL) {
_byte_map_base = byte_map_base_slow();
}
return _byte_map_base;
}
public:
// addr (the address of the object head) and new_val must be registers.
G1PostBarrierStub(LIR_Opr addr, LIR_Opr new_val): _addr(addr), _new_val(new_val) { }
LIR_Opr addr() const { return _addr; }
LIR_Opr new_val() const { return _new_val; }
virtual void emit_code(LIR_Assembler* e);
virtual void visit(LIR_OpVisitState* visitor) {
// don't pass in the code emit info since it's processed in the fast path
visitor->do_slow_case();
visitor->do_input(_addr);
visitor->do_input(_new_val);
}
#ifndef PRODUCT
virtual void print_name(outputStream* out) const { out->print("G1PostBarrierStub"); }
#endif // PRODUCT
};
#endif // SERIALGC
//////////////////////////////////////////////////////////////////////////////////////////

View file

@ -74,6 +74,7 @@ void LIR_Assembler::patching_epilog(PatchingStub* patch, LIR_PatchCode patch_cod
LIR_Assembler::LIR_Assembler(Compilation* c):
_compilation(c)
, _masm(c->masm())
, _bs(Universe::heap()->barrier_set())
, _frame_map(c->frame_map())
, _current_block(NULL)
, _pending_non_safepoint(NULL)

View file

@ -24,11 +24,13 @@
class Compilation;
class ScopeValue;
class BarrierSet;
class LIR_Assembler: public CompilationResourceObj {
private:
C1_MacroAssembler* _masm;
CodeStubList* _slow_case_stubs;
BarrierSet* _bs;
Compilation* _compilation;
FrameMap* _frame_map;

View file

@ -285,16 +285,7 @@ jlong LIRItem::get_jlong_constant() const {
void LIRGenerator::init() {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
CardTableModRefBS* ct = (CardTableModRefBS*)bs;
assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
#ifdef _LP64
_card_table_base = new LIR_Const((jlong)ct->byte_map_base);
#else
_card_table_base = new LIR_Const((jint)ct->byte_map_base);
#endif
_bs = Universe::heap()->barrier_set();
}
@ -1239,8 +1230,37 @@ LIR_Opr LIRGenerator::load_constant(LIR_Const* c) {
// Various barriers
void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) {
// Do the pre-write barrier, if any.
switch (_bs->kind()) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info);
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
// No pre barriers
break;
case BarrierSet::ModRef:
case BarrierSet::Other:
// No pre barriers
break;
default :
ShouldNotReachHere();
}
}
void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
switch (Universe::heap()->barrier_set()->kind()) {
switch (_bs->kind()) {
#ifndef SERIALGC
case BarrierSet::G1SATBCT:
case BarrierSet::G1SATBCTLogging:
G1SATBCardTableModRef_post_barrier(addr, new_val);
break;
#endif // SERIALGC
case BarrierSet::CardTableModRef:
case BarrierSet::CardTableExtension:
CardTableModRef_post_barrier(addr, new_val);
@ -1254,11 +1274,120 @@ void LIRGenerator::post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
}
}
////////////////////////////////////////////////////////////////////////
#ifndef SERIALGC
void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info) {
if (G1DisablePreBarrier) return;
// First we test whether marking is in progress.
BasicType flag_type;
if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
flag_type = T_INT;
} else {
guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
"Assumption");
flag_type = T_BYTE;
}
LIR_Opr thrd = getThreadPointer();
LIR_Address* mark_active_flag_addr =
new LIR_Address(thrd,
in_bytes(JavaThread::satb_mark_queue_offset() +
PtrQueue::byte_offset_of_active()),
flag_type);
// Read the marking-in-progress flag.
LIR_Opr flag_val = new_register(T_INT);
__ load(mark_active_flag_addr, flag_val);
LabelObj* start_store = new LabelObj();
LIR_PatchCode pre_val_patch_code =
patch ? lir_patch_normal : lir_patch_none;
LIR_Opr pre_val = new_register(T_OBJECT);
__ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
if (!addr_opr->is_address()) {
assert(addr_opr->is_register(), "must be");
addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, 0, T_OBJECT));
}
CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
info);
__ branch(lir_cond_notEqual, T_INT, slow);
__ branch_destination(slow->continuation());
}
void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
if (G1DisablePostBarrier) return;
// If the "new_val" is a constant NULL, no barrier is necessary.
if (new_val->is_constant() &&
new_val->as_constant_ptr()->as_jobject() == NULL) return;
if (!new_val->is_register()) {
LIR_Opr new_val_reg = new_pointer_register();
if (new_val->is_constant()) {
__ move(new_val, new_val_reg);
} else {
__ leal(new_val, new_val_reg);
}
new_val = new_val_reg;
}
assert(new_val->is_register(), "must be a register at this point");
if (addr->is_address()) {
LIR_Address* address = addr->as_address_ptr();
LIR_Opr ptr = new_pointer_register();
if (!address->index()->is_valid() && address->disp() == 0) {
__ move(address->base(), ptr);
} else {
assert(address->disp() != max_jint, "lea doesn't support patched addresses!");
__ leal(addr, ptr);
}
addr = ptr;
}
assert(addr->is_register(), "must be a register at this point");
LIR_Opr xor_res = new_pointer_register();
LIR_Opr xor_shift_res = new_pointer_register();
if (TwoOperandLIRForm ) {
__ move(addr, xor_res);
__ logical_xor(xor_res, new_val, xor_res);
__ move(xor_res, xor_shift_res);
__ unsigned_shift_right(xor_shift_res,
LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
xor_shift_res,
LIR_OprDesc::illegalOpr());
} else {
__ logical_xor(addr, new_val, xor_res);
__ unsigned_shift_right(xor_res,
LIR_OprFact::intConst(HeapRegion::LogOfHRGrainBytes),
xor_shift_res,
LIR_OprDesc::illegalOpr());
}
if (!new_val->is_register()) {
LIR_Opr new_val_reg = new_pointer_register();
__ leal(new_val, new_val_reg);
new_val = new_val_reg;
}
assert(new_val->is_register(), "must be a register at this point");
__ cmp(lir_cond_notEqual, xor_shift_res, LIR_OprFact::intptrConst(NULL_WORD));
CodeStub* slow = new G1PostBarrierStub(addr, new_val);
__ branch(lir_cond_notEqual, T_INT, slow);
__ branch_destination(slow->continuation());
}
#endif // SERIALGC
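// Illustrative C++ sketch (not VM code): the xor/shift sequence above is a
// region-crossing filter -- a card only needs to be logged when the field and the
// new value live in different heap regions and the new value is not NULL.
// HeapRegion::LogOfHRGrainBytes is the constant used above; the predicate itself
// is made up for illustration.
static bool g1_needs_post_barrier_sketch(void* field_addr, void* new_val) {
  if (new_val == NULL) return false;   // NULL stores never need a card mark
  uintptr_t diff = (uintptr_t) field_addr ^ (uintptr_t) new_val;
  // Same region <=> every bit above the region-size granularity agrees.
  return (diff >> HeapRegion::LogOfHRGrainBytes) != 0;
}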
////////////////////////////////////////////////////////////////////////
void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
BarrierSet* bs = Universe::heap()->barrier_set();
assert(sizeof(*((CardTableModRefBS*)bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)bs)->byte_map_base);
assert(sizeof(*((CardTableModRefBS*)_bs)->byte_map_base) == sizeof(jbyte), "adjust this code");
LIR_Const* card_table_base = new LIR_Const(((CardTableModRefBS*)_bs)->byte_map_base);
if (addr->is_address()) {
LIR_Address* address = addr->as_address_ptr();
LIR_Opr ptr = new_register(T_OBJECT);
@ -1388,6 +1517,13 @@ void LIRGenerator::do_StoreField(StoreField* x) {
__ membar_release();
}
if (is_oop) {
// Do the pre-write barrier, if any.
pre_barrier(LIR_OprFact::address(address),
needs_patching,
(info ? new CodeEmitInfo(info) : NULL));
}
if (is_volatile) {
assert(!needs_patching && x->is_loaded(),
"how do we know it's volatile if it's not loaded");
@ -1398,7 +1534,12 @@ void LIRGenerator::do_StoreField(StoreField* x) {
}
if (is_oop) {
#ifdef PRECISE_CARDMARK
// Precise cardmarks don't work
post_barrier(LIR_OprFact::address(address), value.result());
#else
post_barrier(object.result(), value.result());
#endif // PRECISE_CARDMARK
}
if (is_volatile && os::is_MP()) {

View file

@ -145,6 +145,7 @@ class PhiResolver: public CompilationResourceObj {
// only the classes below belong in the same file
class LIRGenerator: public InstructionVisitor, public BlockClosure {
private:
Compilation* _compilation;
ciMethod* _method; // method that we are compiling
@ -154,6 +155,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
Values _instruction_for_operand;
BitMap2D _vreg_flags; // flags which can be set on a per-vreg basis
LIR_List* _lir;
BarrierSet* _bs;
LIRGenerator* gen() {
return this;
@ -174,8 +176,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
LIR_OprList _reg_for_constants;
Values _unpinned_constants;
LIR_Const* _card_table_base;
friend class PhiResolver;
// unified bailout support
@ -196,8 +196,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
LIR_Opr load_constant(Constant* x);
LIR_Opr load_constant(LIR_Const* constant);
LIR_Const* card_table_base() const { return _card_table_base; }
void set_result(Value x, LIR_Opr opr) {
assert(opr->is_valid(), "must set to valid value");
assert(x->operand()->is_illegal(), "operand should never change");
@ -253,12 +251,17 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
// generic interface
void pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info);
void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
// specific implementations
// pre barriers
void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch, CodeEmitInfo* info);
// post barriers
void G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
void CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);

View file

@ -168,6 +168,8 @@ void Runtime1::generate_blob_for(StubID id) {
switch (id) {
// These stubs don't need to have an oopmap
case dtrace_object_alloc_id:
case g1_pre_barrier_slow_id:
case g1_post_barrier_slow_id:
case slow_subtype_check_id:
case fpu2long_stub_id:
case unwind_exception_id:

View file

@ -56,6 +56,8 @@ class StubAssembler;
stub(access_field_patching) \
stub(load_klass_patching) \
stub(jvmti_exception_throw) \
stub(g1_pre_barrier_slow) \
stub(g1_post_barrier_slow) \
stub(fpu2long_stub) \
stub(counter_overflow) \
last_entry(number_of_ids)

View file

@ -213,9 +213,6 @@
develop(bool, UseFastLocking, true, \
"Use fast inlined locking code") \
\
product(bool, FastTLABRefill, true, \
"Use fast TLAB refill code") \
\
develop(bool, UseSlowPath, false, \
"For debugging: test slow cases by always using them") \
\

View file

@ -76,8 +76,9 @@ class BitCounter: public BitMapClosure {
BitCounter() : _count(0) {}
// Callback when bit in map is set
virtual void do_bit(size_t offset) {
virtual bool do_bit(size_t offset) {
_count++;
return true;
}
int count() {
@ -467,7 +468,7 @@ MethodLivenessResult MethodLiveness::get_liveness_at(int entry_bci) {
bci = 0;
}
MethodLivenessResult answer(NULL,0);
MethodLivenessResult answer((uintptr_t*)NULL,0);
if (_block_count > 0) {
if (TimeLivenessAnalysis) _time_total.start();

View file

@ -29,7 +29,7 @@ class MethodLivenessResult : public BitMap {
bool _is_valid;
public:
MethodLivenessResult(uintptr_t* map, idx_t size_in_bits)
MethodLivenessResult(BitMap::bm_word_t* map, idx_t size_in_bits)
: BitMap(map, size_in_bits)
, _is_valid(false)
{}

View file

@ -790,7 +790,7 @@ CompactibleFreeListSpace::object_iterate_careful_m(MemRegion mr,
}
HeapWord* CompactibleFreeListSpace::block_start(const void* p) const {
HeapWord* CompactibleFreeListSpace::block_start_const(const void* p) const {
NOT_PRODUCT(verify_objects_initialized());
return _bt.block_start(p);
}
@ -2286,9 +2286,9 @@ void CompactibleFreeListSpace::verifyIndexedFreeLists() const {
}
void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
guarantee(size % 2 == 0, "Odd slots should be empty");
for (FreeChunk* fc = _indexedFreeList[size].head(); fc != NULL;
fc = fc->next()) {
FreeChunk* fc = _indexedFreeList[size].head();
guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
for (; fc != NULL; fc = fc->next()) {
guarantee(fc->size() == size, "Size inconsistency");
guarantee(fc->isFree(), "!free?");
guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
@ -2790,10 +2790,11 @@ initialize_sequential_subtasks_for_rescan(int n_threads) {
assert(n_threads > 0, "Unexpected n_threads argument");
const size_t task_size = rescan_task_size();
size_t n_tasks = (used_region().word_size() + task_size - 1)/task_size;
assert((used_region().start() + (n_tasks - 1)*task_size <
used_region().end()) &&
(used_region().start() + n_tasks*task_size >=
used_region().end()), "n_task calculation incorrect");
assert((n_tasks == 0) == used_region().is_empty(), "n_tasks incorrect");
assert(n_tasks == 0 ||
((used_region().start() + (n_tasks - 1)*task_size < used_region().end()) &&
(used_region().start() + n_tasks*task_size >= used_region().end())),
"n_tasks calculation incorrect");
SequentialSubTasksDone* pst = conc_par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
pst->set_par_threads(n_threads);
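// Worked example of the n_tasks ceiling division asserted above (numbers are
// illustrative only): with task_size = 256 words and a 1000-word used region,
// n_tasks = (1000 + 256 - 1) / 256 = 4; the assert then checks 3*256 < 1000
// (the last task is not empty) and 4*256 >= 1000 (the tasks cover the region).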
@ -2833,7 +2834,7 @@ initialize_sequential_subtasks_for_marking(int n_threads,
assert(n_tasks == 0 ||
((span.start() + (n_tasks - 1)*task_size < span.end()) &&
(span.start() + n_tasks*task_size >= span.end())),
"n_task calculation incorrect");
"n_tasks calculation incorrect");
SequentialSubTasksDone* pst = conc_par_seq_tasks();
assert(!pst->valid(), "Clobbering existing data?");
pst->set_par_threads(n_threads);

View file

@ -502,7 +502,7 @@ class CompactibleFreeListSpace: public CompactibleSpace {
void blk_iterate(BlkClosure* cl);
void blk_iterate_careful(BlkClosureCareful* cl);
HeapWord* block_start(const void* p) const;
HeapWord* block_start_const(const void* p) const;
HeapWord* block_start_careful(const void* p) const;
size_t block_size(const HeapWord* p) const;
size_t block_size_no_stall(HeapWord* p, const CMSCollector* c) const;

View file

@ -2761,13 +2761,14 @@ class VerifyMarkedClosure: public BitMapClosure {
public:
VerifyMarkedClosure(CMSBitMap* bm): _marks(bm), _failed(false) {}
void do_bit(size_t offset) {
bool do_bit(size_t offset) {
HeapWord* addr = _marks->offsetToHeapWord(offset);
if (!_marks->isMarked(addr)) {
oop(addr)->print();
gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
_failed = true;
}
return true;
}
bool failed() { return _failed; }
@ -3650,6 +3651,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
CompactibleFreeListSpace* _cms_space;
CompactibleFreeListSpace* _perm_space;
HeapWord* _global_finger;
HeapWord* _restart_addr;
// Exposed here for yielding support
Mutex* const _bit_map_lock;
@ -3680,7 +3682,7 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
_term.set_task(this);
assert(_cms_space->bottom() < _perm_space->bottom(),
"Finger incorrectly initialized below");
_global_finger = _cms_space->bottom();
_restart_addr = _global_finger = _cms_space->bottom();
}
@ -3698,6 +3700,10 @@ class CMSConcMarkingTask: public YieldingFlexibleGangTask {
bool result() { return _result; }
void reset(HeapWord* ra) {
assert(_global_finger >= _cms_space->end(), "Postcondition of ::work(i)");
assert(_global_finger >= _perm_space->end(), "Postcondition of ::work(i)");
assert(ra < _perm_space->end(), "ra too large");
_restart_addr = _global_finger = ra;
_term.reset_for_reuse();
}
@ -3842,16 +3848,24 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
int n_tasks = pst->n_tasks();
// We allow that there may be no tasks to do here because
// we are restarting after a stack overflow.
assert(pst->valid() || n_tasks == 0, "Uninitializd use?");
assert(pst->valid() || n_tasks == 0, "Uninitialized use?");
int nth_task = 0;
HeapWord* start = sp->bottom();
HeapWord* aligned_start = sp->bottom();
if (sp->used_region().contains(_restart_addr)) {
// Align down to a card boundary for the start of 0th task
// for this space.
aligned_start =
(HeapWord*)align_size_down((uintptr_t)_restart_addr,
CardTableModRefBS::card_size);
}
size_t chunk_size = sp->marking_task_size();
while (!pst->is_task_claimed(/* reference */ nth_task)) {
// Having claimed the nth task in this space,
// compute the chunk that it corresponds to:
MemRegion span = MemRegion(start + nth_task*chunk_size,
start + (nth_task+1)*chunk_size);
MemRegion span = MemRegion(aligned_start + nth_task*chunk_size,
aligned_start + (nth_task+1)*chunk_size);
// Try and bump the global finger via a CAS;
// note that we need to do the global finger bump
// _before_ taking the intersection below, because
@ -3866,26 +3880,40 @@ void CMSConcMarkingTask::do_scan_and_mark(int i, CompactibleFreeListSpace* sp) {
// beyond the "top" address of the space.
span = span.intersection(sp->used_region());
if (!span.is_empty()) { // Non-null task
// We want to skip the first object because
// the protocol is to scan any object in its entirety
// that _starts_ in this span; a fortiori, any
// object starting in an earlier span is scanned
// as part of an earlier claimed task.
// Below we use the "careful" version of block_start
// so we do not try to navigate uninitialized objects.
HeapWord* prev_obj = sp->block_start_careful(span.start());
// Below we use a variant of block_size that uses the
// Printezis bits to avoid waiting for allocated
// objects to become initialized/parsable.
while (prev_obj < span.start()) {
size_t sz = sp->block_size_no_stall(prev_obj, _collector);
if (sz > 0) {
prev_obj += sz;
HeapWord* prev_obj;
assert(!span.contains(_restart_addr) || nth_task == 0,
"Inconsistency");
if (nth_task == 0) {
// For the 0th task, we'll not need to compute a block_start.
if (span.contains(_restart_addr)) {
// In the case of a restart because of stack overflow,
// we might additionally skip a chunk prefix.
prev_obj = _restart_addr;
} else {
// In this case we may end up doing a bit of redundant
// scanning, but that appears unavoidable, short of
// locking the free list locks; see bug 6324141.
break;
prev_obj = span.start();
}
} else {
// We want to skip the first object because
// the protocol is to scan any object in its entirety
// that _starts_ in this span; a fortiori, any
// object starting in an earlier span is scanned
// as part of an earlier claimed task.
// Below we use the "careful" version of block_start
// so we do not try to navigate uninitialized objects.
prev_obj = sp->block_start_careful(span.start());
// Below we use a variant of block_size that uses the
// Printezis bits to avoid waiting for allocated
// objects to become initialized/parsable.
while (prev_obj < span.start()) {
size_t sz = sp->block_size_no_stall(prev_obj, _collector);
if (sz > 0) {
prev_obj += sz;
} else {
// In this case we may end up doing a bit of redundant
// scanning, but that appears unavoidable, short of
// locking the free list locks; see bug 6324141.
break;
}
}
}
if (prev_obj < span.end()) {
@ -3938,12 +3966,14 @@ class Par_ConcMarkingClosure: public OopClosure {
void handle_stack_overflow(HeapWord* lost);
};
// Grey object rescan during work stealing phase --
// the salient assumption here is that stolen oops must
// always be initialized, so we do not need to check for
// uninitialized objects before scanning here.
// Grey object scanning during work stealing phase --
// the salient assumption here is that any references
// that are in these stolen objects being scanned must
// already have been initialized (else they would not have
// been published), so we do not need to check for
// uninitialized objects before pushing here.
void Par_ConcMarkingClosure::do_oop(oop obj) {
assert(obj->is_oop_or_null(), "expected an oop or NULL");
assert(obj->is_oop_or_null(true), "expected an oop or NULL");
HeapWord* addr = (HeapWord*)obj;
// Check if oop points into the CMS generation
// and is not marked
@ -4001,7 +4031,7 @@ void Par_ConcMarkingClosure::trim_queue(size_t max) {
// in CMSCollector's _restart_address.
void Par_ConcMarkingClosure::handle_stack_overflow(HeapWord* lost) {
// We need to do this under a mutex to prevent other
// workers from interfering with the expansion below.
// workers from interfering with the work done below.
MutexLockerEx ml(_overflow_stack->par_lock(),
Mutex::_no_safepoint_check_flag);
// Remember the least grey address discarded
@ -4640,8 +4670,11 @@ size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* gen,
startTimer();
sample_eden();
// Get and clear dirty region from card table
dirtyRegion = _ct->ct_bs()->dirty_card_range_after_preclean(
MemRegion(nextAddr, endAddr));
dirtyRegion = _ct->ct_bs()->dirty_card_range_after_reset(
MemRegion(nextAddr, endAddr),
true,
CardTableModRefBS::precleaned_card_val());
assert(dirtyRegion.start() >= nextAddr,
"returned region inconsistent?");
}
@ -5409,8 +5442,8 @@ void CMSCollector::do_remark_non_parallel() {
&mrias_cl);
{
TraceTime t("grey object rescan", PrintGCDetails, false, gclog_or_tty);
// Iterate over the dirty cards, marking them precleaned, and
// setting the corresponding bits in the mod union table.
// Iterate over the dirty cards, setting the corresponding bits in the
// mod union table.
{
ModUnionClosure modUnionClosure(&_modUnionTable);
_ct->ct_bs()->dirty_card_iterate(
@ -6182,7 +6215,7 @@ HeapWord* CMSCollector::next_card_start_after_block(HeapWord* addr) const {
// bit vector itself. That is done by a separate call CMSBitMap::allocate()
// further below.
CMSBitMap::CMSBitMap(int shifter, int mutex_rank, const char* mutex_name):
_bm(NULL,0),
_bm(),
_shifter(shifter),
_lock(mutex_rank >= 0 ? new Mutex(mutex_rank, mutex_name, true) : NULL)
{
@ -6207,7 +6240,7 @@ bool CMSBitMap::allocate(MemRegion mr) {
}
assert(_virtual_space.committed_size() == brs.size(),
"didn't reserve backing store for all of CMS bit map?");
_bm.set_map((uintptr_t*)_virtual_space.low());
_bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
_bmWordSize, "inconsistency in bit map sizing");
_bm.set_size(_bmWordSize >> _shifter);
@ -6554,7 +6587,7 @@ void Par_MarkRefsIntoAndScanClosure::do_oop(oop obj) {
if (obj != NULL) {
// Ignore mark word because this could be an already marked oop
// that may be chained at the end of the overflow list.
assert(obj->is_oop(), "expected an oop");
assert(obj->is_oop(true), "expected an oop");
HeapWord* addr = (HeapWord*)obj;
if (_span.contains(addr) &&
!_bit_map->isMarked(addr)) {
@ -6845,10 +6878,10 @@ void MarkFromRootsClosure::reset(HeapWord* addr) {
// Should revisit to see if this should be restructured for
// greater efficiency.
void MarkFromRootsClosure::do_bit(size_t offset) {
bool MarkFromRootsClosure::do_bit(size_t offset) {
if (_skipBits > 0) {
_skipBits--;
return;
return true;
}
// convert offset into a HeapWord*
HeapWord* addr = _bitMap->startWord() + offset;
@ -6886,10 +6919,11 @@ void MarkFromRootsClosure::do_bit(size_t offset) {
} // ...else the setting of klass will dirty the card anyway.
}
DEBUG_ONLY(})
return;
return true;
}
}
scanOopsInOop(addr);
return true;
}
// We take a break if we've been at this for a while,
@ -7023,10 +7057,10 @@ Par_MarkFromRootsClosure::Par_MarkFromRootsClosure(CMSConcMarkingTask* task,
// Should revisit to see if this should be restructured for
// greater efficiency.
void Par_MarkFromRootsClosure::do_bit(size_t offset) {
bool Par_MarkFromRootsClosure::do_bit(size_t offset) {
if (_skip_bits > 0) {
_skip_bits--;
return;
return true;
}
// convert offset into a HeapWord*
HeapWord* addr = _bit_map->startWord() + offset;
@ -7041,10 +7075,11 @@ void Par_MarkFromRootsClosure::do_bit(size_t offset) {
if (p->klass_or_null() == NULL || !p->is_parsable()) {
// in the case of Clean-on-Enter optimization, redirty card
// and avoid clearing card by increasing the threshold.
return;
return true;
}
}
scan_oops_in_oop(addr);
return true;
}
void Par_MarkFromRootsClosure::scan_oops_in_oop(HeapWord* ptr) {
@ -7167,7 +7202,7 @@ void MarkFromRootsVerifyClosure::reset(HeapWord* addr) {
// Should revisit to see if this should be restructured for
// greater efficiency.
void MarkFromRootsVerifyClosure::do_bit(size_t offset) {
bool MarkFromRootsVerifyClosure::do_bit(size_t offset) {
// convert offset into a HeapWord*
HeapWord* addr = _verification_bm->startWord() + offset;
assert(_verification_bm->endWord() && addr < _verification_bm->endWord(),
@ -7195,6 +7230,7 @@ void MarkFromRootsVerifyClosure::do_bit(size_t offset) {
new_oop->oop_iterate(&_pam_verify_closure);
}
assert(_mark_stack->isEmpty(), "tautology, emphasizing post-condition");
return true;
}
PushAndMarkVerifyClosure::PushAndMarkVerifyClosure(
@ -7289,6 +7325,8 @@ Par_PushOrMarkClosure::Par_PushOrMarkClosure(CMSCollector* collector,
_should_remember_klasses(collector->should_unload_classes())
{ }
// Assumes thread-safe access by callers, who are
// responsible for mutual exclusion.
void CMSCollector::lower_restart_addr(HeapWord* low) {
assert(_span.contains(low), "Out of bounds addr");
if (_restart_addr == NULL) {
@ -7314,7 +7352,7 @@ void PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
// in CMSCollector's _restart_address.
void Par_PushOrMarkClosure::handle_stack_overflow(HeapWord* lost) {
// We need to do this under a mutex to prevent other
// workers from interfering with the expansion below.
// workers from interfering with the work done below.
MutexLockerEx ml(_overflow_stack->par_lock(),
Mutex::_no_safepoint_check_flag);
// Remember the least grey address discarded
@ -7438,8 +7476,12 @@ PushAndMarkClosure::PushAndMarkClosure(CMSCollector* collector,
// Grey object rescan during pre-cleaning and second checkpoint phases --
// the non-parallel version (the parallel version appears further below.)
void PushAndMarkClosure::do_oop(oop obj) {
// If _concurrent_precleaning, ignore mark word verification
assert(obj->is_oop_or_null(_concurrent_precleaning),
// Ignore mark word verification. If during concurrent precleaning,
// the object monitor may be locked. If during the checkpoint
// phases, the object may already have been reached by a different
// path and may be at the end of the global overflow list (so
// the mark word may be NULL).
assert(obj->is_oop_or_null(true /* ignore mark word */),
"expected an oop or NULL");
HeapWord* addr = (HeapWord*)obj;
// Check if oop points into the CMS generation

View file

@ -1327,7 +1327,7 @@ class MarkFromRootsClosure: public BitMapClosure {
CMSMarkStack* markStack,
CMSMarkStack* revisitStack,
bool should_yield, bool verifying = false);
void do_bit(size_t offset);
bool do_bit(size_t offset);
void reset(HeapWord* addr);
inline void do_yield_check();
@ -1363,7 +1363,7 @@ class Par_MarkFromRootsClosure: public BitMapClosure {
CMSMarkStack* overflow_stack,
CMSMarkStack* revisit_stack,
bool should_yield);
void do_bit(size_t offset);
bool do_bit(size_t offset);
inline void do_yield_check();
private:
@ -1411,7 +1411,7 @@ class MarkFromRootsVerifyClosure: public BitMapClosure {
CMSBitMap* verification_bm,
CMSBitMap* cms_bm,
CMSMarkStack* mark_stack);
void do_bit(size_t offset);
bool do_bit(size_t offset);
void reset(HeapWord* addr);
};
@ -1420,8 +1420,9 @@ class MarkFromRootsVerifyClosure: public BitMapClosure {
// "empty" (i.e. the bit vector doesn't have any 1-bits).
class FalseBitMapClosure: public BitMapClosure {
public:
void do_bit(size_t offset) {
bool do_bit(size_t offset) {
guarantee(false, "Should not have a 1 bit");
return true;
}
};
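// Illustrative note (not part of this patch): do_bit now returns a bool. Every
// closure in this file returns true, which continues the iteration; the point of
// the new signature appears to be that a closure can return false to make the
// bitmap iteration stop early. A hypothetical example:
class FindFirstSetBitClosure: public BitMapClosure {
  size_t _found;
  bool   _has_found;
 public:
  FindFirstSetBitClosure() : _found(0), _has_found(false) {}
  bool do_bit(size_t offset) {
    _found = offset;
    _has_found = true;
    return false;            // stop the iteration at the first set bit
  }
  bool   has_found() const { return _has_found; }
  size_t found()     const { return _found; }
};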

View file

@ -0,0 +1,195 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// A BufferingOops closure tries to separate out the cost of finding roots
// from the cost of applying closures to them. It maintains an array of
// ref-containing locations. Until the array is full, applying the closure
// to an oop* merely records that location in the array. Since this
// closure app cost is small, an elapsed timer can approximately attribute
// all of this cost to the cost of finding the roots. When the array fills
// up, the wrapped closure is applied to all elements, keeping track of
// the elapsed time of this process, and leaving the array empty.
// The caller must be sure to call "done" to process any unprocessed
// buffered entries.
class Generation;
class HeapRegion;
class BufferingOopClosure: public OopClosure {
protected:
enum PrivateConstants {
BufferLength = 1024
};
oop *_buffer[BufferLength];
oop **_buffer_top;
oop **_buffer_curr;
OopClosure *_oc;
double _closure_app_seconds;
void process_buffer () {
double start = os::elapsedTime();
for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
_oc->do_oop(*curr);
}
_buffer_curr = _buffer;
_closure_app_seconds += (os::elapsedTime() - start);
}
public:
virtual void do_oop(narrowOop* p) {
guarantee(false, "NYI");
}
virtual void do_oop(oop *p) {
if (_buffer_curr == _buffer_top) {
process_buffer();
}
*_buffer_curr = p;
++_buffer_curr;
}
void done () {
if (_buffer_curr > _buffer) {
process_buffer();
}
}
double closure_app_seconds () {
return _closure_app_seconds;
}
BufferingOopClosure (OopClosure *oc) :
_oc(oc),
_buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
_closure_app_seconds(0.0) { }
};
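// Hypothetical usage sketch (not part of this patch): the closure wraps an
// existing OopClosure, absorbs the do_oop() calls made while roots are being
// found, and reports how much time the wrapped closure itself consumed. The
// root-scanning step below is a stand-in; only the BufferingOopClosure calls
// come from the class above.
static void time_root_scanning_sketch(OopClosure* real_closure) {
  BufferingOopClosure boc(real_closure);
  // ... invoke the root iteration here, passing &boc as the OopClosure ...
  boc.done();                               // flush any still-buffered locations
  double secs = boc.closure_app_seconds();  // time spent applying real_closure only
  tty->print_cr("closure application: %3.7f s", secs);
}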
class BufferingOopsInGenClosure: public OopsInGenClosure {
BufferingOopClosure _boc;
OopsInGenClosure* _oc;
public:
BufferingOopsInGenClosure(OopsInGenClosure *oc) :
_boc(oc), _oc(oc) {}
virtual void do_oop(narrowOop* p) {
guarantee(false, "NYI");
}
virtual void do_oop(oop* p) {
assert(generation()->is_in_reserved(p), "Must be in!");
_boc.do_oop(p);
}
void done() {
_boc.done();
}
double closure_app_seconds () {
return _boc.closure_app_seconds();
}
void set_generation(Generation* gen) {
OopsInGenClosure::set_generation(gen);
_oc->set_generation(gen);
}
void reset_generation() {
// Make sure we finish the current work with the current generation.
_boc.done();
OopsInGenClosure::reset_generation();
_oc->reset_generation();
}
};
class BufferingOopsInHeapRegionClosure: public OopsInHeapRegionClosure {
private:
enum PrivateConstants {
BufferLength = 1024
};
oop *_buffer[BufferLength];
oop **_buffer_top;
oop **_buffer_curr;
HeapRegion *_hr_buffer[BufferLength];
HeapRegion **_hr_curr;
OopsInHeapRegionClosure *_oc;
double _closure_app_seconds;
void process_buffer () {
assert((_hr_curr - _hr_buffer) == (_buffer_curr - _buffer),
"the two lengths should be the same");
double start = os::elapsedTime();
HeapRegion **hr_curr = _hr_buffer;
HeapRegion *hr_prev = NULL;
for (oop **curr = _buffer; curr < _buffer_curr; ++curr) {
HeapRegion *region = *hr_curr;
if (region != hr_prev) {
_oc->set_region(region);
hr_prev = region;
}
_oc->do_oop(*curr);
++hr_curr;
}
_buffer_curr = _buffer;
_hr_curr = _hr_buffer;
_closure_app_seconds += (os::elapsedTime() - start);
}
public:
virtual void do_oop(narrowOop *p) {
guarantee(false, "NYI");
}
virtual void do_oop(oop *p) {
if (_buffer_curr == _buffer_top) {
assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
process_buffer();
}
*_buffer_curr = p;
++_buffer_curr;
*_hr_curr = _from;
++_hr_curr;
}
void done () {
if (_buffer_curr > _buffer) {
assert(_hr_curr > _hr_buffer, "_hr_curr should be consistent with _buffer_curr");
process_buffer();
}
}
double closure_app_seconds () {
return _closure_app_seconds;
}
BufferingOopsInHeapRegionClosure (OopsInHeapRegionClosure *oc) :
_oc(oc),
_buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
_hr_curr(_hr_buffer),
_closure_app_seconds(0.0) { }
};

View file

@ -0,0 +1,409 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
# include "incls/_precompiled.incl"
# include "incls/_collectionSetChooser.cpp.incl"
CSetChooserCache::CSetChooserCache() {
for (int i = 0; i < CacheLength; ++i)
_cache[i] = NULL;
clear();
}
void CSetChooserCache::clear() {
_occupancy = 0;
_first = 0;
for (int i = 0; i < CacheLength; ++i) {
HeapRegion *hr = _cache[i];
if (hr != NULL)
hr->set_sort_index(-1);
_cache[i] = NULL;
}
}
#ifndef PRODUCT
bool CSetChooserCache::verify() {
int index = _first;
HeapRegion *prev = NULL;
for (int i = 0; i < _occupancy; ++i) {
guarantee(_cache[index] != NULL, "cache entry should not be empty");
HeapRegion *hr = _cache[index];
guarantee(!hr->is_young(), "should not be young!");
if (prev != NULL) {
guarantee(prev->gc_efficiency() >= hr->gc_efficiency(),
"cache should be correctly ordered");
}
guarantee(hr->sort_index() == get_sort_index(index),
"sort index should be correct");
index = trim_index(index + 1);
prev = hr;
}
for (int i = 0; i < (CacheLength - _occupancy); ++i) {
guarantee(_cache[index] == NULL, "cache entry should be empty");
index = trim_index(index + 1);
}
guarantee(index == _first, "we should have reached where we started from");
return true;
}
#endif // PRODUCT
void CSetChooserCache::insert(HeapRegion *hr) {
assert(!is_full(), "cache should not be empty");
hr->calc_gc_efficiency();
int empty_index;
if (_occupancy == 0) {
empty_index = _first;
} else {
empty_index = trim_index(_first + _occupancy);
assert(_cache[empty_index] == NULL, "last slot should be empty");
int last_index = trim_index(empty_index - 1);
HeapRegion *last = _cache[last_index];
assert(last != NULL,"as the cache is not empty, last should not be empty");
while (empty_index != _first &&
last->gc_efficiency() < hr->gc_efficiency()) {
_cache[empty_index] = last;
last->set_sort_index(get_sort_index(empty_index));
empty_index = last_index;
last_index = trim_index(last_index - 1);
last = _cache[last_index];
}
}
_cache[empty_index] = hr;
hr->set_sort_index(get_sort_index(empty_index));
++_occupancy;
assert(verify(), "cache should be consistent");
}
HeapRegion *CSetChooserCache::remove_first() {
if (_occupancy > 0) {
assert(_cache[_first] != NULL, "cache should have at least one region");
HeapRegion *ret = _cache[_first];
_cache[_first] = NULL;
ret->set_sort_index(-1);
--_occupancy;
_first = trim_index(_first + 1);
assert(verify(), "cache should be consistent");
return ret;
} else {
return NULL;
}
}
// This is a bit expensive... but we expect that it should not be called
// too often.
void CSetChooserCache::remove(HeapRegion *hr) {
assert(_occupancy > 0, "cache should not be empty");
assert(hr->sort_index() < -1, "should already be in the cache");
int index = get_index(hr->sort_index());
assert(_cache[index] == hr, "index should be correct");
int next_index = trim_index(index + 1);
int last_index = trim_index(_first + _occupancy - 1);
while (index != last_index) {
assert(_cache[next_index] != NULL, "should not be null");
_cache[index] = _cache[next_index];
_cache[index]->set_sort_index(get_sort_index(index));
index = next_index;
next_index = trim_index(next_index+1);
}
assert(index == last_index, "should have reached the last one");
_cache[index] = NULL;
hr->set_sort_index(-1);
--_occupancy;
assert(verify(), "cache should be consistent");
}
static inline int orderRegions(HeapRegion* hr1, HeapRegion* hr2) {
if (hr1 == NULL) {
if (hr2 == NULL) return 0;
else return 1;
} else if (hr2 == NULL) {
return -1;
}
if (hr2->gc_efficiency() < hr1->gc_efficiency()) return -1;
else if (hr1->gc_efficiency() < hr2->gc_efficiency()) return 1;
else return 0;
}
static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
return orderRegions(*hr1p, *hr2p);
}
CollectionSetChooser::CollectionSetChooser() :
// The line below is the worst bit of C++ hackery I've ever written
// (Detlefs, 11/23). You should think of it as equivalent to
// "_regions(100, true)": initialize the growable array and inform it
// that it should allocate its elem array(s) on the C heap. The first
// argument, however, is actually a comma expression (new-expr, 100).
// The purpose of the new_expr is to inform the growable array that it
// is *already* allocated on the C heap: it uses the placement syntax to
// keep it from actually doing any allocation.
_markedRegions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
(void*)&_markedRegions,
ResourceObj::C_HEAP),
100),
true),
_curMarkedIndex(0),
_numMarkedRegions(0),
_unmarked_age_1_returned_as_new(false),
_first_par_unreserved_idx(0)
{}
#ifndef PRODUCT
bool CollectionSetChooser::verify() {
int index = 0;
guarantee(_curMarkedIndex <= _numMarkedRegions,
"_curMarkedIndex should be within bounds");
while (index < _curMarkedIndex) {
guarantee(_markedRegions.at(index++) == NULL,
"all entries before _curMarkedIndex should be NULL");
}
HeapRegion *prev = NULL;
while (index < _numMarkedRegions) {
HeapRegion *curr = _markedRegions.at(index++);
if (curr != NULL) {
int si = curr->sort_index();
guarantee(!curr->is_young(), "should not be young!");
guarantee(si > -1 && si == (index-1), "sort index invariant");
if (prev != NULL) {
guarantee(orderRegions(prev, curr) != 1, "regions should be sorted");
}
prev = curr;
}
}
return _cache.verify();
}
#endif
bool
CollectionSetChooser::addRegionToCache() {
assert(!_cache.is_full(), "cache should not be full");
HeapRegion *hr = NULL;
while (hr == NULL && _curMarkedIndex < _numMarkedRegions) {
hr = _markedRegions.at(_curMarkedIndex++);
}
if (hr == NULL)
return false;
assert(!hr->is_young(), "should not be young!");
assert(hr->sort_index() == _curMarkedIndex-1, "sort_index invariant");
_markedRegions.at_put(hr->sort_index(), NULL);
_cache.insert(hr);
assert(!_cache.is_empty(), "cache should not be empty");
assert(verify(), "cache should be consistent");
return true;
}
void
CollectionSetChooser::fillCache() {
while (!_cache.is_full() && addRegionToCache()) {
}
}
void
CollectionSetChooser::sortMarkedHeapRegions() {
guarantee(_cache.is_empty(), "cache should be empty");
// First trim any unused portion of the top in the parallel case.
if (_first_par_unreserved_idx > 0) {
if (G1PrintParCleanupStats) {
gclog_or_tty->print(" Truncating _markedRegions from %d to %d.\n",
_markedRegions.length(), _first_par_unreserved_idx);
}
assert(_first_par_unreserved_idx <= _markedRegions.length(),
"Or we didn't reserved enough length");
_markedRegions.trunc_to(_first_par_unreserved_idx);
}
_markedRegions.sort(orderRegions);
assert(_numMarkedRegions <= _markedRegions.length(), "Requirement");
assert(_numMarkedRegions == 0
|| _markedRegions.at(_numMarkedRegions-1) != NULL,
"Testing _numMarkedRegions");
assert(_numMarkedRegions == _markedRegions.length()
|| _markedRegions.at(_numMarkedRegions) == NULL,
"Testing _numMarkedRegions");
if (G1PrintParCleanupStats) {
gclog_or_tty->print_cr(" Sorted %d marked regions.", _numMarkedRegions);
}
for (int i = 0; i < _numMarkedRegions; i++) {
assert(_markedRegions.at(i) != NULL, "Should be true by sorting!");
_markedRegions.at(i)->set_sort_index(i);
if (G1PrintRegionLivenessInfo > 0) {
if (i == 0) gclog_or_tty->print_cr("Sorted marked regions:");
if (i < G1PrintRegionLivenessInfo ||
(_numMarkedRegions-i) < G1PrintRegionLivenessInfo) {
HeapRegion* hr = _markedRegions.at(i);
size_t u = hr->used();
gclog_or_tty->print_cr(" Region %d: %d used, %d max live, %5.2f%%.",
i, u, hr->max_live_bytes(),
100.0*(float)hr->max_live_bytes()/(float)u);
}
}
}
if (G1PolicyVerbose > 1)
printSortedHeapRegions();
assert(verify(), "should now be sorted");
}
void
printHeapRegion(HeapRegion *hr) {
if (hr->isHumongous())
gclog_or_tty->print("H: ");
if (hr->in_collection_set())
gclog_or_tty->print("CS: ");
if (hr->popular())
gclog_or_tty->print("pop: ");
gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) "
"[" PTR_FORMAT ", " PTR_FORMAT"] "
"Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.",
hr, hr->is_young() ? "Y " : " ",
hr->is_marked()? "M1" : "M0",
hr->bottom(), hr->end(),
hr->used()/K, hr->garbage_bytes()/K);
}
void
CollectionSetChooser::addMarkedHeapRegion(HeapRegion* hr) {
assert(!hr->isHumongous(),
"Humongous regions shouldn't be added to the collection set");
assert(!hr->is_young(), "should not be young!");
_markedRegions.append(hr);
_numMarkedRegions++;
hr->calc_gc_efficiency();
}
void
CollectionSetChooser::
prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
_first_par_unreserved_idx = 0;
size_t max_waste = ParallelGCThreads * chunkSize;
// it should be aligned with respect to chunkSize
size_t aligned_n_regions =
(n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
assert( aligned_n_regions % chunkSize == 0, "should be aligned" );
_markedRegions.at_put_grow((int)(aligned_n_regions + max_waste - 1), NULL);
}
jint
CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
assert(_markedRegions.length() > res + n_regions - 1,
"Should already have been expanded");
return res - n_regions;
}
void
CollectionSetChooser::setMarkedHeapRegion(jint index, HeapRegion* hr) {
assert(_markedRegions.at(index) == NULL, "precondition");
assert(!hr->is_young(), "should not be young!");
_markedRegions.at_put(index, hr);
hr->calc_gc_efficiency();
}
void
CollectionSetChooser::incNumMarkedHeapRegions(jint inc_by) {
(void)Atomic::add(inc_by, &_numMarkedRegions);
}
void
CollectionSetChooser::clearMarkedHeapRegions(){
for (int i = 0; i < _markedRegions.length(); i++) {
HeapRegion* r = _markedRegions.at(i);
if (r != NULL) r->set_sort_index(-1);
}
_markedRegions.clear();
_curMarkedIndex = 0;
_numMarkedRegions = 0;
_cache.clear();
}
void
CollectionSetChooser::updateAfterFullCollection() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
clearMarkedHeapRegions();
}
void
CollectionSetChooser::printSortedHeapRegions() {
gclog_or_tty->print_cr("Printing %d Heap Regions sorted by amount of known garbage",
_numMarkedRegions);
for (int i = 0; i < _markedRegions.length(); i++) {
printHeapRegion(_markedRegions.at(i));
}
gclog_or_tty->print_cr("Done sorted heap region print");
}
void CollectionSetChooser::removeRegion(HeapRegion *hr) {
int si = hr->sort_index();
assert(si == -1 || hr->is_marked(), "Sort index not valid.");
if (si > -1) {
assert(_markedRegions.at(si) == hr, "Sort index not valid." );
_markedRegions.at_put(si, NULL);
} else if (si < -1) {
assert(_cache.region_in_cache(hr), "should be in the cache");
_cache.remove(hr);
assert(hr->sort_index() == -1, "sort index invariant");
}
hr->set_sort_index(-1);
}
// if time_remaining < 0.0, then this method should try to return
// a region, whether it fits within the remaining time or not
HeapRegion*
CollectionSetChooser::getNextMarkedRegion(double time_remaining,
double avg_prediction) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectorPolicy* g1p = g1h->g1_policy();
fillCache();
if (_cache.is_empty()) {
assert(_curMarkedIndex == _numMarkedRegions,
"if cache is empty, list should also be empty");
return NULL;
}
HeapRegion *hr = _cache.get_first();
assert(hr != NULL, "if cache not empty, first entry should be non-null");
double predicted_time = g1h->predict_region_elapsed_time_ms(hr, false);
if (g1p->adaptive_young_list_length()) {
if (time_remaining - predicted_time < 0.0) {
g1h->check_if_region_is_too_expensive(predicted_time);
return NULL;
}
} else {
if (predicted_time > 2.0 * avg_prediction) {
return NULL;
}
}
HeapRegion *hr2 = _cache.remove_first();
assert(hr == hr2, "cache contents should not have changed");
return hr;
}
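prepareForAddMarkedHeapRegionsPar and getParMarkedHeapRegionChunk above hand each GC worker a private, contiguous block of array indexes by atomically bumping a shared cursor, so workers never contend on individual slots and unused slots simply stay NULL until the later truncation. A minimal sketch of that claiming scheme using std::atomic and std::thread; the names here are illustrative, not the HotSpot API:

#include <algorithm>
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

static const int kChunk = 8;
static std::atomic<int> g_next_idx(0);   // shared cursor, like _first_par_unreserved_idx

// Claim a private chunk of kChunk slots; returns the first index of the chunk.
int claim_chunk() {
  // fetch_add returns the old value, so [res, res + kChunk) belongs to this thread.
  return g_next_idx.fetch_add(kChunk);
}

int main() {
  std::vector<int> slots(1024, -1);
  auto worker = [&](int id) {
    for (;;) {
      int first = claim_chunk();
      if (first >= (int)slots.size()) return;             // nothing left to claim
      int limit = std::min(first + kChunk, (int)slots.size());
      for (int i = first; i < limit; ++i) slots[i] = id;  // fill the claimed block
    }
  };
  std::thread t1(worker, 1), t2(worker, 2);
  t1.join(); t2.join();
  int unclaimed = 0;
  for (int v : slots) if (v == -1) ++unclaimed;
  std::printf("unclaimed slots: %d\n", unclaimed);  // 0: every slot claimed exactly once
  return 0;
}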

View file

@@ -0,0 +1,138 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// We need to sort heap regions by collection desirability.
class CSetChooserCache {
private:
enum PrivateConstants {
CacheLength = 16
};
HeapRegion* _cache[CacheLength];
int _occupancy; // number of regions in the cache
int _first; // "first" region in the cache
// adding CacheLength to deal with negative values
inline int trim_index(int index) {
return (index + CacheLength) % CacheLength;
}
inline int get_sort_index(int index) {
return -index-2;
}
inline int get_index(int sort_index) {
return -sort_index-2;
}
public:
CSetChooserCache(void);
inline int occupancy(void) { return _occupancy; }
inline bool is_full() { return _occupancy == CacheLength; }
inline bool is_empty() { return _occupancy == 0; }
void clear(void);
void insert(HeapRegion *hr);
HeapRegion *remove_first(void);
void remove (HeapRegion *hr);
inline HeapRegion *get_first(void) {
return _cache[_first];
}
#ifndef PRODUCT
bool verify (void);
bool region_in_cache(HeapRegion *hr) {
int sort_index = hr->sort_index();
if (sort_index < -1) {
int index = get_index(sort_index);
guarantee(index < CacheLength, "should be within bounds");
return _cache[index] == hr;
} else
return false;
}
#endif // PRODUCT
};
class CollectionSetChooser: public CHeapObj {
GrowableArray<HeapRegion*> _markedRegions;
int _curMarkedIndex;
int _numMarkedRegions;
CSetChooserCache _cache;
// True iff the last collection pause ran out of new "age 0" regions, and
// returned an "age 1" region.
bool _unmarked_age_1_returned_as_new;
jint _first_par_unreserved_idx;
public:
HeapRegion* getNextMarkedRegion(double time_so_far, double avg_prediction);
CollectionSetChooser();
void printSortedHeapRegions();
void sortMarkedHeapRegions();
void fillCache();
bool addRegionToCache(void);
void addMarkedHeapRegion(HeapRegion *hr);
// Must be called before calls to getParMarkedHeapRegionChunk.
// "n_regions" is the number of regions, "chunkSize" the chunk size.
void prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize);
// Returns the first index in a contiguous chunk of "n_regions" indexes
// that the calling thread has reserved. These must be set by the
// calling thread using "setMarkedHeapRegion" (to NULL if necessary).
jint getParMarkedHeapRegionChunk(jint n_regions);
// Set the marked array entry at index to hr. Careful to claim the index
// first if in parallel.
void setMarkedHeapRegion(jint index, HeapRegion* hr);
// Atomically increment the number of claimed regions by "inc_by".
void incNumMarkedHeapRegions(jint inc_by);
void clearMarkedHeapRegions();
void updateAfterFullCollection();
// Ensure that "hr" is not a member of the marked region array or the cache
void removeRegion(HeapRegion* hr);
bool unmarked_age_1_returned_as_new() { return _unmarked_age_1_returned_as_new; }
// Returns true if the used portion of "_markedRegions" is properly
// sorted, otherwise asserts false.
#ifndef PRODUCT
bool verify(void);
bool regionProperlyOrdered(HeapRegion* r) {
int si = r->sort_index();
return (si == -1) ||
(si > -1 && _markedRegions.at(si) == r) ||
(si < -1 && _cache.region_in_cache(r));
}
#endif
};
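The sort_index() of a region is overloaded: -1 means the region is tracked nowhere, values >= 0 are positions in _markedRegions, and values below -1 encode a slot in CSetChooserCache via sort_index = -slot - 2 (so slot 0 becomes -2, slot 15 becomes -17, and the mapping is its own inverse). A tiny sketch of that encoding, independent of the HotSpot classes:

#include <cassert>
#include <cstdio>

// Encode a cache slot as a negative sort index below -1, and decode it back.
// -1 stays reserved for "not in the array and not in the cache".
inline int cache_slot_to_sort_index(int slot) { return -slot - 2; }
inline int sort_index_to_cache_slot(int si)   { return -si - 2; }

int main() {
  for (int slot = 0; slot < 16; ++slot) {
    int si = cache_slot_to_sort_index(slot);
    assert(si < -1);                                // never collides with -1 or array indexes
    assert(sort_index_to_cache_slot(si) == slot);   // round-trips exactly
  }
  std::printf("slot 0 -> %d, slot 15 -> %d\n",
              cache_slot_to_sort_index(0), cache_slot_to_sort_index(15)); // -2, -17
  return 0;
}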

View file

@@ -0,0 +1,355 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentG1Refine.cpp.incl"
bool ConcurrentG1Refine::_enabled = false;
ConcurrentG1Refine::ConcurrentG1Refine() :
_pya(PYA_continue), _last_pya(PYA_continue),
_last_cards_during(), _first_traversal(false),
_card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
_hot_cache(NULL),
_def_use_cache(false), _use_cache(false),
_n_periods(0), _total_cards(0), _total_travs(0)
{
if (G1ConcRefine) {
_cg1rThread = new ConcurrentG1RefineThread(this);
assert(cg1rThread() != NULL, "Conc refine should have been created");
assert(cg1rThread()->cg1r() == this,
"Conc refine thread should refer to this");
} else {
_cg1rThread = NULL;
}
}
void ConcurrentG1Refine::init() {
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
_n_card_counts =
(unsigned) (g1h->g1_reserved_obj_bytes() >> CardTableModRefBS::card_shift);
_card_counts = NEW_C_HEAP_ARRAY(unsigned char, _n_card_counts);
for (size_t i = 0; i < _n_card_counts; i++) _card_counts[i] = 0;
ModRefBarrierSet* bs = g1h->mr_bs();
guarantee(bs->is_a(BarrierSet::CardTableModRef), "Precondition");
CardTableModRefBS* ctbs = (CardTableModRefBS*)bs;
_ct_bot = ctbs->byte_for_const(g1h->reserved_region().start());
if (G1ConcRSCountTraversals) {
_cur_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
_cum_card_count_histo = NEW_C_HEAP_ARRAY(unsigned, 256);
for (int i = 0; i < 256; i++) {
_cur_card_count_histo[i] = 0;
_cum_card_count_histo[i] = 0;
}
}
}
if (G1ConcRSLogCacheSize > 0) {
_def_use_cache = true;
_use_cache = true;
_hot_cache_size = (1 << G1ConcRSLogCacheSize);
_hot_cache = NEW_C_HEAP_ARRAY(jbyte*, _hot_cache_size);
_n_hot = 0;
_hot_cache_idx = 0;
}
}
ConcurrentG1Refine::~ConcurrentG1Refine() {
if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
assert(_card_counts != NULL, "Logic");
FREE_C_HEAP_ARRAY(unsigned char, _card_counts);
assert(_cur_card_count_histo != NULL, "Logic");
FREE_C_HEAP_ARRAY(unsigned, _cur_card_count_histo);
assert(_cum_card_count_histo != NULL, "Logic");
FREE_C_HEAP_ARRAY(unsigned, _cum_card_count_histo);
}
if (G1ConcRSLogCacheSize > 0) {
assert(_hot_cache != NULL, "Logic");
FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
}
}
bool ConcurrentG1Refine::refine() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
clear_hot_cache(); // Any previous values in this are now invalid.
g1h->g1_rem_set()->concurrentRefinementPass(this);
_traversals++;
unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
unsigned cards_during = cards_after-cards_before;
// If this is the first traversal since refinement was enabled and we
// processed some cards, or if the number of cards found dropped by more
// than a third since the last traversal, keep going. Otherwise, sleep a
// while.
bool res =
(_first_traversal && cards_during > 0)
||
(!_first_traversal && cards_during * 3 < _last_cards_during * 2);
_last_cards_during = cards_during;
_first_traversal = false;
return res;
}
void ConcurrentG1Refine::enable() {
MutexLocker x(G1ConcRefine_mon);
if (!_enabled) {
_enabled = true;
_first_traversal = true; _last_cards_during = 0;
G1ConcRefine_mon->notify_all();
}
}
unsigned ConcurrentG1Refine::disable() {
MutexLocker x(G1ConcRefine_mon);
if (_enabled) {
_enabled = false;
return _traversals;
} else {
return 0;
}
}
void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
G1ConcRefine_mon->lock();
while (!_enabled) {
G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
}
G1ConcRefine_mon->unlock();
_traversals = 0;
}
void ConcurrentG1Refine::set_pya_restart() {
// If we're using the log-based RS barrier, the above will cause
// in-progress traversals of completed log buffers to quit early; we will
// also abandon all other buffers.
if (G1RSBarrierUseQueue) {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
dcqs.abandon_logs();
if (_cg1rThread->do_traversal()) {
_pya = PYA_restart;
} else {
_cg1rThread->set_do_traversal(true);
// Reset the post-yield actions.
_pya = PYA_continue;
_last_pya = PYA_continue;
}
} else {
_pya = PYA_restart;
}
}
void ConcurrentG1Refine::set_pya_cancel() {
_pya = PYA_cancel;
}
PostYieldAction ConcurrentG1Refine::get_pya() {
if (_pya != PYA_continue) {
jint val = _pya;
while (true) {
jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
if (val_read == val) {
PostYieldAction res = (PostYieldAction)val;
assert(res != PYA_continue, "Only the refine thread should reset.");
_last_pya = res;
return res;
} else {
val = val_read;
}
}
}
// If we get here, _pya was PYA_continue when we sampled it above, so
// there is no pending action to hand back.
return PYA_continue;
}
PostYieldAction ConcurrentG1Refine::get_last_pya() {
PostYieldAction res = _last_pya;
_last_pya = PYA_continue;
return res;
}
bool ConcurrentG1Refine::do_traversal() {
return _cg1rThread->do_traversal();
}
int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
size_t card_num = (card_ptr - _ct_bot);
guarantee(0 <= card_num && card_num < _n_card_counts, "Bounds");
unsigned char cnt = _card_counts[card_num];
if (cnt < 255) _card_counts[card_num]++;
return cnt;
}
jbyte* ConcurrentG1Refine::cache_insert(jbyte* card_ptr) {
int count = add_card_count(card_ptr);
// Count previously unvisited cards.
if (count == 0) _total_cards++;
// We'll assume a traversal unless we store it in the cache.
if (count < G1ConcRSHotCardLimit) {
_total_travs++;
return card_ptr;
}
// Otherwise, it's hot.
jbyte* res = NULL;
MutexLockerEx x(HotCardCache_lock, Mutex::_no_safepoint_check_flag);
if (_n_hot == _hot_cache_size) {
_total_travs++;
res = _hot_cache[_hot_cache_idx];
_n_hot--;
}
// Now _n_hot < _hot_cache_size, and we can insert at _hot_cache_idx.
_hot_cache[_hot_cache_idx] = card_ptr;
_hot_cache_idx++;
if (_hot_cache_idx == _hot_cache_size) _hot_cache_idx = 0;
_n_hot++;
return res;
}
void ConcurrentG1Refine::clean_up_cache(int worker_i, G1RemSet* g1rs) {
assert(!use_cache(), "cache should be disabled");
int start_ind = _hot_cache_idx-1;
for (int i = 0; i < _n_hot; i++) {
int ind = start_ind - i;
if (ind < 0) ind = ind + _hot_cache_size;
jbyte* entry = _hot_cache[ind];
if (entry != NULL) {
g1rs->concurrentRefineOneCard(entry, worker_i);
}
}
_n_hot = 0;
_hot_cache_idx = 0;
}
void ConcurrentG1Refine::clear_and_record_card_counts() {
if (G1ConcRSLogCacheSize == 0 && !G1ConcRSCountTraversals) return;
_n_periods++;
if (G1ConcRSCountTraversals) {
for (size_t i = 0; i < _n_card_counts; i++) {
unsigned char bucket = _card_counts[i];
_cur_card_count_histo[bucket]++;
_card_counts[i] = 0;
}
gclog_or_tty->print_cr("Card counts:");
for (int i = 0; i < 256; i++) {
if (_cur_card_count_histo[i] > 0) {
gclog_or_tty->print_cr(" %3d: %9d", i, _cur_card_count_histo[i]);
_cum_card_count_histo[i] += _cur_card_count_histo[i];
_cur_card_count_histo[i] = 0;
}
}
} else {
assert(G1ConcRSLogCacheSize > 0, "Logic");
Copy::fill_to_words((HeapWord*)(&_card_counts[0]),
_n_card_counts / HeapWordSize);
}
}
void
ConcurrentG1Refine::
print_card_count_histo_range(unsigned* histo, int from, int to,
float& cum_card_pct,
float& cum_travs_pct) {
unsigned cards = 0;
unsigned travs = 0;
guarantee(to <= 256, "Precondition");
for (int i = from; i < to-1; i++) {
cards += histo[i];
travs += histo[i] * i;
}
if (to == 256) {
unsigned histo_card_sum = 0;
unsigned histo_trav_sum = 0;
for (int i = 1; i < 255; i++) {
histo_trav_sum += histo[i] * i;
}
cards += histo[255];
// correct traversals for the last one.
unsigned travs_255 = (unsigned) (_total_travs - histo_trav_sum);
travs += travs_255;
} else {
cards += histo[to-1];
travs += histo[to-1] * (to-1);
}
float fperiods = (float)_n_periods;
float f_tot_cards = (float)_total_cards/fperiods;
float f_tot_travs = (float)_total_travs/fperiods;
if (cards > 0) {
float fcards = (float)cards/fperiods;
float ftravs = (float)travs/fperiods;
if (to == 256) {
gclog_or_tty->print(" %4d- %10.2f%10.2f", from, fcards, ftravs);
} else {
gclog_or_tty->print(" %4d-%4d %10.2f%10.2f", from, to-1, fcards, ftravs);
}
float pct_cards = fcards*100.0/f_tot_cards;
cum_card_pct += pct_cards;
float pct_travs = ftravs*100.0/f_tot_travs;
cum_travs_pct += pct_travs;
gclog_or_tty->print_cr("%10.2f%10.2f%10.2f%10.2f",
pct_cards, cum_card_pct,
pct_travs, cum_travs_pct);
}
}
void ConcurrentG1Refine::print_final_card_counts() {
if (!G1ConcRSCountTraversals) return;
gclog_or_tty->print_cr("Did %d total traversals of %d distinct cards.",
_total_travs, _total_cards);
float fperiods = (float)_n_periods;
gclog_or_tty->print_cr(" This is an average of %8.2f traversals, %8.2f cards, "
"per collection.", (float)_total_travs/fperiods,
(float)_total_cards/fperiods);
gclog_or_tty->print_cr(" This is an average of %8.2f traversals/distinct "
"dirty card.\n",
_total_cards > 0 ?
(float)_total_travs/(float)_total_cards : 0.0);
gclog_or_tty->print_cr("Histogram:\n\n%10s %10s%10s%10s%10s%10s%10s",
"range", "# cards", "# travs", "% cards", "(cum)",
"% travs", "(cum)");
gclog_or_tty->print_cr("------------------------------------------------------------"
"-------------");
float cum_cards_pct = 0.0;
float cum_travs_pct = 0.0;
for (int i = 1; i < 10; i++) {
print_card_count_histo_range(_cum_card_count_histo, i, i+1,
cum_cards_pct, cum_travs_pct);
}
for (int i = 10; i < 100; i += 10) {
print_card_count_histo_range(_cum_card_count_histo, i, i+10,
cum_cards_pct, cum_travs_pct);
}
print_card_count_histo_range(_cum_card_count_histo, 100, 150,
cum_cards_pct, cum_travs_pct);
print_card_count_histo_range(_cum_card_count_histo, 150, 200,
cum_cards_pct, cum_travs_pct);
print_card_count_histo_range(_cum_card_count_histo, 200, 255,
cum_cards_pct, cum_travs_pct);
print_card_count_histo_range(_cum_card_count_histo, 255, 256,
cum_cards_pct, cum_travs_pct);
}
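cache_insert above keeps the most recently seen hot cards in a fixed ring: the new card overwrites the slot at the write cursor, and when the ring is already full the overwritten entry, which is the oldest one, is handed back to the caller to be refined immediately. A standalone sketch of that eviction policy with hypothetical names and no locking:

#include <cstdio>

static const int kCacheSize = 4;
static int* g_cache[kCacheSize];   // ring of cached "cards"
static int  g_idx = 0;             // next slot to write
static int  g_n   = 0;             // number of live entries

// Insert a card; returns the evicted card if the ring was full, else nullptr.
int* cache_insert(int* card) {
  int* evicted = nullptr;
  if (g_n == kCacheSize) {
    evicted = g_cache[g_idx];      // the oldest entry lives where we are about to write
    --g_n;
  }
  g_cache[g_idx] = card;
  g_idx = (g_idx + 1) % kCacheSize;
  ++g_n;
  return evicted;
}

int main() {
  int cards[6];
  for (int i = 0; i < 6; ++i) {
    int* out = cache_insert(&cards[i]);
    // Evictions start with the 5th insert, returning the oldest card first.
    std::printf("insert %d -> evicted %d\n", i, out ? (int)(out - cards) : -1);
  }
  return 0;
}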

View file

@@ -0,0 +1,132 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Forward decl
class ConcurrentG1RefineThread;
class G1RemSet;
// What to do after a yield:
enum PostYieldAction {
PYA_continue, // Continue the traversal
PYA_restart, // Restart
PYA_cancel // It's been completed by somebody else: cancel.
};
class ConcurrentG1Refine {
ConcurrentG1RefineThread* _cg1rThread;
volatile jint _pya;
PostYieldAction _last_pya;
static bool _enabled; // Protected by G1ConcRefine_mon.
unsigned _traversals;
// True for the first traversal after concurrent refinement is enabled.
unsigned _first_traversal;
// Number of cards processed during the last refinement traversal.
unsigned _last_cards_during;
// The cache for card refinement.
bool _use_cache;
bool _def_use_cache;
size_t _n_periods;
size_t _total_cards;
size_t _total_travs;
unsigned char* _card_counts;
unsigned _n_card_counts;
const jbyte* _ct_bot;
unsigned* _cur_card_count_histo;
unsigned* _cum_card_count_histo;
jbyte** _hot_cache;
int _hot_cache_size;
int _n_hot;
int _hot_cache_idx;
// Returns the count of this card after incrementing it.
int add_card_count(jbyte* card_ptr);
void print_card_count_histo_range(unsigned* histo, int from, int to,
float& cum_card_pct,
float& cum_travs_pct);
public:
ConcurrentG1Refine();
~ConcurrentG1Refine();
void init(); // Accomplish some initialization that has to wait.
// Enable concurrent refinement, waking up the worker thread if necessary.
void enable();
// Returns the number of traversals performed since this refiner was enabled.
unsigned disable();
// Requires G1ConcRefine_mon to be held.
bool enabled() { return _enabled; }
// Returns only when G1 concurrent refinement has been enabled.
void wait_for_ConcurrentG1Refine_enabled();
// Do one concurrent refinement pass over the card table. Returns "true"
// if heuristics determine that another pass should be done immediately.
bool refine();
// Indicate that an in-progress refinement pass should start over.
void set_pya_restart();
// Indicate that an in-progress refinement pass should quit.
void set_pya_cancel();
// Get the appropriate post-yield action. Also sets last_pya.
PostYieldAction get_pya();
// The last PYA read by "get_pya".
PostYieldAction get_last_pya();
bool do_traversal();
ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
// Returns the card that should be refined immediately: the given card if
// it is not yet hot enough to cache, the evicted card if inserting it
// caused an eviction, or NULL if the card was absorbed into the hot cache.
jbyte* cache_insert(jbyte* card_ptr);
// Process the cached entries.
void clean_up_cache(int worker_i, G1RemSet* g1rs);
// Discard entries in the hot cache.
void clear_hot_cache() {
_hot_cache_idx = 0; _n_hot = 0;
}
bool hot_cache_is_empty() { return _n_hot == 0; }
bool use_cache() { return _use_cache; }
void set_use_cache(bool b) {
if (b) _use_cache = _def_use_cache;
else _use_cache = false;
}
void clear_and_record_card_counts();
void print_final_card_counts();
};
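get_pya has to hand the pending post-yield action to exactly one reader and reset the shared word to "continue" in the same step, which is why it loops on a compare-and-swap instead of doing a plain load followed by a store. A sketch of the same consume-and-reset idiom with std::atomic; this is an illustration, not the Atomic:: API used above:

#include <atomic>
#include <cstdio>

enum PostYieldAction { PYA_continue, PYA_restart, PYA_cancel };

std::atomic<int> g_pya(PYA_continue);

// Atomically take whatever action is pending and reset the slot to PYA_continue.
PostYieldAction consume_pya() {
  int val = g_pya.load();
  while (val != PYA_continue) {
    // If the slot still holds 'val', swap in PYA_continue and we own 'val'.
    if (g_pya.compare_exchange_weak(val, PYA_continue)) {
      return (PostYieldAction)val;
    }
    // compare_exchange_weak reloaded 'val' with the current value; try again.
  }
  return PYA_continue;   // nothing was pending
}

int main() {
  g_pya.store(PYA_restart);
  std::printf("first read:  %d\n", consume_pya());   // 1 (PYA_restart), and resets the slot
  std::printf("second read: %d\n", consume_pya());   // 0 (PYA_continue)
  return 0;
}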

View file

@@ -0,0 +1,246 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentG1RefineThread.cpp.incl"
// ======= Concurrent G1 Refinement Thread ========
// The refinement thread is created when the G1 garbage collector is used
ConcurrentG1RefineThread::
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
ConcurrentGCThread(),
_cg1r(cg1r),
_started(false),
_in_progress(false),
_do_traversal(false),
_vtime_accum(0.0),
_co_tracker(G1CRGroup),
_interval_ms(5.0)
{
create_and_start();
}
const long timeout = 200; // ms.
void ConcurrentG1RefineThread::traversalBasedRefinement() {
_cg1r->wait_for_ConcurrentG1Refine_enabled();
MutexLocker x(G1ConcRefine_mon);
while (_cg1r->enabled()) {
MutexUnlocker ux(G1ConcRefine_mon);
ResourceMark rm;
HandleMark hm;
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine starting pass");
_sts.join();
bool no_sleep = _cg1r->refine();
_sts.leave();
if (!no_sleep) {
MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
// We do this only for the timeout; we don't expect this to be signalled.
CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
}
}
}
void ConcurrentG1RefineThread::queueBasedRefinement() {
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
// Wait for completed log buffers to exist.
{
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
while (!_do_traversal && !dcqs.process_completed_buffers() &&
!_should_terminate) {
DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
}
}
if (_should_terminate) {
return;
}
// Now we take them off (this doesn't hold locks while it applies
// closures). (If we did a full collection, then we'll do a full
// traversal.)
_sts.join();
if (_do_traversal) {
(void)_cg1r->refine();
switch (_cg1r->get_last_pya()) {
case PYA_cancel: case PYA_continue:
// Continue was caught and handled inside "refine". If it's still
// "continue" when we get here, we're done.
_do_traversal = false;
break;
case PYA_restart:
assert(_do_traversal, "Because of Full GC.");
break;
}
} else {
int n_logs = 0;
int lower_limit = 0;
double start_vtime_sec; // only used when G1SmoothConcRefine is on
int prev_buffer_num; // only used when G1SmoothConcRefine is on
if (G1SmoothConcRefine) {
lower_limit = 0;
start_vtime_sec = os::elapsedVTime();
prev_buffer_num = (int) dcqs.completed_buffers_num();
} else {
lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
}
while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
double end_vtime_sec;
double elapsed_vtime_sec;
int elapsed_vtime_ms;
int curr_buffer_num;
if (G1SmoothConcRefine) {
end_vtime_sec = os::elapsedVTime();
elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
curr_buffer_num = (int) dcqs.completed_buffers_num();
if (curr_buffer_num > prev_buffer_num ||
curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
decreaseInterval(elapsed_vtime_ms);
} else if (curr_buffer_num < prev_buffer_num) {
increaseInterval(elapsed_vtime_ms);
}
}
sample_young_list_rs_lengths();
_co_tracker.update(false);
if (G1SmoothConcRefine) {
start_vtime_sec = os::elapsedVTime();
prev_buffer_num = curr_buffer_num;
_sts.leave();
os::sleep(Thread::current(), (jlong) _interval_ms, false);
_sts.join();
}
n_logs++;
}
// Make sure we harvest the PYA, if any.
(void)_cg1r->get_pya();
}
_sts.leave();
}
void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
G1CollectorPolicy* g1p = g1h->g1_policy();
if (g1p->adaptive_young_list_length()) {
int regions_visited = 0;
g1h->young_list_rs_length_sampling_init();
while (g1h->young_list_rs_length_sampling_more()) {
g1h->young_list_rs_length_sampling_next();
++regions_visited;
// we try to yield every time we visit 10 regions
if (regions_visited == 10) {
if (_sts.should_yield()) {
_sts.yield("G1 refine");
// we just abandon the iteration
break;
}
regions_visited = 0;
}
}
g1p->check_prediction_validity();
}
}
void ConcurrentG1RefineThread::run() {
initialize_in_thread();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
_co_tracker.enable();
_co_tracker.start();
while (!_should_terminate) {
// wait until started is set.
if (G1RSBarrierUseQueue) {
queueBasedRefinement();
} else {
traversalBasedRefinement();
}
_sts.join();
_co_tracker.update();
_sts.leave();
if (os::supports_vtime()) {
_vtime_accum = (os::elapsedVTime() - _vtime_start);
} else {
_vtime_accum = 0.0;
}
}
_sts.join();
_co_tracker.update(true);
_sts.leave();
assert(_should_terminate, "just checking");
terminate();
}
void ConcurrentG1RefineThread::yield() {
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield");
_sts.yield("G1 refine");
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-yield-end");
}
void ConcurrentG1RefineThread::stop() {
// it is ok to take late safepoints here, if needed
{
MutexLockerEx mu(Terminator_lock);
_should_terminate = true;
}
{
MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
DirtyCardQ_CBL_mon->notify_all();
}
{
MutexLockerEx mu(Terminator_lock);
while (!_has_terminated) {
Terminator_lock->wait();
}
}
if (TraceG1Refine) gclog_or_tty->print_cr("G1-Refine-stop");
}
void ConcurrentG1RefineThread::print() {
gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
Thread::print();
gclog_or_tty->cr();
}
void ConcurrentG1RefineThread::set_do_traversal(bool b) {
_do_traversal = b;
}
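queueBasedRefinement blocks on the completed-buffer monitor until either buffers arrive or the thread is asked to terminate, and stop() notifies the same monitor so a sleeping refiner wakes up promptly and drains what is left. A minimal sketch of that wait/notify/terminate shape using std::mutex and std::condition_variable; all names here are illustrative:

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

std::mutex              g_mon;
std::condition_variable g_cv;
std::queue<int>         g_completed;         // stands in for the completed-buffer list
bool                    g_should_terminate = false;

void refiner() {
  for (;;) {
    int buf;
    {
      std::unique_lock<std::mutex> lk(g_mon);
      // Sleep until there is work or we are told to shut down.
      g_cv.wait(lk, [] { return !g_completed.empty() || g_should_terminate; });
      if (g_should_terminate && g_completed.empty()) return;
      buf = g_completed.front();
      g_completed.pop();
    }
    std::printf("refined buffer %d\n", buf);  // the work itself runs outside the lock
  }
}

int main() {
  std::thread t(refiner);
  for (int i = 0; i < 3; ++i) {
    { std::lock_guard<std::mutex> lk(g_mon); g_completed.push(i); }
    g_cv.notify_all();
  }
  { std::lock_guard<std::mutex> lk(g_mon); g_should_terminate = true; }
  g_cv.notify_all();
  t.join();
  return 0;
}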

View file

@@ -0,0 +1,104 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Forward Decl.
class ConcurrentG1Refine;
// The G1 Concurrent Refinement Thread (could be several in the future).
class ConcurrentG1RefineThread: public ConcurrentGCThread {
friend class VMStructs;
friend class G1CollectedHeap;
double _vtime_start; // Initial virtual time.
double _vtime_accum; // Accumulated virtual time.
public:
virtual void run();
private:
ConcurrentG1Refine* _cg1r;
bool _started;
bool _in_progress;
volatile bool _restart;
COTracker _co_tracker;
double _interval_ms;
bool _do_traversal;
void decreaseInterval(int processing_time_ms) {
double min_interval_ms = (double) processing_time_ms;
_interval_ms = 0.8 * _interval_ms;
if (_interval_ms < min_interval_ms)
_interval_ms = min_interval_ms;
}
void increaseInterval(int processing_time_ms) {
double max_interval_ms = 9.0 * (double) processing_time_ms;
_interval_ms = 1.1 * _interval_ms;
if (max_interval_ms > 0 && _interval_ms > max_interval_ms)
_interval_ms = max_interval_ms;
}
void sleepBeforeNextCycle();
void traversalBasedRefinement();
void queueBasedRefinement();
// For use by G1CollectedHeap, which is a friend.
static SuspendibleThreadSet* sts() { return &_sts; }
public:
// Constructor
ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
// Printing
void print();
// Total virtual time so far.
double vtime_accum() { return _vtime_accum; }
ConcurrentG1Refine* cg1r() { return _cg1r; }
void set_started() { _started = true; }
void clear_started() { _started = false; }
bool started() { return _started; }
void set_in_progress() { _in_progress = true; }
void clear_in_progress() { _in_progress = false; }
bool in_progress() { return _in_progress; }
void set_do_traversal(bool b);
bool do_traversal() { return _do_traversal; }
void sample_young_list_rs_lengths();
// Yield for GC
void yield();
// shutdown
static void stop();
};
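decreaseInterval and increaseInterval steer the refinement thread's sleep time multiplicatively: shrink by 20% (but never below the time the last pass took) while buffers pile up, grow by 10% (capped at nine times the pass time) while the queue drains. A small self-contained sketch of that controller; the constants mirror the ones above, everything else is hypothetical:

#include <algorithm>
#include <cstdio>

struct IntervalController {
  double interval_ms = 5.0;

  // Buffers are accumulating: refine more often, but never sleep less than
  // one processing pass takes.
  void decrease(double processing_time_ms) {
    interval_ms = std::max(0.8 * interval_ms, processing_time_ms);
  }
  // Buffers are draining: back off a little, capped at 9x the pass time.
  void increase(double processing_time_ms) {
    interval_ms = std::min(1.1 * interval_ms, 9.0 * processing_time_ms);
  }
};

int main() {
  IntervalController c;
  for (int i = 0; i < 5; ++i) c.decrease(2.0);   // sustained backlog
  std::printf("after backlog:  %.2f ms\n", c.interval_ms);
  for (int i = 0; i < 5; ++i) c.increase(2.0);   // queue drains again
  std::printf("after draining: %.2f ms\n", c.interval_ms);
  return 0;
}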

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -0,0 +1,336 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentMarkThread.cpp.incl"
// ======= Concurrent Mark Thread ========
// The CM thread is created when the G1 garbage collector is used
SurrogateLockerThread*
ConcurrentMarkThread::_slt = NULL;
ConcurrentMarkThread::ConcurrentMarkThread(ConcurrentMark* cm) :
ConcurrentGCThread(),
_cm(cm),
_started(false),
_in_progress(false),
_vtime_accum(0.0),
_vtime_mark_accum(0.0),
_vtime_count_accum(0.0)
{
create_and_start();
}
class CMCheckpointRootsInitialClosure: public VoidClosure {
ConcurrentMark* _cm;
public:
CMCheckpointRootsInitialClosure(ConcurrentMark* cm) :
_cm(cm) {}
void do_void(){
_cm->checkpointRootsInitial();
}
};
class CMCheckpointRootsFinalClosure: public VoidClosure {
ConcurrentMark* _cm;
public:
CMCheckpointRootsFinalClosure(ConcurrentMark* cm) :
_cm(cm) {}
void do_void(){
_cm->checkpointRootsFinal(false); // !clear_all_soft_refs
}
};
class CMCleanUp: public VoidClosure {
ConcurrentMark* _cm;
public:
CMCleanUp(ConcurrentMark* cm) :
_cm(cm) {}
void do_void(){
_cm->cleanup();
}
};
void ConcurrentMarkThread::run() {
initialize_in_thread();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
G1CollectedHeap* g1 = G1CollectedHeap::heap();
G1CollectorPolicy* g1_policy = g1->g1_policy();
G1MMUTracker *mmu_tracker = g1_policy->mmu_tracker();
Thread *current_thread = Thread::current();
while (!_should_terminate) {
// wait until started is set.
sleepBeforeNextCycle();
{
ResourceMark rm;
HandleMark hm;
double cycle_start = os::elapsedVTime();
double mark_start_sec = os::elapsedTime();
char verbose_str[128];
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
tty->print_cr("[GC concurrent-mark-start]");
}
if (!g1_policy->in_young_gc_mode()) {
// this ensures the flag is not set if we bail out of the marking
// cycle; normally the flag is cleared immediately after cleanup
g1->set_marking_complete();
if (g1_policy->adaptive_young_list_length()) {
double now = os::elapsedTime();
double init_prediction_ms = g1_policy->predict_init_time_ms();
jlong sleep_time_ms = mmu_tracker->when_ms(now, init_prediction_ms);
os::sleep(current_thread, sleep_time_ms, false);
}
// We don't have to skip here if we've been asked to restart, because
// in the worst case we just enqueue a new VM operation to start a
// marking. Note that the init operation resets has_aborted()
CMCheckpointRootsInitialClosure init_cl(_cm);
strcpy(verbose_str, "GC initial-mark");
VM_CGC_Operation op(&init_cl, verbose_str);
VMThread::execute(&op);
}
int iter = 0;
do {
iter++;
if (!cm()->has_aborted()) {
_cm->markFromRoots();
} else {
if (TraceConcurrentMark)
gclog_or_tty->print_cr("CM-skip-mark-from-roots");
}
double mark_end_time = os::elapsedVTime();
double mark_end_sec = os::elapsedTime();
_vtime_mark_accum += (mark_end_time - cycle_start);
if (!cm()->has_aborted()) {
if (g1_policy->adaptive_young_list_length()) {
double now = os::elapsedTime();
double remark_prediction_ms = g1_policy->predict_remark_time_ms();
jlong sleep_time_ms = mmu_tracker->when_ms(now, remark_prediction_ms);
os::sleep(current_thread, sleep_time_ms, false);
}
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-end, %1.7lf sec]",
mark_end_sec - mark_start_sec);
}
CMCheckpointRootsFinalClosure final_cl(_cm);
sprintf(verbose_str, "GC remark");
VM_CGC_Operation op(&final_cl, verbose_str);
VMThread::execute(&op);
} else {
if (TraceConcurrentMark)
gclog_or_tty->print_cr("CM-skip-remark");
}
if (cm()->restart_for_overflow() &&
G1TraceMarkStackOverflow) {
gclog_or_tty->print_cr("Restarting conc marking because of MS overflow "
"in remark (restart #%d).", iter);
}
if (cm()->restart_for_overflow()) {
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-restart-for-overflow]");
}
}
} while (cm()->restart_for_overflow());
double counting_start_time = os::elapsedVTime();
// YSR: These look dubious (i.e. redundant) !!! FIX ME
slt()->manipulatePLL(SurrogateLockerThread::acquirePLL);
slt()->manipulatePLL(SurrogateLockerThread::releaseAndNotifyPLL);
if (!cm()->has_aborted()) {
double count_start_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-count-start]");
}
_sts.join();
_cm->calcDesiredRegions();
_sts.leave();
if (!cm()->has_aborted()) {
double count_end_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-count-end, %1.7lf]",
count_end_sec - count_start_sec);
}
}
} else {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-end-game");
}
double end_time = os::elapsedVTime();
_vtime_count_accum += (end_time - counting_start_time);
// Update the total virtual time before doing this, since it will try
// to measure it to get the vtime for this marking. We purposely
// neglect the presumably-short "completeCleanup" phase here.
_vtime_accum = (end_time - _vtime_start);
if (!cm()->has_aborted()) {
if (g1_policy->adaptive_young_list_length()) {
double now = os::elapsedTime();
double cleanup_prediction_ms = g1_policy->predict_cleanup_time_ms();
jlong sleep_time_ms = mmu_tracker->when_ms(now, cleanup_prediction_ms);
os::sleep(current_thread, sleep_time_ms, false);
}
CMCleanUp cl_cl(_cm);
sprintf(verbose_str, "GC cleanup");
VM_CGC_Operation op(&cl_cl, verbose_str);
VMThread::execute(&op);
} else {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-skip-cleanup");
G1CollectedHeap::heap()->set_marking_complete();
}
if (!cm()->has_aborted()) {
double cleanup_start_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-cleanup-start]");
}
// Now do the remainder of the cleanup operation.
_sts.join();
_cm->completeCleanup();
if (!cm()->has_aborted()) {
g1_policy->record_concurrent_mark_cleanup_completed();
double cleanup_end_sec = os::elapsedTime();
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-cleanup-end, %1.7lf]",
cleanup_end_sec - cleanup_start_sec);
}
}
_sts.leave();
}
// We're done: no more unclean regions coming.
G1CollectedHeap::heap()->set_unclean_regions_coming(false);
if (cm()->has_aborted()) {
if (PrintGC) {
gclog_or_tty->date_stamp(PrintGCDateStamps);
gclog_or_tty->stamp(PrintGCTimeStamps);
gclog_or_tty->print_cr("[GC concurrent-mark-abort]");
}
}
_sts.join();
_cm->disable_co_trackers();
_sts.leave();
// we now want to allow clearing of the marking bitmap to be
// suspended by a collection pause.
_sts.join();
_cm->clearNextBitmap();
_sts.leave();
}
}
assert(_should_terminate, "just checking");
terminate();
}
void ConcurrentMarkThread::yield() {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield");
_sts.yield("Concurrent Mark");
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-yield-end");
}
void ConcurrentMarkThread::stop() {
// it is ok to take late safepoints here, if needed
MutexLockerEx mu(Terminator_lock);
_should_terminate = true;
while (!_has_terminated) {
Terminator_lock->wait();
}
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-stop");
}
void ConcurrentMarkThread::print() {
gclog_or_tty->print("\"Concurrent Mark GC Thread\" ");
Thread::print();
gclog_or_tty->cr();
}
void ConcurrentMarkThread::sleepBeforeNextCycle() {
clear_in_progress();
// We join here because we don't want to do the "shouldConcurrentMark()"
// below while the world is otherwise stopped.
MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
while (!started()) {
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-sleeping");
CGC_lock->wait(Mutex::_no_safepoint_check_flag);
}
set_in_progress();
clear_started();
if (TraceConcurrentMark) gclog_or_tty->print_cr("CM-starting");
return;
}
// Note: this method, although exported by the ConcurrentMarkThread,
// which is a non-JavaThread, can only be called by a JavaThread.
// Currently this is done at vm creation time (post-vm-init) by the
// main/Primordial (Java)Thread.
// XXX Consider changing this in the future to allow the CM thread
// itself to create this thread?
void ConcurrentMarkThread::makeSurrogateLockerThread(TRAPS) {
assert(_slt == NULL, "SLT already created");
_slt = SurrogateLockerThread::make(THREAD);
}
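Several places above sleep for mmu_tracker->when_ms(now, prediction) milliseconds so that the predicted pause still fits the minimum-mutator-utilization goal. A deliberately simplified sketch of that calculation, assuming the goal is "at most max_gc_ms of GC time in any window of slice_ms" and that only the single most recent pause matters (the real tracker keeps a history of pauses):

#include <algorithm>
#include <cstdio>

// Simplified MMU pacing: one remembered pause [prev_start, prev_end].
// Returns how many ms after 'now' a pause of 'pred_ms' may start so that no
// window of length slice_ms contains more than max_gc_ms of GC time.
// Assumes pred_ms <= max_gc_ms and prev_end <= now.
double when_ms(double now, double pred_ms,
               double prev_start, double prev_end,
               double slice_ms, double max_gc_ms) {
  double prev_len = prev_end - prev_start;
  if (prev_len + pred_ms <= max_gc_ms) return 0.0;  // both pauses fit in any window
  // The worst window is the one ending when the new pause ends; it may keep
  // at most (max_gc_ms - pred_ms) of the previous pause, so its start must be
  // at least prev_end - (max_gc_ms - pred_ms).  Solve for the pause start.
  double earliest_start = prev_end - (max_gc_ms - pred_ms) - pred_ms + slice_ms;
  return std::max(0.0, earliest_start - now);
}

int main() {
  // Previous pause: 100..140 ms.  Goal: at most 50 ms GC per 200 ms window.
  // A predicted 30 ms pause may overlap any window with the old one by at most 20 ms.
  double d = when_ms(150.0, 30.0, 100.0, 140.0, 200.0, 50.0);
  std::printf("sleep for %.1f ms before starting the pause\n", d);  // 140.0
  return 0;
}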

View file

@@ -0,0 +1,84 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The Concurrent Mark GC Thread (could be several in the future).
// This is copied from the Concurrent Mark Sweep GC Thread
// Still under construction.
class ConcurrentMark;
class ConcurrentMarkThread: public ConcurrentGCThread {
friend class VMStructs;
double _vtime_start; // Initial virtual time.
double _vtime_accum; // Accumulated virtual time.
double _vtime_mark_accum;
double _vtime_count_accum;
public:
virtual void run();
private:
ConcurrentMark* _cm;
bool _started;
bool _in_progress;
void sleepBeforeNextCycle();
static SurrogateLockerThread* _slt;
public:
// Constructor
ConcurrentMarkThread(ConcurrentMark* cm);
static void makeSurrogateLockerThread(TRAPS);
static SurrogateLockerThread* slt() { return _slt; }
// Printing
void print();
// Total virtual time so far.
double vtime_accum();
// Marking virtual time so far
double vtime_mark_accum();
// Counting virtual time so far.
double vtime_count_accum() { return _vtime_count_accum; }
ConcurrentMark* cm() { return _cm; }
void set_started() { _started = true; }
void clear_started() { _started = false; }
bool started() { return _started; }
void set_in_progress() { _in_progress = true; }
void clear_in_progress() { _in_progress = false; }
bool in_progress() { return _in_progress; }
// Yield for GC
void yield();
// shutdown
static void stop();
};

View file

@@ -0,0 +1,33 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Total virtual time so far.
inline double ConcurrentMarkThread::vtime_accum() {
return _vtime_accum + _cm->all_task_accum_vtime();
}
// Marking virtual time so far
inline double ConcurrentMarkThread::vtime_mark_accum() {
return _vtime_mark_accum + _cm->all_task_accum_vtime();
}

View file

@@ -0,0 +1,191 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_concurrentZFThread.cpp.incl"
// ======= Concurrent Zero-Fill Thread ========
// The ZF thread is created when the G1 garbage collector is used
int ConcurrentZFThread::_region_allocs = 0;
int ConcurrentZFThread::_sync_zfs = 0;
int ConcurrentZFThread::_zf_waits = 0;
int ConcurrentZFThread::_regions_filled = 0;
ConcurrentZFThread::ConcurrentZFThread() :
ConcurrentGCThread(),
_co_tracker(G1ZFGroup)
{
create_and_start();
}
void ConcurrentZFThread::wait_for_ZF_completed(HeapRegion* hr) {
assert(ZF_mon->owned_by_self(), "Precondition.");
note_zf_wait();
while (hr->zero_fill_state() == HeapRegion::ZeroFilling) {
ZF_mon->wait(Mutex::_no_safepoint_check_flag);
}
}
void ConcurrentZFThread::processHeapRegion(HeapRegion* hr) {
assert(!Universe::heap()->is_gc_active(),
"This should not happen during GC.");
assert(hr != NULL, "Precondition");
// These are unlocked reads, but if this test is successful, then no
// other thread will attempt this zero filling. Only a GC thread can
// modify the ZF state of a region whose state is zero-filling, and this
// should only happen while the ZF thread is locking out GC.
if (hr->zero_fill_state() == HeapRegion::ZeroFilling
&& hr->zero_filler() == Thread::current()) {
assert(hr->top() == hr->bottom(), "better be empty!");
assert(!hr->isHumongous(), "Only free regions on unclean list.");
Copy::fill_to_words(hr->bottom(), hr->capacity()/HeapWordSize);
note_region_filled();
}
}
void ConcurrentZFThread::run() {
initialize_in_thread();
Thread* thr_self = Thread::current();
_vtime_start = os::elapsedVTime();
wait_for_universe_init();
_co_tracker.enable();
_co_tracker.start();
G1CollectedHeap* g1 = G1CollectedHeap::heap();
_sts.join();
while (!_should_terminate) {
_sts.leave();
{
MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
// This local variable will hold a region being zero-filled. This
// region will be on neither the unclean nor the zero-filled list, and
// will not be available for allocation; thus an allocation might fail
// and cause a full GC because of this, but that is a price we are
// willing to pay. (In future, we might want to make the fact
// that there's a region being zero-filled apparent to the G1 heap,
// which could then wait for it in this extreme case...)
HeapRegion* to_fill;
while (!g1->should_zf()
|| (to_fill = g1->pop_unclean_region_list_locked()) == NULL)
ZF_mon->wait(Mutex::_no_safepoint_check_flag);
while (to_fill->zero_fill_state() == HeapRegion::ZeroFilling)
ZF_mon->wait(Mutex::_no_safepoint_check_flag);
// So now to_fill is non-NULL and is not ZeroFilling. It might be
// Allocated or ZeroFilled. (The latter could happen if this thread
// starts the zero-filling of a region, but a GC intervenes and
// pushes new regions needing zero-filling onto the front of the
// list.)
switch (to_fill->zero_fill_state()) {
case HeapRegion::Allocated:
to_fill = NULL;
break;
case HeapRegion::NotZeroFilled:
to_fill->set_zero_fill_in_progress(thr_self);
ZF_mon->unlock();
_sts.join();
processHeapRegion(to_fill);
_sts.leave();
ZF_mon->lock_without_safepoint_check();
if (to_fill->zero_fill_state() == HeapRegion::ZeroFilling
&& to_fill->zero_filler() == thr_self) {
to_fill->set_zero_fill_complete();
(void)g1->put_free_region_on_list_locked(to_fill);
}
break;
case HeapRegion::ZeroFilled:
(void)g1->put_free_region_on_list_locked(to_fill);
break;
case HeapRegion::ZeroFilling:
ShouldNotReachHere();
break;
}
}
_vtime_accum = (os::elapsedVTime() - _vtime_start);
_sts.join();
_co_tracker.update();
}
_co_tracker.update(false);
_sts.leave();
assert(_should_terminate, "just checking");
terminate();
}
bool ConcurrentZFThread::offer_yield() {
if (_sts.should_yield()) {
_sts.yield("Concurrent ZF");
return true;
} else {
return false;
}
}
void ConcurrentZFThread::stop() {
// it is ok to take late safepoints here, if needed
MutexLockerEx mu(Terminator_lock);
_should_terminate = true;
while (!_has_terminated) {
Terminator_lock->wait();
}
}
void ConcurrentZFThread::print() {
gclog_or_tty->print("\"Concurrent ZF Thread\" ");
Thread::print();
gclog_or_tty->cr();
}
double ConcurrentZFThread::_vtime_accum;
void ConcurrentZFThread::print_summary_info() {
gclog_or_tty->print("\nConcurrent Zero-Filling:\n");
gclog_or_tty->print(" Filled %d regions, used %5.2fs.\n",
_regions_filled,
vtime_accum());
gclog_or_tty->print(" Of %d region allocs, %d (%5.2f%%) required sync ZF,\n",
_region_allocs, _sync_zfs,
(_region_allocs > 0 ?
(float)_sync_zfs/(float)_region_allocs*100.0 :
0.0));
gclog_or_tty->print(" and %d (%5.2f%%) required a ZF wait.\n",
_zf_waits,
(_region_allocs > 0 ?
(float)_zf_waits/(float)_region_allocs*100.0 :
0.0));
}
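// Illustrative sketch (hypothetical helper, not part of this file): the
// percentages printed above are just the sync-ZF and ZF-wait counts
// divided by the number of region allocations, with the same
// divide-by-zero guard as print_summary_info().
static double example_sync_zf_percentage(int region_allocs, int sync_zfs) {
  if (region_allocs <= 0) return 0.0;   // guard against division by zero
  return (double)sync_zfs / (double)region_allocs * 100.0;
}
// For example, 12 synchronous zero-fills out of 400 region allocations
// would be reported as 3.00%.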

View file

@@ -0,0 +1,85 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The Concurrent ZF Thread. Performs concurrent zero-filling.
class ConcurrentZFThread: public ConcurrentGCThread {
friend class VMStructs;
friend class ZeroFillRegionClosure;
private:
// Zero fill the heap region.
void processHeapRegion(HeapRegion* r);
// Stats
// Allocation (protected by heap lock).
static int _region_allocs; // Number of regions allocated
static int _sync_zfs; // Synchronous zero-fills +
static int _zf_waits; // Wait for conc zero-fill completion.
// Number of regions the ZF thread fills.
static int _regions_filled;
COTracker _co_tracker;
double _vtime_start; // Initial virtual time.
// These are static because the "print_summary_info" method is, and
// it currently assumes there is only one ZF thread. We'll change when
// we need to.
static double _vtime_accum; // Accumulated virtual time.
static double vtime_accum() { return _vtime_accum; }
// Offer yield for GC. Returns true if yield occurred.
bool offer_yield();
public:
// Constructor
ConcurrentZFThread();
// Main loop.
virtual void run();
// Printing
void print();
// Waits until "r" has been zero-filled. Requires caller to hold the
// ZF_mon.
static void wait_for_ZF_completed(HeapRegion* r);
// Get or clear the current unclean region. Should be done
// while holding the ZF_needed_mon lock.
// shutdown
static void stop();
// Stats
static void note_region_alloc() {_region_allocs++; }
static void note_sync_zfs() { _sync_zfs++; }
static void note_zf_wait() { _zf_waits++; }
static void note_region_filled() { _regions_filled++; }
static void print_summary_info();
};

View file

@@ -0,0 +1,307 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
# include "incls/_precompiled.incl"
# include "incls/_dirtyCardQueue.cpp.incl"
bool DirtyCardQueue::apply_closure(CardTableEntryClosure* cl,
bool consume,
size_t worker_i) {
bool res = true;
if (_buf != NULL) {
res = apply_closure_to_buffer(cl, _buf, _index, _sz,
consume,
(int) worker_i);
if (res && consume) _index = _sz;
}
return res;
}
bool DirtyCardQueue::apply_closure_to_buffer(CardTableEntryClosure* cl,
void** buf,
size_t index, size_t sz,
bool consume,
int worker_i) {
if (cl == NULL) return true;
for (size_t i = index; i < sz; i += oopSize) {
int ind = byte_index_to_index((int)i);
jbyte* card_ptr = (jbyte*)buf[ind];
if (card_ptr != NULL) {
// Set the entry to null, so we don't do it again (via the test
// above) if we reconsider this buffer.
if (consume) buf[ind] = NULL;
if (!cl->do_card_ptr(card_ptr, worker_i)) return false;
}
}
return true;
}
#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
DirtyCardQueueSet::DirtyCardQueueSet() :
PtrQueueSet(true /*notify_when_complete*/),
_closure(NULL),
_shared_dirty_card_queue(this, true /*perm*/),
_free_ids(NULL),
_processed_buffers_mut(0), _processed_buffers_rs_thread(0)
{
_all_active = true;
}
size_t DirtyCardQueueSet::num_par_ids() {
return MAX2(ParallelGCThreads, (size_t)2);
}
void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue,
Mutex* lock) {
PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
set_buffer_size(DCQBarrierQueueBufferSize);
set_process_completed_threshold(DCQBarrierProcessCompletedThreshold);
_shared_dirty_card_queue.set_lock(lock);
_free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
bool b = _free_ids->claim_perm_id(0);
guarantee(b, "Must reserve id zero for concurrent refinement thread.");
}
void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
t->dirty_card_queue().handle_zero_index();
}
void DirtyCardQueueSet::set_closure(CardTableEntryClosure* closure) {
_closure = closure;
}
void DirtyCardQueueSet::iterate_closure_all_threads(bool consume,
size_t worker_i) {
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
for(JavaThread* t = Threads::first(); t; t = t->next()) {
bool b = t->dirty_card_queue().apply_closure(_closure, consume);
guarantee(b, "Should not be interrupted.");
}
bool b = shared_dirty_card_queue()->apply_closure(_closure,
consume,
worker_i);
guarantee(b, "Should not be interrupted.");
}
bool DirtyCardQueueSet::mut_process_buffer(void** buf) {
// Used to determine if we had already claimed a par_id
// before entering this method.
bool already_claimed = false;
// We grab the current JavaThread.
JavaThread* thread = JavaThread::current();
// We get the number of any par_id that this thread
// might have already claimed.
int worker_i = thread->get_claimed_par_id();
// If worker_i is not -1, then the thread has already claimed
// a par_id.  We make note of it using the already_claimed value.
if (worker_i != -1) {
already_claimed = true;
} else {
// Otherwise we need to claim a par id
worker_i = _free_ids->claim_par_id();
// And store the par_id value in the thread
thread->set_claimed_par_id(worker_i);
}
bool b = false;
if (worker_i != -1) {
b = DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 0,
_sz, true, worker_i);
if (b) Atomic::inc(&_processed_buffers_mut);
// If we had not claimed an id before entering the method
// then we must release the id.
if (!already_claimed) {
// we release the id
_free_ids->release_par_id(worker_i);
// and set the claimed_id in the thread to -1
thread->set_claimed_par_id(-1);
}
}
return b;
}
DirtyCardQueueSet::CompletedBufferNode*
DirtyCardQueueSet::get_completed_buffer_lock(int stop_at) {
CompletedBufferNode* nd = NULL;
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
if ((int)_n_completed_buffers <= stop_at) {
_process_completed = false;
return NULL;
}
if (_completed_buffers_head != NULL) {
nd = _completed_buffers_head;
_completed_buffers_head = nd->next;
if (_completed_buffers_head == NULL)
_completed_buffers_tail = NULL;
_n_completed_buffers--;
}
debug_only(assert_completed_buffer_list_len_correct_locked());
return nd;
}
// We only do this in contexts where there is no concurrent enqueueing.
DirtyCardQueueSet::CompletedBufferNode*
DirtyCardQueueSet::get_completed_buffer_CAS() {
CompletedBufferNode* nd = _completed_buffers_head;
while (nd != NULL) {
CompletedBufferNode* next = nd->next;
CompletedBufferNode* result =
(CompletedBufferNode*)Atomic::cmpxchg_ptr(next,
&_completed_buffers_head,
nd);
if (result == nd) {
return result;
} else {
nd = _completed_buffers_head;
}
}
assert(_completed_buffers_head == NULL, "Loop post");
_completed_buffers_tail = NULL;
return NULL;
}
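// Illustrative sketch of the lock-free pop used above, rewritten against
// std::atomic purely for clarity; "Node" and "example_pop_head" are
// hypothetical stand-ins for CompletedBufferNode and
// _completed_buffers_head.  As the comment above notes, this is only
// safe when no concurrent enqueueing can occur.
#include <atomic>

struct Node { Node* next; };

static Node* example_pop_head(std::atomic<Node*>& head) {
  Node* nd = head.load();
  while (nd != nullptr) {
    // Try to swing head from nd to nd->next; on failure nd is reloaded
    // with the current head and the loop retries.
    if (head.compare_exchange_weak(nd, nd->next)) {
      return nd;
    }
  }
  return nullptr;  // the list was empty
}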
bool DirtyCardQueueSet::
apply_closure_to_completed_buffer_helper(int worker_i,
CompletedBufferNode* nd) {
if (nd != NULL) {
bool b =
DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf,
nd->index, _sz,
true, worker_i);
void** buf = nd->buf;
size_t index = nd->index;
delete nd;
if (b) {
deallocate_buffer(buf);
return true; // In normal case, go on to next buffer.
} else {
enqueue_complete_buffer(buf, index, true);
return false;
}
} else {
return false;
}
}
bool DirtyCardQueueSet::apply_closure_to_completed_buffer(int worker_i,
int stop_at,
bool with_CAS)
{
CompletedBufferNode* nd = NULL;
if (with_CAS) {
guarantee(stop_at == 0, "Precondition");
nd = get_completed_buffer_CAS();
} else {
nd = get_completed_buffer_lock(stop_at);
}
bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
if (res) _processed_buffers_rs_thread++;
return res;
}
void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() {
CompletedBufferNode* nd = _completed_buffers_head;
while (nd != NULL) {
bool b =
DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz,
false);
guarantee(b, "Should not stop early.");
nd = nd->next;
}
}
void DirtyCardQueueSet::abandon_logs() {
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
CompletedBufferNode* buffers_to_delete = NULL;
{
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
while (_completed_buffers_head != NULL) {
CompletedBufferNode* nd = _completed_buffers_head;
_completed_buffers_head = nd->next;
nd->next = buffers_to_delete;
buffers_to_delete = nd;
}
_n_completed_buffers = 0;
_completed_buffers_tail = NULL;
debug_only(assert_completed_buffer_list_len_correct_locked());
}
while (buffers_to_delete != NULL) {
CompletedBufferNode* nd = buffers_to_delete;
buffers_to_delete = nd->next;
deallocate_buffer(nd->buf);
delete nd;
}
// Since abandon is done only at safepoints, we can safely manipulate
// these queues.
for (JavaThread* t = Threads::first(); t; t = t->next()) {
t->dirty_card_queue().reset();
}
shared_dirty_card_queue()->reset();
}
void DirtyCardQueueSet::concatenate_logs() {
// Iterate over all the threads; if we find a partial log, add it to
// the global list of logs.  Temporarily turn off the limit on the number
// of outstanding buffers.
int save_max_completed_queue = _max_completed_queue;
_max_completed_queue = max_jint;
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
for (JavaThread* t = Threads::first(); t; t = t->next()) {
DirtyCardQueue& dcq = t->dirty_card_queue();
if (dcq.size() != 0) {
void **buf = t->dirty_card_queue().get_buf();
// We must NULL out the unused entries, then enqueue.
for (size_t i = 0; i < t->dirty_card_queue().get_index(); i += oopSize) {
buf[PtrQueue::byte_index_to_index((int)i)] = NULL;
}
enqueue_complete_buffer(dcq.get_buf(), dcq.get_index());
dcq.reinitialize();
}
}
if (_shared_dirty_card_queue.size() != 0) {
enqueue_complete_buffer(_shared_dirty_card_queue.get_buf(),
_shared_dirty_card_queue.get_index());
_shared_dirty_card_queue.reinitialize();
}
// Restore the completed buffer queue limit.
_max_completed_queue = save_max_completed_queue;
}

View file

@@ -0,0 +1,152 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class FreeIdSet;
// A closure class for processing card table entries. Note that we don't
// require these closure objects to be stack-allocated.
class CardTableEntryClosure: public CHeapObj {
public:
// Process the card whose card table entry is "card_ptr". If returns
// "false", terminate the iteration early.
virtual bool do_card_ptr(jbyte* card_ptr, int worker_i = 0) = 0;
};
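// Illustrative sketch of a CardTableEntryClosure subtype: a hypothetical
// closure (not part of this change) that simply counts the card pointers
// handed to it, using only the do_card_ptr() contract declared above.
class CountCardsClosure: public CardTableEntryClosure {
  size_t _count;
public:
  CountCardsClosure() : _count(0) {}
  virtual bool do_card_ptr(jbyte* card_ptr, int worker_i) {
    _count++;       // every entry passed to the closure is counted
    return true;    // returning true keeps the iteration going
  }
  size_t count() const { return _count; }
};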
// A PtrQueue whose elements are pointers to dirty cards in the card table.
class DirtyCardQueue: public PtrQueue {
public:
DirtyCardQueue(PtrQueueSet* qset_, bool perm = false) :
PtrQueue(qset_, perm)
{
// Dirty card queues are always active.
_active = true;
}
// Apply the closure to all elements, and reset the index to make the
// buffer empty. If a closure application returns "false", return
// "false" immediately, halting the iteration. If "consume" is true,
// deletes processed entries from logs.
bool apply_closure(CardTableEntryClosure* cl,
bool consume = true,
size_t worker_i = 0);
// Apply the closure to all elements of "buf", down to "index"
// (inclusive.) If returns "false", then a closure application returned
// "false", and we return immediately. If "consume" is true, entries are
// set to NULL as they are processed, so they will not be processed again
// later.
static bool apply_closure_to_buffer(CardTableEntryClosure* cl,
void** buf, size_t index, size_t sz,
bool consume = true,
int worker_i = 0);
void **get_buf() { return _buf;}
void set_buf(void **buf) {_buf = buf;}
size_t get_index() { return _index;}
void reinitialize() { _buf = 0; _sz = 0; _index = 0;}
};
class DirtyCardQueueSet: public PtrQueueSet {
CardTableEntryClosure* _closure;
DirtyCardQueue _shared_dirty_card_queue;
// Override.
bool mut_process_buffer(void** buf);
// Protected by the _cbl_mon.
FreeIdSet* _free_ids;
// The number of completed buffers processed by mutator and rs thread,
// respectively.
jint _processed_buffers_mut;
jint _processed_buffers_rs_thread;
public:
DirtyCardQueueSet();
void initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue = 0,
Mutex* lock = NULL);
// The number of parallel ids that can be claimed to allow collector or
// mutator threads to do card-processing work.
static size_t num_par_ids();
static void handle_zero_index_for_thread(JavaThread* t);
// Register "blk" as "the closure" for all queues. Only one such closure
// is allowed. The "apply_closure_to_completed_buffer" method will apply
// this closure to a completed buffer, and "iterate_closure_all_threads"
// applies it to partially-filled buffers (the latter should only be done
// with the world stopped).
void set_closure(CardTableEntryClosure* closure);
// If there is a registered closure for buffers, apply it to all entries
// in all currently-active buffers. This should only be applied at a
// safepoint. (Currently must not be called in parallel; this should
// change in the future.) If "consume" is true, processed entries are
// discarded.
void iterate_closure_all_threads(bool consume = true,
size_t worker_i = 0);
// If there exists some completed buffer, pop it, then apply the
// registered closure to all its elements, nulling out those elements
// processed. If all elements are processed, returns "true". If no
// completed buffers exist, returns false. If a completed buffer exists,
// but is only partially completed before a "yield" happens, the
// partially completed buffer (with its processed elements set to NULL)
// is returned to the completed buffer set, and this call returns false.
bool apply_closure_to_completed_buffer(int worker_i = 0,
int stop_at = 0,
bool with_CAS = false);
bool apply_closure_to_completed_buffer_helper(int worker_i,
CompletedBufferNode* nd);
CompletedBufferNode* get_completed_buffer_CAS();
CompletedBufferNode* get_completed_buffer_lock(int stop_at);
// Applies the current closure to all completed buffers,
// non-consumptively.
void apply_closure_to_all_completed_buffers();
DirtyCardQueue* shared_dirty_card_queue() {
return &_shared_dirty_card_queue;
}
// If a full collection is happening, reset partial logs, and ignore
// completed ones: the full collection will make them all irrelevant.
void abandon_logs();
// If any threads have partial logs, add them to the global list of logs.
void concatenate_logs();
void clear_n_completed_buffers() { _n_completed_buffers = 0;}
jint processed_buffers_mut() {
return _processed_buffers_mut;
}
jint processed_buffers_rs_thread() {
return _processed_buffers_rs_thread;
}
};
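// Illustrative sketch of the intended calling protocol (hypothetical
// helper, not part of this change): register a closure once with
// set_closure(), then repeatedly pop and process completed buffers with
// apply_closure_to_completed_buffer(), which returns false once no fully
// processed buffer is available.  Worker id 0 is assumed here, as for the
// concurrent refinement thread.
static void example_drain_completed_buffers(DirtyCardQueueSet* dcqs,
                                            CardTableEntryClosure* cl) {
  dcqs->set_closure(cl);
  while (dcqs->apply_closure_to_completed_buffer(0 /* worker_i */)) {
    // each successful call has consumed and deallocated one buffer
  }
}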

View file

@@ -0,0 +1,628 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1BlockOffsetTable.cpp.incl"
//////////////////////////////////////////////////////////////////////
// G1BlockOffsetSharedArray
//////////////////////////////////////////////////////////////////////
G1BlockOffsetSharedArray::G1BlockOffsetSharedArray(MemRegion reserved,
size_t init_word_size) :
_reserved(reserved), _end(NULL)
{
size_t size = compute_size(reserved.word_size());
ReservedSpace rs(ReservedSpace::allocation_align_size_up(size));
if (!rs.is_reserved()) {
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
}
if (!_vs.initialize(rs, 0)) {
vm_exit_during_initialization("Could not reserve enough space for heap offset array");
}
_offset_array = (u_char*)_vs.low_boundary();
resize(init_word_size);
if (TraceBlockOffsetTable) {
gclog_or_tty->print_cr("G1BlockOffsetSharedArray::G1BlockOffsetSharedArray: ");
gclog_or_tty->print_cr(" "
" rs.base(): " INTPTR_FORMAT
" rs.size(): " INTPTR_FORMAT
" rs end(): " INTPTR_FORMAT,
rs.base(), rs.size(), rs.base() + rs.size());
gclog_or_tty->print_cr(" "
" _vs.low_boundary(): " INTPTR_FORMAT
" _vs.high_boundary(): " INTPTR_FORMAT,
_vs.low_boundary(),
_vs.high_boundary());
}
}
void G1BlockOffsetSharedArray::resize(size_t new_word_size) {
assert(new_word_size <= _reserved.word_size(), "Resize larger than reserved");
size_t new_size = compute_size(new_word_size);
size_t old_size = _vs.committed_size();
size_t delta;
char* high = _vs.high();
_end = _reserved.start() + new_word_size;
if (new_size > old_size) {
delta = ReservedSpace::page_align_size_up(new_size - old_size);
assert(delta > 0, "just checking");
if (!_vs.expand_by(delta)) {
// Do better than this for Merlin
vm_exit_out_of_memory(delta, "offset table expansion");
}
assert(_vs.high() == high + delta, "invalid expansion");
// Initialization of the contents is left to the
// G1BlockOffsetArray that uses it.
} else {
delta = ReservedSpace::page_align_size_down(old_size - new_size);
if (delta == 0) return;
_vs.shrink_by(delta);
assert(_vs.high() == high - delta, "invalid expansion");
}
}
bool G1BlockOffsetSharedArray::is_card_boundary(HeapWord* p) const {
assert(p >= _reserved.start(), "just checking");
size_t delta = pointer_delta(p, _reserved.start());
return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
}
//////////////////////////////////////////////////////////////////////
// G1BlockOffsetArray
//////////////////////////////////////////////////////////////////////
G1BlockOffsetArray::G1BlockOffsetArray(G1BlockOffsetSharedArray* array,
MemRegion mr, bool init_to_zero) :
G1BlockOffsetTable(mr.start(), mr.end()),
_unallocated_block(_bottom),
_array(array), _csp(NULL),
_init_to_zero(init_to_zero) {
assert(_bottom <= _end, "arguments out of order");
if (!_init_to_zero) {
// initialize cards to point back to mr.start()
set_remainder_to_point_to_start(mr.start() + N_words, mr.end());
_array->set_offset_array(0, 0); // set first card to 0
}
}
void G1BlockOffsetArray::set_space(Space* sp) {
_sp = sp;
_csp = sp->toContiguousSpace();
}
// The arguments follow the normal convention of denoting
// a right-open interval: [start, end)
void
G1BlockOffsetArray::set_remainder_to_point_to_start(HeapWord* start, HeapWord* end) {
if (start >= end) {
// The start address is equal to the end address (or to
// the right of the end address), so there are no cards
// that need to be updated.
return;
}
// Write the backskip value for each region.
//
// offset
// card 2nd 3rd
// | +- 1st | |
// v v v v
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-
// |x|0|0|0|0|0|0|0|1|1|1|1|1|1| ... |1|1|1|1|2|2|2|2|2|2| ...
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+-+-+-
// 11 19 75
// 12
//
// offset card is the card that points to the start of an object
// x - offset value of offset card
// 1st - start of first logarithmic region
// 0 corresponds to logarithmic value N_words + 0 and 2**(3 * 0) = 1
// 2nd - start of second logarithmic region
// 1 corresponds to logarithmic value N_words + 1 and 2**(3 * 1) = 8
// 3rd - start of third logarithmic region
// 2 corresponds to logarithmic value N_words + 2 and 2**(3 * 2) = 64
//
// integer below the block offset entry is an example of
// the index of the entry
//
// Given an address,
// Find the index for the address
// Find the block offset table entry
// Convert the entry to a backskip
// (e.g., with today's constants, offset = 0x81 =>
// backskip = 2**(3*(0x81 - N_words)) = 2**3 = 8)
// Move back N (e.g., 8) entries and repeat with the
// value of the new entry
//
size_t start_card = _array->index_for(start);
size_t end_card = _array->index_for(end-1);
assert(start ==_array->address_for_index(start_card), "Precondition");
assert(end ==_array->address_for_index(end_card)+N_words, "Precondition");
set_remainder_to_point_to_start_incl(start_card, end_card); // closed interval
}
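// Illustrative decoding sketch (hypothetical helper, not part of this
// change).  Following the worked figures in the comment block above
// (each step in the logarithmic region multiplies the backskip by 2^3)
// and assuming N_words = 64 as on a 64-bit VM, an entry of N_words + i
// means "go back 2^(3*i) cards":
static size_t example_entry_to_cards_back(unsigned char entry) {
  const unsigned char assumed_N_words = 64;   // assumption: LogN_words = 6
  if (entry <= assumed_N_words) return 0;     // offset card itself: no backskip
  return (size_t)1 << (3 * (entry - assumed_N_words));
}
// e.g. entry 65 (N_words + 1) yields 8 cards back, entry 66 yields 64.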
// Unlike the normal convention in this code, the argument here denotes
// a closed, inclusive interval: [start_card, end_card], cf set_remainder_to_point_to_start()
// above.
void
G1BlockOffsetArray::set_remainder_to_point_to_start_incl(size_t start_card, size_t end_card) {
if (start_card > end_card) {
return;
}
assert(start_card > _array->index_for(_bottom), "Cannot be first card");
assert(_array->offset_array(start_card-1) <= N_words,
"Offset card has an unexpected value");
size_t start_card_for_region = start_card;
u_char offset = max_jubyte;
for (int i = 0; i < BlockOffsetArray::N_powers; i++) {
// -1 so that the card with the actual offset is counted.  Another -1
// so that the reach ends in this region and not at the start
// of the next.
size_t reach = start_card - 1 + (BlockOffsetArray::power_to_cards_back(i+1) - 1);
offset = N_words + i;
if (reach >= end_card) {
_array->set_offset_array(start_card_for_region, end_card, offset);
start_card_for_region = reach + 1;
break;
}
_array->set_offset_array(start_card_for_region, reach, offset);
start_card_for_region = reach + 1;
}
assert(start_card_for_region > end_card, "Sanity check");
DEBUG_ONLY(check_all_cards(start_card, end_card);)
}
// The block [blk_start, blk_end) has been allocated;
// adjust the block offset table to represent this information;
// right-open interval: [blk_start, blk_end)
void
G1BlockOffsetArray::alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
mark_block(blk_start, blk_end);
allocated(blk_start, blk_end);
}
// Adjust BOT to show that a previously whole block has been split
// into two.
void G1BlockOffsetArray::split_block(HeapWord* blk, size_t blk_size,
size_t left_blk_size) {
// Verify that the BOT shows [blk, blk + blk_size) to be one block.
verify_single_block(blk, blk_size);
// Update the BOT to indicate that [blk + left_blk_size, blk + blk_size)
// is one single block.
mark_block(blk + left_blk_size, blk + blk_size);
}
// Action_mark - update the BOT for the block [blk_start, blk_end).
// Current typical use is for splitting a block.
// Action_single - update the BOT for an allocation.
// Action_check - BOT verification.
void G1BlockOffsetArray::do_block_internal(HeapWord* blk_start,
HeapWord* blk_end,
Action action) {
assert(Universe::heap()->is_in_reserved(blk_start),
"reference must be into the heap");
assert(Universe::heap()->is_in_reserved(blk_end-1),
"limit must be within the heap");
// This is optimized to make the test fast, assuming we only rarely
// cross boundaries.
uintptr_t end_ui = (uintptr_t)(blk_end - 1);
uintptr_t start_ui = (uintptr_t)blk_start;
// Calculate the last card boundary preceding end of blk
intptr_t boundary_before_end = (intptr_t)end_ui;
clear_bits(boundary_before_end, right_n_bits(LogN));
if (start_ui <= (uintptr_t)boundary_before_end) {
// blk starts at or crosses a boundary
// Calculate index of card on which blk begins
size_t start_index = _array->index_for(blk_start);
// Index of card on which blk ends
size_t end_index = _array->index_for(blk_end - 1);
// Start address of card on which blk begins
HeapWord* boundary = _array->address_for_index(start_index);
assert(boundary <= blk_start, "blk should start at or after boundary");
if (blk_start != boundary) {
// blk starts strictly after boundary
// adjust card boundary and start_index forward to next card
boundary += N_words;
start_index++;
}
assert(start_index <= end_index, "monotonicity of index_for()");
assert(boundary <= (HeapWord*)boundary_before_end, "tautology");
switch (action) {
case Action_mark: {
if (init_to_zero()) {
_array->set_offset_array(start_index, boundary, blk_start);
break;
} // Else fall through to the next case
}
case Action_single: {
_array->set_offset_array(start_index, boundary, blk_start);
// We have finished marking the "offset card". We need to now
// mark the subsequent cards that this blk spans.
if (start_index < end_index) {
HeapWord* rem_st = _array->address_for_index(start_index) + N_words;
HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
set_remainder_to_point_to_start(rem_st, rem_end);
}
break;
}
case Action_check: {
_array->check_offset_array(start_index, boundary, blk_start);
// We have finished checking the "offset card". We need to now
// check the subsequent cards that this blk spans.
check_all_cards(start_index + 1, end_index);
break;
}
default:
ShouldNotReachHere();
}
}
}
// The card-interval [start_card, end_card] is a closed interval; this
// is an expensive check -- use with care and only under protection of
// suitable flag.
void G1BlockOffsetArray::check_all_cards(size_t start_card, size_t end_card) const {
if (end_card < start_card) {
return;
}
guarantee(_array->offset_array(start_card) == N_words, "Wrong value in second card");
for (size_t c = start_card + 1; c <= end_card; c++ /* yeah! */) {
u_char entry = _array->offset_array(c);
if (c - start_card > BlockOffsetArray::power_to_cards_back(1)) {
guarantee(entry > N_words, "Should be in logarithmic region");
}
size_t backskip = BlockOffsetArray::entry_to_cards_back(entry);
size_t landing_card = c - backskip;
guarantee(landing_card >= (start_card - 1), "Inv");
if (landing_card >= start_card) {
guarantee(_array->offset_array(landing_card) <= entry, "monotonicity");
} else {
guarantee(landing_card == start_card - 1, "Tautology");
guarantee(_array->offset_array(landing_card) <= N_words, "Offset value");
}
}
}
// The range [blk_start, blk_end) represents a single contiguous block
// of storage; modify the block offset table to represent this
// information; Right-open interval: [blk_start, blk_end)
// NOTE: this method does _not_ adjust _unallocated_block.
void
G1BlockOffsetArray::single_block(HeapWord* blk_start, HeapWord* blk_end) {
do_block_internal(blk_start, blk_end, Action_single);
}
// Mark the BOT such that if [blk_start, blk_end) straddles a card
// boundary, the card following the first such boundary is marked
// with the appropriate offset.
// NOTE: this method does _not_ adjust _unallocated_block or
// any cards subsequent to the first one.
void
G1BlockOffsetArray::mark_block(HeapWord* blk_start, HeapWord* blk_end) {
do_block_internal(blk_start, blk_end, Action_mark);
}
void G1BlockOffsetArray::join_blocks(HeapWord* blk1, HeapWord* blk2) {
HeapWord* blk1_start = Universe::heap()->block_start(blk1);
HeapWord* blk2_start = Universe::heap()->block_start(blk2);
assert(blk1 == blk1_start && blk2 == blk2_start,
"Must be block starts.");
assert(blk1 + _sp->block_size(blk1) == blk2, "Must be contiguous.");
size_t blk1_start_index = _array->index_for(blk1);
size_t blk2_start_index = _array->index_for(blk2);
assert(blk1_start_index <= blk2_start_index, "sanity");
HeapWord* blk2_card_start = _array->address_for_index(blk2_start_index);
if (blk2 == blk2_card_start) {
// blk2 starts a card.  Does blk1 start on the previous card, or further
// back?
assert(blk1_start_index < blk2_start_index, "must be lower card.");
if (blk1_start_index + 1 == blk2_start_index) {
// previous card; new value for blk2 card is size of blk1.
_array->set_offset_array(blk2_start_index, (u_char) _sp->block_size(blk1));
} else {
// Earlier card; go back a card.
_array->set_offset_array(blk2_start_index, N_words);
}
} else {
// blk2 does not start a card. Does it cross a card? If not, nothing
// to do.
size_t blk2_end_index =
_array->index_for(blk2 + _sp->block_size(blk2) - 1);
assert(blk2_end_index >= blk2_start_index, "sanity");
if (blk2_end_index > blk2_start_index) {
// Yes, it crosses a card. The value for the next card must change.
if (blk1_start_index + 1 == blk2_start_index) {
// previous card; new value for second blk2 card is size of blk1.
_array->set_offset_array(blk2_start_index + 1,
(u_char) _sp->block_size(blk1));
} else {
// Earlier card; go back a card.
_array->set_offset_array(blk2_start_index + 1, N_words);
}
}
}
}
HeapWord* G1BlockOffsetArray::block_start_unsafe(const void* addr) {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
// Must read this exactly once because it can be modified by parallel
// allocation.
HeapWord* ub = _unallocated_block;
if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
assert(ub < _end, "tautology (see above)");
return ub;
}
// Otherwise, find the block start using the table.
HeapWord* q = block_at_or_preceding(addr, false, 0);
return forward_to_block_containing_addr(q, addr);
}
// This duplicates a little code from the above: unavoidable.
HeapWord*
G1BlockOffsetArray::block_start_unsafe_const(const void* addr) const {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
// Must read this exactly once because it can be modified by parallel
// allocation.
HeapWord* ub = _unallocated_block;
if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
assert(ub < _end, "tautology (see above)");
return ub;
}
// Otherwise, find the block start using the table.
HeapWord* q = block_at_or_preceding(addr, false, 0);
HeapWord* n = q + _sp->block_size(q);
return forward_to_block_containing_addr_const(q, n, addr);
}
HeapWord*
G1BlockOffsetArray::forward_to_block_containing_addr_slow(HeapWord* q,
HeapWord* n,
const void* addr) {
// We're not in the normal case. We need to handle an important subcase
// here: LAB allocation. An allocation previously recorded in the
// offset table was actually a lab allocation, and was divided into
// several objects subsequently. Fix this situation as we answer the
// query, by updating entries as we cross them.
// If the first object's end is at a card boundary, start refining
// with the corresponding card (the value of the entry will basically
// be set to 0).  If the object crosses the boundary, start from the next card.
size_t next_index = _array->index_for(n) + !_array->is_card_boundary(n);
HeapWord* next_boundary = _array->address_for_index(next_index);
if (csp() != NULL) {
if (addr >= csp()->top()) return csp()->top();
while (next_boundary < addr) {
while (n <= next_boundary) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += obj->size();
}
assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
// [q, n) is the block that crosses the boundary.
alloc_block_work2(&next_boundary, &next_index, q, n);
}
} else {
while (next_boundary < addr) {
while (n <= next_boundary) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += _sp->block_size(q);
}
assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
// [q, n) is the block that crosses the boundary.
alloc_block_work2(&next_boundary, &next_index, q, n);
}
}
return forward_to_block_containing_addr_const(q, n, addr);
}
HeapWord* G1BlockOffsetArray::block_start_careful(const void* addr) const {
assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
// Must read this exactly once because it can be modified by parallel
// allocation.
HeapWord* ub = _unallocated_block;
if (BlockOffsetArrayUseUnallocatedBlock && addr >= ub) {
assert(ub < _end, "tautology (see above)");
return ub;
}
// Otherwise, find the block start using the table, but taking
// care (cf block_start_unsafe() above) not to parse any objects/blocks
// on the cards themselves.
size_t index = _array->index_for(addr);
assert(_array->address_for_index(index) == addr,
"arg should be start of card");
HeapWord* q = (HeapWord*)addr;
uint offset;
do {
offset = _array->offset_array(index--);
q -= offset;
} while (offset == N_words);
assert(q <= addr, "block start should be to left of arg");
return q;
}
// Note that the committed size of the covered space may have changed,
// so the table size might also wish to change.
void G1BlockOffsetArray::resize(size_t new_word_size) {
HeapWord* new_end = _bottom + new_word_size;
if (_end < new_end && !init_to_zero()) {
// verify that the old and new boundaries are also card boundaries
assert(_array->is_card_boundary(_end),
"_end not a card boundary");
assert(_array->is_card_boundary(new_end),
"new _end would not be a card boundary");
// set all the newly added cards
_array->set_offset_array(_end, new_end, N_words);
}
_end = new_end; // update _end
}
void G1BlockOffsetArray::set_region(MemRegion mr) {
_bottom = mr.start();
_end = mr.end();
}
//
// threshold_
// | _index_
// v v
// +-------+-------+-------+-------+-------+
// | i-1 | i | i+1 | i+2 | i+3 |
// +-------+-------+-------+-------+-------+
// ( ^ ]
// block-start
//
void G1BlockOffsetArray::alloc_block_work2(HeapWord** threshold_, size_t* index_,
HeapWord* blk_start, HeapWord* blk_end) {
// For efficiency, do copy-in/copy-out.
HeapWord* threshold = *threshold_;
size_t index = *index_;
assert(blk_start != NULL && blk_end > blk_start,
"phantom block");
assert(blk_end > threshold, "should be past threshold");
assert(blk_start <= threshold, "blk_start should be at or before threshold");
assert(pointer_delta(threshold, blk_start) <= N_words,
"offset should be <= BlockOffsetSharedArray::N");
assert(Universe::heap()->is_in_reserved(blk_start),
"reference must be into the heap");
assert(Universe::heap()->is_in_reserved(blk_end-1),
"limit must be within the heap");
assert(threshold == _array->_reserved.start() + index*N_words,
"index must agree with threshold");
DEBUG_ONLY(size_t orig_index = index;)
// Mark the card that holds the offset into the block. Note
// that _next_offset_index and _next_offset_threshold are not
// updated until the end of this method.
_array->set_offset_array(index, threshold, blk_start);
// We need to now mark the subsequent cards that this blk spans.
// Index of card on which blk ends.
size_t end_index = _array->index_for(blk_end - 1);
// Are there more cards left to be updated?
if (index + 1 <= end_index) {
HeapWord* rem_st = _array->address_for_index(index + 1);
// Calculate rem_end this way because end_index
// may be the last valid index in the covered region.
HeapWord* rem_end = _array->address_for_index(end_index) + N_words;
set_remainder_to_point_to_start(rem_st, rem_end);
}
index = end_index + 1;
// Calculate threshold_ this way because end_index
// may be the last valid index in the covered region.
threshold = _array->address_for_index(end_index) + N_words;
assert(threshold >= blk_end, "Incorrect offset threshold");
// index_ and threshold_ updated here.
*threshold_ = threshold;
*index_ = index;
#ifdef ASSERT
// The offset can be 0 if the block starts on a boundary. That
// is checked by an assertion above.
size_t start_index = _array->index_for(blk_start);
HeapWord* boundary = _array->address_for_index(start_index);
assert((_array->offset_array(orig_index) == 0 &&
blk_start == boundary) ||
(_array->offset_array(orig_index) > 0 &&
_array->offset_array(orig_index) <= N_words),
"offset array should have been set");
for (size_t j = orig_index + 1; j <= end_index; j++) {
assert(_array->offset_array(j) > 0 &&
_array->offset_array(j) <=
(u_char) (N_words+BlockOffsetArray::N_powers-1),
"offset array should have been set");
}
#endif
}
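// Illustrative sketch (hypothetical helper, not part of this change) of
// the threshold/index bookkeeping performed at the end of
// alloc_block_work2(), using plain word offsets from the start of the
// covered region and an assumed N_words of 64:
static void example_advance_threshold(size_t blk_end_word,
                                      size_t* threshold_word,
                                      size_t* index) {
  const size_t assumed_N_words = 64;                        // assumption: 64-bit VM
  size_t end_index = (blk_end_word - 1) / assumed_N_words;  // card holding the block's last word
  *index = end_index + 1;
  *threshold_word = (end_index + 1) * assumed_N_words;      // first boundary at or past blk_end
}
// e.g. a block ending at word 250 gives end_index 3, new index 4 and a
// new threshold at word 256.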
//////////////////////////////////////////////////////////////////////
// G1BlockOffsetArrayContigSpace
//////////////////////////////////////////////////////////////////////
HeapWord*
G1BlockOffsetArrayContigSpace::block_start_unsafe(const void* addr) {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
return forward_to_block_containing_addr(q, addr);
}
HeapWord*
G1BlockOffsetArrayContigSpace::
block_start_unsafe_const(const void* addr) const {
assert(_bottom <= addr && addr < _end,
"addr must be covered by this Array");
HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
HeapWord* n = q + _sp->block_size(q);
return forward_to_block_containing_addr_const(q, n, addr);
}
G1BlockOffsetArrayContigSpace::
G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array,
MemRegion mr) :
G1BlockOffsetArray(array, mr, true)
{
_next_offset_threshold = NULL;
_next_offset_index = 0;
}
HeapWord* G1BlockOffsetArrayContigSpace::initialize_threshold() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
"just checking");
_next_offset_index = _array->index_for(_bottom);
_next_offset_index++;
_next_offset_threshold =
_array->address_for_index(_next_offset_index);
return _next_offset_threshold;
}
void G1BlockOffsetArrayContigSpace::zero_bottom_entry() {
assert(!Universe::heap()->is_in_reserved(_array->_offset_array),
"just checking");
size_t bottom_index = _array->index_for(_bottom);
assert(_array->address_for_index(bottom_index) == _bottom,
"Precondition of call");
_array->set_offset_array(bottom_index, 0);
}

View file

@@ -0,0 +1,487 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The CollectedHeap type requires subtypes to implement a method
// "block_start". For some subtypes, notably generational
// systems using card-table-based write barriers, the efficiency of this
// operation may be important. Implementations of the "BlockOffsetArray"
// class may be useful in providing such efficient implementations.
//
// While generally mirroring the structure of the BOT for GenCollectedHeap,
// the following types are tailored more towards G1's uses; these should,
// however, be merged back into a common BOT to avoid code duplication
// and reduce maintenance overhead.
//
// G1BlockOffsetTable (abstract)
// -- G1BlockOffsetArray (uses G1BlockOffsetSharedArray)
// -- G1BlockOffsetArrayContigSpace
//
// A main impediment to the consolidation of this code might be the
// effect of making some of the block_start*() calls non-const as
// below. Whether that might adversely affect performance optimizations
// that compilers might normally perform in the case of non-G1
// collectors needs to be carefully investigated prior to any such
// consolidation.
// Forward declarations
class ContiguousSpace;
class G1BlockOffsetSharedArray;
class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
friend class VMStructs;
protected:
// These members describe the region covered by the table.
// The space this table is covering.
HeapWord* _bottom; // == reserved.start
HeapWord* _end; // End of currently allocated region.
public:
// Initialize the table to cover the given space.
// The contents of the initial table are undefined.
G1BlockOffsetTable(HeapWord* bottom, HeapWord* end) :
_bottom(bottom), _end(end)
{
assert(_bottom <= _end, "arguments out of order");
}
// Note that the committed size of the covered space may have changed,
// so the table size might also wish to change.
virtual void resize(size_t new_word_size) = 0;
virtual void set_bottom(HeapWord* new_bottom) {
assert(new_bottom <= _end, "new_bottom > _end");
_bottom = new_bottom;
resize(pointer_delta(_end, _bottom));
}
// Requires "addr" to be contained by a block, and returns the address of
// the start of that block. (May have side effects, namely updating of
// shared array entries that "point" too far backwards. This can occur,
// for example, when LAB allocation is used in a space covered by the
// table.)
virtual HeapWord* block_start_unsafe(const void* addr) = 0;
// Same as above, but does not have any of the possible side effects
// discussed above.
virtual HeapWord* block_start_unsafe_const(const void* addr) const = 0;
// Returns the address of the start of the block containing "addr", or
// else "null" if it is covered by no block. (May have side effects,
// namely updating of shared array entries that "point" too far
// backwards. This can occur, for example, when lab allocation is used
// in a space covered by the table.)
inline HeapWord* block_start(const void* addr);
// Same as above, but does not have any of the possible side effects
// discussed above.
inline HeapWord* block_start_const(const void* addr) const;
};
// This implementation of "G1BlockOffsetTable" divides the covered region
// into "N"-word subregions (where "N" = 2^"LogN". An array with an entry
// for each such subregion indicates how far back one must go to find the
// start of the chunk that includes the first word of the subregion.
//
// Each BlockOffsetArray is owned by a Space. However, the actual array
// may be shared by several BlockOffsetArrays; this is useful
// when a single resizable area (such as a generation) is divided up into
// several spaces in which contiguous allocation takes place,
// such as, for example, in G1 or in the train generation.
// Here is the shared array type.
class G1BlockOffsetSharedArray: public CHeapObj {
friend class G1BlockOffsetArray;
friend class G1BlockOffsetArrayContigSpace;
friend class VMStructs;
private:
// The reserved region covered by the shared array.
MemRegion _reserved;
// End of the current committed region.
HeapWord* _end;
// Array for keeping offsets for retrieving object start fast given an
// address.
VirtualSpace _vs;
u_char* _offset_array; // byte array keeping backwards offsets
// Bounds checking accessors:
// For performance these have to devolve to array accesses in product builds.
u_char offset_array(size_t index) const {
assert(index < _vs.committed_size(), "index out of range");
return _offset_array[index];
}
void set_offset_array(size_t index, u_char offset) {
assert(index < _vs.committed_size(), "index out of range");
assert(offset <= N_words, "offset too large");
_offset_array[index] = offset;
}
void set_offset_array(size_t index, HeapWord* high, HeapWord* low) {
assert(index < _vs.committed_size(), "index out of range");
assert(high >= low, "addresses out of order");
assert(pointer_delta(high, low) <= N_words, "offset too large");
_offset_array[index] = (u_char) pointer_delta(high, low);
}
void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
assert(index_for(right - 1) < _vs.committed_size(),
"right address out of range");
assert(left < right, "Heap addresses out of order");
size_t num_cards = pointer_delta(right, left) >> LogN_words;
memset(&_offset_array[index_for(left)], offset, num_cards);
}
void set_offset_array(size_t left, size_t right, u_char offset) {
assert(right < _vs.committed_size(), "right address out of range");
assert(left <= right, "indexes out of order");
size_t num_cards = right - left + 1;
memset(&_offset_array[left], offset, num_cards);
}
void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
assert(index < _vs.committed_size(), "index out of range");
assert(high >= low, "addresses out of order");
assert(pointer_delta(high, low) <= N_words, "offset too large");
assert(_offset_array[index] == pointer_delta(high, low),
"Wrong offset");
}
bool is_card_boundary(HeapWord* p) const;
// Return the number of slots needed for an offset array
// that covers mem_region_words words.
// We always add an extra slot because if an object
// ends on a card boundary we put a 0 in the next
// offset array slot, so we want that slot always
// to be reserved.
size_t compute_size(size_t mem_region_words) {
size_t number_of_slots = (mem_region_words / N_words) + 1;
return ReservedSpace::page_align_size_up(number_of_slots);
}
public:
enum SomePublicConstants {
LogN = 9,
LogN_words = LogN - LogHeapWordSize,
N_bytes = 1 << LogN,
N_words = 1 << LogN_words
};
// Initialize the table to cover from "base" to (at least)
// "base + init_word_size". In the future, the table may be expanded
// (see "resize" below) up to the size of "_reserved" (which must be at
// least "init_word_size".) The contents of the initial table are
// undefined; it is the responsibility of the constituent
// G1BlockOffsetTable(s) to initialize cards.
G1BlockOffsetSharedArray(MemRegion reserved, size_t init_word_size);
// Notes a change in the committed size of the region covered by the
// table. The "new_word_size" may not be larger than the size of the
// reserved region this table covers.
void resize(size_t new_word_size);
void set_bottom(HeapWord* new_bottom);
// Updates all the BlockOffsetArray's sharing this shared array to
// reflect the current "top"'s of their spaces.
void update_offset_arrays();
// Return the appropriate index into "_offset_array" for "p".
inline size_t index_for(const void* p) const;
// Return the address indicating the start of the region corresponding to
// "index" in "_offset_array".
inline HeapWord* address_for_index(size_t index) const;
};
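// Illustrative sizing sketch (hypothetical helper, not part of this
// change).  With the constants above on a 64-bit VM (LogN = 9, so
// 512-byte cards, LogN_words = 6 and N_words = 64), compute_size() needs
// one byte per 64-word card plus one spare slot:
static size_t example_offset_array_slots(size_t mem_region_words) {
  const size_t assumed_N_words = 64;              // assumption: 8-byte heap words
  return mem_region_words / assumed_N_words + 1;  // same formula as compute_size(), before page alignment
}
// e.g. a 1 GB covered region (1 << 27 words) needs 2097153 one-byte slots,
// i.e. roughly 2 MB of offset array before page alignment.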
// And here is the G1BlockOffsetTable subtype that uses the array.
class G1BlockOffsetArray: public G1BlockOffsetTable {
friend class G1BlockOffsetSharedArray;
friend class G1BlockOffsetArrayContigSpace;
friend class VMStructs;
private:
enum SomePrivateConstants {
N_words = G1BlockOffsetSharedArray::N_words,
LogN = G1BlockOffsetSharedArray::LogN
};
// The following enums are used by do_block_helper
enum Action {
Action_single, // BOT records a single block (see single_block())
Action_mark, // BOT marks the start of a block (see mark_block())
Action_check // Check that BOT records block correctly
// (see verify_single_block()).
};
// This is the array, which can be shared by several BlockOffsetArray's
// servicing different parts of the covered region.
G1BlockOffsetSharedArray* _array;
// The space that owns this subregion.
Space* _sp;
// If "_sp" is a contiguous space, the field below is the view of "_sp"
// as a contiguous space, else NULL.
ContiguousSpace* _csp;
// If true, array entries are initialized to 0; otherwise, they are
// initialized to point backwards to the beginning of the covered region.
bool _init_to_zero;
// The portion [_unallocated_block, _sp.end()) of the space that
// is a single block known not to contain any objects.
// NOTE: See BlockOffsetArrayUseUnallocatedBlock flag.
HeapWord* _unallocated_block;
// Sets the entries
// corresponding to the cards starting at "start" and ending at "end"
// to point back to the card before "start": the interval [start, end)
// is right-open.
void set_remainder_to_point_to_start(HeapWord* start, HeapWord* end);
// Same as above, except that the args here are a card _index_ interval
// that is closed: [start_index, end_index]
void set_remainder_to_point_to_start_incl(size_t start, size_t end);
// A helper function for BOT adjustment/verification work
void do_block_internal(HeapWord* blk_start, HeapWord* blk_end, Action action);
protected:
ContiguousSpace* csp() const { return _csp; }
// Returns the address of a block whose start is at most "addr".
// If "has_max_index" is true, "assumes "max_index" is the last valid one
// in the array.
inline HeapWord* block_at_or_preceding(const void* addr,
bool has_max_index,
size_t max_index) const;
// "q" is a block boundary that is <= "addr"; "n" is the address of the
// next block (or the end of the space.) Return the address of the
// beginning of the block that contains "addr". Does so without side
// effects (see, e.g., spec of block_start.)
inline HeapWord*
forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
const void* addr) const;
// "q" is a block boundary that is <= "addr"; return the address of the
// beginning of the block that contains "addr". May have side effects
// on "this", by updating imprecise entries.
inline HeapWord* forward_to_block_containing_addr(HeapWord* q,
const void* addr);
// "q" is a block boundary that is <= "addr"; "n" is the address of the
// next block (or the end of the space.) Return the address of the
// beginning of the block that contains "addr". May have side effects
// on "this", by updating imprecise entries.
HeapWord* forward_to_block_containing_addr_slow(HeapWord* q,
HeapWord* n,
const void* addr);
// Requires that "*threshold_" be the first array entry boundary at or
// above "blk_start", and that "*index_" be the corresponding array
// index. If the block starts at or crosses "*threshold_", records
// "blk_start" as the appropriate block start for the array index
// starting at "*threshold_", and for any other indices crossed by the
// block. Updates "*threshold_" and "*index_" to correspond to the first
// index after the block end.
void alloc_block_work2(HeapWord** threshold_, size_t* index_,
HeapWord* blk_start, HeapWord* blk_end);
public:
// The space may not have its bottom and top set yet, which is why the
// region is passed as a parameter. If "init_to_zero" is true, the
// elements of the array are initialized to zero. Otherwise, they are
// initialized to point backwards to the beginning.
G1BlockOffsetArray(G1BlockOffsetSharedArray* array, MemRegion mr,
bool init_to_zero);
// Note: this ought to be part of the constructor, but that would require
// "this" to be passed as a parameter to a member constructor for
// the containing concrete subtype of Space.
// This would be legal C++, but MS VC++ doesn't allow it.
void set_space(Space* sp);
// Resets the covered region to the given "mr".
void set_region(MemRegion mr);
// Resets the covered region to one with the same _bottom as before but
// the "new_word_size".
void resize(size_t new_word_size);
// These must be guaranteed to work properly (i.e., do nothing)
// when "blk_start" ("blk" for second version) is "NULL".
virtual void alloc_block(HeapWord* blk_start, HeapWord* blk_end);
virtual void alloc_block(HeapWord* blk, size_t size) {
alloc_block(blk, blk + size);
}
// The following methods are useful and optimized for a
// general, non-contiguous space.
// The given arguments are required to be the starts of adjacent ("blk1"
// before "blk2") well-formed blocks covered by "this". After this call,
// they should be considered to form one block.
virtual void join_blocks(HeapWord* blk1, HeapWord* blk2);
// Given a block [blk_start, blk_start + full_blk_size), and
// a left_blk_size < full_blk_size, adjust the BOT to show two
// blocks [blk_start, blk_start + left_blk_size) and
// [blk_start + left_blk_size, blk_start + full_blk_size).
// It is assumed (and verified in the non-product VM) that the
// BOT was correct for the original block.
void split_block(HeapWord* blk_start, size_t full_blk_size,
size_t left_blk_size);
// Adjust the BOT to show that it has a single block in the
// range [blk_start, blk_start + size). All necessary BOT
// cards are adjusted, but _unallocated_block isn't.
void single_block(HeapWord* blk_start, HeapWord* blk_end);
void single_block(HeapWord* blk, size_t size) {
single_block(blk, blk + size);
}
// Adjust BOT to show that it has a block in the range
// [blk_start, blk_start + size). Only the first card
// of BOT is touched. It is assumed (and verified in the
// non-product VM) that the remaining cards of the block
// are correct.
void mark_block(HeapWord* blk_start, HeapWord* blk_end);
void mark_block(HeapWord* blk, size_t size) {
mark_block(blk, blk + size);
}
// Adjust _unallocated_block to indicate that a particular
// block has been newly allocated or freed. It is assumed (and
// verified in the non-product VM) that the BOT is correct for
// the given block.
inline void allocated(HeapWord* blk_start, HeapWord* blk_end) {
// Verify that the BOT shows [blk, blk + blk_size) to be one block.
verify_single_block(blk_start, blk_end);
if (BlockOffsetArrayUseUnallocatedBlock) {
_unallocated_block = MAX2(_unallocated_block, blk_end);
}
}
inline void allocated(HeapWord* blk, size_t size) {
allocated(blk, blk + size);
}
inline void freed(HeapWord* blk_start, HeapWord* blk_end);
inline void freed(HeapWord* blk, size_t size);
virtual HeapWord* block_start_unsafe(const void* addr);
virtual HeapWord* block_start_unsafe_const(const void* addr) const;
// Requires "addr" to be the start of a card and returns the
// start of the block that contains the given address.
HeapWord* block_start_careful(const void* addr) const;
// If true, initialize array slots with no allocated blocks to zero.
// Otherwise, make them point back to the front.
bool init_to_zero() { return _init_to_zero; }
// Verification & debugging - ensure that the offset table reflects the fact
// that the block [blk_start, blk_end) or [blk, blk + size) is a
  // single block of storage. NOTE: can't const this because of
// call to non-const do_block_internal() below.
inline void verify_single_block(HeapWord* blk_start, HeapWord* blk_end) {
if (VerifyBlockOffsetArray) {
do_block_internal(blk_start, blk_end, Action_check);
}
}
inline void verify_single_block(HeapWord* blk, size_t size) {
verify_single_block(blk, blk + size);
}
// Verify that the given block is before _unallocated_block
inline void verify_not_unallocated(HeapWord* blk_start,
HeapWord* blk_end) const {
if (BlockOffsetArrayUseUnallocatedBlock) {
assert(blk_start < blk_end, "Block inconsistency?");
assert(blk_end <= _unallocated_block, "_unallocated_block problem");
}
}
inline void verify_not_unallocated(HeapWord* blk, size_t size) const {
verify_not_unallocated(blk, blk + size);
}
void check_all_cards(size_t left_card, size_t right_card) const;
};
// A subtype of BlockOffsetArray that takes advantage of the fact
// that its underlying space is a ContiguousSpace, so that its "active"
// region can be more efficiently tracked (than for a non-contiguous space).
class G1BlockOffsetArrayContigSpace: public G1BlockOffsetArray {
friend class VMStructs;
// allocation boundary at which offset array must be updated
HeapWord* _next_offset_threshold;
size_t _next_offset_index; // index corresponding to that boundary
// Work function to be called when allocation start crosses the next
// threshold in the contig space.
void alloc_block_work1(HeapWord* blk_start, HeapWord* blk_end) {
alloc_block_work2(&_next_offset_threshold, &_next_offset_index,
blk_start, blk_end);
}
public:
G1BlockOffsetArrayContigSpace(G1BlockOffsetSharedArray* array, MemRegion mr);
// Initialize the threshold to reflect the first boundary after the
// bottom of the covered region.
HeapWord* initialize_threshold();
// Zero out the entry for _bottom (offset will be zero).
void zero_bottom_entry();
// Return the next threshold, the point at which the table should be
// updated.
HeapWord* threshold() const { return _next_offset_threshold; }
// These must be guaranteed to work properly (i.e., do nothing)
// when "blk_start" ("blk" for second version) is "NULL". In this
// implementation, that's true because NULL is represented as 0, and thus
// never exceeds the "_next_offset_threshold".
void alloc_block(HeapWord* blk_start, HeapWord* blk_end) {
if (blk_end > _next_offset_threshold)
alloc_block_work1(blk_start, blk_end);
}
void alloc_block(HeapWord* blk, size_t size) {
alloc_block(blk, blk+size);
}
HeapWord* block_start_unsafe(const void* addr);
HeapWord* block_start_unsafe_const(const void* addr) const;
};
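// Illustrative, standalone sketch of the fast-path/slow-path pattern that
// alloc_block above relies on: nothing happens until an allocation crosses
// the current threshold. The toy type, card size and word addressing below
// are assumptions made for the example, not HotSpot's real layout.
#include <cstddef>
#include <cstdio>
struct ToyContigBOT {
  static const size_t kCardWords = 64;        // assumed: one BOT entry covers 64 words
  size_t next_threshold;                      // word index of the next card boundary
  size_t slow_path_updates;                   // how many times the table was touched
  explicit ToyContigBOT(size_t bottom)
    : next_threshold(round_up(bottom)), slow_path_updates(0) {}
  static size_t round_up(size_t w) { return (w + kCardWords) / kCardWords * kCardWords; }
  void alloc_block(size_t blk_start, size_t blk_end) {
    if (blk_end > next_threshold) {           // fast path: most allocations skip this
      slow_path_updates++;                    // slow path: record the block start in the table
      next_threshold = round_up(blk_end);
      (void)blk_start;
    }
  }
};
int main() {
  ToyContigBOT bot(0);
  bot.alloc_block(0, 10);    // stays below the first threshold: fast path
  bot.alloc_block(10, 200);  // crosses it: one slow-path update
  printf("slow-path updates = %zu, next threshold = %zu\n",
         bot.slow_path_updates, bot.next_threshold);
  return 0;
}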

View file

@@ -0,0 +1,153 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
if (addr >= _bottom && addr < _end) {
return block_start_unsafe(addr);
} else {
return NULL;
}
}
inline HeapWord*
G1BlockOffsetTable::block_start_const(const void* addr) const {
if (addr >= _bottom && addr < _end) {
return block_start_unsafe_const(addr);
} else {
return NULL;
}
}
inline size_t G1BlockOffsetSharedArray::index_for(const void* p) const {
char* pc = (char*)p;
assert(pc >= (char*)_reserved.start() &&
pc < (char*)_reserved.end(),
"p not in range.");
size_t delta = pointer_delta(pc, _reserved.start(), sizeof(char));
size_t result = delta >> LogN;
assert(result < _vs.committed_size(), "bad index from address");
return result;
}
inline HeapWord*
G1BlockOffsetSharedArray::address_for_index(size_t index) const {
assert(index < _vs.committed_size(), "bad index");
HeapWord* result = _reserved.start() + (index << LogN_words);
assert(result >= _reserved.start() && result < _reserved.end(),
"bad address from index");
return result;
}
inline HeapWord*
G1BlockOffsetArray::block_at_or_preceding(const void* addr,
bool has_max_index,
size_t max_index) const {
assert(_array->offset_array(0) == 0, "objects can't cross covered areas");
size_t index = _array->index_for(addr);
// We must make sure that the offset table entry we use is valid. If
// "addr" is past the end, start at the last known one and go forward.
if (has_max_index) {
index = MIN2(index, max_index);
}
HeapWord* q = _array->address_for_index(index);
uint offset = _array->offset_array(index); // Extend u_char to uint.
while (offset >= N_words) {
// The excess of the offset from N_words indicates a power of Base
// to go back by.
size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
q -= (N_words * n_cards_back);
assert(q >= _sp->bottom(), "Went below bottom!");
index -= n_cards_back;
offset = _array->offset_array(index);
}
assert(offset < N_words, "offset too large");
q -= offset;
return q;
}
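// Standalone illustration of the "back-skip" walk performed above: entries
// below N_words are direct word offsets to the block start, while larger
// entries say how many whole cards to jump back before re-reading the table.
// The constants and the exact power encoding below are assumptions made for
// the example only, not necessarily the VM's values.
#include <cstdio>
static const unsigned N_words = 64;   // assumed card size in words
static const unsigned LogBase = 4;    // assumed: each step multiplies the skip by 16
static unsigned entry_to_cards_back(unsigned entry) {
  unsigned k = entry - N_words;       // entry == N_words + k  =>  skip Base^k cards
  return 1u << (LogBase * k);
}
int main() {
  const unsigned entries[] = { 10, 64, 65, 66 };
  for (unsigned e : entries) {
    if (e < N_words)
      printf("entry %3u: block starts %u words before this card\n", e, e);
    else
      printf("entry %3u: skip back %u cards and re-read the table\n",
             e, entry_to_cards_back(e));
  }
  return 0;
}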
inline HeapWord*
G1BlockOffsetArray::
forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
const void* addr) const {
if (csp() != NULL) {
if (addr >= csp()->top()) return csp()->top();
while (n <= addr) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += obj->size();
}
} else {
while (n <= addr) {
q = n;
oop obj = oop(q);
if (obj->klass() == NULL) return q;
n += _sp->block_size(q);
}
}
  assert(q <= n, "wrong order for q and n");
assert(addr < n, "wrong order for addr and n");
return q;
}
inline HeapWord*
G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
const void* addr) {
if (oop(q)->klass() == NULL) return q;
HeapWord* n = q + _sp->block_size(q);
// In the normal case, where the query "addr" is a card boundary, and the
// offset table chunks are the same size as cards, the block starting at
// "q" will contain addr, so the test below will fail, and we'll fall
// through quickly.
if (n <= addr) {
q = forward_to_block_containing_addr_slow(q, n, addr);
}
assert(q <= addr, "wrong order for current and arg");
return q;
}
//////////////////////////////////////////////////////////////////////////
// G1BlockOffsetArray inlines
//////////////////////////////////////////////////////////////////////////
inline void G1BlockOffsetArray::freed(HeapWord* blk_start, HeapWord* blk_end) {
// Verify that the BOT shows [blk_start, blk_end) to be one block.
verify_single_block(blk_start, blk_end);
// adjust _unallocated_block upward or downward
// as appropriate
if (BlockOffsetArrayUseUnallocatedBlock) {
assert(_unallocated_block <= _end,
"Inconsistent value for _unallocated_block");
if (blk_end >= _unallocated_block && blk_start <= _unallocated_block) {
// CMS-specific note: a block abutting _unallocated_block to
// its left is being freed, a new block is being added or
// we are resetting following a compaction
_unallocated_block = blk_start;
}
}
}
inline void G1BlockOffsetArray::freed(HeapWord* blk, size_t size) {
freed(blk, blk + size);
}
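// Toy model of the _unallocated_block bookkeeping used by allocated() and
// freed() above: a high-water mark that rises on allocation and only falls
// back when the block touching it is freed. Plain word indices stand in for
// heap addresses; this illustrates the invariant, not the real BOT.
#include <algorithm>
#include <cstddef>
#include <cstdio>
struct ToyUnallocatedTracker {
  size_t unallocated_block;                    // everything at or above this is untouched
  ToyUnallocatedTracker() : unallocated_block(0) {}
  void allocated(size_t blk_start, size_t blk_end) {
    (void)blk_start;
    unallocated_block = std::max(unallocated_block, blk_end);
  }
  void freed(size_t blk_start, size_t blk_end) {
    // only a block abutting the high-water mark pulls it back down
    if (blk_end >= unallocated_block && blk_start <= unallocated_block)
      unallocated_block = blk_start;
  }
};
int main() {
  ToyUnallocatedTracker t;
  t.allocated(0, 100);
  t.allocated(100, 180);
  t.freed(100, 180);                           // abuts the mark, so it drops back to 100
  printf("high-water mark = %zu\n", t.unallocated_block);
  return 0;
}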

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -0,0 +1,91 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Inline functions for G1CollectedHeap
inline HeapRegion*
G1CollectedHeap::heap_region_containing(const void* addr) const {
HeapRegion* hr = _hrs->addr_to_region(addr);
  // hr can be null if addr is in perm_gen
if (hr != NULL && hr->continuesHumongous()) {
hr = hr->humongous_start_region();
}
return hr;
}
inline HeapRegion*
G1CollectedHeap::heap_region_containing_raw(const void* addr) const {
HeapRegion* res = _hrs->addr_to_region(addr);
assert(res != NULL, "addr outside of heap?");
return res;
}
inline bool G1CollectedHeap::obj_in_cs(oop obj) {
HeapRegion* r = _hrs->addr_to_region(obj);
return r != NULL && r->in_collection_set();
}
inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size,
bool permit_collection_pause) {
HeapWord* res = NULL;
assert( SafepointSynchronize::is_at_safepoint() ||
Heap_lock->owned_by_self(), "pre-condition of the call" );
if (_cur_alloc_region != NULL) {
    // If this allocation causes a region to become non-empty,
// then we need to update our free_regions count.
if (_cur_alloc_region->is_empty()) {
res = _cur_alloc_region->allocate(word_size);
if (res != NULL)
_free_regions--;
} else {
res = _cur_alloc_region->allocate(word_size);
}
}
if (res != NULL) {
if (!SafepointSynchronize::is_at_safepoint()) {
assert( Heap_lock->owned_by_self(), "invariant" );
Heap_lock->unlock();
}
return res;
}
// attempt_allocation_slow will also unlock the heap lock when appropriate.
return attempt_allocation_slow(word_size, permit_collection_pause);
}
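// Minimal sketch of the locking contract attempt_allocation documents: the
// caller enters holding the heap lock (unless at a safepoint), and the lock
// has been released by the time a result comes back, whichever path produced
// it. std::mutex and plain operator new are stand-ins, not the VM's types.
#include <cstddef>
#include <cstdio>
#include <mutex>
static std::mutex heap_lock;
static void* toy_attempt_allocation_slow(size_t /*size*/) {
  heap_lock.unlock();                         // the slow path also unlocks before returning
  return NULL;                                // pretend the slow path failed
}
static void* toy_attempt_allocation(size_t size, bool fast_path_has_room) {
  if (fast_path_has_room) {
    void* res = ::operator new(size);         // stand-in for bumping the current alloc region
    heap_lock.unlock();                       // fast path releases the lock itself
    return res;
  }
  return toy_attempt_allocation_slow(size);   // otherwise the slow path releases it
}
int main() {
  heap_lock.lock();                           // caller acquires, callee releases
  void* p = toy_attempt_allocation(64, true);
  printf("got %p with the heap lock already released\n", p);
  ::operator delete(p);
  return 0;
}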
inline RefToScanQueue* G1CollectedHeap::task_queue(int i) {
return _task_queues->queue(i);
}
inline bool G1CollectedHeap::isMarkedPrev(oop obj) const {
return _cm->prevMarkBitMap()->isMarked((HeapWord *)obj);
}
inline bool G1CollectedHeap::isMarkedNext(oop obj) const {
return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -0,0 +1,187 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1MMUTracker.cpp.incl"
#define _DISABLE_MMU 0
// we can't rely on exact double comparisons, so tolerate a small margin of error
#define SMALL_MARGIN 0.0000001
#define is_double_leq_0(_value) ( (_value) < SMALL_MARGIN )
#define is_double_leq(_val1, _val2) is_double_leq_0((_val1) - (_val2))
#define is_double_geq(_val1, _val2) is_double_leq_0((_val2) - (_val1))
/***** ALL TIMES ARE IN SECS!!!!!!! *****/
G1MMUTracker::G1MMUTracker(double time_slice, double max_gc_time) :
_time_slice(time_slice),
_max_gc_time(max_gc_time),
_conc_overhead_time_sec(0.0) { }
void
G1MMUTracker::update_conc_overhead(double conc_overhead) {
double conc_overhead_time_sec = _time_slice * conc_overhead;
if (conc_overhead_time_sec > 0.9 * _max_gc_time) {
// We are screwed, as we only seem to have <10% of the soft
// real-time goal available for pauses. Let's admit defeat and
// allow something more generous as a pause target.
conc_overhead_time_sec = 0.75 * _max_gc_time;
}
_conc_overhead_time_sec = conc_overhead_time_sec;
}
G1MMUTrackerQueue::G1MMUTrackerQueue(double time_slice, double max_gc_time) :
G1MMUTracker(time_slice, max_gc_time),
_head_index(0),
_tail_index(trim_index(_head_index+1)),
_no_entries(0) { }
void G1MMUTrackerQueue::remove_expired_entries(double current_time) {
double limit = current_time - _time_slice;
while (_no_entries > 0) {
if (is_double_geq(limit, _array[_tail_index].end_time())) {
_tail_index = trim_index(_tail_index + 1);
--_no_entries;
} else
return;
}
guarantee(_no_entries == 0, "should have no entries in the array");
}
double G1MMUTrackerQueue::calculate_gc_time(double current_time) {
double gc_time = 0.0;
double limit = current_time - _time_slice;
for (int i = 0; i < _no_entries; ++i) {
int index = trim_index(_tail_index + i);
G1MMUTrackerQueueElem *elem = &_array[index];
if (elem->end_time() > limit) {
if (elem->start_time() > limit)
gc_time += elem->duration();
else
gc_time += elem->end_time() - limit;
}
}
return gc_time;
}
void G1MMUTrackerQueue::add_pause(double start, double end, bool gc_thread) {
double longest_allowed = longest_pause_internal(start);
if (longest_allowed < 0.0)
longest_allowed = 0.0;
double duration = end - start;
remove_expired_entries(end);
if (_no_entries == QueueLength) {
// OK, right now when we fill up we bomb out
// there are a few ways of dealing with this "gracefully"
// increase the array size (:-)
// remove the oldest entry (this might allow more GC time for
// the time slice than what's allowed)
    // consolidate the two entries with the minimum gap between them
    // (this might allow less GC time than what's allowed)
guarantee(0, "array full, currently we can't recover");
}
_head_index = trim_index(_head_index + 1);
++_no_entries;
_array[_head_index] = G1MMUTrackerQueueElem(start, end);
}
// The _internal call does not remove expired entries;
// this is for trying things out in the future and in a couple
// of other places (debugging)
double G1MMUTrackerQueue::longest_pause(double current_time) {
if (_DISABLE_MMU)
return _max_gc_time;
MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
remove_expired_entries(current_time);
return longest_pause_internal(current_time);
}
double G1MMUTrackerQueue::longest_pause_internal(double current_time) {
double target_time = _max_gc_time;
while( 1 ) {
double gc_time =
calculate_gc_time(current_time + target_time) + _conc_overhead_time_sec;
double diff = target_time + gc_time - _max_gc_time;
if (!is_double_leq_0(diff)) {
target_time -= diff;
if (is_double_leq_0(target_time)) {
target_time = -1.0;
break;
}
} else {
break;
}
}
return target_time;
}
// The _internal call does not remove expired entries;
// this is for trying things out in the future and in a couple
// of other places (debugging)
double G1MMUTrackerQueue::when_sec(double current_time, double pause_time) {
if (_DISABLE_MMU)
return 0.0;
MutexLockerEx x(MMUTracker_lock, Mutex::_no_safepoint_check_flag);
remove_expired_entries(current_time);
return when_internal(current_time, pause_time);
}
double G1MMUTrackerQueue::when_internal(double current_time,
double pause_time) {
// if the pause is over the maximum, just assume that it's the maximum
double adjusted_pause_time =
(pause_time > max_gc_time()) ? max_gc_time() : pause_time;
double earliest_end = current_time + adjusted_pause_time;
double limit = earliest_end - _time_slice;
double gc_time = calculate_gc_time(earliest_end);
double diff = gc_time + adjusted_pause_time - max_gc_time();
if (is_double_leq_0(diff))
return 0.0;
int index = _tail_index;
while ( 1 ) {
G1MMUTrackerQueueElem *elem = &_array[index];
if (elem->end_time() > limit) {
if (elem->start_time() > limit)
diff -= elem->duration();
else
diff -= elem->end_time() - limit;
if (is_double_leq_0(diff))
return elem->end_time() + diff + _time_slice - adjusted_pause_time - current_time;
}
index = trim_index(index+1);
guarantee(index != trim_index(_head_index + 1), "should not go past head");
}
}
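// Standalone restatement of the sliding-window bookkeeping above, with
// made-up numbers: a 20 ms time slice, a 5 ms GC budget, and one recorded
// 4 ms pause. Only the calculate_gc_time-style accounting is mirrored here,
// not the fixed-point search; all values are assumptions for the example.
#include <cstdio>
#include <vector>
struct ToyPause { double start, end; };
static double gc_time_in_slice(const std::vector<ToyPause>& pauses,
                               double slice_end, double time_slice) {
  double limit = slice_end - time_slice, sum = 0.0;
  for (const ToyPause& p : pauses)
    if (p.end > limit)
      sum += p.end - ((p.start > limit) ? p.start : limit);
  return sum;
}
int main() {
  const double time_slice = 0.020, max_gc = 0.005;   // seconds, as in the tracker
  std::vector<ToyPause> pauses = { { 0.000, 0.004 } };  // one 4 ms pause already recorded
  double now = 0.010;
  double used = gc_time_in_slice(pauses, now + max_gc, time_slice);
  printf("GC time already in the slice: %.3f s, budget left: %.3f s\n",
         used, max_gc - used);
  return 0;
}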

View file

@@ -0,0 +1,130 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Keeps track of the GC work and decides when it is OK to do GC work
// and for how long so that the MMU invariants are maintained.
/***** ALL TIMES ARE IN SECS!!!!!!! *****/
// this is the "interface"
class G1MMUTracker {
protected:
double _time_slice;
double _max_gc_time; // this is per time slice
double _conc_overhead_time_sec;
public:
G1MMUTracker(double time_slice, double max_gc_time);
void update_conc_overhead(double conc_overhead);
virtual void add_pause(double start, double end, bool gc_thread) = 0;
virtual double longest_pause(double current_time) = 0;
virtual double when_sec(double current_time, double pause_time) = 0;
double max_gc_time() {
return _max_gc_time - _conc_overhead_time_sec;
}
inline bool now_max_gc(double current_time) {
return when_sec(current_time, max_gc_time()) < 0.00001;
}
inline double when_max_gc_sec(double current_time) {
return when_sec(current_time, max_gc_time());
}
inline jlong when_max_gc_ms(double current_time) {
double when = when_max_gc_sec(current_time);
return (jlong) (when * 1000.0);
}
inline jlong when_ms(double current_time, double pause_time) {
double when = when_sec(current_time, pause_time);
return (jlong) (when * 1000.0);
}
};
class G1MMUTrackerQueueElem {
private:
double _start_time;
double _end_time;
public:
inline double start_time() { return _start_time; }
inline double end_time() { return _end_time; }
inline double duration() { return _end_time - _start_time; }
G1MMUTrackerQueueElem() {
_start_time = 0.0;
_end_time = 0.0;
}
G1MMUTrackerQueueElem(double start_time, double end_time) {
_start_time = start_time;
_end_time = end_time;
}
};
// this is an implementation of the MMUTracker using a (fixed-size) queue
// that keeps track of all the recent pause times
class G1MMUTrackerQueue: public G1MMUTracker {
private:
enum PrivateConstants {
QueueLength = 64
};
// The array keeps track of all the pauses that fall within a time
// slice (the last time slice during which pauses took place).
// The data structure implemented is a circular queue.
// Head "points" to the most recent addition, tail to the oldest one.
// The array is of fixed size and I don't think we'll need more than
// two or three entries with the current behaviour of G1 pauses.
// If the array is full, an easy fix is to look for the pauses with
  // the shortest gap between them and consolidate them.
G1MMUTrackerQueueElem _array[QueueLength];
int _head_index;
int _tail_index;
int _no_entries;
inline int trim_index(int index) {
return (index + QueueLength) % QueueLength;
}
void remove_expired_entries(double current_time);
double calculate_gc_time(double current_time);
double longest_pause_internal(double current_time);
double when_internal(double current_time, double pause_time);
public:
G1MMUTrackerQueue(double time_slice, double max_gc_time);
virtual void add_pause(double start, double end, bool gc_thread);
virtual double longest_pause(double current_time);
virtual double when_sec(double current_time, double pause_time);
};
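// The queue above is circular: indices are folded back into range by
// trim_index. A quick standalone check of that wrap-around arithmetic with
// the same QueueLength the class declares; purely illustrative.
#include <cstdio>
static const int QueueLength = 64;
static int trim_index(int index) { return (index + QueueLength) % QueueLength; }
int main() {
  printf("%d %d %d\n",
         trim_index(63 + 1),    // advancing past the last slot wraps to 0
         trim_index(0 - 1),     // stepping back from slot 0 lands on 63
         trim_index(130));      // an arbitrary index is folded to 2
  return 0;
}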

View file

@@ -0,0 +1,385 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1MarkSweep.cpp.incl"
class HeapRegion;
void G1MarkSweep::invoke_at_safepoint(ReferenceProcessor* rp,
bool clear_all_softrefs) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
// hook up weak ref data so it can be used during Mark-Sweep
assert(GenMarkSweep::ref_processor() == NULL, "no stomping");
GenMarkSweep::_ref_processor = rp;
assert(rp != NULL, "should be non-NULL");
// When collecting the permanent generation methodOops may be moving,
// so we either have to flush all bcp data or convert it into bci.
CodeCache::gc_prologue();
Threads::gc_prologue();
// Increment the invocation count for the permanent generation, since it is
// implicitly collected whenever we do a full mark sweep collection.
SharedHeap* sh = SharedHeap::heap();
sh->perm_gen()->stat_record()->invocations++;
bool marked_for_unloading = false;
allocate_stacks();
// We should save the marks of the currently locked biased monitors.
// The marking doesn't preserve the marks of biased objects.
BiasedLocking::preserve_marks();
mark_sweep_phase1(marked_for_unloading, clear_all_softrefs);
if (G1VerifyConcMark) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
g1h->checkConcurrentMark();
}
mark_sweep_phase2();
// Don't add any more derived pointers during phase3
COMPILER2_PRESENT(DerivedPointerTable::set_active(false));
mark_sweep_phase3();
mark_sweep_phase4();
GenMarkSweep::restore_marks();
BiasedLocking::restore_marks();
GenMarkSweep::deallocate_stacks();
// We must invalidate the perm-gen rs, so that it gets rebuilt.
GenRemSet* rs = sh->rem_set();
rs->invalidate(sh->perm_gen()->used_region(), true /*whole_heap*/);
// "free at last gc" is calculated from these.
// CHF: cheating for now!!!
// Universe::set_heap_capacity_at_last_gc(Universe::heap()->capacity());
// Universe::set_heap_used_at_last_gc(Universe::heap()->used());
Threads::gc_epilogue();
CodeCache::gc_epilogue();
// refs processing: clean slate
GenMarkSweep::_ref_processor = NULL;
}
void G1MarkSweep::allocate_stacks() {
GenMarkSweep::_preserved_count_max = 0;
GenMarkSweep::_preserved_marks = NULL;
GenMarkSweep::_preserved_count = 0;
GenMarkSweep::_preserved_mark_stack = NULL;
GenMarkSweep::_preserved_oop_stack = NULL;
GenMarkSweep::_marking_stack =
new (ResourceObj::C_HEAP) GrowableArray<oop>(4000, true);
size_t size = SystemDictionary::number_of_classes() * 2;
GenMarkSweep::_revisit_klass_stack =
new (ResourceObj::C_HEAP) GrowableArray<Klass*>((int)size, true);
}
void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading,
bool clear_all_softrefs) {
// Recursively traverse all live objects and mark them
EventMark m("1 mark object");
TraceTime tm("phase 1", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace(" 1");
SharedHeap* sh = SharedHeap::heap();
sh->process_strong_roots(true, // Collecting permanent generation.
SharedHeap::SO_SystemClasses,
&GenMarkSweep::follow_root_closure,
&GenMarkSweep::follow_root_closure);
// Process reference objects found during marking
ReferencePolicy *soft_ref_policy;
if (clear_all_softrefs) {
soft_ref_policy = new AlwaysClearPolicy();
} else {
#ifdef COMPILER2
soft_ref_policy = new LRUMaxHeapPolicy();
#else
soft_ref_policy = new LRUCurrentHeapPolicy();
#endif
}
assert(soft_ref_policy != NULL,"No soft reference policy");
GenMarkSweep::ref_processor()->process_discovered_references(
soft_ref_policy,
&GenMarkSweep::is_alive,
&GenMarkSweep::keep_alive,
&GenMarkSweep::follow_stack_closure,
NULL);
// Follow system dictionary roots and unload classes
bool purged_class = SystemDictionary::do_unloading(&GenMarkSweep::is_alive);
assert(GenMarkSweep::_marking_stack->is_empty(),
"stack should be empty by now");
// Follow code cache roots (has to be done after system dictionary,
// assumes all live klasses are marked)
CodeCache::do_unloading(&GenMarkSweep::is_alive,
&GenMarkSweep::keep_alive,
purged_class);
GenMarkSweep::follow_stack();
// Update subklass/sibling/implementor links of live klasses
GenMarkSweep::follow_weak_klass_links();
assert(GenMarkSweep::_marking_stack->is_empty(),
"stack should be empty by now");
// Visit symbol and interned string tables and delete unmarked oops
SymbolTable::unlink(&GenMarkSweep::is_alive);
StringTable::unlink(&GenMarkSweep::is_alive);
assert(GenMarkSweep::_marking_stack->is_empty(),
"stack should be empty by now");
}
class G1PrepareCompactClosure: public HeapRegionClosure {
ModRefBarrierSet* _mrbs;
CompactPoint _cp;
bool _popular_only;
void free_humongous_region(HeapRegion* hr) {
HeapWord* bot = hr->bottom();
HeapWord* end = hr->end();
assert(hr->startsHumongous(),
"Only the start of a humongous region should be freed.");
G1CollectedHeap::heap()->free_region(hr);
hr->prepare_for_compaction(&_cp);
// Also clear the part of the card table that will be unused after
// compaction.
_mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
}
public:
G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) :
_cp(NULL, cs, cs->initialize_threshold()),
_mrbs(G1CollectedHeap::heap()->mr_bs()),
_popular_only(popular_only)
{}
bool doHeapRegion(HeapRegion* hr) {
if (_popular_only && !hr->popular())
return true; // terminate early
else if (!_popular_only && hr->popular())
return false; // skip this one.
if (hr->isHumongous()) {
if (hr->startsHumongous()) {
oop obj = oop(hr->bottom());
if (obj->is_gc_marked()) {
obj->forward_to(obj);
} else {
free_humongous_region(hr);
}
} else {
assert(hr->continuesHumongous(), "Invalid humongous.");
}
} else {
hr->prepare_for_compaction(&_cp);
// Also clear the part of the card table that will be unused after
// compaction.
_mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
}
return false;
}
};
// Stolen verbatim from g1CollectedHeap.cpp
class FindFirstRegionClosure: public HeapRegionClosure {
HeapRegion* _a_region;
bool _find_popular;
public:
FindFirstRegionClosure(bool find_popular) :
_a_region(NULL), _find_popular(find_popular) {}
bool doHeapRegion(HeapRegion* r) {
if (r->popular() == _find_popular) {
_a_region = r;
return true;
} else {
return false;
}
}
HeapRegion* result() { return _a_region; }
};
void G1MarkSweep::mark_sweep_phase2() {
// Now all live objects are marked, compute the new object addresses.
// It is imperative that we traverse perm_gen LAST. If dead space is
  // allowed, a range of dead objects may get overwritten by a dead int
// array. If perm_gen is not traversed last a klassOop may get
// overwritten. This is fine since it is dead, but if the class has dead
// instances we have to skip them, and in order to find their size we
// need the klassOop!
//
// It is not required that we traverse spaces in the same order in
// phase2, phase3 and phase4, but the ValidateMarkSweep live oops
// tracking expects us to do so. See comment under phase4.
G1CollectedHeap* g1h = G1CollectedHeap::heap();
Generation* pg = g1h->perm_gen();
EventMark m("2 compute new addresses");
TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace("2");
// First we compact the popular regions.
if (G1NumPopularRegions > 0) {
CompactibleSpace* sp = g1h->first_compactible_space();
FindFirstRegionClosure cl(true /*find_popular*/);
g1h->heap_region_iterate(&cl);
HeapRegion *r = cl.result();
assert(r->popular(), "should have found a popular region.");
assert(r == sp, "first popular heap region should "
"== first compactible space");
G1PrepareCompactClosure blk(sp, true/*popular_only*/);
g1h->heap_region_iterate(&blk);
}
// Now we do the regular regions.
FindFirstRegionClosure cl(false /*find_popular*/);
g1h->heap_region_iterate(&cl);
HeapRegion *r = cl.result();
assert(!r->popular(), "should have founda non-popular region.");
CompactibleSpace* sp = r;
if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
sp = r->next_compaction_space();
}
G1PrepareCompactClosure blk(sp, false/*popular_only*/);
g1h->heap_region_iterate(&blk);
CompactPoint perm_cp(pg, NULL, NULL);
pg->prepare_for_compaction(&perm_cp);
}
class G1AdjustPointersClosure: public HeapRegionClosure {
public:
bool doHeapRegion(HeapRegion* r) {
if (r->isHumongous()) {
if (r->startsHumongous()) {
// We must adjust the pointers on the single H object.
oop obj = oop(r->bottom());
debug_only(GenMarkSweep::track_interior_pointers(obj));
// point all the oops to the new location
obj->adjust_pointers();
debug_only(GenMarkSweep::check_interior_pointers());
}
} else {
// This really ought to be "as_CompactibleSpace"...
r->adjust_pointers();
}
return false;
}
};
void G1MarkSweep::mark_sweep_phase3() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
Generation* pg = g1h->perm_gen();
// Adjust the pointers to reflect the new locations
EventMark m("3 adjust pointers");
TraceTime tm("phase 3", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace("3");
SharedHeap* sh = SharedHeap::heap();
sh->process_strong_roots(true, // Collecting permanent generation.
SharedHeap::SO_AllClasses,
&GenMarkSweep::adjust_root_pointer_closure,
&GenMarkSweep::adjust_pointer_closure);
g1h->ref_processor()->weak_oops_do(&GenMarkSweep::adjust_root_pointer_closure);
// Now adjust pointers in remaining weak roots. (All of which should
// have been cleared if they pointed to non-surviving objects.)
g1h->g1_process_weak_roots(&GenMarkSweep::adjust_root_pointer_closure,
&GenMarkSweep::adjust_pointer_closure);
GenMarkSweep::adjust_marks();
G1AdjustPointersClosure blk;
g1h->heap_region_iterate(&blk);
pg->adjust_pointers();
}
class G1SpaceCompactClosure: public HeapRegionClosure {
public:
G1SpaceCompactClosure() {}
bool doHeapRegion(HeapRegion* hr) {
if (hr->isHumongous()) {
if (hr->startsHumongous()) {
oop obj = oop(hr->bottom());
if (obj->is_gc_marked()) {
obj->init_mark();
} else {
assert(hr->is_empty(), "Should have been cleared in phase 2.");
}
hr->reset_during_compaction();
}
} else {
hr->compact();
}
return false;
}
};
void G1MarkSweep::mark_sweep_phase4() {
// All pointers are now adjusted, move objects accordingly
// It is imperative that we traverse perm_gen first in phase4. All
// classes must be allocated earlier than their instances, and traversing
// perm_gen first makes sure that all klassOops have moved to their new
  // location before any instance does a dispatch through its klass!
// The ValidateMarkSweep live oops tracking expects us to traverse spaces
// in the same order in phase2, phase3 and phase4. We don't quite do that
// here (perm_gen first rather than last), so we tell the validate code
// to use a higher index (saved from phase2) when verifying perm_gen.
G1CollectedHeap* g1h = G1CollectedHeap::heap();
Generation* pg = g1h->perm_gen();
EventMark m("4 compact heap");
TraceTime tm("phase 4", PrintGC && Verbose, true, gclog_or_tty);
GenMarkSweep::trace("4");
pg->compact();
G1SpaceCompactClosure blk;
g1h->heap_region_iterate(&blk);
}
// Local Variables: ***
// c-indentation-style: gnu ***
// End: ***

View file

@@ -0,0 +1,57 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class ReferenceProcessor;
// G1MarkSweep takes care of global mark-compact garbage collection for a
// G1CollectedHeap using a four-phase pointer forwarding algorithm. All
// generations are assumed to support marking; those that can also support
// compaction.
//
// Class unloading will only occur when a full gc is invoked.
class G1MarkSweep : AllStatic {
friend class VM_G1MarkSweep;
friend class Scavenge;
public:
static void invoke_at_safepoint(ReferenceProcessor* rp,
bool clear_all_softrefs);
private:
// Mark live objects
static void mark_sweep_phase1(bool& marked_for_deopt,
bool clear_all_softrefs);
// Calculate new addresses
static void mark_sweep_phase2();
// Update pointers
static void mark_sweep_phase3();
// Move objects to new positions
static void mark_sweep_phase4();
static void allocate_stacks();
};
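// A self-contained caricature of the four phases this class drives on the
// real heap: marking (represented here by pre-set live flags), computing new
// addresses, adjusting references, then moving the objects. The vector-based
// "heap" and the ToyObj layout are invented for the illustration.
#include <cstdio>
#include <vector>
struct ToyObj { bool live; int ref; int new_index; };  // ref: index of referenced object, -1 if none
int main() {
  std::vector<ToyObj> heap = { {true, 2, -1}, {false, -1, -1}, {true, -1, -1} };
  // phase 2: compute new addresses by sliding live objects towards the bottom
  int next_free = 0;
  for (ToyObj& o : heap)
    if (o.live) o.new_index = next_free++;
  // phase 3: adjust references so they point at the new locations
  for (ToyObj& o : heap)
    if (o.live && o.ref >= 0) o.ref = heap[o.ref].new_index;
  // phase 4: move the surviving objects to their new slots
  std::vector<ToyObj> compacted(next_free);
  for (const ToyObj& o : heap)
    if (o.live) compacted[o.new_index] = o;
  printf("live objects after compaction: %zu, first object now references slot %d\n",
         compacted.size(), compacted[0].ref);
  return 0;
}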

View file

@@ -0,0 +1,202 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class HeapRegion;
class G1CollectedHeap;
class G1RemSet;
class HRInto_G1RemSet;
class G1RemSet;
class ConcurrentMark;
class DirtyCardToOopClosure;
class CMBitMap;
class CMMarkStack;
class G1ParScanThreadState;
// A class that scans oops in a given heap region (much as OopsInGenClosure
// scans oops in a generation.)
class OopsInHeapRegionClosure: public OopsInGenClosure {
protected:
HeapRegion* _from;
public:
virtual void set_region(HeapRegion* from) { _from = from; }
};
class G1ScanAndBalanceClosure : public OopClosure {
G1CollectedHeap* _g1;
static int _nq;
public:
G1ScanAndBalanceClosure(G1CollectedHeap* g1) : _g1(g1) { }
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
};
class G1ParClosureSuper : public OopsInHeapRegionClosure {
protected:
G1CollectedHeap* _g1;
G1RemSet* _g1_rem;
ConcurrentMark* _cm;
G1ParScanThreadState* _par_scan_state;
public:
G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
bool apply_to_weak_ref_discovered_field() { return true; }
};
class G1ParScanClosure : public G1ParClosureSuper {
public:
G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
G1ParClosureSuper(g1, par_scan_state) { }
void do_oop_nv(oop* p); // should be made inline
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
#define G1_PARTIAL_ARRAY_MASK 1
class G1ParScanPartialArrayClosure : public G1ParClosureSuper {
G1ParScanClosure _scanner;
template <class T> void process_array_chunk(oop obj, int start, int end);
public:
G1ParScanPartialArrayClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
G1ParClosureSuper(g1, par_scan_state), _scanner(g1, par_scan_state) { }
void do_oop_nv(oop* p);
void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
class G1ParCopyHelper : public G1ParClosureSuper {
G1ParScanClosure *_scanner;
protected:
void mark_forwardee(oop* p);
oop copy_to_survivor_space(oop obj);
public:
G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
G1ParScanClosure *scanner) :
G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
};
template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
class G1ParCopyClosure : public G1ParCopyHelper {
G1ParScanClosure _scanner;
void do_oop_work(oop* p);
void do_oop_work(narrowOop* p) { guarantee(false, "NYI"); }
public:
G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
_scanner(g1, par_scan_state), G1ParCopyHelper(g1, par_scan_state, &_scanner) { }
inline void do_oop_nv(oop* p) {
do_oop_work(p);
if (do_mark_forwardee)
mark_forwardee(p);
}
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p) { do_oop_nv(p); }
virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
};
typedef G1ParCopyClosure<false, G1BarrierNone, false> G1ParScanExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, false> G1ParScanPermClosure;
typedef G1ParCopyClosure<false, G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
typedef G1ParCopyClosure<true, G1BarrierNone, true> G1ParScanAndMarkPermClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, false> G1ParScanHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierRS, true> G1ParScanAndMarkHeapRSClosure;
typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
class FilterIntoCSClosure: public OopClosure {
G1CollectedHeap* _g1;
OopClosure* _oc;
DirtyCardToOopClosure* _dcto_cl;
public:
FilterIntoCSClosure( DirtyCardToOopClosure* dcto_cl,
G1CollectedHeap* g1, OopClosure* oc) :
_dcto_cl(dcto_cl), _g1(g1), _oc(oc)
{}
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
};
class FilterInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
G1CollectedHeap* _g1;
OopsInHeapRegionClosure* _oc;
public:
FilterInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
OopsInHeapRegionClosure* oc) :
_g1(g1), _oc(oc)
{}
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
void set_region(HeapRegion* from) {
_oc->set_region(from);
}
};
class FilterAndMarkInHeapRegionAndIntoCSClosure : public OopsInHeapRegionClosure {
G1CollectedHeap* _g1;
ConcurrentMark* _cm;
OopsInHeapRegionClosure* _oc;
public:
FilterAndMarkInHeapRegionAndIntoCSClosure(G1CollectedHeap* g1,
OopsInHeapRegionClosure* oc,
ConcurrentMark* cm)
: _g1(g1), _oc(oc), _cm(cm) { }
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
void set_region(HeapRegion* from) {
_oc->set_region(from);
}
};
class FilterOutOfRegionClosure: public OopClosure {
HeapWord* _r_bottom;
HeapWord* _r_end;
OopClosure* _oc;
int _out_of_region;
public:
FilterOutOfRegionClosure(HeapRegion* r, OopClosure* oc);
inline void do_oop_nv(oop* p);
inline void do_oop_nv(narrowOop* p) { guarantee(false, "NYI"); }
virtual void do_oop(oop* p);
virtual void do_oop(narrowOop* p) { guarantee(false, "NYI"); }
bool apply_to_weak_ref_discovered_field() { return true; }
bool do_header() { return false; }
int out_of_region() { return _out_of_region; }
};
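// Standalone sketch of the pattern behind the typedefs above: one closure
// template whose compile-time parameters switch extra work on or off, so
// each typedef is a distinct, fully-specialized variant. The names and the
// printed "work" below are invented for the illustration.
#include <cstdio>
enum ToyBarrier { ToyBarrierNone, ToyBarrierRS };
template <bool do_gen_barrier, ToyBarrier barrier, bool do_mark_forwardee>
struct ToyCopyClosure {
  void do_oop(int* p) {
    printf("copy %d", *p);
    if (do_gen_barrier)          printf(" +gen-barrier");
    if (barrier == ToyBarrierRS) printf(" +remset-update");
    if (do_mark_forwardee)       printf(" +mark-forwardee");
    printf("\n");
  }
};
typedef ToyCopyClosure<false, ToyBarrierNone, false> ToyExtRootClosure;
typedef ToyCopyClosure<false, ToyBarrierRS,   true>  ToyScanAndMarkHeapRSClosure;
int main() {
  int v = 42;
  ToyExtRootClosure ext;            ext.do_oop(&v);   // plain copy, no extra work
  ToyScanAndMarkHeapRSClosure rs;   rs.do_oop(&v);    // copy + remset update + marking
  return 0;
}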

View file

@@ -0,0 +1,112 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
/*
* This really ought to be an inline function, but apparently the C++
* compiler sometimes sees fit to ignore inline declarations. Sigh.
*/
// This must be ifdef'ed because the counting it controls is in a
// perf-critical inner loop.
#define FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT 0
inline void FilterIntoCSClosure::do_oop_nv(oop* p) {
oop obj = *p;
if (obj != NULL && _g1->obj_in_cs(obj)) {
_oc->do_oop(p);
#if FILTERINTOCSCLOSURE_DOHISTOGRAMCOUNT
_dcto_cl->incr_count();
#endif
}
}
inline void FilterIntoCSClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
#define FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT 0
inline void FilterOutOfRegionClosure::do_oop_nv(oop* p) {
oop obj = *p;
HeapWord* obj_hw = (HeapWord*)obj;
if (obj_hw != NULL && (obj_hw < _r_bottom || obj_hw >= _r_end)) {
_oc->do_oop(p);
#if FILTEROUTOFREGIONCLOSURE_DOHISTOGRAMCOUNT
_out_of_region++;
#endif
}
}
inline void FilterOutOfRegionClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
inline void FilterInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
oop obj = *p;
if (obj != NULL && _g1->obj_in_cs(obj))
_oc->do_oop(p);
}
inline void FilterInHeapRegionAndIntoCSClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop_nv(oop* p) {
oop obj = *p;
if (obj != NULL) {
HeapRegion* hr = _g1->heap_region_containing((HeapWord*) obj);
if (hr != NULL) {
if (hr->in_collection_set())
_oc->do_oop(p);
else if (!hr->is_young())
_cm->grayRoot(obj);
}
}
}
inline void FilterAndMarkInHeapRegionAndIntoCSClosure::do_oop(oop* p)
{
do_oop_nv(p);
}
inline void G1ScanAndBalanceClosure::do_oop_nv(oop* p) {
RefToScanQueue* q;
if (ParallelGCThreads > 0) {
// Deal the work out equally.
_nq = (_nq + 1) % ParallelGCThreads;
q = _g1->task_queue(_nq);
} else {
q = _g1->task_queue(0);
}
bool nooverflow = q->push(p);
  guarantee(nooverflow, "Overflow during popularity region processing");
}
inline void G1ScanAndBalanceClosure::do_oop(oop* p) {
do_oop_nv(p);
}
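// The inline closures above share one shape: apply a cheap predicate to the
// referenced object and only then delegate to the wrapped closure. A
// standalone caricature of FilterOutOfRegionClosure's test, with plain
// integers standing in for heap addresses.
#include <cstdio>
struct ToyFilterOutOfRegion {
  long r_bottom, r_end;
  void (*delegate)(long);
  void do_oop(long obj) {
    if (obj != 0 && (obj < r_bottom || obj >= r_end))
      delegate(obj);                     // only out-of-region references are passed on
  }
};
static void print_ref(long obj) { printf("out-of-region reference to %ld\n", obj); }
int main() {
  ToyFilterOutOfRegion cl = { 100, 200, print_ref };
  cl.do_oop(150);                        // inside [100, 200): filtered out
  cl.do_oop(250);                        // outside: delegated
  return 0;
}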

File diff suppressed because it is too large

View file

@@ -0,0 +1,216 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// A G1RemSet provides ways of iterating over pointers into a selected
// collection set.
class G1CollectedHeap;
class CardTableModRefBarrierSet;
class HRInto_G1RemSet;
class ConcurrentG1Refine;
class G1RemSet {
protected:
G1CollectedHeap* _g1;
unsigned _conc_refine_traversals;
unsigned _conc_refine_cards;
size_t n_workers();
public:
G1RemSet(G1CollectedHeap* g1) :
_g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
{}
// Invoke "blk->do_oop" on all pointers into the CS in object in regions
// outside the CS (having invoked "blk->set_region" to set the "from"
// region correctly beforehand.) The "worker_i" param is for the
// parallel case where the number of the worker thread calling this
// function can be helpful in partitioning the work to be done. It
// should be the same as the "i" passed to the calling thread's
  // work(i) function. In the sequential case this param will be ignored.
virtual void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
int worker_i) = 0;
// Prepare for and cleanup after an oops_into_collection_set_do
// call. Must call each of these once before and after (in sequential
  // code) any threads call oops_into_collection_set_do. (This offers an
  // opportunity for sequential setup and teardown of structures needed by a
// parallel iteration over the CS's RS.)
virtual void prepare_for_oops_into_collection_set_do() = 0;
virtual void cleanup_after_oops_into_collection_set_do() = 0;
// If "this" is of the given subtype, return "this", else "NULL".
virtual HRInto_G1RemSet* as_HRInto_G1RemSet() { return NULL; }
// Record, if necessary, the fact that *p (where "p" is in region "from")
// has changed to its new value.
virtual void write_ref(HeapRegion* from, oop* p) = 0;
virtual void par_write_ref(HeapRegion* from, oop* p, int tid) = 0;
// Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
// or card, respectively, such that a region or card with a corresponding
// 0 bit contains no part of any live object. Eliminates any remembered
// set entries that correspond to dead heap ranges.
virtual void scrub(BitMap* region_bm, BitMap* card_bm) = 0;
  // Like the above, but assumes it is called in parallel: "worker_num" is the
// parallel thread id of the current thread, and "claim_val" is the
// value that should be used to claim heap regions.
virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val) = 0;
// Do any "refinement" activity that might be appropriate to the given
// G1RemSet. If "refinement" has iterateive "passes", do one pass.
// If "t" is non-NULL, it is the thread performing the refinement.
// Default implementation does nothing.
virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
// Refine the card corresponding to "card_ptr". If "sts" is non-NULL,
// join and leave around parts that must be atomic wrt GC. (NULL means
// being done at a safepoint.)
virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}
unsigned conc_refine_cards() { return _conc_refine_cards; }
// Print any relevant summary info.
virtual void print_summary_info() {}
  // Prepare remembered set for verification.
virtual void prepare_for_verify() {};
};
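// Sketch of the call protocol documented above: one sequential prepare, any
// number of per-worker scans (parallel in the VM), then one sequential
// cleanup. The stand-in class only demonstrates the required ordering.
#include <cstdio>
struct ToyRemSet {
  void prepare_for_oops_into_collection_set_do()   { printf("prepare (sequential)\n"); }
  void oops_into_collection_set_do(int worker_i)   { printf("scan collection-set RS, worker %d\n", worker_i); }
  void cleanup_after_oops_into_collection_set_do() { printf("cleanup (sequential)\n"); }
};
int main() {
  ToyRemSet rs;
  rs.prepare_for_oops_into_collection_set_do();
  for (int i = 0; i < 2; i++)            // the VM would hand these to GC worker threads
    rs.oops_into_collection_set_do(i);
  rs.cleanup_after_oops_into_collection_set_do();
  return 0;
}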
// The simplest possible G1RemSet: iterates over all objects in non-CS
// regions, searching for pointers into the CS.
class StupidG1RemSet: public G1RemSet {
public:
StupidG1RemSet(G1CollectedHeap* g1) : G1RemSet(g1) {}
void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
int worker_i);
void prepare_for_oops_into_collection_set_do() {}
void cleanup_after_oops_into_collection_set_do() {}
// Nothing is necessary in the version below.
void write_ref(HeapRegion* from, oop* p) {}
void par_write_ref(HeapRegion* from, oop* p, int tid) {}
void scrub(BitMap* region_bm, BitMap* card_bm) {}
void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val) {}
};
// A G1RemSet in which each heap region has a rem set that records the
// external heap references into it. Uses a mod ref bs to track updates,
// so that they can be used to update the individual region remsets.
class HRInto_G1RemSet: public G1RemSet {
protected:
enum SomePrivateConstants {
UpdateRStoMergeSync = 0,
MergeRStoDoDirtySync = 1,
DoDirtySync = 2,
LastSync = 3,
SeqTask = 0,
NumSeqTasks = 1
};
CardTableModRefBS* _ct_bs;
SubTasksDone* _seq_task;
G1CollectorPolicy* _g1p;
ConcurrentG1Refine* _cg1r;
size_t* _cards_scanned;
size_t _total_cards_scanned;
// _par_traversal_in_progress is "true" iff a parallel traversal is in
// progress. If so, then cards added to remembered sets should also have
  // their references into the collection set summarized in "_new_refs".
bool _par_traversal_in_progress;
void set_par_traversal(bool b);
GrowableArray<oop*>** _new_refs;
public:
// This is called to reset dual hash tables after the gc pause
// is finished and the initial hash table is no longer being
// scanned.
void cleanupHRRS();
HRInto_G1RemSet(G1CollectedHeap* g1, CardTableModRefBS* ct_bs);
~HRInto_G1RemSet();
void oops_into_collection_set_do(OopsInHeapRegionClosure* blk,
int worker_i);
void prepare_for_oops_into_collection_set_do();
void cleanup_after_oops_into_collection_set_do();
void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
void scanNewRefsRS(OopsInHeapRegionClosure* oc, int worker_i);
void updateRS(int worker_i);
HeapRegion* calculateStartRegion(int i);
HRInto_G1RemSet* as_HRInto_G1RemSet() { return this; }
CardTableModRefBS* ct_bs() { return _ct_bs; }
size_t cardsScanned() { return _total_cards_scanned; }
// Record, if necessary, the fact that *p (where "p" is in region "from",
// which is required to be non-NULL) has changed to a new non-NULL value.
inline void write_ref(HeapRegion* from, oop* p);
// The "_nv" version is the same; it exists just so that it is not virtual.
inline void write_ref_nv(HeapRegion* from, oop* p);
inline bool self_forwarded(oop obj);
inline void par_write_ref(HeapRegion* from, oop* p, int tid);
void scrub(BitMap* region_bm, BitMap* card_bm);
void scrub_par(BitMap* region_bm, BitMap* card_bm,
int worker_num, int claim_val);
virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);
virtual void print_summary_info();
virtual void prepare_for_verify();
};
#define G1_REM_SET_LOGGING 0
class CountNonCleanMemRegionClosure: public MemRegionClosure {
G1CollectedHeap* _g1;
int _n;
HeapWord* _start_first;
public:
CountNonCleanMemRegionClosure(G1CollectedHeap* g1) :
_g1(g1), _n(0), _start_first(NULL)
{}
void do_MemRegion(MemRegion mr);
int n() { return _n; };
HeapWord* start_first() { return _start_first; }
};

View file

@@ -0,0 +1,104 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
inline size_t G1RemSet::n_workers() {
if (_g1->workers() != NULL) {
return _g1->workers()->total_workers();
} else {
return 1;
}
}
inline void HRInto_G1RemSet::write_ref_nv(HeapRegion* from, oop* p) {
oop obj = *p;
assert(from != NULL && from->is_in_reserved(p),
"p is not in a from");
HeapRegion* to = _g1->heap_region_containing(obj);
if (from != to && to != NULL) {
if (!to->popular() && !from->is_survivor()) {
#if G1_REM_SET_LOGGING
gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
" for region [" PTR_FORMAT ", " PTR_FORMAT ")",
p, obj,
to->bottom(), to->end());
#endif
assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
if (to->rem_set()->add_reference(p)) {
_g1->schedule_popular_region_evac(to);
}
}
}
}
inline void HRInto_G1RemSet::write_ref(HeapRegion* from, oop* p) {
write_ref_nv(from, p);
}
inline bool HRInto_G1RemSet::self_forwarded(oop obj) {
bool result = (obj->is_forwarded() && (obj->forwardee()== obj));
return result;
}
inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) {
oop obj = *p;
#ifdef ASSERT
// can't do because of races
// assert(obj == NULL || obj->is_oop(), "expected an oop");
// Do the safe subset of is_oop
if (obj != NULL) {
#ifdef CHECK_UNHANDLED_OOPS
oopDesc* o = obj.obj();
#else
oopDesc* o = obj;
#endif // CHECK_UNHANDLED_OOPS
assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
assert(Universe::heap()->is_in_reserved(obj), "must be in heap");
}
#endif // ASSERT
assert(from == NULL || from->is_in_reserved(p),
"p is not in from");
HeapRegion* to = _g1->heap_region_containing(obj);
  // The test below could be optimized by applying a bit op to "to" and "from".
if (to != NULL && from != NULL && from != to) {
if (!to->popular() && !from->is_survivor()) {
#if G1_REM_SET_LOGGING
gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS"
" for region [" PTR_FORMAT ", " PTR_FORMAT ")",
p, obj,
to->bottom(), to->end());
#endif
assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
if (to->rem_set()->add_reference(p, tid)) {
_g1->schedule_popular_region_evac(to);
}
}
// There is a tricky infinite loop if we keep pushing
// self forwarding pointers onto our _new_refs list.
if (_par_traversal_in_progress &&
to->in_collection_set() && !self_forwarded(obj)) {
_new_refs[tid]->push(p);
}
}
}

View file

@@ -0,0 +1,150 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1SATBCardTableModRefBS.cpp.incl"
G1SATBCardTableModRefBS::G1SATBCardTableModRefBS(MemRegion whole_heap,
int max_covered_regions) :
CardTableModRefBSForCTRS(whole_heap, max_covered_regions)
{
_kind = G1SATBCT;
}
void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
if (!JavaThread::satb_mark_queue_set().active()) return;
Thread* thr = Thread::current();
if (thr->is_Java_thread()) {
JavaThread* jt = (JavaThread*)thr;
jt->satb_mark_queue().enqueue(pre_val);
} else {
MutexLocker x(Shared_SATB_Q_lock);
JavaThread::satb_mark_queue_set().shared_satb_queue()->enqueue(pre_val);
}
}
// When we know the current java thread:
void
G1SATBCardTableModRefBS::write_ref_field_pre_static(void* field,
oop newVal,
JavaThread* jt) {
if (!JavaThread::satb_mark_queue_set().active()) return;
assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
oop preVal = *(oop*)field;
if (preVal != NULL) {
jt->satb_mark_queue().enqueue(preVal);
}
}
void
G1SATBCardTableModRefBS::write_ref_array_pre(MemRegion mr) {
if (!JavaThread::satb_mark_queue_set().active()) return;
assert(!UseCompressedOops, "Else will need to modify this to deal with narrowOop");
oop* elem_ptr = (oop*)mr.start();
while ((HeapWord*)elem_ptr < mr.end()) {
oop elem = *elem_ptr;
if (elem != NULL) enqueue(elem);
elem_ptr++;
}
}
G1SATBCardTableLoggingModRefBS::
G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
int max_covered_regions) :
G1SATBCardTableModRefBS(whole_heap, max_covered_regions),
_dcqs(JavaThread::dirty_card_queue_set())
{
_kind = G1SATBCTLogging;
}
void
G1SATBCardTableLoggingModRefBS::write_ref_field_work(void* field,
oop new_val) {
jbyte* byte = byte_for(field);
if (*byte != dirty_card) {
*byte = dirty_card;
Thread* thr = Thread::current();
if (thr->is_Java_thread()) {
JavaThread* jt = (JavaThread*)thr;
jt->dirty_card_queue().enqueue(byte);
} else {
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
_dcqs.shared_dirty_card_queue()->enqueue(byte);
}
}
}
void
G1SATBCardTableLoggingModRefBS::write_ref_field_static(void* field,
oop new_val) {
uintptr_t field_uint = (uintptr_t)field;
uintptr_t new_val_uint = (uintptr_t)new_val;
uintptr_t comb = field_uint ^ new_val_uint;
comb = comb >> HeapRegion::LogOfHRGrainBytes;
if (comb == 0) return;
if (new_val == NULL) return;
// Otherwise, log it.
G1SATBCardTableLoggingModRefBS* g1_bs =
(G1SATBCardTableLoggingModRefBS*)Universe::heap()->barrier_set();
g1_bs->write_ref_field_work(field, new_val);
}
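// ---------------------------------------------------------------------------
// Illustrative sketch only: the same-region filter used in
// write_ref_field_static() above. XOR-ing the field address with the new
// value and shifting right by the log of the region size yields zero exactly
// when both addresses lie in the same (power-of-two sized, aligned) region,
// so such stores can be filtered before any card is dirtied. The constant
// below mirrors HeapRegion::LogOfHRGrainBytes (20, i.e. 1 MB regions) from
// heapRegion.hpp; everything else here is an invented stand-in.
#include <cstdint>

const int ToyLogRegionBytes = 20;                   // 1 MB regions assumed

inline bool toy_same_region(const void* field, const void* new_val) {
  std::uintptr_t f = reinterpret_cast<std::uintptr_t>(field);
  std::uintptr_t v = reinterpret_cast<std::uintptr_t>(new_val);
  return ((f ^ v) >> ToyLogRegionBytes) == 0;       // zero => same region
}
// ---------------------------------------------------------------------------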
void
G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr, bool whole_heap) {
jbyte* byte = byte_for(mr.start());
jbyte* last_byte = byte_for(mr.last());
Thread* thr = Thread::current();
if (whole_heap) {
while (byte <= last_byte) {
*byte = dirty_card;
byte++;
}
} else {
// Enqueue if necessary.
if (thr->is_Java_thread()) {
JavaThread* jt = (JavaThread*)thr;
while (byte <= last_byte) {
if (*byte != dirty_card) {
*byte = dirty_card;
jt->dirty_card_queue().enqueue(byte);
}
byte++;
}
} else {
MutexLockerEx x(Shared_DirtyCardQ_lock,
Mutex::_no_safepoint_check_flag);
while (byte <= last_byte) {
if (*byte != dirty_card) {
*byte = dirty_card;
_dcqs.shared_dirty_card_queue()->enqueue(byte);
}
byte++;
}
}
}
}

View file

@@ -0,0 +1,107 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#ifndef SERIALGC
class DirtyCardQueueSet;
// This barrier is specialized to use a logging barrier to support
// snapshot-at-the-beginning marking.
class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
private:
// Add "pre_val" to a set of objects that may have been disconnected from the
// pre-marking object graph.
static void enqueue(oop pre_val);
public:
G1SATBCardTableModRefBS(MemRegion whole_heap,
int max_covered_regions);
bool is_a(BarrierSet::Name bsn) {
return bsn == BarrierSet::G1SATBCT || CardTableModRefBS::is_a(bsn);
}
virtual bool has_write_ref_pre_barrier() { return true; }
// Note that this doesn't need to access any BarrierSet data
// structures, so it can be called from a static context.
static void write_ref_field_pre_static(void* field, oop newVal) {
assert(!UseCompressedOops, "Else needs to be templatized");
oop preVal = *((oop*)field);
if (preVal != NULL) {
enqueue(preVal);
}
}
// When we know the current java thread:
static void write_ref_field_pre_static(void* field, oop newVal,
JavaThread* jt);
// We export this to make it available in cases where the static
// type of the barrier set is known. Note that it is non-virtual.
inline void inline_write_ref_field_pre(void* field, oop newVal) {
write_ref_field_pre_static(field, newVal);
}
// This is the more general virtual version.
void write_ref_field_pre_work(void* field, oop new_val) {
inline_write_ref_field_pre(field, new_val);
}
virtual void write_ref_array_pre(MemRegion mr);
};
// Adds card-table logging to the post-barrier.
// Usual invariant: all dirty cards are logged in the DirtyCardQueueSet.
class G1SATBCardTableLoggingModRefBS: public G1SATBCardTableModRefBS {
private:
DirtyCardQueueSet& _dcqs;
public:
G1SATBCardTableLoggingModRefBS(MemRegion whole_heap,
int max_covered_regions);
bool is_a(BarrierSet::Name bsn) {
return bsn == BarrierSet::G1SATBCTLogging ||
G1SATBCardTableModRefBS::is_a(bsn);
}
void write_ref_field_work(void* field, oop new_val);
// Can be called from static contexts.
static void write_ref_field_static(void* field, oop new_val);
// NB: if you do a whole-heap invalidation, the "usual invariant" defined
// above no longer applies.
void invalidate(MemRegion mr, bool whole_heap = false);
void write_region_work(MemRegion mr) { invalidate(mr); }
void write_ref_array_work(MemRegion mr) { invalidate(mr); }
};
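// ---------------------------------------------------------------------------
// Illustrative sketch only: how the two barriers declared above cooperate
// around a reference store. The pre-barrier captures the value being
// overwritten (snapshot-at-the-beginning), the post-barrier dirties and logs
// the card covering the field. The queue types here are hypothetical
// stand-ins, not the real SATB/dirty-card queue classes, and the dirty-card
// sentinel value of 0 is an assumption of this sketch.
#include <vector>

struct ToySATBQueue { std::vector<void*> buf; void enqueue(void* v) { buf.push_back(v); } };
struct ToyCardQueue { std::vector<char*> buf; void enqueue(char* c) { buf.push_back(c); } };

inline void toy_oop_store(void** field, void* new_val,
                          bool marking_active,
                          ToySATBQueue& satb_q, ToyCardQueue& card_q,
                          char* card_for_field) {
  // Pre-barrier: remember the previous value so concurrent marking still sees
  // the object graph as it was when marking started.
  if (marking_active) {
    void* pre_val = *field;
    if (pre_val != NULL) satb_q.enqueue(pre_val);
  }
  *field = new_val;                       // the actual store
  // Post-barrier: dirty the card and log it (once) so that concurrent
  // refinement can later update the remembered sets.
  if (*card_for_field != 0) {             // 0 == assumed "dirty" value
    *card_for_field = 0;
    card_q.enqueue(card_for_field);
  }
}
// ---------------------------------------------------------------------------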
#endif // SERIALGC

View file

@@ -0,0 +1,32 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_g1_globals.cpp.incl"
G1_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, \
MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, \
MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_EXPERIMENTAL_FLAG, \
MATERIALIZE_NOTPRODUCT_FLAG, \
MATERIALIZE_MANAGEABLE_FLAG, MATERIALIZE_PRODUCT_RW_FLAG)

View file

@@ -0,0 +1,287 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
//
// Defines all global flags used by the garbage-first collector.
//
#define G1_FLAGS(develop, develop_pd, product, product_pd, diagnostic, experimental, notproduct, manageable, product_rw) \
\
product(intx, ParallelGCG1AllocBufferSize, 4*K, \
"Size of parallel G1 allocation buffers in to-space.") \
\
product(intx, G1TimeSliceMS, 500, \
"Time slice for MMU specification") \
\
product(intx, G1MaxPauseTimeMS, 200, \
"Max GC time per MMU time slice") \
\
product(intx, G1ConfidencePerc, 50, \
"Confidence level for MMU/pause predictions") \
\
product(intx, G1MarkingOverheadPerc, 0, \
"Overhead of concurrent marking") \
\
product(bool, G1AccountConcurrentOverhead, false, \
"Whether soft real-time compliance in G1 will take into account" \
"concurrent overhead") \
\
product(intx, G1YoungGenSize, 0, \
"Size of the G1 young generation, 0 is the adaptive policy") \
\
product(bool, G1Gen, true, \
"If true, it will enable the generational G1") \
\
develop(intx, G1GCPct, 10, \
"The desired percent time spent on GC") \
\
product(intx, G1PolicyVerbose, 0, \
"The verbosity level on G1 policy decisions") \
\
develop(bool, G1UseHRIntoRS, true, \
"Determines whether the 'advanced' HR Into rem set is used.") \
\
product(bool, G1VerifyRemSet, false, \
"If true, verify the rem set functioning at each GC") \
\
product(bool, G1VerifyConcMark, false, \
"If true, verify the conc marking code at full GC time") \
\
develop(intx, G1MarkingVerboseLevel, 0, \
"Level (0-4) of verboseness of the marking code") \
\
develop(bool, G1VerifyConcMarkPrintReachable, true, \
"If conc mark verification fails, print reachable objects") \
\
develop(bool, G1TraceMarkStackOverflow, false, \
"If true, extra debugging code for CM restart for ovflw.") \
\
product(bool, G1VerifyMarkingInEvac, false, \
"If true, verify marking info during evacuation") \
\
develop(intx, G1PausesBtwnConcMark, -1, \
"If positive, fixed number of pauses between conc markings") \
\
product(intx, G1EfficiencyPctCausesMark, 80, \
"The cum gc efficiency since mark fall-off that causes " \
"new marking") \
\
product(bool, TraceConcurrentMark, false, \
"Trace concurrent mark") \
\
product(bool, SummarizeG1ConcMark, false, \
"Summarize concurrent mark info") \
\
product(bool, SummarizeG1RSStats, false, \
"Summarize remembered set processing info") \
\
product(bool, SummarizeG1ZFStats, false, \
"Summarize zero-filling info") \
\
product(bool, TraceG1Refine, false, \
"Trace G1 concurrent refinement") \
\
develop(bool, G1ConcMark, true, \
"If true, run concurrent marking for G1") \
\
product(intx, G1CMStackSize, 2 * 1024 * 1024, \
"Size of the mark stack for concurrent marking.") \
\
product(intx, G1CMRegionStackSize, 1024 * 1024, \
"Size of the region stack for concurrent marking.") \
\
develop(bool, G1ConcRefine, true, \
"If true, run concurrent rem set refinement for G1") \
\
develop(intx, G1ConcRefineTargTraversals, 4, \
"Number of concurrent refinement we try to achieve") \
\
develop(intx, G1ConcRefineInitialDelta, 4, \
"Number of heap regions of alloc ahead of starting collection " \
"pause to start concurrent refinement (initially)") \
\
product(bool, G1SmoothConcRefine, true, \
"Attempts to smooth out the overhead of concurrent refinement") \
\
develop(bool, G1ConcZeroFill, true, \
"If true, run concurrent zero-filling thread") \
\
develop(intx, G1ConcZFMaxRegions, 1, \
"Stop zero-filling when # of zf'd regions reaches") \
\
product(intx, G1SteadyStateUsed, 90, \
"If non-0, try to maintain 'used' at this pct (of max)") \
\
product(intx, G1SteadyStateUsedDelta, 30, \
"If G1SteadyStateUsed is non-0, then do pause this number of " \
"of percentage points earlier if no marking is in progress.") \
\
develop(bool, G1SATBBarrierPrintNullPreVals, false, \
"If true, count frac of ptr writes with null pre-vals.") \
\
product(intx, G1SATBLogBufferSize, 1*K, \
"Number of entries in an SATB log buffer.") \
\
product(intx, G1SATBProcessCompletedThreshold, 20, \
"Number of completed buffers that triggers log processing.") \
\
develop(intx, G1ExtraRegionSurvRate, 33, \
"If the young survival rate is S, and there's room left in " \
"to-space, we will allow regions whose survival rate is up to " \
"S + (1 - S)*X, where X is this parameter (as a fraction.)") \
\
develop(intx, G1InitYoungSurvRatio, 50, \
"Expected Survival Rate for newly allocated bytes") \
\
develop(bool, G1SATBPrintStubs, false, \
"If true, print generated stubs for the SATB barrier") \
\
product(intx, G1ExpandByPctOfAvail, 20, \
"When expanding, % of uncommitted space to claim.") \
\
develop(bool, G1RSBarrierRegionFilter, true, \
"If true, generate region filtering code in RS barrier") \
\
develop(bool, G1RSBarrierNullFilter, true, \
"If true, generate null-pointer filtering code in RS barrier") \
\
develop(bool, G1PrintCTFilterStats, false, \
"If true, print stats on RS filtering effectiveness") \
\
develop(bool, G1RSBarrierUseQueue, true, \
"If true, use queueing RS barrier") \
\
develop(bool, G1RSLogCheckCardTable, false, \
"If true, verify that no dirty cards remain after RS log " \
"processing.") \
\
product(intx, G1MinPausesBetweenMarks, 2, \
"Number of inefficient pauses necessary to trigger marking.") \
\
product(intx, G1InefficientPausePct, 80, \
"Threshold of an 'inefficient' pauses (as % of cum efficiency.") \
\
product(intx, G1RSPopLimit, 32768, \
"Limit that defines popularity. Should go away! XXX") \
\
develop(bool, G1RSCountHisto, false, \
"If true, print a histogram of RS occupancies after each pause") \
\
product(intx, G1ObjPopLimit, 256, \
"Limit that defines popularity for an object.") \
\
product(bool, G1TraceFileOverwrite, false, \
"Allow the trace file to be overwritten") \
\
develop(intx, G1PrintRegionLivenessInfo, 0, \
"When > 0, print the occupancies of the <n> best and worst" \
"regions.") \
\
develop(bool, G1TracePopularity, false, \
"When true, provide detailed tracing of popularity.") \
\
product(bool, G1SummarizePopularity, false, \
"When true, provide end-of-run-summarization of popularity.") \
\
product(intx, G1NumPopularRegions, 1, \
"Number of regions reserved to hold popular objects. " \
"Should go away later.") \
\
develop(bool, G1PrintParCleanupStats, false, \
"When true, print extra stats about parallel cleanup.") \
\
product(bool, G1DoAgeCohortChecks, false, \
"When true, check well-formedness of age cohort structures.") \
\
develop(bool, G1DisablePreBarrier, false, \
"Disable generation of pre-barrier (i.e., marking barrier) ") \
\
develop(bool, G1DisablePostBarrier, false, \
"Disable generation of post-barrier (i.e., RS barrier) ") \
\
product(intx, G1DirtyCardQueueMax, 30, \
"Maximum number of completed RS buffers before mutator threads " \
"start processing them.") \
\
develop(intx, G1ConcRSLogCacheSize, 10, \
"Log base 2 of the length of conc RS hot-card cache.") \
\
product(bool, G1ConcRSCountTraversals, false, \
"If true, gather data about the number of times CR traverses " \
"cards ") \
\
product(intx, G1ConcRSHotCardLimit, 4, \
"The threshold that defines (>=) a hot card.") \
\
develop(bool, G1PrintOopAppls, false, \
"When true, print applications of closures to external locs.") \
\
product(intx, G1LogRSRegionEntries, 7, \
"Log_2 of max number of regions for which we keep bitmaps.") \
\
develop(bool, G1RecordHRRSOops, false, \
"When true, record recent calls to rem set operations.") \
\
develop(bool, G1RecordHRRSEvents, false, \
"When true, record recent calls to rem set operations.") \
\
develop(intx, G1MaxVerifyFailures, -1, \
"The maximum number of verification failrues to print. " \
"-1 means print all.") \
\
develop(bool, G1ScrubRemSets, true, \
"When true, do RS scrubbing after cleanup.") \
\
develop(bool, G1RSScrubVerbose, false, \
"When true, do RS scrubbing with verbose output.") \
\
develop(bool, G1YoungSurvRateVerbose, false, \
"print out the survival rate of young regions according to age.") \
\
develop(intx, G1YoungSurvRateNumRegionsSummary, 0, \
"the number of regions for which we'll print a surv rate " \
"summary.") \
\
product(bool, G1UseScanOnlyPrefix, false, \
"It determines whether the system will calculate an optimum " \
"scan-only set.") \
\
product(intx, G1MinReservePerc, 10, \
"It determines the minimum reserve we should have in the heap " \
"to minimize the probability of promotion failure.") \
\
product(bool, G1TraceRegions, false, \
"If set G1 will print information on which regions are being " \
"allocated and which are reclaimed.") \
\
develop(bool, G1HRRSUseSparseTable, true, \
"When true, use sparse table to save space.") \
\
develop(bool, G1HRRSFlushLogBuffersOnVerify, false, \
"Forces flushing of log buffers before verification.") \
\
product(intx, G1MaxSurvivorRegions, 0, \
"The maximum number of survivor regions")
G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, \
DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, \
DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, \
DECLARE_NOTPRODUCT_FLAG, \
DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
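// ---------------------------------------------------------------------------
// Illustrative sketch only: the "X-macro" pattern that G1_FLAGS relies on.
// The same flag table is expanded once with DECLARE_* macros (in a header,
// producing declarations) and once with MATERIALIZE_* macros (in a .cpp file,
// producing definitions), as g1_globals.cpp does above. The names below are
// invented for the illustration; only the technique matches the real code.
#define TOY_FLAGS(product_flag)                                      \
  product_flag(int,  ToyRegionSizeMB, 1,     "Region size in MB")    \
  product_flag(bool, ToyVerbose,      false, "Enable verbose output")

#define TOY_DECLARE(type, name, value, doc)     extern type name;
#define TOY_MATERIALIZE(type, name, value, doc) type name = value;

TOY_FLAGS(TOY_DECLARE)       // what the header expansion produces
TOY_FLAGS(TOY_MATERIALIZE)   // what the .cpp expansion produces
// ---------------------------------------------------------------------------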

View file

@@ -0,0 +1,64 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// The following OopClosure types get specialized versions of
// "oop_oop_iterate" that invoke the closures' do_oop methods
// non-virtually, using a mechanism defined in this file. Extend these
// macros in the obvious way to add specializations for new closures.
// Forward declarations.
enum G1Barrier {
G1BarrierNone, G1BarrierRS, G1BarrierEvac
};
template<bool do_gen_barrier, G1Barrier barrier, bool do_mark_forwardee>
class G1ParCopyClosure;
class G1ParScanClosure;
typedef G1ParCopyClosure<false, G1BarrierEvac, false> G1ParScanHeapEvacClosure;
class FilterIntoCSClosure;
class FilterOutOfRegionClosure;
class FilterInHeapRegionAndIntoCSClosure;
class FilterAndMarkInHeapRegionAndIntoCSClosure;
class G1ScanAndBalanceClosure;
#ifdef FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES
#error "FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES already defined."
#endif
#define FURTHER_SPECIALIZED_OOP_OOP_ITERATE_CLOSURES(f) \
f(G1ParScanHeapEvacClosure,_nv) \
f(G1ParScanClosure,_nv) \
f(FilterIntoCSClosure,_nv) \
f(FilterOutOfRegionClosure,_nv) \
f(FilterInHeapRegionAndIntoCSClosure,_nv) \
f(FilterAndMarkInHeapRegionAndIntoCSClosure,_nv) \
f(G1ScanAndBalanceClosure,_nv)
#ifdef FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES
#error "FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES already defined."
#endif
#define FURTHER_SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(f)
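// ---------------------------------------------------------------------------
// Illustrative sketch only: why the macro above lists closures by name. When
// the concrete closure type is known at compile time, oop iteration can call
// a non-virtual do_oop_nv() (which the compiler can inline) instead of paying
// a virtual dispatch per reference. The types below are invented; only the
// "_nv" dispatch technique corresponds to the specialized-closure machinery.
class ToyClosure {
public:
  virtual void do_oop(void** p) = 0;          // generic, virtual path
  virtual ~ToyClosure() {}
};

class ToyCountClosure : public ToyClosure {
public:
  int count;
  ToyCountClosure() : count(0) {}
  void do_oop_nv(void** /*p*/) { count++; }   // non-virtual, inlinable
  virtual void do_oop(void** p) { do_oop_nv(p); }
};

// Generic walker: one virtual call per reference.
inline void toy_iterate(ToyClosure* cl, void** refs, int n) {
  for (int i = 0; i < n; i++) cl->do_oop(&refs[i]);
}

// Specialized walker: the static type is a template parameter, so the
// non-virtual variant is called and can be inlined.
template <class ClosureType>
inline void toy_iterate_nv(ClosureType* cl, void** refs, int n) {
  for (int i = 0; i < n; i++) cl->do_oop_nv(&refs[i]);
}
// ---------------------------------------------------------------------------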

View file

@@ -0,0 +1,873 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_heapRegion.cpp.incl"
HeapRegionDCTOC::HeapRegionDCTOC(G1CollectedHeap* g1,
HeapRegion* hr, OopClosure* cl,
CardTableModRefBS::PrecisionStyle precision,
FilterKind fk) :
ContiguousSpaceDCTOC(hr, cl, precision, NULL),
_hr(hr), _fk(fk), _g1(g1)
{}
FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
OopClosure* oc) :
_r_bottom(r->bottom()), _r_end(r->end()),
_oc(oc), _out_of_region(0)
{}
class VerifyLiveClosure: public OopClosure {
G1CollectedHeap* _g1h;
CardTableModRefBS* _bs;
oop _containing_obj;
bool _failures;
int _n_failures;
public:
VerifyLiveClosure(G1CollectedHeap* g1h) :
_g1h(g1h), _bs(NULL), _containing_obj(NULL),
_failures(false), _n_failures(0)
{
BarrierSet* bs = _g1h->barrier_set();
if (bs->is_a(BarrierSet::CardTableModRef))
_bs = (CardTableModRefBS*)bs;
}
void set_containing_obj(oop obj) {
_containing_obj = obj;
}
bool failures() { return _failures; }
int n_failures() { return _n_failures; }
virtual void do_oop(narrowOop* p) {
guarantee(false, "NYI");
}
void do_oop(oop* p) {
assert(_containing_obj != NULL, "Precondition");
assert(!_g1h->is_obj_dead(_containing_obj), "Precondition");
oop obj = *p;
if (obj != NULL) {
bool failed = false;
if (!_g1h->is_in_closed_subset(obj) || _g1h->is_obj_dead(obj)) {
if (!_failures) {
gclog_or_tty->print_cr("");
gclog_or_tty->print_cr("----------");
}
if (!_g1h->is_in_closed_subset(obj)) {
gclog_or_tty->print_cr("Field "PTR_FORMAT
" of live obj "PTR_FORMAT
" points to obj "PTR_FORMAT
" not in the heap.",
p, (void*) _containing_obj, (void*) obj);
} else {
gclog_or_tty->print_cr("Field "PTR_FORMAT
" of live obj "PTR_FORMAT
" points to dead obj "PTR_FORMAT".",
p, (void*) _containing_obj, (void*) obj);
}
gclog_or_tty->print_cr("Live obj:");
_containing_obj->print_on(gclog_or_tty);
gclog_or_tty->print_cr("Bad referent:");
obj->print_on(gclog_or_tty);
gclog_or_tty->print_cr("----------");
_failures = true;
failed = true;
_n_failures++;
}
if (!_g1h->full_collection()) {
HeapRegion* from = _g1h->heap_region_containing(p);
HeapRegion* to = _g1h->heap_region_containing(*p);
if (from != NULL && to != NULL &&
from != to &&
!to->popular() &&
!to->isHumongous()) {
jbyte cv_obj = *_bs->byte_for_const(_containing_obj);
jbyte cv_field = *_bs->byte_for_const(p);
const jbyte dirty = CardTableModRefBS::dirty_card_val();
bool is_bad = !(from->is_young()
|| to->rem_set()->contains_reference(p)
|| !G1HRRSFlushLogBuffersOnVerify && // buffers were not flushed
(_containing_obj->is_objArray() ?
cv_field == dirty
: cv_obj == dirty || cv_field == dirty));
if (is_bad) {
if (!_failures) {
gclog_or_tty->print_cr("");
gclog_or_tty->print_cr("----------");
}
gclog_or_tty->print_cr("Missing rem set entry:");
gclog_or_tty->print_cr("Field "PTR_FORMAT
" of obj "PTR_FORMAT
", in region %d ["PTR_FORMAT
", "PTR_FORMAT"),",
p, (void*) _containing_obj,
from->hrs_index(),
from->bottom(),
from->end());
_containing_obj->print_on(gclog_or_tty);
gclog_or_tty->print_cr("points to obj "PTR_FORMAT
" in region %d ["PTR_FORMAT
", "PTR_FORMAT").",
(void*) obj, to->hrs_index(),
to->bottom(), to->end());
obj->print_on(gclog_or_tty);
gclog_or_tty->print_cr("Obj head CTE = %d, field CTE = %d.",
cv_obj, cv_field);
gclog_or_tty->print_cr("----------");
_failures = true;
if (!failed) _n_failures++;
}
}
}
}
}
};
template<class ClosureType>
HeapWord* walk_mem_region_loop(ClosureType* cl, G1CollectedHeap* g1h,
HeapRegion* hr,
HeapWord* cur, HeapWord* top) {
oop cur_oop = oop(cur);
int oop_size = cur_oop->size();
HeapWord* next_obj = cur + oop_size;
while (next_obj < top) {
// Keep filtering the remembered set.
if (!g1h->is_obj_dead(cur_oop, hr)) {
// Bottom lies entirely below top, so we can call the
// non-memRegion version of oop_iterate below.
#ifndef PRODUCT
if (G1VerifyMarkingInEvac) {
VerifyLiveClosure vl_cl(g1h);
cur_oop->oop_iterate(&vl_cl);
}
#endif
cur_oop->oop_iterate(cl);
}
cur = next_obj;
cur_oop = oop(cur);
oop_size = cur_oop->size();
next_obj = cur + oop_size;
}
return cur;
}
void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr,
HeapWord* bottom,
HeapWord* top,
OopClosure* cl) {
G1CollectedHeap* g1h = _g1;
int oop_size;
OopClosure* cl2 = cl;
FilterIntoCSClosure intoCSFilt(this, g1h, cl);
FilterOutOfRegionClosure outOfRegionFilt(_hr, cl);
switch (_fk) {
case IntoCSFilterKind: cl2 = &intoCSFilt; break;
case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
}
// Start filtering what we add to the remembered set. If the object is
// not considered dead, either because it is marked (in the mark bitmap)
// or it was allocated after marking finished, then we add it. Otherwise
// we can safely ignore the object.
if (!g1h->is_obj_dead(oop(bottom), _hr)) {
#ifndef PRODUCT
if (G1VerifyMarkingInEvac) {
VerifyLiveClosure vl_cl(g1h);
oop(bottom)->oop_iterate(&vl_cl, mr);
}
#endif
oop_size = oop(bottom)->oop_iterate(cl2, mr);
} else {
oop_size = oop(bottom)->size();
}
bottom += oop_size;
if (bottom < top) {
// We replicate the loop below for several kinds of possible filters.
switch (_fk) {
case NoFilterKind:
bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top);
break;
case IntoCSFilterKind: {
FilterIntoCSClosure filt(this, g1h, cl);
bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
break;
}
case OutOfRegionFilterKind: {
FilterOutOfRegionClosure filt(_hr, cl);
bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
break;
}
default:
ShouldNotReachHere();
}
// Last object. Need to do dead-obj filtering here too.
if (!g1h->is_obj_dead(oop(bottom), _hr)) {
#ifndef PRODUCT
if (G1VerifyMarkingInEvac) {
VerifyLiveClosure vl_cl(g1h);
oop(bottom)->oop_iterate(&vl_cl, mr);
}
#endif
oop(bottom)->oop_iterate(cl2, mr);
}
}
}
void HeapRegion::reset_after_compaction() {
G1OffsetTableContigSpace::reset_after_compaction();
// After a compaction the mark bitmap is invalid, so we must
// treat all objects as being inside the unmarked area.
zero_marked_bytes();
init_top_at_mark_start();
}
DirtyCardToOopClosure*
HeapRegion::new_dcto_closure(OopClosure* cl,
CardTableModRefBS::PrecisionStyle precision,
HeapRegionDCTOC::FilterKind fk) {
return new HeapRegionDCTOC(G1CollectedHeap::heap(),
this, cl, precision, fk);
}
void HeapRegion::hr_clear(bool par, bool clear_space) {
_humongous_type = NotHumongous;
_humongous_start_region = NULL;
_in_collection_set = false;
_is_gc_alloc_region = false;
// Age stuff (if parallel, this will be done separately, since it needs
// to be sequential).
G1CollectedHeap* g1h = G1CollectedHeap::heap();
set_young_index_in_cset(-1);
uninstall_surv_rate_group();
set_young_type(NotYoung);
// In case it had been the start of a humongous sequence, reset its end.
set_end(_orig_end);
if (!par) {
// If this is parallel, this will be done later.
HeapRegionRemSet* hrrs = rem_set();
if (hrrs != NULL) hrrs->clear();
_claimed = InitialClaimValue;
}
zero_marked_bytes();
set_sort_index(-1);
if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary())
set_popular(false);
_offsets.resize(HeapRegion::GrainWords);
init_top_at_mark_start();
if (clear_space) clear(SpaceDecorator::Mangle);
}
// <PREDICTION>
void HeapRegion::calc_gc_efficiency() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
_gc_efficiency = (double) garbage_bytes() /
g1h->predict_region_elapsed_time_ms(this, false);
}
// </PREDICTION>
void HeapRegion::set_startsHumongous() {
_humongous_type = StartsHumongous;
_humongous_start_region = this;
assert(end() == _orig_end, "Should be normal before alloc.");
}
bool HeapRegion::claimHeapRegion(jint claimValue) {
jint current = _claimed;
if (current != claimValue) {
jint res = Atomic::cmpxchg(claimValue, &_claimed, current);
if (res == current) {
return true;
}
}
return false;
}
HeapWord* HeapRegion::next_block_start_careful(HeapWord* addr) {
HeapWord* low = addr;
HeapWord* high = end();
while (low < high) {
size_t diff = pointer_delta(high, low);
// Must add one below to bias toward the high amount. Otherwise, if
// "high" were at the desired value, and "low" were one less, we
// would not converge on "high". This is not symmetric, because
// we set "high" to a block start, which might be the right one,
// which we don't do for "low".
HeapWord* middle = low + (diff+1)/2;
if (middle == high) return high;
HeapWord* mid_bs = block_start_careful(middle);
if (mid_bs < addr) {
low = middle;
} else {
high = mid_bs;
}
}
assert(low == high && low >= addr, "Didn't work.");
return low;
}
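// ---------------------------------------------------------------------------
// Illustrative sketch only: the "bias toward the high end" rule used by the
// binary search in next_block_start_careful() above. When a loop advances
// with "low = middle", the midpoint must round up; otherwise low == high - 1
// would give middle == low and the search would never terminate. The
// predicate below is hypothetical; only the rounding rule corresponds to the
// real code.
#include <cassert>

// Largest x in [lo, hi] for which pred(x) holds, assuming pred is monotone
// (true for a prefix of the range) and pred(lo) is true.
template <class Pred>
int toy_find_last_true(int lo, int hi, Pred pred) {
  assert(pred(lo));
  while (lo < hi) {
    int mid = lo + (hi - lo + 1) / 2;   // round up: bias toward hi
    if (pred(mid)) {
      lo = mid;                         // mid still satisfies pred
    } else {
      hi = mid - 1;                     // mid is already too large
    }
  }
  return lo;
}
// ---------------------------------------------------------------------------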
void HeapRegion::set_next_on_unclean_list(HeapRegion* r) {
assert(r == NULL || r->is_on_unclean_list(), "Malformed unclean list.");
_next_in_special_set = r;
}
void HeapRegion::set_on_unclean_list(bool b) {
_is_on_unclean_list = b;
}
void HeapRegion::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
G1OffsetTableContigSpace::initialize(mr, false, mangle_space);
hr_clear(false/*par*/, clear_space);
}
#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
HeapRegion::
HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray,
MemRegion mr, bool is_zeroed)
: G1OffsetTableContigSpace(sharedOffsetArray, mr, is_zeroed),
_next_fk(HeapRegionDCTOC::NoFilterKind),
_hrs_index(-1),
_humongous_type(NotHumongous), _humongous_start_region(NULL),
_in_collection_set(false), _is_gc_alloc_region(false),
_is_on_free_list(false), _is_on_unclean_list(false),
_next_in_special_set(NULL), _orig_end(NULL),
_claimed(InitialClaimValue), _evacuation_failed(false),
_prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
_popularity(NotPopular),
_young_type(NotYoung), _next_young_region(NULL),
_young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1),
_rem_set(NULL), _zfs(NotZeroFilled)
{
_orig_end = mr.end();
// Note that initialize() will set the start of the unmarked area of the
// region.
this->initialize(mr, !is_zeroed, SpaceDecorator::Mangle);
set_top(bottom());
set_saved_mark();
_rem_set = new HeapRegionRemSet(sharedOffsetArray, this);
assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
// In case the region is allocated during a pause, note the top.
// We haven't done any counting on a brand new region.
_top_at_conc_mark_count = bottom();
}
class NextCompactionHeapRegionClosure: public HeapRegionClosure {
const HeapRegion* _target;
bool _target_seen;
HeapRegion* _last;
CompactibleSpace* _res;
public:
NextCompactionHeapRegionClosure(const HeapRegion* target) :
_target(target), _target_seen(false), _res(NULL) {}
bool doHeapRegion(HeapRegion* cur) {
if (_target_seen) {
if (!cur->isHumongous()) {
_res = cur;
return true;
}
} else if (cur == _target) {
_target_seen = true;
}
return false;
}
CompactibleSpace* result() { return _res; }
};
CompactibleSpace* HeapRegion::next_compaction_space() const {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// cast away const-ness
HeapRegion* r = (HeapRegion*) this;
NextCompactionHeapRegionClosure blk(r);
g1h->heap_region_iterate_from(r, &blk);
return blk.result();
}
void HeapRegion::set_continuesHumongous(HeapRegion* start) {
// The order is important here.
start->add_continuingHumongousRegion(this);
_humongous_type = ContinuesHumongous;
_humongous_start_region = start;
}
void HeapRegion::add_continuingHumongousRegion(HeapRegion* cont) {
// Must join the blocks of the current H region seq with the block of the
// added region.
offsets()->join_blocks(bottom(), cont->bottom());
arrayOop obj = (arrayOop)(bottom());
obj->set_length((int) (obj->length() + cont->capacity()/jintSize));
set_end(cont->end());
set_top(cont->end());
}
void HeapRegion::save_marks() {
set_saved_mark();
}
void HeapRegion::oops_in_mr_iterate(MemRegion mr, OopClosure* cl) {
HeapWord* p = mr.start();
HeapWord* e = mr.end();
oop obj;
while (p < e) {
obj = oop(p);
p += obj->oop_iterate(cl);
}
assert(p == e, "bad memregion: doesn't end on obj boundary");
}
#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl); \
}
SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN)
void HeapRegion::oop_before_save_marks_iterate(OopClosure* cl) {
oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
}
#ifdef DEBUG
HeapWord* HeapRegion::allocate(size_t size) {
jint state = zero_fill_state();
assert(!G1CollectedHeap::heap()->allocs_are_zero_filled() ||
zero_fill_is_allocated(),
"When ZF is on, only alloc in ZF'd regions");
return G1OffsetTableContigSpace::allocate(size);
}
#endif
void HeapRegion::set_zero_fill_state_work(ZeroFillState zfs) {
assert(top() == bottom() || zfs == Allocated,
"Region must be empty, or we must be setting it to allocated.");
assert(ZF_mon->owned_by_self() ||
Universe::heap()->is_gc_active(),
"Must hold the lock or be a full GC to modify.");
_zfs = zfs;
}
void HeapRegion::set_zero_fill_complete() {
set_zero_fill_state_work(ZeroFilled);
if (ZF_mon->owned_by_self()) {
ZF_mon->notify_all();
}
}
void HeapRegion::ensure_zero_filled() {
MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
ensure_zero_filled_locked();
}
void HeapRegion::ensure_zero_filled_locked() {
assert(ZF_mon->owned_by_self(), "Precondition");
bool should_ignore_zf = SafepointSynchronize::is_at_safepoint();
assert(should_ignore_zf || Heap_lock->is_locked(),
"Either we're in a GC or we're allocating a region.");
switch (zero_fill_state()) {
case HeapRegion::NotZeroFilled:
set_zero_fill_in_progress(Thread::current());
{
ZF_mon->unlock();
Copy::fill_to_words(bottom(), capacity()/HeapWordSize);
ZF_mon->lock_without_safepoint_check();
}
// A trap.
guarantee(zero_fill_state() == HeapRegion::ZeroFilling
&& zero_filler() == Thread::current(),
"AHA! Tell Dave D if you see this...");
set_zero_fill_complete();
// gclog_or_tty->print_cr("Did sync ZF.");
ConcurrentZFThread::note_sync_zfs();
break;
case HeapRegion::ZeroFilling:
if (should_ignore_zf) {
// We can "break" the lock and take over the work.
Copy::fill_to_words(bottom(), capacity()/HeapWordSize);
set_zero_fill_complete();
ConcurrentZFThread::note_sync_zfs();
break;
} else {
ConcurrentZFThread::wait_for_ZF_completed(this);
}
case HeapRegion::ZeroFilled:
// Nothing to do.
break;
case HeapRegion::Allocated:
guarantee(false, "Should not call on allocated regions.");
}
assert(zero_fill_state() == HeapRegion::ZeroFilled, "Post");
}
HeapWord*
HeapRegion::object_iterate_mem_careful(MemRegion mr,
ObjectClosure* cl) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// We used to use "block_start_careful" here. But we're actually happy
// to update the BOT while we do this...
HeapWord* cur = block_start(mr.start());
mr = mr.intersection(used_region());
if (mr.is_empty()) return NULL;
// Otherwise, find the obj that extends onto mr.start().
assert(cur <= mr.start()
&& (oop(cur)->klass() == NULL ||
cur + oop(cur)->size() > mr.start()),
"postcondition of block_start");
oop obj;
while (cur < mr.end()) {
obj = oop(cur);
if (obj->klass() == NULL) {
// Ran into an unparseable point.
return cur;
} else if (!g1h->is_obj_dead(obj)) {
cl->do_object(obj);
}
if (cl->abort()) return cur;
// The check above must occur before the operation below, since an
// abort might invalidate the "size" operation.
cur += obj->size();
}
return NULL;
}
HeapWord*
HeapRegion::
oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl) {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
// If we're within a stop-world GC, then we might look at a card in a
// GC alloc region that extends onto a GC LAB, which may not be
// parseable. Stop such at the "saved_mark" of the region.
if (G1CollectedHeap::heap()->is_gc_active()) {
mr = mr.intersection(used_region_at_save_marks());
} else {
mr = mr.intersection(used_region());
}
if (mr.is_empty()) return NULL;
// Otherwise, find the obj that extends onto mr.start().
// We used to use "block_start_careful" here. But we're actually happy
// to update the BOT while we do this...
HeapWord* cur = block_start(mr.start());
assert(cur <= mr.start(), "Postcondition");
while (cur <= mr.start()) {
if (oop(cur)->klass() == NULL) {
// Ran into an unparseable point.
return cur;
}
// Otherwise...
int sz = oop(cur)->size();
if (cur + sz > mr.start()) break;
// Otherwise, go on.
cur = cur + sz;
}
oop obj;
obj = oop(cur);
// If we finish this loop...
assert(cur <= mr.start()
&& obj->klass() != NULL
&& cur + obj->size() > mr.start(),
"Loop postcondition");
if (!g1h->is_obj_dead(obj)) {
obj->oop_iterate(cl, mr);
}
HeapWord* next;
while (cur < mr.end()) {
obj = oop(cur);
if (obj->klass() == NULL) {
// Ran into an unparseable point.
return cur;
}
// Otherwise:
next = (cur + obj->size());
if (!g1h->is_obj_dead(obj)) {
if (next < mr.end()) {
obj->oop_iterate(cl);
} else {
// this obj spans the boundary. If it's an array, stop at the
// boundary.
if (obj->is_objArray()) {
obj->oop_iterate(cl, mr);
} else {
obj->oop_iterate(cl);
}
}
}
cur = next;
}
return NULL;
}
void HeapRegion::print() const { print_on(gclog_or_tty); }
void HeapRegion::print_on(outputStream* st) const {
if (isHumongous()) {
if (startsHumongous())
st->print(" HS");
else
st->print(" HC");
} else {
st->print(" ");
}
if (in_collection_set())
st->print(" CS");
else if (is_gc_alloc_region())
st->print(" A ");
else
st->print(" ");
if (is_young())
st->print(is_scan_only() ? " SO" : (is_survivor() ? " SU" : " Y "));
else
st->print(" ");
if (is_empty())
st->print(" F");
else
st->print(" ");
st->print(" %d", _gc_time_stamp);
G1OffsetTableContigSpace::print_on(st);
}
#define OBJ_SAMPLE_INTERVAL 0
#define BLOCK_SAMPLE_INTERVAL 100
// This really ought to be commoned up into OffsetTableContigSpace somehow.
// We would need a mechanism to make that code skip dead objects.
void HeapRegion::verify(bool allow_dirty) const {
G1CollectedHeap* g1 = G1CollectedHeap::heap();
HeapWord* p = bottom();
HeapWord* prev_p = NULL;
int objs = 0;
int blocks = 0;
VerifyLiveClosure vl_cl(g1);
while (p < top()) {
size_t size = oop(p)->size();
if (blocks == BLOCK_SAMPLE_INTERVAL) {
guarantee(p == block_start_const(p + (size/2)),
"check offset computation");
blocks = 0;
} else {
blocks++;
}
if (objs == OBJ_SAMPLE_INTERVAL) {
oop obj = oop(p);
if (!g1->is_obj_dead(obj, this)) {
obj->verify();
vl_cl.set_containing_obj(obj);
obj->oop_iterate(&vl_cl);
if (G1MaxVerifyFailures >= 0
&& vl_cl.n_failures() >= G1MaxVerifyFailures) break;
}
objs = 0;
} else {
objs++;
}
prev_p = p;
p += size;
}
HeapWord* rend = end();
HeapWord* rtop = top();
if (rtop < rend) {
guarantee(block_start_const(rtop + (rend - rtop) / 2) == rtop,
"check offset computation");
}
if (vl_cl.failures()) {
gclog_or_tty->print_cr("Heap:");
G1CollectedHeap::heap()->print();
gclog_or_tty->print_cr("");
}
if (G1VerifyConcMark &&
G1VerifyConcMarkPrintReachable &&
vl_cl.failures()) {
g1->concurrent_mark()->print_prev_bitmap_reachable();
}
guarantee(!vl_cl.failures(), "should not have had any failures");
guarantee(p == top(), "end of last object must match end of space");
}
// G1OffsetTableContigSpace code; copied from space.cpp. Hope this can go
// away eventually.
void G1OffsetTableContigSpace::initialize(MemRegion mr, bool clear_space, bool mangle_space) {
// false ==> we'll do the clearing if there's clearing to be done.
ContiguousSpace::initialize(mr, false, mangle_space);
_offsets.zero_bottom_entry();
_offsets.initialize_threshold();
if (clear_space) clear(mangle_space);
}
void G1OffsetTableContigSpace::clear(bool mangle_space) {
ContiguousSpace::clear(mangle_space);
_offsets.zero_bottom_entry();
_offsets.initialize_threshold();
}
void G1OffsetTableContigSpace::set_bottom(HeapWord* new_bottom) {
Space::set_bottom(new_bottom);
_offsets.set_bottom(new_bottom);
}
void G1OffsetTableContigSpace::set_end(HeapWord* new_end) {
Space::set_end(new_end);
_offsets.resize(new_end - bottom());
}
void G1OffsetTableContigSpace::print() const {
print_short();
gclog_or_tty->print_cr(" [" INTPTR_FORMAT ", " INTPTR_FORMAT ", "
INTPTR_FORMAT ", " INTPTR_FORMAT ")",
bottom(), top(), _offsets.threshold(), end());
}
HeapWord* G1OffsetTableContigSpace::initialize_threshold() {
return _offsets.initialize_threshold();
}
HeapWord* G1OffsetTableContigSpace::cross_threshold(HeapWord* start,
HeapWord* end) {
_offsets.alloc_block(start, end);
return _offsets.threshold();
}
HeapWord* G1OffsetTableContigSpace::saved_mark_word() const {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
assert( _gc_time_stamp <= g1h->get_gc_time_stamp(), "invariant" );
if (_gc_time_stamp < g1h->get_gc_time_stamp())
return top();
else
return ContiguousSpace::saved_mark_word();
}
void G1OffsetTableContigSpace::set_saved_mark() {
G1CollectedHeap* g1h = G1CollectedHeap::heap();
unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
if (_gc_time_stamp < curr_gc_time_stamp) {
// The order of these is important, as another thread might be
// about to start scanning this region. If it does so after
// set_saved_mark and before _gc_time_stamp = ..., then the latter
// will be false, and it will pick up top() as the high water mark
// of region. If it does so after _gc_time_stamp = ..., then it
// will pick up the right saved_mark_word() as the high water mark
// of the region. Either way, the behaviour will be correct.
ContiguousSpace::set_saved_mark();
_gc_time_stamp = curr_gc_time_stamp;
OrderAccess::fence();
}
}
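// ---------------------------------------------------------------------------
// Illustrative sketch only: the time-stamp protocol implemented by
// saved_mark_word()/set_saved_mark() above. A region's saved mark is trusted
// only if the region's stamp matches the heap's current stamp; otherwise the
// reader falls back to top(). The struct below is a hypothetical stand-in,
// not G1OffsetTableContigSpace, and it omits the memory fence the real code
// issues to order the two stores.
struct ToyStampedSpace {
  unsigned space_stamp;   // copied from the heap's stamp at set_saved_mark()
  void*    saved_mark;    // valid only while space_stamp == heap stamp
  void*    top;           // current allocation high-water mark

  void set_saved_mark(unsigned heap_stamp) {
    if (space_stamp < heap_stamp) {
      saved_mark  = top;          // record the mark first...
      space_stamp = heap_stamp;   // ...then publish the stamp that validates it
    }
  }

  void* saved_mark_word(unsigned heap_stamp) const {
    // A stale stamp means the saved mark was taken in an earlier pause and
    // must be ignored; top is the conservative answer.
    return (space_stamp < heap_stamp) ? top : saved_mark;
  }
};
// ---------------------------------------------------------------------------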
G1OffsetTableContigSpace::
G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
MemRegion mr, bool is_zeroed) :
_offsets(sharedOffsetArray, mr),
_par_alloc_lock(Mutex::leaf, "OffsetTableContigSpace par alloc lock", true),
_gc_time_stamp(0)
{
_offsets.set_space(this);
initialize(mr, !is_zeroed, SpaceDecorator::Mangle);
}
size_t RegionList::length() {
size_t len = 0;
HeapRegion* cur = hd();
DEBUG_ONLY(HeapRegion* last = NULL);
while (cur != NULL) {
len++;
DEBUG_ONLY(last = cur);
cur = get_next(cur);
}
assert(last == tl(), "Invariant");
return len;
}
void RegionList::insert_before_head(HeapRegion* r) {
assert(well_formed(), "Inv");
set_next(r, hd());
_hd = r;
_sz++;
if (tl() == NULL) _tl = r;
assert(well_formed(), "Inv");
}
void RegionList::prepend_list(RegionList* new_list) {
assert(well_formed(), "Precondition");
assert(new_list->well_formed(), "Precondition");
HeapRegion* new_tl = new_list->tl();
if (new_tl != NULL) {
set_next(new_tl, hd());
_hd = new_list->hd();
_sz += new_list->sz();
if (tl() == NULL) _tl = new_list->tl();
} else {
assert(new_list->hd() == NULL && new_list->sz() == 0, "Inv");
}
assert(well_formed(), "Inv");
}
void RegionList::delete_after(HeapRegion* r) {
assert(well_formed(), "Precondition");
HeapRegion* next = get_next(r);
assert(r != NULL, "Precondition");
HeapRegion* next_tl = get_next(next);
set_next(r, next_tl);
dec_sz();
if (next == tl()) {
assert(next_tl == NULL, "Inv");
_tl = r;
}
assert(well_formed(), "Inv");
}
HeapRegion* RegionList::pop() {
assert(well_formed(), "Inv");
HeapRegion* res = hd();
if (res != NULL) {
_hd = get_next(res);
_sz--;
set_next(res, NULL);
if (sz() == 0) _tl = NULL;
}
assert(well_formed(), "Inv");
return res;
}

View file

@@ -0,0 +1,936 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#ifndef SERIALGC
// A HeapRegion is the smallest piece of a G1CollectedHeap that
// can be collected independently.
// NOTE: Although a HeapRegion is a Space, its
// Space::initDirtyCardClosure method must not be called.
// The problem is that the existence of this method breaks
// the independence of barrier sets from remembered sets.
// The solution is to remove this method from the definition
// of a Space.
class CompactibleSpace;
class ContiguousSpace;
class HeapRegionRemSet;
class HeapRegionRemSetIterator;
class HeapRegion;
// A dirty card to oop closure for heap regions. It
// knows how to get the G1 heap and how to use the bitmap
// in the concurrent marker used by G1 to filter remembered
// sets.
class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
public:
// Specification of possible DirtyCardToOopClosure filtering.
enum FilterKind {
NoFilterKind,
IntoCSFilterKind,
OutOfRegionFilterKind
};
protected:
HeapRegion* _hr;
FilterKind _fk;
G1CollectedHeap* _g1;
void walk_mem_region_with_cl(MemRegion mr,
HeapWord* bottom, HeapWord* top,
OopClosure* cl);
// We don't specialize this for FilteringClosure; filtering is handled by
// the "FilterKind" mechanism. But we provide this to avoid a compiler
// warning.
void walk_mem_region_with_cl(MemRegion mr,
HeapWord* bottom, HeapWord* top,
FilteringClosure* cl) {
HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
(OopClosure*)cl);
}
// Get the actual top of the area on which the closure will
// operate, given where the top is assumed to be (the end of the
// memory region passed to do_MemRegion) and where the object
// at the top is assumed to start. For example, an object may
// start at the top but actually extend past the assumed top,
// in which case the top becomes the end of the object.
HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
}
// Walk the given memory region from bottom to (actual) top
// looking for objects and applying the oop closure (_cl) to
// them. The base implementation of this treats the area as
// blocks, where a block may or may not be an object. Sub-
// classes should override this to provide more accurate
// or possibly more efficient walking.
void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
Filtering_DCTOC::walk_mem_region(mr, bottom, top);
}
public:
HeapRegionDCTOC(G1CollectedHeap* g1,
HeapRegion* hr, OopClosure* cl,
CardTableModRefBS::PrecisionStyle precision,
FilterKind fk);
};
// The complicating factor is that BlockOffsetTable diverged
// significantly, and we need functionality that is only in the G1 version.
// So I copied that code, which led to an alternate G1 version of
// OffsetTableContigSpace. If the two versions of BlockOffsetTable could
// be reconciled, then G1OffsetTableContigSpace could go away.
// The idea behind time stamps is the following. Doing a save_marks on
// all regions at every GC pause is time consuming (if I remember
// well, 10ms or so). So, we would like to do that only for regions
// that are GC alloc regions. To achieve this, we use time
// stamps. For every evacuation pause, G1CollectedHeap generates a
// unique time stamp (essentially a counter that gets
// incremented). Every time we want to call save_marks on a region,
// we set the saved_mark_word to top and also copy the current GC
// time stamp to the time stamp field of the space. Reading the
// saved_mark_word involves checking the time stamp of the
// region. If it is the same as the current GC time stamp, then we
// can safely read the saved_mark_word field, as it is valid. If the
// time stamp of the region is not the same as the current GC time
// stamp, then we instead read top, as the saved_mark_word field is
// invalid. Time stamps (on the regions and also on the
// G1CollectedHeap) are reset at every cleanup (we iterate over
// the regions anyway) and at the end of a Full GC. The current scheme
// that uses sequential unsigned ints will fail only if we have 4b
// evacuation pauses between two cleanups, which is _highly_ unlikely.
class G1OffsetTableContigSpace: public ContiguousSpace {
friend class VMStructs;
protected:
G1BlockOffsetArrayContigSpace _offsets;
Mutex _par_alloc_lock;
volatile unsigned _gc_time_stamp;
public:
// Constructor. If "is_zeroed" is true, the MemRegion "mr" may be
// assumed to contain zeros.
G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
MemRegion mr, bool is_zeroed = false);
void set_bottom(HeapWord* value);
void set_end(HeapWord* value);
virtual HeapWord* saved_mark_word() const;
virtual void set_saved_mark();
void reset_gc_time_stamp() { _gc_time_stamp = 0; }
virtual void initialize(MemRegion mr, bool clear_space, bool mangle_space);
virtual void clear(bool mangle_space);
HeapWord* block_start(const void* p);
HeapWord* block_start_const(const void* p) const;
// Add offset table update.
virtual HeapWord* allocate(size_t word_size);
HeapWord* par_allocate(size_t word_size);
// MarkSweep support phase3
virtual HeapWord* initialize_threshold();
virtual HeapWord* cross_threshold(HeapWord* start, HeapWord* end);
virtual void print() const;
};
class HeapRegion: public G1OffsetTableContigSpace {
friend class VMStructs;
private:
enum HumongousType {
NotHumongous = 0,
StartsHumongous,
ContinuesHumongous
};
// The next filter kind that should be used for a "new_dcto_cl" call with
// the "traditional" signature.
HeapRegionDCTOC::FilterKind _next_fk;
// Requires that the region "mr" be dense with objects, and begin and end
// with an object.
void oops_in_mr_iterate(MemRegion mr, OopClosure* cl);
// The remembered set for this region.
// (Might want to make this "inline" later, to avoid some alloc failure
// issues.)
HeapRegionRemSet* _rem_set;
G1BlockOffsetArrayContigSpace* offsets() { return &_offsets; }
protected:
// If this region is a member of a HeapRegionSeq, the index in that
// sequence, otherwise -1.
int _hrs_index;
HumongousType _humongous_type;
// For a humongous region, region in which it starts.
HeapRegion* _humongous_start_region;
// For the start region of a humongous sequence, its original end().
HeapWord* _orig_end;
// True iff the region is in current collection_set.
bool _in_collection_set;
// True iff the region is on the unclean list, waiting to be zero filled.
bool _is_on_unclean_list;
// True iff the region is on the free list, ready for allocation.
bool _is_on_free_list;
// True iff this is, or has been, an allocation region in the current
// collection pause.
bool _is_gc_alloc_region;
// True iff an attempt to evacuate an object in the region failed.
bool _evacuation_failed;
// A heap region may be a member of one of a number of special subsets, each
// represented as linked lists through the field below. Currently, these
// sets include:
// The collection set.
// The set of allocation regions used in a collection pause.
// Spaces that may contain gray objects.
HeapRegion* _next_in_special_set;
// next region in the young "generation" region set
HeapRegion* _next_young_region;
// For parallel heapRegion traversal.
jint _claimed;
// We use concurrent marking to determine the amount of live data
// in each heap region.
size_t _prev_marked_bytes; // Bytes known to be live via last completed marking.
size_t _next_marked_bytes; // Bytes known to be live via in-progress marking.
// See "sort_index" method. -1 means is not in the array.
int _sort_index;
// Means it has (or at least had) a very large RS, and should not be
// considered for membership in a collection set.
enum PopularityState {
NotPopular,
PopularPending,
Popular
};
PopularityState _popularity;
// <PREDICTION>
double _gc_efficiency;
// </PREDICTION>
enum YoungType {
NotYoung, // a region is not young
ScanOnly, // a region is young and scan-only
Young, // a region is young
Survivor // a region is young and it contains
// survivor
};
YoungType _young_type;
int _young_index_in_cset;
SurvRateGroup* _surv_rate_group;
int _age_index;
// The start of the unmarked area. The unmarked area extends from this
// word until the top and/or end of the region, and is the part
// of the region for which no marking was done, i.e. objects may
// have been allocated in this part since the last mark phase.
// "prev" is the top at the start of the last completed marking.
// "next" is the top at the start of the in-progress marking (if any.)
HeapWord* _prev_top_at_mark_start;
HeapWord* _next_top_at_mark_start;
// If a collection pause is in progress, this is the top at the start
// of that pause.
// We've counted the marked bytes of objects below here.
HeapWord* _top_at_conc_mark_count;
void init_top_at_mark_start() {
assert(_prev_marked_bytes == 0 &&
_next_marked_bytes == 0,
"Must be called after zero_marked_bytes.");
HeapWord* bot = bottom();
_prev_top_at_mark_start = bot;
_next_top_at_mark_start = bot;
_top_at_conc_mark_count = bot;
}
jint _zfs; // A member of ZeroFillState. Protected by ZF_lock.
Thread* _zero_filler; // If _zfs is ZeroFilling, the thread that (last)
// made it so.
void set_young_type(YoungType new_type) {
//assert(_young_type != new_type, "setting the same type" );
// TODO: add more assertions here
_young_type = new_type;
}
public:
// If "is_zeroed" is "true", the region "mr" can be assumed to contain zeros.
HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray,
MemRegion mr, bool is_zeroed);
enum SomePublicConstants {
// HeapRegions are GrainBytes-aligned
// and have sizes that are multiples of GrainBytes.
LogOfHRGrainBytes = 20,
LogOfHRGrainWords = LogOfHRGrainBytes - LogHeapWordSize,
GrainBytes = 1 << LogOfHRGrainBytes,
GrainWords = 1 << LogOfHRGrainWords,
MaxAge = 2, NoOfAges = MaxAge+1
};
enum ClaimValues {
InitialClaimValue = 0,
FinalCountClaimValue = 1,
NoteEndClaimValue = 2,
ScrubRemSetClaimValue = 3
};
// Concurrent refinement requires contiguous heap regions (in which TLABs
// might be allocated) to be zero-filled. Each region therefore has a
// zero-fill-state.
enum ZeroFillState {
NotZeroFilled,
ZeroFilling,
ZeroFilled,
Allocated
};
// If this region is a member of a HeapRegionSeq, the index in that
// sequence, otherwise -1.
int hrs_index() const { return _hrs_index; }
void set_hrs_index(int index) { _hrs_index = index; }
// The number of bytes marked live in the region in the last marking phase.
size_t marked_bytes() { return _prev_marked_bytes; }
// The number of bytes counted in the next marking.
size_t next_marked_bytes() { return _next_marked_bytes; }
// The number of bytes live wrt the next marking.
size_t next_live_bytes() {
return (top() - next_top_at_mark_start())
* HeapWordSize
+ next_marked_bytes();
}
// A lower bound on the number of garbage bytes in the region.
size_t garbage_bytes() {
size_t used_at_mark_start_bytes =
(prev_top_at_mark_start() - bottom()) * HeapWordSize;
assert(used_at_mark_start_bytes >= marked_bytes(),
"Can't mark more than we have.");
return used_at_mark_start_bytes - marked_bytes();
}
// An upper bound on the number of live bytes in the region.
size_t max_live_bytes() { return used() - garbage_bytes(); }
void add_to_marked_bytes(size_t incr_bytes) {
_next_marked_bytes = _next_marked_bytes + incr_bytes;
guarantee( _next_marked_bytes <= used(), "invariant" );
}
void zero_marked_bytes() {
_prev_marked_bytes = _next_marked_bytes = 0;
}
bool isHumongous() const { return _humongous_type != NotHumongous; }
bool startsHumongous() const { return _humongous_type == StartsHumongous; }
bool continuesHumongous() const { return _humongous_type == ContinuesHumongous; }
// For a humongous region, the region in which it starts.
HeapRegion* humongous_start_region() const {
return _humongous_start_region;
}
// Causes the current region to represent a humongous object spanning "n"
// regions.
virtual void set_startsHumongous();
// The regions that continue a humongous sequence should be added using
// this method, in increasing address order.
void set_continuesHumongous(HeapRegion* start);
void add_continuingHumongousRegion(HeapRegion* cont);
// If the region has a remembered set, return a pointer to it.
HeapRegionRemSet* rem_set() const {
return _rem_set;
}
// True iff the region is in current collection_set.
bool in_collection_set() const {
return _in_collection_set;
}
void set_in_collection_set(bool b) {
_in_collection_set = b;
}
HeapRegion* next_in_collection_set() {
assert(in_collection_set(), "should only invoke on member of CS.");
assert(_next_in_special_set == NULL ||
_next_in_special_set->in_collection_set(),
"Malformed CS.");
return _next_in_special_set;
}
void set_next_in_collection_set(HeapRegion* r) {
assert(in_collection_set(), "should only invoke on member of CS.");
assert(r == NULL || r->in_collection_set(), "Malformed CS.");
_next_in_special_set = r;
}
// True iff it is or has been an allocation region in the current
// collection pause.
bool is_gc_alloc_region() const {
return _is_gc_alloc_region;
}
void set_is_gc_alloc_region(bool b) {
_is_gc_alloc_region = b;
}
HeapRegion* next_gc_alloc_region() {
assert(is_gc_alloc_region(), "should only invoke on member of CS.");
assert(_next_in_special_set == NULL ||
_next_in_special_set->is_gc_alloc_region(),
"Malformed CS.");
return _next_in_special_set;
}
void set_next_gc_alloc_region(HeapRegion* r) {
assert(is_gc_alloc_region(), "should only invoke on member of CS.");
assert(r == NULL || r->is_gc_alloc_region(), "Malformed CS.");
_next_in_special_set = r;
}
bool is_reserved() {
return popular();
}
bool is_on_free_list() {
return _is_on_free_list;
}
void set_on_free_list(bool b) {
_is_on_free_list = b;
}
HeapRegion* next_from_free_list() {
assert(is_on_free_list(),
"Should only invoke on free space.");
assert(_next_in_special_set == NULL ||
_next_in_special_set->is_on_free_list(),
"Malformed Free List.");
return _next_in_special_set;
}
void set_next_on_free_list(HeapRegion* r) {
assert(r == NULL || r->is_on_free_list(), "Malformed free list.");
_next_in_special_set = r;
}
bool is_on_unclean_list() {
return _is_on_unclean_list;
}
void set_on_unclean_list(bool b);
HeapRegion* next_from_unclean_list() {
assert(is_on_unclean_list(),
"Should only invoke on unclean space.");
assert(_next_in_special_set == NULL ||
_next_in_special_set->is_on_unclean_list(),
"Malformed unclean List.");
return _next_in_special_set;
}
void set_next_on_unclean_list(HeapRegion* r);
HeapRegion* get_next_young_region() { return _next_young_region; }
void set_next_young_region(HeapRegion* hr) {
_next_young_region = hr;
}
// Allows logical separation between objects allocated before and after the
// call to save_marks().
void save_marks();
// Reset HR stuff to default values.
void hr_clear(bool par, bool clear_space);
void initialize(MemRegion mr, bool clear_space, bool mangle_space);
// Ensure that "this" is zero-filled.
void ensure_zero_filled();
// This one requires that the calling thread holds ZF_mon.
void ensure_zero_filled_locked();
// Get the start of the unmarked area in this region.
HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; }
HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; }
// Apply "cl->do_oop" to (the addresses of) all reference fields in objects
// allocated in the current region before the last call to "save_mark".
void oop_before_save_marks_iterate(OopClosure* cl);
// This call determines the "filter kind" argument that will be used for
// the next call to "new_dcto_cl" on this region with the "traditional"
// signature (i.e., the call below.) The default, in the absence of a
// preceding call to this method, is "NoFilterKind", and a call to this
// method is necessary for each such call, or else it reverts to the
// default.
// (This is really ugly, but all other methods I could think of changed a
// lot of main-line code for G1.)
void set_next_filter_kind(HeapRegionDCTOC::FilterKind nfk) {
_next_fk = nfk;
}
DirtyCardToOopClosure*
new_dcto_closure(OopClosure* cl,
CardTableModRefBS::PrecisionStyle precision,
HeapRegionDCTOC::FilterKind fk);
#if WHASSUP
DirtyCardToOopClosure*
new_dcto_closure(OopClosure* cl,
CardTableModRefBS::PrecisionStyle precision,
HeapWord* boundary) {
assert(boundary == NULL, "This arg doesn't make sense here.");
DirtyCardToOopClosure* res = new_dcto_closure(cl, precision, _next_fk);
_next_fk = HeapRegionDCTOC::NoFilterKind;
return res;
}
#endif
//
// Note the start or end of marking. This tells the heap region
// that the collector is about to start or has finished (concurrently)
// marking the heap.
//
// Note the start of a marking phase. Record the
// start of the unmarked area of the region here.
void note_start_of_marking(bool during_initial_mark) {
init_top_at_conc_mark_count();
_next_marked_bytes = 0;
if (during_initial_mark && is_young() && !is_survivor())
_next_top_at_mark_start = bottom();
else
_next_top_at_mark_start = top();
}
// Note the end of a marking phase. Install the start of
// the unmarked area that was captured at start of marking.
void note_end_of_marking() {
_prev_top_at_mark_start = _next_top_at_mark_start;
_prev_marked_bytes = _next_marked_bytes;
_next_marked_bytes = 0;
guarantee(_prev_marked_bytes <=
(size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
"invariant");
}
// After an evacuation, we need to update _next_top_at_mark_start
// to be the current top. Note this is only valid if we have only
// ever evacuated into this region. If we evacuate, allocate, and
// then evacuate we are in deep doodoo.
void note_end_of_copying() {
assert(top() >= _next_top_at_mark_start,
"Increase only");
_next_top_at_mark_start = top();
}
// Returns "false" iff no object in the region was allocated when the
// last mark phase ended.
bool is_marked() { return _prev_top_at_mark_start != bottom(); }
// If "is_marked()" is true, then this is the index of the region in
// an array constructed at the end of marking of the regions in a
// "desirability" order.
int sort_index() {
return _sort_index;
}
void set_sort_index(int i) {
_sort_index = i;
}
void init_top_at_conc_mark_count() {
_top_at_conc_mark_count = bottom();
}
void set_top_at_conc_mark_count(HeapWord *cur) {
assert(bottom() <= cur && cur <= end(), "Sanity.");
_top_at_conc_mark_count = cur;
}
HeapWord* top_at_conc_mark_count() {
return _top_at_conc_mark_count;
}
void reset_during_compaction() {
guarantee( isHumongous() && startsHumongous(),
"should only be called for humongous regions");
zero_marked_bytes();
init_top_at_mark_start();
}
bool popular() { return _popularity == Popular; }
void set_popular(bool b) {
if (b) {
_popularity = Popular;
} else {
_popularity = NotPopular;
}
}
bool popular_pending() { return _popularity == PopularPending; }
void set_popular_pending(bool b) {
if (b) {
_popularity = PopularPending;
} else {
_popularity = NotPopular;
}
}
// <PREDICTION>
void calc_gc_efficiency(void);
double gc_efficiency() { return _gc_efficiency;}
// </PREDICTION>
bool is_young() const { return _young_type != NotYoung; }
bool is_scan_only() const { return _young_type == ScanOnly; }
bool is_survivor() const { return _young_type == Survivor; }
int young_index_in_cset() const { return _young_index_in_cset; }
void set_young_index_in_cset(int index) {
assert( (index == -1) || is_young(), "pre-condition" );
_young_index_in_cset = index;
}
int age_in_surv_rate_group() {
assert( _surv_rate_group != NULL, "pre-condition" );
assert( _age_index > -1, "pre-condition" );
return _surv_rate_group->age_in_group(_age_index);
}
void recalculate_age_in_surv_rate_group() {
assert( _surv_rate_group != NULL, "pre-condition" );
assert( _age_index > -1, "pre-condition" );
_age_index = _surv_rate_group->recalculate_age_index(_age_index);
}
void record_surv_words_in_group(size_t words_survived) {
assert( _surv_rate_group != NULL, "pre-condition" );
assert( _age_index > -1, "pre-condition" );
int age_in_group = age_in_surv_rate_group();
_surv_rate_group->record_surviving_words(age_in_group, words_survived);
}
int age_in_surv_rate_group_cond() {
if (_surv_rate_group != NULL)
return age_in_surv_rate_group();
else
return -1;
}
SurvRateGroup* surv_rate_group() {
return _surv_rate_group;
}
void install_surv_rate_group(SurvRateGroup* surv_rate_group) {
assert( surv_rate_group != NULL, "pre-condition" );
assert( _surv_rate_group == NULL, "pre-condition" );
assert( is_young(), "pre-condition" );
_surv_rate_group = surv_rate_group;
_age_index = surv_rate_group->next_age_index();
}
void uninstall_surv_rate_group() {
if (_surv_rate_group != NULL) {
assert( _age_index > -1, "pre-condition" );
assert( is_young(), "pre-condition" );
_surv_rate_group = NULL;
_age_index = -1;
} else {
assert( _age_index == -1, "pre-condition" );
}
}
void set_young() { set_young_type(Young); }
void set_scan_only() { set_young_type(ScanOnly); }
void set_survivor() { set_young_type(Survivor); }
void set_not_young() { set_young_type(NotYoung); }
// Determine if an object has been allocated since the last
// mark performed by the collector. This returns true iff the object
// is within the unmarked area of the region.
bool obj_allocated_since_prev_marking(oop obj) const {
return (HeapWord *) obj >= prev_top_at_mark_start();
}
bool obj_allocated_since_next_marking(oop obj) const {
return (HeapWord *) obj >= next_top_at_mark_start();
}
// For parallel heapRegion traversal.
bool claimHeapRegion(int claimValue);
jint claim_value() { return _claimed; }
// Use this carefully: only when you're sure no one is claiming...
void set_claim_value(int claimValue) { _claimed = claimValue; }
// Returns the "evacuation_failed" property of the region.
bool evacuation_failed() { return _evacuation_failed; }
// Sets the "evacuation_failed" property of the region.
void set_evacuation_failed(bool b) {
_evacuation_failed = b;
if (b) {
init_top_at_conc_mark_count();
_next_marked_bytes = 0;
}
}
// Requires that "mr" be entirely within the region.
// Apply "cl->do_object" to all objects that intersect with "mr".
// If the iteration encounters an unparseable portion of the region,
// or if "cl->abort()" is true after a closure application,
// terminate the iteration and return the address of the start of the
// subregion that isn't done. (The two can be distinguished by querying
// "cl->abort()".) Return of "NULL" indicates that the iteration
// completed.
HeapWord*
object_iterate_mem_careful(MemRegion mr, ObjectClosure* cl);
HeapWord*
oops_on_card_seq_iterate_careful(MemRegion mr,
FilterOutOfRegionClosure* cl);
// The region "mr" is entirely in "this", and starts and ends at block
// boundaries. The caller declares that all the contained blocks are
// coalesced into one.
void declare_filled_region_to_BOT(MemRegion mr) {
_offsets.single_block(mr.start(), mr.end());
}
// A version of block start that is guaranteed to find *some* block
// boundary at or before "p", but does no object iteration, and may
// therefore be used safely when the heap is unparseable.
HeapWord* block_start_careful(const void* p) const {
return _offsets.block_start_careful(p);
}
// Requires that "addr" is within the region. Returns the start of the
// first ("careful") block that starts at or after "addr", or else the
// "end" of the region if there is no such block.
HeapWord* next_block_start_careful(HeapWord* addr);
// Returns the zero-fill-state of the current region.
ZeroFillState zero_fill_state() { return (ZeroFillState)_zfs; }
bool zero_fill_is_allocated() { return _zfs == Allocated; }
Thread* zero_filler() { return _zero_filler; }
// Indicate that the contents of the region are unknown, and therefore
// might require zero-filling.
void set_zero_fill_needed() {
set_zero_fill_state_work(NotZeroFilled);
}
void set_zero_fill_in_progress(Thread* t) {
set_zero_fill_state_work(ZeroFilling);
_zero_filler = t;
}
void set_zero_fill_complete();
void set_zero_fill_allocated() {
set_zero_fill_state_work(Allocated);
}
void set_zero_fill_state_work(ZeroFillState zfs);
// This is called when a full collection shrinks the heap.
// We want to set the heap region to a value which says
// it is no longer part of the heap. For now, we'll let "NotZF" fill
// that role.
void reset_zero_fill() {
set_zero_fill_state_work(NotZeroFilled);
_zero_filler = NULL;
}
#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix) \
virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
CompactibleSpace* next_compaction_space() const;
virtual void reset_after_compaction();
void print() const;
void print_on(outputStream* st) const;
// Override
virtual void verify(bool allow_dirty) const;
#ifdef DEBUG
HeapWord* allocate(size_t size);
#endif
};
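// Editor's illustration (not part of this change set): the prev/next
// top-at-mark-start bookkeeping above can be hard to visualize, so here is
// a minimal, self-contained model of it. "ToyRegion" and its members are
// invented for this sketch and do not exist in HotSpot; offsets are plain
// word counts.
#if 0
#include <cassert>
#include <cstddef>

struct ToyRegion {
  size_t bottom;            // first word of the region
  size_t top;               // current allocation top
  size_t prev_tams;         // top at the start of the last completed marking
  size_t next_tams;         // top at the start of the in-progress marking
  size_t prev_marked_words; // words found live by the last completed marking
  size_t next_marked_words; // words found live so far by the current marking

  void note_start_of_marking() {
    next_marked_words = 0;
    next_tams = top;        // anything allocated later is implicitly live
  }
  void note_end_of_marking() {
    prev_tams = next_tams;
    prev_marked_words = next_marked_words;
    next_marked_words = 0;
    assert(prev_marked_words <= prev_tams - bottom);
  }
  // Lower bound on garbage: words below prev_tams that were not marked.
  size_t garbage_words() const {
    return (prev_tams - bottom) - prev_marked_words;
  }
};
#endif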
// HeapRegionClosure is used for iterating over regions.
// Terminates the iteration when the "doHeapRegion" method returns "true".
class HeapRegionClosure : public StackObj {
friend class HeapRegionSeq;
friend class G1CollectedHeap;
bool _complete;
void incomplete() { _complete = false; }
public:
HeapRegionClosure(): _complete(true) {}
// Typically called on each region until it returns true.
virtual bool doHeapRegion(HeapRegion* r) = 0;
// True after iteration if the closure was applied to all heap regions
// and returned "false" in all cases.
bool complete() { return _complete; }
};
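// Editor's sketch (illustrative only): a typical client subclasses
// HeapRegionClosure and returns "false" from doHeapRegion so that the
// iteration runs to completion. The class below is invented for this
// sketch and assumes the declarations above are in scope.
#if 0
class CountNonEmptyRegionsClosure : public HeapRegionClosure {
  size_t _count;
public:
  CountNonEmptyRegionsClosure() : _count(0) {}
  bool doHeapRegion(HeapRegion* r) {
    if (!r->is_empty()) _count++;
    return false;             // "false" means: keep iterating
  }
  size_t count() const { return _count; }
};
#endif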
// A linked list of heap regions. It leaves the "next" field
// unspecified; that's up to subtypes.
class RegionList {
protected:
virtual HeapRegion* get_next(HeapRegion* chr) = 0;
virtual void set_next(HeapRegion* chr,
HeapRegion* new_next) = 0;
HeapRegion* _hd;
HeapRegion* _tl;
size_t _sz;
// Protected constructor because this type is only meaningful
// when the _get/_set next functions are defined.
RegionList() : _hd(NULL), _tl(NULL), _sz(0) {}
public:
void reset() {
_hd = NULL;
_tl = NULL;
_sz = 0;
}
HeapRegion* hd() { return _hd; }
HeapRegion* tl() { return _tl; }
size_t sz() { return _sz; }
size_t length();
bool well_formed() {
return
((hd() == NULL && tl() == NULL && sz() == 0)
|| (hd() != NULL && tl() != NULL && sz() > 0))
&& (sz() == length());
}
virtual void insert_before_head(HeapRegion* r);
void prepend_list(RegionList* new_list);
virtual HeapRegion* pop();
void dec_sz() { _sz--; }
// Requires that "r" is an element of the list, and is not the tail.
void delete_after(HeapRegion* r);
};
class EmptyNonHRegionList: public RegionList {
protected:
// Protected constructor because this type is only meaningful
// when the _get/_set next functions are defined.
EmptyNonHRegionList() : RegionList() {}
public:
void insert_before_head(HeapRegion* r) {
// assert(r->is_empty(), "Better be empty");
assert(!r->isHumongous(), "Better not be humongous.");
RegionList::insert_before_head(r);
}
void prepend_list(EmptyNonHRegionList* new_list) {
// assert(new_list->hd() == NULL || new_list->hd()->is_empty(),
// "Better be empty");
assert(new_list->hd() == NULL || !new_list->hd()->isHumongous(),
"Better not be humongous.");
// assert(new_list->tl() == NULL || new_list->tl()->is_empty(),
// "Better be empty");
assert(new_list->tl() == NULL || !new_list->tl()->isHumongous(),
"Better not be humongous.");
RegionList::prepend_list(new_list);
}
};
class UncleanRegionList: public EmptyNonHRegionList {
public:
HeapRegion* get_next(HeapRegion* hr) {
return hr->next_from_unclean_list();
}
void set_next(HeapRegion* hr, HeapRegion* new_next) {
hr->set_next_on_unclean_list(new_next);
}
UncleanRegionList() : EmptyNonHRegionList() {}
void insert_before_head(HeapRegion* r) {
assert(!r->is_on_free_list(),
"Better not already be on free list");
assert(!r->is_on_unclean_list(),
"Better not already be on unclean list");
r->set_zero_fill_needed();
r->set_on_unclean_list(true);
EmptyNonHRegionList::insert_before_head(r);
}
void prepend_list(UncleanRegionList* new_list) {
assert(new_list->tl() == NULL || !new_list->tl()->is_on_free_list(),
"Better not already be on free list");
assert(new_list->tl() == NULL || new_list->tl()->is_on_unclean_list(),
"Better already be marked as on unclean list");
assert(new_list->hd() == NULL || !new_list->hd()->is_on_free_list(),
"Better not already be on free list");
assert(new_list->hd() == NULL || new_list->hd()->is_on_unclean_list(),
"Better already be marked as on unclean list");
EmptyNonHRegionList::prepend_list(new_list);
}
HeapRegion* pop() {
HeapRegion* res = RegionList::pop();
if (res != NULL) res->set_on_unclean_list(false);
return res;
}
};
// Local Variables: ***
// c-indentation-style: gnu ***
// End: ***
#endif // SERIALGC

View file

@@ -0,0 +1,60 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
HeapWord* res = ContiguousSpace::allocate(size);
if (res != NULL) {
_offsets.alloc_block(res, size);
}
return res;
}
// Because of the requirement of keeping "_offsets" up to date with the
// allocations, we sequentialize these with a lock. Therefore, best if
// this is used for larger LAB allocations only.
inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
MutexLocker x(&_par_alloc_lock);
// This ought to be just "allocate", because of the lock above, but that
// ContiguousSpace::allocate asserts that either the allocating thread
// holds the heap lock or it is the VM thread and we're at a safepoint.
// The best I (dld) could figure was to put a field in ContiguousSpace
// meaning "locking at safepoint taken care of", and set/reset that
// here. But this will do for now, especially in light of the comment
// above. Perhaps in the future we could adopt some lock-free manner of
// keeping this coordination.
HeapWord* res = ContiguousSpace::par_allocate(size);
if (res != NULL) {
_offsets.alloc_block(res, size);
}
return res;
}
inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
return _offsets.block_start(p);
}
inline HeapWord*
G1OffsetTableContigSpace::block_start_const(const void* p) const {
return _offsets.block_start_const(p);
}
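// Editor's illustration (not HotSpot code): the comment above par_allocate()
// describes serializing parallel allocations with a lock so that a side
// table stays consistent with the bump pointer. A self-contained model of
// that pattern, with all names invented for the sketch:
#if 0
#include <cstddef>
#include <mutex>
#include <utility>
#include <vector>

class ToyParSpace {
  std::mutex _lock;
  size_t _top, _end;
  std::vector<std::pair<size_t, size_t> > _blocks; // side table: (start, size)
public:
  ToyParSpace(size_t bottom, size_t end) : _top(bottom), _end(end) {}
  // Returns the start offset of the allocated block, or (size_t)-1 on failure.
  size_t par_allocate(size_t size) {
    std::lock_guard<std::mutex> x(_lock);          // one allocator at a time
    if (_end - _top < size) return (size_t)-1;
    size_t res = _top;
    _top += size;
    _blocks.push_back(std::make_pair(res, size));  // keep side table in sync
    return res;
  }
};
#endif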

File diff suppressed because it is too large

View file

@@ -0,0 +1,470 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Remembered set for a heap region. Represents a set of "cards" that
// contain pointers into the owner heap region. Cards are defined somewhat
// abstractly, in terms of what the "BlockOffsetTable" in use can parse.
class G1CollectedHeap;
class G1BlockOffsetSharedArray;
class HeapRegion;
class HeapRegionRemSetIterator;
class PosParPRT;
class SparsePRT;
// The "_coarse_map" is a bitmap with one bit for each region, where set
// bits indicate that the corresponding region may contain some pointer
// into the owning region.
// The "_fine_grain_entries" array is an open hash table of PerRegionTables
// (PRTs), indicating regions for which we're keeping the RS as a set of
// cards. The strategy is to cap the size of the fine-grain table,
// deleting an entry and setting the corresponding coarse-grained bit when
// we would overflow this cap.
// We use a mixture of locking and lock-free techniques here. We allow
// threads to locate PRTs without locking, but threads attempting to alter
// a bucket list obtain a lock. This means that any failing attempt to
// find a PRT must be retried with the lock. It might seem dangerous that
// a read can find a PRT that is concurrently deleted. This is all right,
// because:
//
// 1) We only actually free PRT's at safe points (though we reuse them at
// other times).
// 2) We find PRT's in an attempt to add entries. If a PRT is deleted,
// its _coarse_map bit is set, so the entry that we were attempting to add
// is represented. If a deleted PRT is re-used, a thread adding a bit,
// thinking the PRT is for a different region, does no harm.
class OtherRegionsTable: public CHeapObj {
friend class HeapRegionRemSetIterator;
G1CollectedHeap* _g1h;
Mutex _m;
HeapRegion* _hr;
// These are protected by "_m".
BitMap _coarse_map;
size_t _n_coarse_entries;
static jint _n_coarsenings;
PosParPRT** _fine_grain_regions;
size_t _n_fine_entries;
#define SAMPLE_FOR_EVICTION 1
#if SAMPLE_FOR_EVICTION
size_t _fine_eviction_start;
static size_t _fine_eviction_stride;
static size_t _fine_eviction_sample_size;
#endif
SparsePRT _sparse_table;
// These are static after init.
static size_t _max_fine_entries;
static size_t _mod_max_fine_entries_mask;
// Requires "prt" to be the first element of the bucket list appropriate
// for "hr". If this list contains an entry for "hr", return it,
// otherwise return "NULL".
PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const;
// Find, delete, and return a candidate PosParPRT, if any exists,
// adding the deleted region to the coarse bitmap. Requires the caller
// to hold _m, and the fine-grain table to be full.
PosParPRT* delete_region_table();
// If a PRT for "hr" is in the bucket list indicated by "ind" (which must
// be the correct index for "hr"), delete it and return true; else return
// false.
bool del_single_region_table(size_t ind, HeapRegion* hr);
static jint _cache_probes;
static jint _cache_hits;
// Indexed by thread X heap region, to minimize thread contention.
static int** _from_card_cache;
static size_t _from_card_cache_max_regions;
static size_t _from_card_cache_mem_size;
public:
OtherRegionsTable(HeapRegion* hr);
HeapRegion* hr() const { return _hr; }
// For now. Could "expand" some tables in the future, so that this made
// sense.
void add_reference(oop* from, int tid);
void add_reference(oop* from) {
return add_reference(from, 0);
}
// Removes any entries shown by the given bitmaps to contain only dead
// objects.
void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
// Declared const, but takes a lock internally.
size_t occupied() const;
size_t occ_fine() const;
size_t occ_coarse() const;
size_t occ_sparse() const;
static jint n_coarsenings() { return _n_coarsenings; }
// Returns size in bytes.
// Declared const, but takes a lock internally.
size_t mem_size() const;
static size_t static_mem_size();
static size_t fl_mem_size();
bool contains_reference(oop* from) const;
bool contains_reference_locked(oop* from) const;
void clear();
// Specifically clear the from_card_cache.
void clear_fcc();
// "from_hr" is being cleared; remove any entries from it.
void clear_incoming_entry(HeapRegion* from_hr);
// Declare the heap size (in # of regions) to the OtherRegionsTable.
// (Uses it to initialize from_card_cache).
static void init_from_card_cache(size_t max_regions);
// Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
// Make sure any entries for higher regions are invalid.
static void shrink_from_card_cache(size_t new_n_regs);
static void print_from_card_cache();
};
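// Editor's illustration (not HotSpot code): a minimal, single-threaded model
// of the fine-grain/coarse-grain split described above. A capped map keeps
// exact card sets per source region; when the cap is hit, one victim entry
// is "coarsened" into a single per-region bit that over-approximates it.
// All names are invented for this sketch.
#if 0
#include <cstddef>
#include <map>
#include <set>
#include <vector>

class ToyOtherRegionsTable {
  static const size_t MaxFineEntries = 4;   // stands in for _max_fine_entries
  std::map<int, std::set<size_t> > _fine;   // region index -> exact card set
  std::vector<bool> _coarse;                // one bit per source region
public:
  explicit ToyOtherRegionsTable(size_t n_regions) : _coarse(n_regions, false) {}
  void add_card(int from_region, size_t card) {
    if (_coarse[from_region]) return;       // already over-approximated
    if (_fine.find(from_region) == _fine.end() &&
        _fine.size() == MaxFineEntries) {
      // Evict some fine entry: drop its exact cards, set its coarse bit.
      std::map<int, std::set<size_t> >::iterator victim = _fine.begin();
      _coarse[victim->first] = true;
      _fine.erase(victim);
    }
    _fine[from_region].insert(card);
  }
  bool might_contain(int from_region, size_t card) const {
    if (_coarse[from_region]) return true;  // conservative answer
    std::map<int, std::set<size_t> >::const_iterator it = _fine.find(from_region);
    return it != _fine.end() && it->second.count(card) != 0;
  }
};
#endif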
class HeapRegionRemSet : public CHeapObj {
friend class VMStructs;
friend class HeapRegionRemSetIterator;
public:
enum Event {
Event_EvacStart, Event_EvacEnd, Event_RSUpdateEnd
};
private:
G1BlockOffsetSharedArray* _bosa;
G1BlockOffsetSharedArray* bosa() const { return _bosa; }
static bool _par_traversal;
OtherRegionsTable _other_regions;
// One set bit for every region that has an entry for this one.
BitMap _outgoing_region_map;
// Clear entries for the current region in any rem sets named in
// the _outgoing_region_map.
void clear_outgoing_entries();
#if MAYBE
// Audit the given card index.
void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr,
HeapRegionRemSet* empty_cards, size_t* one_obj_cards);
// Assumes that "audit_stage1" has been called for "hr", to set up
// "shadow" and "new_rs" appropriately. Identifies individual popular
// objects; returns "true" if any are found.
bool audit_find_pop(HeapRegion* hr, u2* rc_arr);
// Assumes that "audit_stage1" has been called for "hr", to set up
// "shadow" and "new_rs" appropriately. Identifies individual popular
// objects, and determines the number of entries in "new_rs" if any such
// popular objects are ignored. If this is sufficiently small, returns
// "false" to indicate that a constraint should not be introduced.
// Otherwise, returns "true" to indicate that we should go ahead with
// adding the constraint.
bool audit_stag(HeapRegion* hr, u2* rc_arr);
u2* alloc_rc_array();
SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds,
SeqHeapRegionRemSet* empty_cards);
#endif
enum ParIterState { Unclaimed, Claimed, Complete };
ParIterState _iter_state;
// Unused unless G1RecordHRRSOops is true.
static const int MaxRecorded = 1000000;
static oop** _recorded_oops;
static HeapWord** _recorded_cards;
static HeapRegion** _recorded_regions;
static int _n_recorded;
static const int MaxRecordedEvents = 1000;
static Event* _recorded_events;
static int* _recorded_event_index;
static int _n_recorded_events;
static void print_event(outputStream* str, Event evnt);
public:
HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
HeapRegion* hr);
static int num_par_rem_sets();
static bool par_traversal() { return _par_traversal; }
static void set_par_traversal(bool b);
HeapRegion* hr() const {
return _other_regions.hr();
}
size_t occupied() const {
return _other_regions.occupied();
}
size_t occ_fine() const {
return _other_regions.occ_fine();
}
size_t occ_coarse() const {
return _other_regions.occ_coarse();
}
size_t occ_sparse() const {
return _other_regions.occ_sparse();
}
static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
/* Used in the sequential case. Returns "true" iff this addition causes
the size limit to be reached. */
bool add_reference(oop* from) {
_other_regions.add_reference(from);
return false;
}
/* Used in the parallel case. Returns "true" iff this addition causes
the size limit to be reached. */
bool add_reference(oop* from, int tid) {
_other_regions.add_reference(from, tid);
return false;
}
// Records the fact that the current region contains an outgoing
// reference into "to_hr".
void add_outgoing_reference(HeapRegion* to_hr);
// Removes any entries shown by the given bitmaps to contain only dead
// objects.
void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
// The region is being reclaimed; clear its remset, and any mention of
// entries for this region in other remsets.
void clear();
// Forget any entries due to pointers from "from_hr".
void clear_incoming_entry(HeapRegion* from_hr) {
_other_regions.clear_incoming_entry(from_hr);
}
#if 0
virtual void cleanup() = 0;
#endif
// Should be called from single-threaded code.
void init_for_par_iteration();
// Attempt to claim this remembered set for iteration. Returns true iff
// this call caused an atomic transition from Unclaimed to Claimed.
bool claim_iter();
// Sets the iteration state to "complete".
void set_iter_complete();
// Returns "true" iff the region's iteration is complete.
bool iter_is_complete();
// Initialize the given iterator to iterate over this rem set.
void init_iterator(HeapRegionRemSetIterator* iter) const;
#if 0
// Apply the "do_card" method to the start address of every card in the
// rem set. Returns false if some application of the closure aborted.
virtual bool card_iterate(CardClosure* iter) = 0;
#endif
// The actual # of bytes this hr_remset takes up.
size_t mem_size() {
return _other_regions.mem_size()
// This correction is necessary because the above already includes the
// embedded OtherRegionsTable.
+ sizeof(HeapRegionRemSet) - sizeof(OtherRegionsTable);
}
// Returns the memory occupancy of all static data structures associated
// with remembered sets.
static size_t static_mem_size() {
return OtherRegionsTable::static_mem_size();
}
// Returns the memory occupancy of all free_list data structures associated
// with remembered sets.
static size_t fl_mem_size() {
return OtherRegionsTable::fl_mem_size();
}
bool contains_reference(oop* from) const {
return _other_regions.contains_reference(from);
}
void print() const;
#if MAYBE
// We are about to introduce a constraint, requiring the collection time
// of the region owning this RS to be <= "hr", and forgetting pointers
// from the owning region to "hr." Before doing so, examines this rem
// set for pointers to "hr", possibly identifying some popular objects,
// and possibly finding that some cards no longer contain pointers to "hr".
//
// These steps may prevent the constraint from being necessary; in
// that case this returns a set of cards now thought to contain no pointers
// into "hr". In the normal (I assume) case, returns NULL, indicating that
// we should go ahead and add the constraint.
virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0;
#endif
// Called during a stop-world phase to perform any deferred cleanups.
// The second version may be called by parallel threads after they finish
// collection work.
static void cleanup();
static void par_cleanup();
// Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
// (Uses it to initialize from_card_cache).
static void init_heap(size_t max_regions) {
OtherRegionsTable::init_from_card_cache(max_regions);
}
// Declares that only regions i s.t. 0 <= i < new_n_regs are in use.
static void shrink_heap(size_t new_n_regs) {
OtherRegionsTable::shrink_from_card_cache(new_n_regs);
}
#ifndef PRODUCT
static void print_from_card_cache() {
OtherRegionsTable::print_from_card_cache();
}
#endif
static void record(HeapRegion* hr, oop* f);
static void print_recorded();
static void record_event(Event evnt);
// Run unit tests.
#ifndef PRODUCT
static void test();
#endif
};
class HeapRegionRemSetIterator : public CHeapObj {
// The region over which we're iterating.
const HeapRegionRemSet* _hrrs;
// Local caching of HRRS fields.
const BitMap* _coarse_map;
PosParPRT** _fine_grain_regions;
G1BlockOffsetSharedArray* _bosa;
G1CollectedHeap* _g1h;
// The number yielded since initialization.
size_t _n_yielded_fine;
size_t _n_yielded_coarse;
size_t _n_yielded_sparse;
// Indicates which of the three tables (sparse, fine, or coarse) we are
// currently iterating over.
enum IterState {
Sparse,
Fine,
Coarse
};
IterState _is;
// In each kind of iteration, the heap offset of the first card of the
// current region.
size_t _cur_region_card_offset;
// Card offset within cur region.
size_t _cur_region_cur_card;
// Coarse table iteration fields:
// Current region index;
int _coarse_cur_region_index;
int _coarse_cur_region_cur_card;
bool coarse_has_next(size_t& card_index);
// Fine table iteration fields:
// Index of bucket-list we're working on.
int _fine_array_index;
// Per Region Table we're doing within current bucket list.
PosParPRT* _fine_cur_prt;
/* SparsePRT::*/ SparsePRTIter _sparse_iter;
void fine_find_next_non_null_prt();
bool fine_has_next();
bool fine_has_next(size_t& card_index);
public:
// We require an iterator to be initialized before use, so the
// constructor does little.
HeapRegionRemSetIterator();
void initialize(const HeapRegionRemSet* hrrs);
// If there remains one or more cards to be yielded, returns true and
// sets "card_index" to one of those cards (which is then considered
// yielded.) Otherwise, returns false (and leaves "card_index"
// undefined.)
bool has_next(size_t& card_index);
size_t n_yielded_fine() { return _n_yielded_fine; }
size_t n_yielded_coarse() { return _n_yielded_coarse; }
size_t n_yielded_sparse() { return _n_yielded_sparse; }
size_t n_yielded() {
return n_yielded_fine() + n_yielded_coarse() + n_yielded_sparse();
}
};
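// Editor's sketch (illustrative only): the intended iteration pattern is to
// initialize an iterator against a remembered set and then drain it with
// has_next(). The helper function below is invented for this sketch and
// assumes the declarations above are in scope.
#if 0
static size_t count_rem_set_cards(const HeapRegionRemSet* hrrs) {
  HeapRegionRemSetIterator iter;
  hrrs->init_iterator(&iter);
  size_t card_index;
  size_t n = 0;
  while (iter.has_next(card_index)) {
    // "card_index" is a heap-wide card index; a real client would turn it
    // into a card start address and scan the card.
    n++;
  }
  return n;
}
#endif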
#if 0
class CardClosure: public Closure {
public:
virtual void do_card(HeapWord* card_start) = 0;
};
#endif

View file

@@ -0,0 +1,344 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_heapRegionSeq.cpp.incl"
// Local to this file.
static int orderRegions(HeapRegion** hr1p, HeapRegion** hr2p) {
if ((*hr1p)->end() <= (*hr2p)->bottom()) return -1;
else if ((*hr2p)->end() <= (*hr1p)->bottom()) return 1;
else if (*hr1p == *hr2p) return 0;
else {
assert(false, "We should never compare distinct overlapping regions.");
}
return 0;
}
HeapRegionSeq::HeapRegionSeq() :
_alloc_search_start(0),
// The line below is the worst bit of C++ hackery I've ever written
// (Detlefs, 11/23). You should think of it as equivalent to
// "_regions(100, true)": initialize the growable array and inform it
// that it should allocate its elem array(s) on the C heap. The first
// argument, however, is actually a comma expression (new-expr, 100).
// The purpose of the new_expr is to inform the growable array that it
// is *already* allocated on the C heap: it uses the placement syntax to
// keep it from actually doing any allocation.
_regions((ResourceObj::operator new (sizeof(GrowableArray<HeapRegion*>),
(void*)&_regions,
ResourceObj::C_HEAP),
100),
true),
_next_rr_candidate(0),
_seq_bottom(NULL)
{}
// Private methods.
HeapWord*
HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) {
assert(G1CollectedHeap::isHumongous(word_size),
"Allocation size should be humongous");
int cur = ind;
int first = cur;
size_t sumSizes = 0;
while (cur < _regions.length() && sumSizes < word_size) {
// Loop invariant:
// For all i in [first, cur):
// _regions.at(i)->is_empty()
// && _regions.at(i) is contiguous with its predecessor, if any
// && sumSizes is the sum of the sizes of the regions in the interval
// [first, cur)
HeapRegion* curhr = _regions.at(cur);
if (curhr->is_empty()
&& !curhr->is_reserved()
&& (first == cur
|| (_regions.at(cur-1)->end() ==
curhr->bottom()))) {
sumSizes += curhr->capacity() / HeapWordSize;
} else {
first = cur + 1;
sumSizes = 0;
}
cur++;
}
if (sumSizes >= word_size) {
_alloc_search_start = cur;
// Mark the allocated regions as allocated.
bool zf = G1CollectedHeap::heap()->allocs_are_zero_filled();
HeapRegion* first_hr = _regions.at(first);
for (int i = first; i < cur; i++) {
HeapRegion* hr = _regions.at(i);
if (zf)
hr->ensure_zero_filled();
{
MutexLockerEx x(ZF_mon, Mutex::_no_safepoint_check_flag);
hr->set_zero_fill_allocated();
}
size_t sz = hr->capacity() / HeapWordSize;
HeapWord* tmp = hr->allocate(sz);
assert(tmp != NULL, "Humongous allocation failure");
MemRegion mr = MemRegion(tmp, sz);
SharedHeap::fill_region_with_object(mr);
hr->declare_filled_region_to_BOT(mr);
if (i == first) {
first_hr->set_startsHumongous();
} else {
assert(i > first, "sanity");
hr->set_continuesHumongous(first_hr);
}
}
HeapWord* first_hr_bot = first_hr->bottom();
HeapWord* obj_end = first_hr_bot + word_size;
first_hr->set_top(obj_end);
return first_hr_bot;
} else {
// If we started from the beginning, we want to know why we can't alloc.
return NULL;
}
}
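// Editor's illustration (not HotSpot code): the loop above searches for a
// run of contiguous empty regions whose combined capacity covers the
// requested size. A self-contained model of that scan, where each region
// has unit capacity and array adjacency stands in for address contiguity:
#if 0
#include <cstddef>
#include <vector>

// Returns the index of the first region of a run of at least "need"
// consecutive empty regions, or -1 if no such run exists.
static int find_contiguous_empty_run(const std::vector<bool>& empty,
                                     size_t need) {
  size_t first = 0, run = 0;
  for (size_t cur = 0; cur < empty.size(); ++cur) {
    if (empty[cur]) {
      if (run == 0) first = cur;
      run++;
      if (run >= need) return (int) first;
    } else {
      run = 0;       // a non-empty region breaks the run
    }
  }
  return -1;
}
#endif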
void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) {
int empty_run = 0;
int n_empty = 0;
bool at_least_one_reserved = false;
int empty_run_start;
for (int i = 0; i < _regions.length(); i++) {
HeapRegion* r = _regions.at(i);
if (r->continuesHumongous()) continue;
if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) {
assert(!r->isHumongous(), "H regions should not be empty.");
if (empty_run == 0) empty_run_start = i;
empty_run++;
n_empty++;
if (r->is_reserved()) {
at_least_one_reserved = true;
}
} else {
if (empty_run > 0) {
gclog_or_tty->print(" %d:%d", empty_run_start, empty_run);
if (reserved_are_empty && at_least_one_reserved)
gclog_or_tty->print("(R)");
empty_run = 0;
at_least_one_reserved = false;
}
}
}
if (empty_run > 0) {
gclog_or_tty->print(" %d:%d", empty_run_start, empty_run);
if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)");
}
gclog_or_tty->print_cr(" [tot = %d]", n_empty);
}
int HeapRegionSeq::find(HeapRegion* hr) {
// FIXME: optimized for adjacent regions of fixed size.
int ind = hr->hrs_index();
if (ind != -1) {
assert(_regions.at(ind) == hr, "Mismatch");
}
return ind;
}
// Public methods.
void HeapRegionSeq::insert(HeapRegion* hr) {
if (_regions.length() == 0
|| _regions.top()->end() <= hr->bottom()) {
hr->set_hrs_index(_regions.length());
_regions.append(hr);
} else {
_regions.append(hr);
_regions.sort(orderRegions);
for (int i = 0; i < _regions.length(); i++) {
_regions.at(i)->set_hrs_index(i);
}
}
char* bot = (char*)_regions.at(0)->bottom();
if (_seq_bottom == NULL || bot < _seq_bottom) _seq_bottom = bot;
}
size_t HeapRegionSeq::length() {
return _regions.length();
}
size_t HeapRegionSeq::free_suffix() {
size_t res = 0;
int first = _regions.length() - 1;
int cur = first;
while (cur >= 0 &&
(_regions.at(cur)->is_empty()
&& !_regions.at(cur)->is_reserved()
&& (first == cur
|| (_regions.at(cur+1)->bottom() ==
_regions.at(cur)->end())))) {
res++;
cur--;
}
return res;
}
HeapWord* HeapRegionSeq::obj_allocate(size_t word_size) {
int cur = _alloc_search_start;
// Make sure "cur" is a valid index.
assert(cur >= 0, "Invariant.");
HeapWord* res = alloc_obj_from_region_index(cur, word_size);
if (res == NULL)
res = alloc_obj_from_region_index(0, word_size);
return res;
}
void HeapRegionSeq::iterate(HeapRegionClosure* blk) {
iterate_from((HeapRegion*)NULL, blk);
}
// The first argument r is the heap region at which iteration begins.
// This operation runs fastest when r is NULL, or the heap region for
// which a HeapRegionClosure most recently returned true, or the
// heap region immediately to its right in the sequence. In all
// other cases a linear search is required to find the index of r.
void HeapRegionSeq::iterate_from(HeapRegion* r, HeapRegionClosure* blk) {
// :::: FIXME ::::
// Static cache value is bad, especially when we start doing parallel
// remembered set update. For now just don't cache anything (the
// code in the def'd out blocks).
#if 0
static int cached_j = 0;
#endif
int len = _regions.length();
int j = 0;
// Find the index of r.
if (r != NULL) {
#if 0
assert(cached_j >= 0, "Invariant.");
if ((cached_j < len) && (r == _regions.at(cached_j))) {
j = cached_j;
} else if ((cached_j + 1 < len) && (r == _regions.at(cached_j + 1))) {
j = cached_j + 1;
} else {
j = find(r);
#endif
if (j < 0) {
j = 0;
}
#if 0
}
#endif
}
int i;
for (i = j; i < len; i += 1) {
int res = blk->doHeapRegion(_regions.at(i));
if (res) {
#if 0
cached_j = i;
#endif
blk->incomplete();
return;
}
}
for (i = 0; i < j; i += 1) {
int res = blk->doHeapRegion(_regions.at(i));
if (res) {
#if 0
cached_j = i;
#endif
blk->incomplete();
return;
}
}
}
void HeapRegionSeq::iterate_from(int idx, HeapRegionClosure* blk) {
int len = _regions.length();
int i;
for (i = idx; i < len; i++) {
if (blk->doHeapRegion(_regions.at(i))) {
blk->incomplete();
return;
}
}
for (i = 0; i < idx; i++) {
if (blk->doHeapRegion(_regions.at(i))) {
blk->incomplete();
return;
}
}
}
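// Editor's illustration (not HotSpot code): both iterate_from() overloads
// perform the same circular walk, visiting [start, len) and then [0, start)
// and stopping early when the callback asks to. A self-contained version of
// that control flow (names invented for the sketch):
#if 0
#include <cstddef>
#include <vector>

// Returns true iff every element was visited (i.e. "f" never returned true).
template <typename T, typename Fn>
static bool iterate_circular_from(const std::vector<T>& v, size_t start, Fn f) {
  for (size_t i = start; i < v.size(); ++i)
    if (f(v[i])) return false;
  for (size_t i = 0; i < start; ++i)
    if (f(v[i])) return false;
  return true;
}
#endif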
MemRegion HeapRegionSeq::shrink_by(size_t shrink_bytes,
size_t& num_regions_deleted) {
assert(shrink_bytes % os::vm_page_size() == 0, "unaligned");
assert(shrink_bytes % HeapRegion::GrainBytes == 0, "unaligned");
if (_regions.length() == 0) {
num_regions_deleted = 0;
return MemRegion();
}
int j = _regions.length() - 1;
HeapWord* end = _regions.at(j)->end();
HeapWord* last_start = end;
while (j >= 0 && shrink_bytes > 0) {
HeapRegion* cur = _regions.at(j);
// We have to leave humongous regions where they are,
// and work around them.
if (cur->isHumongous()) {
return MemRegion(last_start, end);
}
cur->reset_zero_fill();
assert(cur == _regions.top(), "Should be top");
if (!cur->is_empty()) break;
shrink_bytes -= cur->capacity();
num_regions_deleted++;
_regions.pop();
last_start = cur->bottom();
// We need to delete these somehow, but can't currently do so here: if
// we do, the ZF thread may still access the deleted region. We'll
// leave this here as a reminder that we have to do something about
// this.
// delete cur;
j--;
}
return MemRegion(last_start, end);
}
class PrintHeapRegionClosure : public HeapRegionClosure {
public:
bool doHeapRegion(HeapRegion* r) {
gclog_or_tty->print(PTR_FORMAT ":", r);
r->print();
return false;
}
};
void HeapRegionSeq::print() {
PrintHeapRegionClosure cl;
iterate(&cl);
}

View file

@@ -0,0 +1,111 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class HeapRegion;
class HeapRegionClosure;
class HeapRegionSeq: public CHeapObj {
// _regions is kept sorted by start address order, and no two regions are
// overlapping.
GrowableArray<HeapRegion*> _regions;
// The index in "_regions" at which to start the next allocation search.
// (For efficiency only; private to obj_allocate after initialization.)
int _alloc_search_start;
// Attempts to allocate a block of the (assumed humongous) word_size,
// starting at the region "ind".
HeapWord* alloc_obj_from_region_index(int ind, size_t word_size);
// Currently, we're choosing collection sets in a round-robin fashion,
// starting here.
int _next_rr_candidate;
// The bottom address of the bottom-most region, or else NULL if there
// are no regions in the sequence.
char* _seq_bottom;
public:
// Initializes "this" to the empty sequence of regions.
HeapRegionSeq();
// Adds "hr" to "this" sequence. Requires "hr" not to overlap with
// any region already in "this". (Will perform better if regions are
// inserted in ascending address order.)
void insert(HeapRegion* hr);
// Given a HeapRegion*, returns its index within _regions,
// or returns -1 if not found.
int find(HeapRegion* hr);
// Requires the index to be valid, and returns the region at the index.
HeapRegion* at(size_t i) { return _regions.at((int)i); }
// Return the number of regions in the sequence.
size_t length();
// Returns the number of contiguous regions at the end of the sequence
// that are available for allocation.
size_t free_suffix();
// Requires "word_size" to be humongous (in the technical sense). If
// possible, allocates a contiguous subsequence of the heap regions to
// satisfy the allocation, and returns the address of the beginning of
// that sequence, otherwise returns NULL.
HeapWord* obj_allocate(size_t word_size);
// Apply the "doHeapRegion" method of "blk" to all regions in "this",
// in address order, terminating the iteration early
// if the "doHeapRegion" method returns "true".
void iterate(HeapRegionClosure* blk);
// Apply the "doHeapRegion" method of "blk" to all regions in "this",
// starting at "r" (or first region, if "r" is NULL), in a circular
// manner, terminating the iteration early if the "doHeapRegion" method
// returns "true".
void iterate_from(HeapRegion* r, HeapRegionClosure* blk);
// As above, but start from a given index in the sequence
// instead of a given heap region.
void iterate_from(int idx, HeapRegionClosure* blk);
// Requires "shrink_bytes" to be a multiple of the page size and heap
// region granularity. Deletes as many "rightmost" completely free heap
// regions from the sequence as comprise shrink_bytes bytes. Returns the
// MemRegion indicating the region those regions comprised, and sets
// "num_regions_deleted" to the number of regions deleted.
MemRegion shrink_by(size_t shrink_bytes, size_t& num_regions_deleted);
// If "addr" falls within a region in the sequence, return that region,
// or else NULL.
HeapRegion* addr_to_region(const void* addr);
void print();
// Prints out runs of empty regions. If the arg is "true", reserved
// (popular) regions are considered "empty".
void print_empty_runs(bool reserved_are_empty);
};

View file

@@ -0,0 +1,40 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
inline HeapRegion* HeapRegionSeq::addr_to_region(const void* addr) {
assert(_seq_bottom != NULL, "bad _seq_bottom in addr_to_region");
if ((char*) addr >= _seq_bottom) {
size_t diff = (size_t) pointer_delta((HeapWord*) addr,
(HeapWord*) _seq_bottom);
int index = (int) (diff >> HeapRegion::LogOfHRGrainWords);
assert(index >= 0, "invariant / paranoia");
if (index < _regions.length()) {
HeapRegion* hr = _regions.at(index);
assert(hr->is_in_reserved(addr),
"addr_to_region is wrong...");
return hr;
}
}
return NULL;
}
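// Editor's illustration (not HotSpot code): the index computation above is
// just "word offset from the bottom of the sequence, divided by the region
// size in words", done as a shift because regions are a power of two in
// size. A standalone version, assuming (as on LP64) 8-byte heap words and
// the 1 MB regions implied by LogOfHRGrainBytes = 20:
#if 0
#include <cstddef>

static const size_t kLogRegionWords = 20 - 3;  // log2(region size in words)

// Maps a word offset from the sequence bottom to a region index.
static size_t region_index_for(size_t word_offset_from_bottom) {
  return word_offset_from_bottom >> kLogRegionWords;
}
#endif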

View file

@@ -0,0 +1,208 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
# include "incls/_precompiled.incl"
# include "incls/_ptrQueue.cpp.incl"
PtrQueue::PtrQueue(PtrQueueSet* qset_, bool perm) :
_qset(qset_), _buf(NULL), _index(0), _active(false),
_perm(perm), _lock(NULL)
{}
PtrQueue::~PtrQueue() {
if (!_perm && _buf != NULL) {
if (_index == _sz) {
// No work to do.
qset()->deallocate_buffer(_buf);
} else {
// We must NULL out the unused entries, then enqueue.
for (size_t i = 0; i < _index; i += oopSize) {
_buf[byte_index_to_index((int)i)] = NULL;
}
qset()->enqueue_complete_buffer(_buf);
_buf = NULL;
}
}
}
static int byte_index_to_index(int ind) {
assert((ind % oopSize) == 0, "Invariant.");
return ind / oopSize;
}
static int index_to_byte_index(int byte_ind) {
return byte_ind * oopSize;
}
void PtrQueue::enqueue_known_active(void* ptr) {
assert(0 <= _index && _index <= _sz, "Invariant.");
assert(_index == 0 || _buf != NULL, "invariant");
while (_index == 0) {
handle_zero_index();
}
assert(_index > 0, "postcondition");
_index -= oopSize;
_buf[byte_index_to_index((int)_index)] = ptr;
assert(0 <= _index && _index <= _sz, "Invariant.");
}
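// Editor's illustration (not HotSpot code): a self-contained model of the
// fill-downward buffer used by enqueue_known_active(). The index is kept in
// *bytes* and starts at the buffer size, so "index == 0" means full and
// "index == size" means empty; each enqueue steps it down by one pointer
// width. All names are invented for this sketch.
#if 0
#include <cassert>
#include <cstddef>

struct ToyPtrBuffer {
  void** buf;
  size_t sz;      // capacity in bytes
  size_t index;   // bytes still free; counts down from sz to 0

  bool full() const  { return index == 0; }
  bool empty() const { return index == sz; }

  // Precondition: !full(). Stores "ptr" in the next free slot from the top.
  void enqueue(void* ptr) {
    assert(!full());
    index -= sizeof(void*);
    buf[index / sizeof(void*)] = ptr;
  }
};
#endif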
void PtrQueue::locking_enqueue_completed_buffer(void** buf) {
assert(_lock->owned_by_self(), "Required.");
_lock->unlock();
qset()->enqueue_complete_buffer(buf);
// We must relock only because the caller will unlock, for the normal
// case.
_lock->lock_without_safepoint_check();
}
PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
_max_completed_queue(0),
_cbl_mon(NULL), _fl_lock(NULL),
_notify_when_complete(notify_when_complete),
_sz(0),
_completed_buffers_head(NULL),
_completed_buffers_tail(NULL),
_n_completed_buffers(0),
_process_completed_threshold(0), _process_completed(false),
_buf_free_list(NULL), _buf_free_list_sz(0)
{}
void** PtrQueueSet::allocate_buffer() {
assert(_sz > 0, "Didn't set a buffer size.");
MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
if (_buf_free_list != NULL) {
void** res = _buf_free_list;
_buf_free_list = (void**)_buf_free_list[0];
_buf_free_list_sz--;
// Override the next pointer with NULL, just in case we scan this part
// of the buffer.
res[0] = NULL;
return res;
} else {
return NEW_C_HEAP_ARRAY(void*, _sz);
}
}
void PtrQueueSet::deallocate_buffer(void** buf) {
assert(_sz > 0, "Didn't set a buffer size.");
MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
buf[0] = (void*)_buf_free_list;
_buf_free_list = buf;
_buf_free_list_sz++;
}
void PtrQueueSet::reduce_free_list() {
// For now we'll adopt the strategy of deleting half.
MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
size_t n = _buf_free_list_sz / 2;
while (n > 0) {
assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
void** head = _buf_free_list;
_buf_free_list = (void**)_buf_free_list[0];
FREE_C_HEAP_ARRAY(void*,head);
n--;
}
}
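// Editor's illustration (not HotSpot code): allocate_buffer() and
// deallocate_buffer() above keep free buffers on an intrusive list that
// reuses slot 0 of each free buffer as the "next" link. A self-contained
// model of that scheme (names invented for the sketch):
#if 0
#include <cstddef>

struct ToyBufferPool {
  size_t slots;          // slots per buffer
  void** free_list;      // head of the intrusive free list (may be NULL)

  void** allocate() {
    if (free_list != NULL) {
      void** res = free_list;
      free_list = (void**) res[0];  // pop: slot 0 holds the next free buffer
      res[0] = NULL;                // don't leak the link into user data
      return res;
    }
    return new void*[slots];
  }
  void deallocate(void** buf) {
    buf[0] = (void*) free_list;     // push: link through slot 0
    free_list = buf;
  }
};
#endif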
void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) {
// I use explicit locking here because there's a bailout in the middle.
_cbl_mon->lock_without_safepoint_check();
Thread* thread = Thread::current();
assert( ignore_max_completed ||
thread->is_Java_thread() ||
SafepointSynchronize::is_at_safepoint(),
"invariant" );
ignore_max_completed = ignore_max_completed || !thread->is_Java_thread();
if (!ignore_max_completed && _max_completed_queue > 0 &&
_n_completed_buffers >= (size_t) _max_completed_queue) {
_cbl_mon->unlock();
bool b = mut_process_buffer(buf);
if (b) {
deallocate_buffer(buf);
return;
}
// Otherwise, go ahead and enqueue the buffer. Must reacquire the lock.
_cbl_mon->lock_without_safepoint_check();
}
// Here we still hold the _cbl_mon.
CompletedBufferNode* cbn = new CompletedBufferNode;
cbn->buf = buf;
cbn->next = NULL;
cbn->index = index;
if (_completed_buffers_tail == NULL) {
assert(_completed_buffers_head == NULL, "Well-formedness");
_completed_buffers_head = cbn;
_completed_buffers_tail = cbn;
} else {
_completed_buffers_tail->next = cbn;
_completed_buffers_tail = cbn;
}
_n_completed_buffers++;
if (!_process_completed &&
_n_completed_buffers == _process_completed_threshold) {
_process_completed = true;
if (_notify_when_complete)
_cbl_mon->notify_all();
}
debug_only(assert_completed_buffer_list_len_correct_locked());
_cbl_mon->unlock();
}
int PtrQueueSet::completed_buffers_list_length() {
int n = 0;
CompletedBufferNode* cbn = _completed_buffers_head;
while (cbn != NULL) {
n++;
cbn = cbn->next;
}
return n;
}
void PtrQueueSet::assert_completed_buffer_list_len_correct() {
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
assert_completed_buffer_list_len_correct_locked();
}
void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() {
guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers,
"Completed buffer length is wrong.");
}
void PtrQueueSet::set_buffer_size(size_t sz) {
assert(_sz == 0 && sz > 0, "Should be called only once.");
_sz = sz * oopSize;
}
void PtrQueueSet::set_process_completed_threshold(size_t sz) {
_process_completed_threshold = sz;
}

View file

@@ -0,0 +1,229 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// There are various techniques that require threads to be able to log
// addresses. For example, a generational write barrier might log
// the addresses of modified old-generation objects. This type supports
// this operation.
class PtrQueueSet;
class PtrQueue: public CHeapObj {
protected:
// The ptr queue set to which this queue belongs.
PtrQueueSet* _qset;
// Whether updates should be logged.
bool _active;
// The buffer.
void** _buf;
// The index at which an object was last enqueued. Starts at "_sz"
// (indicating an empty buffer) and goes towards zero.
size_t _index;
// The size of the buffer.
size_t _sz;
// If true, the queue is permanent, and doesn't need to deallocate
// its buffer in the destructor (since that obtains a lock which may not
// be legally locked by then).
bool _perm;
// If there is a lock associated with this buffer, this is that lock.
Mutex* _lock;
PtrQueueSet* qset() { return _qset; }
public:
// Initialize this queue to contain a null buffer, and be part of the
// given PtrQueueSet.
PtrQueue(PtrQueueSet*, bool perm = false);
// Release any contained resources.
~PtrQueue();
// Associate a lock with a ptr queue.
void set_lock(Mutex* lock) { _lock = lock; }
void reset() { if (_buf != NULL) _index = _sz; }
// Enqueues the given "obj".
void enqueue(void* ptr) {
if (!_active) return;
else enqueue_known_active(ptr);
}
inline void handle_zero_index();
void locking_enqueue_completed_buffer(void** buf);
void enqueue_known_active(void* ptr);
size_t size() {
assert(_sz >= _index, "Invariant.");
return _buf == NULL ? 0 : _sz - _index;
}
// Set the "active" property of the queue to "b". An enqueue to an
// inactive queue is a no-op. Setting a queue to inactive resets its
// log to the empty state.
void set_active(bool b) {
_active = b;
if (!b && _buf != NULL) {
_index = _sz;
} else if (b && _buf != NULL) {
assert(_index == _sz, "invariant: queues are empty when activated.");
}
}
static int byte_index_to_index(int ind) {
assert((ind % oopSize) == 0, "Invariant.");
return ind / oopSize;
}
static int index_to_byte_index(int byte_ind) {
return byte_ind * oopSize;
}
// To support compiler.
static ByteSize byte_offset_of_index() {
return byte_offset_of(PtrQueue, _index);
}
static ByteSize byte_width_of_index() { return in_ByteSize(sizeof(size_t)); }
static ByteSize byte_offset_of_buf() {
return byte_offset_of(PtrQueue, _buf);
}
static ByteSize byte_width_of_buf() { return in_ByteSize(sizeof(void*)); }
static ByteSize byte_offset_of_active() {
return byte_offset_of(PtrQueue, _active);
}
static ByteSize byte_width_of_active() { return in_ByteSize(sizeof(bool)); }
};
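For orientation, here is a minimal standalone model of the byte-indexed buffer discipline the class above uses (hypothetical names, not part of this file): _index is a byte offset that starts at the buffer size and is decremented by one pointer width per enqueue, so a full buffer is detected when the index reaches zero.

#include <cstddef>
#include <cstdlib>

struct ToyPtrQueue {
  void** buf;     // storage for sz / sizeof(void*) pointers
  size_t index;   // byte index: == sz means empty, == 0 means full
  size_t sz;      // buffer size in bytes

  explicit ToyPtrQueue(size_t size_in_bytes)
    : buf((void**) malloc(size_in_bytes)),
      index(size_in_bytes),
      sz(size_in_bytes) { }

  // Returns false when the buffer is full; the real queue would then hand
  // the buffer to its PtrQueueSet via handle_zero_index().
  bool try_enqueue(void* p) {
    if (index == 0) return false;
    index -= sizeof(void*);            // step the byte index down one slot
    buf[index / sizeof(void*)] = p;    // byte index -> element index
    return true;
  }
};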
// A PtrQueueSet represents resources common to a set of pointer queues.
// In particular, the individual queues allocate buffers from this shared
// set, and return completed buffers to the set.
// All these variables are protected by the TLOQ_CBL_mon. XXX ???
class PtrQueueSet: public CHeapObj {
protected:
class CompletedBufferNode: public CHeapObj {
public:
void** buf;
size_t index;
CompletedBufferNode* next;
CompletedBufferNode() : buf(NULL),
index(0), next(NULL){ }
};
Monitor* _cbl_mon; // Protects the fields below.
CompletedBufferNode* _completed_buffers_head;
CompletedBufferNode* _completed_buffers_tail;
size_t _n_completed_buffers;
size_t _process_completed_threshold;
volatile bool _process_completed;
// This (and the interpretation of the first element as a "next"
// pointer) are protected by the TLOQ_FL_lock.
Mutex* _fl_lock;
void** _buf_free_list;
size_t _buf_free_list_sz;
// The size of all buffers in the set.
size_t _sz;
bool _all_active;
// If true, notify_all on _cbl_mon when the threshold is reached.
bool _notify_when_complete;
// Maximum number of elements allowed on completed queue: after that,
// enqueuer does the work itself. Zero indicates no maximum.
int _max_completed_queue;
int completed_buffers_list_length();
void assert_completed_buffer_list_len_correct_locked();
void assert_completed_buffer_list_len_correct();
protected:
// A mutator thread does the work of processing a buffer.
// Returns "true" iff the work is complete (and the buffer may be
// deallocated).
virtual bool mut_process_buffer(void** buf) {
ShouldNotReachHere();
return false;
}
public:
// Create an empty ptr queue set.
PtrQueueSet(bool notify_when_complete = false);
// Because of init-order concerns, we can't pass these as constructor
// arguments.
void initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue = 0) {
_max_completed_queue = max_completed_queue;
assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?");
_cbl_mon = cbl_mon; _fl_lock = fl_lock;
}
// Return an empty oop array of size _sz (required to be non-zero).
void** allocate_buffer();
// Return an empty buffer to the free list. The "buf" argument is
// required to be a pointer to the head of an array of length "_sz".
void deallocate_buffer(void** buf);
// Declares that "buf" is a complete buffer.
void enqueue_complete_buffer(void** buf, size_t index = 0,
bool ignore_max_completed = false);
bool completed_buffers_exist_dirty() {
return _n_completed_buffers > 0;
}
bool process_completed_buffers() { return _process_completed; }
bool active() { return _all_active; }
// Set the buffer size. Must be called before any "enqueue" operation
// is performed, and should only be called once.
void set_buffer_size(size_t sz);
// Get the buffer size.
size_t buffer_size() { return _sz; }
// Set the number of completed buffers that triggers log processing.
void set_process_completed_threshold(size_t sz);
// Must only be called at a safe point. Indicates that the buffer free
// list size may be reduced, if that is deemed desirable.
void reduce_free_list();
size_t completed_buffers_num() { return _n_completed_buffers; }
};
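The buffer free list noted above reuses the first element of each free buffer as its "next" link. A simplified, single-threaded sketch of that recycling scheme (the real set guards it with _fl_lock; the names below are illustrative only):

#include <cstddef>
#include <cstdlib>

static void** free_list = nullptr;                 // head of the chain of free buffers
static const size_t buf_bytes = 256 * sizeof(void*);

static void** allocate_buffer() {
  if (free_list != nullptr) {
    void** res = free_list;
    free_list = (void**) res[0];                   // element 0 is the "next" pointer
    return res;
  }
  return (void**) malloc(buf_bytes);               // free list empty: fresh allocation
}

static void deallocate_buffer(void** buf) {
  buf[0] = (void*) free_list;                      // push onto the free list
  free_list = buf;
}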

View file

@@ -0,0 +1,41 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
void PtrQueue::handle_zero_index() {
assert(0 == _index, "Precondition.");
// This thread records the full buffer and allocates a new one (while
// holding the lock if there is one).
void** buf = _buf;
_buf = qset()->allocate_buffer();
_sz = qset()->buffer_size();
_index = _sz;
assert(0 <= _index && _index <= _sz, "Invariant.");
if (buf != NULL) {
if (_lock) {
locking_enqueue_completed_buffer(buf);
} else {
qset()->enqueue_complete_buffer(buf);
}
}
}

View file

@@ -0,0 +1,160 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
# include "incls/_precompiled.incl"
# include "incls/_satbQueue.cpp.incl"
void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
if (_buf != NULL) {
apply_closure_to_buffer(cl, _buf, _index, _sz);
_index = _sz;
}
}
void ObjPtrQueue::apply_closure_to_buffer(ObjectClosure* cl,
void** buf, size_t index, size_t sz) {
if (cl == NULL) return;
for (size_t i = index; i < sz; i += oopSize) {
oop obj = (oop)buf[byte_index_to_index((int)i)];
// There can be NULL entries because of destructors.
if (obj != NULL) {
cl->do_object(obj);
}
}
}
#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
SATBMarkQueueSet::SATBMarkQueueSet() :
PtrQueueSet(),
_closure(NULL), _par_closures(NULL),
_shared_satb_queue(this, true /*perm*/)
{}
void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue,
Mutex* lock) {
PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue);
_shared_satb_queue.set_lock(lock);
if (ParallelGCThreads > 0) {
_par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads);
}
}
void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
t->satb_mark_queue().handle_zero_index();
}
void SATBMarkQueueSet::set_active_all_threads(bool b) {
_all_active = b;
for(JavaThread* t = Threads::first(); t; t = t->next()) {
t->satb_mark_queue().set_active(b);
}
}
void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
_closure = closure;
}
void SATBMarkQueueSet::set_par_closure(int i, ObjectClosure* par_closure) {
assert(ParallelGCThreads > 0 && _par_closures != NULL, "Precondition");
_par_closures[i] = par_closure;
}
void SATBMarkQueueSet::iterate_closure_all_threads() {
for(JavaThread* t = Threads::first(); t; t = t->next()) {
t->satb_mark_queue().apply_closure(_closure);
}
shared_satb_queue()->apply_closure(_closure);
}
void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
SharedHeap* sh = SharedHeap::heap();
int parity = sh->strong_roots_parity();
for(JavaThread* t = Threads::first(); t; t = t->next()) {
if (t->claim_oops_do(true, parity)) {
t->satb_mark_queue().apply_closure(_par_closures[worker]);
}
}
// We'll have worker 0 do this one.
if (worker == 0) {
shared_satb_queue()->apply_closure(_par_closures[0]);
}
}
bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
int worker) {
CompletedBufferNode* nd = NULL;
{
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
if (_completed_buffers_head != NULL) {
nd = _completed_buffers_head;
_completed_buffers_head = nd->next;
if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL;
_n_completed_buffers--;
if (_n_completed_buffers == 0) _process_completed = false;
}
}
ObjectClosure* cl = (par ? _par_closures[worker] : _closure);
if (nd != NULL) {
ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz);
deallocate_buffer(nd->buf);
delete nd;
return true;
} else {
return false;
}
}
void SATBMarkQueueSet::abandon_partial_marking() {
CompletedBufferNode* buffers_to_delete = NULL;
{
MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
while (_completed_buffers_head != NULL) {
CompletedBufferNode* nd = _completed_buffers_head;
_completed_buffers_head = nd->next;
nd->next = buffers_to_delete;
buffers_to_delete = nd;
}
_completed_buffers_tail = NULL;
_n_completed_buffers = 0;
debug_only(assert_completed_buffer_list_len_correct_locked());
}
while (buffers_to_delete != NULL) {
CompletedBufferNode* nd = buffers_to_delete;
buffers_to_delete = nd->next;
deallocate_buffer(nd->buf);
delete nd;
}
assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
// So we can safely manipulate these queues.
for (JavaThread* t = Threads::first(); t; t = t->next()) {
t->satb_mark_queue().reset();
}
shared_satb_queue()->reset();
}

View file

@@ -0,0 +1,105 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class ObjectClosure;
class JavaThread;
// A ptrQueue whose elements are "oops", pointers to object heads.
class ObjPtrQueue: public PtrQueue {
public:
ObjPtrQueue(PtrQueueSet* qset_, bool perm = false) :
PtrQueue(qset_, perm)
{}
// Apply the closure to all elements, and reset the index to make the
// buffer empty.
void apply_closure(ObjectClosure* cl);
// Apply the closure to all elements of "buf", down to "index" (inclusive).
static void apply_closure_to_buffer(ObjectClosure* cl,
void** buf, size_t index, size_t sz);
};
class SATBMarkQueueSet: public PtrQueueSet {
ObjectClosure* _closure;
ObjectClosure** _par_closures; // One per ParGCThread.
ObjPtrQueue _shared_satb_queue;
// Utility function to support sequential and parallel versions. If
// "par" is true, then "worker" is the par thread id; if "false", worker
// is ignored.
bool apply_closure_to_completed_buffer_work(bool par, int worker);
public:
SATBMarkQueueSet();
void initialize(Monitor* cbl_mon, Mutex* fl_lock,
int max_completed_queue = 0,
Mutex* lock = NULL);
static void handle_zero_index_for_thread(JavaThread* t);
// Apply "set_active(b)" to all thread tloq's. Should be called only
// with the world stopped.
void set_active_all_threads(bool b);
// Register "blk" as "the closure" for all queues. Only one such closure
// is allowed. The "apply_closure_to_completed_buffer" method will apply
// this closure to a completed buffer, and "iterate_closure_all_threads"
// applies it to partially-filled buffers (the latter should only be done
// with the world stopped).
void set_closure(ObjectClosure* closure);
// Set the parallel closures: pointer is an array of pointers to
// closures, one for each parallel GC thread.
void set_par_closure(int i, ObjectClosure* closure);
// If there is a registered closure for buffers, apply it to all entries
// in all currently-active buffers. This should only be applied at a
// safepoint. (Currently must not be called in parallel; this should
// change in the future.)
void iterate_closure_all_threads();
// Parallel version of the above.
void par_iterate_closure_all_threads(int worker);
// If there exists some completed buffer, pop it, then apply the
// registered closure to all its elements, and return true. If no
// completed buffers exist, return false.
bool apply_closure_to_completed_buffer() {
return apply_closure_to_completed_buffer_work(false, 0);
}
// Parallel version of the above.
bool par_apply_closure_to_completed_buffer(int worker) {
return apply_closure_to_completed_buffer_work(true, worker);
}
ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
// If a marking is being abandoned, reset any unprocessed log buffers.
void abandon_partial_marking();
};
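As a usage sketch only (the calling marking code is assumed and not part of this file), a concurrent-mark thread would typically drain completed SATB buffers with the registered closure like this:

// Pops and processes completed buffers until none remain; each call applies
// the closure registered via set_closure() to one buffer and deallocates it.
void drain_completed_satb_buffers(SATBMarkQueueSet* satb_qs) {
  while (satb_qs->apply_closure_to_completed_buffer()) {
    // keep going while completed buffers exist
  }
}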

View file

@@ -0,0 +1,530 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_sparsePRT.cpp.incl"
#define SPARSE_PRT_VERBOSE 0
#define UNROLL_CARD_LOOPS 1
void SparsePRT::init_iterator(SparsePRTIter* sprt_iter) {
sprt_iter->init(this);
}
void SparsePRTEntry::init(short region_ind) {
_region_ind = region_ind;
_next_index = NullEntry;
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
_cards[0] = NullEntry;
_cards[1] = NullEntry;
_cards[2] = NullEntry;
_cards[3] = NullEntry;
#else
for (int i = 0; i < CardsPerEntry; i++) _cards[i] = NullEntry;
#endif
}
bool SparsePRTEntry::contains_card(short card_index) const {
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
if (_cards[0] == card_index) return true;
if (_cards[1] == card_index) return true;
if (_cards[2] == card_index) return true;
if (_cards[3] == card_index) return true;
#else
for (int i = 0; i < CardsPerEntry; i++) {
if (_cards[i] == card_index) return true;
}
#endif
// Otherwise, the card was not found.
return false;
}
int SparsePRTEntry::num_valid_cards() const {
int sum = 0;
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
if (_cards[0] != NullEntry) sum++;
if (_cards[1] != NullEntry) sum++;
if (_cards[2] != NullEntry) sum++;
if (_cards[3] != NullEntry) sum++;
#else
for (int i = 0; i < CardsPerEntry; i++) {
if (_cards[i] != NullEntry) sum++;
}
#endif
// Return the number of non-null cards.
return sum;
}
SparsePRTEntry::AddCardResult SparsePRTEntry::add_card(short card_index) {
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
short c = _cards[0];
if (c == card_index) return found;
if (c == NullEntry) { _cards[0] = card_index; return added; }
c = _cards[1];
if (c == card_index) return found;
if (c == NullEntry) { _cards[1] = card_index; return added; }
c = _cards[2];
if (c == card_index) return found;
if (c == NullEntry) { _cards[2] = card_index; return added; }
c = _cards[3];
if (c == card_index) return found;
if (c == NullEntry) { _cards[3] = card_index; return added; }
#else
for (int i = 0; i < CardsPerEntry; i++) {
short c = _cards[i];
if (c == card_index) return found;
if (c == NullEntry) { _cards[i] = card_index; return added; }
}
#endif
// Otherwise, we're full.
return overflow;
}
void SparsePRTEntry::copy_cards(short* cards) const {
#if UNROLL_CARD_LOOPS
assert(CardsPerEntry == 4, "Assumption. If changes, un-unroll.");
cards[0] = _cards[0];
cards[1] = _cards[1];
cards[2] = _cards[2];
cards[3] = _cards[3];
#else
for (int i = 0; i < CardsPerEntry; i++) {
cards[i] = _cards[i];
}
#endif
}
void SparsePRTEntry::copy_cards(SparsePRTEntry* e) const {
copy_cards(&e->_cards[0]);
}
// ----------------------------------------------------------------------
RSHashTable::RSHashTable(size_t capacity) :
_capacity(capacity), _capacity_mask(capacity-1),
_occupied_entries(0), _occupied_cards(0),
_entries(NEW_C_HEAP_ARRAY(SparsePRTEntry, capacity)),
_buckets(NEW_C_HEAP_ARRAY(short, capacity)),
_next_deleted(NULL), _deleted(false),
_free_list(NullEntry), _free_region(0)
{
clear();
}
RSHashTable::~RSHashTable() {
if (_entries != NULL) {
FREE_C_HEAP_ARRAY(SparsePRTEntry, _entries);
_entries = NULL;
}
if (_buckets != NULL) {
FREE_C_HEAP_ARRAY(short, _buckets);
_buckets = NULL;
}
}
void RSHashTable::clear() {
_occupied_entries = 0;
_occupied_cards = 0;
guarantee(_entries != NULL, "INV");
guarantee(_buckets != NULL, "INV");
// This will put -1 == NullEntry in the key field of all entries.
memset(_entries, -1, _capacity * sizeof(SparsePRTEntry));
memset(_buckets, -1, _capacity * sizeof(short));
_free_list = NullEntry;
_free_region = 0;
}
bool RSHashTable::add_card(short region_ind, short card_index) {
SparsePRTEntry* e = entry_for_region_ind_create(region_ind);
assert(e != NULL && e->r_ind() == region_ind,
"Postcondition of call above.");
SparsePRTEntry::AddCardResult res = e->add_card(card_index);
if (res == SparsePRTEntry::added) _occupied_cards++;
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr(" after add_card[%d]: valid-cards = %d.",
pointer_delta(e, _entries, sizeof(SparsePRTEntry)),
e->num_valid_cards());
#endif
assert(e->num_valid_cards() > 0, "Postcondition");
return res != SparsePRTEntry::overflow;
}
bool RSHashTable::get_cards(short region_ind, short* cards) {
short ind = (short) (region_ind & capacity_mask());
short cur_ind = _buckets[ind];
SparsePRTEntry* cur;
while (cur_ind != NullEntry &&
(cur = entry(cur_ind))->r_ind() != region_ind) {
cur_ind = cur->next_index();
}
if (cur_ind == NullEntry) return false;
// Otherwise...
assert(cur->r_ind() == region_ind, "Postcondition of loop + test above.");
assert(cur->num_valid_cards() > 0, "Inv");
cur->copy_cards(cards);
return true;
}
bool RSHashTable::delete_entry(short region_ind) {
short ind = (short) (region_ind & capacity_mask());
short* prev_loc = &_buckets[ind];
short cur_ind = *prev_loc;
SparsePRTEntry* cur;
while (cur_ind != NullEntry &&
(cur = entry(cur_ind))->r_ind() != region_ind) {
prev_loc = cur->next_index_addr();
cur_ind = *prev_loc;
}
if (cur_ind == NullEntry) return false;
// Otherwise, splice out "cur".
*prev_loc = cur->next_index();
_occupied_cards -= cur->num_valid_cards();
free_entry(cur_ind);
_occupied_entries--;
return true;
}
SparsePRTEntry* RSHashTable::entry_for_region_ind(short region_ind) const {
assert(occupied_entries() < capacity(), "Precondition");
short ind = (short) (region_ind & capacity_mask());
short cur_ind = _buckets[ind];
SparsePRTEntry* cur;
// XXX
// int k = 0;
while (cur_ind != NullEntry &&
(cur = entry(cur_ind))->r_ind() != region_ind) {
/*
k++;
if (k > 10) {
gclog_or_tty->print_cr("RSHashTable::entry_for_region_ind(%d): "
"k = %d, cur_ind = %d.", region_ind, k, cur_ind);
if (k >= 1000) {
while (1) ;
}
}
*/
cur_ind = cur->next_index();
}
if (cur_ind != NullEntry) {
assert(cur->r_ind() == region_ind, "Loop postcondition + test");
return cur;
} else {
return NULL;
}
}
SparsePRTEntry* RSHashTable::entry_for_region_ind_create(short region_ind) {
SparsePRTEntry* res = entry_for_region_ind(region_ind);
if (res == NULL) {
short new_ind = alloc_entry();
assert(0 <= new_ind && (size_t)new_ind < capacity(), "There should be room.");
res = entry(new_ind);
res->init(region_ind);
// Insert at front.
short ind = (short) (region_ind & capacity_mask());
res->set_next_index(_buckets[ind]);
_buckets[ind] = new_ind;
_occupied_entries++;
}
return res;
}
short RSHashTable::alloc_entry() {
short res;
if (_free_list != NullEntry) {
res = _free_list;
_free_list = entry(res)->next_index();
return res;
} else if ((size_t) _free_region+1 < capacity()) {
res = _free_region;
_free_region++;
return res;
} else {
return NullEntry;
}
}
void RSHashTable::free_entry(short fi) {
entry(fi)->set_next_index(_free_list);
_free_list = fi;
}
void RSHashTable::add_entry(SparsePRTEntry* e) {
assert(e->num_valid_cards() > 0, "Precondition.");
SparsePRTEntry* e2 = entry_for_region_ind_create(e->r_ind());
e->copy_cards(e2);
_occupied_cards += e2->num_valid_cards();
assert(e2->num_valid_cards() > 0, "Postcondition.");
}
RSHashTable* RSHashTable::_head_deleted_list = NULL;
void RSHashTable::add_to_deleted_list(RSHashTable* rsht) {
assert(!rsht->deleted(), "Should delete only once.");
rsht->set_deleted(true);
RSHashTable* hd = _head_deleted_list;
while (true) {
rsht->_next_deleted = hd;
RSHashTable* res =
(RSHashTable*)
Atomic::cmpxchg_ptr(rsht, &_head_deleted_list, hd);
if (res == hd) return;
else hd = res;
}
}
RSHashTable* RSHashTable::get_from_deleted_list() {
RSHashTable* hd = _head_deleted_list;
while (hd != NULL) {
RSHashTable* next = hd->next_deleted();
RSHashTable* res =
(RSHashTable*)
Atomic::cmpxchg_ptr(next, &_head_deleted_list, hd);
if (res == hd) {
hd->set_next_deleted(NULL);
hd->set_deleted(false);
return hd;
} else {
hd = res;
}
}
return NULL;
}
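Both functions above follow the standard lock-free push/pop retry pattern: re-read the head after every failed compare-and-swap and try again. A standalone sketch of the same pattern with std::atomic, ignoring the ABA and lifetime concerns a production stack must handle (illustrative only; the VM code uses Atomic::cmpxchg_ptr):

#include <atomic>

struct Node { Node* next; };
static std::atomic<Node*> list_head{nullptr};

void push(Node* n) {
  Node* hd = list_head.load();
  do {
    n->next = hd;                                      // link to the observed head
  } while (!list_head.compare_exchange_weak(hd, n));   // on failure, hd is refreshed
}

Node* pop() {
  Node* hd = list_head.load();
  while (hd != nullptr &&
         !list_head.compare_exchange_weak(hd, hd->next)) {
    // hd is refreshed on failure; retry with the new head
  }
  return hd;                                           // nullptr if the list was empty
}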
short /* RSHashTable:: */ RSHashTableIter::find_first_card_in_list() {
short res;
while (_bl_ind != RSHashTable::NullEntry) {
res = _rsht->entry(_bl_ind)->card(0);
if (res != SparsePRTEntry::NullEntry) {
return res;
} else {
_bl_ind = _rsht->entry(_bl_ind)->next_index();
}
}
// Otherwise, none found:
return SparsePRTEntry::NullEntry;
}
size_t /* RSHashTable:: */ RSHashTableIter::compute_card_ind(short ci) {
return
_heap_bot_card_ind
+ (_rsht->entry(_bl_ind)->r_ind() * CardsPerRegion)
+ ci;
}
bool /* RSHashTable:: */ RSHashTableIter::has_next(size_t& card_index) {
_card_ind++;
short ci;
if (_card_ind < SparsePRTEntry::CardsPerEntry &&
((ci = _rsht->entry(_bl_ind)->card(_card_ind)) !=
SparsePRTEntry::NullEntry)) {
card_index = compute_card_ind(ci);
return true;
}
// Otherwise, must find the next valid entry.
_card_ind = 0;
if (_bl_ind != RSHashTable::NullEntry) {
_bl_ind = _rsht->entry(_bl_ind)->next_index();
ci = find_first_card_in_list();
if (ci != SparsePRTEntry::NullEntry) {
card_index = compute_card_ind(ci);
return true;
}
}
// If we didn't return above, must go to the next non-null table index.
_tbl_ind++;
while ((size_t)_tbl_ind < _rsht->capacity()) {
_bl_ind = _rsht->_buckets[_tbl_ind];
ci = find_first_card_in_list();
if (ci != SparsePRTEntry::NullEntry) {
card_index = compute_card_ind(ci);
return true;
}
// Otherwise, try next entry.
_tbl_ind++;
}
// Otherwise, there are no more entries.
return false;
}
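A short usage sketch of the iteration protocol implemented by has_next above (the caller shown here is hypothetical):

// Visits every card index recorded in 'rsht'.
void for_each_card(RSHashTable* rsht, size_t heap_bot_card_ind) {
  RSHashTableIter iter(heap_bot_card_ind);
  iter.init(rsht);
  size_t card_index;
  while (iter.has_next(card_index)) {
    // process card_index
  }
}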
bool RSHashTable::contains_card(short region_index, short card_index) const {
SparsePRTEntry* e = entry_for_region_ind(region_index);
return (e != NULL && e->contains_card(card_index));
}
size_t RSHashTable::mem_size() const {
return sizeof(*this) + capacity() * (sizeof(SparsePRTEntry) + sizeof(short));
}
// ----------------------------------------------------------------------
SparsePRT* SparsePRT::_head_expanded_list = NULL;
void SparsePRT::add_to_expanded_list(SparsePRT* sprt) {
// We could expand multiple times in a pause -- only put on list once.
if (sprt->expanded()) return;
sprt->set_expanded(true);
SparsePRT* hd = _head_expanded_list;
while (true) {
sprt->_next_expanded = hd;
SparsePRT* res =
(SparsePRT*)
Atomic::cmpxchg_ptr(sprt, &_head_expanded_list, hd);
if (res == hd) return;
else hd = res;
}
}
SparsePRT* SparsePRT::get_from_expanded_list() {
SparsePRT* hd = _head_expanded_list;
while (hd != NULL) {
SparsePRT* next = hd->next_expanded();
SparsePRT* res =
(SparsePRT*)
Atomic::cmpxchg_ptr(next, &_head_expanded_list, hd);
if (res == hd) {
hd->set_next_expanded(NULL);
return hd;
} else {
hd = res;
}
}
return NULL;
}
void SparsePRT::cleanup_all() {
// First clean up all expanded tables so they agree on next and cur.
SparsePRT* sprt = get_from_expanded_list();
while (sprt != NULL) {
sprt->cleanup();
sprt = get_from_expanded_list();
}
// Now delete all deleted RSHashTables.
RSHashTable* rsht = RSHashTable::get_from_deleted_list();
while (rsht != NULL) {
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr("About to delete RSHT " PTR_FORMAT ".", rsht);
#endif
delete rsht;
rsht = RSHashTable::get_from_deleted_list();
}
}
SparsePRT::SparsePRT(HeapRegion* hr) :
_hr(hr), _expanded(false), _next_expanded(NULL)
{
_cur = new RSHashTable(InitialCapacity);
_next = _cur;
}
SparsePRT::~SparsePRT() {
assert(_next != NULL && _cur != NULL, "Inv");
if (_cur != _next) { delete _cur; }
delete _next;
}
size_t SparsePRT::mem_size() const {
// We ignore "_cur" here, because it either = _next, or else it is
// on the deleted list.
return sizeof(*this) + _next->mem_size();
}
bool SparsePRT::add_card(short region_id, short card_index) {
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr(" Adding card %d from region %d to region %d sparse.",
card_index, region_id, _hr->hrs_index());
#endif
if (_next->occupied_entries() * 2 > _next->capacity()) {
expand();
}
return _next->add_card(region_id, card_index);
}
bool SparsePRT::get_cards(short region_id, short* cards) {
return _next->get_cards(region_id, cards);
}
bool SparsePRT::delete_entry(short region_id) {
return _next->delete_entry(region_id);
}
void SparsePRT::clear() {
// If they differ, _next is bigger than _cur, so _next has no chance of
// being the initial size.
if (_next != _cur) {
delete _next;
}
if (_cur->capacity() != InitialCapacity) {
delete _cur;
_cur = new RSHashTable(InitialCapacity);
} else {
_cur->clear();
}
_next = _cur;
}
void SparsePRT::cleanup() {
// Make sure that the current and next tables agree. (Another mechanism
// takes care of deleting now-unused tables.)
_cur = _next;
}
void SparsePRT::expand() {
RSHashTable* last = _next;
_next = new RSHashTable(last->capacity() * 2);
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr(" Expanded sparse table for %d to %d.",
_hr->hrs_index(), _next->capacity());
#endif
for (size_t i = 0; i < last->capacity(); i++) {
SparsePRTEntry* e = last->entry((int)i);
if (e->valid_entry()) {
#if SPARSE_PRT_VERBOSE
gclog_or_tty->print_cr(" During expansion, transferred entry for %d.",
e->r_ind());
#endif
_next->add_entry(e);
}
}
if (last != _cur)
RSHashTable::add_to_deleted_list(last);
add_to_expanded_list(this);
}

View file

@@ -0,0 +1,308 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// Sparse remembered set for a heap region (the "owning" region). Maps
// indices of other regions to short sequences of cards in the other region
// that might contain pointers into the owner region.
// These tables only expand while they are accessed in parallel --
// deletions may be done in single-threaded code. This allows
// unsynchronized reads/iterations, as long as expansions caused by
// insertions only enqueue old versions for deletion and do not delete
// old versions synchronously.
class SparsePRTEntry {
public:
enum SomePublicConstants {
CardsPerEntry = (short)4,
NullEntry = (short)-1,
DeletedEntry = (short)-2
};
private:
short _region_ind;
short _next_index;
short _cards[CardsPerEntry];
public:
// Set the region_ind to the given value, and delete all cards.
inline void init(short region_ind);
short r_ind() const { return _region_ind; }
bool valid_entry() const { return r_ind() >= 0; }
void set_r_ind(short rind) { _region_ind = rind; }
short next_index() const { return _next_index; }
short* next_index_addr() { return &_next_index; }
void set_next_index(short ni) { _next_index = ni; }
// Returns "true" iff the entry contains the given card index.
inline bool contains_card(short card_index) const;
// Returns the number of non-NULL card entries.
inline int num_valid_cards() const;
// Requires that the entry not contain the given card index. If there is
// space available, add the given card index to the entry and return
// "true"; otherwise, return "false" to indicate that the entry is full.
enum AddCardResult {
overflow,
found,
added
};
inline AddCardResult add_card(short card_index);
// Copy the current entry's cards into "cards".
inline void copy_cards(short* cards) const;
// Copy the current entry's cards into the "_card" array of "e."
inline void copy_cards(SparsePRTEntry* e) const;
inline short card(int i) const { return _cards[i]; }
};
class RSHashTable : public CHeapObj {
friend class RSHashTableIter;
enum SomePrivateConstants {
NullEntry = -1
};
size_t _capacity;
size_t _capacity_mask;
size_t _occupied_entries;
size_t _occupied_cards;
SparsePRTEntry* _entries;
short* _buckets;
short _free_region;
short _free_list;
static RSHashTable* _head_deleted_list;
RSHashTable* _next_deleted;
RSHashTable* next_deleted() { return _next_deleted; }
void set_next_deleted(RSHashTable* rsht) { _next_deleted = rsht; }
bool _deleted;
void set_deleted(bool b) { _deleted = b; }
// Requires that the caller hold a lock preventing parallel modifying
// operations, and that the table be less than completely full. If
// an entry for "region_ind" is already in the table, finds it and
// returns its address; otherwise returns "NULL."
SparsePRTEntry* entry_for_region_ind(short region_ind) const;
// Requires that the caller hold a lock preventing parallel modifying
// operations, and that the table be less than completely full. If
// an entry for "region_ind" is already in the table, finds it and
// returns its address; otherwise allocates, initializes, inserts and
// returns a new entry for "region_ind".
SparsePRTEntry* entry_for_region_ind_create(short region_ind);
// Returns the index of the next free entry in "_entries".
short alloc_entry();
// Declares the entry "fi" to be free. (It must have already been
// deleted from any bucket lists.)
void free_entry(short fi);
public:
RSHashTable(size_t capacity);
~RSHashTable();
// Attempts to ensure that the given card_index in the given region is in
// the sparse table. If successful (because the card was already
// present, or because it was successfully added) returns "true".
// Otherwise, returns "false" to indicate that the addition would
// overflow the entry for the region. The caller must transfer these
// entries to a larger-capacity representation.
bool add_card(short region_id, short card_index);
bool get_cards(short region_id, short* cards);
bool delete_entry(short region_id);
bool contains_card(short region_id, short card_index) const;
void add_entry(SparsePRTEntry* e);
void clear();
size_t capacity() const { return _capacity; }
size_t capacity_mask() const { return _capacity_mask; }
size_t occupied_entries() const { return _occupied_entries; }
size_t occupied_cards() const { return _occupied_cards; }
size_t mem_size() const;
bool deleted() { return _deleted; }
SparsePRTEntry* entry(int i) const { return &_entries[i]; }
void print();
static void add_to_deleted_list(RSHashTable* rsht);
static RSHashTable* get_from_deleted_list();
};
// ValueObj because it will be embedded in the HRRS iterator.
class RSHashTableIter: public CHeapObj {
short _tbl_ind;
short _bl_ind;
short _card_ind;
RSHashTable* _rsht;
size_t _heap_bot_card_ind;
enum SomePrivateConstants {
CardsPerRegion = HeapRegion::GrainBytes >> CardTableModRefBS::card_shift
};
// If the bucket list pointed to by _bl_ind contains a card, sets
// _bl_ind to the index of that entry, and returns the card.
// Otherwise, returns SparsePRTEntry::NullEntry.
short find_first_card_in_list();
// Computes the proper card index for the card whose offset in the
// current region (as indicated by _bl_ind) is "ci".
// This is subject to errors when there is iteration concurrent with
// modification, but these errors should be benign.
size_t compute_card_ind(short ci);
public:
RSHashTableIter(size_t heap_bot_card_ind) :
_tbl_ind(RSHashTable::NullEntry),
_bl_ind(RSHashTable::NullEntry),
_card_ind((SparsePRTEntry::CardsPerEntry-1)),
_rsht(NULL),
_heap_bot_card_ind(heap_bot_card_ind)
{}
void init(RSHashTable* rsht) {
_rsht = rsht;
_tbl_ind = -1; // So that first increment gets to 0.
_bl_ind = RSHashTable::NullEntry;
_card_ind = (SparsePRTEntry::CardsPerEntry-1);
}
bool has_next(size_t& card_index);
};
// Concurrent access to a SparsePRT must be serialized by some external
// mutex.
class SparsePRTIter;
class SparsePRT : public CHeapObj {
// Iterations are done on the _cur hash table, since they only need to
// see entries visible at the start of a collection pause.
// All other operations are done using the _next hash table.
RSHashTable* _cur;
RSHashTable* _next;
HeapRegion* _hr;
enum SomeAdditionalPrivateConstants {
InitialCapacity = 16
};
void expand();
bool _expanded;
bool expanded() { return _expanded; }
void set_expanded(bool b) { _expanded = b; }
SparsePRT* _next_expanded;
SparsePRT* next_expanded() { return _next_expanded; }
void set_next_expanded(SparsePRT* nxt) { _next_expanded = nxt; }
static SparsePRT* _head_expanded_list;
public:
SparsePRT(HeapRegion* hr);
~SparsePRT();
size_t occupied() const { return _next->occupied_cards(); }
size_t mem_size() const;
// Attempts to ensure that the given card_index in the given region is in
// the sparse table. If successful (because the card was already
// present, or because it was successfully added) returns "true".
// Otherwise, returns "false" to indicate that the addition would
// overflow the entry for the region. The caller must transfer these
// entries to a larger-capacity representation.
bool add_card(short region_id, short card_index);
// If the table holds an entry for "region_ind", copies its
// cards into "cards", which must be an array of length at least
// "CardsPerEntry", and returns "true"; otherwise, returns "false".
bool get_cards(short region_ind, short* cards);
// If there is an entry for "region_ind", removes it and return "true";
// otherwise returns "false."
bool delete_entry(short region_ind);
// Clear the table, and reinitialize to initial capacity.
void clear();
// Ensure that "_cur" and "_next" point to the same table.
void cleanup();
// Clean up all tables on the expanded list. Called single threaded.
static void cleanup_all();
RSHashTable* next() const { return _next; }
void init_iterator(SparsePRTIter* sprt_iter);
static void add_to_expanded_list(SparsePRT* sprt);
static SparsePRT* get_from_expanded_list();
bool contains_card(short region_id, short card_index) const {
return _next->contains_card(region_id, card_index);
}
#if 0
void verify_is_cleared();
void print();
#endif
};
class SparsePRTIter: public /* RSHashTable:: */RSHashTableIter {
public:
SparsePRTIter(size_t heap_bot_card_ind) :
/* RSHashTable:: */RSHashTableIter(heap_bot_card_ind)
{}
void init(const SparsePRT* sprt) {
RSHashTableIter::init(sprt->next());
}
bool has_next(size_t& card_index) {
return RSHashTableIter::has_next(card_index);
}
};
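A hedged sketch of the overflow protocol that add_card's return value implies for callers (the coarser representation and the promote_to_fine helper named here are hypothetical placeholders, not declared in this file):

// 'sprt' is the SparsePRT of the owning region; 'from' and 'card' identify
// an incoming reference by source region index and card index.
void record_reference(SparsePRT* sprt, short from, short card) {
  if (!sprt->add_card(from, card)) {
    // The entry for 'from' is full: pull out its cards, move them (and
    // 'card') to a larger-capacity representation, then drop the entry.
    short cards[SparsePRTEntry::CardsPerEntry];
    if (sprt->get_cards(from, cards)) {
      // promote_to_fine(from, cards, card);   // hypothetical
      sprt->delete_entry(from);
    }
  }
}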

View file

@@ -0,0 +1,264 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_survRateGroup.cpp.incl"
SurvRateGroup::SurvRateGroup(G1CollectorPolicy* g1p,
const char* name,
size_t summary_surv_rates_len) :
_g1p(g1p), _name(name),
_all_regions_allocated(0),
_curr_length(0), _scan_only_prefix(0), _setup_seq_num(0),
_array_length(0), _surv_rate(NULL), _accum_surv_rate_pred(NULL),
_accum_surv_rate(0.0), _surv_rate_pred(NULL), _last_pred(0.0),
_summary_surv_rates_len(summary_surv_rates_len),
_summary_surv_rates_max_len(0),
_summary_surv_rates(NULL) {
// the following will set up the arrays with length 1
_curr_length = 1;
stop_adding_regions();
guarantee( _array_length == 1, "invariant" );
guarantee( _surv_rate_pred[0] != NULL, "invariant" );
_surv_rate_pred[0]->add(0.4);
all_surviving_words_recorded(false);
_curr_length = 0;
if (summary_surv_rates_len > 0) {
size_t length = summary_surv_rates_len;
_summary_surv_rates = NEW_C_HEAP_ARRAY(NumberSeq*, length);
if (_summary_surv_rates == NULL) {
vm_exit_out_of_memory(sizeof(NumberSeq*) * length,
"Not enough space for surv rate summary");
}
for (size_t i = 0; i < length; ++i)
_summary_surv_rates[i] = new NumberSeq();
}
start_adding_regions();
}
void
SurvRateGroup::start_adding_regions() {
_setup_seq_num = _array_length;
_curr_length = _scan_only_prefix;
_accum_surv_rate = 0.0;
#if 0
gclog_or_tty->print_cr("start adding regions, seq num %d, length %d",
_setup_seq_num, _curr_length);
#endif // 0
}
void
SurvRateGroup::stop_adding_regions() {
size_t length = _curr_length;
#if 0
gclog_or_tty->print_cr("stop adding regions, length %d", length);
#endif // 0
if (length > _array_length) {
double* old_surv_rate = _surv_rate;
double* old_accum_surv_rate_pred = _accum_surv_rate_pred;
TruncatedSeq** old_surv_rate_pred = _surv_rate_pred;
_surv_rate = NEW_C_HEAP_ARRAY(double, length);
if (_surv_rate == NULL) {
vm_exit_out_of_memory(sizeof(double) * length,
"Not enough space for surv rate array.");
}
_accum_surv_rate_pred = NEW_C_HEAP_ARRAY(double, length);
if (_accum_surv_rate_pred == NULL) {
vm_exit_out_of_memory(sizeof(double) * length,
"Not enough space for accum surv rate pred array.");
}
_surv_rate_pred = NEW_C_HEAP_ARRAY(TruncatedSeq*, length);
if (_surv_rate_pred == NULL) {
vm_exit_out_of_memory(sizeof(TruncatedSeq*) * length,
"Not enough space for surv rate pred array.");
}
for (size_t i = 0; i < _array_length; ++i)
_surv_rate_pred[i] = old_surv_rate_pred[i];
#if 0
gclog_or_tty->print_cr("stop adding regions, new seqs %d to %d",
_array_length, length - 1);
#endif // 0
for (size_t i = _array_length; i < length; ++i) {
_surv_rate_pred[i] = new TruncatedSeq(10);
// _surv_rate_pred[i]->add(last_pred);
}
_array_length = length;
if (old_surv_rate != NULL)
FREE_C_HEAP_ARRAY(double, old_surv_rate);
if (old_accum_surv_rate_pred != NULL)
FREE_C_HEAP_ARRAY(double, old_accum_surv_rate_pred);
if (old_surv_rate_pred != NULL)
FREE_C_HEAP_ARRAY(NumberSeq*, old_surv_rate_pred);
}
for (size_t i = 0; i < _array_length; ++i)
_surv_rate[i] = 0.0;
}
double
SurvRateGroup::accum_surv_rate(size_t adjustment) {
// we might relax this one in the future...
guarantee( adjustment == 0 || adjustment == 1, "pre-condition" );
double ret = _accum_surv_rate;
if (adjustment > 0) {
TruncatedSeq* seq = get_seq(_curr_length+1);
double surv_rate = _g1p->get_new_prediction(seq);
ret += surv_rate;
}
return ret;
}
int
SurvRateGroup::next_age_index() {
TruncatedSeq* seq = get_seq(_curr_length);
double surv_rate = _g1p->get_new_prediction(seq);
_accum_surv_rate += surv_rate;
++_curr_length;
return (int) ++_all_regions_allocated;
}
void
SurvRateGroup::record_scan_only_prefix(size_t scan_only_prefix) {
guarantee( scan_only_prefix <= _curr_length, "pre-condition" );
_scan_only_prefix = scan_only_prefix;
}
void
SurvRateGroup::record_surviving_words(int age_in_group, size_t surv_words) {
guarantee( 0 <= age_in_group && (size_t) age_in_group < _curr_length,
"pre-condition" );
guarantee( _surv_rate[age_in_group] <= 0.00001,
"should only update each slot once" );
double surv_rate = (double) surv_words / (double) HeapRegion::GrainWords;
_surv_rate[age_in_group] = surv_rate;
_surv_rate_pred[age_in_group]->add(surv_rate);
if ((size_t)age_in_group < _summary_surv_rates_len) {
_summary_surv_rates[age_in_group]->add(surv_rate);
if ((size_t)(age_in_group+1) > _summary_surv_rates_max_len)
_summary_surv_rates_max_len = age_in_group+1;
}
}
void
SurvRateGroup::all_surviving_words_recorded(bool propagate) {
if (propagate && _curr_length > 0) { // conservative
double surv_rate = _surv_rate_pred[_curr_length-1]->last();
#if 0
gclog_or_tty->print_cr("propagating %1.2lf from %d to %d",
surv_rate, _curr_length, _array_length - 1);
#endif // 0
for (size_t i = _curr_length; i < _array_length; ++i) {
guarantee( _surv_rate[i] <= 0.00001,
"the slot should not have been updated" );
_surv_rate_pred[i]->add(surv_rate);
}
}
double accum = 0.0;
double pred = 0.0;
for (size_t i = 0; i < _array_length; ++i) {
pred = _g1p->get_new_prediction(_surv_rate_pred[i]);
if (pred > 1.0) pred = 1.0;
accum += pred;
_accum_surv_rate_pred[i] = accum;
// gclog_or_tty->print_cr("age %3d, accum %10.2lf", i, accum);
}
_last_pred = pred;
}
#ifndef PRODUCT
void
SurvRateGroup::print() {
gclog_or_tty->print_cr("Surv Rate Group: %s (%d entries, %d scan-only)",
_name, _curr_length, _scan_only_prefix);
for (size_t i = 0; i < _curr_length; ++i) {
gclog_or_tty->print_cr(" age %4d surv rate %6.2lf %% pred %6.2lf %%%s",
i, _surv_rate[i] * 100.0,
_g1p->get_new_prediction(_surv_rate_pred[i]) * 100.0,
(i < _scan_only_prefix) ? " S-O" : " ");
}
}
void
SurvRateGroup::print_surv_rate_summary() {
size_t length = _summary_surv_rates_max_len;
if (length == 0)
return;
gclog_or_tty->print_cr("");
gclog_or_tty->print_cr("%s Rate Summary (for up to age %d)", _name, length-1);
gclog_or_tty->print_cr(" age range survival rate (avg) samples (avg)");
gclog_or_tty->print_cr(" ---------------------------------------------------------");
size_t index = 0;
size_t limit = MIN2((int) length, 10);
while (index < limit) {
gclog_or_tty->print_cr(" %4d %6.2lf%% %6.2lf",
index, _summary_surv_rates[index]->avg() * 100.0,
(double) _summary_surv_rates[index]->num());
++index;
}
gclog_or_tty->print_cr(" ---------------------------------------------------------");
int num = 0;
double sum = 0.0;
int samples = 0;
while (index < length) {
++num;
sum += _summary_surv_rates[index]->avg() * 100.0;
samples += _summary_surv_rates[index]->num();
++index;
if (index == length || num % 10 == 0) {
gclog_or_tty->print_cr(" %4d .. %4d %6.2lf%% %6.2lf",
(index-1) / 10 * 10, index-1, sum / (double) num,
(double) samples / (double) num);
sum = 0.0;
num = 0;
samples = 0;
}
}
gclog_or_tty->print_cr(" ---------------------------------------------------------");
}
#endif // PRODUCT

View file

@@ -0,0 +1,102 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
class G1CollectorPolicy;
class SurvRateGroup : public CHeapObj {
private:
G1CollectorPolicy* _g1p;
const char* _name;
size_t _array_length;
double* _surv_rate;
double* _accum_surv_rate_pred;
double _last_pred;
double _accum_surv_rate;
TruncatedSeq** _surv_rate_pred;
NumberSeq** _summary_surv_rates;
size_t _summary_surv_rates_len;
size_t _summary_surv_rates_max_len;
int _all_regions_allocated;
size_t _curr_length;
size_t _scan_only_prefix;
size_t _setup_seq_num;
public:
SurvRateGroup(G1CollectorPolicy* g1p,
const char* name,
size_t summary_surv_rates_len);
void start_adding_regions();
void stop_adding_regions();
void record_scan_only_prefix(size_t scan_only_prefix);
void record_surviving_words(int age_in_group, size_t surv_words);
void all_surviving_words_recorded(bool propagate);
const char* name() { return _name; }
size_t region_num() { return _curr_length; }
size_t scan_only_length() { return _scan_only_prefix; }
double accum_surv_rate_pred(int age) {
assert(age >= 0, "must be");
if ((size_t)age < _array_length)
return _accum_surv_rate_pred[age];
else {
double diff = (double) (age - _array_length + 1);
return _accum_surv_rate_pred[_array_length-1] + diff * _last_pred;
}
}
double accum_surv_rate(size_t adjustment);
TruncatedSeq* get_seq(size_t age) {
guarantee( 0 <= age, "pre-condition" );
if (age >= _setup_seq_num) {
guarantee( _setup_seq_num > 0, "invariant" );
age = _setup_seq_num-1;
}
TruncatedSeq* seq = _surv_rate_pred[age];
guarantee( seq != NULL, "invariant" );
return seq;
}
int next_age_index();
int age_in_group(int age_index) {
int ret = (int) (_all_regions_allocated - age_index);
assert( ret >= 0, "invariant" );
return ret;
}
int recalculate_age_index(int age_index) {
int new_age_index = (int) _scan_only_prefix - age_in_group(age_index);
guarantee( new_age_index >= 0, "invariant" );
return new_age_index;
}
void finished_recalculating_age_indexes() {
_all_regions_allocated = (int) _scan_only_prefix;
}
#ifndef PRODUCT
void print();
void print_surv_rate_summary();
#endif // PRODUCT
};
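For ages past the end of the prediction array, accum_surv_rate_pred above extrapolates linearly using the last prediction. A worked example under assumed values (_array_length = 5, _accum_surv_rate_pred[4] = 2.1, _last_pred = 0.3):

  accum_surv_rate_pred(7) = _accum_surv_rate_pred[4] + (7 - 5 + 1) * 0.3
                          = 2.1 + 3 * 0.3
                          = 3.0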

View file

@@ -0,0 +1,79 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
#include "incls/_precompiled.incl"
#include "incls/_vm_operations_g1.cpp.incl"
void VM_G1CollectForAllocation::doit() {
JvmtiGCForAllocationMarker jgcm;
G1CollectedHeap* g1h = G1CollectedHeap::heap();
_res = g1h->satisfy_failed_allocation(_size);
assert(g1h->is_in_or_null(_res), "result not in heap");
}
void VM_G1CollectFull::doit() {
JvmtiGCFullMarker jgcm;
G1CollectedHeap* g1h = G1CollectedHeap::heap();
GCCauseSetter x(g1h, _gc_cause);
g1h->do_full_collection(false /* clear_all_soft_refs */);
}
void VM_G1IncCollectionPause::doit() {
JvmtiGCForAllocationMarker jgcm;
G1CollectedHeap* g1h = G1CollectedHeap::heap();
GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause);
g1h->do_collection_pause_at_safepoint(NULL);
}
void VM_G1PopRegionCollectionPause::doit() {
JvmtiGCForAllocationMarker jgcm;
G1CollectedHeap* g1h = G1CollectedHeap::heap();
g1h->do_collection_pause_at_safepoint(_pop_region);
}
void VM_CGC_Operation::doit() {
gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps);
TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
TraceTime t(_printGCMessage, PrintGC, true, gclog_or_tty);
SharedHeap* sh = SharedHeap::heap();
// This could go away if CollectedHeap gave access to _gc_is_active...
if (sh != NULL) {
IsGCActiveMark x;
_cl->do_void();
} else {
_cl->do_void();
}
}
bool VM_CGC_Operation::doit_prologue() {
Heap_lock->lock();
SharedHeap::heap()->_thread_holds_heap_lock_for_gc = true;
return true;
}
void VM_CGC_Operation::doit_epilogue() {
SharedHeap::heap()->_thread_holds_heap_lock_for_gc = false;
Heap_lock->unlock();
}
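As a usage sketch (the requesting code is assumed and not part of this file; VMThread::execute is the standard entry point for submitting a VM operation to the VM thread), a Java thread would submit one of these operations roughly like this:

void request_g1_full_collection(int gc_count_before, GCCause::Cause cause) {
  VM_G1CollectFull op(gc_count_before, cause);
  VMThread::execute(&op);   // blocks until the VM thread has run doit()
}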

View file

@@ -0,0 +1,114 @@
/*
* Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*
*/
// VM_operations for the G1 collector.
// VM_GC_Operation:
// - VM_CGC_Operation
// - VM_G1CollectFull
// - VM_G1CollectForAllocation
// - VM_G1IncCollectionPause
// - VM_G1PopRegionCollectionPause
class VM_G1CollectFull: public VM_GC_Operation {
private:
public:
VM_G1CollectFull(int gc_count_before,
GCCause::Cause gc_cause)
: VM_GC_Operation(gc_count_before)
{
_gc_cause = gc_cause;
}
~VM_G1CollectFull() {}
virtual VMOp_Type type() const { return VMOp_G1CollectFull; }
virtual void doit();
virtual const char* name() const {
return "full garbage-first collection";
}
};
class VM_G1CollectForAllocation: public VM_GC_Operation {
private:
HeapWord* _res;
size_t _size; // size of object to be allocated
public:
VM_G1CollectForAllocation(size_t size, int gc_count_before)
: VM_GC_Operation(gc_count_before) {
_size = size;
_res = NULL;
}
~VM_G1CollectForAllocation() {}
virtual VMOp_Type type() const { return VMOp_G1CollectForAllocation; }
virtual void doit();
virtual const char* name() const {
return "garbage-first collection to satisfy allocation";
}
HeapWord* result() { return _res; }
};
class VM_G1IncCollectionPause: public VM_GC_Operation {
public:
VM_G1IncCollectionPause(int gc_count_before) :
VM_GC_Operation(gc_count_before) {}
virtual VMOp_Type type() const { return VMOp_G1IncCollectionPause; }
virtual void doit();
virtual const char* name() const {
return "garbage-first incremental collection pause";
}
};
class VM_G1PopRegionCollectionPause: public VM_GC_Operation {
HeapRegion* _pop_region;
public:
VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) :
VM_GC_Operation(gc_count_before),
_pop_region(pop_region)
{}
virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; }
virtual void doit();
virtual const char* name() const {
return "garbage-first popular region collection pause";
}
};
// Concurrent GC stop-the-world operations such as initial and final mark;
// consider sharing these with CMS's counterparts.
class VM_CGC_Operation: public VM_Operation {
VoidClosure* _cl;
const char* _printGCMessage;
public:
VM_CGC_Operation(VoidClosure* cl, const char *printGCMsg) :
_cl(cl),
_printGCMessage(printGCMsg)
{}
~VM_CGC_Operation() {}
virtual VMOp_Type type() const { return VMOp_CGC_Operation; }
virtual void doit();
virtual bool doit_prologue();
virtual void doit_epilogue();
virtual const char* name() const {
return "concurrent gc";
}
};

View file

@@ -125,17 +125,6 @@ compactibleFreeListSpace.hpp space.hpp
compactingPermGenGen.cpp concurrentMarkSweepGeneration.inline.hpp
concurrentGCThread.cpp concurrentGCThread.hpp
concurrentGCThread.cpp init.hpp
concurrentGCThread.cpp instanceRefKlass.hpp
concurrentGCThread.cpp interfaceSupport.hpp
concurrentGCThread.cpp java.hpp
concurrentGCThread.cpp javaCalls.hpp
concurrentGCThread.cpp oop.inline.hpp
concurrentGCThread.cpp systemDictionary.hpp
concurrentGCThread.hpp thread.hpp
concurrentMarkSweepGeneration.cpp cardTableRS.hpp
concurrentMarkSweepGeneration.cpp cmsAdaptiveSizePolicy.hpp
concurrentMarkSweepGeneration.cpp cmsCollectorPolicy.hpp
@@ -167,7 +156,7 @@ concurrentMarkSweepGeneration.cpp systemDictionary.hpp
concurrentMarkSweepGeneration.cpp vmCMSOperations.hpp
concurrentMarkSweepGeneration.cpp vmThread.hpp
concurrentMarkSweepGeneration.hpp bitMap.hpp
concurrentMarkSweepGeneration.hpp bitMap.inline.hpp
concurrentMarkSweepGeneration.hpp freeBlockDictionary.hpp
concurrentMarkSweepGeneration.hpp gSpaceCounters.hpp
concurrentMarkSweepGeneration.hpp gcStats.hpp

View file

@@ -0,0 +1,351 @@
//
// Copyright 2004-2006 Sun Microsystems, Inc. All Rights Reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
// CA 95054 USA or visit www.sun.com if you need additional information or
// have any questions.
//
//
// NOTE: DO NOT CHANGE THIS COPYRIGHT TO NEW STYLE - IT WILL BREAK makeDeps!
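// Each non-comment line below pairs a file with one file it depends on; the
// makeDeps tool reads these pairs to generate the build's include lists. The
// "<os_family>" token is the usual includeDB placeholder for the target OS
// (so thread_<os_family>.inline.hpp becomes, e.g., thread_linux.inline.hpp).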
bufferingOopClosure.hpp genOopClosures.hpp
bufferingOopClosure.hpp generation.hpp
bufferingOopClosure.hpp os.hpp
cardTableRS.cpp concurrentMark.hpp
cardTableRS.cpp g1SATBCardTableModRefBS.hpp
collectionSetChooser.cpp g1CollectedHeap.hpp
collectionSetChooser.cpp g1CollectorPolicy.hpp
collectionSetChooser.cpp collectionSetChooser.hpp
collectionSetChooser.hpp heapRegion.hpp
collectionSetChooser.hpp growableArray.hpp
concurrentG1Refine.cpp atomic.hpp
concurrentG1Refine.cpp concurrentG1Refine.hpp
concurrentG1Refine.cpp concurrentG1RefineThread.hpp
concurrentG1Refine.cpp copy.hpp
concurrentG1Refine.cpp g1CollectedHeap.hpp
concurrentG1Refine.cpp g1RemSet.hpp
concurrentG1Refine.hpp globalDefinitions.hpp
concurrentG1RefineThread.cpp concurrentG1Refine.hpp
concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp
concurrentG1RefineThread.cpp g1CollectedHeap.hpp
concurrentG1RefineThread.cpp g1CollectorPolicy.hpp
concurrentG1RefineThread.cpp handles.inline.hpp
concurrentG1RefineThread.cpp mutexLocker.hpp
concurrentG1RefineThread.cpp resourceArea.hpp
concurrentG1RefineThread.hpp concurrentGCThread.hpp
concurrentG1RefineThread.hpp coTracker.hpp
concurrentMark.cpp concurrentMark.hpp
concurrentMark.cpp concurrentMarkThread.inline.hpp
concurrentMark.cpp g1CollectedHeap.inline.hpp
concurrentMark.cpp g1CollectorPolicy.hpp
concurrentMark.cpp g1RemSet.hpp
concurrentMark.cpp gcOverheadReporter.hpp
concurrentMark.cpp genOopClosures.inline.hpp
concurrentMark.cpp heapRegionRemSet.hpp
concurrentMark.cpp heapRegionSeq.inline.hpp
concurrentMark.cpp handles.inline.hpp
concurrentMark.cpp java.hpp
concurrentMark.cpp oop.inline.hpp
concurrentMark.cpp referencePolicy.hpp
concurrentMark.cpp resourceArea.hpp
concurrentMark.cpp symbolTable.hpp
concurrentMark.hpp coTracker.hpp
concurrentMark.hpp heapRegion.hpp
concurrentMark.hpp taskqueue.hpp
concurrentMarkThread.cpp concurrentMarkThread.inline.hpp
concurrentMarkThread.cpp g1CollectedHeap.inline.hpp
concurrentMarkThread.cpp g1CollectorPolicy.hpp
concurrentMarkThread.cpp g1MMUTracker.hpp
concurrentMarkThread.cpp resourceArea.hpp
concurrentMarkThread.cpp vm_operations_g1.hpp
concurrentMarkThread.cpp vmThread.hpp
concurrentMarkThread.hpp concurrentGCThread.hpp
concurrentMarkThread.inline.hpp concurrentMark.hpp
concurrentMarkThread.inline.hpp concurrentMarkThread.hpp
concurrentZFThread.cpp concurrentZFThread.hpp
concurrentZFThread.cpp heapRegion.hpp
concurrentZFThread.cpp g1CollectedHeap.inline.hpp
concurrentZFThread.cpp copy.hpp
concurrentZFThread.cpp mutexLocker.hpp
concurrentZFThread.cpp space.inline.hpp
concurrentZFThread.hpp concurrentGCThread.hpp
concurrentZFThread.hpp coTracker.hpp
dirtyCardQueue.cpp atomic.hpp
dirtyCardQueue.cpp dirtyCardQueue.hpp
dirtyCardQueue.cpp heapRegionRemSet.hpp
dirtyCardQueue.cpp mutexLocker.hpp
dirtyCardQueue.cpp ptrQueue.inline.hpp
dirtyCardQueue.cpp safepoint.hpp
dirtyCardQueue.cpp thread.hpp
dirtyCardQueue.cpp thread_<os_family>.inline.hpp
dirtyCardQueue.cpp workgroup.hpp
dirtyCardQueue.hpp allocation.hpp
dirtyCardQueue.hpp ptrQueue.hpp
g1BlockOffsetTable.cpp g1BlockOffsetTable.inline.hpp
g1BlockOffsetTable.cpp java.hpp
g1BlockOffsetTable.cpp oop.inline.hpp
g1BlockOffsetTable.cpp space.hpp
g1BlockOffsetTable.hpp globalDefinitions.hpp
g1BlockOffsetTable.hpp memRegion.hpp
g1BlockOffsetTable.hpp virtualspace.hpp
g1BlockOffsetTable.inline.hpp g1BlockOffsetTable.hpp
g1BlockOffsetTable.inline.hpp space.hpp
g1CollectedHeap.cpp aprofiler.hpp
g1CollectedHeap.cpp bufferingOopClosure.hpp
g1CollectedHeap.cpp concurrentG1Refine.hpp
g1CollectedHeap.cpp concurrentG1RefineThread.hpp
g1CollectedHeap.cpp concurrentMarkThread.inline.hpp
g1CollectedHeap.cpp concurrentZFThread.hpp
g1CollectedHeap.cpp g1CollectedHeap.inline.hpp
g1CollectedHeap.cpp g1CollectorPolicy.hpp
g1CollectedHeap.cpp g1MarkSweep.hpp
g1CollectedHeap.cpp g1RemSet.hpp
g1CollectedHeap.cpp g1OopClosures.inline.hpp
g1CollectedHeap.cpp genOopClosures.inline.hpp
g1CollectedHeap.cpp gcLocker.inline.hpp
g1CollectedHeap.cpp gcOverheadReporter.hpp
g1CollectedHeap.cpp generationSpec.hpp
g1CollectedHeap.cpp heapRegionRemSet.hpp
g1CollectedHeap.cpp heapRegionSeq.inline.hpp
g1CollectedHeap.cpp icBuffer.hpp
g1CollectedHeap.cpp isGCActiveMark.hpp
g1CollectedHeap.cpp oop.inline.hpp
g1CollectedHeap.cpp oop.pcgc.inline.hpp
g1CollectedHeap.cpp parGCAllocBuffer.hpp
g1CollectedHeap.cpp vm_operations_g1.hpp
g1CollectedHeap.cpp vmThread.hpp
g1CollectedHeap.hpp barrierSet.hpp
g1CollectedHeap.hpp heapRegion.hpp
g1CollectedHeap.hpp memRegion.hpp
g1CollectedHeap.hpp sharedHeap.hpp
g1CollectedHeap.inline.hpp concurrentMark.hpp
g1CollectedHeap.inline.hpp g1CollectedHeap.hpp
g1CollectedHeap.inline.hpp heapRegionSeq.hpp
g1CollectedHeap.inline.hpp taskqueue.hpp
g1CollectorPolicy.cpp concurrentG1Refine.hpp
g1CollectorPolicy.cpp concurrentMark.hpp
g1CollectorPolicy.cpp concurrentMarkThread.inline.hpp
g1CollectorPolicy.cpp debug.hpp
g1CollectorPolicy.cpp java.hpp
g1CollectorPolicy.cpp g1CollectedHeap.hpp
g1CollectorPolicy.cpp g1CollectorPolicy.hpp
g1CollectorPolicy.cpp heapRegionRemSet.hpp
g1CollectorPolicy.cpp mutexLocker.hpp
g1CollectorPolicy.hpp collectorPolicy.hpp
g1CollectorPolicy.hpp collectionSetChooser.hpp
g1CollectorPolicy.hpp g1MMUTracker.hpp
g1_globals.cpp g1_globals.hpp
g1_globals.hpp globals.hpp
globals.cpp g1_globals.hpp
top.hpp g1_globals.hpp
g1MarkSweep.cpp aprofiler.hpp
g1MarkSweep.cpp biasedLocking.hpp
g1MarkSweep.cpp codeCache.hpp
g1MarkSweep.cpp events.hpp
g1MarkSweep.cpp fprofiler.hpp
g1MarkSweep.hpp g1CollectedHeap.hpp
g1MarkSweep.cpp g1MarkSweep.hpp
g1MarkSweep.cpp gcLocker.hpp
g1MarkSweep.cpp genCollectedHeap.hpp
g1MarkSweep.hpp heapRegion.hpp
g1MarkSweep.cpp icBuffer.hpp
g1MarkSweep.cpp instanceRefKlass.hpp
g1MarkSweep.cpp javaClasses.hpp
g1MarkSweep.cpp jvmtiExport.hpp
g1MarkSweep.cpp copy.hpp
g1MarkSweep.cpp modRefBarrierSet.hpp
g1MarkSweep.cpp oop.inline.hpp
g1MarkSweep.cpp referencePolicy.hpp
g1MarkSweep.cpp space.hpp
g1MarkSweep.cpp symbolTable.hpp
g1MarkSweep.cpp synchronizer.hpp
g1MarkSweep.cpp systemDictionary.hpp
g1MarkSweep.cpp thread.hpp
g1MarkSweep.cpp vmSymbols.hpp
g1MarkSweep.cpp vmThread.hpp
g1MarkSweep.hpp generation.hpp
g1MarkSweep.hpp growableArray.hpp
g1MarkSweep.hpp markOop.hpp
g1MarkSweep.hpp genMarkSweep.hpp
g1MarkSweep.hpp oop.hpp
g1MarkSweep.hpp timer.hpp
g1MarkSweep.hpp universe.hpp
g1OopClosures.inline.hpp concurrentMark.hpp
g1OopClosures.inline.hpp g1OopClosures.hpp
g1OopClosures.inline.hpp g1CollectedHeap.hpp
g1OopClosures.inline.hpp g1RemSet.hpp
g1MMUTracker.cpp g1MMUTracker.hpp
g1MMUTracker.cpp ostream.hpp
g1MMUTracker.cpp mutexLocker.hpp
g1MMUTracker.hpp debug.hpp
g1RemSet.cpp bufferingOopClosure.hpp
g1RemSet.cpp concurrentG1Refine.hpp
g1RemSet.cpp concurrentG1RefineThread.hpp
g1RemSet.cpp g1BlockOffsetTable.inline.hpp
g1RemSet.cpp g1CollectedHeap.inline.hpp
g1RemSet.cpp g1CollectorPolicy.hpp
g1RemSet.cpp g1RemSet.inline.hpp
g1RemSet.cpp g1OopClosures.inline.hpp
g1RemSet.cpp heapRegionSeq.inline.hpp
g1RemSet.cpp intHisto.hpp
g1RemSet.cpp iterator.hpp
g1RemSet.cpp oop.inline.hpp
g1RemSet.inline.hpp g1RemSet.hpp
g1RemSet.inline.hpp heapRegionRemSet.hpp
g1SATBCardTableModRefBS.cpp g1SATBCardTableModRefBS.hpp
g1SATBCardTableModRefBS.cpp heapRegion.hpp
g1SATBCardTableModRefBS.cpp mutexLocker.hpp
g1SATBCardTableModRefBS.cpp thread.hpp
g1SATBCardTableModRefBS.cpp thread_<os_family>.inline.hpp
g1SATBCardTableModRefBS.cpp satbQueue.hpp
g1SATBCardTableModRefBS.hpp cardTableModRefBS.hpp
g1SATBCardTableModRefBS.hpp memRegion.hpp
heapRegion.cpp concurrentZFThread.hpp
heapRegion.cpp g1BlockOffsetTable.inline.hpp
heapRegion.cpp g1CollectedHeap.inline.hpp
heapRegion.cpp g1OopClosures.inline.hpp
heapRegion.cpp genOopClosures.inline.hpp
heapRegion.cpp heapRegion.inline.hpp
heapRegion.cpp heapRegionRemSet.hpp
heapRegion.cpp heapRegionSeq.inline.hpp
heapRegion.cpp iterator.hpp
heapRegion.cpp oop.inline.hpp
heapRegion.hpp space.hpp
heapRegion.hpp spaceDecorator.hpp
heapRegion.hpp g1BlockOffsetTable.inline.hpp
heapRegion.hpp watermark.hpp
heapRegion.hpp g1_specialized_oop_closures.hpp
heapRegion.hpp survRateGroup.hpp
heapRegionRemSet.hpp sparsePRT.hpp
heapRegionRemSet.cpp allocation.hpp
heapRegionRemSet.cpp bitMap.inline.hpp
heapRegionRemSet.cpp g1BlockOffsetTable.inline.hpp
heapRegionRemSet.cpp g1CollectedHeap.inline.hpp
heapRegionRemSet.cpp heapRegionRemSet.hpp
heapRegionRemSet.cpp heapRegionSeq.inline.hpp
heapRegionRemSet.cpp globalDefinitions.hpp
heapRegionRemSet.cpp space.inline.hpp
heapRegionSeq.cpp allocation.hpp
heapRegionSeq.cpp g1CollectedHeap.hpp
heapRegionSeq.cpp heapRegionSeq.hpp
heapRegionSeq.hpp growableArray.hpp
heapRegionSeq.hpp heapRegion.hpp
heapRegionSeq.inline.hpp heapRegionSeq.hpp
klass.hpp g1OopClosures.hpp
ptrQueue.cpp allocation.hpp
ptrQueue.cpp allocation.inline.hpp
ptrQueue.cpp mutex.hpp
ptrQueue.cpp mutexLocker.hpp
ptrQueue.cpp ptrQueue.hpp
ptrQueue.cpp ptrQueue.inline.hpp
ptrQueue.cpp thread_<os_family>.inline.hpp
ptrQueue.hpp allocation.hpp
ptrQueue.hpp sizes.hpp
ptrQueue.inline.hpp ptrQueue.hpp
satbQueue.cpp allocation.inline.hpp
satbQueue.cpp mutexLocker.hpp
satbQueue.cpp ptrQueue.inline.hpp
satbQueue.cpp satbQueue.hpp
satbQueue.cpp sharedHeap.hpp
satbQueue.cpp thread.hpp
satbQueue.hpp ptrQueue.hpp
sparsePRT.cpp allocation.inline.hpp
sparsePRT.cpp cardTableModRefBS.hpp
sparsePRT.cpp heapRegion.hpp
sparsePRT.cpp heapRegionRemSet.hpp
sparsePRT.cpp mutexLocker.hpp
sparsePRT.cpp sparsePRT.hpp
sparsePRT.cpp space.inline.hpp
sparsePRT.hpp allocation.hpp
sparsePRT.hpp cardTableModRefBS.hpp
sparsePRT.hpp globalDefinitions.hpp
sparsePRT.hpp heapRegion.hpp
sparsePRT.hpp mutex.hpp
specialized_oop_closures.hpp g1_specialized_oop_closures.hpp
survRateGroup.hpp numberSeq.hpp
survRateGroup.cpp allocation.hpp
survRateGroup.cpp g1CollectedHeap.hpp
survRateGroup.cpp g1CollectorPolicy.hpp
survRateGroup.cpp heapRegion.hpp
survRateGroup.cpp survRateGroup.hpp
thread.cpp concurrentMarkThread.inline.hpp
universe.cpp g1CollectedHeap.hpp
universe.cpp g1CollectorPolicy.hpp
vm_operations_g1.hpp vmGCOperations.hpp
vm_operations_g1.cpp vm_operations_g1.hpp
vm_operations_g1.cpp g1CollectedHeap.hpp
vm_operations_g1.cpp isGCActiveMark.hpp

Some files were not shown because too many files have changed in this diff.