8144019: PPC64 C1: Introduce Client Compiler

Reviewed-by: goetz
Martin Doerr 2015-12-04 16:38:04 +01:00
parent c64b2175e7
commit 8c5da27f19
50 changed files with 9055 additions and 623 deletions

View file

@@ -1,6 +1,6 @@
#
# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
# Copyright 2012, 2013 SAP AG. All rights reserved.
# Copyright 2012, 2015 SAP AG. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,6 @@ ifndef CC_INTERP
FORCE_TIERED=1
endif
endif
# C1 is not ported on ppc64(le), so we cannot build a tiered VM:
ifneq (,$(filter $(ARCH),ppc64 pp64le))
FORCE_TIERED=0
endif
ifdef LP64
ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")

View file

@@ -68,5 +68,5 @@ MAPFILE = $(GAMMADIR)/make/aix/makefiles/mapfile-vers-debug
LFLAGS_QIPA=
VERSION = optimized
SYSDEFS += -DASSERT -DFASTDEBUG
SYSDEFS += -DASSERT
PICFLAGS = DEFAULT

View file

@@ -0,0 +1,32 @@
#
# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
# Copyright 2012, 2015 SAP AG. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
#
# Sets make macros for making tiered version of VM
TYPE=TIERED
VM_SUBDIR = server
CFLAGS += -DCOMPILER2 -DCOMPILER1

View file

@@ -57,14 +57,6 @@ ifndef CC_INTERP
FORCE_TIERED=1
endif
endif
# C1 is not ported on ppc64, so we cannot build a tiered VM:
# Notice: after 8046471 ARCH will be 'ppc' for top-level ppc64 builds but
# 'ppc64' for HotSpot-only ppc64 builds. Need to detect both variants here!
ifneq (,$(findstring $(ARCH), ppc ppc64))
ifeq ($(ARCH_DATA_MODEL), 64)
FORCE_TIERED=0
endif
endif
ifdef LP64
ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")

View file

@@ -53,9 +53,6 @@ int AbstractAssembler::code_fill_byte() {
return 0x00; // illegal instruction 0x00000000
}
void Assembler::print_instruction(int inst) {
Unimplemented();
}
// Patch instruction `inst' at offset `inst_pos' to refer to
// `dest_pos' and return the resulting instruction. We should have
@@ -484,7 +481,7 @@ int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp,
if (d != s) { mr(d, s); }
return 0;
}
if (return_simm16_rest) {
if (return_simm16_rest && (d == s)) {
return xd;
}
addi(d, s, xd);

View file

@@ -31,10 +31,37 @@
// Address is an abstraction used to represent a memory location
// as used in assembler instructions.
// PPC instructions grok either baseReg + indexReg or baseReg + disp.
// So far we do not use this, as the simplification this class offers is low
// on PPC with its simple addressing mode. Use RegisterOrConstant to
// represent an offset.
class Address VALUE_OBJ_CLASS_SPEC {
private:
Register _base; // Base register.
Register _index; // Index register.
intptr_t _disp; // Displacement.
public:
Address(Register b, Register i, address d = 0)
: _base(b), _index(i), _disp((intptr_t)d) {
assert(i == noreg || d == 0, "can't have both");
}
Address(Register b, address d = 0)
: _base(b), _index(noreg), _disp((intptr_t)d) {}
Address(Register b, intptr_t d)
: _base(b), _index(noreg), _disp(d) {}
Address(Register b, RegisterOrConstant roc)
: _base(b), _index(noreg), _disp(0) {
if (roc.is_constant()) _disp = roc.as_constant(); else _index = roc.as_register();
}
Address()
: _base(noreg), _index(noreg), _disp(0) {}
// accessors
Register base() const { return _base; }
Register index() const { return _index; }
int disp() const { return (int)_disp; }
bool is_const() const { return _base == noreg && _index == noreg; }
};
class AddressLiteral VALUE_OBJ_CLASS_SPEC {
@@ -164,10 +191,14 @@ struct FunctionDescriptor VALUE_OBJ_CLASS_SPEC {
};
#endif
// The PPC Assembler: Pure assembler doing NO optimizations on the
// instruction level; i.e., what you write is what you get. The
// Assembler is generating code into a CodeBuffer.
class Assembler : public AbstractAssembler {
protected:
// Displacement routines
static void print_instruction(int inst);
static int patched_branch(int dest_pos, int inst, int inst_pos);
static int branch_destination(int inst, int pos);
@@ -839,41 +870,38 @@ class Assembler : public AbstractAssembler {
enum Predict { pt = 1, pn = 0 }; // pt = predict taken
// instruction must start at passed address
// Instruction must start at passed address.
static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
// instruction must be left-justified in argument
static int instr_len(unsigned long instr) { return BytesPerInstWord; }
// longest instructions
static int instr_maxlen() { return BytesPerInstWord; }
// Test if x is within signed immediate range for nbits.
static bool is_simm(int x, unsigned int nbits) {
assert(0 < nbits && nbits < 32, "out of bounds");
const int min = -( ((int)1) << nbits-1 );
const int maxplus1 = ( ((int)1) << nbits-1 );
const int min = -(((int)1) << nbits-1);
const int maxplus1 = (((int)1) << nbits-1);
return min <= x && x < maxplus1;
}
static bool is_simm(jlong x, unsigned int nbits) {
assert(0 < nbits && nbits < 64, "out of bounds");
const jlong min = -( ((jlong)1) << nbits-1 );
const jlong maxplus1 = ( ((jlong)1) << nbits-1 );
const jlong min = -(((jlong)1) << nbits-1);
const jlong maxplus1 = (((jlong)1) << nbits-1);
return min <= x && x < maxplus1;
}
// Test if x is within unsigned immediate range for nbits
// Test if x is within unsigned immediate range for nbits.
static bool is_uimm(int x, unsigned int nbits) {
assert(0 < nbits && nbits < 32, "out of bounds");
const int maxplus1 = ( ((int)1) << nbits );
return 0 <= x && x < maxplus1;
const unsigned int maxplus1 = (((unsigned int)1) << nbits);
return (unsigned int)x < maxplus1;
}
static bool is_uimm(jlong x, unsigned int nbits) {
assert(0 < nbits && nbits < 64, "out of bounds");
const jlong maxplus1 = ( ((jlong)1) << nbits );
return 0 <= x && x < maxplus1;
const julong maxplus1 = (((julong)1) << nbits);
return (julong)x < maxplus1;
}
protected:
@@ -1376,8 +1404,11 @@ class Assembler : public AbstractAssembler {
inline void orc( Register a, Register s, Register b);
inline void orc_( Register a, Register s, Register b);
inline void extsb( Register a, Register s);
inline void extsb_( Register a, Register s);
inline void extsh( Register a, Register s);
inline void extsh_( Register a, Register s);
inline void extsw( Register a, Register s);
inline void extsw_( Register a, Register s);
// extended mnemonics
inline void nop();
@@ -1767,6 +1798,8 @@ class Assembler : public AbstractAssembler {
inline void smt_yield();
inline void smt_mdoio();
inline void smt_mdoom();
// >= Power8
inline void smt_miso();
// trap instructions
inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur)
@@ -2168,6 +2201,7 @@ class Assembler : public AbstractAssembler {
inline void load_const(Register d, void* a, Register tmp = noreg);
inline void load_const(Register d, Label& L, Register tmp = noreg);
inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
inline void load_const32(Register d, int i); // load signed int (patchable)
// Load a 64 bit constant, optimized, not identifiable.
// Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
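
The is_uimm rework above folds the old test 0 <= x && x < maxplus1 into a single unsigned comparison. A standalone sketch of both range checks (plain C++ integer types and hypothetical fits_* names, not the HotSpot declarations) shows the accepted ranges for the common 16-bit case:

#include <cassert>
#include <cstdint>

// Signed range check, same shape as is_simm above.
static bool fits_simm(int64_t x, unsigned int nbits) {
  const int64_t min      = -(int64_t(1) << (nbits - 1));
  const int64_t maxplus1 =  (int64_t(1) << (nbits - 1));
  return min <= x && x < maxplus1;
}

// Unsigned range check, same shape as the rewritten is_uimm above.
static bool fits_uimm(int64_t x, unsigned int nbits) {
  const uint64_t maxplus1 = uint64_t(1) << nbits;
  return uint64_t(x) < maxplus1;  // a negative x wraps to a huge value and is rejected
}

int main() {
  assert( fits_simm( 32767, 16) && !fits_simm( 32768, 16));  // d-form displacement range
  assert( fits_simm(-32768, 16) && !fits_simm(-32769, 16));
  assert( fits_uimm( 65535, 16) && !fits_uimm( 65536, 16));
  assert(!fits_uimm(-1, 16));
  return 0;
}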

View file

@@ -206,8 +206,11 @@ inline void Assembler::andc_( Register a, Register s, Register b) { emit_in
inline void Assembler::orc( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); }
inline void Assembler::orc_( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); }
inline void Assembler::extsb( Register a, Register s) { emit_int32(EXTSB_OPCODE | rta(a) | rs(s) | rc(0)); }
inline void Assembler::extsb_( Register a, Register s) { emit_int32(EXTSB_OPCODE | rta(a) | rs(s) | rc(1)); }
inline void Assembler::extsh( Register a, Register s) { emit_int32(EXTSH_OPCODE | rta(a) | rs(s) | rc(0)); }
inline void Assembler::extsh_( Register a, Register s) { emit_int32(EXTSH_OPCODE | rta(a) | rs(s) | rc(1)); }
inline void Assembler::extsw( Register a, Register s) { emit_int32(EXTSW_OPCODE | rta(a) | rs(s) | rc(0)); }
inline void Assembler::extsw_( Register a, Register s) { emit_int32(EXTSW_OPCODE | rta(a) | rs(s) | rc(1)); }
// extended mnemonics
inline void Assembler::nop() { Assembler::ori(R0, R0, 0); }
@@ -609,6 +612,8 @@ inline void Assembler::smt_prio_high() { Assembler::or_unchecked(R3, R3,
inline void Assembler::smt_yield() { Assembler::or_unchecked(R27, R27, R27); }
inline void Assembler::smt_mdoio() { Assembler::or_unchecked(R29, R29, R29); }
inline void Assembler::smt_mdoom() { Assembler::or_unchecked(R30, R30, R30); }
// >= Power8
inline void Assembler::smt_miso() { Assembler::or_unchecked(R26, R26, R26); }
inline void Assembler::twi_0(Register a) { twi_unchecked(0, a, 0);}
@@ -967,12 +972,15 @@ inline void Assembler::load_const(Register d, Label& L, Register tmp) {
// Load a 64 bit constant encoded by an AddressLiteral. patchable.
inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) {
assert(d != R0, "R0 not allowed");
// First relocate (we don't change the offset in the RelocationHolder,
// just pass a.rspec()), then delegate to load_const(Register, long).
relocate(a.rspec());
load_const(d, (long)a.value(), tmp);
}
inline void Assembler::load_const32(Register d, int i) {
lis(d, i >> 16);
ori(d, d, i & 0xFFFF);
}
#endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
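
load_const32 above always emits exactly two instructions (lis followed by ori), which is what keeps the loaded value patchable in place. A hypothetical standalone helper (not HotSpot code) mirroring its arithmetic shows that the two halfword immediates reassemble the original constant:

#include <cassert>
#include <cstdint>

// hi is the immediate handed to lis (arithmetic shift keeps the sign),
// lo is the immediate handed to ori; together they rebuild the value.
static int32_t rebuild_from_lis_ori(int32_t i) {
  int32_t  hi = i >> 16;
  uint16_t lo = uint16_t(i & 0xFFFF);
  return int32_t((uint32_t(hi) << 16) | lo);
}

int main() {
  const int32_t tests[] = { 0, 1, -1, 42, -42, 0x12345678, 0x7FFFFFFF, INT32_MIN };
  for (int32_t v : tests) {
    assert(rebuild_from_lis_ori(v) == v);
  }
  return 0;
}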

View file

@@ -0,0 +1,527 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "c1/c1_CodeStubs.hpp"
#include "c1/c1_FrameMap.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "nativeInst_ppc.hpp"
#include "runtime/sharedRuntime.hpp"
#include "utilities/macros.hpp"
#include "vmreg_ppc.inline.hpp"
#if INCLUDE_ALL_GCS
#include "gc/g1/g1SATBCardTableModRefBS.hpp"
#endif // INCLUDE_ALL_GCS
#define __ ce->masm()->
RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
bool throw_index_out_of_bounds_exception)
: _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
, _index(index) {
assert(info != NULL, "must have info");
_info = new CodeEmitInfo(info);
}
void RangeCheckStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_info->deoptimize_on_exception()) {
address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
// May be used by optimizations like LoopInvariantCodeMotion or RangeCheckEliminator.
DEBUG_ONLY( __ untested("RangeCheckStub: predicate_failed_trap_id"); )
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ illtrap());
return;
}
address stub = _throw_index_out_of_bounds_exception ? Runtime1::entry_for(Runtime1::throw_index_exception_id)
: Runtime1::entry_for(Runtime1::throw_range_check_failed_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
Register index = R0; // pass in R0
if (_index->is_register()) {
__ extsw(index, _index->as_register());
} else {
__ load_const_optimized(index, _index->as_jint());
}
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ illtrap());
}
PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
_info = new CodeEmitInfo(info);
}
void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ illtrap());
}
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
// Parameter 1: bci
__ load_const_optimized(R0, _bci);
__ std(R0, -16, R1_SP);
// Parameter 2: Method*
Metadata *m = _method->as_constant_ptr()->as_metadata();
AddressLiteral md = __ constant_metadata_address(m); // Notify OOP recorder (don't need the relocation).
__ load_const_optimized(R0, md.value());
__ std(R0, -8, R1_SP);
address a = Runtime1::entry_for(Runtime1::counter_overflow_id);
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
}
void DivByZeroStub::emit_code(LIR_Assembler* ce) {
if (_offset != -1) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
address stub = Runtime1::entry_for(Runtime1::throw_div0_exception_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ illtrap());
}
void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
address a;
if (_info->deoptimize_on_exception()) {
// Deoptimize, do not throw the exception, because it is probably wrong to do it here.
a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
} else {
a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
}
if (ImplicitNullChecks || TrapBasedNullChecks) {
ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
}
__ bind(_entry);
//__ load_const_optimized(R0, a);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
debug_only(__ illtrap());
}
// Implementation of SimpleExceptionStub
void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address stub = Runtime1::entry_for(_stub);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
if (_obj->is_valid()) { __ mr_if_needed(/*tmp1 in do_CheckCast*/ R4_ARG2, _obj->as_register()); }
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
debug_only( __ illtrap(); )
}
// Implementation of NewInstanceStub
NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
_result = result;
_klass = klass;
_klass_reg = klass_reg;
_info = new CodeEmitInfo(info);
assert(stub_id == Runtime1::new_instance_id ||
stub_id == Runtime1::fast_new_instance_id ||
stub_id == Runtime1::fast_new_instance_init_check_id,
"need new_instance id");
_stub_id = stub_id;
}
void NewInstanceStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address entry = Runtime1::entry_for(_stub_id);
//__ load_const_optimized(R0, entry);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
}
// Implementation of NewTypeArrayStub
NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
_klass_reg = klass_reg;
_length = length;
_result = result;
_info = new CodeEmitInfo(info);
}
void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address entry = Runtime1::entry_for(Runtime1::new_type_array_id);
//__ load_const_optimized(R0, entry);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
__ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
}
// Implementation of NewObjectArrayStub
NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
_klass_reg = klass_reg;
_length = length;
_result = result;
_info = new CodeEmitInfo(info);
}
void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address entry = Runtime1::entry_for(Runtime1::new_object_array_id);
//__ load_const_optimized(R0, entry);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
__ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
}
// Implementation of MonitorAccessStubs
MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
: MonitorAccessStub(obj_reg, lock_reg) {
_info = new CodeEmitInfo(info);
}
void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorenter_id : Runtime1::monitorenter_nofpu_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mr_if_needed(/*scratch_opr()->as_register()*/ R4_ARG2, _obj_reg->as_register());
assert(_lock_reg->as_register() == R5_ARG3, "");
__ mtctr(R0);
__ bctrl();
ce->add_call_info_here(_info);
ce->verify_oop_map(_info);
__ b(_continuation);
}
void MonitorExitStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
if (_compute_lock) {
ce->monitor_address(_monitor_ix, _lock_reg);
}
address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorexit_id : Runtime1::monitorexit_nofpu_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
assert(_lock_reg->as_register() == R4_ARG2, "");
__ mtctr(R0);
__ bctrl();
__ b(_continuation);
}
// Implementation of patching:
// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
// - Replace original code with a call to the stub.
// At Runtime:
// - call to stub, jump to runtime
// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
// - in runtime: after initializing class, restore original code, reexecute instruction
int PatchingStub::_patch_info_offset = -(5 * BytesPerInstWord);
void PatchingStub::align_patch_site(MacroAssembler* ) {
// Patch sites on ppc are always properly aligned.
}
#ifdef ASSERT
inline void compare_with_patch_site(address template_start, address pc_start, int bytes_to_copy) {
address start = template_start;
for (int i = 0; i < bytes_to_copy; i++) {
address ptr = (address)(pc_start + i);
int a_byte = (*ptr) & 0xFF;
assert(a_byte == *start++, "should be the same code");
}
}
#endif
void PatchingStub::emit_code(LIR_Assembler* ce) {
// copy original code here
assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
"not enough room for call");
assert((_bytes_to_copy & 0x3) == 0, "must copy a multiple of four bytes");
Label call_patch;
int being_initialized_entry = __ offset();
if (_id == load_klass_id) {
// Produce a copy of the load klass instruction for use by the being initialized case.
AddressLiteral addrlit((address)NULL, metadata_Relocation::spec(_index));
__ load_const(_obj, addrlit, R0);
DEBUG_ONLY( compare_with_patch_site(__ code_section()->start() + being_initialized_entry, _pc_start, _bytes_to_copy); )
} else if (_id == load_mirror_id || _id == load_appendix_id) {
// Produce a copy of the load mirror instruction for use by the being initialized case.
AddressLiteral addrlit((address)NULL, oop_Relocation::spec(_index));
__ load_const(_obj, addrlit, R0);
DEBUG_ONLY( compare_with_patch_site(__ code_section()->start() + being_initialized_entry, _pc_start, _bytes_to_copy); )
} else {
// Make a copy of the code which is going to be patched.
for (int i = 0; i < _bytes_to_copy; i++) {
address ptr = (address)(_pc_start + i);
int a_byte = (*ptr) & 0xFF;
__ emit_int8 (a_byte);
}
}
address end_of_patch = __ pc();
int bytes_to_skip = 0;
if (_id == load_mirror_id) {
int offset = __ offset();
__ block_comment(" being_initialized check");
// Static field accesses have special semantics while the class
// initializer is being run so we emit a test which can be used to
// check that this code is being executed by the initializing
// thread.
assert(_obj != noreg, "must be a valid register");
assert(_index >= 0, "must have oop index");
__ mr(R0, _obj); // spill
__ ld(_obj, java_lang_Class::klass_offset_in_bytes(), _obj);
__ ld(_obj, in_bytes(InstanceKlass::init_thread_offset()), _obj);
__ cmpd(CCR0, _obj, R16_thread);
__ mr(_obj, R0); // restore
__ bne(CCR0, call_patch);
// Load_klass patches may execute the patched code before it's
// copied back into place so we need to jump back into the main
// code of the nmethod to continue execution.
__ b(_patch_site_continuation);
// Make sure this extra code gets skipped.
bytes_to_skip += __ offset() - offset;
}
// Now emit the patch record telling the runtime how to find the
// pieces of the patch. We only need 3 bytes but it has to be
// aligned as an instruction so emit 4 bytes.
int sizeof_patch_record = 4;
bytes_to_skip += sizeof_patch_record;
// Emit the offsets needed to find the code to patch.
int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
// Emit the patch record. We need to emit a full word, so emit an extra empty byte.
__ emit_int8(0);
__ emit_int8(being_initialized_entry_offset);
__ emit_int8(bytes_to_skip);
__ emit_int8(_bytes_to_copy);
address patch_info_pc = __ pc();
assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
address entry = __ pc();
NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
address target = NULL;
relocInfo::relocType reloc_type = relocInfo::none;
switch (_id) {
case access_field_id: target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
case load_klass_id: target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
reloc_type = relocInfo::metadata_type; break;
case load_mirror_id: target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
reloc_type = relocInfo::oop_type; break;
case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
reloc_type = relocInfo::oop_type; break;
default: ShouldNotReachHere();
}
__ bind(call_patch);
__ block_comment("patch entry point");
//__ load_const(R0, target); + mtctr + bctrl must have size -_patch_info_offset
__ load_const32(R0, MacroAssembler::offset_to_global_toc(target));
__ add(R0, R29_TOC, R0);
__ mtctr(R0);
__ bctrl();
assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
ce->add_call_info_here(_info);
__ b(_patch_site_entry);
if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
CodeSection* cs = __ code_section();
address pc = (address)_pc_start;
RelocIterator iter(cs, pc, pc + 1);
relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none);
}
}
void DeoptimizeStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
address stub = Runtime1::entry_for(Runtime1::deoptimize_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
__ load_const_optimized(R0, _trap_request); // Pass trap request in R0.
__ bctrl();
ce->add_call_info_here(_info);
debug_only(__ illtrap());
}
void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
//---------------slow case: call to native-----------------
__ bind(_entry);
__ mr(R3_ARG1, src()->as_register());
__ extsw(R4_ARG2, src_pos()->as_register());
__ mr(R5_ARG3, dst()->as_register());
__ extsw(R6_ARG4, dst_pos()->as_register());
__ extsw(R7_ARG5, length()->as_register());
ce->emit_static_call_stub();
bool success = ce->emit_trampoline_stub_for_call(SharedRuntime::get_resolve_static_call_stub());
if (!success) { return; }
__ relocate(relocInfo::static_call_type);
// Note: At this point we do not have the address of the trampoline
// stub, and the entry point might be too far away for bl, so __ pc()
// serves as dummy and the bl will be patched later.
__ code()->set_insts_mark();
__ bl(__ pc());
ce->add_call_info_here(info());
ce->verify_oop_map(info());
#ifndef PRODUCT
const address counter = (address)&Runtime1::_arraycopy_slowcase_cnt;
const Register tmp = R3, tmp2 = R4;
int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
__ lwz(tmp2, simm16_offs, tmp);
__ addi(tmp2, tmp2, 1);
__ stw(tmp2, simm16_offs, tmp);
#endif
__ b(_continuation);
}
///////////////////////////////////////////////////////////////////////////////////
#if INCLUDE_ALL_GCS
void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
// At this point we know that marking is in progress.
// If do_load() is true then we have to emit the
// load of the previous value; otherwise it has already
// been loaded into _pre_val.
__ bind(_entry);
assert(pre_val()->is_register(), "Precondition.");
Register pre_val_reg = pre_val()->as_register();
if (do_load()) {
ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
}
__ cmpdi(CCR0, pre_val_reg, 0);
__ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), _continuation);
address stub = Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack.
__ mtctr(R0);
__ bctrl();
__ b(_continuation);
}
void G1PostBarrierStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
assert(addr()->is_register(), "Precondition.");
assert(new_val()->is_register(), "Precondition.");
Register addr_reg = addr()->as_pointer_register();
Register new_val_reg = new_val()->as_register();
__ cmpdi(CCR0, new_val_reg, 0);
__ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), _continuation);
address stub = Runtime1::entry_for(Runtime1::g1_post_barrier_slow_id);
//__ load_const_optimized(R0, stub);
__ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
__ mtctr(R0);
__ mr(R0, addr_reg); // Pass addr in R0.
__ bctrl();
__ b(_continuation);
}
#endif // INCLUDE_ALL_GCS
///////////////////////////////////////////////////////////////////////////////////
#undef __
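
PatchingStub::emit_code above finishes with a 4-byte, instruction-aligned patch record: one zero pad byte followed by three payload bytes. A sketch of that layout (field names are illustrative only; the example values in main are made up):

#include <cstdint>

struct PatchRecord {
  uint8_t pad;                             // emit_int8(0)
  uint8_t being_initialized_entry_offset;  // distance from the end of the record back to the being_initialized entry
  uint8_t bytes_to_skip;                   // bytes between the end of the copied code and the patch-info point
  uint8_t bytes_to_copy;                   // number of original code bytes saved in the stub
};

static_assert(sizeof(PatchRecord) == 4, "must stay one instruction word");

int main() {
  const uint8_t raw[4] = { 0, 28, 12, 8 };  // made-up example values
  PatchRecord rec;
  rec.pad = raw[0];
  rec.being_initialized_entry_offset = raw[1];
  rec.bytes_to_skip = raw[2];
  rec.bytes_to_copy = raw[3];
  return rec.pad == 0 ? 0 : 1;
}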

View file

@@ -0,0 +1,76 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_DEFS_PPC_HPP
#define CPU_PPC_VM_C1_DEFS_PPC_HPP
// Native word offsets from memory address.
enum {
#if defined(VM_LITTLE_ENDIAN)
pd_lo_word_offset_in_bytes = 0,
pd_hi_word_offset_in_bytes = BytesPerInt
#else
pd_lo_word_offset_in_bytes = BytesPerInt,
pd_hi_word_offset_in_bytes = 0
#endif
};
// Explicit rounding operations are not required to implement the strictFP mode.
enum {
pd_strict_fp_requires_explicit_rounding = false
};
// registers
enum {
pd_nof_cpu_regs_frame_map = 32, // Number of registers used during code emission.
pd_nof_caller_save_cpu_regs_frame_map = 27, // Number of cpu registers killed by calls. (At least R3_ARG1 ... R10_ARG8, but using all like C2.)
pd_nof_cpu_regs_reg_alloc = 27, // Number of registers that are visible to register allocator.
pd_nof_cpu_regs_linearscan = 32, // Number of registers visible linear scan.
pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map,
pd_last_callee_saved_reg = pd_nof_cpu_regs_reg_alloc - 1,
pd_first_cpu_reg = 0,
pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1,
pd_nof_fpu_regs_frame_map = 32, // Number of registers used during code emission.
pd_nof_caller_save_fpu_regs_frame_map = 32, // Number of fpu registers killed by calls.
pd_nof_fpu_regs_reg_alloc = 32, // Number of registers that are visible to register allocator.
pd_nof_fpu_regs_linearscan = 32, // Number of registers visible to linear scan.
pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
pd_last_fpu_reg = pd_nof_cpu_regs_frame_map + pd_nof_fpu_regs_reg_alloc - 1,
pd_nof_xmm_regs_linearscan = 0,
pd_nof_caller_save_xmm_regs = 0,
pd_first_xmm_reg = -1,
pd_last_xmm_reg = -1
};
// For debug info: a float value in a register is saved in single precision by runtime stubs.
enum {
pd_float_saved_as_double = true
};
#endif // CPU_PPC_VM_C1_DEFS_PPC_HPP
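
A quick host-side sketch of what pd_lo_word_offset_in_bytes and pd_hi_word_offset_in_bytes above encode: the low 32-bit word of a 64-bit value sits at byte offset 0 on little-endian PPC64 and at offset BytesPerInt on big-endian.

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  const int64_t v = 0x1122334455667788LL;  // low 32-bit word is 0x55667788
  int32_t words[2];
  std::memcpy(words, &v, sizeof(v));
  const int lo_off = (words[0] == 0x55667788) ? 0 : (int)sizeof(int32_t);
  const int hi_off = (int)sizeof(int32_t) - lo_off;
  std::printf("lo word at +%d, hi word at +%d on this host\n", lo_off, hi_off);
  return 0;
}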

View file

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
#define CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
// No FPU stack on PPC.
class FpuStackSim;
#endif // CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP

View file

@@ -0,0 +1,394 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "c1/c1_FrameMap.hpp"
#include "c1/c1_LIR.hpp"
#include "runtime/sharedRuntime.hpp"
#include "vmreg_ppc.inline.hpp"
const int FrameMap::pd_c_runtime_reserved_arg_size = 7;
LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) {
LIR_Opr opr = LIR_OprFact::illegalOpr;
VMReg r_1 = reg->first();
VMReg r_2 = reg->second();
if (r_1->is_stack()) {
// Convert stack slot to an SP offset.
// The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
// so we must add it in here.
int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
opr = LIR_OprFact::address(new LIR_Address(SP_opr, st_off + STACK_BIAS, type));
} else if (r_1->is_Register()) {
Register reg = r_1->as_Register();
//if (outgoing) {
// assert(!reg->is_in(), "should be using I regs");
//} else {
// assert(!reg->is_out(), "should be using O regs");
//}
if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
opr = as_long_opr(reg);
} else if (type == T_OBJECT || type == T_ARRAY) {
opr = as_oop_opr(reg);
} else {
opr = as_opr(reg);
}
} else if (r_1->is_FloatRegister()) {
assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
FloatRegister f = r_1->as_FloatRegister();
if (type == T_DOUBLE) {
opr = as_double_opr(f);
} else {
opr = as_float_opr(f);
}
}
return opr;
}
// FrameMap
//--------------------------------------------------------
FloatRegister FrameMap::_fpu_regs [FrameMap::nof_fpu_regs];
LIR_Opr FrameMap::R0_opr;
LIR_Opr FrameMap::R1_opr;
LIR_Opr FrameMap::R2_opr;
LIR_Opr FrameMap::R3_opr;
LIR_Opr FrameMap::R4_opr;
LIR_Opr FrameMap::R5_opr;
LIR_Opr FrameMap::R6_opr;
LIR_Opr FrameMap::R7_opr;
LIR_Opr FrameMap::R8_opr;
LIR_Opr FrameMap::R9_opr;
LIR_Opr FrameMap::R10_opr;
LIR_Opr FrameMap::R11_opr;
LIR_Opr FrameMap::R12_opr;
LIR_Opr FrameMap::R13_opr;
LIR_Opr FrameMap::R14_opr;
LIR_Opr FrameMap::R15_opr;
LIR_Opr FrameMap::R16_opr;
LIR_Opr FrameMap::R17_opr;
LIR_Opr FrameMap::R18_opr;
LIR_Opr FrameMap::R19_opr;
LIR_Opr FrameMap::R20_opr;
LIR_Opr FrameMap::R21_opr;
LIR_Opr FrameMap::R22_opr;
LIR_Opr FrameMap::R23_opr;
LIR_Opr FrameMap::R24_opr;
LIR_Opr FrameMap::R25_opr;
LIR_Opr FrameMap::R26_opr;
LIR_Opr FrameMap::R27_opr;
LIR_Opr FrameMap::R28_opr;
LIR_Opr FrameMap::R29_opr;
LIR_Opr FrameMap::R30_opr;
LIR_Opr FrameMap::R31_opr;
LIR_Opr FrameMap::R0_oop_opr;
//LIR_Opr FrameMap::R1_oop_opr;
LIR_Opr FrameMap::R2_oop_opr;
LIR_Opr FrameMap::R3_oop_opr;
LIR_Opr FrameMap::R4_oop_opr;
LIR_Opr FrameMap::R5_oop_opr;
LIR_Opr FrameMap::R6_oop_opr;
LIR_Opr FrameMap::R7_oop_opr;
LIR_Opr FrameMap::R8_oop_opr;
LIR_Opr FrameMap::R9_oop_opr;
LIR_Opr FrameMap::R10_oop_opr;
LIR_Opr FrameMap::R11_oop_opr;
LIR_Opr FrameMap::R12_oop_opr;
//LIR_Opr FrameMap::R13_oop_opr;
LIR_Opr FrameMap::R14_oop_opr;
LIR_Opr FrameMap::R15_oop_opr;
//LIR_Opr FrameMap::R16_oop_opr;
LIR_Opr FrameMap::R17_oop_opr;
LIR_Opr FrameMap::R18_oop_opr;
LIR_Opr FrameMap::R19_oop_opr;
LIR_Opr FrameMap::R20_oop_opr;
LIR_Opr FrameMap::R21_oop_opr;
LIR_Opr FrameMap::R22_oop_opr;
LIR_Opr FrameMap::R23_oop_opr;
LIR_Opr FrameMap::R24_oop_opr;
LIR_Opr FrameMap::R25_oop_opr;
LIR_Opr FrameMap::R26_oop_opr;
LIR_Opr FrameMap::R27_oop_opr;
LIR_Opr FrameMap::R28_oop_opr;
//LIR_Opr FrameMap::R29_oop_opr;
LIR_Opr FrameMap::R30_oop_opr;
LIR_Opr FrameMap::R31_oop_opr;
LIR_Opr FrameMap::R0_metadata_opr;
//LIR_Opr FrameMap::R1_metadata_opr;
LIR_Opr FrameMap::R2_metadata_opr;
LIR_Opr FrameMap::R3_metadata_opr;
LIR_Opr FrameMap::R4_metadata_opr;
LIR_Opr FrameMap::R5_metadata_opr;
LIR_Opr FrameMap::R6_metadata_opr;
LIR_Opr FrameMap::R7_metadata_opr;
LIR_Opr FrameMap::R8_metadata_opr;
LIR_Opr FrameMap::R9_metadata_opr;
LIR_Opr FrameMap::R10_metadata_opr;
LIR_Opr FrameMap::R11_metadata_opr;
LIR_Opr FrameMap::R12_metadata_opr;
//LIR_Opr FrameMap::R13_metadata_opr;
LIR_Opr FrameMap::R14_metadata_opr;
LIR_Opr FrameMap::R15_metadata_opr;
//LIR_Opr FrameMap::R16_metadata_opr;
LIR_Opr FrameMap::R17_metadata_opr;
LIR_Opr FrameMap::R18_metadata_opr;
LIR_Opr FrameMap::R19_metadata_opr;
LIR_Opr FrameMap::R20_metadata_opr;
LIR_Opr FrameMap::R21_metadata_opr;
LIR_Opr FrameMap::R22_metadata_opr;
LIR_Opr FrameMap::R23_metadata_opr;
LIR_Opr FrameMap::R24_metadata_opr;
LIR_Opr FrameMap::R25_metadata_opr;
LIR_Opr FrameMap::R26_metadata_opr;
LIR_Opr FrameMap::R27_metadata_opr;
LIR_Opr FrameMap::R28_metadata_opr;
//LIR_Opr FrameMap::R29_metadata_opr;
LIR_Opr FrameMap::R30_metadata_opr;
LIR_Opr FrameMap::R31_metadata_opr;
LIR_Opr FrameMap::SP_opr;
LIR_Opr FrameMap::R0_long_opr;
LIR_Opr FrameMap::R3_long_opr;
LIR_Opr FrameMap::F1_opr;
LIR_Opr FrameMap::F1_double_opr;
LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
FloatRegister FrameMap::nr2floatreg (int rnr) {
assert(_init_done, "tables not initialized");
debug_only(fpu_range_check(rnr);)
return _fpu_regs[rnr];
}
// Returns true if reg could be smashed by a callee.
bool FrameMap::is_caller_save_register (LIR_Opr reg) {
if (reg->is_single_fpu() || reg->is_double_fpu()) { return true; }
if (reg->is_double_cpu()) {
return is_caller_save_register(reg->as_register_lo()) ||
is_caller_save_register(reg->as_register_hi());
}
return is_caller_save_register(reg->as_register());
}
bool FrameMap::is_caller_save_register (Register r) {
// not visible to allocator: R0: scratch, R1: SP
// r->encoding() < 2 + nof_caller_save_cpu_regs();
return true; // Currently all regs are caller save.
}
void FrameMap::initialize() {
assert(!_init_done, "once");
int i = 0;
// Put generally available registers at the beginning (allocated, saved for GC).
for (int j = 0; j < nof_cpu_regs; ++j) {
Register rj = as_Register(j);
if (reg_needs_save(rj)) {
map_register(i++, rj);
}
}
assert(i == nof_cpu_regs_reg_alloc, "number of allocated registers");
// The following registers are not normally available.
for (int j = 0; j < nof_cpu_regs; ++j) {
Register rj = as_Register(j);
if (!reg_needs_save(rj)) {
map_register(i++, rj);
}
}
assert(i == nof_cpu_regs, "number of CPU registers");
for (i = 0; i < nof_fpu_regs; i++) {
_fpu_regs[i] = as_FloatRegister(i);
}
_init_done = true;
R0_opr = as_opr(R0);
R1_opr = as_opr(R1);
R2_opr = as_opr(R2);
R3_opr = as_opr(R3);
R4_opr = as_opr(R4);
R5_opr = as_opr(R5);
R6_opr = as_opr(R6);
R7_opr = as_opr(R7);
R8_opr = as_opr(R8);
R9_opr = as_opr(R9);
R10_opr = as_opr(R10);
R11_opr = as_opr(R11);
R12_opr = as_opr(R12);
R13_opr = as_opr(R13);
R14_opr = as_opr(R14);
R15_opr = as_opr(R15);
R16_opr = as_opr(R16);
R17_opr = as_opr(R17);
R18_opr = as_opr(R18);
R19_opr = as_opr(R19);
R20_opr = as_opr(R20);
R21_opr = as_opr(R21);
R22_opr = as_opr(R22);
R23_opr = as_opr(R23);
R24_opr = as_opr(R24);
R25_opr = as_opr(R25);
R26_opr = as_opr(R26);
R27_opr = as_opr(R27);
R28_opr = as_opr(R28);
R29_opr = as_opr(R29);
R30_opr = as_opr(R30);
R31_opr = as_opr(R31);
R0_oop_opr = as_oop_opr(R0);
//R1_oop_opr = as_oop_opr(R1);
R2_oop_opr = as_oop_opr(R2);
R3_oop_opr = as_oop_opr(R3);
R4_oop_opr = as_oop_opr(R4);
R5_oop_opr = as_oop_opr(R5);
R6_oop_opr = as_oop_opr(R6);
R7_oop_opr = as_oop_opr(R7);
R8_oop_opr = as_oop_opr(R8);
R9_oop_opr = as_oop_opr(R9);
R10_oop_opr = as_oop_opr(R10);
R11_oop_opr = as_oop_opr(R11);
R12_oop_opr = as_oop_opr(R12);
//R13_oop_opr = as_oop_opr(R13);
R14_oop_opr = as_oop_opr(R14);
R15_oop_opr = as_oop_opr(R15);
//R16_oop_opr = as_oop_opr(R16);
R17_oop_opr = as_oop_opr(R17);
R18_oop_opr = as_oop_opr(R18);
R19_oop_opr = as_oop_opr(R19);
R20_oop_opr = as_oop_opr(R20);
R21_oop_opr = as_oop_opr(R21);
R22_oop_opr = as_oop_opr(R22);
R23_oop_opr = as_oop_opr(R23);
R24_oop_opr = as_oop_opr(R24);
R25_oop_opr = as_oop_opr(R25);
R26_oop_opr = as_oop_opr(R26);
R27_oop_opr = as_oop_opr(R27);
R28_oop_opr = as_oop_opr(R28);
//R29_oop_opr = as_oop_opr(R29);
R30_oop_opr = as_oop_opr(R30);
R31_oop_opr = as_oop_opr(R31);
R0_metadata_opr = as_metadata_opr(R0);
//R1_metadata_opr = as_metadata_opr(R1);
R2_metadata_opr = as_metadata_opr(R2);
R3_metadata_opr = as_metadata_opr(R3);
R4_metadata_opr = as_metadata_opr(R4);
R5_metadata_opr = as_metadata_opr(R5);
R6_metadata_opr = as_metadata_opr(R6);
R7_metadata_opr = as_metadata_opr(R7);
R8_metadata_opr = as_metadata_opr(R8);
R9_metadata_opr = as_metadata_opr(R9);
R10_metadata_opr = as_metadata_opr(R10);
R11_metadata_opr = as_metadata_opr(R11);
R12_metadata_opr = as_metadata_opr(R12);
//R13_metadata_opr = as_metadata_opr(R13);
R14_metadata_opr = as_metadata_opr(R14);
R15_metadata_opr = as_metadata_opr(R15);
//R16_metadata_opr = as_metadata_opr(R16);
R17_metadata_opr = as_metadata_opr(R17);
R18_metadata_opr = as_metadata_opr(R18);
R19_metadata_opr = as_metadata_opr(R19);
R20_metadata_opr = as_metadata_opr(R20);
R21_metadata_opr = as_metadata_opr(R21);
R22_metadata_opr = as_metadata_opr(R22);
R23_metadata_opr = as_metadata_opr(R23);
R24_metadata_opr = as_metadata_opr(R24);
R25_metadata_opr = as_metadata_opr(R25);
R26_metadata_opr = as_metadata_opr(R26);
R27_metadata_opr = as_metadata_opr(R27);
R28_metadata_opr = as_metadata_opr(R28);
//R29_metadata_opr = as_metadata_opr(R29);
R30_metadata_opr = as_metadata_opr(R30);
R31_metadata_opr = as_metadata_opr(R31);
SP_opr = as_pointer_opr(R1_SP);
R0_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(R0), cpu_reg2rnr(R0));
R3_long_opr = LIR_OprFact::double_cpu(cpu_reg2rnr(R3), cpu_reg2rnr(R3));
F1_opr = as_float_opr(F1);
F1_double_opr = as_double_opr(F1);
// All the allocated cpu regs are caller saved.
for (int i = 0; i < max_nof_caller_save_cpu_regs; i++) {
_caller_save_cpu_regs[i] = LIR_OprFact::single_cpu(i);
}
// All the fpu regs are caller saved.
for (int i = 0; i < nof_caller_save_fpu_regs; i++) {
_caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
}
}
Address FrameMap::make_new_address(ByteSize sp_offset) const {
return Address(R1_SP, STACK_BIAS + in_bytes(sp_offset));
}
VMReg FrameMap::fpu_regname (int n) {
return as_FloatRegister(n)->as_VMReg();
}
LIR_Opr FrameMap::stack_pointer() {
return SP_opr;
}
// JSR 292
// On PPC64, there is no need to save the SP, because neither
// method handle intrinsics, nor compiled lambda forms modify it.
LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
return LIR_OprFact::illegalOpr;
}
bool FrameMap::validate_frame() {
int max_offset = in_bytes(framesize_in_bytes());
int java_index = 0;
for (int i = 0; i < _incoming_arguments->length(); i++) {
LIR_Opr opr = _incoming_arguments->at(i);
if (opr->is_stack()) {
max_offset = MAX2(_argument_locations->at(java_index), max_offset);
}
java_index += type2size[opr->type()];
}
return Assembler::is_simm16(max_offset + STACK_BIAS);
}
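
The stack case of map_to_opr above turns a VMReg slot index into an SP-relative byte offset. A minimal sketch of that arithmetic, assuming the usual 4-byte VMReg stack slots and a zero STACK_BIAS on PPC64 (both values are assumptions here, not taken from this patch):

#include <cstdio>

// Hypothetical stand-in for the offset computation in map_to_opr.
static int sp_offset_for_slot(int reg2stack_index, int out_preserve_slots) {
  const int stack_slot_size = 4;  // assumed VMRegImpl::stack_slot_size
  const int stack_bias      = 0;  // assumed STACK_BIAS on PPC64
  return (reg2stack_index + out_preserve_slots) * stack_slot_size + stack_bias;
}

int main() {
  // With, say, two preserved out-slots the first stack argument lands at SP+8.
  std::printf("%d\n", sp_offset_for_slot(0, 2));
  return 0;
}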

View file

@@ -0,0 +1,202 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
#define CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
public:
enum {
nof_reg_args = 8, // Registers R3-R10 are available for parameter passing.
first_available_sp_in_frame = frame::jit_out_preserve_size,
frame_pad_in_bytes = 0
};
static const int pd_c_runtime_reserved_arg_size;
static LIR_Opr R0_opr;
static LIR_Opr R1_opr;
static LIR_Opr R2_opr;
static LIR_Opr R3_opr;
static LIR_Opr R4_opr;
static LIR_Opr R5_opr;
static LIR_Opr R6_opr;
static LIR_Opr R7_opr;
static LIR_Opr R8_opr;
static LIR_Opr R9_opr;
static LIR_Opr R10_opr;
static LIR_Opr R11_opr;
static LIR_Opr R12_opr;
static LIR_Opr R13_opr;
static LIR_Opr R14_opr;
static LIR_Opr R15_opr;
static LIR_Opr R16_opr;
static LIR_Opr R17_opr;
static LIR_Opr R18_opr;
static LIR_Opr R19_opr;
static LIR_Opr R20_opr;
static LIR_Opr R21_opr;
static LIR_Opr R22_opr;
static LIR_Opr R23_opr;
static LIR_Opr R24_opr;
static LIR_Opr R25_opr;
static LIR_Opr R26_opr;
static LIR_Opr R27_opr;
static LIR_Opr R28_opr;
static LIR_Opr R29_opr;
static LIR_Opr R30_opr;
static LIR_Opr R31_opr;
static LIR_Opr R0_oop_opr;
//R1: Stack pointer. Not an oop.
static LIR_Opr R2_oop_opr;
static LIR_Opr R3_oop_opr;
static LIR_Opr R4_oop_opr;
static LIR_Opr R5_oop_opr;
static LIR_Opr R6_oop_opr;
static LIR_Opr R7_oop_opr;
static LIR_Opr R8_oop_opr;
static LIR_Opr R9_oop_opr;
static LIR_Opr R10_oop_opr;
static LIR_Opr R11_oop_opr;
static LIR_Opr R12_oop_opr;
//R13: System thread register. Not usable.
static LIR_Opr R14_oop_opr;
static LIR_Opr R15_oop_opr;
//R16: Java thread register. Not an oop.
static LIR_Opr R17_oop_opr;
static LIR_Opr R18_oop_opr;
static LIR_Opr R19_oop_opr;
static LIR_Opr R20_oop_opr;
static LIR_Opr R21_oop_opr;
static LIR_Opr R22_oop_opr;
static LIR_Opr R23_oop_opr;
static LIR_Opr R24_oop_opr;
static LIR_Opr R25_oop_opr;
static LIR_Opr R26_oop_opr;
static LIR_Opr R27_oop_opr;
static LIR_Opr R28_oop_opr;
static LIR_Opr R29_oop_opr;
//R29: TOC register. Not an oop.
static LIR_Opr R30_oop_opr;
static LIR_Opr R31_oop_opr;
static LIR_Opr R0_metadata_opr;
//R1: Stack pointer. Not metadata.
static LIR_Opr R2_metadata_opr;
static LIR_Opr R3_metadata_opr;
static LIR_Opr R4_metadata_opr;
static LIR_Opr R5_metadata_opr;
static LIR_Opr R6_metadata_opr;
static LIR_Opr R7_metadata_opr;
static LIR_Opr R8_metadata_opr;
static LIR_Opr R9_metadata_opr;
static LIR_Opr R10_metadata_opr;
static LIR_Opr R11_metadata_opr;
static LIR_Opr R12_metadata_opr;
//R13: System thread register. Not usable.
static LIR_Opr R14_metadata_opr;
static LIR_Opr R15_metadata_opr;
//R16: Java thread register. Not metadata.
static LIR_Opr R17_metadata_opr;
static LIR_Opr R18_metadata_opr;
static LIR_Opr R19_metadata_opr;
static LIR_Opr R20_metadata_opr;
static LIR_Opr R21_metadata_opr;
static LIR_Opr R22_metadata_opr;
static LIR_Opr R23_metadata_opr;
static LIR_Opr R24_metadata_opr;
static LIR_Opr R25_metadata_opr;
static LIR_Opr R26_metadata_opr;
static LIR_Opr R27_metadata_opr;
static LIR_Opr R28_metadata_opr;
//R29: TOC register. Not metadata.
static LIR_Opr R30_metadata_opr;
static LIR_Opr R31_metadata_opr;
static LIR_Opr SP_opr;
static LIR_Opr R0_long_opr;
static LIR_Opr R3_long_opr;
static LIR_Opr F1_opr;
static LIR_Opr F1_double_opr;
private:
static FloatRegister _fpu_regs [nof_fpu_regs];
static LIR_Opr as_long_single_opr(Register r) {
return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
}
static LIR_Opr as_long_pair_opr(Register r) {
return LIR_OprFact::double_cpu(cpu_reg2rnr(r->successor()), cpu_reg2rnr(r));
}
public:
#ifdef _LP64
static LIR_Opr as_long_opr(Register r) {
return as_long_single_opr(r);
}
static LIR_Opr as_pointer_opr(Register r) {
return as_long_single_opr(r);
}
#else
static LIR_Opr as_long_opr(Register r) {
Unimplemented(); return 0;
// return as_long_pair_opr(r);
}
static LIR_Opr as_pointer_opr(Register r) {
Unimplemented(); return 0;
// return as_opr(r);
}
#endif
static LIR_Opr as_float_opr(FloatRegister r) {
return LIR_OprFact::single_fpu(r->encoding());
}
static LIR_Opr as_double_opr(FloatRegister r) {
return LIR_OprFact::double_fpu(r->encoding());
}
static FloatRegister nr2floatreg (int rnr);
static VMReg fpu_regname (int n);
static bool is_caller_save_register(LIR_Opr reg);
static bool is_caller_save_register(Register r);
static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
static int last_cpu_reg() { return pd_last_cpu_reg; }
// Registers which need to be saved in the frames (e.g. for GC).
// Register usage:
// R0: scratch
// R1: sp
// R13: system thread id
// R16: java thread
// R29: global TOC
static bool reg_needs_save(Register r) { return r != R0 && r != R1 && r != R13 && r != R16 && r != R29; }
#endif // CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
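
reg_needs_save above excludes the five fixed-purpose registers; the remaining 27 GPRs are exactly the pd_nof_cpu_regs_reg_alloc = 27 declared in c1_Defs_ppc.hpp. A standalone check of that count (plain ints instead of Register):

#include <cassert>

int main() {
  int allocatable = 0;
  for (int r = 0; r < 32; ++r) {
    if (r != 0 && r != 1 && r != 13 && r != 16 && r != 29) {  // same exclusions as reg_needs_save
      ++allocatable;
    }
  }
  assert(allocatable == 27);  // matches pd_nof_cpu_regs_reg_alloc
  return 0;
}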

File diff suppressed because it is too large.

View file

@@ -0,0 +1,69 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
#define CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
private:
//////////////////////////////////////////////////////////////////////////////
// PPC64 load/store emission
//
// The PPC ld/st instructions cannot accommodate displacements > 16 bits long.
// The following "pseudo" instructions (load/store) make it easier to
// use the indexed addressing mode by allowing 32 bit displacements:
//
void explicit_null_check(Register addr, CodeEmitInfo* info);
int store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned);
int store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide);
int load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned);
int load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide);
int shift_amount(BasicType t);
// Record the type of the receiver in ReceiverTypeData.
void type_profile_helper(Register mdo, int mdo_offset_bias,
ciMethodData *md, ciProfileData *data,
Register recv, Register tmp1, Label* update_done);
// Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
void setup_md_access(ciMethod* method, int bci,
ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias);
public:
static const ConditionRegister BOOL_RESULT;
// Emit trampoline stub for call. Call bailout() if failed. Return true on success.
bool emit_trampoline_stub_for_call(address target, Register Rtoc = noreg);
enum {
max_static_call_stub_size = 4 * BytesPerInstWord + MacroAssembler::b64_patchable_size,
call_stub_size = max_static_call_stub_size + MacroAssembler::trampoline_stub_size, // or smaller
exception_handler_size = MacroAssembler::b64_patchable_size, // or smaller
deopt_handler_size = MacroAssembler::bl64_patchable_size
};
#endif // CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
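
The load/store helpers declared above exist because d-form ld/st displacements are limited to signed 16 bits. One common way to bridge that gap (a sketch of the idea only, not the HotSpot implementation) is to split a 32-bit offset into a register-held high part and a signed 16-bit remainder:

#include <cassert>
#include <cstdint>

// Split disp so that (hi_part << 16) + lo == disp and lo fits a signed
// 16-bit d-form immediate. Computed in 64 bits to keep corner cases defined.
static void split_disp(int32_t disp, int64_t& hi_part, int16_t& lo) {
  lo      = int16_t(disp & 0xFFFF);
  hi_part = (int64_t(disp) - lo) >> 16;
}

int main() {
  const int32_t tests[] = { 0, 0x7FFF, -0x8000, 0x12345678, -0x12345678, 0x7FFFFFFF, INT32_MIN };
  for (int32_t d : tests) {
    int64_t hi; int16_t lo;
    split_disp(d, hi, lo);
    assert((hi << 16) + lo == d);  // base + (hi << 16) + lo reaches the original address
  }
  return 0;
}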

File diff suppressed because it is too large.

View file

@@ -0,0 +1,34 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "c1/c1_Instruction.hpp"
#include "c1/c1_LinearScan.hpp"
#include "utilities/bitMap.inline.hpp"
void LinearScan::allocate_fpu_stack() {
Unimplemented();
// No FPU stack on PPC
}

View file

@@ -0,0 +1,73 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_LINEARSCAN_PPC_HPP
#define CPU_PPC_VM_C1_LINEARSCAN_PPC_HPP
inline bool LinearScan::is_processed_reg_num(int reg_num) {
assert(FrameMap::R0_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 1, "wrong assumption below");
assert(FrameMap::R1_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 2, "wrong assumption below");
assert(FrameMap::R13_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 3, "wrong assumption below");
assert(FrameMap::R16_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 4, "wrong assumption below");
assert(FrameMap::R29_opr->cpu_regnr() == FrameMap::last_cpu_reg() + 5, "wrong assumption below");
return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map;
}
inline int LinearScan::num_physical_regs(BasicType type) {
return 1;
}
inline bool LinearScan::requires_adjacent_regs(BasicType type) {
return false;
}
inline bool LinearScan::is_caller_save(int assigned_reg) {
return true; // assigned_reg < pd_first_callee_saved_reg;
}
inline void LinearScan::pd_add_temps(LIR_Op* op) {
// No special case behaviours yet
}
inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) {
assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
_first_reg = pd_first_callee_saved_reg;
_last_reg = pd_last_callee_saved_reg;
ShouldNotReachHere(); // Currently no callee saved regs.
return true;
} else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT ||
cur->type() == T_ADDRESS || cur->type() == T_METADATA) {
_first_reg = pd_first_cpu_reg;
_last_reg = pd_last_cpu_reg;
return true;
}
return false;
}
#endif // CPU_PPC_VM_C1_LINEARSCAN_PPC_HPP

View file

@ -0,0 +1,486 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "c1/c1_MacroAssembler.hpp"
#include "c1/c1_Runtime1.hpp"
#include "classfile/systemDictionary.hpp"
#include "gc/shared/collectedHeap.hpp"
#include "interpreter/interpreter.hpp"
#include "oops/arrayOop.hpp"
#include "oops/markOop.hpp"
#include "runtime/basicLock.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/os.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/sharedRuntime.hpp"
void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) {
const Register temp_reg = R12_scratch2;
verify_oop(receiver);
load_klass(temp_reg, receiver);
if (TrapBasedICMissChecks) {
trap_ic_miss_check(temp_reg, iCache);
} else {
Label L;
cmpd(CCR0, temp_reg, iCache);
beq(CCR0, L);
//load_const_optimized(temp_reg, SharedRuntime::get_ic_miss_stub(), R0);
calculate_address_from_global_toc(temp_reg, SharedRuntime::get_ic_miss_stub(), true, true, false);
mtctr(temp_reg);
bctr();
align(32, 12);
bind(L);
}
}
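A minimal C++ sketch of the decision inline_cache_check implements, using hypothetical names that are not from the patch: the receiver's klass is compared with the klass cached at the call site, and a mismatch dispatches to the IC miss stub.

// Sketch only: models the branch emitted above, not the real code layout.
typedef void* address_t;

static address_t ic_dispatch(const void* receiver_klass,
                             const void* cached_klass,
                             address_t verified_entry,
                             address_t ic_miss_stub) {
  // trap_ic_miss_check / cmpd+beq above reduce to this comparison.
  return (receiver_klass == cached_klass) ? verified_entry : ic_miss_stub;
}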
void C1_MacroAssembler::explicit_null_check(Register base) {
Unimplemented();
}
void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_bytes) {
assert(bang_size_in_bytes >= frame_size_in_bytes, "stack bang size incorrect");
// Make sure there is enough stack space for this method's activation.
generate_stack_overflow_check(bang_size_in_bytes);
// Create the frame.
const Register return_pc = R0;
mflr(return_pc);
// Get caller's sp.
std(return_pc, _abi(lr), R1_SP); // SP->lr = return_pc
push_frame(frame_size_in_bytes, R0); // SP -= frame_size_in_bytes
}
void C1_MacroAssembler::unverified_entry(Register receiver, Register ic_klass) {
Unimplemented(); // Currently unused.
//if (C1Breakpoint) illtrap();
//inline_cache_check(receiver, ic_klass);
}
void C1_MacroAssembler::verified_entry() {
if (C1Breakpoint) illtrap();
// build frame
}
void C1_MacroAssembler::lock_object(Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case) {
assert_different_registers(Rmark, Roop, Rbox, Rscratch);
Label done, cas_failed, slow_int;
// The following move must be the first instruction emitted since debug
// information may be generated for it.
// Load object header.
ld(Rmark, oopDesc::mark_offset_in_bytes(), Roop);
verify_oop(Roop);
// Save object being locked into the BasicObjectLock...
std(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
if (UseBiasedLocking) {
biased_locking_enter(CCR0, Roop, Rmark, Rscratch, R0, done, &slow_int);
}
// ... and mark it unlocked.
ori(Rmark, Rmark, markOopDesc::unlocked_value);
// Save unlocked object header into the displaced header location on the stack.
std(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
// Compare object markOop with Rmark and if equal exchange Rscratch with object markOop.
assert(oopDesc::mark_offset_in_bytes() == 0, "cas must take a zero displacement");
cmpxchgd(/*flag=*/CCR0,
/*current_value=*/Rscratch,
/*compare_value=*/Rmark,
/*exchange_value=*/Rbox,
/*where=*/Roop/*+0==mark_offset_in_bytes*/,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(),
noreg,
&cas_failed,
/*check without membar and ldarx first*/true);
// If compare/exchange succeeded we found an unlocked object and we now have locked it
// hence we are done.
b(done);
bind(slow_int);
b(slow_case); // far
bind(cas_failed);
// We did not find an unlocked object so see if this is a recursive case.
sub(Rscratch, Rscratch, R1_SP);
load_const_optimized(R0, (~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place));
and_(R0/*==0?*/, Rscratch, R0);
std(R0/*==0, perhaps*/, BasicLock::displaced_header_offset_in_bytes(), Rbox);
bne(CCR0, slow_int);
bind(done);
}
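As a rough C++ model of the lightweight-locking fast path emitted above (assumed 4K pages, a simplified one-word mark, and a hypothetical fast_lock helper; std::atomic stands in for the cmpxchgd sequence):

#include <atomic>
#include <cstdint>

typedef intptr_t markWord;                       // simplified mark word
static const intptr_t unlocked_value     = 1;    // assumed low-bit encoding
static const intptr_t lock_mask_in_place = 3;
static const intptr_t page_mask          = ~(intptr_t)(4096 - 1); // assumed page size

// Returns true if the lock was taken on the fast path, false for the slow path.
static bool fast_lock(std::atomic<markWord>* mark_addr, markWord* box, intptr_t sp) {
  markWord mark = mark_addr->load(std::memory_order_relaxed) | unlocked_value;
  *box = mark;                                   // displaced header into the BasicLock
  markWord expected = mark;
  if (mark_addr->compare_exchange_strong(expected, (markWord)box)) {
    return true;                                 // object was unlocked, now locked by us
  }
  // CAS failed: recursive case if the observed mark points into our own stack page.
  intptr_t diff = expected - sp;
  if ((diff & (page_mask | lock_mask_in_place)) == 0) {
    *box = 0;                                    // zero displaced header marks recursion
    return true;
  }
  return false;                                  // contended: runtime slow path
}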
void C1_MacroAssembler::unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case) {
assert_different_registers(Rmark, Roop, Rbox);
Label slow_int, done;
Address mark_addr(Roop, oopDesc::mark_offset_in_bytes());
assert(mark_addr.disp() == 0, "cas must take a zero displacement");
if (UseBiasedLocking) {
// Load the object out of the BasicObjectLock.
ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
verify_oop(Roop);
biased_locking_exit(CCR0, Roop, R0, done);
}
// Test first if it is a fast recursive unlock.
ld(Rmark, BasicLock::displaced_header_offset_in_bytes(), Rbox);
cmpdi(CCR0, Rmark, 0);
beq(CCR0, done);
if (!UseBiasedLocking) {
// Load object.
ld(Roop, BasicObjectLock::obj_offset_in_bytes(), Rbox);
verify_oop(Roop);
}
// Check if it is still a lightweight lock; this is true if we see
// the stack address of the basicLock in the markOop of the object.
cmpxchgd(/*flag=*/CCR0,
/*current_value=*/R0,
/*compare_value=*/Rbox,
/*exchange_value=*/Rmark,
/*where=*/Roop,
MacroAssembler::MemBarRel,
MacroAssembler::cmpxchgx_hint_release_lock(),
noreg,
&slow_int);
b(done);
bind(slow_int);
b(slow_case); // far
// Done
bind(done);
}
void C1_MacroAssembler::try_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register, must be global register for incr_allocated_bytes
Register t2, // temp register
Label& slow_case // continuation point if fast allocation fails
) {
if (UseTLAB) {
tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case);
} else {
eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case);
RegisterOrConstant size_in_bytes = var_size_in_bytes->is_valid()
? RegisterOrConstant(var_size_in_bytes)
: RegisterOrConstant(con_size_in_bytes);
incr_allocated_bytes(size_in_bytes, t1, t2);
}
}
void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) {
assert_different_registers(obj, klass, len, t1, t2);
if (UseBiasedLocking && !len->is_valid()) {
ld(t1, in_bytes(Klass::prototype_header_offset()), klass);
} else {
load_const_optimized(t1, (intx)markOopDesc::prototype());
}
std(t1, oopDesc::mark_offset_in_bytes(), obj);
store_klass(obj, klass);
if (len->is_valid()) {
stw(len, arrayOopDesc::length_offset_in_bytes(), obj);
} else if (UseCompressedClassPointers) {
// Otherwise length is in the class gap.
store_klass_gap(obj);
}
}
void C1_MacroAssembler::initialize_body(Register base, Register index) {
assert_different_registers(base, index);
srdi(index, index, LogBytesPerWord);
clear_memory_doubleword(base, index);
}
void C1_MacroAssembler::initialize_body(Register obj, Register tmp1, Register tmp2,
int obj_size_in_bytes, int hdr_size_in_bytes) {
const int index = (obj_size_in_bytes - hdr_size_in_bytes) / HeapWordSize;
const int cl_size = VM_Version::L1_data_cache_line_size(),
cl_dwords = cl_size>>3,
cl_dw_addr_bits = exact_log2(cl_dwords);
const Register tmp = R0,
base_ptr = tmp1,
cnt_dwords = tmp2;
if (index <= 6) {
// Use explicit NULL stores.
if (index > 0) { li(tmp, 0); }
for (int i = 0; i < index; ++i) { std(tmp, hdr_size_in_bytes + i * HeapWordSize, obj); }
} else if (index < (2<<cl_dw_addr_bits)-1) {
// simple loop
Label loop;
li(cnt_dwords, index);
addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element.
li(tmp, 0);
mtctr(cnt_dwords); // Load counter.
bind(loop);
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(loop);
} else {
// like clear_memory_doubleword
Label startloop, fast, fastloop, restloop, done;
addi(base_ptr, obj, hdr_size_in_bytes); // Compute address of first element.
load_const_optimized(cnt_dwords, index);
rldicl_(tmp, base_ptr, 64-3, 64-cl_dw_addr_bits); // Extract dword offset within first cache line.
beq(CCR0, fast); // Already 128byte aligned.
subfic(tmp, tmp, cl_dwords);
mtctr(tmp); // Set ctr to hit 128byte boundary (0<ctr<cl_dwords).
subf(cnt_dwords, tmp, cnt_dwords); // rest.
li(tmp, 0);
bind(startloop); // Clear at the beginning to reach 128byte boundary.
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(startloop);
bind(fast); // Clear 128byte blocks.
srdi(tmp, cnt_dwords, cl_dw_addr_bits); // Loop count for 128byte loop (>0).
andi(cnt_dwords, cnt_dwords, cl_dwords-1); // Rest in dwords.
mtctr(tmp); // Load counter.
bind(fastloop);
dcbz(base_ptr); // Clear 128byte aligned block.
addi(base_ptr, base_ptr, cl_size);
bdnz(fastloop);
cmpdi(CCR0, cnt_dwords, 0); // size 0?
beq(CCR0, done); // rest == 0
li(tmp, 0);
mtctr(cnt_dwords); // Load counter.
bind(restloop); // Clear rest.
std(tmp, 0, base_ptr); // Clear 8byte aligned block.
addi(base_ptr, base_ptr, 8);
bdnz(restloop);
bind(done);
}
}
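The strategy selection above, restated as plain C++ for readability (128-byte cache line assumed; the dcbz step is shown as an ordinary memset, and clear_body is an illustrative name, not a VM function):

#include <cstdint>
#include <cstring>

// Clear 'dwords' 8-byte words at 'base', mirroring initialize_body's cases.
static void clear_body(uint64_t* base, int dwords) {
  const int cl_bytes = 128, cl_dwords = cl_bytes / 8;
  if (dwords <= 6) {
    for (int i = 0; i < dwords; i++) base[i] = 0;          // a few explicit stores
  } else if (dwords < 2 * cl_dwords - 1) {
    for (int i = 0; i < dwords; i++) base[i] = 0;          // simple store loop
  } else {
    while (((uintptr_t)base & (cl_bytes - 1)) != 0 && dwords > 0) {
      *base++ = 0; dwords--;                               // reach a cache-line boundary
    }
    int lines = dwords / cl_dwords;                        // whole lines: dcbz in the assembly
    memset(base, 0, (size_t)lines * cl_bytes);
    base += lines * cl_dwords; dwords -= lines * cl_dwords;
    for (int i = 0; i < dwords; i++) base[i] = 0;          // remaining dwords
  }
}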
void C1_MacroAssembler::allocate_object(
Register obj, // result: pointer to object after successful allocation
Register t1, // temp register
Register t2, // temp register
Register t3, // temp register
int hdr_size, // object header size in words
int obj_size, // object size in words
Register klass, // object klass
Label& slow_case // continuation point if fast allocation fails
) {
assert_different_registers(obj, t1, t2, t3, klass);
// allocate space & initialize header
if (!is_simm16(obj_size * wordSize)) {
// Would need an extra register to load the
// object size => take the slow path for now.
b(slow_case);
return;
}
try_allocate(obj, noreg, obj_size * wordSize, t2, t3, slow_case);
initialize_object(obj, klass, noreg, obj_size * HeapWordSize, t1, t2);
}
void C1_MacroAssembler::initialize_object(
Register obj, // result: pointer to object after successful allocation
Register klass, // object klass
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Register t2 // temp register
) {
const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize;
initialize_header(obj, klass, noreg, t1, t2);
#ifdef ASSERT
{
lwz(t1, in_bytes(Klass::layout_helper_offset()), klass);
if (var_size_in_bytes != noreg) {
cmpw(CCR0, t1, var_size_in_bytes);
} else {
cmpwi(CCR0, t1, con_size_in_bytes);
}
asm_assert_eq("bad size in initialize_object", 0x753);
}
#endif
// Initialize body.
if (var_size_in_bytes != noreg) {
// Use a loop.
addi(t1, obj, hdr_size_in_bytes); // Compute address of first element.
addi(t2, var_size_in_bytes, -hdr_size_in_bytes); // Compute size of body.
initialize_body(t1, t2);
} else if (con_size_in_bytes > hdr_size_in_bytes) {
// Use a loop.
initialize_body(obj, t1, t2, con_size_in_bytes, hdr_size_in_bytes);
}
if (CURRENT_ENV->dtrace_alloc_probes()) {
Unimplemented();
// assert(obj == O0, "must be");
// call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
// relocInfo::runtime_call_type);
}
verify_oop(obj);
}
void C1_MacroAssembler::allocate_array(
Register obj, // result: pointer to array after successful allocation
Register len, // array length
Register t1, // temp register
Register t2, // temp register
Register t3, // temp register
int hdr_size, // object header size in words
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // continuation point if fast allocation fails
) {
assert_different_registers(obj, len, t1, t2, t3, klass);
// Determine alignment mask.
assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work");
int log2_elt_size = exact_log2(elt_size);
// Check for negative or excessive length.
size_t max_length = max_array_allocation_length >> log2_elt_size;
if (UseTLAB) {
size_t max_tlab = align_size_up(ThreadLocalAllocBuffer::max_size() >> log2_elt_size, 64*K);
if (max_tlab < max_length) { max_length = max_tlab; }
}
load_const_optimized(t1, max_length);
cmpld(CCR0, len, t1);
bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_case);
// compute array size
// note: If 0 <= len <= max_length, len*elt_size + header + alignment is
// smaller than or equal to the largest integer; also, since top is always
// aligned, we can do the alignment here instead of at the end address
// computation.
const Register arr_size = t1;
Register arr_len_in_bytes = len;
if (elt_size != 1) {
sldi(t1, len, log2_elt_size);
arr_len_in_bytes = t1;
}
addi(arr_size, arr_len_in_bytes, hdr_size * wordSize + MinObjAlignmentInBytesMask); // Add space for header & alignment.
clrrdi(arr_size, arr_size, LogMinObjAlignmentInBytes); // Align array size.
// Allocate space & initialize header.
if (UseTLAB) {
tlab_allocate(obj, arr_size, 0, t2, slow_case);
} else {
eden_allocate(obj, arr_size, 0, t2, t3, slow_case);
}
initialize_header(obj, klass, len, t2, t3);
// Initialize body.
const Register base = t2;
const Register index = t3;
addi(base, obj, hdr_size * wordSize); // compute address of first element
addi(index, arr_size, -(hdr_size * wordSize)); // compute index = number of bytes to clear
initialize_body(base, index);
if (CURRENT_ENV->dtrace_alloc_probes()) {
Unimplemented();
//assert(obj == O0, "must be");
//call(CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::dtrace_object_alloc_id)),
// relocInfo::runtime_call_type);
}
verify_oop(obj);
}
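A worked example of the size rounding above, assuming an LP64 layout with an 8-byte word, 8-byte minimum object alignment and a 2-word array header (values chosen for illustration only, not taken from the patch):

#include <cstdio>

// align_up(len*elt_size + header, align) via add-mask-then-clear, as emitted above.
static unsigned long array_size_in_bytes(unsigned long len, int log2_elt_size,
                                         int hdr_size_words) {
  const unsigned long word_size = 8, align = 8, mask = align - 1;
  unsigned long raw = (len << log2_elt_size) + hdr_size_words * word_size + mask;
  return raw & ~mask;                             // clrrdi: clear the low alignment bits
}

int main() {
  printf("%lu\n", array_size_in_bytes(10, 2, 2)); // int[10]:  40 + 16 -> 56 (already aligned)
  printf("%lu\n", array_size_in_bytes(5, 0, 2));  // byte[5]:   5 + 16 -> 24 (rounded up)
  return 0;
}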
#ifndef PRODUCT
void C1_MacroAssembler::verify_stack_oop(int stack_offset) {
verify_oop_addr((RegisterOrConstant)(stack_offset + STACK_BIAS), R1_SP, "broken oop in stack slot");
}
void C1_MacroAssembler::verify_not_null_oop(Register r) {
Label not_null;
cmpdi(CCR0, r, 0);
bne(CCR0, not_null);
stop("non-null oop required");
bind(not_null);
if (!VerifyOops) return;
verify_oop(r);
}
#endif // PRODUCT
void C1_MacroAssembler::null_check(Register r, Label* Lnull) {
if (TrapBasedNullChecks) { // SIGTRAP based
trap_null_check(r);
} else { // explicit
//const address exception_entry = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
assert(Lnull != NULL, "must have Label for explicit check");
cmpdi(CCR0, r, 0);
bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::equal), *Lnull);
}
}
address C1_MacroAssembler::call_c_with_frame_resize(address dest, int frame_resize) {
if (frame_resize) { resize_frame(-frame_resize, R0); }
#if defined(ABI_ELFv2)
address return_pc = call_c(dest, relocInfo::runtime_call_type);
#else
address return_pc = call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, dest), relocInfo::runtime_call_type);
#endif
if (frame_resize) { resize_frame(frame_resize, R0); }
return return_pc;
}

View file

@ -0,0 +1,93 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_MACROASSEMBLER_PPC_HPP
#define CPU_PPC_VM_C1_MACROASSEMBLER_PPC_HPP
void pd_init() { /* nothing to do */ }
public:
void try_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Register t2, // temp register
Label& slow_case // continuation point if fast allocation fails
);
void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2);
void initialize_body(Register base, Register index);
void initialize_body(Register obj, Register tmp1, Register tmp2, int obj_size_in_bytes, int hdr_size_in_bytes);
// locking/unlocking
void lock_object (Register Rmark, Register Roop, Register Rbox, Register Rscratch, Label& slow_case);
void unlock_object(Register Rmark, Register Roop, Register Rbox, Label& slow_case);
void initialize_object(
Register obj, // result: pointer to object after successful allocation
Register klass, // object klass
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Register t2 // temp register
);
// Allocation of fixed-size objects
// (Can also be used to allocate fixed-size arrays, by setting
// hdr_size correctly and storing the array length afterwards.)
void allocate_object(
Register obj, // result: pointer to object after successful allocation
Register t1, // temp register
Register t2, // temp register
Register t3, // temp register
int hdr_size, // object header size in words
int obj_size, // object size in words
Register klass, // object klass
Label& slow_case // continuation point if fast allocation fails
);
enum {
max_array_allocation_length = 0x40000000 // ppc friendly value, requires lis only
};
// Allocation of arrays
void allocate_array(
Register obj, // result: pointer to array after successful allocation
Register len, // array length
Register t1, // temp register
Register t2, // temp register
Register t3, // temp register
int hdr_size, // object header size in words
int elt_size, // element size in bytes
Register klass, // object klass
Label& slow_case // continuation point if fast allocation fails
);
void null_check(Register r, Label *Lnull = NULL);
address call_c_with_frame_resize(address dest, int frame_resize);
#endif // CPU_PPC_VM_C1_MACROASSEMBLER_PPC_HPP

File diff suppressed because it is too large

View file

@ -0,0 +1,68 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef CPU_PPC_VM_C1_GLOBALS_PPC_HPP
#define CPU_PPC_VM_C1_GLOBALS_PPC_HPP
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
// Sets the default values for platform dependent flags used by the client compiler.
// (see c1_globals.hpp)
#ifndef TIERED
define_pd_global(bool, BackgroundCompilation, true );
define_pd_global(bool, CICompileOSR, true );
define_pd_global(bool, InlineIntrinsics, true );
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
define_pd_global(bool, UseOnStackReplacement, true );
define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1000 );
define_pd_global(intx, OnStackReplacePercentage, 1400 );
define_pd_global(bool, UseTLAB, true );
define_pd_global(bool, ProfileInterpreter, false);
define_pd_global(intx, FreqInlineSize, 325 );
define_pd_global(bool, ResizeTLAB, true );
define_pd_global(intx, ReservedCodeCacheSize, 32*M );
define_pd_global(intx, CodeCacheExpansionSize, 32*K );
define_pd_global(uintx,CodeCacheMinBlockLength, 1);
define_pd_global(uintx,MetaspaceSize, 12*M );
define_pd_global(bool, NeverActAsServerClassMachine, true );
define_pd_global(intx, NewSizeThreadIncrease, 16*K );
define_pd_global(uint64_t,MaxRAM, 1ULL*G);
define_pd_global(intx, InitialCodeCacheSize, 160*K);
#endif // !TIERED
define_pd_global(bool, UseTypeProfile, false);
define_pd_global(bool, RoundFPResults, false);
define_pd_global(bool, LIRFillDelaySlots, false);
define_pd_global(bool, OptimizeSinglePrecision, false);
define_pd_global(bool, CSEArrayLength, true );
define_pd_global(bool, TwoOperandLIRForm, false);
#endif // CPU_PPC_VM_C1_GLOBALS_PPC_HPP

View file

@ -39,7 +39,7 @@ define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, true);
define_pd_global(bool, UseOnStackReplacement, true);
define_pd_global(bool, ProfileInterpreter, true);
define_pd_global(bool, TieredCompilation, false);
define_pd_global(bool, TieredCompilation, true);
define_pd_global(intx, CompileThreshold, 10000);
define_pd_global(intx, OnStackReplacePercentage, 140);

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -45,4 +45,8 @@ void Compile::pd_compiler2_init() {
FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);
}
}
if (!VM_Version::has_isel() && FLAG_IS_DEFAULT(ConditionalMoveLimit)) {
FLAG_SET_ERGO(intx, ConditionalMoveLimit, 0);
}
}

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -129,13 +130,20 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/*
// - call
__ calculate_address_from_global_toc(reg_scratch, __ method_toc());
AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL);
__ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()), ic, reg_scratch);
bool success = __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()),
ic, reg_scratch, /*fixed_size*/ true);
if (!success) {
return NULL; // CodeCache is full
}
if (ReoptimizeCallSequences) {
__ b64_patchable((address)-1, relocInfo::none);
} else {
AddressLiteral a((address)-1);
__ load_const_from_method_toc(reg_scratch, a, reg_scratch);
success = __ load_const_from_method_toc(reg_scratch, a, reg_scratch, /*fixed_size*/ true);
if (!success) {
return NULL; // CodeCache is full
}
__ mtctr(reg_scratch);
__ bctr();
}
@ -153,6 +161,7 @@ address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/*
return stub;
#else
ShouldNotReachHere();
return NULL;
#endif
}
#undef __

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -271,39 +271,6 @@ void frame::describe_pd(FrameValues& values, int frame_no) {
}
#endif
void frame::adjust_unextended_sp() {
// If we are returning to a compiled MethodHandle call site, the
// saved_fp will in fact be a saved value of the unextended SP. The
// simplest way to tell whether we are returning to such a call site
// is as follows:
if (is_compiled_frame() && false /*is_at_mh_callsite()*/) { // TODO PPC port
// If the sender PC is a deoptimization point, get the original
// PC. For MethodHandle call site the unextended_sp is stored in
// saved_fp.
_unextended_sp = _fp - _cb->frame_size();
#ifdef ASSERT
nmethod *sender_nm = _cb->as_nmethod_or_null();
assert(sender_nm && *_sp == *_unextended_sp, "backlink changed");
intptr_t* sp = _unextended_sp; // check if stack can be walked from here
for (int x = 0; x < 5; ++x) { // check up to a couple of backlinks
intptr_t* prev_sp = *(intptr_t**)sp;
if (prev_sp == 0) break; // end of stack
assert(prev_sp>sp, "broken stack");
sp = prev_sp;
}
if (sender_nm->is_deopt_mh_entry(_pc)) { // checks for deoptimization
address original_pc = sender_nm->get_original_pc(this);
assert(sender_nm->insts_contains(original_pc), "original PC must be in nmethod");
assert(sender_nm->is_method_handle_return(original_pc), "must be");
}
#endif
}
}
intptr_t *frame::initial_deoptimization_info() {
// unused... but returns fp() to minimize changes introduced by 7087445
return fp();

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -465,7 +465,6 @@
// The frame's stack pointer before it has been extended by a c2i adapter;
// needed by deoptimization
intptr_t* _unextended_sp;
void adjust_unextended_sp();
public:

View file

@ -39,9 +39,6 @@ inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) {
_pc = pc; // Must be set for get_deopt_original_pc()
_fp = (intptr_t*)own_abi()->callers_sp;
// Use _fp - frame_size, needs to be done between _cb and _pc initialization
// and get_deopt_original_pc.
adjust_unextended_sp();
address original_pc = nmethod::get_deopt_original_pc(this);
if (original_pc != NULL) {

View file

@ -36,4 +36,7 @@ const int StackAlignmentInBytes = 16;
// The PPC CPUs are NOT multiple-copy-atomic.
#define CPU_NOT_MULTIPLE_COPY_ATOMIC
// The expected size in bytes of a cache line, used to pad data structures.
#define DEFAULT_CACHE_LINE_SIZE 128
#endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP

View file

@ -93,9 +93,9 @@ void InterpreterMacroAssembler::dispatch_prolog(TosState state, int bcp_incr) {
// own dispatch. The dispatch address in R24_dispatch_addr is used for the
// dispatch.
void InterpreterMacroAssembler::dispatch_epilog(TosState state, int bcp_incr) {
if (bcp_incr) { addi(R14_bcp, R14_bcp, bcp_incr); }
mtctr(R24_dispatch_addr);
addi(R14_bcp, R14_bcp, bcp_incr);
bctr();
bcctr(bcondAlways, 0, bhintbhBCCTRisNotPredictable);
}
void InterpreterMacroAssembler::check_and_handle_popframe(Register scratch_reg) {
@ -212,9 +212,6 @@ void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register byt
unimplemented("dispatch_Lbyte_code: verify"); // See Sparc Implementation to implement this
}
#ifdef FAST_DISPATCH
unimplemented("dispatch_Lbyte_code FAST_DISPATCH");
#else
assert_different_registers(bytecode, R11_scratch1);
// Calc dispatch table address.
@ -225,8 +222,7 @@ void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register byt
// Jump off!
mtctr(R11_scratch1);
bctr();
#endif
bcctr(bcondAlways, 0, bhintbhBCCTRisNotPredictable);
}
void InterpreterMacroAssembler::load_receiver(Register Rparam_count, Register Rrecv_dst) {
@ -546,8 +542,8 @@ void InterpreterMacroAssembler::index_check_without_pop(Register Rarray, Registe
sldi(RsxtIndex, RsxtIndex, index_shift);
blt(CCR0, LnotOOR);
// Index should be in R17_tos, array should be in R4_ARG2.
mr(R17_tos, Rindex);
mr(R4_ARG2, Rarray);
mr_if_needed(R17_tos, Rindex);
mr_if_needed(R4_ARG2, Rarray);
load_dispatch_table(Rtmp, (address*)Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
mtctr(Rtmp);
bctr();
@ -842,7 +838,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// Must fence, otherwise, preceding store(s) may float below cmpxchg.
// CmpxchgX sets CCR0 to cmpX(current, displaced).
fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
cmpxchgd(/*flag=*/CCR0,
/*current_value=*/current_header,
/*compare_value=*/displaced_header, /*exchange_value=*/monitor,
@ -850,7 +845,8 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(),
noreg,
&cas_failed);
&cas_failed,
/*check without membar and ldarx first*/true);
// If the compare-and-exchange succeeded, then we found an unlocked
// object and we have now locked it.
@ -868,9 +864,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
sub(current_header, current_header, R1_SP);
assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
load_const_optimized(tmp,
(address) (~(os::vm_page_size()-1) |
markOopDesc::lock_mask_in_place));
load_const_optimized(tmp, ~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place);
and_(R0/*==0?*/, current_header, tmp);
// If condition is true we are done and hence we can store 0 in the displaced
@ -1107,6 +1101,7 @@ void InterpreterMacroAssembler::verify_method_data_pointer() {
}
void InterpreterMacroAssembler::test_invocation_counter_for_mdp(Register invocation_count,
Register method_counters,
Register Rscratch,
Label &profile_continue) {
assert(ProfileInterpreter, "must be profiling interpreter");
@ -1115,12 +1110,11 @@ void InterpreterMacroAssembler::test_invocation_counter_for_mdp(Register invocat
Label done;
// If no method data exists, and the counter is high enough, make one.
int ipl_offs = load_const_optimized(Rscratch, &InvocationCounter::InterpreterProfileLimit, R0, true);
lwz(Rscratch, ipl_offs, Rscratch);
lwz(Rscratch, in_bytes(MethodCounters::interpreter_profile_limit_offset()), method_counters);
cmpdi(CCR0, R28_mdx, 0);
// Test to see if we should create a method data oop.
cmpd(CCR1, Rscratch /* InterpreterProfileLimit */, invocation_count);
cmpd(CCR1, Rscratch, invocation_count);
bne(CCR0, done);
bge(CCR1, profile_continue);
@ -1133,15 +1127,15 @@ void InterpreterMacroAssembler::test_invocation_counter_for_mdp(Register invocat
bind(done);
}
void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp) {
assert_different_registers(backedge_count, Rtmp, branch_bcp);
void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_count, Register method_counters,
Register target_bcp, Register disp, Register Rtmp) {
assert_different_registers(backedge_count, target_bcp, disp, Rtmp, R4_ARG2);
assert(UseOnStackReplacement,"Must UseOnStackReplacement to test_backedge_count_for_osr");
Label did_not_overflow;
Label overflow_with_error;
int ibbl_offs = load_const_optimized(Rtmp, &InvocationCounter::InterpreterBackwardBranchLimit, R0, true);
lwz(Rtmp, ibbl_offs, Rtmp);
lwz(Rtmp, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()), method_counters);
cmpw(CCR0, backedge_count, Rtmp);
blt(CCR0, did_not_overflow);
@ -1153,17 +1147,15 @@ void InterpreterMacroAssembler::test_backedge_count_for_osr(Register backedge_co
// the overflow function is called only once every overflow_frequency.
if (ProfileInterpreter) {
const int overflow_frequency = 1024;
li(Rtmp, overflow_frequency-1);
andr(Rtmp, Rtmp, backedge_count);
cmpwi(CCR0, Rtmp, 0);
andi_(Rtmp, backedge_count, overflow_frequency-1);
bne(CCR0, did_not_overflow);
}
// Overflow in loop, pass branch bytecode.
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), branch_bcp, true);
subf(R4_ARG2, disp, target_bcp); // Compute branch bytecode (previous bcp).
call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);
// Was an OSR adapter generated?
// O0 = osr nmethod
cmpdi(CCR0, R3_RET, 0);
beq(CCR0, overflow_with_error);
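The throttle above only reaches the runtime call once every overflow_frequency backedges; in plain C++ terms (should_notify_overflow is purely illustrative):

// Power-of-two masking as done with andi_ above: call out only when the
// counter is an exact multiple of overflow_frequency.
static bool should_notify_overflow(unsigned backedge_count) {
  const unsigned overflow_frequency = 1024;
  return (backedge_count & (overflow_frequency - 1)) == 0;
}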
@ -1324,7 +1316,7 @@ void InterpreterMacroAssembler::increment_backedge_counter(const Register Rcount
assert_different_registers(Rdst, Rtmp1);
const Register invocation_counter = Rtmp1;
const Register counter = Rdst;
// TODO ppc port assert(4 == InvocationCounter::sz_counter(), "unexpected field size.");
// TODO: PPC port: assert(4 == InvocationCounter::sz_counter(), "unexpected field size.");
// Load backedge counter.
lwz(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
@ -1337,8 +1329,7 @@ void InterpreterMacroAssembler::increment_backedge_counter(const Register Rcount
addi(counter, counter, InvocationCounter::count_increment);
// Mask the invocation counter.
li(Rscratch, InvocationCounter::count_mask_value);
andr(invocation_counter, invocation_counter, Rscratch);
andi(invocation_counter, invocation_counter, InvocationCounter::count_mask_value);
// Store new counter value.
stw(counter, in_bytes(MethodCounters::backedge_counter_offset()) +
@ -1817,15 +1808,13 @@ void InterpreterMacroAssembler::profile_return_type(Register ret, Register tmp1,
test_method_data_pointer(profile_continue);
if (MethodData::profile_return_jsr292_only()) {
assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2");
// If we don't profile all invoke bytecodes we must make sure
// it's a bytecode we indeed profile. We can't go back to the
// beginning of the ProfileData we intend to update to check its
// type because we're right after it and we don't know its
// length.
lbz(tmp1, 0, R14_bcp);
lhz(tmp2, Method::intrinsic_id_offset_in_bytes(), R19_method);
lbz(tmp2, Method::intrinsic_id_offset_in_bytes(), R19_method);
cmpwi(CCR0, tmp1, Bytecodes::_invokedynamic);
cmpwi(CCR1, tmp1, Bytecodes::_invokehandle);
cror(CCR0, Assembler::equal, CCR1, Assembler::equal);
@ -2207,9 +2196,7 @@ void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters,
// Load the backedge counter.
lwz(backedge_count, be_counter_offset, Rcounters); // is unsigned int
// Mask the backedge counter.
Register tmp = invocation_count;
li(tmp, InvocationCounter::count_mask_value);
andr(backedge_count, tmp, backedge_count); // Cannot use andi, need sign extension of count_mask_value.
andi(backedge_count, backedge_count, InvocationCounter::count_mask_value);
// Load the invocation counter.
lwz(invocation_count, inv_counter_offset, Rcounters); // is unsigned int
@ -2266,7 +2253,7 @@ void InterpreterMacroAssembler::verify_oop_or_return_address(Register reg, Regis
bne(CCR0, test);
address fd = CAST_FROM_FN_PTR(address, verify_return_address);
const int nbytes_save = 11*8; // volatile gprs except R0
const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
save_volatile_gprs(R1_SP, -nbytes_save); // except R0
save_LR_CR(Rtmp); // Save in old frame.
push_frame_reg_args(nbytes_save, Rtmp);

View file

@ -203,7 +203,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void restore_interpreter_state(Register scratch, bool bcp_and_mdx_only = false);
void increment_backedge_counter(const Register Rcounters, Register Rtmp, Register Rtmp2, Register Rscratch);
void test_backedge_count_for_osr(Register backedge_count, Register branch_bcp, Register Rtmp);
void test_backedge_count_for_osr(Register backedge_count, Register method_counters, Register target_bcp, Register disp, Register Rtmp);
void record_static_call_in_profile(Register Rentry, Register Rtmp);
void record_receiver_call_in_profile(Register Rklass, Register Rentry, Register Rtmp);
@ -222,7 +222,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
void set_method_data_pointer_for_bcp();
void test_method_data_pointer(Label& zero_continue);
void verify_method_data_pointer();
void test_invocation_counter_for_mdp(Register invocation_count, Register Rscratch, Label &profile_continue);
void test_invocation_counter_for_mdp(Register invocation_count, Register method_counters, Register Rscratch, Label &profile_continue);
void set_mdp_data_at(int constant, Register value);

View file

@ -30,6 +30,7 @@
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_ppc.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/biasedLocking.hpp"
#include "runtime/icache.hpp"
@ -114,7 +115,7 @@ void MacroAssembler::calculate_address_from_global_toc(Register dst, address add
}
if (hi16) {
addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset));
addis(dst, R29_TOC, MacroAssembler::largeoffset_si16_si16_hi(offset));
}
if (lo16) {
if (add_relocation) {
@ -256,7 +257,9 @@ narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
}
#endif // _LP64
void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
// Returns true if successful.
bool MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
Register toc, bool fixed_size) {
int toc_offset = 0;
// Use RelocationHolder::none for the constant pool entry, otherwise
// we will end up with a failing NativeCall::verify(x) where x is
@ -264,11 +267,13 @@ void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a,
// FIXME: We should insert relocation information for oops at the constant
// pool entries instead of inserting it at the loads; patching of a constant
// pool entry should be less expensive.
address oop_address = address_constant((address)a.value(), RelocationHolder::none);
address const_address = address_constant((address)a.value(), RelocationHolder::none);
if (const_address == NULL) { return false; } // allocation failure
// Relocate at the pc of the load.
relocate(a.rspec());
toc_offset = (int)(oop_address - code()->consts()->start());
ld_largeoffset_unchecked(dst, toc_offset, toc, true);
toc_offset = (int)(const_address - code()->consts()->start());
ld_largeoffset_unchecked(dst, toc_offset, toc, fixed_size);
return true;
}
bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
@ -446,6 +451,15 @@ void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {
assert(dest.is_bound() || target_pc == b_pc, "postcondition");
}
// 1 or 2 instructions
void MacroAssembler::bc_far_optimized(int boint, int biint, Label& dest) {
if (dest.is_bound() && is_within_range_of_bcxx(target(dest), pc())) {
bc(boint, biint, dest);
} else {
bc_far(boint, biint, dest, MacroAssembler::bc_far_optimize_on_relocate);
}
}
bool MacroAssembler::is_bc_far_at(address instruction_addr) {
return is_bc_far_variant1_at(instruction_addr) ||
is_bc_far_variant2_at(instruction_addr) ||
@ -496,7 +510,7 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
// variant 1, the 1st instruction contains the destination address:
//
// bcxx DEST
// endgroup
// nop
//
const int instruction_1 = *(int*)(instruction_addr);
boint = inv_bo_field(instruction_1);
@ -523,10 +537,10 @@ void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address des
// variant 1:
//
// bcxx DEST
// endgroup
// nop
//
masm.bc(boint, biint, dest);
masm.endgroup();
masm.nop();
} else {
// variant 2:
//
@ -810,7 +824,22 @@ void MacroAssembler::save_volatile_gprs(Register dst, int offset) {
std(R9, offset, dst); offset += 8;
std(R10, offset, dst); offset += 8;
std(R11, offset, dst); offset += 8;
std(R12, offset, dst);
std(R12, offset, dst); offset += 8;
stfd(F0, offset, dst); offset += 8;
stfd(F1, offset, dst); offset += 8;
stfd(F2, offset, dst); offset += 8;
stfd(F3, offset, dst); offset += 8;
stfd(F4, offset, dst); offset += 8;
stfd(F5, offset, dst); offset += 8;
stfd(F6, offset, dst); offset += 8;
stfd(F7, offset, dst); offset += 8;
stfd(F8, offset, dst); offset += 8;
stfd(F9, offset, dst); offset += 8;
stfd(F10, offset, dst); offset += 8;
stfd(F11, offset, dst); offset += 8;
stfd(F12, offset, dst); offset += 8;
stfd(F13, offset, dst);
}
// For verify_oops.
@ -825,7 +854,22 @@ void MacroAssembler::restore_volatile_gprs(Register src, int offset) {
ld(R9, offset, src); offset += 8;
ld(R10, offset, src); offset += 8;
ld(R11, offset, src); offset += 8;
ld(R12, offset, src);
ld(R12, offset, src); offset += 8;
lfd(F0, offset, src); offset += 8;
lfd(F1, offset, src); offset += 8;
lfd(F2, offset, src); offset += 8;
lfd(F3, offset, src); offset += 8;
lfd(F4, offset, src); offset += 8;
lfd(F5, offset, src); offset += 8;
lfd(F6, offset, src); offset += 8;
lfd(F7, offset, src); offset += 8;
lfd(F8, offset, src); offset += 8;
lfd(F9, offset, src); offset += 8;
lfd(F10, offset, src); offset += 8;
lfd(F11, offset, src); offset += 8;
lfd(F12, offset, src); offset += 8;
lfd(F13, offset, src);
}
void MacroAssembler::save_LR_CR(Register tmp) {
@ -908,7 +952,7 @@ void MacroAssembler::push_frame(unsigned int bytes, Register tmp) {
if (is_simm(-offset, 16)) {
stdu(R1_SP, -offset, R1_SP);
} else {
load_const(tmp, -offset);
load_const_optimized(tmp, -offset);
stdux(R1_SP, R1_SP, tmp);
}
}
@ -1090,20 +1134,21 @@ address MacroAssembler::call_c_using_toc(const FunctionDescriptor* fd,
assert(fd->entry() != NULL, "function must be linked");
AddressLiteral fd_entry(fd->entry());
load_const_from_method_toc(R11, fd_entry, toc);
bool success = load_const_from_method_toc(R11, fd_entry, toc, /*fixed_size*/ true);
mtctr(R11);
if (fd->env() == NULL) {
li(R11, 0);
nop();
} else {
AddressLiteral fd_env(fd->env());
load_const_from_method_toc(R11, fd_env, toc);
success = success && load_const_from_method_toc(R11, fd_env, toc, /*fixed_size*/ true);
}
AddressLiteral fd_toc(fd->toc());
load_toc_from_toc(R2_TOC, fd_toc, toc);
// R2_TOC is killed.
// Set R2_TOC (load from toc)
success = success && load_const_from_method_toc(R2_TOC, fd_toc, toc, /*fixed_size*/ true);
bctrl();
_last_calls_return_pc = pc();
if (!success) { return NULL; }
} else {
// It's a friend function, load the entry point and don't care about
// toc and env. Use an optimizable call instruction, but ensure the
@ -1367,11 +1412,6 @@ void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_valu
bool preset_result_reg = (int_flag_success != dest_current_value && int_flag_success != compare_value &&
int_flag_success != exchange_value && int_flag_success != addr_base);
// release/fence semantics
if (semantics & MemBarRel) {
release();
}
if (use_result_reg && preset_result_reg) {
li(int_flag_success, 0); // preset (assume cas failed)
}
@ -1383,6 +1423,11 @@ void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_valu
bne(flag, failed);
}
// release/fence semantics
if (semantics & MemBarRel) {
release();
}
// atomic emulation loop
bind(retry);
@ -1462,11 +1507,6 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
int_flag_success!=exchange_value && int_flag_success!=addr_base);
assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
// release/fence semantics
if (semantics & MemBarRel) {
release();
}
if (use_result_reg && preset_result_reg) {
li(int_flag_success, 0); // preset (assume cas failed)
}
@ -1478,6 +1518,11 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
bne(flag, failed);
}
// release/fence semantics
if (semantics & MemBarRel) {
release();
}
// atomic emulation loop
bind(retry);
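The barrier reordering in these hunks, together with the check-before-ldarx option the callers above pass, amounts to the following shape in C++ (a sketch only; std::atomic stands in for the ldarx/stdcx. loop, and the release barrier is only paid once a plain load says the CAS can succeed):

#include <atomic>
#include <cstdint>

// Sketch of cmpxchgd with MemBarRel | MemBarAcq semantics and the
// 'check without membar and ldarx first' option enabled.
static bool cas_with_precheck(std::atomic<intptr_t>* where,
                              intptr_t compare_value, intptr_t exchange_value) {
  if (where->load(std::memory_order_relaxed) != compare_value) {
    return false;                                // fail fast: no barrier, no reservation
  }
  intptr_t expected = compare_value;
  // acq_rel: release before the conditional store, acquire after it.
  return where->compare_exchange_strong(expected, exchange_value,
                                        std::memory_order_acq_rel);
}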
@ -1501,8 +1546,6 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag,
li(int_flag_success, 1);
}
// POWER6 doesn't need isync in CAS.
// Always emit isync to be on the safe side.
if (semantics & MemBarFenceAfter) {
fence();
} else if (semantics & MemBarAcq) {
@ -1627,13 +1670,14 @@ void MacroAssembler::lookup_virtual_method(Register recv_klass,
}
/////////////////////////////////////////// subtype checking ////////////////////////////////////////////
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp1_reg,
Register temp2_reg,
Label& L_success,
Label& L_failure) {
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterOrConstant super_check_offset) {
const Register check_cache_offset = temp1_reg;
const Register cached_super = temp2_reg;
@ -1643,6 +1687,18 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
int sco_offset = in_bytes(Klass::super_check_offset_offset());
int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
bool need_slow_path = (must_load_sco || super_check_offset.constant_or_zero() == sco_offset);
Label L_fallthrough;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1 ||
(L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
"at most one NULL in the batch, usually");
// If the pointers are equal, we are done (e.g., String[] elements).
// This self-check enables sharing of secondary supertype arrays among
// non-primary types such as array-of-interface. Otherwise, each such
@ -1651,15 +1707,20 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
// type checks are in fact trivially successful in this manner,
// so we get a nicely predicted branch right at the start of the check.
cmpd(CCR0, sub_klass, super_klass);
beq(CCR0, L_success);
beq(CCR0, *L_success);
// Check the supertype display:
if (must_load_sco) {
// The super check offset is always positive...
lwz(check_cache_offset, sco_offset, super_klass);
super_check_offset = RegisterOrConstant(check_cache_offset);
// super_check_offset is register.
assert_different_registers(sub_klass, super_klass, cached_super, super_check_offset.as_register());
}
// The loaded value is the offset from KlassOopDesc.
ldx(cached_super, check_cache_offset, sub_klass);
ld(cached_super, super_check_offset, sub_klass);
cmpd(CCR0, cached_super, super_klass);
beq(CCR0, L_success);
// This check has worked decisively for primary supers.
// Secondary supers are sought in the super_cache ('super_cache_addr').
@ -1672,9 +1733,39 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
// So if it was a primary super, we can just fail immediately.
// Otherwise, it's the slow path for us (no success at this point).
cmpwi(CCR0, check_cache_offset, sc_offset);
bne(CCR0, L_failure);
// bind(slow_path); // fallthru
#define FINAL_JUMP(label) if (&(label) != &L_fallthrough) { b(label); }
if (super_check_offset.is_register()) {
beq(CCR0, *L_success);
cmpwi(CCR0, super_check_offset.as_register(), sc_offset);
if (L_failure == &L_fallthrough) {
beq(CCR0, *L_slow_path);
} else {
bne(CCR0, *L_failure);
FINAL_JUMP(*L_slow_path);
}
} else {
if (super_check_offset.as_constant() == sc_offset) {
// Need a slow path; fast failure is impossible.
if (L_slow_path == &L_fallthrough) {
beq(CCR0, *L_success);
} else {
bne(CCR0, *L_slow_path);
FINAL_JUMP(*L_success);
}
} else {
// No slow path; it's a fast decision.
if (L_failure == &L_fallthrough) {
beq(CCR0, *L_success);
} else {
bne(CCR0, *L_failure);
FINAL_JUMP(*L_success);
}
}
}
bind(L_fallthrough);
#undef FINAL_JUMP
}
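The rewritten fast path boils down to the following decision structure; the C++ below uses a hypothetical, heavily simplified Klass so it compiles standalone (field layout and names are not HotSpot's):

#include <cstdint>

struct Klass {
  uint32_t super_check_offset;     // byte offset probed inside a subclass
  Klass*   secondary_super_cache;  // lives at sc_offset in the real layout
};

enum SubtypeResult { IS_SUBTYPE, NOT_SUBTYPE, NEED_SLOW_PATH };

// Equal klasses succeed immediately; otherwise the word at
// super->super_check_offset inside 'sub' is compared with 'super'. Only when
// that offset is the secondary-super-cache slot (sc_offset) is a miss
// inconclusive and the slow path (secondary supers scan) required.
static SubtypeResult subtype_fast_path(Klass* sub, Klass* super, uint32_t sc_offset) {
  if (sub == super) return IS_SUBTYPE;
  uint32_t off = super->super_check_offset;
  Klass* probed = *(Klass**)((char*)sub + off);
  if (probed == super) return IS_SUBTYPE;
  return (off == sc_offset) ? NEED_SLOW_PATH : NOT_SUBTYPE;
}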
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
@ -1698,7 +1789,7 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
ld(array_ptr, source_offset, sub_klass);
//assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated.");
// TODO: PPC port: assert(4 == arrayOopDesc::length_length_in_bytes(), "precondition violated.");
lwz(temp, length_offset, array_ptr);
cmpwi(CCR0, temp, 0);
beq(CCR0, result_reg!=noreg ? failure : fallthru); // length 0
@ -1719,8 +1810,9 @@ void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
bind(hit);
std(super_klass, target_offset, sub_klass); // save result to cache
if (result_reg != noreg) li(result_reg, 0); // load zero result (indicates a hit)
if (L_success != NULL) b(*L_success);
if (result_reg != noreg) { li(result_reg, 0); } // load zero result (indicates a hit)
if (L_success != NULL) { b(*L_success); }
else if (result_reg == noreg) { blr(); } // return with CR0.eq if neither label nor result reg provided
bind(fallthru);
}
@ -1732,7 +1824,7 @@ void MacroAssembler::check_klass_subtype(Register sub_klass,
Register temp2_reg,
Label& L_success) {
Label L_failure;
check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, L_success, L_failure);
check_klass_subtype_fast_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success, &L_failure);
check_klass_subtype_slow_path(sub_klass, super_klass, temp1_reg, temp2_reg, &L_success);
bind(L_failure); // Fallthru if not successful.
}
@ -1765,6 +1857,7 @@ RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
}
}
// Supports temp2_reg = R0.
void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj_reg,
Register mark_reg, Register temp_reg,
Register temp2_reg, Label& done, Label* slow_case) {
@ -1788,10 +1881,10 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
"biased locking makes assumptions about bit layout");
if (PrintBiasedLockingStatistics) {
load_const(temp_reg, (address) BiasedLocking::total_entry_count_addr(), temp2_reg);
lwz(temp2_reg, 0, temp_reg);
addi(temp2_reg, temp2_reg, 1);
stw(temp2_reg, 0, temp_reg);
load_const(temp2_reg, (address) BiasedLocking::total_entry_count_addr(), temp_reg);
lwzx(temp_reg, temp2_reg);
addi(temp_reg, temp_reg, 1);
stwx(temp_reg, temp2_reg);
}
andi(temp_reg, mark_reg, markOopDesc::biased_lock_mask_in_place);
@ -1809,10 +1902,10 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
if (PrintBiasedLockingStatistics) {
Label l;
bne(cr_reg, l);
load_const(mark_reg, (address) BiasedLocking::biased_lock_entry_count_addr());
lwz(temp2_reg, 0, mark_reg);
addi(temp2_reg, temp2_reg, 1);
stw(temp2_reg, 0, mark_reg);
load_const(temp2_reg, (address) BiasedLocking::biased_lock_entry_count_addr());
lwzx(mark_reg, temp2_reg);
addi(mark_reg, mark_reg, 1);
stwx(mark_reg, temp2_reg);
// restore mark_reg
ld(mark_reg, oopDesc::mark_offset_in_bytes(), obj_reg);
bind(l);
@ -1878,10 +1971,10 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
// need to revoke that bias. The revocation will occur in the
// interpreter runtime in the slow case.
if (PrintBiasedLockingStatistics) {
load_const(temp_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp2_reg);
lwz(temp2_reg, 0, temp_reg);
addi(temp2_reg, temp2_reg, 1);
stw(temp2_reg, 0, temp_reg);
load_const(temp2_reg, (address) BiasedLocking::anonymously_biased_lock_entry_count_addr(), temp_reg);
lwzx(temp_reg, temp2_reg);
addi(temp_reg, temp_reg, 1);
stwx(temp_reg, temp2_reg);
}
b(done);
@ -1892,15 +1985,14 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
// value as the comparison value when doing the cas to acquire the
// bias in the current epoch. In other words, we allow transfer of
// the bias from one thread to another directly in this situation.
andi(temp_reg, mark_reg, markOopDesc::age_mask_in_place);
orr(temp_reg, R16_thread, temp_reg);
load_klass(temp2_reg, obj_reg);
ld(temp2_reg, in_bytes(Klass::prototype_header_offset()), temp2_reg);
orr(temp_reg, temp_reg, temp2_reg);
load_klass(temp_reg, obj_reg);
andi(temp2_reg, mark_reg, markOopDesc::age_mask_in_place);
orr(temp2_reg, R16_thread, temp2_reg);
ld(temp_reg, in_bytes(Klass::prototype_header_offset()), temp_reg);
orr(temp_reg, temp2_reg, temp_reg);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
/*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
/*where=*/obj_reg,
@ -1913,10 +2005,10 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
// need to revoke that bias. The revocation will occur in the
// interpreter runtime in the slow case.
if (PrintBiasedLockingStatistics) {
load_const(temp_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp2_reg);
lwz(temp2_reg, 0, temp_reg);
addi(temp2_reg, temp2_reg, 1);
stw(temp2_reg, 0, temp_reg);
load_const(temp2_reg, (address) BiasedLocking::rebiased_lock_entry_count_addr(), temp_reg);
lwzx(temp_reg, temp2_reg);
addi(temp_reg, temp_reg, 1);
stwx(temp_reg, temp2_reg);
}
b(done);
@ -1952,10 +2044,10 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj
if (PrintBiasedLockingStatistics) {
Label l;
bne(cr_reg, l);
load_const(temp_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp2_reg);
lwz(temp2_reg, 0, temp_reg);
addi(temp2_reg, temp2_reg, 1);
stw(temp2_reg, 0, temp_reg);
load_const(temp2_reg, (address) BiasedLocking::revoked_lock_entry_count_addr(), temp_reg);
lwzx(temp_reg, temp2_reg);
addi(temp_reg, temp_reg, 1);
stwx(temp_reg, temp2_reg);
bind(l);
}
@ -1977,6 +2069,109 @@ void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mar
beq(cr_reg, done);
}
// allocation (for C1)
void MacroAssembler::eden_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Register t2, // temp register
Label& slow_case // continuation point if fast allocation fails
) {
b(slow_case);
}
void MacroAssembler::tlab_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Label& slow_case // continuation point if fast allocation fails
) {
// make sure arguments make sense
assert_different_registers(obj, var_size_in_bytes, t1);
assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size");
assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment");
const Register new_top = t1;
//verify_tlab(); not implemented
ld(obj, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
ld(R0, in_bytes(JavaThread::tlab_end_offset()), R16_thread);
if (var_size_in_bytes == noreg) {
addi(new_top, obj, con_size_in_bytes);
} else {
add(new_top, obj, var_size_in_bytes);
}
cmpld(CCR0, new_top, R0);
bc_far_optimized(Assembler::bcondCRbiIs1, bi0(CCR0, Assembler::greater), slow_case);
#ifdef ASSERT
// make sure new free pointer is properly aligned
{
Label L;
andi_(R0, new_top, MinObjAlignmentInBytesMask);
beq(CCR0, L);
stop("updated TLAB free is not properly aligned", 0x934);
bind(L);
}
#endif // ASSERT
// update the tlab top pointer
std(new_top, in_bytes(JavaThread::tlab_top_offset()), R16_thread);
//verify_tlab(); not implemented
}
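tlab_allocate above is a standard bump-pointer allocation against the thread-local buffer; a single-threaded C++ sketch (the Tlab struct is a stand-in for the JavaThread tlab_top/tlab_end fields, not the VM's types):

#include <cstddef>

struct Tlab { char* top; char* end; };           // stand-in for the thread's TLAB fields

// Returns the new object's address, or nullptr to signal the slow path.
static void* tlab_allocate_sketch(Tlab* tlab, size_t size_in_bytes) {
  char* obj = tlab->top;
  char* new_top = obj + size_in_bytes;           // addi / add above
  if (new_top > tlab->end) {                     // cmpld + branch to slow_case
    return nullptr;                              // does not fit: refill / eden / runtime
  }
  tlab->top = new_top;                           // std to tlab_top_offset
  return obj;
}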
void MacroAssembler::tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case) {
unimplemented("tlab_refill");
}
void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2) {
unimplemented("incr_allocated_bytes");
}
address MacroAssembler::emit_trampoline_stub(int destination_toc_offset,
int insts_call_instruction_offset, Register Rtoc) {
// Start the stub.
address stub = start_a_stub(64);
if (stub == NULL) { return NULL; } // CodeCache full: bail out
// Create a trampoline stub relocation which relates this trampoline stub
// with the call instruction at insts_call_instruction_offset in the
// instructions code-section.
relocate(trampoline_stub_Relocation::spec(code()->insts()->start() + insts_call_instruction_offset));
const int stub_start_offset = offset();
// For java_to_interp stubs we use R11_scratch1 as scratch register
// and in call trampoline stubs we use R12_scratch2. This way we
// can distinguish them (see is_NativeCallTrampolineStub_at()).
Register reg_scratch = R12_scratch2;
// Now, create the trampoline stub's code:
// - load the TOC
// - load the call target from the constant pool
// - call
if (Rtoc == noreg) {
calculate_address_from_global_toc(reg_scratch, method_toc());
Rtoc = reg_scratch;
}
ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, Rtoc, false);
mtctr(reg_scratch);
bctr();
const address stub_start_addr = addr_at(stub_start_offset);
// Assert that the encoded destination_toc_offset can be identified and that it is correct.
assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
"encoded offset into the constant pool must match");
// Trampoline_stub_size should be good.
assert((uint)(offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
// End the stub.
end_a_stub();
return stub;
}
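
// emit_trampoline_stub returns NULL when no stub space is left, so every caller has to treat
// that as a code-cache-full bailout. A short usage fragment mirroring the ppc.ad caller further
// down in this change (toc_offset and call_offset are placeholder names):
address stub = __ emit_trampoline_stub(toc_offset, call_offset);
if (stub == NULL) {
  ciEnv::current()->record_out_of_memory_failure();  // code cache full: bail out of compilation
  return;
}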
// TM on PPC64.
void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
Label retry;
@ -2387,17 +2582,16 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
// Must fence, otherwise, preceding store(s) may float below cmpxchg.
// Compare object markOop with mark and if equal exchange scratch1 with object markOop.
// CmpxchgX sets cr_reg to cmpX(current, displaced).
membar(Assembler::StoreStore);
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
/*compare_value=*/displaced_header,
/*exchange_value=*/box,
/*where=*/oop,
MacroAssembler::MemBarAcq,
MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
MacroAssembler::cmpxchgx_hint_acquire_lock(),
noreg,
&cas_failed);
&cas_failed,
/*check without membar and ldarx first*/true);
assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
// If the compare-and-exchange succeeded, then we found an unlocked
@ -2410,8 +2604,7 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
// Check if the owner is self by comparing the value in the markOop of object
// (current_header) with the stack pointer.
sub(current_header, current_header, R1_SP);
load_const_optimized(temp, (address) (~(os::vm_page_size()-1) |
markOopDesc::lock_mask_in_place));
load_const_optimized(temp, ~(os::vm_page_size()-1) | markOopDesc::lock_mask_in_place);
and_(R0/*==0?*/, current_header, temp);
// If condition is true we are cont and hence we can store 0 as the
@ -2437,8 +2630,6 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register
// Try to CAS m->owner from NULL to current thread.
addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
li(displaced_header, 0);
// CmpxchgX sets flag to cmpX(current, displaced).
cmpxchgd(/*flag=*/flag,
/*current_value=*/current_header,
/*compare_value=*/(intptr_t)0,
@ -2928,31 +3119,12 @@ void MacroAssembler::load_klass(Register dst, Register src) {
}
}
void MacroAssembler::load_klass_with_trap_null_check(Register dst, Register src) {
if (!os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
trap_null_check(src);
}
}
load_klass(dst, src);
}
void MacroAssembler::reinit_heapbase(Register d, Register tmp) {
if (Universe::heap() != NULL) {
load_const_optimized(R30, Universe::narrow_ptrs_base(), tmp);
} else {
// Heap not yet allocated. Load indirectly.
int simm16_offset = load_const_optimized(R30, Universe::narrow_ptrs_base_addr(), tmp, true);
ld(R30, simm16_offset, R30);
}
}
// Clear Array
// Kills both input registers. tmp == R0 is allowed.
void MacroAssembler::clear_memory_doubleword(Register base_ptr, Register cnt_dwords, Register tmp) {
// Procedure for large arrays (uses data cache block zero instruction).
Label startloop, fast, fastloop, small_rest, restloop, done;
const int cl_size = VM_Version::get_cache_line_size(),
const int cl_size = VM_Version::L1_data_cache_line_size(),
cl_dwords = cl_size>>3,
cl_dw_addr_bits = exact_log2(cl_dwords),
dcbz_min = 1; // Min count of dcbz executions, needs to be >0.
@ -4025,7 +4197,7 @@ void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
bind(L_check_1);
addi(idx, idx, 0x2);
andi_(idx, idx, 0x1) ;
andi_(idx, idx, 0x1);
addic_(idx, idx, -1);
blt(CCR0, L_post_third_loop_done);
@ -4255,17 +4427,42 @@ void MacroAssembler::verify_oop(Register oop, const char* msg) {
address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
const Register tmp = R11; // Will be preserved.
const int nbytes_save = 11*8; // Volatile gprs except R0.
const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
save_volatile_gprs(R1_SP, -nbytes_save); // except R0
if (oop == tmp) mr(R4_ARG2, oop);
mr_if_needed(R4_ARG2, oop);
save_LR_CR(tmp); // save in old frame
push_frame_reg_args(nbytes_save, tmp);
// load FunctionDescriptor** / entry_address *
load_const_optimized(tmp, fd, R0);
// load FunctionDescriptor* / entry_address
ld(tmp, 0, tmp);
load_const_optimized(R3_ARG1, (address)msg, R0);
// Call destination for its side effect.
call_c(tmp);
pop_frame();
restore_LR_CR(tmp);
restore_volatile_gprs(R1_SP, -nbytes_save); // except R0
}
void MacroAssembler::verify_oop_addr(RegisterOrConstant offs, Register base, const char* msg) {
if (!VerifyOops) {
return;
}
address/* FunctionDescriptor** */fd = StubRoutines::verify_oop_subroutine_entry_address();
const Register tmp = R11; // Will be preserved.
const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
save_volatile_gprs(R1_SP, -nbytes_save); // except R0
ld(R4_ARG2, offs, base);
save_LR_CR(tmp); // save in old frame
push_frame_reg_args(nbytes_save, tmp);
// load FunctionDescriptor** / entry_address *
load_const_optimized(tmp, fd, R0);
// load FunctionDescriptor* / entry_address
ld(tmp, 0, tmp);
if (oop != tmp) mr_if_needed(R4_ARG2, oop);
load_const_optimized(R3_ARG1, (address)msg, R0);
// Call destination for its side effect.
call_c(tmp);

View file

@ -119,11 +119,8 @@ class MacroAssembler: public Assembler {
// Emits an oop const to the constant pool, loads the constant, and
// sets a relocation info with address current_pc.
void load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc);
void load_toc_from_toc(Register dst, AddressLiteral& a, Register toc) {
assert(dst == R2_TOC, "base register must be TOC");
load_const_from_method_toc(dst, a, toc);
}
// Returns true if successful.
bool load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc, bool fixed_size = false);
static bool is_load_const_from_method_toc_at(address a);
static int get_offset_of_load_const_from_method_toc_at(address a);
@ -174,6 +171,7 @@ class MacroAssembler: public Assembler {
// optimize: flag for telling the conditional far branch to optimize
// itself when relocated.
void bc_far(int boint, int biint, Label& dest, int optimize);
void bc_far_optimized(int boint, int biint, Label& dest); // 1 or 2 instructions
// Relocation of conditional far branches.
static bool is_bc_far_at(address instruction_addr);
static address get_dest_of_bc_far_at(address instruction_addr);
@ -262,6 +260,7 @@ class MacroAssembler: public Assembler {
// some ABI-related functions
void save_nonvolatile_gprs( Register dst_base, int offset);
void restore_nonvolatile_gprs(Register src_base, int offset);
enum { num_volatile_regs = 11 + 14 }; // GPR + FPR
void save_volatile_gprs( Register dst_base, int offset);
void restore_volatile_gprs(Register src_base, int offset);
void save_LR_CR( Register tmp); // tmp contains LR on return.
@ -461,8 +460,10 @@ class MacroAssembler: public Assembler {
Register super_klass,
Register temp1_reg,
Register temp2_reg,
Label& L_success,
Label& L_failure);
Label* L_success,
Label* L_failure,
Label* L_slow_path = NULL, // default fall through
RegisterOrConstant super_check_offset = RegisterOrConstant(-1));
// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
@ -507,6 +508,28 @@ class MacroAssembler: public Assembler {
// biased locking exit case failed.
void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
// allocation (for C1)
void eden_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Register t2, // temp register
Label& slow_case // continuation point if fast allocation fails
);
void tlab_allocate(
Register obj, // result: pointer to object after successful allocation
Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
int con_size_in_bytes, // object size in bytes if known at compile time
Register t1, // temp register
Label& slow_case // continuation point if fast allocation fails
);
void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case);
void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register t1, Register t2);
enum { trampoline_stub_size = 6 * 4 };
address emit_trampoline_stub(int destination_toc_offset, int insts_call_instruction_offset, Register Rtoc = noreg);
void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
void atomic_ori_int(Register addr, Register result, int uimm16);
@ -597,9 +620,7 @@ class MacroAssembler: public Assembler {
// Implicit or explicit null check, jumps to static address exception_entry.
inline void null_check_throw(Register a, int offset, Register temp_reg, address exception_entry);
// Check accessed object for null. Use SIGTRAP-based null checks on AIX.
inline void load_with_trap_null_check(Register d, int si16, Register s1);
inline void null_check(Register a, int offset, Label *Lis_null); // implicit only if Lis_null not provided
// Load heap oop and decompress. Loaded oop may not be null.
// Specify tmp to save one cycle.
@ -619,20 +640,17 @@ class MacroAssembler: public Assembler {
inline Register decode_heap_oop_not_null(Register d, Register src = noreg);
// Null allowed.
inline Register encode_heap_oop(Register d, Register src); // Prefer null check in GC barrier!
inline void decode_heap_oop(Register d);
// Load/Store klass oop from klass field. Compress.
void load_klass(Register dst, Register src);
void load_klass_with_trap_null_check(Register dst, Register src);
void store_klass(Register dst_oop, Register klass, Register tmp = R0);
void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
static int instr_size_for_decode_klass_not_null();
void decode_klass_not_null(Register dst, Register src = noreg);
Register encode_klass_not_null(Register dst, Register src = noreg);
// Load common heap base into register.
void reinit_heapbase(Register d, Register tmp = noreg);
// SIGTRAP-based range checks for arrays.
inline void trap_range_check_l(Register a, Register b);
inline void trap_range_check_l(Register a, int si16);
@ -750,6 +768,7 @@ class MacroAssembler: public Assembler {
// Emit code to verify that reg contains a valid oop if +VerifyOops is set.
void verify_oop(Register reg, const char* s = "broken oop");
void verify_oop_addr(RegisterOrConstant offs, Register base, const char* s = "contains broken oop");
// TODO: verify method and klass metadata (compare against vptr?)
void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}

View file

@ -70,9 +70,11 @@ inline void MacroAssembler::endgroup_if_needed(bool needed) {
}
inline void MacroAssembler::membar(int bits) {
// TODO: use elemental_membar(bits) for Power 8 and disable optimization of acquire-release
// (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))
if (bits & StoreLoad) sync(); else lwsync();
// Comment: Usage of elemental_membar(bits) is not recommended for Power 8.
// If elemental_membar(bits) is used, disable optimization of acquire-release
// (Matcher::post_membar_release where we use PPC64_ONLY(xop == Op_MemBarRelease ||))!
if (bits & StoreLoad) { sync(); }
else if (bits) { lwsync(); }
}
inline void MacroAssembler::release() { membar(LoadStore | StoreStore); }
inline void MacroAssembler::acquire() { membar(LoadLoad | LoadStore); }
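
// The rewritten membar now also skips emitting a barrier entirely for an empty bit set; a small
// standalone sketch of the selection logic (the bit values are illustrative assumptions, only the
// decision structure matches the code above):
enum MemBarBits { LoadLoad = 1, StoreLoad = 2, LoadStore = 4, StoreStore = 8 };
static const char* ppc_barrier_for(int bits) {
  if (bits & StoreLoad) return "sync";    // store->load ordering needs the full barrier
  if (bits)             return "lwsync";  // any other requested ordering is covered by lwsync
  return "none";                          // new behavior: membar(0) emits nothing
}
// release() -> ppc_barrier_for(LoadStore | StoreStore) == "lwsync"
// acquire() -> ppc_barrier_for(LoadLoad  | LoadStore)  == "lwsync"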
@ -86,7 +88,7 @@ inline address MacroAssembler::global_toc() {
// Offset of given address to the global TOC.
inline int MacroAssembler::offset_to_global_toc(const address addr) {
intptr_t offset = (intptr_t)addr - (intptr_t)MacroAssembler::global_toc();
assert(Assembler::is_simm((long)offset, 31) && offset >= 0, "must be in range");
assert(Assembler::is_uimm((long)offset, 31), "must be in range");
return (int)offset;
}
@ -98,7 +100,7 @@ inline address MacroAssembler::method_toc() {
// Offset of given address to current method's TOC.
inline int MacroAssembler::offset_to_method_toc(address addr) {
intptr_t offset = (intptr_t)addr - (intptr_t)method_toc();
assert(is_simm((long)offset, 31) && offset >= 0, "must be in range");
assert(Assembler::is_uimm((long)offset, 31), "must be in range");
return (int)offset;
}
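
// Both asserts above switch from is_simm(offset, 31) && offset >= 0 to is_uimm(offset, 31).
// Assuming the usual Assembler range helpers (n-bit signed vs. unsigned immediate ranges), this
// widens the accepted range from [0, 2^30) to the full non-negative int range [0, 2^31); a small
// self-contained check of that difference:
#include <cstdint>
#include <cassert>
static bool is_simm(int64_t x, unsigned n) { return -(INT64_C(1) << (n - 1)) <= x && x < (INT64_C(1) << (n - 1)); }
static bool is_uimm(int64_t x, unsigned n) { return 0 <= x && x < (INT64_C(1) << n); }
int main() {
  const int64_t offset = INT64_C(1) << 30;
  assert(!(is_simm(offset, 31) && offset >= 0));  // rejected by the old assert
  assert(is_uimm(offset, 31));                    // accepted by the new assert
  return 0;
}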
@ -190,13 +192,13 @@ inline bool MacroAssembler::is_bc_far_variant1_at(address instruction_addr) {
// Variant 1, the 1st instruction contains the destination address:
//
// bcxx DEST
// endgroup
// nop
//
const int instruction_1 = *(int*)(instruction_addr);
const int instruction_2 = *(int*)(instruction_addr + 4);
return is_bcxx(instruction_1) &&
(inv_bd_field(instruction_1, (intptr_t)instruction_addr) != (intptr_t)(instruction_addr + 2*4)) &&
is_endgroup(instruction_2);
is_nop(instruction_2);
}
// Relocation of conditional far branches.
@ -302,13 +304,17 @@ inline void MacroAssembler::null_check_throw(Register a, int offset, Register te
}
}
inline void MacroAssembler::load_with_trap_null_check(Register d, int si16, Register s1) {
if (!os::zero_page_read_protected()) {
inline void MacroAssembler::null_check(Register a, int offset, Label *Lis_null) {
if (!ImplicitNullChecks || needs_explicit_null_check(offset) || !os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
trap_null_check(s1);
assert(UseSIGTRAP, "sanity");
trap_null_check(a);
} else if (Lis_null) {
Label ok;
cmpdi(CCR0, a, 0);
beq(CCR0, *Lis_null);
}
}
ld(d, si16, s1);
}
inline void MacroAssembler::load_heap_oop_not_null(Register d, RegisterOrConstant offs, Register s1, Register tmp) {
@ -365,6 +371,26 @@ inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register sr
return current; // Encoded oop is in this register.
}
inline Register MacroAssembler::encode_heap_oop(Register d, Register src) {
if (Universe::narrow_oop_base() != NULL) {
if (VM_Version::has_isel()) {
cmpdi(CCR0, src, 0);
Register co = encode_heap_oop_not_null(d, src);
assert(co == d, "sanity");
isel_0(d, CCR0, Assembler::equal);
} else {
Label isNull;
or_(d, src, src); // move and compare 0
beq(CCR0, isNull);
encode_heap_oop_not_null(d, src);
bind(isNull);
}
return d;
} else {
return encode_heap_oop_not_null(d, src);
}
}
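
// encode_heap_oop preserves NULL either with isel_0 or with an explicit branch around the
// non-null encoding; a value-level sketch of what the emitted code computes (base and shift
// stand for Universe::narrow_oop_base()/narrow_oop_shift(), the formula is the standard
// compressed-oops encoding):
#include <cstdint>
static uint32_t encode_heap_oop_sketch(uintptr_t oop, uintptr_t base, int shift) {
  if (oop == 0) {
    return 0;                                   // NULL stays NULL (isel_0 / branch-over path)
  }
  return (uint32_t)((oop - base) >> shift);     // encode_heap_oop_not_null path
}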
inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
Universe::narrow_oop_shift() != 0) {

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -504,8 +504,7 @@ void trace_method_handle_stub(const char* adaptername,
frame cur_frame = os::current_frame();
// Robust search of trace_calling_frame (independent of inlining).
// Assumes saved_regs comes from a pusha in the trace_calling_frame.
assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?");
assert(cur_frame.sp() <= saved_regs, "registers not saved on stack ?");
frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame);
while (trace_calling_frame.fp() < saved_regs) {
trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame);
@ -539,7 +538,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt
BLOCK_COMMENT("trace_method_handle {");
const Register tmp = R11; // Will be preserved.
const int nbytes_save = 11*8; // volatile gprs except R0
const int nbytes_save = MacroAssembler::num_volatile_regs * 8;
__ save_volatile_gprs(R1_SP, -nbytes_save); // except R0
__ save_LR_CR(tmp); // save in old frame

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -65,13 +65,17 @@ address NativeCall::destination() const {
address destination = Assembler::bxx_destination(addr);
// Do we use a trampoline stub for this call?
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie.
assert(cb && cb->is_nmethod(), "sanity");
nmethod *nm = (nmethod *)cb;
if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
// Yes we do, so get the destination from the trampoline stub.
const address trampoline_stub_addr = destination;
destination = NativeCallTrampolineStub_at(trampoline_stub_addr)->destination(nm);
// Trampoline stubs are located behind the main code.
if (destination > addr) {
// Filter out recursive method invocation (call to verified/unverified entry point).
CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie.
assert(cb && cb->is_nmethod(), "sanity");
nmethod *nm = (nmethod *)cb;
if (nm->stub_contains(destination) && is_NativeCallTrampolineStub_at(destination)) {
// Yes we do, so get the destination from the trampoline stub.
const address trampoline_stub_addr = destination;
destination = NativeCallTrampolineStub_at(trampoline_stub_addr)->destination(nm);
}
}
return destination;
@ -267,7 +271,7 @@ void NativeMovConstReg::set_data(intptr_t data) {
oop_addr = r->oop_addr();
*oop_addr = cast_to_oop(data);
} else {
assert(oop_addr == r->oop_addr(), "must be only one set-oop here") ;
assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
}
}
if (iter.type() == relocInfo::metadata_type) {
@ -351,6 +355,27 @@ void NativeJump::verify() {
}
#endif // ASSERT
void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
CodeBuffer cb(code_pos, BytesPerInstWord + 1);
MacroAssembler* a = new MacroAssembler(&cb);
a->b(entry);
ICache::ppc64_flush_icache_bytes(code_pos, NativeGeneralJump::instruction_size);
}
// MT-safe patching of a jmp instruction.
void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) {
// Bytes beyond offset NativeGeneralJump::instruction_size are copied by caller.
// Finally patch out the jump.
volatile juint *jump_addr = (volatile juint*)instr_addr;
// Release not needed because caller uses invalidate_range after copying the remaining bytes.
//OrderAccess::release_store(jump_addr, *((juint*)code_buffer));
*jump_addr = *((juint*)code_buffer); // atomically store code over branch instruction
ICache::ppc64_flush_icache_bytes(instr_addr, NativeGeneralJump::instruction_size);
}
//-------------------------------------------------------------------
// Call trampoline stubs.
@ -364,10 +389,12 @@ void NativeJump::verify() {
//
address NativeCallTrampolineStub::encoded_destination_addr() const {
address instruction_addr = addr_at(2 * BytesPerInstWord);
assert(MacroAssembler::is_ld_largeoffset(instruction_addr),
"must be a ld with large offset (from the constant pool)");
address instruction_addr = addr_at(0 * BytesPerInstWord);
if (!MacroAssembler::is_ld_largeoffset(instruction_addr)) {
instruction_addr = addr_at(2 * BytesPerInstWord);
assert(MacroAssembler::is_ld_largeoffset(instruction_addr),
"must be a ld with large offset (from the constant pool)");
}
return instruction_addr;
}

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2013 SAP AG. All rights reserved.
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -50,6 +50,8 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
friend class Relocation;
public:
bool is_jump() { return Assembler::is_b(long_at(0)); } // See NativeGeneralJump.
bool is_sigtrap_ic_miss_check() {
assert(UseSIGTRAP, "precondition");
return MacroAssembler::is_trap_ic_miss_check(long_at(0));
@ -235,8 +237,8 @@ inline NativeFarCall* nativeFarCall_at(address instr) {
return call;
}
// An interface for accessing/manipulating native set_oop imm, reg instructions.
// (used to manipulate inlined data references, etc.)
// An interface for accessing/manipulating native set_oop imm, reg instructions
// (used to manipulate inlined data references, etc.).
class NativeMovConstReg: public NativeInstruction {
public:
@ -384,10 +386,21 @@ class NativeCallTrampolineStub : public NativeInstruction {
void set_destination(address new_destination);
};
// Note: Other stubs must not begin with this pattern.
inline bool is_NativeCallTrampolineStub_at(address address) {
int first_instr = *(int*)address;
return Assembler::is_addis(first_instr) &&
(Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2;
// calculate_address_from_global_toc and long form of ld_largeoffset_unchecked begin with addis with target R12
if (Assembler::is_addis(first_instr) &&
(Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2) return true;
// short form of ld_largeoffset_unchecked is ld which is followed by mtctr
int second_instr = *((int*)address + 1);
if (Assembler::is_ld(first_instr) &&
(Register)(intptr_t)Assembler::inv_rt_field(first_instr) == R12_scratch2 &&
Assembler::is_mtctr(second_instr) &&
(Register)(intptr_t)Assembler::inv_rs_field(second_instr) == R12_scratch2) return true;
return false;
}
inline NativeCallTrampolineStub* NativeCallTrampolineStub_at(address address) {
@ -395,4 +408,102 @@ inline NativeCallTrampolineStub* NativeCallTrampolineStub_at(address address) {
return (NativeCallTrampolineStub*)address;
}
///////////////////////////////////////////////////////////////////////////////////////////////////
//-------------------------------------
// N a t i v e G e n e r a l J u m p
//-------------------------------------
// Despite the name, handles only simple branches.
class NativeGeneralJump;
inline NativeGeneralJump* nativeGeneralJump_at(address address);
// Currently only implemented as single unconditional branch.
class NativeGeneralJump: public NativeInstruction {
public:
enum PPC64_specific_constants {
instruction_size = 4
};
address instruction_address() const { return addr_at(0); }
// Creation.
friend inline NativeGeneralJump* nativeGeneralJump_at(address addr) {
NativeGeneralJump* jump = (NativeGeneralJump*)(addr);
DEBUG_ONLY( jump->verify(); )
return jump;
}
// Insertion of native general jump instruction.
static void insert_unconditional(address code_pos, address entry);
address jump_destination() const {
DEBUG_ONLY( verify(); )
return addr_at(0) + Assembler::inv_li_field(long_at(0));
}
void set_jump_destination(address dest) {
DEBUG_ONLY( verify(); )
insert_unconditional(addr_at(0), dest);
}
static void replace_mt_safe(address instr_addr, address code_buffer);
void verify() const { guarantee(Assembler::is_b(long_at(0)), "invalid NativeGeneralJump"); }
};
// An interface for accessing/manipulating native load int (load_const32).
class NativeMovRegMem;
inline NativeMovRegMem* nativeMovRegMem_at(address address);
class NativeMovRegMem: public NativeInstruction {
public:
enum PPC64_specific_constants {
instruction_size = 8
};
address instruction_address() const { return addr_at(0); }
intptr_t offset() const {
#ifdef VM_LITTLE_ENDIAN
short *hi_ptr = (short*)(addr_at(0));
short *lo_ptr = (short*)(addr_at(4));
#else
short *hi_ptr = (short*)(addr_at(0) + 2);
short *lo_ptr = (short*)(addr_at(4) + 2);
#endif
return ((*hi_ptr) << 16) | ((*lo_ptr) & 0xFFFF);
}
void set_offset(intptr_t x) {
#ifdef VM_LITTLE_ENDIAN
short *hi_ptr = (short*)(addr_at(0));
short *lo_ptr = (short*)(addr_at(4));
#else
short *hi_ptr = (short*)(addr_at(0) + 2);
short *lo_ptr = (short*)(addr_at(4) + 2);
#endif
*hi_ptr = x >> 16;
*lo_ptr = x & 0xFFFF;
ICache::ppc64_flush_icache_bytes(addr_at(0), NativeMovRegMem::instruction_size);
}
void add_offset_in_bytes(intptr_t radd_offset) {
set_offset(offset() + radd_offset);
}
void verify() const {
guarantee(Assembler::is_lis(long_at(0)), "load_const32 1st instr");
guarantee(Assembler::is_ori(long_at(4)), "load_const32 2nd instr");
}
private:
friend inline NativeMovRegMem* nativeMovRegMem_at(address address) {
NativeMovRegMem* test = (NativeMovRegMem*)address;
DEBUG_ONLY( test->verify(); )
return test;
}
};
#endif // CPU_PPC_VM_NATIVEINST_PPC_HPP
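
// NativeMovRegMem patches the 16-bit halves of a lis/ori pair; a small worked example of how
// offset() and set_offset() round-trip a value (standalone, mirrors the shift/mask logic above):
#include <cstdint>
#include <cassert>
int main() {
  int32_t offset = 0x12ABC;
  // set_offset(): the halves patched into the lis and ori instructions.
  int16_t hi = (int16_t)(offset >> 16);
  int16_t lo = (int16_t)(offset & 0xFFFF);
  // offset(): recombination as done by the accessor above.
  int32_t rebuilt = ((int32_t)hi << 16) | ((int32_t)lo & 0xFFFF);
  assert(rebuilt == offset);
  return 0;
}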

View file

@ -698,7 +698,7 @@ reg_class ctr_reg(SR_CTR);
// ----------------------------
reg_class flt_reg(
/*F0*/ // scratch
F0,
F1,
F2,
F3,
@ -735,7 +735,7 @@ reg_class flt_reg(
// Double precision float registers have virtual `high halves' that
// are needed by the allocator.
reg_class dbl_reg(
/*F0, F0_H*/ // scratch
F0, F0_H,
F1, F1_H,
F2, F2_H,
F3, F3_H,
@ -1040,8 +1040,6 @@ source_hpp %{ // Header information of the source block.
//---< Used for optimization in Compile::Shorten_branches >---
//--------------------------------------------------------------
const uint trampoline_stub_size = 6 * BytesPerInstWord;
class CallStubImpl {
public:
@ -1053,7 +1051,7 @@ class CallStubImpl {
// This doesn't need to be accurate to the byte, but it
// must be larger than or equal to the real size of the stub.
static uint size_call_trampoline() {
return trampoline_stub_size;
return MacroAssembler::trampoline_stub_size;
}
// number of relocations needed by a call trampoline stub
@ -1079,46 +1077,10 @@ source %{
// branch via CTR (LR/link still points to the call-site above)
void CallStubImpl::emit_trampoline_stub(MacroAssembler &_masm, int destination_toc_offset, int insts_call_instruction_offset) {
// Start the stub.
address stub = __ start_a_stub(Compile::MAX_stubs_size/2);
address stub = __ emit_trampoline_stub(destination_toc_offset, insts_call_instruction_offset);
if (stub == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
ciEnv::current()->record_out_of_memory_failure();
}
// For java_to_interp stubs we use R11_scratch1 as scratch register
// and in call trampoline stubs we use R12_scratch2. This way we
// can distinguish them (see is_NativeCallTrampolineStub_at()).
Register reg_scratch = R12_scratch2;
// Create a trampoline stub relocation which relates this trampoline stub
// with the call instruction at insts_call_instruction_offset in the
// instructions code-section.
__ relocate(trampoline_stub_Relocation::spec(__ code()->insts()->start() + insts_call_instruction_offset));
const int stub_start_offset = __ offset();
// Now, create the trampoline stub's code:
// - load the TOC
// - load the call target from the constant pool
// - call
__ calculate_address_from_global_toc(reg_scratch, __ method_toc());
__ ld_largeoffset_unchecked(reg_scratch, destination_toc_offset, reg_scratch, false);
__ mtctr(reg_scratch);
__ bctr();
const address stub_start_addr = __ addr_at(stub_start_offset);
// FIXME: Assert that the trampoline stub can be identified and patched.
// Assert that the encoded destination_toc_offset can be identified and that it is correct.
assert(destination_toc_offset == NativeCallTrampolineStub_at(stub_start_addr)->destination_toc_offset(),
"encoded offset into the constant pool must match");
// Trampoline_stub_size should be good.
assert((uint)(__ offset() - stub_start_offset) <= trampoline_stub_size, "should be good size");
assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
// End the stub.
__ end_a_stub();
}
//=============================================================================
@ -1156,6 +1118,10 @@ EmitCallOffsets emit_call_with_trampoline_stub(MacroAssembler &_masm, address en
if (!Compile::current()->in_scratch_emit_size()) {
// Put the entry point as a constant into the constant pool.
const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
if (entry_point_toc_addr == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return offsets;
}
const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
// Emit the trampoline stub which will be related to the branch-and-link below.
@ -2474,6 +2440,10 @@ encode %{
// Create a non-oop constant, no relocation needed.
// If it is an IC, it has a virtual_call_Relocation.
const_toc_addr = __ long_constant((jlong)$src$$constant);
if (const_toc_addr == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
// Get the constant's TOC offset.
toc_offset = __ offset_to_method_toc(const_toc_addr);
@ -2495,6 +2465,10 @@ encode %{
// Create a non-oop constant, no relocation needed.
// If it is an IC, it has a virtual_call_Relocation.
const_toc_addr = __ long_constant((jlong)$src$$constant);
if (const_toc_addr == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
// Get the constant's TOC offset.
const int toc_offset = __ offset_to_method_toc(const_toc_addr);
@ -2631,6 +2605,10 @@ encode %{
const_toc_addr = __ long_constant((jlong)$src$$constant);
}
if (const_toc_addr == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
// Get the constant's TOC offset.
toc_offset = __ offset_to_method_toc(const_toc_addr);
}
@ -2660,6 +2638,10 @@ encode %{
const_toc_addr = __ long_constant((jlong)$src$$constant);
}
if (const_toc_addr == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
// Get the constant's TOC offset.
const int toc_offset = __ offset_to_method_toc(const_toc_addr);
// Store the toc offset of the constant.
@ -3408,6 +3390,10 @@ encode %{
// Put the entry point as a constant into the constant pool.
const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
if (entry_point_toc_addr == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
// Emit the trampoline stub which will be related to the branch-and-link below.
@ -3433,76 +3419,6 @@ encode %{
}
%}
// Emit a method handle call.
//
// Method handle calls from compiled to compiled are going thru a
// c2i -> i2c adapter, extending the frame for their arguments. The
// caller however, returns directly to the compiled callee, that has
// to cope with the extended frame. We restore the original frame by
// loading the callers sp and adding the calculated framesize.
enc_class enc_java_handle_call(method meth) %{
// TODO: PPC port $archOpcode(ppc64Opcode_compound);
MacroAssembler _masm(&cbuf);
address entry_point = (address)$meth$$method;
// Remember the offset not the address.
const int start_offset = __ offset();
// The trampoline stub.
if (!ra_->C->in_scratch_emit_size()) {
// No entry point given, use the current pc.
// Make sure branch fits into
if (entry_point == 0) entry_point = __ pc();
// Put the entry point as a constant into the constant pool.
const address entry_point_toc_addr = __ address_constant(entry_point, RelocationHolder::none);
const int entry_point_toc_offset = __ offset_to_method_toc(entry_point_toc_addr);
// Emit the trampoline stub which will be related to the branch-and-link below.
CallStubImpl::emit_trampoline_stub(_masm, entry_point_toc_offset, start_offset);
if (ra_->C->env()->failing()) { return; } // Code cache may be full.
assert(_optimized_virtual, "methodHandle call should be a virtual call");
__ relocate(relocInfo::opt_virtual_call_type);
}
// The real call.
// Note: At this point we do not have the address of the trampoline
// stub, and the entry point might be too far away for bl, so __ pc()
// serves as dummy and the bl will be patched later.
cbuf.set_insts_mark();
__ bl(__ pc()); // Emits a relocation.
assert(_method, "execute next statement conditionally");
// The stub for call to interpreter.
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
if (stub == NULL) {
ciEnv::current()->record_failure("CodeCache is full");
return;
}
// Restore original sp.
__ ld(R11_scratch1, 0, R1_SP); // Load caller sp.
const long framesize = ra_->C->frame_slots() << LogBytesPerInt;
unsigned int bytes = (unsigned int)framesize;
long offset = Assembler::align_addr(bytes, frame::alignment_in_bytes);
if (Assembler::is_simm(-offset, 16)) {
__ addi(R1_SP, R11_scratch1, -offset);
} else {
__ load_const_optimized(R12_scratch2, -offset);
__ add(R1_SP, R11_scratch1, R12_scratch2);
}
#ifdef ASSERT
__ ld(R12_scratch2, 0, R1_SP); // Load from unextended_sp.
__ cmpd(CCR0, R11_scratch1, R12_scratch2);
__ asm_assert_eq("backlink changed", 0x8000);
#endif
// If fails should store backlink before unextending.
if (ra_->C->env()->failing()) {
return;
}
%}
// Second node of expanded dynamic call - the call.
enc_class enc_java_dynamic_call_sched(method meth) %{
// TODO: PPC port $archOpcode(ppc64Opcode_bl);
@ -3513,6 +3429,10 @@ encode %{
// Create a call trampoline stub for the given method.
const address entry_point = !($meth$$method) ? 0 : (address)$meth$$method;
const address entry_point_const = __ address_constant(entry_point, RelocationHolder::none);
if (entry_point_const == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
const int entry_point_const_toc_offset = __ offset_to_method_toc(entry_point_const);
CallStubImpl::emit_trampoline_stub(_masm, entry_point_const_toc_offset, __ offset());
if (ra_->C->env()->failing()) { return; } // Code cache may be full.
@ -3620,7 +3540,11 @@ encode %{
address virtual_call_meta_addr = __ pc();
// Load a clear inline cache.
AddressLiteral empty_ic((address) Universe::non_oop_word());
__ load_const_from_method_toc(ic_reg, empty_ic, Rtoc);
bool success = __ load_const_from_method_toc(ic_reg, empty_ic, Rtoc, /*fixed_size*/ true);
if (!success) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
// CALL to fixup routine. Fixup routine uses ScopeDesc info
// to determine who we intended to call.
__ relocate(virtual_call_Relocation::spec(virtual_call_meta_addr));
@ -3676,7 +3600,11 @@ encode %{
__ calculate_address_from_global_toc(Rtoc, __ method_toc());
// Put entry, env, toc into the constant pool, this needs up to 3 constant
// pool entries; call_c_using_toc will optimize the call.
__ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
bool success = __ call_c_using_toc(fd, relocInfo::runtime_call_type, Rtoc);
if (!success) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
#endif
// Check the ret_addr_offset.
@ -6263,6 +6191,10 @@ instruct loadConF(regF dst, immF src, iRegLdst toc) %{
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_lfs);
address float_address = __ float_constant($src$$constant);
if (float_address == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
__ lfs($dst$$FloatRegister, __ offset_to_method_toc(float_address), $toc$$Register);
%}
ins_pipe(pipe_class_memory);
@ -6284,6 +6216,10 @@ instruct loadConFComp(regF dst, immF src, iRegLdst toc) %{
FloatRegister Rdst = $dst$$FloatRegister;
Register Rtoc = $toc$$Register;
address float_address = __ float_constant($src$$constant);
if (float_address == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
int offset = __ offset_to_method_toc(float_address);
int hi = (offset + (1<<15))>>16;
int lo = offset - hi * (1<<16);
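
// The hi/lo split used by loadConFComp (and loadConDComp below) rounds the high half so the low
// half always fits the signed 16-bit displacement of the following load; a worked example with an
// arbitrary offset:
#include <cassert>
int main() {
  int offset = 0x1C000;                  // arbitrary example with bit 15 set in the low half
  int hi = (offset + (1 << 15)) >> 16;   // 2: rounded up because the plain low half would be >= 0x8000
  int lo = offset - hi * (1 << 16);      // -0x4000: fits a signed 16-bit displacement
  assert(-32768 <= lo && lo <= 32767);
  assert(hi * (1 << 16) + lo == offset); // addis(hi) plus the lo displacement rebuilds the offset
  return 0;
}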
@ -6318,7 +6254,12 @@ instruct loadConD(regD dst, immD src, iRegLdst toc) %{
size(4);
ins_encode %{
// TODO: PPC port $archOpcode(ppc64Opcode_lfd);
int offset = __ offset_to_method_toc(__ double_constant($src$$constant));
address float_address = __ double_constant($src$$constant);
if (float_address == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
int offset = __ offset_to_method_toc(float_address);
__ lfd($dst$$FloatRegister, offset, $toc$$Register);
%}
ins_pipe(pipe_class_memory);
@ -6340,7 +6281,11 @@ instruct loadConDComp(regD dst, immD src, iRegLdst toc) %{
FloatRegister Rdst = $dst$$FloatRegister;
Register Rtoc = $toc$$Register;
address float_address = __ double_constant($src$$constant);
int offset = __ offset_to_method_toc(float_address);
if (float_address == NULL) {
ciEnv::current()->record_out_of_memory_failure();
return;
}
int offset = __ offset_to_method_toc(float_address);
int hi = (offset + (1<<15))>>16;
int lo = offset - hi * (1<<16);
@ -11790,7 +11735,6 @@ instruct safePoint_poll_conPollAddr(rscratch2RegP poll) %{
instruct CallStaticJavaDirect(method meth) %{
match(CallStaticJava);
effect(USE meth);
predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
ins_cost(CALL_COST);
ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
@ -11801,20 +11745,6 @@ instruct CallStaticJavaDirect(method meth) %{
ins_pipe(pipe_class_call);
%}
// Schedulable version of call static node.
instruct CallStaticJavaDirectHandle(method meth) %{
match(CallStaticJava);
effect(USE meth);
predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
ins_cost(CALL_COST);
ins_num_consts(3 /* up to 3 patchable constants: inline cache, 2 call targets. */);
format %{ "CALL,static $meth \t// ==> " %}
ins_encode( enc_java_handle_call(meth) );
ins_pipe(pipe_class_call);
%}
// Call Java Dynamic Instruction
// Used by postalloc expand of CallDynamicJavaDirectSchedEx (actual call).

View file

@ -627,6 +627,9 @@ REGISTER_DECLARATION(Register, R27_constPoolCache, R27);
REGISTER_DECLARATION(Register, R28_mdx, R28);
#endif // CC_INTERP
REGISTER_DECLARATION(Register, R19_inline_cache_reg, R19);
REGISTER_DECLARATION(Register, R29_TOC, R29);
#ifndef DONT_USE_REGISTER_DEFINES
#define R21_tmp1 AS_REGISTER(Register, R21)
#define R22_tmp2 AS_REGISTER(Register, R22)
@ -648,6 +651,9 @@ REGISTER_DECLARATION(Register, R28_mdx, R28);
#define R28_mdx AS_REGISTER(Register, R28)
#endif
#define R19_inline_cache_reg AS_REGISTER(Register, R19)
#define R29_TOC AS_REGISTER(Register, R29)
#define CCR4_is_synced AS_REGISTER(ConditionRegister, CCR4)
#endif

View file

@ -84,13 +84,11 @@ address Relocation::pd_call_destination(address orig_addr) {
NativeConditionalFarBranch* branch = NativeConditionalFarBranch_at(inst_loc);
return branch->branch_destination();
} else {
// There are two instructions at the beginning of a stub, therefore we
// load at orig_addr + 8.
orig_addr = nativeCall_at(inst_loc)->get_trampoline();
if (orig_addr == NULL) {
return (address) -1;
} else {
return (address) nativeMovConstReg_at(orig_addr + 8)->data();
return ((NativeCallTrampolineStub*)orig_addr)->destination();
}
}
}

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2014 SAP AG. All rights reserved.
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -45,16 +45,6 @@
#ifdef COMPILER2
// SP adjustment (must use unextended SP) for method handle call sites
// during exception handling.
static intptr_t adjust_SP_for_methodhandle_callsite(JavaThread *thread) {
RegisterMap map(thread, false);
// The frame constructor will do the correction for us (see frame::adjust_unextended_SP).
frame mh_caller_frame = thread->last_frame().sender(&map);
assert(mh_caller_frame.is_compiled_frame(), "Only may reach here for compiled MH call sites");
return (intptr_t) mh_caller_frame.unextended_sp();
}
//------------------------------generate_exception_blob---------------------------
// Creates exception blob at the end.
// Using exception blob, this code is jumped from a compiled method.
@ -129,17 +119,10 @@ void OptoRuntime::generate_exception_blob() {
OopMapSet* oop_maps = new OopMapSet();
oop_maps->add_gc_map(calls_return_pc - start, map);
// Get unextended_sp for method handle call sites.
Label mh_callsite, mh_done; // Use a 2nd c call if it's a method handle call site.
__ lwa(R4_ARG2, in_bytes(JavaThread::is_method_handle_return_offset()), R16_thread);
__ cmpwi(CCR0, R4_ARG2, 0);
__ bne(CCR0, mh_callsite);
__ mtctr(R3_RET); // Move address of exception handler to SR_CTR.
__ reset_last_Java_frame();
__ pop_frame();
__ bind(mh_done);
// We have a handler in register SR_CTR (could be deopt blob).
// Get the exception oop.
@ -161,25 +144,6 @@ void OptoRuntime::generate_exception_blob() {
__ mtlr(R4_ARG2);
__ bctr();
// Same as above, but also set sp to unextended_sp.
__ bind(mh_callsite);
__ mr(R31, R3_RET); // Save branch address.
__ mr(R3_ARG1, R16_thread);
#if defined(ABI_ELFv2)
__ call_c((address) adjust_SP_for_methodhandle_callsite, relocInfo::none);
#else
__ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, adjust_SP_for_methodhandle_callsite), relocInfo::none);
#endif
// Returns unextended_sp in R3_RET.
__ mtctr(R31); // Move address of exception handler to SR_CTR.
__ reset_last_Java_frame();
__ mr(R1_SP, R3_RET); // Set sp to unextended_sp.
__ b(mh_done);
// Make sure all code is generated.
masm->flush();

View file

@ -62,7 +62,7 @@ class RegisterSaver {
// Support different return pc locations.
enum ReturnPCLocation {
return_pc_is_lr,
return_pc_is_r4,
return_pc_is_pre_saved,
return_pc_is_thread_saved_exception_pc
};
@ -241,16 +241,17 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble
__ mfcr(R31);
__ std(R31, _abi(cr), R1_SP);
switch (return_pc_location) {
case return_pc_is_lr: __ mflr(R31); break;
case return_pc_is_r4: __ mr(R31, R4); break;
case return_pc_is_thread_saved_exception_pc:
__ ld(R31, thread_(saved_exception_pc)); break;
case return_pc_is_lr: __ mflr(R31); break;
case return_pc_is_pre_saved: assert(return_pc_adjustment == 0, "unsupported"); break;
case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
default: ShouldNotReachHere();
}
if (return_pc_adjustment != 0) {
__ addi(R31, R31, return_pc_adjustment);
if (return_pc_location != return_pc_is_pre_saved) {
if (return_pc_adjustment != 0) {
__ addi(R31, R31, return_pc_adjustment);
}
__ std(R31, _abi(lr), R1_SP);
}
__ std(R31, _abi(lr), R1_SP);
// push a new frame
__ push_frame(frame_size_in_bytes, R31);
@ -646,7 +647,7 @@ int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
return round_to(stk, 2);
}
#ifdef COMPILER2
#if defined(COMPILER1) || defined(COMPILER2)
// Calling convention for calling C code.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
VMRegPair *regs,
@ -2576,7 +2577,7 @@ uint SharedRuntime::out_preserve_stack_slots() {
#endif
}
#ifdef COMPILER2
#if defined(COMPILER1) || defined(COMPILER2)
// Frame generation for deopt and uncommon trap blobs.
static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
/* Read */
@ -2734,7 +2735,7 @@ void SharedRuntime::generate_deopt_blob() {
const address start = __ pc();
#ifdef COMPILER2
#if defined(COMPILER1) || defined(COMPILER2)
// --------------------------------------------------------------------------
// Prolog for non exception case!
@ -2783,28 +2784,43 @@ void SharedRuntime::generate_deopt_blob() {
BLOCK_COMMENT("Prolog for exception case");
// The RegisterSaves doesn't need to adjust the return pc for this situation.
const int return_pc_adjustment_exception = 0;
// Push the "unpack frame".
// Save everything in sight.
assert(R4 == R4_ARG2, "exception pc must be in r4");
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
return_pc_adjustment_exception,
RegisterSaver::return_pc_is_r4);
// Deopt during an exception. Save exec mode for unpack_frames.
__ li(exec_mode_reg, Deoptimization::Unpack_exception);
// Store exception oop and pc in thread (location known to GC).
// This is needed since the call to "fetch_unroll_info()" may safepoint.
__ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
__ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
__ std(R4_ARG2, _abi(lr), R1_SP);
// Vanilla deoptimization with an exception pending in exception_oop.
int exception_in_tls_offset = __ pc() - start;
// Push the "unpack frame".
// Save everything in sight.
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
/*return_pc_adjustment_exception=*/ 0,
RegisterSaver::return_pc_is_pre_saved);
// Deopt during an exception. Save exec mode for unpack_frames.
__ li(exec_mode_reg, Deoptimization::Unpack_exception);
// fall through
int reexecute_offset = 0;
#ifdef COMPILER1
__ b(exec_mode_initialized);
// Reexecute entry, similar to c2 uncommon trap
reexecute_offset = __ pc() - start;
RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
&first_frame_size_in_bytes,
/*generate_oop_map=*/ false,
/*return_pc_adjustment_reexecute=*/ 0,
RegisterSaver::return_pc_is_pre_saved);
__ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
#endif
// --------------------------------------------------------------------------
__ BIND(exec_mode_initialized);
@ -2918,7 +2934,9 @@ void SharedRuntime::generate_deopt_blob() {
int exception_offset = __ pc() - start;
#endif // COMPILER2
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, 0, first_frame_size_in_bytes / wordSize);
_deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
reexecute_offset, first_frame_size_in_bytes / wordSize);
_deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
#ifdef COMPILER2

View file

@ -48,6 +48,12 @@
#define BLOCK_COMMENT(str) __ block_comment(str)
#endif
#if defined(ABI_ELFv2)
#define STUB_ENTRY(name) StubRoutines::name()
#else
#define STUB_ENTRY(name) ((FunctionDescriptor*)StubRoutines::name())->entry()
#endif
class StubGenerator: public StubCodeGenerator {
private:
@ -259,8 +265,7 @@ class StubGenerator: public StubCodeGenerator {
//
// global toc register
__ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
__ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R11_scratch1);
// Remember the senderSP so we interpreter can pop c2i arguments off of the stack
// when called via a c2i.
@ -619,14 +624,17 @@ class StubGenerator: public StubCodeGenerator {
// Kills:
// nothing
//
void gen_write_ref_array_pre_barrier(Register from, Register to, Register count, bool dest_uninitialized, Register Rtmp1) {
void gen_write_ref_array_pre_barrier(Register from, Register to, Register count, bool dest_uninitialized, Register Rtmp1,
Register preserve1 = noreg, Register preserve2 = noreg) {
BarrierSet* const bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCTLogging:
// With G1, don't generate the call if we statically know that the target is uninitialized
if (!dest_uninitialized) {
const int spill_slots = 4 * wordSize;
const int frame_size = frame::abi_reg_args_size + spill_slots;
int spill_slots = 3;
if (preserve1 != noreg) { spill_slots++; }
if (preserve2 != noreg) { spill_slots++; }
const int frame_size = align_size_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
Label filtered;
// Is marking active?
@ -640,17 +648,23 @@ class StubGenerator: public StubCodeGenerator {
__ beq(CCR0, filtered);
__ save_LR_CR(R0);
__ push_frame_reg_args(spill_slots, R0);
__ std(from, frame_size - 1 * wordSize, R1_SP);
__ std(to, frame_size - 2 * wordSize, R1_SP);
__ std(count, frame_size - 3 * wordSize, R1_SP);
__ push_frame(frame_size, R0);
int slot_nr = 0;
__ std(from, frame_size - (++slot_nr) * wordSize, R1_SP);
__ std(to, frame_size - (++slot_nr) * wordSize, R1_SP);
__ std(count, frame_size - (++slot_nr) * wordSize, R1_SP);
if (preserve1 != noreg) { __ std(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
if (preserve2 != noreg) { __ std(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_pre), to, count);
__ ld(from, frame_size - 1 * wordSize, R1_SP);
__ ld(to, frame_size - 2 * wordSize, R1_SP);
__ ld(count, frame_size - 3 * wordSize, R1_SP);
__ pop_frame();
slot_nr = 0;
__ ld(from, frame_size - (++slot_nr) * wordSize, R1_SP);
__ ld(to, frame_size - (++slot_nr) * wordSize, R1_SP);
__ ld(count, frame_size - (++slot_nr) * wordSize, R1_SP);
if (preserve1 != noreg) { __ ld(preserve1, frame_size - (++slot_nr) * wordSize, R1_SP); }
if (preserve2 != noreg) { __ ld(preserve2, frame_size - (++slot_nr) * wordSize, R1_SP); }
__ addi(R1_SP, R1_SP, frame_size); // pop_frame()
__ restore_LR_CR(R0);
__ bind(filtered);
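
// The spill frame for the pre-barrier call is now sized with align_size_up over a variable slot
// count instead of a fixed four-slot area; a worked example of the computation (align_size_up is
// assumed to be the usual round-up-to-multiple helper, and the ABI frame size used here is an
// illustrative value, not a quoted constant):
#include <cstddef>
#include <cassert>
static size_t align_size_up(size_t size, size_t alignment) {
  return (size + alignment - 1) & ~(alignment - 1);   // assumed definition, power-of-two alignment
}
int main() {
  const size_t BytesPerWord = 8;
  const size_t alignment_in_bytes = 16;    // assumed frame::alignment_in_bytes
  const size_t abi_reg_args_size = 112;    // illustrative ABI frame size only
  int spill_slots = 3 + 2;                 // from/to/count plus both optional preserve registers
  size_t frame_size = align_size_up(abi_reg_args_size + spill_slots * BytesPerWord, alignment_in_bytes);
  assert(frame_size == 160);               // 112 + 40 = 152, rounded up to the next 16-byte boundary
  return 0;
}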
@ -674,27 +688,22 @@ class StubGenerator: public StubCodeGenerator {
//
// The input registers and R0 are overwritten.
//
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, bool branchToEnd) {
void gen_write_ref_array_post_barrier(Register addr, Register count, Register tmp, Register preserve = noreg) {
BarrierSet* const bs = Universe::heap()->barrier_set();
switch (bs->kind()) {
case BarrierSet::G1SATBCTLogging:
{
if (branchToEnd) {
__ save_LR_CR(R0);
// We need this frame only to spill LR.
__ push_frame_reg_args(0, R0);
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
__ pop_frame();
__ restore_LR_CR(R0);
} else {
// Tail call: fake call from stub caller by branching without linking.
address entry_point = (address)CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post);
__ mr_if_needed(R3_ARG1, addr);
__ mr_if_needed(R4_ARG2, count);
__ load_const(R11, entry_point, R0);
__ call_c_and_return_to_caller(R11);
}
int spill_slots = (preserve != noreg) ? 1 : 0;
const int frame_size = align_size_up(frame::abi_reg_args_size + spill_slots * BytesPerWord, frame::alignment_in_bytes);
__ save_LR_CR(R0);
__ push_frame(frame_size, R0);
if (preserve != noreg) { __ std(preserve, frame_size - 1 * wordSize, R1_SP); }
__ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSet::static_write_ref_array_post), addr, count);
if (preserve != noreg) { __ ld(preserve, frame_size - 1 * wordSize, R1_SP); }
__ addi(R1_SP, R1_SP, frame_size); // pop_frame();
__ restore_LR_CR(R0);
}
break;
case BarrierSet::CardTableForRS:
@ -729,12 +738,9 @@ class StubGenerator: public StubCodeGenerator {
__ addi(addr, addr, 1);
__ bdnz(Lstore_loop);
__ bind(Lskip_loop);
if (!branchToEnd) __ blr();
}
break;
case BarrierSet::ModRef:
if (!branchToEnd) __ blr();
break;
default:
ShouldNotReachHere();
@ -763,8 +769,10 @@ class StubGenerator: public StubCodeGenerator {
// Procedure for large arrays (uses data cache block zero instruction).
Label dwloop, fast, fastloop, restloop, lastdword, done;
int cl_size=VM_Version::get_cache_line_size(), cl_dwords=cl_size>>3, cl_dwordaddr_bits=exact_log2(cl_dwords);
int min_dcbz=2; // Needs to be positive, apply dcbz only to at least min_dcbz cache lines.
int cl_size = VM_Version::L1_data_cache_line_size();
int cl_dwords = cl_size >> 3;
int cl_dwordaddr_bits = exact_log2(cl_dwords);
int min_dcbz = 2; // Needs to be positive, apply dcbz only to at least min_dcbz cache lines.
// Clear up to 128byte boundary if long enough, dword_cnt=(16-(base>>3))%16.
__ dcbtst(base_ptr_reg); // Indicate write access to first cache line ...
@ -1081,7 +1089,6 @@ class StubGenerator: public StubCodeGenerator {
Register tmp1 = R6_ARG4;
Register tmp2 = R7_ARG5;
Label l_overlap;
#ifdef ASSERT
__ srdi_(tmp2, R5_ARG3, 31);
__ asm_assert_eq("missing zero extend", 0xAFFE);
@ -1091,19 +1098,11 @@ class StubGenerator: public StubCodeGenerator {
__ sldi(tmp2, R5_ARG3, log2_elem_size); // size in bytes
__ cmpld(CCR0, R3_ARG1, R4_ARG2); // Use unsigned comparison!
__ cmpld(CCR1, tmp1, tmp2);
__ crand(CCR0, Assembler::less, CCR1, Assembler::less);
__ blt(CCR0, l_overlap); // Src before dst and distance smaller than size.
__ crnand(CCR0, Assembler::less, CCR1, Assembler::less);
// Overlaps if Src before dst and distance smaller than size.
// Branch to forward copy routine otherwise (within range of 32kB).
__ bc(Assembler::bcondCRbiIs1, Assembler::bi0(CCR0, Assembler::less), no_overlap_target);
// need to copy forwards
if (__ is_within_range_of_b(no_overlap_target, __ pc())) {
__ b(no_overlap_target);
} else {
__ load_const(tmp1, no_overlap_target, tmp2);
__ mtctr(tmp1);
__ bctr();
}
__ bind(l_overlap);
// need to copy backwards
}
@ -1248,6 +1247,7 @@ class StubGenerator: public StubCodeGenerator {
}
__ bind(l_4);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1269,15 +1269,9 @@ class StubGenerator: public StubCodeGenerator {
Register tmp2 = R7_ARG5;
Register tmp3 = R8_ARG6;
#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
StubRoutines::jbyte_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
((FunctionDescriptor*)StubRoutines::jbyte_disjoint_arraycopy())->entry();
#endif
STUB_ENTRY(arrayof_jbyte_disjoint_arraycopy) :
STUB_ENTRY(jbyte_disjoint_arraycopy);
array_overlap_test(nooverlap_target, 0);
// Do reverse copy. We assume the case of actual overlap is rare enough
@ -1292,6 +1286,7 @@ class StubGenerator: public StubCodeGenerator {
__ lbzx(tmp1, R3_ARG1, R5_ARG3);
__ bge(CCR0, l_1);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1474,6 +1469,7 @@ class StubGenerator: public StubCodeGenerator {
__ bdnz(l_5);
}
__ bind(l_4);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1495,15 +1491,9 @@ class StubGenerator: public StubCodeGenerator {
Register tmp2 = R7_ARG5;
Register tmp3 = R8_ARG6;
#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_jshort_disjoint_arraycopy() :
StubRoutines::jshort_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jshort_disjoint_arraycopy())->entry() :
((FunctionDescriptor*)StubRoutines::jshort_disjoint_arraycopy())->entry();
#endif
STUB_ENTRY(arrayof_jshort_disjoint_arraycopy) :
STUB_ENTRY(jshort_disjoint_arraycopy);
array_overlap_test(nooverlap_target, 1);
@ -1517,6 +1507,7 @@ class StubGenerator: public StubCodeGenerator {
__ lhzx(tmp2, R3_ARG1, tmp1);
__ bge(CCR0, l_1);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1620,6 +1611,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
generate_disjoint_int_copy_core(aligned);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
}
@ -1704,20 +1696,15 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_jint_disjoint_arraycopy() :
StubRoutines::jint_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jint_disjoint_arraycopy())->entry() :
((FunctionDescriptor*)StubRoutines::jint_disjoint_arraycopy())->entry();
#endif
STUB_ENTRY(arrayof_jint_disjoint_arraycopy) :
STUB_ENTRY(jint_disjoint_arraycopy);
array_overlap_test(nooverlap_target, 2);
generate_conjoint_int_copy_core(aligned);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1796,6 +1783,7 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
generate_disjoint_long_copy_core(aligned);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1878,19 +1866,14 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_jlong_disjoint_arraycopy() :
StubRoutines::jlong_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_jlong_disjoint_arraycopy())->entry() :
((FunctionDescriptor*)StubRoutines::jlong_disjoint_arraycopy())->entry();
#endif
STUB_ENTRY(arrayof_jlong_disjoint_arraycopy) :
STUB_ENTRY(jlong_disjoint_arraycopy);
array_overlap_test(nooverlap_target, 3);
generate_conjoint_long_copy_core(aligned);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
@ -1910,15 +1893,9 @@ class StubGenerator: public StubCodeGenerator {
address start = __ function_entry();
#if defined(ABI_ELFv2)
address nooverlap_target = aligned ?
StubRoutines::arrayof_oop_disjoint_arraycopy() :
StubRoutines::oop_disjoint_arraycopy();
#else
address nooverlap_target = aligned ?
((FunctionDescriptor*)StubRoutines::arrayof_oop_disjoint_arraycopy())->entry() :
((FunctionDescriptor*)StubRoutines::oop_disjoint_arraycopy())->entry();
#endif
STUB_ENTRY(arrayof_oop_disjoint_arraycopy) :
STUB_ENTRY(oop_disjoint_arraycopy);
gen_write_ref_array_pre_barrier(R3_ARG1, R4_ARG2, R5_ARG3, dest_uninitialized, R9_ARG7);
@ -1934,7 +1911,9 @@ class StubGenerator: public StubCodeGenerator {
generate_conjoint_long_copy_core(aligned);
}
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
}
@ -1964,11 +1943,460 @@ class StubGenerator: public StubCodeGenerator {
generate_disjoint_long_copy_core(aligned);
}
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1, /*branchToEnd*/ false);
gen_write_ref_array_post_barrier(R9_ARG7, R10_ARG8, R11_scratch1);
__ li(R3_RET, 0); // return 0
__ blr();
return start;
}
// Helper for generating a dynamic type check.
// Smashes only the given temp registers.
void generate_type_check(Register sub_klass,
Register super_check_offset,
Register super_klass,
Register temp,
Label& L_success) {
assert_different_registers(sub_klass, super_check_offset, super_klass);
BLOCK_COMMENT("type_check:");
Label L_miss;
__ check_klass_subtype_fast_path(sub_klass, super_klass, temp, R0, &L_success, &L_miss, NULL,
super_check_offset);
__ check_klass_subtype_slow_path(sub_klass, super_klass, temp, R0, &L_success, NULL);
// Fall through on failure!
__ bind(L_miss);
}
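// The two check_klass_subtype_* calls implement HotSpot's usual two-level
// subtype test. A standalone sketch of the idea with simplified types
// (illustrative only, not the real Klass layout):
struct KlassSketch {
const KlassSketch* super_cache; // the word probed via super_check_offset
const KlassSketch* const* secondary; // secondary supers array
int secondary_len;
};
static bool is_subtype_of_sketch(const KlassSketch* sub, const KlassSketch* super) {
if (sub->super_cache == super) return true; // fast path: cached hit
for (int i = 0; i < sub->secondary_len; i++) { // slow path: linear scan
if (sub->secondary[i] == super) return true;
}
return false; // miss: fall through to L_miss
}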
// Generate stub for checked oop copy.
//
// Arguments for generated stub:
// from: R3
// to: R4
// count: R5 treated as signed
// ckoff: R6 (super_check_offset)
// ckval: R7 (super_klass)
// ret: R3 zero for success; (-1^K) where K is partial transfer count
//
address generate_checkcast_copy(const char *name, bool dest_uninitialized) {
const Register R3_from = R3_ARG1; // source array address
const Register R4_to = R4_ARG2; // destination array address
const Register R5_count = R5_ARG3; // elements count
const Register R6_ckoff = R6_ARG4; // super_check_offset
const Register R7_ckval = R7_ARG5; // super_klass
const Register R8_offset = R8_ARG6; // loop var, with stride wordSize
const Register R9_remain = R9_ARG7; // loop var, with stride -1
const Register R10_oop = R10_ARG8; // actual oop copied
const Register R11_klass = R11_scratch1; // oop._klass
const Register R12_tmp = R12_scratch2;
const Register R2_minus1 = R2;
//__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
// TODO: Assert that int is 64 bit sign extended and arrays are not conjoint.
gen_write_ref_array_pre_barrier(R3_from, R4_to, R5_count, dest_uninitialized, R12_tmp, /* preserve: */ R6_ckoff, R7_ckval);
//inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, R12_tmp, R3_RET);
Label load_element, store_element, store_null, success, do_card_marks;
__ or_(R9_remain, R5_count, R5_count); // Initialize loop index, and test it.
__ li(R8_offset, 0); // Offset from start of arrays.
__ li(R2_minus1, -1);
__ bne(CCR0, load_element);
// Empty array: Nothing to do.
__ li(R3_RET, 0); // Return 0 on (trivial) success.
__ blr();
// ======== begin loop ========
// (Entry is load_element.)
__ align(OptoLoopAlignment);
__ bind(store_element);
if (UseCompressedOops) {
__ encode_heap_oop_not_null(R10_oop);
__ bind(store_null);
__ stw(R10_oop, R8_offset, R4_to);
} else {
__ bind(store_null);
__ std(R10_oop, R8_offset, R4_to);
}
__ addi(R8_offset, R8_offset, heapOopSize); // Step to next offset.
__ add_(R9_remain, R2_minus1, R9_remain); // Decrement the count.
__ beq(CCR0, success);
// ======== loop entry is here ========
__ bind(load_element);
__ load_heap_oop(R10_oop, R8_offset, R3_from, &store_null); // Load the oop.
__ load_klass(R11_klass, R10_oop); // Query the object klass.
generate_type_check(R11_klass, R6_ckoff, R7_ckval, R12_tmp,
// Branch to this on success:
store_element);
// ======== end loop ========
// It was a real error; we must depend on the caller to finish the job.
// Register R9_remain has number of *remaining* oops, R5_count number of *total* oops.
// Emit GC store barriers for the oops we have copied (R5_count minus R9_remain),
// and report their number to the caller.
__ subf_(R5_count, R9_remain, R5_count);
__ nand(R3_RET, R5_count, R5_count); // report (-1^K) to caller
__ bne(CCR0, do_card_marks);
__ blr();
__ bind(success);
__ li(R3_RET, 0);
__ bind(do_card_marks);
// Store check on R4_to[0..R5_count-1].
gen_write_ref_array_post_barrier(R4_to, R5_count, R12_tmp, /* preserve: */ R3_RET);
__ blr();
return start;
}
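// The nand above reports (-1 ^ K), K being the number of elements copied before
// a type check failed. Hypothetical caller-side helpers (not part of this
// change) showing how that convention decodes:
static inline bool checkcast_copy_failed(int stub_ret) { return stub_ret != 0; }
static inline int checkcast_elements_copied(int stub_ret, int total_count) {
// ~(-1 ^ K) == K; on full success the stub returns 0 and all elements were copied.
return checkcast_copy_failed(stub_ret) ? ~stub_ret : total_count;
}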
// Generate 'unsafe' array copy stub.
// Though just as safe as the other stubs, it takes an unscaled
// size_t argument instead of an element count.
//
// Arguments for generated stub:
// from: R3
// to: R4
// count: R5 byte count, treated as ssize_t, can be zero
//
// Examines the alignment of the operands and dispatches
// to a long, int, short, or byte copy loop.
//
address generate_unsafe_copy(const char* name,
address byte_copy_entry,
address short_copy_entry,
address int_copy_entry,
address long_copy_entry) {
const Register R3_from = R3_ARG1; // source array address
const Register R4_to = R4_ARG2; // destination array address
const Register R5_count = R5_ARG3; // byte count on entry (as long on PPC64); scaled to an element count below
const Register R6_bits = R6_ARG4; // test copy of low bits
const Register R7_tmp = R7_ARG5;
//__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
// Bump this on entry, not on exit:
//inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr, R6_bits, R7_tmp);
Label short_copy, int_copy, long_copy;
__ orr(R6_bits, R3_from, R4_to);
__ orr(R6_bits, R6_bits, R5_count);
__ andi_(R0, R6_bits, (BytesPerLong-1));
__ beq(CCR0, long_copy);
__ andi_(R0, R6_bits, (BytesPerInt-1));
__ beq(CCR0, int_copy);
__ andi_(R0, R6_bits, (BytesPerShort-1));
__ beq(CCR0, short_copy);
// byte_copy:
__ b(byte_copy_entry);
__ bind(short_copy);
__ srwi(R5_count, R5_count, LogBytesPerShort);
__ b(short_copy_entry);
__ bind(int_copy);
__ srwi(R5_count, R5_count, LogBytesPerInt);
__ b(int_copy_entry);
__ bind(long_copy);
__ srwi(R5_count, R5_count, LogBytesPerLong);
__ b(long_copy_entry);
return start;
}
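// Standalone sketch of the dispatch rule implemented above (illustrative names,
// not HotSpot API): OR the two addresses and the byte count together and pick
// the widest element width whose low bits are all clear.
enum UnsafeCopyKind { BYTE_COPY, SHORT_COPY, INT_COPY, LONG_COPY };
static UnsafeCopyKind choose_copy_kind(uintptr_t from, uintptr_t to, uintptr_t byte_count) {
const uintptr_t bits = from | to | byte_count;
if ((bits & 7) == 0) return LONG_COPY; // count is then shifted right by 3
if ((bits & 3) == 0) return INT_COPY; // ... by 2
if ((bits & 1) == 0) return SHORT_COPY; // ... by 1
return BYTE_COPY; // unscaled byte count
}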
// Perform range checks on the proposed arraycopy.
// Kills the two temps, but nothing else.
// The sign bits of src_pos and dst_pos have already been cleaned by the caller.
void arraycopy_range_checks(Register src, // source array oop
Register src_pos, // source position
Register dst, // destination array oop
Register dst_pos, // destination position
Register length, // length of copy
Register temp1, Register temp2,
Label& L_failed) {
BLOCK_COMMENT("arraycopy_range_checks:");
const Register array_length = temp1; // scratch
const Register end_pos = temp2; // scratch
// if (src_pos + length > arrayOop(src)->length() ) FAIL;
__ lwa(array_length, arrayOopDesc::length_offset_in_bytes(), src);
__ add(end_pos, src_pos, length); // src_pos + length
__ cmpd(CCR0, end_pos, array_length);
__ bgt(CCR0, L_failed);
// if (dst_pos + length > arrayOop(dst)->length() ) FAIL;
__ lwa(array_length, arrayOopDesc::length_offset_in_bytes(), dst);
__ add(end_pos, dst_pos, length); // dst_pos + length
__ cmpd(CCR0, end_pos, array_length);
__ bgt(CCR0, L_failed);
BLOCK_COMMENT("arraycopy_range_checks done");
}
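// In scalar terms each check above is simply (sketch; src_pos/dst_pos and
// length were sign-extended and rejected if negative by the caller):
static bool arraycopy_range_ok_sketch(long pos, long length, int array_length) {
return pos + length <= array_length; // otherwise: bgt(CCR0, L_failed)
}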
//
// Generate generic array copy stubs
//
// Input:
// R3 - src oop
// R4 - src_pos
// R5 - dst oop
// R6 - dst_pos
// R7 - element count
//
// Output:
// R3 == 0 - success
// R3 == -1 - need to call System.arraycopy
//
address generate_generic_copy(const char *name,
address entry_jbyte_arraycopy,
address entry_jshort_arraycopy,
address entry_jint_arraycopy,
address entry_oop_arraycopy,
address entry_disjoint_oop_arraycopy,
address entry_jlong_arraycopy,
address entry_checkcast_arraycopy) {
Label L_failed, L_objArray;
// Input registers
const Register src = R3_ARG1; // source array oop
const Register src_pos = R4_ARG2; // source position
const Register dst = R5_ARG3; // destination array oop
const Register dst_pos = R6_ARG4; // destination position
const Register length = R7_ARG5; // elements count
// registers used as temp
const Register src_klass = R8_ARG6; // source array klass
const Register dst_klass = R9_ARG7; // destination array klass
const Register lh = R10_ARG8; // layout helper
const Register temp = R2;
//__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry();
// Bump this on entry, not on exit:
//inc_counter_np(SharedRuntime::_generic_array_copy_ctr, lh, temp);
// In principle, the int arguments could be dirty.
//-----------------------------------------------------------------------
// Assembler stubs will be used for this call to arraycopy
// if the following conditions are met:
//
// (1) src and dst must not be null.
// (2) src_pos must not be negative.
// (3) dst_pos must not be negative.
// (4) length must not be negative.
// (5) src klass and dst klass should be the same and not NULL.
// (6) src and dst should be arrays.
// (7) src_pos + length must not exceed length of src.
// (8) dst_pos + length must not exceed length of dst.
BLOCK_COMMENT("arraycopy initial argument checks");
__ cmpdi(CCR1, src, 0); // if (src == NULL) return -1;
__ extsw_(src_pos, src_pos); // if (src_pos < 0) return -1;
__ cmpdi(CCR5, dst, 0); // if (dst == NULL) return -1;
__ cror(CCR1, Assembler::equal, CCR0, Assembler::less);
__ extsw_(dst_pos, dst_pos); // if (src_pos < 0) return -1;
__ cror(CCR5, Assembler::equal, CCR0, Assembler::less);
__ extsw_(length, length); // if (length < 0) return -1;
__ cror(CCR1, Assembler::equal, CCR5, Assembler::equal);
__ cror(CCR1, Assembler::equal, CCR0, Assembler::less);
__ beq(CCR1, L_failed);
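// The CR-field operations above merge all five failure conditions into a
// single bit so one branch suffices. Scalar equivalent (sketch):
//   if (src == NULL || dst == NULL)               return -1;
//   if (src_pos < 0 || dst_pos < 0 || length < 0) return -1;  // after sign-extension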
BLOCK_COMMENT("arraycopy argument klass checks");
__ load_klass(src_klass, src);
__ load_klass(dst_klass, dst);
// Load layout helper
//
//    |array_tag|     | header_size | element_type |     |log2_element_size|
// 32        30    24            16              8     2                 0
//
// array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
//
int lh_offset = in_bytes(Klass::layout_helper_offset());
// Load the 32-bit signed layout helper value.
__ lwz(lh, lh_offset, src_klass);
// Handle objArrays completely differently...
jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
__ load_const_optimized(temp, objArray_lh, R0);
__ cmpw(CCR0, lh, temp);
__ beq(CCR0, L_objArray);
__ cmpd(CCR5, src_klass, dst_klass); // if (src->klass() != dst->klass()) return -1;
__ cmpwi(CCR6, lh, Klass::_lh_neutral_value); // if (!src->is_Array()) return -1;
__ crnand(CCR5, Assembler::equal, CCR6, Assembler::less);
__ beq(CCR5, L_failed);
// At this point, it is known to be a typeArray (array_tag 0x3).
#ifdef ASSERT
{ Label L;
jint lh_prim_tag_in_place = (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
__ load_const_optimized(temp, lh_prim_tag_in_place, R0);
__ cmpw(CCR0, lh, temp);
__ bge(CCR0, L);
__ stop("must be a primitive array");
__ bind(L);
}
#endif
arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
temp, dst_klass, L_failed);
// TypeArrayKlass
//
// src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
// dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
//
const Register offset = dst_klass; // array offset
const Register elsize = src_klass; // log2 element size
__ rldicl(offset, lh, 64 - Klass::_lh_header_size_shift, 64 - exact_log2(Klass::_lh_header_size_mask + 1));
__ andi(elsize, lh, Klass::_lh_log2_element_size_mask);
__ add(src, offset, src); // src array offset
__ add(dst, offset, dst); // dst array offset
// Next registers should be set before the jump to corresponding stub.
const Register from = R3_ARG1; // source array address
const Register to = R4_ARG2; // destination array address
const Register count = R5_ARG3; // elements count
// 'from', 'to', 'count' registers should be set in this order
// since they are the same as 'src', 'src_pos', 'dst'.
BLOCK_COMMENT("scale indexes to element size");
__ sld(src_pos, src_pos, elsize);
__ sld(dst_pos, dst_pos, elsize);
__ add(from, src_pos, src); // src_addr
__ add(to, dst_pos, dst); // dst_addr
__ mr(count, length); // length
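// Illustrative summary (not from the patch) of the decode and address
// arithmetic above, in C terms:
//   header = (lh >> Klass::_lh_header_size_shift) & Klass::_lh_header_size_mask;
//   elsize = lh & Klass::_lh_log2_element_size_mask;            // log2(element size)
//   from   = (char*)src + header + ((size_t)src_pos << elsize);
//   to     = (char*)dst + header + ((size_t)dst_pos << elsize);
//   count  = length;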
BLOCK_COMMENT("choose copy loop based on element size");
// Using conditional branches with range 32kB.
const int bo = Assembler::bcondCRbiIs1, bi = Assembler::bi0(CCR0, Assembler::equal);
__ cmpwi(CCR0, elsize, 0);
__ bc(bo, bi, entry_jbyte_arraycopy);
__ cmpwi(CCR0, elsize, LogBytesPerShort);
__ bc(bo, bi, entry_jshort_arraycopy);
__ cmpwi(CCR0, elsize, LogBytesPerInt);
__ bc(bo, bi, entry_jint_arraycopy);
#ifdef ASSERT
{ Label L;
__ cmpwi(CCR0, elsize, LogBytesPerLong);
__ beq(CCR0, L);
__ stop("must be long copy, but elsize is wrong");
__ bind(L);
}
#endif
__ b(entry_jlong_arraycopy);
// ObjArrayKlass
__ bind(L_objArray);
// live at this point: src_klass, dst_klass, src[_pos], dst[_pos], length
Label L_disjoint_plain_copy, L_checkcast_copy;
// test array classes for subtyping
__ cmpd(CCR0, src_klass, dst_klass); // usual case is exact equality
__ bne(CCR0, L_checkcast_copy);
// Identically typed arrays can be copied without element-wise checks.
arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
temp, lh, L_failed);
__ addi(src, src, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //src offset
__ addi(dst, dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //dst offset
__ sldi(src_pos, src_pos, LogBytesPerHeapOop);
__ sldi(dst_pos, dst_pos, LogBytesPerHeapOop);
__ add(from, src_pos, src); // src_addr
__ add(to, dst_pos, dst); // dst_addr
__ mr(count, length); // length
__ b(entry_oop_arraycopy);
__ bind(L_checkcast_copy);
// live at this point: src_klass, dst_klass
{
// Before looking at dst.length, make sure dst is also an objArray.
__ lwz(temp, lh_offset, dst_klass);
__ cmpw(CCR0, lh, temp);
__ bne(CCR0, L_failed);
// It is safe to examine both src.length and dst.length.
arraycopy_range_checks(src, src_pos, dst, dst_pos, length,
temp, lh, L_failed);
// Marshal the base address arguments now, freeing registers.
__ addi(src, src, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //src offset
__ addi(dst, dst, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); //dst offset
__ sldi(src_pos, src_pos, LogBytesPerHeapOop);
__ sldi(dst_pos, dst_pos, LogBytesPerHeapOop);
__ add(from, src_pos, src); // src_addr
__ add(to, dst_pos, dst); // dst_addr
__ mr(count, length); // length
Register sco_temp = R6_ARG4; // This register is free now.
assert_different_registers(from, to, count, sco_temp,
dst_klass, src_klass);
// Generate the type check.
int sco_offset = in_bytes(Klass::super_check_offset_offset());
__ lwz(sco_temp, sco_offset, dst_klass);
generate_type_check(src_klass, sco_temp, dst_klass,
temp, L_disjoint_plain_copy);
// Fetch destination element klass from the ObjArrayKlass header.
int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
// The checkcast_copy loop needs two extra arguments:
__ ld(R7_ARG5, ek_offset, dst_klass); // dest elem klass
__ lwz(R6_ARG4, sco_offset, R7_ARG5); // sco of elem klass
__ b(entry_checkcast_arraycopy);
}
__ bind(L_disjoint_plain_copy);
__ b(entry_disjoint_oop_arraycopy);
__ bind(L_failed);
__ li(R3_RET, -1); // return -1
__ blr();
return start;
}
void generate_arraycopy_stubs() {
// Note: the disjoint stubs must be generated first, as some of
// the conjoint stubs use them.
@ -2005,6 +2433,24 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_oop_arraycopy = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", false);
StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(true, "arrayof_oop_arraycopy", true);
// special/generic versions
StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", false);
StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", true);
StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy",
STUB_ENTRY(jbyte_arraycopy),
STUB_ENTRY(jshort_arraycopy),
STUB_ENTRY(jint_arraycopy),
STUB_ENTRY(jlong_arraycopy));
StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy",
STUB_ENTRY(jbyte_arraycopy),
STUB_ENTRY(jshort_arraycopy),
STUB_ENTRY(jint_arraycopy),
STUB_ENTRY(oop_arraycopy),
STUB_ENTRY(oop_disjoint_arraycopy),
STUB_ENTRY(jlong_arraycopy),
STUB_ENTRY(checkcast_arraycopy));
// fill routines
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");

View file

@ -34,7 +34,7 @@
// CRC32 Intrinsics.
void StubRoutines::ppc64::generate_load_crc_table_addr(MacroAssembler* masm, Register table) {
__ load_const(table, StubRoutines::_crc_table_adr);
__ load_const_optimized(table, StubRoutines::_crc_table_adr, R0);
}
// CRC32 Intrinsics.

View file

@ -255,34 +255,33 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
if (TieredCompilation) {
const int increment = InvocationCounter::count_increment;
const int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
Label no_mdo;
if (ProfileInterpreter) {
const Register Rmdo = Rscratch1;
const Register Rmdo = R3_counters;
// If no method data exists, go to profile_continue.
__ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method);
__ cmpdi(CCR0, Rmdo, 0);
__ beq(CCR0, no_mdo);
// Increment backedge counter in the MDO.
const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_bc_offs, Rmdo);
const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_ic_offs, Rmdo);
__ lwz(Rscratch1, in_bytes(MethodData::invoke_mask_offset()), Rmdo);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mdo_bc_offs, Rmdo);
__ load_const_optimized(Rscratch1, mask, R0);
__ stw(Rscratch2, mdo_ic_offs, Rmdo);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ bne(CCR0, done);
__ b(*overflow);
}
// Increment counter in MethodCounters*.
const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
const int mo_bc_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ bind(no_mdo);
__ get_method_counters(R19_method, R3_counters, done);
__ lwz(Rscratch2, mo_bc_offs, R3_counters);
__ lwz(Rscratch1, in_bytes(MethodCounters::invoke_mask_offset()), R3_counters);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mo_bc_offs, R3_counters);
__ load_const_optimized(Rscratch1, mask, R0);
__ and_(Rscratch1, Rscratch2, Rscratch1);
__ beq(CCR0, *overflow);
@ -303,8 +302,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
// Check if we must create a method data obj.
if (ProfileInterpreter && profile_method != NULL) {
const Register profile_limit = Rscratch1;
int pl_offs = __ load_const_optimized(profile_limit, &InvocationCounter::InterpreterProfileLimit, R0, true);
__ lwz(profile_limit, pl_offs, profile_limit);
__ lwz(profile_limit, in_bytes(MethodCounters::interpreter_profile_limit_offset()), R3_counters);
// Test to see if we should create a method data oop.
__ cmpw(CCR0, Rsum_ivc_bec, profile_limit);
__ blt(CCR0, *profile_method_continue);
@ -314,9 +312,7 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
// Finally check for counter overflow.
if (overflow) {
const Register invocation_limit = Rscratch1;
int il_offs = __ load_const_optimized(invocation_limit, &InvocationCounter::InterpreterInvocationLimit, R0, true);
__ lwz(invocation_limit, il_offs, invocation_limit);
assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), "unexpected field size");
__ lwz(invocation_limit, in_bytes(MethodCounters::interpreter_invocation_limit_offset()), R3_counters);
__ cmpw(CCR0, Rsum_ivc_bec, invocation_limit);
__ bge(CCR0, *overflow);
}
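// With the per-method counters used above, the interpreter's tests amount to
// (sketch, simplified names):
//   if (ivc_bec_sum >= counters->interpreter_profile_limit)    -> create an MDO
//   if (ivc_bec_sum >= counters->interpreter_invocation_limit) -> counter overflow
// replacing the former loads of the global InterpreterProfileLimit and
// InterpreterInvocationLimit values.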
@ -1484,9 +1480,9 @@ void AbstractInterpreter::layout_activation(Method* method,
intptr_t* locals_base = (caller->is_interpreted_frame()) ?
caller->interpreter_frame_esp() + caller_actual_parameters :
caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize) ;
caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize);
intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize ;
intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize;
intptr_t* monitor = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
intptr_t* esp_base = monitor - 1;
intptr_t* esp = esp_base - tempcount - popframe_extra_args;

View file

@ -37,5 +37,3 @@
const static int InterpreterCodeSize = 230*K;
#endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP

View file

@ -1626,12 +1626,13 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// --------------------------------------------------------------------------
// Normal (non-jsr) branch handling
// Bump bytecode pointer by displacement (take the branch).
__ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
const bool increment_invocation_counter_for_backward_branches = UseCompiler && UseLoopCounter;
if (increment_invocation_counter_for_backward_branches) {
//__ unimplemented("branch invocation counter");
Label Lforward;
__ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
__ dispatch_prolog(vtos);
// Check branch direction.
__ cmpdi(CCR0, Rdisp, 0);
@ -1642,7 +1643,6 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
if (TieredCompilation) {
Label Lno_mdo, Loverflow;
const int increment = InvocationCounter::count_increment;
const int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift;
if (ProfileInterpreter) {
Register Rmdo = Rscratch1;
@ -1654,7 +1654,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Increment backedge counter in the MDO.
const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ lwz(Rscratch2, mdo_bc_offs, Rmdo);
__ load_const_optimized(Rscratch3, mask, R0);
__ lwz(Rscratch3, in_bytes(MethodData::backedge_mask_offset()), Rmdo);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mdo_bc_offs, Rmdo);
__ and_(Rscratch3, Rscratch2, Rscratch3);
@ -1666,19 +1666,19 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
__ bind(Lno_mdo);
__ lwz(Rscratch2, mo_bc_offs, R4_counters);
__ load_const_optimized(Rscratch3, mask, R0);
__ lwz(Rscratch3, in_bytes(MethodCounters::backedge_mask_offset()), R4_counters);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mo_bc_offs, R19_method);
__ stw(Rscratch2, mo_bc_offs, R4_counters);
__ and_(Rscratch3, Rscratch2, Rscratch3);
__ bne(CCR0, Lforward);
__ bind(Loverflow);
// Notify point for loop, pass branch bytecode.
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R14_bcp, true);
__ subf(R4_ARG2, Rdisp, R14_bcp); // Compute branch bytecode (previous bcp).
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true);
// Was an OSR adapter generated?
// R3_RET = osr nmethod
__ cmpdi(CCR0, R3_RET, 0);
__ beq(CCR0, Lforward);
@ -1714,27 +1714,23 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ increment_backedge_counter(R4_counters, invoke_ctr, Rscratch2, Rscratch3);
if (ProfileInterpreter) {
__ test_invocation_counter_for_mdp(invoke_ctr, Rscratch2, Lforward);
__ test_invocation_counter_for_mdp(invoke_ctr, R4_counters, Rscratch2, Lforward);
if (UseOnStackReplacement) {
__ test_backedge_count_for_osr(bumped_count, R14_bcp, Rscratch2);
__ test_backedge_count_for_osr(bumped_count, R4_counters, R14_bcp, Rdisp, Rscratch2);
}
} else {
if (UseOnStackReplacement) {
__ test_backedge_count_for_osr(invoke_ctr, R14_bcp, Rscratch2);
__ test_backedge_count_for_osr(invoke_ctr, R4_counters, R14_bcp, Rdisp, Rscratch2);
}
}
}
__ bind(Lforward);
__ dispatch_epilog(vtos);
} else {
// Bump bytecode pointer by displacement (take the branch).
__ add(R14_bcp, Rdisp, R14_bcp); // Add to bc addr.
__ dispatch_next(vtos);
}
// Continue with bytecode @ target.
// %%%%% Like Intel, could speed things up by moving bytecode fetch to code above,
// %%%%% and changing dispatch_next to dispatch_only.
__ dispatch_next(vtos);
}
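// The and_/bne pair above takes the overflow path whenever the incremented
// backedge counter has all mask bits clear. Standalone sketch with simplified
// fields (illustrative, not the MethodCounters/MethodData API):
struct BackedgeCounterSketch { int counter; int mask; };
static bool backedge_notify_sketch(BackedgeCounterSketch* c, int increment) {
c->counter += increment;
return (c->counter & c->mask) == 0; // true -> Loverflow, false -> Lforward
}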
// Helper function for if_cmp* methods below.

View file

@ -38,7 +38,6 @@
# include <sys/sysinfo.h>
int VM_Version::_features = VM_Version::unknown_m;
int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
const char* VM_Version::_features_str = "";
bool VM_Version::_is_determine_features_test_running = false;
@ -56,7 +55,7 @@ void VM_Version::initialize() {
// If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
if (VM_Version::has_lqarx()) {
if (VM_Version::has_tcheck() && VM_Version::has_lqarx()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
} else if (VM_Version::has_popcntw()) {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
@ -68,10 +67,19 @@ void VM_Version::initialize() {
FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 0);
}
}
guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
PowerArchitecturePPC64 == 8,
"PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
bool PowerArchitecturePPC64_ok = false;
switch (PowerArchitecturePPC64) {
case 8: if (!VM_Version::has_tcheck() ) break;
if (!VM_Version::has_lqarx() ) break;
case 7: if (!VM_Version::has_popcntw()) break;
case 6: if (!VM_Version::has_cmpb() ) break;
case 5: if (!VM_Version::has_popcntb()) break;
case 0: PowerArchitecturePPC64_ok = true; break;
default: break;
}
guarantee(PowerArchitecturePPC64_ok, "PowerArchitecturePPC64 cannot be set to "
UINTX_FORMAT " on this machine", PowerArchitecturePPC64);
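// The switch above relies on deliberate fall-through: each case adds the
// requirements of every lower level. Written out for the highest level (sketch):
//   PowerArchitecturePPC64 == 8 is accepted iff
//     has_tcheck() && has_lqarx() && has_popcntw() && has_cmpb() && has_popcntb()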
// Power 8: Configure Data Stream Control Register.
if (PowerArchitecturePPC64 >= 8) {
@ -132,9 +140,15 @@ void VM_Version::initialize() {
// and 'atomic long memory ops' (see Unsafe_GetLongVolatile).
_supports_cx8 = true;
// Used by C1.
_supports_atomic_getset4 = true;
_supports_atomic_getadd4 = true;
_supports_atomic_getset8 = true;
_supports_atomic_getadd8 = true;
UseSSE = 0; // Only on x86 and x64
intx cache_line_size = _measured_cache_line_size;
intx cache_line_size = L1_data_cache_line_size();
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) AllocatePrefetchStyle = 1;
@ -261,11 +275,9 @@ void VM_Version::initialize() {
}
}
// This machine does not allow unaligned memory accesses
if (UseUnalignedAccesses) {
if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
warning("Unaligned memory access is not available on this CPU");
FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
}
}
@ -291,7 +303,7 @@ bool VM_Version::use_biased_locking() {
}
void VM_Version::print_features() {
tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
tty->print_cr("Version: %s L1_data_cache_line_size=%d", cpu_features(), L1_data_cache_line_size());
}
#ifdef COMPILER2
@ -592,7 +604,7 @@ void VM_Version::determine_features() {
int count = 0; // count zeroed bytes
for (int i = 0; i < BUFFER_SIZE; i++) if (test_area[i] == 0) count++;
guarantee(is_power_of_2(count), "cache line size needs to be a power of 2");
_measured_cache_line_size = count;
_L1_data_cache_line_size = count;
// Execute code. Illegal instructions will be replaced by 0 in the signal handler.
VM_Version::_is_determine_features_test_running = true;

View file

@ -65,7 +65,6 @@ protected:
all_features_m = -1
};
static int _features;
static int _measured_cache_line_size;
static const char* _features_str;
static bool _is_determine_features_test_running;
@ -99,8 +98,6 @@ public:
static const char* cpu_features() { return _features_str; }
static int get_cache_line_size() { return _measured_cache_line_size; }
// Assembler testing
static void allow_all();
static void revert();

View file

@ -76,7 +76,8 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// We might implicit NULL fault here.
address npe_addr = __ pc(); // npe = null pointer exception
__ load_klass_with_trap_null_check(rcvr_klass, R3);
__ null_check(R3, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
__ load_klass(rcvr_klass, R3);
// Set method (in case of interpreted method), and destination address.
int entry_offset = InstanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
@ -111,8 +112,8 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
// If the vtable entry is null, the method is abstract.
address ame_addr = __ pc(); // ame = abstract method error
__ load_with_trap_null_check(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ null_check(R19_method, in_bytes(Method::from_compiled_offset()), /*implicit only*/NULL);
__ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
masm->flush();
@ -158,7 +159,8 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
// We might implicit NULL fault here.
address npe_addr = __ pc(); // npe = null pointer exception
__ load_klass_with_trap_null_check(rcvr_klass, R3_ARG1);
__ null_check(R3_ARG1, oopDesc::klass_offset_in_bytes(), /*implicit only*/NULL);
__ load_klass(rcvr_klass, R3_ARG1);
BLOCK_COMMENT("Load start of itable entries into itable_entry.");
__ lwz(vtable_len, InstanceKlass::vtable_length_offset() * wordSize, rcvr_klass);
@ -217,15 +219,7 @@ VtableStub* VtableStubs::create_itable_stub(int vtable_index) {
address ame_addr = __ pc(); // ame = abstract method error
// Must do an explicit check if implicit checks are disabled.
assert(!MacroAssembler::needs_explicit_null_check(in_bytes(Method::from_compiled_offset())), "sanity");
if (!ImplicitNullChecks || !os::zero_page_read_protected()) {
if (TrapBasedNullChecks) {
__ trap_null_check(R19_method);
} else {
__ cmpdi(CCR0, R19_method, 0);
__ beq(CCR0, throw_icce);
}
}
__ null_check(R19_method, in_bytes(Method::from_compiled_offset()), &throw_icce);
__ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
__ mtctr(R12_scratch2);
__ bctr();
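// The MacroAssembler::null_check(reg, offset, &fail) calls above presumably
// centralize the pattern the removed lines spelled out (a sketch under that
// assumption, not the real helper):
//   if the access at 'offset' is covered by an implicit check (ImplicitNullChecks
//   on and the zero page read-protected), emit nothing and let the signal
//   handler do the work; otherwise either trap_null_check(reg) when
//   TrapBasedNullChecks is set, or cmpdi(CCR0, reg, 0); beq(CCR0, fail);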

View file

@ -0,0 +1,37 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright 2012, 2015 SAP AG. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef OS_AIX_VM_C1_GLOBALS_AIX_HPP
#define OS_AIX_VM_C1_GLOBALS_AIX_HPP
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
//
// Sets the default values for operating system dependent flags used by the
// client compiler. (see c1_globals.hpp)
//
#endif // OS_AIX_VM_C1_GLOBALS_AIX_HPP