Vladimir Kozlov 2009-03-19 09:13:24 -07:00
commit 41463d1d3a
81 changed files with 2355 additions and 948 deletions

@@ -118,9 +118,9 @@ public interface Debugger extends SymbolLookup, ThreadAccess {
   public long getJIntSize();
   public long getJLongSize();
   public long getJShortSize();
-  public long getHeapBase();
   public long getHeapOopSize();
-  public long getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase();
+  public int  getNarrowOopShift();
   public ReadResult readBytesFromProcess(long address, long numBytes)
     throws DebuggerException;

@@ -56,8 +56,8 @@ public abstract class DebuggerBase implements Debugger {
   // heap data.
   protected long oopSize;
   protected long heapOopSize;
-  protected long heapBase;                 // heap base for compressed oops.
-  protected long logMinObjAlignmentInBytes; // Used to decode compressed oops.
+  protected long narrowOopBase;  // heap base for compressed oops.
+  protected int  narrowOopShift; // shift to decode compressed oops.

   // Should be initialized if desired by calling initCache()
   private PageCache cache;
@@ -159,10 +159,10 @@ public abstract class DebuggerBase implements Debugger {
     javaPrimitiveTypesConfigured = true;
   }

-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignmentInBytes) {
-    this.heapBase = heapBase;
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift) {
     this.heapOopSize = heapOopSize;
-    this.logMinObjAlignmentInBytes = logMinObjAlignmentInBytes;
+    this.narrowOopBase = narrowOopBase;
+    this.narrowOopShift = narrowOopShift;
   }

   /** May be called by subclasses if desired to initialize the page
@@ -459,7 +459,7 @@ public abstract class DebuggerBase implements Debugger {
     long value = readCInteger(address, getHeapOopSize(), true);
     if (value != 0) {
       // See oop.inline.hpp decode_heap_oop
-      value = (long)(heapBase + (long)(value << logMinObjAlignmentInBytes));
+      value = (long)(narrowOopBase + (long)(value << narrowOopShift));
     }
     return value;
   }
@@ -545,10 +545,10 @@ public abstract class DebuggerBase implements Debugger {
     return heapOopSize;
   }

-  public long getHeapBase() {
-    return heapBase;
+  public long getNarrowOopBase() {
+    return narrowOopBase;
   }

-  public long getLogMinObjAlignmentInBytes() {
-    return logMinObjAlignmentInBytes;
+  public int getNarrowOopShift() {
+    return narrowOopShift;
   }
 }

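The decode that putHeapConst/readCompOopAddress implement above is just narrowOopBase + (narrow << narrowOopShift). A minimal, self-contained Java sketch of that arithmetic follows; CompressedOopDecoder is an illustrative name, not an SA class.

// Minimal sketch of the compressed-oop decode used by readCompOopAddress
// above; mirrors the "oop.inline.hpp decode_heap_oop" comment in the patch.
// CompressedOopDecoder is a hypothetical helper, not part of the SA.
public class CompressedOopDecoder {
  private final long narrowOopBase;  // heap base; 0 for a zero-based heap
  private final int  narrowOopShift; // log2 of object alignment, typically 3

  public CompressedOopDecoder(long narrowOopBase, int narrowOopShift) {
    this.narrowOopBase  = narrowOopBase;
    this.narrowOopShift = narrowOopShift;
  }

  /** Decode a 32-bit narrow oop to a full address; 0 stays 0 (null). */
  public long decode(long narrowOop) {
    if (narrowOop == 0) return 0;
    return narrowOopBase + (narrowOop << narrowOopShift);
  }

  public static void main(String[] args) {
    // Zero-based heap, 8-byte alignment: decode is a pure shift.
    CompressedOopDecoder d = new CompressedOopDecoder(0L, 3);
    System.out.println(Long.toHexString(d.decode(0x10L))); // prints 80
  }
}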
@@ -42,5 +42,5 @@ public interface JVMDebugger extends Debugger {
                                              long jintSize,
                                              long jlongSize,
                                              long jshortSize);
-  public void putHeapConst(long heapBase, long heapOopSize, long logMinObjAlignment);
+  public void putHeapConst(long heapOopSize, long narrowOopBase, int narrowOopShift);
 }

@@ -65,9 +65,10 @@ public interface RemoteDebugger extends Remote {
   public long getJIntSize() throws RemoteException;
   public long getJLongSize() throws RemoteException;
   public long getJShortSize() throws RemoteException;
-  public long getHeapBase() throws RemoteException;
   public long getHeapOopSize() throws RemoteException;
-  public long getLogMinObjAlignmentInBytes() throws RemoteException;
+  public long getNarrowOopBase() throws RemoteException;
+  public int  getNarrowOopShift() throws RemoteException;
   public boolean areThreadsEqual(long addrOrId1, boolean isAddress1,
                                  long addrOrId2, boolean isAddress2) throws RemoteException;
   public int getThreadHashCode(long addrOrId, boolean isAddress) throws RemoteException;

@@ -85,9 +85,9 @@ public class RemoteDebuggerClient extends DebuggerBase implements JVMDebugger {
       jlongSize = remoteDebugger.getJLongSize();
       jshortSize = remoteDebugger.getJShortSize();
       javaPrimitiveTypesConfigured = true;
-      heapBase = remoteDebugger.getHeapBase();
+      narrowOopBase  = remoteDebugger.getNarrowOopBase();
+      narrowOopShift = remoteDebugger.getNarrowOopShift();
       heapOopSize = remoteDebugger.getHeapOopSize();
-      logMinObjAlignmentInBytes = remoteDebugger.getLogMinObjAlignmentInBytes();
     }
     catch (RemoteException e) {
       throw new DebuggerException(e);

@@ -114,17 +114,18 @@ public class RemoteDebuggerServer extends UnicastRemoteObject
     return debugger.getJShortSize();
   }

-  public long getHeapBase() throws RemoteException {
-    return debugger.getHeapBase();
-  }
-
   public long getHeapOopSize() throws RemoteException {
     return debugger.getHeapOopSize();
   }

-  public long getLogMinObjAlignmentInBytes() throws RemoteException {
-    return debugger.getLogMinObjAlignmentInBytes();
+  public long getNarrowOopBase() throws RemoteException {
+    return debugger.getNarrowOopBase();
   }

+  public int getNarrowOopShift() throws RemoteException {
+    return debugger.getNarrowOopShift();
+  }
+
   public boolean areThreadsEqual(long addrOrId1, boolean isAddress1,
                                  long addrOrId2, boolean isAddress2) throws RemoteException {
     ThreadProxy t1 = getThreadProxy(addrOrId1, isAddress1);

@@ -53,7 +53,8 @@ public class Universe {
   // system obj array klass object
   private static sun.jvm.hotspot.types.OopField systemObjArrayKlassObjField;

-  private static AddressField heapBaseField;
+  private static AddressField narrowOopBaseField;
+  private static CIntegerField narrowOopShiftField;

   static {
     VM.registerVMInitializedObserver(new Observer() {
@@ -86,7 +87,8 @@ public class Universe {
     systemObjArrayKlassObjField = type.getOopField("_systemObjArrayKlassObj");

-    heapBaseField = type.getAddressField("_heap_base");
+    narrowOopBaseField = type.getAddressField("_narrow_oop._base");
+    narrowOopShiftField = type.getCIntegerField("_narrow_oop._shift");
   }

   public Universe() {
@@ -100,14 +102,18 @@ public class Universe {
     }
   }

-  public static long getHeapBase() {
-    if (heapBaseField.getValue() == null) {
+  public static long getNarrowOopBase() {
+    if (narrowOopBaseField.getValue() == null) {
       return 0;
     } else {
-      return heapBaseField.getValue().minus(null);
+      return narrowOopBaseField.getValue().minus(null);
     }
   }

+  public static int getNarrowOopShift() {
+    return (int)narrowOopShiftField.getValue();
+  }
+
   /** Returns "TRUE" iff "p" points into the allocated area of the heap. */
   public boolean isIn(Address p) {
     return heap().isIn(p);

@@ -342,8 +342,8 @@ public class VM {
       throw new RuntimeException("Attempt to initialize VM twice");
     }
     soleInstance = new VM(db, debugger, debugger.getMachineDescription().isBigEndian());
-    debugger.putHeapConst(Universe.getHeapBase(), soleInstance.getHeapOopSize(),
-                          soleInstance.logMinObjAlignmentInBytes);
+    debugger.putHeapConst(soleInstance.getHeapOopSize(), Universe.getNarrowOopBase(),
+                          Universe.getNarrowOopShift());
     for (Iterator iter = vmInitializedObservers.iterator(); iter.hasNext(); ) {
       ((Observer) iter.next()).update(null, null);
     }

@@ -29,6 +29,7 @@
 # cl version 13.10.3077 returns "MSC_VER=1310"
 # cl version 14.00.30701 returns "MSC_VER=1399" (OLD_MSSDK version)
 # cl version 14.00.40310.41 returns "MSC_VER=1400"
+# cl version 15.00.21022.8 returns "MSC_VER=1500"

 # Note that we currently do not have a way to set HotSpotMksHome in
 # the batch build, but so far this has not seemed to be a problem. The

@@ -170,11 +170,9 @@ LINK_FLAGS = /manifest $(LINK_FLAGS) $(BUFFEROVERFLOWLIB)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
+# VS2005 and later restricts the use of certain libc functions without this
 CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
 !endif
-!endif

 !if "$(COMPILER_NAME)" == "VS2008"
 PRODUCT_OPT_OPTION = /O2 /Oy-
@@ -185,11 +183,9 @@ LINK_FLAGS = /manifest $(LINK_FLAGS)
 # Manifest Tool - used in VS2005 and later to adjust manifests stored
 # as resources inside build artifacts.
 MT=mt.exe
-!if "$(BUILDARCH)" == "i486"
-# VS2005 on x86 restricts the use of certain libc functions without this
+# VS2005 and later restricts the use of certain libc functions without this
 CPP_FLAGS=$(CPP_FLAGS) /D _CRT_SECURE_NO_DEPRECATE
 !endif
-!endif

 # Compile for space above time.
 !if "$(Variant)" == "kernel"

@@ -89,9 +89,11 @@ checkAndBuildSA:: $(SAWINDBG)
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
 !elseif "$(BUILDARCH)" == "amd64"
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+!if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line,
 # otherwise we get missing __security_check_cookie externals at link time.
 SA_LINK_FLAGS = bufferoverflowU.lib
+!endif
 !else
 SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /ZI /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
 !endif

@@ -27,9 +27,9 @@
 all: checkCL checkLink

 checkCL:
-	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" \
+	@ if "$(MSC_VER)" NEQ "1310" if "$(MSC_VER)" NEQ "1399" if "$(MSC_VER)" NEQ "1400" if "$(MSC_VER)" NEQ "1500" \
 	echo *** WARNING *** unrecognized cl.exe version $(MSC_VER) ($(RAW_MSC_VER)). Use FORCE_MSC_VER to override automatic detection.

 checkLink:
-	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" \
+	@ if "$(LINK_VER)" NEQ "710" if "$(LINK_VER)" NEQ "800" if "$(LINK_VER)" NEQ "900" \
 	echo *** WARNING *** unrecognized link.exe version $(LINK_VER) ($(RAW_LINK_VER)). Use FORCE_LINK_VER to override automatic detection.

@@ -2767,6 +2767,268 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
 }

+
+void MacroAssembler::check_klass_subtype(Register sub_klass,
+                                         Register super_klass,
+                                         Register temp_reg,
+                                         Register temp2_reg,
+                                         Label& L_success) {
+  Label L_failure, L_pop_to_failure;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                &L_success, &L_failure, NULL);
+  Register sub_2 = sub_klass;
+  Register sup_2 = super_klass;
+  if (!sub_2->is_global())  sub_2 = L0;
+  if (!sup_2->is_global())  sup_2 = L1;
+
+  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  check_klass_subtype_slow_path(sub_2, sup_2,
+                                L2, L3, L4, L5,
+                                NULL, &L_pop_to_failure);
+
+  // on success:
+  restore();
+  ba(false, L_success);
+  delayed()->nop();
+
+  // on failure:
+  bind(L_pop_to_failure);
+  restore();
+  bind(L_failure);
+}
+
+
+void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register temp_reg,
+                                                   Register temp2_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure,
+                                                   Label* L_slow_path,
+                                                   RegisterConstant super_check_offset,
+                                                   Register instanceof_hack) {
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+  int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
+                    Klass::super_check_offset_offset_in_bytes());
+
+  bool must_load_sco  = (super_check_offset.constant_or_zero() == -1);
+  bool need_slow_path = (must_load_sco ||
+                         super_check_offset.constant_or_zero() == sco_offset);
+
+  assert_different_registers(sub_klass, super_klass, temp_reg);
+  if (super_check_offset.is_register()) {
+    assert_different_registers(sub_klass, super_klass,
+                               super_check_offset.as_register());
+  } else if (must_load_sco) {
+    assert(temp2_reg != noreg, "supply either a temp or a register offset");
+  }
+
+  Label L_fallthrough;
+  int label_nulls = 0;
+  if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
+  if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1 || instanceof_hack != noreg ||
+         (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path),
+         "at most one NULL in the batch, usually");
+
+  // Support for the instanceof hack, which uses delay slots to
+  // set a destination register to zero or one.
+  bool do_bool_sets = (instanceof_hack != noreg);
+#define BOOL_SET(bool_value)                            \
+  if (do_bool_sets && bool_value >= 0)                  \
+    set(bool_value, instanceof_hack)
+#define DELAYED_BOOL_SET(bool_value)                    \
+  if (do_bool_sets && bool_value >= 0)                  \
+    delayed()->set(bool_value, instanceof_hack);        \
+  else delayed()->nop()
+  // Hacked ba(), which may only be used just before L_fallthrough.
+#define FINAL_JUMP(label, bool_value)                   \
+  if (&(label) == &L_fallthrough) {                     \
+    BOOL_SET(bool_value);                               \
+  } else {                                              \
+    ba((do_bool_sets && bool_value >= 0), label);       \
+    DELAYED_BOOL_SET(bool_value);                       \
+  }
+
+  // If the pointers are equal, we are done (e.g., String[] elements).
+  // This self-check enables sharing of secondary supertype arrays among
+  // non-primary types such as array-of-interface.  Otherwise, each such
+  // type would need its own customized SSA.
+  // We move this check to the front of the fast path because many
+  // type checks are in fact trivially successful in this manner,
+  // so we get a nicely predicted branch right at the start of the check.
+  cmp(super_klass, sub_klass);
+  brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+  DELAYED_BOOL_SET(1);
+
+  // Check the supertype display:
+  if (must_load_sco) {
+    // The super check offset is always positive...
+    lduw(super_klass, sco_offset, temp2_reg);
+    super_check_offset = RegisterConstant(temp2_reg);
+  }
+  ld_ptr(sub_klass, super_check_offset, temp_reg);
+  cmp(super_klass, temp_reg);
+
+  // This check has worked decisively for primary supers.
+  // Secondary supers are sought in the super_cache ('super_cache_addr').
+  // (Secondary supers are interfaces and very deeply nested subtypes.)
+  // This works in the same check above because of a tricky aliasing
+  // between the super_cache and the primary super display elements.
+  // (The 'super_check_addr' can address either, as the case requires.)
+  // Note that the cache is updated below if it does not help us find
+  // what we need immediately.
+  // So if it was a primary super, we can just fail immediately.
+  // Otherwise, it's the slow path for us (no success at this point).
+
+  if (super_check_offset.is_register()) {
+    brx(Assembler::equal, do_bool_sets, Assembler::pn, *L_success);
+    delayed(); if (do_bool_sets)  BOOL_SET(1);
+    // if !do_bool_sets, sneak the next cmp into the delay slot:
+    cmp(super_check_offset.as_register(), sc_offset);
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_slow_path);
+      delayed()->nop();
+      BOOL_SET(0);  // fallthrough on failure
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_slow_path, -1);  // -1 => vanilla delay slot
+    }
+  } else if (super_check_offset.as_constant() == sc_offset) {
+    // Need a slow path; fast failure is impossible.
+    if (L_slow_path == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+    } else {
+      brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path);
+      delayed()->nop();
+      FINAL_JUMP(*L_success, 1);
+    }
+  } else {
+    // No slow path; it's a fast decision.
+    if (L_failure == &L_fallthrough) {
+      brx(Assembler::equal, do_bool_sets, Assembler::pt, *L_success);
+      DELAYED_BOOL_SET(1);
+      BOOL_SET(0);
+    } else {
+      brx(Assembler::notEqual, do_bool_sets, Assembler::pn, *L_failure);
+      DELAYED_BOOL_SET(0);
+      FINAL_JUMP(*L_success, 1);
+    }
+  }
+
+  bind(L_fallthrough);
+
+#undef final_jump
+#undef bool_set
+#undef DELAYED_BOOL_SET
+#undef final_jump
+}
+
+
+void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
+                                                   Register super_klass,
+                                                   Register count_temp,
+                                                   Register scan_temp,
+                                                   Register scratch_reg,
+                                                   Register coop_reg,
+                                                   Label* L_success,
+                                                   Label* L_failure) {
+  assert_different_registers(sub_klass, super_klass,
+                             count_temp, scan_temp, scratch_reg, coop_reg);
+
+  Label L_fallthrough, L_loop;
+  int label_nulls = 0;
+  if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
+  if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
+  assert(label_nulls <= 1, "at most one NULL in the batch");
+
+  // a couple of useful fields in sub_klass:
+  int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_supers_offset_in_bytes());
+  int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
+                   Klass::secondary_super_cache_offset_in_bytes());
+
+  // Do a linear scan of the secondary super-klass chain.
+  // This code is rarely used, so simplicity is a virtue here.
+
+#ifndef PRODUCT
+  int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
+  inc_counter((address) pst_counter, count_temp, scan_temp);
+#endif
+
+  // We will consult the secondary-super array.
+  ld_ptr(sub_klass, ss_offset, scan_temp);
+
+  // Compress superclass if necessary.
+  Register search_key = super_klass;
+  bool decode_super_klass = false;
+  if (UseCompressedOops) {
+    if (coop_reg != noreg) {
+      encode_heap_oop_not_null(super_klass, coop_reg);
+      search_key = coop_reg;
+    } else {
+      encode_heap_oop_not_null(super_klass);
+      decode_super_klass = true;  // scarce temps!
+    }
+    // The superclass is never null; it would be a basic system error if a null
+    // pointer were to sneak in here.  Note that we have already loaded the
+    // Klass::super_check_offset from the super_klass in the fast path,
+    // so if there is a null in that register, we are already in the afterlife.
+  }
+
+  // Load the array length.  (Positive movl does right thing on LP64.)
+  lduw(scan_temp, arrayOopDesc::length_offset_in_bytes(), count_temp);
+
+  // Check for empty secondary super list
+  tst(count_temp);
+
+  // Top of search loop
+  bind(L_loop);
+  br(Assembler::equal, false, Assembler::pn, *L_failure);
+  delayed()->add(scan_temp, heapOopSize, scan_temp);
+  assert(heapOopSize != 0, "heapOopSize should be initialized");
+
+  // Skip the array header in all array accesses.
+  int elem_offset = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+  elem_offset -= heapOopSize;   // the scan pointer was pre-incremented also
+
+  // Load next super to check
+  if (UseCompressedOops) {
+    // Don't use load_heap_oop; we don't want to decode the element.
+    lduw(   scan_temp, elem_offset, scratch_reg );
+  } else {
+    ld_ptr( scan_temp, elem_offset, scratch_reg );
+  }
+
+  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
+  cmp(scratch_reg, search_key);
+
+  // A miss means we are NOT a subtype and need to keep looping
+  brx(Assembler::notEqual, false, Assembler::pn, L_loop);
+  delayed()->deccc(count_temp); // decrement trip counter in delay slot
+
+  // Falling out the bottom means we found a hit; we ARE a subtype
+  if (decode_super_klass) decode_heap_oop(super_klass);
+
+  // Success.  Cache the super we found and proceed in triumph.
+  st_ptr(super_klass, sub_klass, sc_offset);
+
+  if (L_success != &L_fallthrough) {
+    ba(false, *L_success);
+    delayed()->nop();
+  }
+
+  bind(L_fallthrough);
+}
+
+
 void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg,
                                           Register temp_reg,
                                           Label& done, Label* slow_case,
@@ -4316,7 +4578,13 @@ void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) {

 void MacroAssembler::encode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+    return;
+  }
   Label done;
   if (src == dst) {
     // optimize for frequent case src == dst
@@ -4338,26 +4606,39 @@ void MacroAssembler::encode_heap_oop(Register src, Register dst) {

 void MacroAssembler::encode_heap_oop_not_null(Register r) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(r);
+  if (Universe::narrow_oop_base() != NULL)
     sub(r, G6_heapbase, r);
   srlx(r, LogMinObjAlignmentInBytes, r);
 }

 void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   verify_oop(src);
+  if (Universe::narrow_oop_base() == NULL) {
+    srlx(src, LogMinObjAlignmentInBytes, dst);
+  } else {
     sub(src, G6_heapbase, dst);
     srlx(dst, LogMinObjAlignmentInBytes, dst);
+  }
 }

 // Same algorithm as oops.inline.hpp decode_heap_oop.
 void MacroAssembler::decode_heap_oop(Register src, Register dst) {
   assert (UseCompressedOops, "must be compressed");
-  Label done;
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() != NULL) {
+    Label done;
   bpr(rc_nz, true, Assembler::pt, dst, done);
   delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken
   bind(done);
+  }
   verify_oop(dst);
 }

@@ -4366,7 +4647,10 @@ void MacroAssembler::decode_heap_oop_not_null(Register r) {
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(r, LogMinObjAlignmentInBytes, r);
+  if (Universe::narrow_oop_base() != NULL)
     add(r, G6_heapbase, r);
 }

@@ -4375,14 +4659,17 @@ void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) {
   // pd_code_size_limit.
   // Also do not verify_oop as this is called by verify_oop.
   assert (UseCompressedOops, "must be compressed");
+  assert (Universe::heap() != NULL, "java heap should be initialized");
+  assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
   sllx(src, LogMinObjAlignmentInBytes, dst);
+  if (Universe::narrow_oop_base() != NULL)
     add(dst, G6_heapbase, dst);
 }

 void MacroAssembler::reinit_heapbase() {
   if (UseCompressedOops) {
     // call indirectly to solve generation ordering problem
-    Address base(G6_heapbase, (address)Universe::heap_base_addr());
+    Address base(G6_heapbase, (address)Universe::narrow_oop_base_addr());
     load_ptr_contents(base, G6_heapbase);
   }
 }

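All of the encode/decode changes above hinge on one specialization: when Universe::narrow_oop_base() is NULL (a zero-based heap), the G6_heapbase add/sub drops out and only the alignment shift remains, which is also why klass_load_size shrinks from 3 to 2 instructions in the sparc.ad hunks further down. A hedged Java restatement of the arithmetic, with assumed sample values standing in for the VM queries:

// Sketch only; BASE and SHIFT are assumed values standing in for
// Universe::narrow_oop_base() and Universe::narrow_oop_shift().
final class NarrowOopMath {
  static final long BASE  = 0L; // 0 models a zero-based compressed-oop heap
  static final int  SHIFT = 3;  // LogMinObjAlignmentInBytes

  // Mirrors encode_heap_oop_not_null: callers guarantee oop != 0.
  static long encode(long oop) {
    return (BASE == 0) ? (oop >>> SHIFT) : ((oop - BASE) >>> SHIFT);
  }

  // Mirrors decode_heap_oop_not_null: the base add is skipped when zero-based.
  static long decode(long narrow) {
    long v = narrow << SHIFT;
    return (BASE == 0) ? v : v + BASE;
  }
}

The null-tolerant variants (encode_heap_oop/decode_heap_oop) wrap the same arithmetic in a branch so a null oop stays null.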
@@ -2327,6 +2327,46 @@ class MacroAssembler: public Assembler {
                                Register temp_reg, Register temp2_reg,
                                Label& no_such_interface);

+  // Test sub_klass against super_klass, with fast and slow paths.
+
+  // The fast path produces a tri-state answer: yes / no / maybe-slow.
+  // One of the three labels can be NULL, meaning take the fall-through.
+  // If super_check_offset is -1, the value is loaded up from super_klass.
+  // No registers are killed, except temp_reg and temp2_reg.
+  // If super_check_offset is not -1, temp2_reg is not used and can be noreg.
+  void check_klass_subtype_fast_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Label* L_success,
+                                     Label* L_failure,
+                                     Label* L_slow_path,
+                                     RegisterConstant super_check_offset = RegisterConstant(-1),
+                                     Register instanceof_hack = noreg);
+
+  // The rest of the type check; must be wired to a corresponding fast path.
+  // It does not repeat the fast path logic, so don't use it standalone.
+  // The temp_reg can be noreg, if no temps are available.
+  // It can also be sub_klass or super_klass, meaning it's OK to kill that one.
+  // Updates the sub's secondary super cache as necessary.
+  void check_klass_subtype_slow_path(Register sub_klass,
+                                     Register super_klass,
+                                     Register temp_reg,
+                                     Register temp2_reg,
+                                     Register temp3_reg,
+                                     Register temp4_reg,
+                                     Label* L_success,
+                                     Label* L_failure);
+
+  // Simplified, combined version, good for typical uses.
+  // Falls through on failure.
+  void check_klass_subtype(Register sub_klass,
+                           Register super_klass,
+                           Register temp_reg,
+                           Register temp2_reg,
+                           Label& L_success);
+
   // Stack overflow checking

   // Note: this clobbers G3_scratch

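The comments above give the contract of the fast/slow split; as a companion, here is a hedged Java rendering of the algorithm as a whole. Klass and its fields are illustrative stand-ins for the klassOop layout the assembly walks, not real VM or SA types.

// Illustrative model of the two-phase subtype check declared above.
final class SubtypeCheckModel {
  static final class Klass {
    Klass   display;          // word read at the super's super_check_offset
    boolean displayIsCache;   // does that offset alias the secondary_super_cache?
    Klass[] secondarySupers = new Klass[0];
    Klass   secondarySuperCache;
  }

  static boolean isSubtype(Klass sub, Klass sup) {
    // Fast path, tri-state: yes / no / maybe-slow.
    if (sub == sup) return true;              // trivial self-check first
    if (!sub.displayIsCache) {
      return sub.display == sup;              // primary supers: decisive
    }
    if (sub.secondarySuperCache == sup) return true; // cached secondary hit
    // Slow path: linear scan of the secondary supers, caching any hit.
    for (Klass s : sub.secondarySupers) {
      if (s == sup) {
        sub.secondarySuperCache = sup;        // speeds up the next query
        return true;
      }
    }
    return false;
  }
}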
@@ -2393,23 +2393,11 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
         // get instance klass
         load(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc), k_RInfo, T_OBJECT, NULL);
-        // get super_check_offset
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-        // See if we get an immediate positive hit
-        __ ld_ptr(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr->as_register());
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-        // check for immediate negative hit
-        __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-        __ delayed()->nop();
-        // check for self
-        __ cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, false, Assembler::pn, done);
-        __ delayed()->nop();
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, &done, stub->entry(), NULL);
+
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ cmp(G3, 0);
@@ -2493,25 +2481,23 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       __ delayed()->nop();
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-          __ delayed()->nop();
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         stub->entry(), NULL,
+                                         RegisterConstant(k->super_check_offset()));
       } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          // check for self
-          __ delayed()->cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, false, Assembler::pn, done);
-          __ delayed()->nop();
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7,
+                                         &done, stub->entry(), NULL);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ cmp(G3, 0);
@@ -2519,32 +2505,6 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
         __ delayed()->nop();
       }
       __ bind(done);
-    } else {
-      assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-      load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1, T_INT, NULL);
-      // See if we get an immediate positive hit
-      load(klass_RInfo, Rtmp1, FrameMap::O7_oop_opr, T_OBJECT);
-      __ cmp(k_RInfo, O7);
-      __ br(Assembler::equal, false, Assembler::pn, done);
-      __ delayed()->nop();
-      // check for immediate negative hit
-      __ cmp(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-      __ br(Assembler::notEqual, false, Assembler::pn, *stub->entry());
-      // check for self
-      __ delayed()->cmp(klass_RInfo, k_RInfo);
-      __ br(Assembler::equal, false, Assembler::pn, done);
-      __ delayed()->nop();
-      // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-      __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-      __ delayed()->nop();
-      __ cmp(G3, 0);
-      __ br(Assembler::equal, false, Assembler::pn, *stub->entry());
-      __ delayed()->nop();
-      __ bind(done);
-    }
     }
     __ mov(obj, dst);
   } else if (code == lir_instanceof) {
@@ -2582,58 +2542,32 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
       __ set(0, dst);
       __ bind(done);
     } else {
+      bool need_slow_path = true;
       if (k->is_loaded()) {
-        assert_different_registers(Rtmp1, klass_RInfo, k_RInfo);
-        load(klass_RInfo, k->super_check_offset(), Rtmp1, T_OBJECT, NULL);
-
-        if (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() != k->super_check_offset()) {
-          // See if we get an immediate positive hit
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          __ set(0, dst);
-          __ bind(done);
-        } else {
-          // See if we get an immediate positive hit
-          assert_different_registers(Rtmp1, k_RInfo, klass_RInfo);
-          __ cmp(Rtmp1, k_RInfo );
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          // check for self
-          __ cmp(klass_RInfo, k_RInfo);
-          __ br(Assembler::equal, true, Assembler::pt, done);
-          __ delayed()->set(1, dst);
-          // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
-          __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
-          __ delayed()->nop();
-          __ mov(G3, dst);
-          __ bind(done);
-        }
+        if (k->super_check_offset() != sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())
+          need_slow_path = false;
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg,
+                                         (need_slow_path ? &done : NULL),
+                                         (need_slow_path ? &done : NULL), NULL,
+                                         RegisterConstant(k->super_check_offset()),
+                                         dst);
       } else {
         assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
-
-        load(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), dst, T_INT, NULL);
-        // See if we get an immediate positive hit
-        load(klass_RInfo, dst, FrameMap::O7_oop_opr, T_OBJECT);
-        __ cmp(k_RInfo, O7);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-        // check for immediate negative hit
-        __ cmp(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
-        __ br(Assembler::notEqual, true, Assembler::pt, done);
-        __ delayed()->set(0, dst);
-        // check for self
-        __ cmp(klass_RInfo, k_RInfo);
-        __ br(Assembler::equal, true, Assembler::pt, done);
-        __ delayed()->set(1, dst);
-        // assert(sub.is_same(FrameMap::G3_RInfo) && super.is_same(FrameMap::G1_RInfo), "incorrect call setup");
+        // perform the fast part of the checking logic
+        __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst,
+                                         &done, &done, NULL,
+                                         RegisterConstant(-1),
+                                         dst);
+      }
+      if (need_slow_path) {
+        // call out-of-line instance of __ check_klass_subtype_slow_path(...):
+        assert(klass_RInfo == G3 && k_RInfo == G1, "incorrect call setup");
         __ call(Runtime1::entry_for(Runtime1::slow_subtype_check_id), relocInfo::runtime_call_type);
         __ delayed()->nop();
         __ mov(G3, dst);
-        __ bind(done);
       }
+      __ bind(done);
     }
   } else {
     ShouldNotReachHere();

@@ -714,38 +714,19 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
       // sub  : G3, argument, destroyed
      // super: G1, argument, not changed
       // raddr: O7, blown by call
-      Label loop, miss;
+      Label miss;

       __ save_frame(0);               // Blow no registers!

-      __ ld_ptr( G3, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-      __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0); // length in l0
-      __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1); // ptr into array
-      __ clr(L4);                     // Index
-      // Load a little early; will load 1 off the end of the array.
-      // Ok for now; revisit if we have other uses of this routine.
-      __ ld_ptr(L1,0,L2);             // Will load a little early
-
-      // The scan loop
-      __ bind(loop);
-      __ add(L1,wordSize,L1);         // Bump by OOP size
-      __ cmp(L4,L0);
-      __ br(Assembler::equal,false,Assembler::pn,miss);
-      __ delayed()->inc(L4);          // Bump index
-      __ subcc(L2,G1,L3);             // Check for match; zero in L3 for a hit
-      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->ld_ptr(L1,0,L2);  // Will load a little early
-
-      // Got a hit; report success; set cache
-      __ st_ptr( G1, G3, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+      __ check_klass_subtype_slow_path(G3, G1, L0, L1, L2, L4, NULL, &miss);

       __ mov(1, G3);
-      __ ret();                       // Result in G5 is ok; flags set
+      __ ret();                       // Result in G5 is 'true'
       __ delayed()->restore();        // free copy or add can go here

       __ bind(miss);
       __ mov(0, G3);
-      __ ret();                       // Result in G5 is ok; flags set
+      __ ret();                       // Result in G5 is 'false'
       __ delayed()->restore();        // free copy or add can go here
     }

@@ -866,65 +866,18 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
                                                   Register Rtmp2,
                                                   Register Rtmp3,
                                                   Label &ok_is_subtype ) {
-  Label not_subtype, loop;
+  Label not_subtype;

   // Profile the not-null value's klass.
   profile_typecheck(Rsub_klass, Rtmp1);

-  // Load the super-klass's check offset into Rtmp1
-  ld( Rsuper_klass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes(), Rtmp1 );
-  // Load from the sub-klass's super-class display list, or a 1-word cache of
-  // the secondary superclass list, or a failing value with a sentinel offset
-  // if the super-klass is an interface or exceptionally deep in the Java
-  // hierarchy and we have to scan the secondary superclass list the hard way.
-  ld_ptr( Rsub_klass, Rtmp1, Rtmp2 );
-
-  // See if we get an immediate positive hit
-  cmp( Rtmp2, Rsuper_klass );
-  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
-  // In the delay slot, check for immediate negative hit
-  delayed()->cmp( Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
-  br( Assembler::notEqual, false, Assembler::pt, not_subtype );
-  // In the delay slot, check for self
-  delayed()->cmp( Rsub_klass, Rsuper_klass );
-  brx( Assembler::equal, false, Assembler::pt, ok_is_subtype );
+  check_klass_subtype_fast_path(Rsub_klass, Rsuper_klass,
+                                Rtmp1, Rtmp2,
+                                &ok_is_subtype, &not_subtype, NULL);

-  // Now do a linear scan of the secondary super-klass chain.
-  delayed()->ld_ptr( Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), Rtmp2 );
-
-  // compress superclass
-  if (UseCompressedOops) encode_heap_oop(Rsuper_klass);
-
-  // Rtmp2 holds the objArrayOop of secondary supers.
-  ld( Rtmp2, arrayOopDesc::length_offset_in_bytes(), Rtmp1 );// Load the array length
-  // Check for empty secondary super list
-  tst(Rtmp1);
-
-  // Top of search loop
-  bind( loop );
-  br( Assembler::equal, false, Assembler::pn, not_subtype );
-  delayed()->nop();
-
-  // load next super to check
-  if (UseCompressedOops) {
-    lduw( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
-    // Bump array pointer forward one oop
-    add( Rtmp2, 4, Rtmp2 );
-  } else {
-    ld_ptr( Rtmp2, arrayOopDesc::base_offset_in_bytes(T_OBJECT), Rtmp3);
-    // Bump array pointer forward one oop
-    add( Rtmp2, wordSize, Rtmp2);
-  }
-  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
-  cmp( Rtmp3, Rsuper_klass );
-  // A miss means we are NOT a subtype and need to keep looping
-  brx( Assembler::notEqual, false, Assembler::pt, loop );
-  delayed()->deccc( Rtmp1 );  // dec trip counter in delay slot
-  // Falling out the bottom means we found a hit; we ARE a subtype
-  if (UseCompressedOops) decode_heap_oop(Rsuper_klass);
-  br( Assembler::always, false, Assembler::pt, ok_is_subtype );
-  // Update the cache
-  delayed()->st_ptr( Rsuper_klass, Rsub_klass,
-                     sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+  check_klass_subtype_slow_path(Rsub_klass, Rsuper_klass,
+                                Rtmp1, Rtmp2, Rtmp3, /*hack:*/ noreg,
+                                &ok_is_subtype, NULL);

   bind(not_subtype);
   profile_typecheck_failed(Rtmp1);

@@ -547,7 +547,11 @@ int MachCallDynamicJavaNode::ret_addr_offset() {
     int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
     int klass_load_size;
     if (UseCompressedOops) {
-      klass_load_size = 3*BytesPerInstWord;  // see MacroAssembler::load_klass()
+      assert(Universe::heap() != NULL, "java heap should be initialized");
+      if (Universe::narrow_oop_base() == NULL)
+        klass_load_size = 2*BytesPerInstWord; // see MacroAssembler::load_klass()
+      else
+        klass_load_size = 3*BytesPerInstWord;
     } else {
       klass_load_size = 1*BytesPerInstWord;
     }
@@ -1601,8 +1605,10 @@ void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
   st->print_cr("\nUEP:");
 #ifdef _LP64
   if (UseCompressedOops) {
+    assert(Universe::heap() != NULL, "java heap should be initialized");
     st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
     st->print_cr("\tSLL    R_G5,3,R_G5");
+    if (Universe::narrow_oop_base() != NULL)
       st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
   } else {
     st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
@@ -2502,6 +2508,10 @@ encode %{
     __ load_klass(O0, G3_scratch);
     int klass_load_size;
     if (UseCompressedOops) {
+      assert(Universe::heap() != NULL, "java heap should be initialized");
+      if (Universe::narrow_oop_base() == NULL)
+        klass_load_size = 2*BytesPerInstWord;
+      else
         klass_load_size = 3*BytesPerInstWord;
     } else {
       klass_load_size = 1*BytesPerInstWord;
@@ -9005,6 +9015,33 @@ instruct string_compare(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, note
   ins_pipe(long_memory_op);
 %}

+
+//---------- Population Count Instructions -------------------------------------
+
+instruct popCountI(iRegI dst, iRegI src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountI src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Note: Long.bitCount(long) returns an int.
+instruct popCountL(iRegI dst, iRegL src) %{
+  predicate(UsePopCountInstruction);
+  match(Set dst (PopCountL src));
+
+  format %{ "POPC   $src, $dst" %}
+  ins_encode %{
+    __ popc($src$$Register, $dst$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
 // ============================================================================
 //------------Bytes reverse--------------------------------------------------

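The popCountI/popCountL rules above are what let C2 compile Integer.bitCount and Long.bitCount down to a single POPC once UsePopCountInstruction is on (see the vm_version_sparc.cpp hunk below); Java callers need no changes:

// Plain Java; the JIT supplies the POPC instruction transparently.
public class PopCountDemo {
  public static void main(String[] args) {
    System.out.println(Integer.bitCount(0b1011)); // 3
    System.out.println(Long.bitCount(-1L));       // 64
    // Long.bitCount(long) returns an int, matching popCountL's
    // (Set dst (PopCountL src)) shape: iRegI dst, iRegL src.
  }
}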
@@ -900,19 +900,7 @@ class StubGenerator: public StubCodeGenerator {
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "partial_subtype_check");
     address start = __ pc();
-    Label loop, miss;
-
-    // Compare super with sub directly, since super is not in its own SSA.
-    // The compiler used to emit this test, but we fold it in here,
-    // to increase overall code density, with no real loss of speed.
-    { Label L;
-      __ cmp(O1, O2);
-      __ brx(Assembler::notEqual, false, Assembler::pt, L);
-      __ delayed()->nop();
-      __ retl();
-      __ delayed()->addcc(G0,0,O0); // set Z flags, zero result
-      __ bind(L);
-    }
+    Label miss;

 #if defined(COMPILER2) && !defined(_LP64)
     // Do not use a 'save' because it blows the 64-bit O registers.
@@ -936,56 +924,12 @@ class StubGenerator: public StubCodeGenerator {
     Register L2_super = L2;
     Register L3_index = L3;
-#ifdef _LP64
-    Register L4_ooptmp = L4;
-
-    if (UseCompressedOops) {
-      // this must be under UseCompressedOops check, as we rely upon fact
-      // that L4 not clobbered in C2 on 32-bit platforms, where we do explicit save
-      // on stack, see several lines above
-      __ encode_heap_oop(Rsuper, L4_ooptmp);
-    }
-#endif
-
-    inc_counter_np(SharedRuntime::_partial_subtype_ctr, L0, L1);
-
-    __ ld_ptr( Rsub, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes(), L3 );
-    __ lduw(L3,arrayOopDesc::length_offset_in_bytes(),L0_ary_len);
-    __ add(L3,arrayOopDesc::base_offset_in_bytes(T_OBJECT),L1_ary_ptr);
-    __ clr(L3_index);           // zero index
-    // Load a little early; will load 1 off the end of the array.
-    // Ok for now; revisit if we have other uses of this routine.
-    if (UseCompressedOops) {
-      __ lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-    } else {
-      __ ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    assert(heapOopSize != 0, "heapOopSize should be initialized");
-    // The scan loop
-    __ BIND(loop);
-    __ add(L1_ary_ptr, heapOopSize, L1_ary_ptr); // Bump by OOP size
-    __ cmp(L3_index,L0_ary_len);
-    __ br(Assembler::equal,false,Assembler::pn,miss);
-    __ delayed()->inc(L3_index); // Bump index
-
-    if (UseCompressedOops) {
-#ifdef _LP64
-      __ subcc(L2_super,L4_ooptmp,Rret);   // Check for match; zero in Rret for a hit
-      __ br( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->lduw(L1_ary_ptr,0,L2_super);// Will load a little early
-#else
-      ShouldNotReachHere();
-#endif
-    } else {
-      __ subcc(L2_super,Rsuper,Rret);      // Check for match; zero in Rret for a hit
-      __ brx( Assembler::notEqual, false, Assembler::pt, loop );
-      __ delayed()->ld_ptr(L1_ary_ptr,0,L2_super);// Will load a little early
-    }
-
-    // Got a hit; report success; set cache.  Cache load doesn't
-    // happen here; for speed it is directly emitted by the compiler.
-    __ st_ptr( Rsuper, Rsub, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
+
+    __ check_klass_subtype_slow_path(Rsub, Rsuper,
+                                     L0, L1, L2, L3,
+                                     NULL, &miss);
+
+    // Match falls through here.
+    __ addcc(G0,0,Rret);        // set Z flags, Z result

 #if defined(COMPILER2) && !defined(_LP64)
     __ ld_ptr(SP,(frame::register_save_words+0)*wordSize,L0);
@@ -999,7 +943,6 @@ class StubGenerator: public StubCodeGenerator {
     __ delayed()->restore();
 #endif

-    // Hit or miss falls through here
     __ BIND(miss);
     __ addcc(G0,1,Rret);        // set NZ flags, NZ result
@@ -2330,51 +2273,31 @@ class StubGenerator: public StubCodeGenerator {
                            Register super_check_offset,
                            Register super_klass,
                            Register temp,
-                           Label& L_success,
-                           Register deccc_hack = noreg) {
+                           Label& L_success) {
     assert_different_registers(sub_klass, super_check_offset, super_klass, temp);

     BLOCK_COMMENT("type_check:");

-    Label L_miss;
+    Label L_miss, L_pop_to_miss;

     assert_clean_int(super_check_offset, temp);

-    // maybe decrement caller's trip count:
-#define DELAY_SLOT delayed();   \
-    { if (deccc_hack == noreg) __ nop(); else __ deccc(deccc_hack); }
-
-    // if the pointers are equal, we are done (e.g., String[] elements)
-    __ cmp(sub_klass, super_klass);
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    // check the supertype display:
-    __ ld_ptr(sub_klass, super_check_offset, temp); // query the super type
-    __ cmp(super_klass, temp); // test the super type
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-    int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
-                     Klass::secondary_super_cache_offset_in_bytes());
-    __ cmp(super_klass, sc_offset);
-    __ brx(Assembler::notEqual, true, Assembler::pt, L_miss);
-    __ delayed()->nop();
+    __ check_klass_subtype_fast_path(sub_klass, super_klass, temp, noreg,
+                                     &L_success, &L_miss, NULL,
+                                     super_check_offset);

-    BLOCK_COMMENT("type_check_slow_path:");
     __ save_frame(0);
-    __ mov(sub_klass->after_save(), O1);
-    // mov(super_klass->after_save(), O2); //fill delay slot
-    assert(StubRoutines::Sparc::_partial_subtype_check != NULL, "order of generation");
-    __ call(StubRoutines::Sparc::_partial_subtype_check);
-    __ delayed()->mov(super_klass->after_save(), O2);
+    __ check_klass_subtype_slow_path(sub_klass->after_save(),
+                                     super_klass->after_save(),
+                                     L0, L1, L2, L4,
+                                     NULL, &L_pop_to_miss);
+    __ ba(false, L_success);
+    __ delayed()->restore();
+
+    __ bind(L_pop_to_miss);
     __ restore();

-    // Upon return, the condition codes are already set.
-    __ brx(Assembler::equal, true, Assembler::pt, L_success);
-    __ DELAY_SLOT;
-
-#undef DELAY_SLOT
-
     // Fall through on failure!
     __ BIND(L_miss);
   }
@@ -2411,7 +2334,7 @@ class StubGenerator: public StubCodeGenerator {
     gen_write_ref_array_pre_barrier(O1, O2);

 #ifdef ASSERT
-    // We sometimes save a frame (see partial_subtype_check below).
+    // We sometimes save a frame (see generate_type_check below).
     // If this will cause trouble, let's fail now instead of later.
     __ save_frame(0);
     __ restore();
@@ -2455,41 +2378,39 @@ class StubGenerator: public StubCodeGenerator {
     // G3, G4, G5 --- current oop, oop.klass, oop.klass.super
     __ align(16);

-    __ bind(store_element);
-    // deccc(G1_remain);                // decrement the count (hoisted)
+    __ BIND(store_element);
+    __ deccc(G1_remain);                // decrement the count
     __ store_heap_oop(G3_oop, O1_to, O5_offset); // store the oop
     __ inc(O5_offset, heapOopSize);     // step to next offset
     __ brx(Assembler::zero, true, Assembler::pt, do_card_marks);
     __ delayed()->set(0, O0);           // return -1 on success

     // ======== loop entry is here ========
-    __ bind(load_element);
+    __ BIND(load_element);
     __ load_heap_oop(O0_from, O5_offset, G3_oop);  // load the oop
     __ br_null(G3_oop, true, Assembler::pt, store_element);
-    __ delayed()->deccc(G1_remain);     // decrement the count
+    __ delayed()->nop();

     __ load_klass(G3_oop, G4_klass);    // query the object klass

     generate_type_check(G4_klass, O3_ckoff, O4_ckval, G5_super,
                         // branch to this on success:
-                        store_element,
-                        // decrement this on success:
-                        G1_remain);
+                        store_element);
     // ======== end loop ========

     // It was a real error; we must depend on the caller to finish the job.
     // Register G1 has number of *remaining* oops, O2 number of *total* oops.
     // Emit GC store barriers for the oops we have copied (O2 minus G1),
     // and report their number to the caller.
-    __ bind(fail);
+    __ BIND(fail);
     __ subcc(O2_count, G1_remain, O2_count);
     __ brx(Assembler::zero, false, Assembler::pt, done);
     __ delayed()->not1(O2_count, O0);   // report (-1^K) to caller

-    __ bind(do_card_marks);
+    __ BIND(do_card_marks);
     gen_write_ref_array_post_barrier(O1_to, O2_count, O3);  // store check on O1[0..O2]

-    __ bind(done);
+    __ BIND(done);
     inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr, O3, O4);
     __ retl();
     __ delayed()->nop();                // return value in 00
@@ -2942,14 +2863,15 @@ class StubGenerator: public StubCodeGenerator {
     StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry;
     StubRoutines::_fence_entry          = generate_fence();
 #endif  // COMPILER2 !=> _LP64
-
-    StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
   }


   void generate_all() {
     // Generates all stubs and initializes the entry points

+    // Generate partial_subtype_check first here since its code depends on
+    // UseZeroBaseCompressedOops which is defined after heap initialization.
+    StubRoutines::Sparc::_partial_subtype_check = generate_partial_subtype_check();
+
     // These entry points require SharedInfo::stack0 to be set up in non-core builds
     StubRoutines::_throw_AbstractMethodError_entry          = generate_throw_exception("AbstractMethodError throw_exception",          CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError),  false);
     StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", CAST_FROM_FN_PTR(address, SharedRuntime::throw_IncompatibleClassChangeError), false);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@@ -72,6 +72,9 @@ void VM_Version::initialize() {
FLAG_SET_ERGO(bool, UseCompressedOops, false); FLAG_SET_ERGO(bool, UseCompressedOops, false);
} }
} }
// 32-bit oops don't make sense for the 64-bit VM on sparc
// since the 32-bit VM has the same registers and smaller objects.
Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
#endif // _LP64 #endif // _LP64
#ifdef COMPILER2 #ifdef COMPILER2
// Indirect branch is the same cost as direct // Indirect branch is the same cost as direct
@@ -89,16 +92,26 @@ void VM_Version::initialize() {
#endif #endif
} }
// Use hardware population count instruction if available.
if (has_hardware_popc()) {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
UsePopCountInstruction = true;
}
}
char buf[512]; char buf[512];
jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s", jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s",
(has_v8() ? ", has_v8" : ""), (has_v8() ? ", has_v8" : ""),
(has_v9() ? ", has_v9" : ""), (has_v9() ? ", has_v9" : ""),
(has_hardware_popc() ? ", popc" : ""),
(has_vis1() ? ", has_vis1" : ""), (has_vis1() ? ", has_vis1" : ""),
(has_vis2() ? ", has_vis2" : ""), (has_vis2() ? ", has_vis2" : ""),
(is_ultra3() ? ", is_ultra3" : ""), (is_ultra3() ? ", is_ultra3" : ""),
(is_sun4v() ? ", is_sun4v" : ""), (is_sun4v() ? ", is_sun4v" : ""),
(is_niagara1() ? ", is_niagara1" : ""), (is_niagara1() ? ", is_niagara1" : ""),
(!has_hardware_int_muldiv() ? ", no-muldiv" : ""), (is_niagara1_plus() ? ", is_niagara1_plus" : ""),
(!has_hardware_mul32() ? ", no-mul32" : ""),
(!has_hardware_div32() ? ", no-div32" : ""),
(!has_hardware_fsmuld() ? ", no-fsmuld" : "")); (!has_hardware_fsmuld() ? ", no-fsmuld" : ""));
// buf starts with ", " or is empty // buf starts with ", " or is empty

View file

@@ -1,5 +1,5 @@
/* /*
* Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@@ -26,12 +26,14 @@ class VM_Version: public Abstract_VM_Version {
protected: protected:
enum Feature_Flag { enum Feature_Flag {
v8_instructions = 0, v8_instructions = 0,
hardware_int_muldiv = 1, hardware_mul32 = 1,
hardware_fsmuld = 2, hardware_div32 = 2,
v9_instructions = 3, hardware_fsmuld = 3,
vis1_instructions = 4, hardware_popc = 4,
vis2_instructions = 5, v9_instructions = 5,
sun4v_instructions = 6 vis1_instructions = 6,
vis2_instructions = 7,
sun4v_instructions = 8
}; };
enum Feature_Flag_Set { enum Feature_Flag_Set {
@@ -39,16 +41,18 @@ protected:
all_features_m = -1, all_features_m = -1,
v8_instructions_m = 1 << v8_instructions, v8_instructions_m = 1 << v8_instructions,
hardware_int_muldiv_m = 1 << hardware_int_muldiv, hardware_mul32_m = 1 << hardware_mul32,
hardware_div32_m = 1 << hardware_div32,
hardware_fsmuld_m = 1 << hardware_fsmuld, hardware_fsmuld_m = 1 << hardware_fsmuld,
hardware_popc_m = 1 << hardware_popc,
v9_instructions_m = 1 << v9_instructions, v9_instructions_m = 1 << v9_instructions,
vis1_instructions_m = 1 << vis1_instructions, vis1_instructions_m = 1 << vis1_instructions,
vis2_instructions_m = 1 << vis2_instructions, vis2_instructions_m = 1 << vis2_instructions,
sun4v_m = 1 << sun4v_instructions, sun4v_m = 1 << sun4v_instructions,
generic_v8_m = v8_instructions_m | hardware_int_muldiv_m | hardware_fsmuld_m, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
generic_v9_m = generic_v8_m | v9_instructions_m | vis1_instructions_m, generic_v9_m = generic_v8_m | v9_instructions_m,
ultra3_m = generic_v9_m | vis2_instructions_m, ultra3_m = generic_v9_m | vis1_instructions_m | vis2_instructions_m,
// Temporary until we have something more accurate // Temporary until we have something more accurate
niagara1_unique_m = sun4v_m, niagara1_unique_m = sun4v_m,
@@ -62,7 +66,7 @@ protected:
static int determine_features(); static int determine_features();
static int platform_features(int features); static int platform_features(int features);
static bool is_niagara1(int features) { return (features & niagara1_m) == niagara1_m; } static bool is_niagara1(int features) { return (features & sun4v_m) != 0; }
static int maximum_niagara1_processor_count() { return 32; } static int maximum_niagara1_processor_count() { return 32; }
// Returns true if the platform is in the niagara line and // Returns true if the platform is in the niagara line and
@@ -76,8 +80,10 @@ public:
// Instruction support // Instruction support
static bool has_v8() { return (_features & v8_instructions_m) != 0; } static bool has_v8() { return (_features & v8_instructions_m) != 0; }
static bool has_v9() { return (_features & v9_instructions_m) != 0; } static bool has_v9() { return (_features & v9_instructions_m) != 0; }
static bool has_hardware_int_muldiv() { return (_features & hardware_int_muldiv_m) != 0; } static bool has_hardware_mul32() { return (_features & hardware_mul32_m) != 0; }
static bool has_hardware_div32() { return (_features & hardware_div32_m) != 0; }
static bool has_hardware_fsmuld() { return (_features & hardware_fsmuld_m) != 0; } static bool has_hardware_fsmuld() { return (_features & hardware_fsmuld_m) != 0; }
static bool has_hardware_popc() { return (_features & hardware_popc_m) != 0; }
static bool has_vis1() { return (_features & vis1_instructions_m) != 0; } static bool has_vis1() { return (_features & vis1_instructions_m) != 0; }
static bool has_vis2() { return (_features & vis2_instructions_m) != 0; } static bool has_vis2() { return (_features & vis2_instructions_m) != 0; }

View file

@@ -221,13 +221,15 @@ int VtableStub::pd_code_size_limit(bool is_vtable_stub) {
if (is_vtable_stub) { if (is_vtable_stub) {
// ld;ld;ld,jmp,nop // ld;ld;ld,jmp,nop
const int basic = 5*BytesPerInstWord + const int basic = 5*BytesPerInstWord +
// shift;add for load_klass // shift;add for load_klass (only a shift when the heap base is zero)
(UseCompressedOops ? 2*BytesPerInstWord : 0); (UseCompressedOops ?
((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
return basic + slop; return basic + slop;
} else { } else {
const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord + const int basic = (28 LP64_ONLY(+ 6)) * BytesPerInstWord +
// shift;add for load_klass // shift;add for load_klass (only a shift when the heap base is zero)
(UseCompressedOops ? 2*BytesPerInstWord : 0); (UseCompressedOops ?
((Universe::narrow_oop_base() == NULL) ? BytesPerInstWord : 2*BytesPerInstWord) : 0);
return (basic + slop); return (basic + slop);
} }
} }
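For clarity, the size accounting this hunk changes can be modeled in a few lines of stand-alone C++ (a sketch, not HotSpot source; on SPARC every instruction is one 4-byte word):

    // Extra bytes pd_code_size_limit must budget for load_klass:
    // zero-based compressed oops need only the shift; a non-zero
    // heap base costs one more instruction for the add.
    int load_klass_extra_bytes(bool use_compressed_oops, bool zero_base) {
      const int BytesPerInstWord = 4;   // SPARC fixed-width instructions
      if (!use_compressed_oops) return 0;
      return (zero_base ? 1 : 2) * BytesPerInstWord;
    }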

View file

@@ -727,7 +727,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
} }
#ifdef _LP64 #ifdef _LP64
assert(false, "fix locate_operand"); assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32");
#else #else
assert(which == imm_operand, "instruction has only an imm field"); assert(which == imm_operand, "instruction has only an imm field");
#endif // LP64 #endif // LP64
@@ -2193,6 +2193,25 @@ void Assembler::pop(Register dst) {
emit_byte(0x58 | encode); emit_byte(0x58 | encode);
} }
void Assembler::popcntl(Register dst, Address src) {
assert(VM_Version::supports_popcnt(), "must support");
InstructionMark im(this);
emit_byte(0xF3);
prefix(src, dst);
emit_byte(0x0F);
emit_byte(0xB8);
emit_operand(dst, src);
}
void Assembler::popcntl(Register dst, Register src) {
assert(VM_Version::supports_popcnt(), "must support");
emit_byte(0xF3);
int encode = prefix_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0xB8);
emit_byte(0xC0 | encode);
}
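The bytes these two emitters produce follow the SSE4.2 encoding F3 0F B8 /r; a minimal sketch of the register-register form (low registers only, so no REX prefix; the helper name is illustrative):

    #include <cstdint>
    #include <vector>
    // popcnt dst, src for encodings 0-7: the ModRM byte puts dst in the
    // reg field and src in the r/m field, mode 11 (register direct).
    std::vector<uint8_t> popcntl_bytes(int dst_enc, int src_enc) {
      uint8_t modrm = uint8_t(0xC0 | ((dst_enc & 7) << 3) | (src_enc & 7));
      return { 0xF3, 0x0F, 0xB8, modrm };
    }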
void Assembler::popf() { void Assembler::popf() {
emit_byte(0x9D); emit_byte(0x9D);
} }
@@ -3224,12 +3243,6 @@ void Assembler::fyl2x() {
emit_byte(0xF1); emit_byte(0xF1);
} }
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format) {
InstructionMark im(this);
int encode = prefix_and_encode(dst->encoding());
emit_byte(0xB8 | encode);
emit_data((int)imm32, rspec, format);
}
#ifndef _LP64 #ifndef _LP64
@@ -3249,6 +3262,12 @@ void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder cons
emit_data((int)imm32, rspec, 0); emit_data((int)imm32, rspec, 0);
} }
void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) {
InstructionMark im(this);
int encode = prefix_and_encode(dst->encoding());
emit_byte(0xB8 | encode);
emit_data((int)imm32, rspec, 0);
}
void Assembler::popa() { // 32bit void Assembler::popa() { // 32bit
emit_byte(0x61); emit_byte(0x61);
@@ -3857,6 +3876,37 @@ void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder con
emit_data64(imm64, rspec); emit_data64(imm64, rspec);
} }
void Assembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec) {
InstructionMark im(this);
int encode = prefix_and_encode(dst->encoding());
emit_byte(0xB8 | encode);
emit_data((int)imm32, rspec, narrow_oop_operand);
}
void Assembler::mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec) {
InstructionMark im(this);
prefix(dst);
emit_byte(0xC7);
emit_operand(rax, dst, 4);
emit_data((int)imm32, rspec, narrow_oop_operand);
}
void Assembler::cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec) {
InstructionMark im(this);
int encode = prefix_and_encode(src1->encoding());
emit_byte(0x81);
emit_byte(0xF8 | encode);
emit_data((int)imm32, rspec, narrow_oop_operand);
}
void Assembler::cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec) {
InstructionMark im(this);
prefix(src1);
emit_byte(0x81);
emit_operand(rax, src1, 4);
emit_data((int)imm32, rspec, narrow_oop_operand);
}
void Assembler::movdq(XMMRegister dst, Register src) { void Assembler::movdq(XMMRegister dst, Register src) {
// table D-1 says MMX/SSE2 // table D-1 says MMX/SSE2
NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), "")); NOT_LP64(assert(VM_Version::supports_sse2() || VM_Version::supports_mmx(), ""));
@@ -4049,6 +4099,25 @@ void Assembler::popa() { // 64bit
addq(rsp, 16 * wordSize); addq(rsp, 16 * wordSize);
} }
void Assembler::popcntq(Register dst, Address src) {
assert(VM_Version::supports_popcnt(), "must support");
InstructionMark im(this);
emit_byte(0xF3);
prefixq(src, dst);
emit_byte(0x0F);
emit_byte(0xB8);
emit_operand(dst, src);
}
void Assembler::popcntq(Register dst, Register src) {
assert(VM_Version::supports_popcnt(), "must support");
emit_byte(0xF3);
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_byte(0x0F);
emit_byte(0xB8);
emit_byte(0xC0 | encode);
}
void Assembler::popq(Address dst) { void Assembler::popq(Address dst) {
InstructionMark im(this); InstructionMark im(this);
prefixq(dst); prefixq(dst);
@@ -7217,6 +7286,225 @@ void MacroAssembler::lookup_interface_method(Register recv_klass,
} }
void MacroAssembler::check_klass_subtype(Register sub_klass,
Register super_klass,
Register temp_reg,
Label& L_success) {
Label L_failure;
check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
bind(L_failure);
}
void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterConstant super_check_offset) {
assert_different_registers(sub_klass, super_klass, temp_reg);
bool must_load_sco = (super_check_offset.constant_or_zero() == -1);
if (super_check_offset.is_register()) {
assert_different_registers(sub_klass, super_klass,
super_check_offset.as_register());
} else if (must_load_sco) {
assert(temp_reg != noreg, "supply either a temp or a register offset");
}
Label L_fallthrough;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
int sco_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::super_check_offset_offset_in_bytes());
Address super_check_offset_addr(super_klass, sco_offset);
// Hacked jcc, which "knows" that L_fallthrough, at least, is in
// range of a jccb. If this routine grows larger, reconsider at
// least some of these.
#define local_jcc(assembler_cond, label) \
if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \
else jcc( assembler_cond, label) /*omit semi*/
// Hacked jmp, which may only be used just before L_fallthrough.
#define final_jmp(label) \
if (&(label) == &L_fallthrough) { /*do nothing*/ } \
else jmp(label) /*omit semi*/
// If the pointers are equal, we are done (e.g., String[] elements).
// This self-check enables sharing of secondary supertype arrays among
// non-primary types such as array-of-interface. Otherwise, each such
// type would need its own customized SSA.
// We move this check to the front of the fast path because many
// type checks are in fact trivially successful in this manner,
// so we get a nicely predicted branch right at the start of the check.
cmpptr(sub_klass, super_klass);
local_jcc(Assembler::equal, *L_success);
// Check the supertype display:
if (must_load_sco) {
// Positive movl does right thing on LP64.
movl(temp_reg, super_check_offset_addr);
super_check_offset = RegisterConstant(temp_reg);
}
Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
cmpptr(super_klass, super_check_addr); // load displayed supertype
// This check has worked decisively for primary supers.
// Secondary supers are sought in the super_cache ('super_cache_addr').
// (Secondary supers are interfaces and very deeply nested subtypes.)
// This works in the same check above because of a tricky aliasing
// between the super_cache and the primary super display elements.
// (The 'super_check_addr' can address either, as the case requires.)
// Note that the cache is updated below if it does not help us find
// what we need immediately.
// So if it was a primary super, we can just fail immediately.
// Otherwise, it's the slow path for us (no success at this point).
if (super_check_offset.is_register()) {
local_jcc(Assembler::equal, *L_success);
cmpl(super_check_offset.as_register(), sc_offset);
if (L_failure == &L_fallthrough) {
local_jcc(Assembler::equal, *L_slow_path);
} else {
local_jcc(Assembler::notEqual, *L_failure);
final_jmp(*L_slow_path);
}
} else if (super_check_offset.as_constant() == sc_offset) {
// Need a slow path; fast failure is impossible.
if (L_slow_path == &L_fallthrough) {
local_jcc(Assembler::equal, *L_success);
} else {
local_jcc(Assembler::notEqual, *L_slow_path);
final_jmp(*L_success);
}
} else {
// No slow path; it's a fast decision.
if (L_failure == &L_fallthrough) {
local_jcc(Assembler::equal, *L_success);
} else {
local_jcc(Assembler::notEqual, *L_failure);
final_jmp(*L_success);
}
}
bind(L_fallthrough);
#undef local_jcc
#undef final_jmp
}
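The tri-state logic above reads more easily in plain C++. Here is a sketch under the simplifying assumption that klass metadata is an ordinary struct (field and enum names are illustrative, not HotSpot's):

    #include <cstddef>
    enum Outcome { K_YES, K_NO, K_MAYBE_SLOW };
    struct Klass { size_t super_check_offset; };
    Outcome fast_path(Klass* sub, Klass* super, size_t sc_offset) {
      if (sub == super) return K_YES;                   // trivial self check
      size_t off = super->super_check_offset;
      Klass* displayed = *(Klass**)((char*)sub + off);  // displayed supertype
      if (displayed == super) return K_YES;
      // Equal offsets mean we compared against the secondary-super cache
      // slot, so the answer is "maybe"; any other miss is a definite failure.
      return (off == sc_offset) ? K_MAYBE_SLOW : K_NO;
    }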
void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
bool set_cond_codes) {
assert_different_registers(sub_klass, super_klass, temp_reg);
if (temp2_reg != noreg)
assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg);
#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
Label L_fallthrough;
int label_nulls = 0;
if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; }
if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; }
assert(label_nulls <= 1, "at most one NULL in the batch");
// a couple of useful fields in sub_klass:
int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_supers_offset_in_bytes());
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// Do a linear scan of the secondary super-klass chain.
// This code is rarely used, so simplicity is a virtue here.
// The repne_scan instruction uses fixed registers, which we must spill.
// Don't worry too much about pre-existing connections with the input regs.
assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super)
assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter)
// Get super_klass value into rax (even if it was in rdi or rcx).
bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false;
if (super_klass != rax || UseCompressedOops) {
if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; }
mov(rax, super_klass);
}
if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; }
if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; }
#ifndef PRODUCT
int* pst_counter = &SharedRuntime::_partial_subtype_ctr;
ExternalAddress pst_counter_addr((address) pst_counter);
NOT_LP64( incrementl(pst_counter_addr) );
LP64_ONLY( lea(rcx, pst_counter_addr) );
LP64_ONLY( incrementl(Address(rcx, 0)) );
#endif //PRODUCT
// We will consult the secondary-super array.
movptr(rdi, secondary_supers_addr);
// Load the array length. (Positive movl does right thing on LP64.)
movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan RCX words at [RDI] for an occurrence of RAX.
// Set NZ/Z based on last compare.
#ifdef _LP64
// This part is tricky, as values in supers array could be 32 or 64 bit wide
// and we store values in objArrays always encoded, thus we need to encode
// the value of rax before repne. Note that rax is dead after the repne.
if (UseCompressedOops) {
encode_heap_oop_not_null(rax);
// The superclass is never null; it would be a basic system error if a null
// pointer were to sneak in here. Note that we have already loaded the
// Klass::super_check_offset from the super_klass in the fast path,
// so if there is a null in that register, we are already in the afterlife.
repne_scanl();
} else
#endif // _LP64
repne_scan();
// Unspill the temp. registers:
if (pushed_rdi) pop(rdi);
if (pushed_rcx) pop(rcx);
if (pushed_rax) pop(rax);
if (set_cond_codes) {
// Special hack for the AD files: rdi is guaranteed non-zero.
assert(!pushed_rdi, "rdi must be left non-NULL");
// Also, the condition codes are properly set Z/NZ on succeed/failure.
}
if (L_failure == &L_fallthrough)
jccb(Assembler::notEqual, *L_failure);
else jcc(Assembler::notEqual, *L_failure);
// Success. Cache the super we found and proceed in triumph.
movptr(super_cache_addr, super_klass);
if (L_success != &L_fallthrough) {
jmp(*L_success);
}
#undef IS_A_TEMP
bind(L_fallthrough);
}
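The slow path, at the same level of abstraction, is a linear scan with a one-entry cache update; again a sketch with illustrative names (the real code keeps the array in rdi/rcx and the candidate in rax for repne_scan):

    struct KlassS {
      KlassS** secondary_supers;      // array of secondary supertypes
      int      secondary_count;       // its length
      KlassS*  secondary_super_cache; // 1-word cache updated on a hit
    };
    bool slow_path(KlassS* sub, KlassS* super) {
      for (int i = 0; i < sub->secondary_count; i++) {
        if (sub->secondary_supers[i] == super) {
          sub->secondary_super_cache = super;  // remember for next time
          return true;
        }
      }
      return false;
    }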
void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) {
ucomisd(dst, as_Address(src)); ucomisd(dst, as_Address(src));
} }
@@ -7710,8 +7998,15 @@ void MacroAssembler::load_klass(Register dst, Register src) {
void MacroAssembler::load_prototype_header(Register dst, Register src) { void MacroAssembler::load_prototype_header(Register dst, Register src) {
#ifdef _LP64 #ifdef _LP64
if (UseCompressedOops) { if (UseCompressedOops) {
assert (Universe::heap() != NULL, "java heap should be initialized");
movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); movl(dst, Address(src, oopDesc::klass_offset_in_bytes()));
if (Universe::narrow_oop_shift() != 0) {
assert(Address::times_8 == LogMinObjAlignmentInBytes &&
Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes())); movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
} else {
movq(dst, Address(dst, Klass::prototype_header_offset_in_bytes() + klassOopDesc::klass_part_offset_in_bytes()));
}
} else } else
#endif #endif
{ {
@@ -7760,11 +8055,20 @@ void MacroAssembler::store_heap_oop(Address dst, Register src) {
// Algorithm must match oop.inline.hpp encode_heap_oop. // Algorithm must match oop.inline.hpp encode_heap_oop.
void MacroAssembler::encode_heap_oop(Register r) { void MacroAssembler::encode_heap_oop(Register r) {
assert (UseCompressedOops, "should be compressed"); assert (UseCompressedOops, "should be compressed");
assert (Universe::heap() != NULL, "java heap should be initialized");
if (Universe::narrow_oop_base() == NULL) {
verify_oop(r, "broken oop in encode_heap_oop");
if (Universe::narrow_oop_shift() != 0) {
assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
shrq(r, LogMinObjAlignmentInBytes);
}
return;
}
#ifdef ASSERT #ifdef ASSERT
if (CheckCompressedOops) { if (CheckCompressedOops) {
Label ok; Label ok;
push(rscratch1); // cmpptr trashes rscratch1 push(rscratch1); // cmpptr trashes rscratch1
cmpptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
jcc(Assembler::equal, ok); jcc(Assembler::equal, ok);
stop("MacroAssembler::encode_heap_oop: heap base corrupted?"); stop("MacroAssembler::encode_heap_oop: heap base corrupted?");
bind(ok); bind(ok);
@@ -7780,6 +8084,7 @@ void MacroAssembler::encode_heap_oop(Register r) {
void MacroAssembler::encode_heap_oop_not_null(Register r) { void MacroAssembler::encode_heap_oop_not_null(Register r) {
assert (UseCompressedOops, "should be compressed"); assert (UseCompressedOops, "should be compressed");
assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT #ifdef ASSERT
if (CheckCompressedOops) { if (CheckCompressedOops) {
Label ok; Label ok;
@@ -7790,12 +8095,18 @@ void MacroAssembler::encode_heap_oop_not_null(Register r) {
} }
#endif #endif
verify_oop(r, "broken oop in encode_heap_oop_not_null"); verify_oop(r, "broken oop in encode_heap_oop_not_null");
if (Universe::narrow_oop_base() != NULL) {
subq(r, r12_heapbase); subq(r, r12_heapbase);
}
if (Universe::narrow_oop_shift() != 0) {
assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
shrq(r, LogMinObjAlignmentInBytes); shrq(r, LogMinObjAlignmentInBytes);
} }
}
void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
assert (UseCompressedOops, "should be compressed"); assert (UseCompressedOops, "should be compressed");
assert (Universe::heap() != NULL, "java heap should be initialized");
#ifdef ASSERT #ifdef ASSERT
if (CheckCompressedOops) { if (CheckCompressedOops) {
Label ok; Label ok;
@@ -7809,18 +8120,32 @@ void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) {
if (dst != src) { if (dst != src) {
movq(dst, src); movq(dst, src);
} }
if (Universe::narrow_oop_base() != NULL) {
subq(dst, r12_heapbase); subq(dst, r12_heapbase);
}
if (Universe::narrow_oop_shift() != 0) {
assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
shrq(dst, LogMinObjAlignmentInBytes); shrq(dst, LogMinObjAlignmentInBytes);
} }
}
void MacroAssembler::decode_heap_oop(Register r) { void MacroAssembler::decode_heap_oop(Register r) {
assert (UseCompressedOops, "should be compressed"); assert (UseCompressedOops, "should be compressed");
assert (Universe::heap() != NULL, "java heap should be initialized");
if (Universe::narrow_oop_base() == NULL) {
if (Universe::narrow_oop_shift() != 0) {
assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
shlq(r, LogMinObjAlignmentInBytes);
}
verify_oop(r, "broken oop in decode_heap_oop");
return;
}
#ifdef ASSERT #ifdef ASSERT
if (CheckCompressedOops) { if (CheckCompressedOops) {
Label ok; Label ok;
push(rscratch1); push(rscratch1);
cmpptr(r12_heapbase, cmpptr(r12_heapbase,
ExternalAddress((address)Universe::heap_base_addr())); ExternalAddress((address)Universe::narrow_oop_base_addr()));
jcc(Assembler::equal, ok); jcc(Assembler::equal, ok);
stop("MacroAssembler::decode_heap_oop: heap base corrupted?"); stop("MacroAssembler::decode_heap_oop: heap base corrupted?");
bind(ok); bind(ok);
@@ -7844,32 +8169,76 @@ void MacroAssembler::decode_heap_oop(Register r) {
void MacroAssembler::decode_heap_oop_not_null(Register r) { void MacroAssembler::decode_heap_oop_not_null(Register r) {
assert (UseCompressedOops, "should only be used for compressed headers"); assert (UseCompressedOops, "should only be used for compressed headers");
assert (Universe::heap() != NULL, "java heap should be initialized");
// Cannot assert, unverified entry point counts instructions (see .ad file) // Cannot assert, unverified entry point counts instructions (see .ad file)
// vtableStubs also counts instructions in pd_code_size_limit. // vtableStubs also counts instructions in pd_code_size_limit.
// Also do not verify_oop as this is called by verify_oop. // Also do not verify_oop as this is called by verify_oop.
assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); if (Universe::narrow_oop_base() == NULL) {
if (Universe::narrow_oop_shift() != 0) {
assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong");
shlq(r, LogMinObjAlignmentInBytes);
}
} else {
assert (Address::times_8 == LogMinObjAlignmentInBytes &&
Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
leaq(r, Address(r12_heapbase, r, Address::times_8, 0)); leaq(r, Address(r12_heapbase, r, Address::times_8, 0));
} }
}
void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
assert (UseCompressedOops, "should only be used for compressed headers"); assert (UseCompressedOops, "should only be used for compressed headers");
assert (Universe::heap() != NULL, "java heap should be initialized");
// Cannot assert, unverified entry point counts instructions (see .ad file) // Cannot assert, unverified entry point counts instructions (see .ad file)
// vtableStubs also counts instructions in pd_code_size_limit. // vtableStubs also counts instructions in pd_code_size_limit.
// Also do not verify_oop as this is called by verify_oop. // Also do not verify_oop as this is called by verify_oop.
assert(Address::times_8 == LogMinObjAlignmentInBytes, "decode alg wrong"); if (Universe::narrow_oop_shift() != 0) {
assert (Address::times_8 == LogMinObjAlignmentInBytes &&
Address::times_8 == Universe::narrow_oop_shift(), "decode alg wrong");
leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); leaq(dst, Address(r12_heapbase, src, Address::times_8, 0));
} else if (dst != src) {
movq(dst, src);
}
} }
void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
assert (UseCompressedOops, "should only be used for compressed headers");
assert (Universe::heap() != NULL, "java heap should be initialized");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
int oop_index = oop_recorder()->find_index(obj); int oop_index = oop_recorder()->find_index(obj);
RelocationHolder rspec = oop_Relocation::spec(oop_index); RelocationHolder rspec = oop_Relocation::spec(oop_index);
mov_literal32(dst, oop_index, rspec, narrow_oop_operand); mov_narrow_oop(dst, oop_index, rspec);
}
void MacroAssembler::set_narrow_oop(Address dst, jobject obj) {
assert (UseCompressedOops, "should only be used for compressed headers");
assert (Universe::heap() != NULL, "java heap should be initialized");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
int oop_index = oop_recorder()->find_index(obj);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
mov_narrow_oop(dst, oop_index, rspec);
}
void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) {
assert (UseCompressedOops, "should only be used for compressed headers");
assert (Universe::heap() != NULL, "java heap should be initialized");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
int oop_index = oop_recorder()->find_index(obj);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
Assembler::cmp_narrow_oop(dst, oop_index, rspec);
}
void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) {
assert (UseCompressedOops, "should only be used for compressed headers");
assert (Universe::heap() != NULL, "java heap should be initialized");
assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
int oop_index = oop_recorder()->find_index(obj);
RelocationHolder rspec = oop_Relocation::spec(oop_index);
Assembler::cmp_narrow_oop(dst, oop_index, rspec);
} }
void MacroAssembler::reinit_heapbase() { void MacroAssembler::reinit_heapbase() {
if (UseCompressedOops) { if (UseCompressedOops) {
movptr(r12_heapbase, ExternalAddress((address)Universe::heap_base_addr())); movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
} }
} }
#endif // _LP64 #endif // _LP64
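All of the encode/decode variants above compute the same two formulas; a self-contained sketch, where heap_base and shift stand in for Universe::narrow_oop_base() and Universe::narrow_oop_shift() (both special cases fall out when either value is zero):

    #include <cstdint>
    uintptr_t heap_base = 0;  // non-zero only for a heap-based encoding
    int       shift     = 3;  // LogMinObjAlignmentInBytes, or 0
    uint32_t encode_heap_oop(uintptr_t oop) {     // subq; shrq
      return (uint32_t)((oop - heap_base) >> shift);
    }
    uintptr_t decode_heap_oop(uint32_t narrow) {  // shlq, or leaq(base, r, 8)
      return heap_base + ((uintptr_t)narrow << shift);
    }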

View file

@@ -578,20 +578,25 @@ private:
// These are all easily abused and hence protected // These are all easily abused and hence protected
void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec, int format = 0);
// 32BIT ONLY SECTION // 32BIT ONLY SECTION
#ifndef _LP64 #ifndef _LP64
// Make these disappear in 64bit mode since they would never be correct // Make these disappear in 64bit mode since they would never be correct
void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY void push_literal32(int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY
#else #else
// 64BIT ONLY SECTION // 64BIT ONLY SECTION
void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
#endif // _LP64 #endif // _LP64
// These are unique in that we are ensured by the caller that the 32bit // These are unique in that we are ensured by the caller that the 32bit
@@ -1219,6 +1224,14 @@ private:
void popq(Address dst); void popq(Address dst);
#endif #endif
void popcntl(Register dst, Address src);
void popcntl(Register dst, Register src);
#ifdef _LP64
void popcntq(Register dst, Address src);
void popcntq(Register dst, Register src);
#endif
// Prefetches (SSE, SSE2, 3DNOW only) // Prefetches (SSE, SSE2, 3DNOW only)
void prefetchnta(Address src); void prefetchnta(Address src);
@@ -1647,6 +1660,9 @@ class MacroAssembler: public Assembler {
void decode_heap_oop_not_null(Register dst, Register src); void decode_heap_oop_not_null(Register dst, Register src);
void set_narrow_oop(Register dst, jobject obj); void set_narrow_oop(Register dst, jobject obj);
void set_narrow_oop(Address dst, jobject obj);
void cmp_narrow_oop(Register dst, jobject obj);
void cmp_narrow_oop(Address dst, jobject obj);
// if heap base register is used - reinit it with the correct value // if heap base register is used - reinit it with the correct value
void reinit_heapbase(); void reinit_heapbase();
@@ -1791,6 +1807,40 @@ class MacroAssembler: public Assembler {
Register scan_temp, Register scan_temp,
Label& no_such_interface); Label& no_such_interface);
// Test sub_klass against super_klass, with fast and slow paths.
// The fast path produces a tri-state answer: yes / no / maybe-slow.
// One of the three labels can be NULL, meaning take the fall-through.
// If super_check_offset is -1, the value is loaded up from super_klass.
// No registers are killed, except temp_reg.
void check_klass_subtype_fast_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Label* L_success,
Label* L_failure,
Label* L_slow_path,
RegisterConstant super_check_offset = RegisterConstant(-1));
// The rest of the type check; must be wired to a corresponding fast path.
// It does not repeat the fast path logic, so don't use it standalone.
// The temp_reg and temp2_reg can be noreg, if no temps are available.
// Updates the sub's secondary super cache as necessary.
// If set_cond_codes, condition codes will be Z on success, NZ on failure.
void check_klass_subtype_slow_path(Register sub_klass,
Register super_klass,
Register temp_reg,
Register temp2_reg,
Label* L_success,
Label* L_failure,
bool set_cond_codes = false);
// Simplified, combined version, good for typical uses.
// Falls through on failure.
void check_klass_subtype(Register sub_klass,
Register super_klass,
Register temp_reg,
Label& L_success);
//---- //----
void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0

View file

@@ -1598,18 +1598,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
// get instance klass // get instance klass
__ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc))); __ movptr(k_RInfo, Address(k_RInfo, objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)));
// get super_check_offset // perform the fast part of the checking logic
__ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
// See if we get an immediate positive hit // call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1));
__ jcc(Assembler::equal, done);
// check for immediate negative hit
__ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ jcc(Assembler::notEqual, *stub->entry());
// check for self
__ cmpptr(klass_RInfo, k_RInfo);
__ jcc(Assembler::equal, done);
__ push(klass_RInfo); __ push(klass_RInfo);
__ push(k_RInfo); __ push(k_RInfo);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@@ -1735,17 +1726,9 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
} }
__ bind(done); __ bind(done);
} else { } else {
__ movl(Rtmp1, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); // perform the fast part of the checking logic
// See if we get an immediate positive hit __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, &done, stub->entry(), NULL);
__ cmpptr(k_RInfo, Address(klass_RInfo, Rtmp1, Address::times_1)); // call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ jcc(Assembler::equal, done);
// check for immediate negative hit
__ cmpl(Rtmp1, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ jcc(Assembler::notEqual, *stub->entry());
// check for self
__ cmpptr(klass_RInfo, k_RInfo);
__ jcc(Assembler::equal, done);
__ push(klass_RInfo); __ push(klass_RInfo);
__ push(k_RInfo); __ push(k_RInfo);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));
@@ -1821,23 +1804,15 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
__ pop(dst); __ pop(dst);
__ jmp(done); __ jmp(done);
} }
} else { }
#else else // next block is unconditional if LP64:
{ // YUCK
#endif // LP64 #endif // LP64
{
assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers"); assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers");
__ movl(dst, Address(k_RInfo, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes())); // perform the fast part of the checking logic
// See if we get an immediate positive hit __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, dst, &one, &zero, NULL);
__ cmpptr(k_RInfo, Address(klass_RInfo, dst, Address::times_1)); // call out-of-line instance of __ check_klass_subtype_slow_path(...):
__ jcc(Assembler::equal, one);
// check for immediate negative hit
__ cmpl(dst, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
__ jcc(Assembler::notEqual, zero);
// check for self
__ cmpptr(klass_RInfo, k_RInfo);
__ jcc(Assembler::equal, one);
__ push(klass_RInfo); __ push(klass_RInfo);
__ push(k_RInfo); __ push(k_RInfo);
__ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id)));

View file

@@ -1354,6 +1354,13 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
case slow_subtype_check_id: case slow_subtype_check_id:
{ {
// Typical calling sequence:
// __ push(klass_RInfo); // object klass or other subclass
// __ push(sup_k_RInfo); // array element klass or other superclass
// __ call(slow_subtype_check);
// Note that the subclass is pushed first, and is therefore deepest.
// Previous versions of this code reversed the names 'sub' and 'super'.
// This was operationally harmless but made the code unreadable.
enum layout { enum layout {
rax_off, SLOT2(raxH_off) rax_off, SLOT2(raxH_off)
rcx_off, SLOT2(rcxH_off) rcx_off, SLOT2(rcxH_off)
@@ -1361,9 +1368,10 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
rdi_off, SLOT2(rdiH_off) rdi_off, SLOT2(rdiH_off)
// saved_rbp_off, SLOT2(saved_rbpH_off) // saved_rbp_off, SLOT2(saved_rbpH_off)
return_off, SLOT2(returnH_off) return_off, SLOT2(returnH_off)
sub_off, SLOT2(subH_off) sup_k_off, SLOT2(sup_kH_off)
super_off, SLOT2(superH_off) klass_off, SLOT2(superH_off)
framesize framesize,
result_off = klass_off // deepest argument is also the return value
}; };
__ set_info("slow_subtype_check", dont_gc_arguments); __ set_info("slow_subtype_check", dont_gc_arguments);
@@ -1373,19 +1381,14 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ push(rax); __ push(rax);
// This is called by pushing args and not with C abi // This is called by pushing args and not with C abi
__ movptr(rsi, Address(rsp, (super_off) * VMRegImpl::stack_slot_size)); // super __ movptr(rsi, Address(rsp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass
__ movptr(rax, Address(rsp, (sub_off ) * VMRegImpl::stack_slot_size)); // sub __ movptr(rax, Address(rsp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass
__ movptr(rdi,Address(rsi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
// since size is positive movl does right thing on 64bit
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
__ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
Label miss; Label miss;
__ repne_scan(); __ check_klass_subtype_slow_path(rsi, rax, rcx, rdi, NULL, &miss);
__ jcc(Assembler::notEqual, miss);
__ movptr(Address(rsi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax); // fallthrough on success:
__ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), 1); // result __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), 1); // result
__ pop(rax); __ pop(rax);
__ pop(rcx); __ pop(rcx);
__ pop(rsi); __ pop(rsi);
@@ -1393,7 +1396,7 @@ OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) {
__ ret(0); __ ret(0);
__ bind(miss); __ bind(miss);
__ movptr(Address(rsp, (super_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result __ movptr(Address(rsp, (result_off) * VMRegImpl::stack_slot_size), NULL_WORD); // result
__ pop(rax); __ pop(rax);
__ pop(rcx); __ pop(rcx);
__ pop(rsi); __ pop(rsi);

View file

@@ -219,47 +219,16 @@ void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, R
// Resets EDI to locals. Register sub_klass cannot be any of the above. // Resets EDI to locals. Register sub_klass cannot be any of the above.
void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) { void InterpreterMacroAssembler::gen_subtype_check( Register Rsub_klass, Label &ok_is_subtype ) {
assert( Rsub_klass != rax, "rax, holds superklass" ); assert( Rsub_klass != rax, "rax, holds superklass" );
assert( Rsub_klass != rcx, "rcx holds 2ndary super array length" ); assert( Rsub_klass != rcx, "used as a temp" );
assert( Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr" ); assert( Rsub_klass != rdi, "used as a temp, restored from locals" );
Label not_subtype, loop;
// Profile the not-null value's klass. // Profile the not-null value's klass.
profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
// Load the super-klass's check offset into ECX // Do the check.
movl( rcx, Address(rax, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() ) ); check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
// Load from the sub-klass's super-class display list, or a 1-word cache of
// the secondary superclass list, or a failing value with a sentinel offset
// if the super-klass is an interface or exceptionally deep in the Java
// hierarchy and we have to scan the secondary superclass list the hard way.
// See if we get an immediate positive hit
cmpptr( rax, Address(Rsub_klass,rcx,Address::times_1) );
jcc( Assembler::equal,ok_is_subtype );
// Check for immediate negative hit // Profile the failure of the check.
cmpl( rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes() );
jcc( Assembler::notEqual, not_subtype );
// Check for self
cmpptr( Rsub_klass, rax );
jcc( Assembler::equal, ok_is_subtype );
// Now do a linear scan of the secondary super-klass chain.
movptr( rdi, Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()) );
// EDI holds the objArrayOop of secondary supers.
movl( rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));// Load the array length
// Skip to start of data; also clear Z flag in case ECX is zero
addptr( rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT) );
// Scan ECX words at [EDI] for occurrence of EAX
// Set NZ/Z based on last compare
repne_scan();
restore_locals(); // Restore EDI; Must not blow flags
// Not equal?
jcc( Assembler::notEqual, not_subtype );
// Must be equal but missed in cache. Update cache.
movptr( Address(Rsub_klass, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()), rax );
jmp( ok_is_subtype );
bind(not_subtype);
profile_typecheck_failed(rcx); // blows rcx profile_typecheck_failed(rcx); // blows rcx
} }

View file

@@ -232,65 +232,13 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
assert(Rsub_klass != rcx, "rcx holds 2ndary super array length"); assert(Rsub_klass != rcx, "rcx holds 2ndary super array length");
assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr"); assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr");
Label not_subtype, not_subtype_pop, loop;
// Profile the not-null value's klass. // Profile the not-null value's klass.
profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, rdi profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi
// Load the super-klass's check offset into rcx // Do the check.
movl(rcx, Address(rax, sizeof(oopDesc) + check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx
Klass::super_check_offset_offset_in_bytes()));
// Load from the sub-klass's super-class display list, or a 1-word
// cache of the secondary superclass list, or a failing value with a
// sentinel offset if the super-klass is an interface or
// exceptionally deep in the Java hierarchy and we have to scan the
// secondary superclass list the hard way. See if we get an
// immediate positive hit
cmpptr(rax, Address(Rsub_klass, rcx, Address::times_1));
jcc(Assembler::equal,ok_is_subtype);
// Check for immediate negative hit // Profile the failure of the check.
cmpl(rcx, sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes());
jcc( Assembler::notEqual, not_subtype );
// Check for self
cmpptr(Rsub_klass, rax);
jcc(Assembler::equal, ok_is_subtype);
// Now do a linear scan of the secondary super-klass chain.
movptr(rdi, Address(Rsub_klass, sizeof(oopDesc) +
Klass::secondary_supers_offset_in_bytes()));
// rdi holds the objArrayOop of secondary supers.
// Load the array length
movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data; also clear Z flag in case rcx is zero
addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [rdi] for occurrence of rax
// Set NZ/Z based on last compare
// this part is kind of tricky, as values in supers array could be 32 or 64 bit wide
// and we store values in objArrays always encoded, thus we need to encode value
// before repne
if (UseCompressedOops) {
push(rax);
encode_heap_oop(rax);
repne_scanl();
// Not equal?
jcc(Assembler::notEqual, not_subtype_pop);
// restore heap oop here for movq
pop(rax);
} else {
repne_scan();
jcc(Assembler::notEqual, not_subtype);
}
// Must be equal but missed in cache. Update cache.
movptr(Address(Rsub_klass, sizeof(oopDesc) +
Klass::secondary_super_cache_offset_in_bytes()), rax);
jmp(ok_is_subtype);
bind(not_subtype_pop);
// restore heap oop here for miss
if (UseCompressedOops) pop(rax);
bind(not_subtype);
profile_typecheck_failed(rcx); // blows rcx profile_typecheck_failed(rcx); // blows rcx
} }

View file

@@ -349,7 +349,7 @@ class SlowSignatureHandler
if (_num_args < Argument::n_float_register_parameters_c-1) { if (_num_args < Argument::n_float_register_parameters_c-1) {
*_reg_args++ = from_obj; *_reg_args++ = from_obj;
*_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float *_fp_identifiers |= (intptr_t)(0x01 << (_num_args*2)); // mark as float
_num_args++; _num_args++;
} else { } else {
*_to++ = from_obj; *_to++ = from_obj;
@@ -364,7 +364,7 @@ class SlowSignatureHandler
if (_num_args < Argument::n_float_register_parameters_c-1) { if (_num_args < Argument::n_float_register_parameters_c-1) {
*_reg_args++ = from_obj; *_reg_args++ = from_obj;
*_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double *_fp_identifiers |= (intptr_t)(0x3 << (_num_args*2)); // mark as double
_num_args++; _num_args++;
} else { } else {
*_to++ = from_obj; *_to++ = from_obj;
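Both hunks only widen the mark before it is OR-ed into the identifier word; the two-bits-per-argument scheme itself (01 = float, 11 = double) can be sketched as follows (names illustrative, not the handler's own):

    #include <cstdint>
    intptr_t mark_fp_arg(intptr_t fp_ids, int arg_index, bool is_double) {
      intptr_t mark = is_double ? 0x3 : 0x1;  // two bits per register arg
      return fp_ids | (mark << (arg_index * 2));
    }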

View file

@@ -1310,81 +1310,51 @@ class StubGenerator: public StubCodeGenerator {
Address& super_check_offset_addr, Address& super_check_offset_addr,
Address& super_klass_addr, Address& super_klass_addr,
Register temp, Register temp,
Label* L_success_ptr, Label* L_failure_ptr) { Label* L_success, Label* L_failure) {
BLOCK_COMMENT("type_check:"); BLOCK_COMMENT("type_check:");
Label L_fallthrough; Label L_fallthrough;
bool fall_through_on_success = (L_success_ptr == NULL); #define LOCAL_JCC(assembler_con, label_ptr) \
if (fall_through_on_success) { if (label_ptr != NULL) __ jcc(assembler_con, *(label_ptr)); \
L_success_ptr = &L_fallthrough; else __ jcc(assembler_con, L_fallthrough) /*omit semi*/
} else {
L_failure_ptr = &L_fallthrough;
}
Label& L_success = *L_success_ptr;
Label& L_failure = *L_failure_ptr;
// The following is a strange variation of the fast path which requires
// one less register, because needed values are on the argument stack.
// __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp,
// L_success, L_failure, NULL);
assert_different_registers(sub_klass, temp); assert_different_registers(sub_klass, temp);
// a couple of useful fields in sub_klass:
int ss_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_supers_offset_in_bytes());
int sc_offset = (klassOopDesc::header_size() * HeapWordSize + int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes()); Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// if the pointers are equal, we are done (e.g., String[] elements) // if the pointers are equal, we are done (e.g., String[] elements)
__ cmpptr(sub_klass, super_klass_addr); __ cmpptr(sub_klass, super_klass_addr);
__ jcc(Assembler::equal, L_success); LOCAL_JCC(Assembler::equal, L_success);
// check the supertype display: // check the supertype display:
__ movl2ptr(temp, super_check_offset_addr); __ movl2ptr(temp, super_check_offset_addr);
Address super_check_addr(sub_klass, temp, Address::times_1, 0); Address super_check_addr(sub_klass, temp, Address::times_1, 0);
__ movptr(temp, super_check_addr); // load displayed supertype __ movptr(temp, super_check_addr); // load displayed supertype
__ cmpptr(temp, super_klass_addr); // test the super type __ cmpptr(temp, super_klass_addr); // test the super type
__ jcc(Assembler::equal, L_success); LOCAL_JCC(Assembler::equal, L_success);
// if it was a primary super, we can just fail immediately // if it was a primary super, we can just fail immediately
__ cmpl(super_check_offset_addr, sc_offset); __ cmpl(super_check_offset_addr, sc_offset);
__ jcc(Assembler::notEqual, L_failure); LOCAL_JCC(Assembler::notEqual, L_failure);
// Now do a linear scan of the secondary super-klass chain. // The repne_scan instruction uses fixed registers, which will get spilled.
// This code is rarely used, so simplicity is a virtue here. // We happen to know this works best when super_klass is in rax.
inc_counter_np(SharedRuntime::_partial_subtype_ctr); Register super_klass = temp;
{ __ movptr(super_klass, super_klass_addr);
// The repne_scan instruction uses fixed registers, which we must spill. __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg,
// (We need a couple more temps in any case.) L_success, L_failure);
__ push(rax);
__ push(rcx);
__ push(rdi);
assert_different_registers(sub_klass, rax, rcx, rdi);
__ movptr(rdi, secondary_supers_addr);
// Load the array length.
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
__ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [edi] for occurrence of rax,
// Set NZ/Z based on last compare
__ movptr(rax, super_klass_addr);
__ repne_scan();
// Unspill the temp. registers:
__ pop(rdi);
__ pop(rcx);
__ pop(rax);
}
__ jcc(Assembler::notEqual, L_failure);
// Success. Cache the super we found and proceed in triumph.
__ movptr(temp, super_klass_addr); // note: rax, is dead
__ movptr(super_cache_addr, temp);
if (!fall_through_on_success)
__ jmp(L_success);
// Fall through on failure!
__ bind(L_fallthrough); __ bind(L_fallthrough);
if (L_success == NULL) { BLOCK_COMMENT("L_success:"); }
if (L_failure == NULL) { BLOCK_COMMENT("L_failure:"); }
#undef LOCAL_JCC
} }
// //

View file

@@ -2091,66 +2091,9 @@ class StubGenerator: public StubCodeGenerator {
Label L_miss; Label L_miss;
// a couple of useful fields in sub_klass: __ check_klass_subtype_fast_path(sub_klass, super_klass, noreg, &L_success, &L_miss, NULL,
int ss_offset = (klassOopDesc::header_size() * HeapWordSize + super_check_offset);
Klass::secondary_supers_offset_in_bytes()); __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, &L_success, NULL);
int sc_offset = (klassOopDesc::header_size() * HeapWordSize +
Klass::secondary_super_cache_offset_in_bytes());
Address secondary_supers_addr(sub_klass, ss_offset);
Address super_cache_addr( sub_klass, sc_offset);
// if the pointers are equal, we are done (e.g., String[] elements)
__ cmpptr(super_klass, sub_klass);
__ jcc(Assembler::equal, L_success);
// check the supertype display:
Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0);
__ cmpptr(super_klass, super_check_addr); // test the super type
__ jcc(Assembler::equal, L_success);
// if it was a primary super, we can just fail immediately
__ cmpl(super_check_offset, sc_offset);
__ jcc(Assembler::notEqual, L_miss);
// Now do a linear scan of the secondary super-klass chain.
// The repne_scan instruction uses fixed registers, which we must spill.
// (We need a couple more temps in any case.)
// This code is rarely used, so simplicity is a virtue here.
inc_counter_np(SharedRuntime::_partial_subtype_ctr);
{
__ push(rax);
__ push(rcx);
__ push(rdi);
assert_different_registers(sub_klass, super_klass, rax, rcx, rdi);
__ movptr(rdi, secondary_supers_addr);
// Load the array length.
__ movl(rcx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
// Skip to start of data.
__ addptr(rdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
// Scan rcx words at [rdi] for occurrence of rax
// Set NZ/Z based on last compare
__ movptr(rax, super_klass);
if (UseCompressedOops) {
// Compare against compressed form. Don't need to uncompress because
// looks like orig rax is restored in popq below.
__ encode_heap_oop(rax);
__ repne_scanl();
} else {
__ repne_scan();
}
// Unspill the temp. registers:
__ pop(rdi);
__ pop(rcx);
__ pop(rax);
__ jcc(Assembler::notEqual, L_miss);
}
// Success. Cache the super we found and proceed in triumph.
__ movptr(super_cache_addr, super_klass); // note: rax is dead
__ jmp(L_success);
// Fall through on failure! // Fall through on failure!
__ BIND(L_miss); __ BIND(L_miss);

View file

@@ -284,7 +284,7 @@ void VM_Version::get_processor_features() {
} }
char buf[256]; char buf[256];
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
cores_per_cpu(), threads_per_core(), cores_per_cpu(), threads_per_core(),
cpu_family(), _model, _stepping, cpu_family(), _model, _stepping,
(supports_cmov() ? ", cmov" : ""), (supports_cmov() ? ", cmov" : ""),
@@ -297,6 +297,7 @@ void VM_Version::get_processor_features() {
(supports_ssse3()? ", ssse3": ""), (supports_ssse3()? ", ssse3": ""),
(supports_sse4_1() ? ", sse4.1" : ""), (supports_sse4_1() ? ", sse4.1" : ""),
(supports_sse4_2() ? ", sse4.2" : ""), (supports_sse4_2() ? ", sse4.2" : ""),
(supports_popcnt() ? ", popcnt" : ""),
(supports_mmx_ext() ? ", mmxext" : ""), (supports_mmx_ext() ? ", mmxext" : ""),
(supports_3dnow() ? ", 3dnow" : ""), (supports_3dnow() ? ", 3dnow" : ""),
(supports_3dnow2() ? ", 3dnowext" : ""), (supports_3dnow2() ? ", 3dnowext" : ""),
@@ -410,6 +411,13 @@ void VM_Version::get_processor_features() {
} }
} }
// Use population count instruction if available.
if (supports_popcnt()) {
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
UsePopCountInstruction = true;
}
}
assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value"); assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value"); assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");

View file

@@ -70,7 +70,9 @@ public:
dca : 1, dca : 1,
sse4_1 : 1, sse4_1 : 1,
sse4_2 : 1, sse4_2 : 1,
: 11; : 2,
popcnt : 1,
: 8;
} bits; } bits;
}; };
@@ -179,7 +181,8 @@ protected:
CPU_SSSE3 = (1 << 9), CPU_SSSE3 = (1 << 9),
CPU_SSE4A = (1 << 10), CPU_SSE4A = (1 << 10),
CPU_SSE4_1 = (1 << 11), CPU_SSE4_1 = (1 << 11),
CPU_SSE4_2 = (1 << 12) CPU_SSE4_2 = (1 << 12),
CPU_POPCNT = (1 << 13)
} cpuFeatureFlags; } cpuFeatureFlags;
// cpuid information block. All info derived from executing cpuid with // cpuid information block. All info derived from executing cpuid with
@@ -290,6 +293,8 @@ protected:
result |= CPU_SSE4_1; result |= CPU_SSE4_1;
if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
result |= CPU_SSE4_2; result |= CPU_SSE4_2;
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
result |= CPU_POPCNT;
return result; return result;
} }
@ -379,6 +384,7 @@ public:
static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; } static bool supports_ssse3() { return (_cpuFeatures & CPU_SSSE3)!= 0; }
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; } static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; } static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
// //
// AMD features // AMD features
// //


@ -1483,15 +1483,19 @@ encode %{
// main source block for now. In future, we can generalize this by // main source block for now. In future, we can generalize this by
// adding a syntax that specifies the sizes of fields in an order, // adding a syntax that specifies the sizes of fields in an order,
// so that the adlc can build the emit functions automagically // so that the adlc can build the emit functions automagically
enc_class OpcP %{ // Emit opcode
// Emit primary opcode
enc_class OpcP %{
emit_opcode(cbuf, $primary); emit_opcode(cbuf, $primary);
%} %}
enc_class OpcS %{ // Emit opcode // Emit secondary opcode
enc_class OpcS %{
emit_opcode(cbuf, $secondary); emit_opcode(cbuf, $secondary);
%} %}
enc_class Opcode(immI d8 ) %{ // Emit opcode // Emit opcode directly
enc_class Opcode(immI d8) %{
emit_opcode(cbuf, $d8$$constant); emit_opcode(cbuf, $d8$$constant);
%} %}
@ -1688,26 +1692,15 @@ encode %{
Register Reax = as_Register(EAX_enc); // super class Register Reax = as_Register(EAX_enc); // super class
Register Recx = as_Register(ECX_enc); // killed Register Recx = as_Register(ECX_enc); // killed
Register Resi = as_Register(ESI_enc); // sub class Register Resi = as_Register(ESI_enc); // sub class
Label hit, miss; Label miss;
MacroAssembler _masm(&cbuf); MacroAssembler _masm(&cbuf);
// Compare super with sub directly, since super is not in its own SSA. __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
// The compiler used to emit this test, but we fold it in here, NULL, &miss,
// to allow platform-specific tweaking on sparc. /*set_cond_codes:*/ true);
__ cmpptr(Reax, Resi); if ($primary) {
__ jcc(Assembler::equal, hit);
#ifndef PRODUCT
__ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
#endif //PRODUCT
__ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
__ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
__ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
__ repne_scan();
__ jcc(Assembler::notEqual, miss);
__ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
__ bind(hit);
if( $primary )
__ xorptr(Redi, Redi); __ xorptr(Redi, Redi);
}
__ bind(miss); __ bind(miss);
%} %}
@ -6387,6 +6380,67 @@ instruct bytes_reverse_long(eRegL dst) %{
%} %}
//---------- Population Count Instructions -------------------------------------
instruct popCountI(eRegI dst, eRegI src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
format %{ "POPCNT $dst, $src" %}
ins_encode %{
__ popcntl($dst$$Register, $src$$Register);
%}
ins_pipe(ialu_reg);
%}
instruct popCountI_mem(eRegI dst, memory mem) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI (LoadI mem)));
format %{ "POPCNT $dst, $mem" %}
ins_encode %{
__ popcntl($dst$$Register, $mem$$Address);
%}
ins_pipe(ialu_reg);
%}
// Note: Long.bitCount(long) returns an int.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));
effect(KILL cr, TEMP tmp, TEMP dst);
format %{ "POPCNT $dst, $src.lo\n\t"
"POPCNT $tmp, $src.hi\n\t"
"ADD $dst, $tmp" %}
ins_encode %{
__ popcntl($dst$$Register, $src$$Register);
__ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
__ addl($dst$$Register, $tmp$$Register);
%}
ins_pipe(ialu_reg);
%}
// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL (LoadL mem)));
effect(KILL cr, TEMP tmp, TEMP dst);
format %{ "POPCNT $dst, $mem\n\t"
"POPCNT $tmp, $mem+4\n\t"
"ADD $dst, $tmp" %}
ins_encode %{
//__ popcntl($dst$$Register, $mem$$Address$$first);
//__ popcntl($tmp$$Register, $mem$$Address$$second);
__ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
__ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
__ addl($dst$$Register, $tmp$$Register);
%}
ins_pipe(ialu_reg);
%}
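
32-bit x86 has no 64-bit POPCNT, so the two long rules above count each half and add. The same computation in portable C++, with std::bitset standing in for the instruction:

#include <bitset>
#include <cstdint>

// Mirrors popCountL: POPCNT low half, POPCNT high half, ADD.
int popcount64_via_halves(uint64_t x) {
    int lo = (int) std::bitset<32>((uint32_t) x).count();         // $src.lo
    int hi = (int) std::bitset<32>((uint32_t)(x >> 32)).count();  // $src.hi ($mem+4 in the memory form)
    return lo + hi;
}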
//----------Load/Store/Move Instructions--------------------------------------- //----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions-------------------------------------------------- //----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed) // Load Byte (8bit signed)
@ -12501,15 +12555,12 @@ instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXReg
effect( KILL rcx, KILL cr ); effect( KILL rcx, KILL cr );
ins_cost(1100); // slightly larger than the next version ins_cost(1100); // slightly larger than the next version
format %{ "CMPL EAX,ESI\n\t" format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"JEQ,s hit\n\t"
"MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t" "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
"ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
"JNE,s miss\t\t# Missed: EDI not-zero\n\t" "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
"MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
"hit:\n\t"
"XOR $result,$result\t\t Hit: EDI zero\n\t" "XOR $result,$result\t\t Hit: EDI zero\n\t"
"miss:\t" %} "miss:\t" %}
@ -12523,9 +12574,7 @@ instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super,
effect( KILL rcx, KILL result ); effect( KILL rcx, KILL result );
ins_cost(1000); ins_cost(1000);
format %{ "CMPL EAX,ESI\n\t" format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"JEQ,s miss\t# Actually a hit; we are done.\n\t"
"MOV EDI,[$sub+Klass::secondary_supers]\n\t"
"MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t" "MOV ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
"ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" "ADD EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
"REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"


@ -326,7 +326,6 @@ reg_class ptr_no_rax_reg(RDX, RDX_H,
R9, R9_H, R9, R9_H,
R10, R10_H, R10, R10_H,
R11, R11_H, R11, R11_H,
R12, R12_H,
R13, R13_H, R13, R13_H,
R14, R14_H); R14, R14_H);
@ -340,7 +339,6 @@ reg_class ptr_no_rbp_reg(RDX, RDX_H,
R9, R9_H, R9, R9_H,
R10, R10_H, R10, R10_H,
R11, R11_H, R11, R11_H,
R12, R12_H,
R13, R13_H, R13, R13_H,
R14, R14_H); R14, R14_H);
@ -354,7 +352,6 @@ reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
R9, R9_H, R9, R9_H,
R10, R10_H, R10, R10_H,
R11, R11_H, R11, R11_H,
R12, R12_H,
R13, R13_H, R13, R13_H,
R14, R14_H); R14, R14_H);
@ -444,9 +441,6 @@ reg_class long_rcx_reg(RCX, RCX_H);
// Singleton class for RDX long register // Singleton class for RDX long register
reg_class long_rdx_reg(RDX, RDX_H); reg_class long_rdx_reg(RDX, RDX_H);
// Singleton class for R12 long register
reg_class long_r12_reg(R12, R12_H);
// Class for all int registers (except RSP) // Class for all int registers (except RSP)
reg_class int_reg(RAX, reg_class int_reg(RAX,
RDX, RDX,
@ -1842,7 +1836,9 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
{ {
if (UseCompressedOops) { if (UseCompressedOops) {
st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes()); st->print_cr("movl rscratch1, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t", oopDesc::klass_offset_in_bytes());
if (Universe::narrow_oop_shift() != 0) {
st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]"); st->print_cr("leaq rscratch1, [r12_heapbase, r, Address::times_8, 0]");
}
st->print_cr("cmpq rax, rscratch1\t # Inline cache check"); st->print_cr("cmpq rax, rscratch1\t # Inline cache check");
} else { } else {
st->print_cr("cmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t" st->print_cr("cmpq rax, [j_rarg0 + oopDesc::klass_offset_in_bytes() #%d]\t"
@ -1891,7 +1887,11 @@ void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
uint MachUEPNode::size(PhaseRegAlloc* ra_) const uint MachUEPNode::size(PhaseRegAlloc* ra_) const
{ {
if (UseCompressedOops) { if (UseCompressedOops) {
if (Universe::narrow_oop_shift() == 0) {
return OptoBreakpoint ? 15 : 16;
} else {
return OptoBreakpoint ? 19 : 20; return OptoBreakpoint ? 19 : 20;
}
} else { } else {
return OptoBreakpoint ? 11 : 12; return OptoBreakpoint ? 11 : 12;
} }
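
The byte counts above fall out of which instructions are emitted; read as a small pure function (hypothetical helper, not VM code), the three cases are:

// Unverified-entry-point size in bytes, per the cases above.
int mach_uep_size(bool compressed_oops, bool shift_nonzero, bool opto_breakpoint) {
    if (compressed_oops) {
        if (shift_nonzero) return opto_breakpoint ? 19 : 20;  // movl + leaq + cmpq
        return opto_breakpoint ? 15 : 16;                     // movl + cmpq, no decode LEA
    }
    return opto_breakpoint ? 11 : 12;                         // cmpq against memory
}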
@ -2575,45 +2575,13 @@ encode %{
Register Rrax = as_Register(RAX_enc); // super class Register Rrax = as_Register(RAX_enc); // super class
Register Rrcx = as_Register(RCX_enc); // killed Register Rrcx = as_Register(RCX_enc); // killed
Register Rrsi = as_Register(RSI_enc); // sub class Register Rrsi = as_Register(RSI_enc); // sub class
Label hit, miss, cmiss; Label miss;
const bool set_cond_codes = true;
MacroAssembler _masm(&cbuf); MacroAssembler _masm(&cbuf);
// Compare super with sub directly, since super is not in its own SSA. __ check_klass_subtype_slow_path(Rrsi, Rrax, Rrcx, Rrdi,
// The compiler used to emit this test, but we fold it in here, NULL, &miss,
// to allow platform-specific tweaking on sparc. /*set_cond_codes:*/ true);
__ cmpptr(Rrax, Rrsi);
__ jcc(Assembler::equal, hit);
#ifndef PRODUCT
__ lea(Rrcx, ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
__ incrementl(Address(Rrcx, 0));
#endif //PRODUCT
__ movptr(Rrdi, Address(Rrsi,
sizeof(oopDesc) +
Klass::secondary_supers_offset_in_bytes()));
__ movl(Rrcx, Address(Rrdi, arrayOopDesc::length_offset_in_bytes()));
__ addptr(Rrdi, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
if (UseCompressedOops) {
__ encode_heap_oop(Rrax);
__ repne_scanl();
__ jcc(Assembler::notEqual, cmiss);
__ decode_heap_oop(Rrax);
__ movptr(Address(Rrsi,
sizeof(oopDesc) +
Klass::secondary_super_cache_offset_in_bytes()),
Rrax);
__ jmp(hit);
__ bind(cmiss);
__ decode_heap_oop(Rrax);
__ jmp(miss);
} else {
__ repne_scan();
__ jcc(Assembler::notEqual, miss);
__ movptr(Address(Rrsi,
sizeof(oopDesc) +
Klass::secondary_super_cache_offset_in_bytes()),
Rrax);
}
__ bind(hit);
if ($primary) { if ($primary) {
__ xorptr(Rrdi, Rrdi); __ xorptr(Rrdi, Rrdi);
} }
@ -4906,15 +4874,6 @@ operand rRegP()
interface(REG_INTER); interface(REG_INTER);
%} %}
operand r12RegL() %{
constraint(ALLOC_IN_RC(long_r12_reg));
match(RegL);
format %{ %}
interface(REG_INTER);
%}
operand rRegN() %{ operand rRegN() %{
constraint(ALLOC_IN_RC(int_reg)); constraint(ALLOC_IN_RC(int_reg));
match(RegN); match(RegN);
@ -5289,21 +5248,6 @@ operand indIndexScaleOffset(any_RegP reg, immL32 off, rRegL lreg, immI2 scale)
%} %}
%} %}
// Indirect Narrow Oop Plus Offset Operand
operand indNarrowOopOffset(rRegN src, immL32 off) %{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN src) off);
op_cost(10);
format %{"[R12 + $src << 3 + $off] (compressed oop addressing)" %}
interface(MEMORY_INTER) %{
base(0xc); // R12
index($src);
scale(0x3);
disp($off);
%}
%}
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand // Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale) operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%{ %{
@ -5321,6 +5265,158 @@ operand indPosIndexScaleOffset(any_RegP reg, immL32 off, rRegI idx, immI2 scale)
%} %}
%} %}
// Indirect Narrow Oop Plus Offset Operand
// Note: the x86 architecture doesn't support "scale * index + offset" addressing
// without a base register, so we can't free R12 even when Universe::narrow_oop_base() == NULL.
operand indCompressedOopOffset(rRegN reg, immL32 off) %{
predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
op_cost(10);
format %{"[R12 + $reg << 3 + $off] (compressed oop addressing)" %}
interface(MEMORY_INTER) %{
base(0xc); // R12
index($reg);
scale(0x3);
disp($off);
%}
%}
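
This operand folds the compressed-oop decode into the addressing mode itself. A hedged sketch of the effective address it forms, assuming the usual 3-bit shift for 8-byte alignment and R12 holding the narrow-oop base:

#include <cstdint>

// Effective address of [R12 + $reg << 3 + $off].
uintptr_t compressed_oop_address(uintptr_t r12_heapbase, uint32_t narrow_oop,
                                 int32_t off) {
    return r12_heapbase + ((uintptr_t)narrow_oop << 3) + (intptr_t)off;
}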
// Indirect Memory Operand
operand indirectNarrow(rRegN reg)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(DecodeN reg);
format %{ "[$reg]" %}
interface(MEMORY_INTER) %{
base($reg);
index(0x4);
scale(0x0);
disp(0x0);
%}
%}
// Indirect Memory Plus Short Offset Operand
operand indOffset8Narrow(rRegN reg, immL8 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
format %{ "[$reg + $off (8-bit)]" %}
interface(MEMORY_INTER) %{
base($reg);
index(0x4);
scale(0x0);
disp($off);
%}
%}
// Indirect Memory Plus Long Offset Operand
operand indOffset32Narrow(rRegN reg, immL32 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) off);
format %{ "[$reg + $off (32-bit)]" %}
interface(MEMORY_INTER) %{
base($reg);
index(0x4);
scale(0x0);
disp($off);
%}
%}
// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL32 off)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) lreg) off);
op_cost(10);
format %{"[$reg + $off + $lreg]" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale(0x0);
disp($off);
%}
%}
// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexNarrow(rRegN reg, rRegL lreg)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) lreg);
op_cost(10);
format %{"[$reg + $lreg]" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale(0x0);
disp(0x0);
%}
%}
// Indirect Memory Times Scale Plus Index Register
operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (DecodeN reg) (LShiftL lreg scale));
op_cost(10);
format %{"[$reg + $lreg << $scale]" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale($scale);
disp(0x0);
%}
%}
// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegL lreg, immI2 scale)
%{
predicate(Universe::narrow_oop_shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off);
op_cost(10);
format %{"[$reg + $off + $lreg << $scale]" %}
interface(MEMORY_INTER) %{
base($reg);
index($lreg);
scale($scale);
disp($off);
%}
%}
// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand
operand indPosIndexScaleOffsetNarrow(rRegN reg, immL32 off, rRegI idx, immI2 scale)
%{
constraint(ALLOC_IN_RC(ptr_reg));
predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0);
match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off);
op_cost(10);
format %{"[$reg + $off + $idx << $scale]" %}
interface(MEMORY_INTER) %{
base($reg);
index($idx);
scale($scale);
disp($off);
%}
%}
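
All the *Narrow operands above exploit the same fact: a zero shift only occurs in the unscaled mode where the base is NULL too, so DecodeN degenerates to zero extension and the narrow register can serve directly as an address base. As a one-liner:

#include <cstdint>

// With narrow_oop_shift() == 0 (and a NULL base), DecodeN is just zero extension.
uintptr_t decode_unshifted(uint32_t narrow_oop) {
    return (uintptr_t) narrow_oop;
}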
//----------Special Memory Operands-------------------------------------------- //----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary // Stack Slot Operand - This operand is used for loading and storing temporary
// values on the stack where a match requires a value to // values on the stack where a match requires a value to
@ -5488,7 +5584,10 @@ operand cmpOpUCF2() %{
opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex, opclass memory(indirect, indOffset8, indOffset32, indIndexOffset, indIndex,
indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset, indIndexScale, indIndexScaleOffset, indPosIndexScaleOffset,
indNarrowOopOffset); indCompressedOopOffset,
indirectNarrow, indOffset8Narrow, indOffset32Narrow,
indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow,
indIndexScaleOffsetNarrow, indPosIndexScaleOffsetNarrow);
//----------PIPELINE----------------------------------------------------------- //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline. // Rules which define the behavior of the target architectures pipeline.
@ -6234,9 +6333,7 @@ instruct loadN(rRegN dst, memory mem)
ins_cost(125); // XXX ins_cost(125); // XXX
format %{ "movl $dst, $mem\t# compressed ptr" %} format %{ "movl $dst, $mem\t# compressed ptr" %}
ins_encode %{ ins_encode %{
Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); __ movl($dst$$Register, $mem$$Address);
Register dst = as_Register($dst$$reg);
__ movl(dst, addr);
%} %}
ins_pipe(ialu_reg_mem); // XXX ins_pipe(ialu_reg_mem); // XXX
%} %}
@ -6262,9 +6359,7 @@ instruct loadNKlass(rRegN dst, memory mem)
ins_cost(125); // XXX ins_cost(125); // XXX
format %{ "movl $dst, $mem\t# compressed klass ptr" %} format %{ "movl $dst, $mem\t# compressed klass ptr" %}
ins_encode %{ ins_encode %{
Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); __ movl($dst$$Register, $mem$$Address);
Register dst = as_Register($dst$$reg);
__ movl(dst, addr);
%} %}
ins_pipe(ialu_reg_mem); // XXX ins_pipe(ialu_reg_mem); // XXX
%} %}
@ -6418,6 +6513,102 @@ instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem)
ins_pipe(ialu_reg_reg_fat); ins_pipe(ialu_reg_reg_fat);
%} %}
instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem)
%{
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr posidxscaleoff" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
// Load Effective Address which uses Narrow (32-bits) oop
instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0));
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr compressedoopoff32" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaP8Narrow(rRegP dst, indOffset8Narrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110); // XXX
format %{ "leaq $dst, $mem\t# ptr off8narrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr off32narrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr idxoffnarrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr idxscalenarrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr idxscaleoffnarrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem)
%{
predicate(Universe::narrow_oop_shift() == 0);
match(Set dst mem);
ins_cost(110);
format %{ "leaq $dst, $mem\t# ptr posidxscaleoffnarrow" %}
opcode(0x8D);
ins_encode(REX_reg_mem_wide(dst, mem), OpcP, reg_mem(dst, mem));
ins_pipe(ialu_reg_reg_fat);
%}
instruct loadConI(rRegI dst, immI src) instruct loadConI(rRegI dst, immI src)
%{ %{
match(Set dst src); match(Set dst src);
@ -6528,8 +6719,7 @@ instruct loadConN0(rRegN dst, immN0 src, rFlagsReg cr) %{
effect(KILL cr); effect(KILL cr);
format %{ "xorq $dst, $src\t# compressed NULL ptr" %} format %{ "xorq $dst, $src\t# compressed NULL ptr" %}
ins_encode %{ ins_encode %{
Register dst = $dst$$Register; __ xorq($dst$$Register, $dst$$Register);
__ xorq(dst, dst);
%} %}
ins_pipe(ialu_reg); ins_pipe(ialu_reg);
%} %}
@ -6541,11 +6731,10 @@ instruct loadConN(rRegN dst, immN src) %{
format %{ "movl $dst, $src\t# compressed ptr" %} format %{ "movl $dst, $src\t# compressed ptr" %}
ins_encode %{ ins_encode %{
address con = (address)$src$$constant; address con = (address)$src$$constant;
Register dst = $dst$$Register;
if (con == NULL) { if (con == NULL) {
ShouldNotReachHere(); ShouldNotReachHere();
} else { } else {
__ set_narrow_oop(dst, (jobject)$src$$constant); __ set_narrow_oop($dst$$Register, (jobject)$src$$constant);
} }
%} %}
ins_pipe(ialu_reg_fat); // XXX ins_pipe(ialu_reg_fat); // XXX
@ -6794,12 +6983,25 @@ instruct storeP(memory mem, any_RegP src)
ins_pipe(ialu_mem_reg); ins_pipe(ialu_mem_reg);
%} %}
instruct storeImmP0(memory mem, immP0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreP mem zero));
ins_cost(125); // XXX
format %{ "movq $mem, R12\t# ptr (R12_heapbase==0)" %}
ins_encode %{
__ movq($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
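
The storeImm*0 rules introduced here all use one trick: when the narrow-oop base is NULL, R12 permanently holds zero, so storing R12 writes a zero with a shorter, immediate-free encoding. The invariant, sketched (hypothetical helper):

#include <cassert>
#include <cstdint>

// Valid only under the predicate above: narrow_oop_base() == NULL.
void store_zero_via_r12(uint64_t* slot, uint64_t r12_heapbase) {
    assert(r12_heapbase == 0 && "R12 is the zero register here");
    *slot = r12_heapbase;  // same effect as "movq $mem, R12"
}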
// Store NULL Pointer, mark word, or other simple pointer constant. // Store NULL Pointer, mark word, or other simple pointer constant.
instruct storeImmP(memory mem, immP31 src) instruct storeImmP(memory mem, immP31 src)
%{ %{
match(Set mem (StoreP mem src)); match(Set mem (StoreP mem src));
ins_cost(125); // XXX ins_cost(150); // XXX
format %{ "movq $mem, $src\t# ptr" %} format %{ "movq $mem, $src\t# ptr" %}
opcode(0xC7); /* C7 /0 */ opcode(0xC7); /* C7 /0 */
ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src)); ins_encode(REX_mem_wide(mem), OpcP, RM_opc_mem(0x00, mem), Con32(src));
@ -6814,14 +7016,55 @@ instruct storeN(memory mem, rRegN src)
ins_cost(125); // XXX ins_cost(125); // XXX
format %{ "movl $mem, $src\t# compressed ptr" %} format %{ "movl $mem, $src\t# compressed ptr" %}
ins_encode %{ ins_encode %{
Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); __ movl($mem$$Address, $src$$Register);
Register src = as_Register($src$$reg);
__ movl(addr, src);
%} %}
ins_pipe(ialu_mem_reg); ins_pipe(ialu_mem_reg);
%} %}
instruct storeImmN0(memory mem, immN0 zero)
%{
predicate(Universe::narrow_oop_base() == NULL);
match(Set mem (StoreN mem zero));
ins_cost(125); // XXX
format %{ "movl $mem, R12\t# compressed ptr (R12_heapbase==0)" %}
ins_encode %{
__ movl($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeImmN(memory mem, immN src)
%{
match(Set mem (StoreN mem src));
ins_cost(150); // XXX
format %{ "movl $mem, $src\t# compressed ptr" %}
ins_encode %{
address con = (address)$src$$constant;
if (con == NULL) {
__ movl($mem$$Address, (int32_t)0);
} else {
__ set_narrow_oop($mem$$Address, (jobject)$src$$constant);
}
%}
ins_pipe(ialu_mem_imm);
%}
// Store Integer Immediate // Store Integer Immediate
instruct storeImmI0(memory mem, immI0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreI mem zero));
ins_cost(125); // XXX
format %{ "movl $mem, R12\t# int (R12_heapbase==0)" %}
ins_encode %{
__ movl($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeImmI(memory mem, immI src) instruct storeImmI(memory mem, immI src)
%{ %{
match(Set mem (StoreI mem src)); match(Set mem (StoreI mem src));
@ -6834,6 +7077,19 @@ instruct storeImmI(memory mem, immI src)
%} %}
// Store Long Immediate // Store Long Immediate
instruct storeImmL0(memory mem, immL0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreL mem zero));
ins_cost(125); // XXX
format %{ "movq $mem, R12\t# long (R12_heapbase==0)" %}
ins_encode %{
__ movq($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeImmL(memory mem, immL32 src) instruct storeImmL(memory mem, immL32 src)
%{ %{
match(Set mem (StoreL mem src)); match(Set mem (StoreL mem src));
@ -6846,6 +7102,19 @@ instruct storeImmL(memory mem, immL32 src)
%} %}
// Store Short/Char Immediate // Store Short/Char Immediate
instruct storeImmC0(memory mem, immI0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreC mem zero));
ins_cost(125); // XXX
format %{ "movw $mem, R12\t# short/char (R12_heapbase==0)" %}
ins_encode %{
__ movw($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeImmI16(memory mem, immI16 src) instruct storeImmI16(memory mem, immI16 src)
%{ %{
predicate(UseStoreImmI16); predicate(UseStoreImmI16);
@ -6859,6 +7128,19 @@ instruct storeImmI16(memory mem, immI16 src)
%} %}
// Store Byte Immediate // Store Byte Immediate
instruct storeImmB0(memory mem, immI0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreB mem zero));
ins_cost(125); // XXX
format %{ "movb $mem, R12\t# short/char (R12_heapbase==0)" %}
ins_encode %{
__ movb($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeImmB(memory mem, immI8 src) instruct storeImmB(memory mem, immI8 src)
%{ %{
match(Set mem (StoreB mem src)); match(Set mem (StoreB mem src));
@ -6898,6 +7180,19 @@ instruct storeA2I(memory mem, regD src) %{
%} %}
// Store CMS card-mark Immediate // Store CMS card-mark Immediate
instruct storeImmCM0_reg(memory mem, immI0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreCM mem zero));
ins_cost(125); // XXX
format %{ "movb $mem, R12\t# CMS card-mark byte 0 (R12_heapbase==0)" %}
ins_encode %{
__ movb($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeImmCM0(memory mem, immI0 src) instruct storeImmCM0(memory mem, immI0 src)
%{ %{
match(Set mem (StoreCM mem src)); match(Set mem (StoreCM mem src));
@ -6931,6 +7226,19 @@ instruct storeF(memory mem, regF src)
%} %}
// Store immediate Float value (it is faster than store from XMM register) // Store immediate Float value (it is faster than store from XMM register)
instruct storeF0(memory mem, immF0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreF mem zero));
ins_cost(25); // XXX
format %{ "movl $mem, R12\t# float 0. (R12_heapbase==0)" %}
ins_encode %{
__ movl($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeF_imm(memory mem, immF src) instruct storeF_imm(memory mem, immF src)
%{ %{
match(Set mem (StoreF mem src)); match(Set mem (StoreF mem src));
@ -6957,6 +7265,7 @@ instruct storeD(memory mem, regD src)
// Store immediate double 0.0 (it is faster than store from XMM register) // Store immediate double 0.0 (it is faster than store from XMM register)
instruct storeD0_imm(memory mem, immD0 src) instruct storeD0_imm(memory mem, immD0 src)
%{ %{
predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
match(Set mem (StoreD mem src)); match(Set mem (StoreD mem src));
ins_cost(50); ins_cost(50);
@ -6966,6 +7275,19 @@ instruct storeD0_imm(memory mem, immD0 src)
ins_pipe(ialu_mem_imm); ins_pipe(ialu_mem_imm);
%} %}
instruct storeD0(memory mem, immD0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set mem (StoreD mem zero));
ins_cost(25); // XXX
format %{ "movq $mem, R12\t# double 0. (R12_heapbase==0)" %}
ins_encode %{
__ movq($mem$$Address, r12);
%}
ins_pipe(ialu_mem_reg);
%}
instruct storeSSI(stackSlotI dst, rRegI src) instruct storeSSI(stackSlotI dst, rRegI src)
%{ %{
match(Set dst src); match(Set dst src);
@ -7077,6 +7399,56 @@ instruct storeL_reversed(memory dst, rRegL src) %{
ins_pipe( ialu_mem_reg ); ins_pipe( ialu_mem_reg );
%} %}
//---------- Population Count Instructions -------------------------------------
instruct popCountI(rRegI dst, rRegI src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI src));
format %{ "popcnt $dst, $src" %}
ins_encode %{
__ popcntl($dst$$Register, $src$$Register);
%}
ins_pipe(ialu_reg);
%}
instruct popCountI_mem(rRegI dst, memory mem) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountI (LoadI mem)));
format %{ "popcnt $dst, $mem" %}
ins_encode %{
__ popcntl($dst$$Register, $mem$$Address);
%}
ins_pipe(ialu_reg);
%}
// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, rRegL src) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL src));
format %{ "popcnt $dst, $src" %}
ins_encode %{
__ popcntq($dst$$Register, $src$$Register);
%}
ins_pipe(ialu_reg);
%}
// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem) %{
predicate(UsePopCountInstruction);
match(Set dst (PopCountL (LoadL mem)));
format %{ "popcnt $dst, $mem" %}
ins_encode %{
__ popcntq($dst$$Register, $mem$$Address);
%}
ins_pipe(ialu_reg);
%}
//----------MemBar Instructions----------------------------------------------- //----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors // Memory barrier flavors
@ -7192,9 +7564,7 @@ instruct encodeHeapOop_not_null(rRegN dst, rRegP src, rFlagsReg cr) %{
effect(KILL cr); effect(KILL cr);
format %{ "encode_heap_oop_not_null $dst,$src" %} format %{ "encode_heap_oop_not_null $dst,$src" %}
ins_encode %{ ins_encode %{
Register s = $src$$Register; __ encode_heap_oop_not_null($dst$$Register, $src$$Register);
Register d = $dst$$Register;
__ encode_heap_oop_not_null(d, s);
%} %}
ins_pipe(ialu_reg_long); ins_pipe(ialu_reg_long);
%} %}
@ -7224,7 +7594,11 @@ instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{
ins_encode %{ ins_encode %{
Register s = $src$$Register; Register s = $src$$Register;
Register d = $dst$$Register; Register d = $dst$$Register;
if (s != d) {
__ decode_heap_oop_not_null(d, s); __ decode_heap_oop_not_null(d, s);
} else {
__ decode_heap_oop_not_null(d);
}
%} %}
ins_pipe(ialu_reg_long); ins_pipe(ialu_reg_long);
%} %}
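
The encoding now distinguishes the two-register and in-place forms of decode_heap_oop_not_null; both compute the same value. A sketch with base and shift mirroring Universe::narrow_oop_base() and narrow_oop_shift():

#include <cstdint>

// Decode a known-non-null compressed oop (no null check needed).
uintptr_t decode_not_null(uint32_t narrow_oop, uintptr_t base, int shift) {
    return base + ((uintptr_t)narrow_oop << shift);
}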
@ -11389,8 +11763,9 @@ instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero)
// This will generate a signed flags result. This should be OK since // This will generate a signed flags result. This should be OK since
// any compare to a zero should be eq/neq. // any compare to a zero should be eq/neq.
instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero) instruct testP_mem(rFlagsReg cr, memory op, immP0 zero)
%{ %{
predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));
match(Set cr (CmpP (LoadP op) zero)); match(Set cr (CmpP (LoadP op) zero));
ins_cost(500); // XXX ins_cost(500); // XXX
@ -11401,13 +11776,24 @@ instruct testP_reg_mem(rFlagsReg cr, memory op, immP0 zero)
ins_pipe(ialu_cr_reg_imm); ins_pipe(ialu_cr_reg_imm);
%} %}
instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero)
%{
predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL));
match(Set cr (CmpP (LoadP mem) zero));
format %{ "cmpq R12, $mem\t# ptr (R12_heapbase==0)" %}
ins_encode %{
__ cmpq(r12, $mem$$Address);
%}
ins_pipe(ialu_cr_reg_mem);
%}
instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2)
%{ %{
match(Set cr (CmpN op1 op2)); match(Set cr (CmpN op1 op2));
format %{ "cmpl $op1, $op2\t# compressed ptr" %} format %{ "cmpl $op1, $op2\t# compressed ptr" %}
ins_encode %{ __ cmpl(as_Register($op1$$reg), as_Register($op2$$reg)); %} ins_encode %{ __ cmpl($op1$$Register, $op2$$Register); %}
ins_pipe(ialu_cr_reg_reg); ins_pipe(ialu_cr_reg_reg);
%} %}
@ -11415,11 +11801,30 @@ instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem)
%{ %{
match(Set cr (CmpN src (LoadN mem))); match(Set cr (CmpN src (LoadN mem)));
ins_cost(500); // XXX format %{ "cmpl $src, $mem\t# compressed ptr" %}
format %{ "cmpl $src, mem\t# compressed ptr" %}
ins_encode %{ ins_encode %{
Address adr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); __ cmpl($src$$Register, $mem$$Address);
__ cmpl(as_Register($src$$reg), adr); %}
ins_pipe(ialu_cr_reg_mem);
%}
instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{
match(Set cr (CmpN op1 op2));
format %{ "cmpl $op1, $op2\t# compressed ptr" %}
ins_encode %{
__ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant);
%}
ins_pipe(ialu_cr_reg_imm);
%}
instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src)
%{
match(Set cr (CmpN src (LoadN mem)));
format %{ "cmpl $mem, $src\t# compressed ptr" %}
ins_encode %{
__ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant);
%} %}
ins_pipe(ialu_cr_reg_mem); ins_pipe(ialu_cr_reg_mem);
%} %}
@ -11432,15 +11837,27 @@ instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{
ins_pipe(ialu_cr_reg_imm); ins_pipe(ialu_cr_reg_imm);
%} %}
instruct testN_reg_mem(rFlagsReg cr, memory mem, immN0 zero) instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero)
%{ %{
predicate(Universe::narrow_oop_base() != NULL);
match(Set cr (CmpN (LoadN mem) zero)); match(Set cr (CmpN (LoadN mem) zero));
ins_cost(500); // XXX ins_cost(500); // XXX
format %{ "testl $mem, 0xffffffff\t# compressed ptr" %} format %{ "testl $mem, 0xffffffff\t# compressed ptr" %}
ins_encode %{ ins_encode %{
Address addr = build_address($mem$$base, $mem$$index, $mem$$scale, $mem$$disp); __ cmpl($mem$$Address, (int)0xFFFFFFFF);
__ cmpl(addr, (int)0xFFFFFFFF); %}
ins_pipe(ialu_cr_reg_mem);
%}
instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero)
%{
predicate(Universe::narrow_oop_base() == NULL);
match(Set cr (CmpN (LoadN mem) zero));
format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)" %}
ins_encode %{
__ cmpl(r12, $mem$$Address);
%} %}
ins_pipe(ialu_cr_reg_mem); ins_pipe(ialu_cr_reg_mem);
%} %}
@ -11472,7 +11889,6 @@ instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2)
%{ %{
match(Set cr (CmpL op1 (LoadL op2))); match(Set cr (CmpL op1 (LoadL op2)));
ins_cost(500); // XXX
format %{ "cmpq $op1, $op2" %} format %{ "cmpq $op1, $op2" %}
opcode(0x3B); /* Opcode 3B /r */ opcode(0x3B); /* Opcode 3B /r */
ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2)); ins_encode(REX_reg_mem_wide(op1, op2), OpcP, reg_mem(op1, op2));
@ -11733,15 +12149,12 @@ instruct partialSubtypeCheck(rdi_RegP result,
effect(KILL rcx, KILL cr); effect(KILL rcx, KILL cr);
ins_cost(1100); // slightly larger than the next version ins_cost(1100); // slightly larger than the next version
format %{ "cmpq rax, rsi\n\t" format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
"jeq,s hit\n\t"
"movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t"
"jne,s miss\t\t# Missed: rdi not-zero\n\t" "jne,s miss\t\t# Missed: rdi not-zero\n\t"
"movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t" "movq [$sub + (sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes())], $super\t# Hit: update cache\n\t"
"hit:\n\t"
"xorq $result, $result\t\t Hit: rdi zero\n\t" "xorq $result, $result\t\t Hit: rdi zero\n\t"
"miss:\t" %} "miss:\t" %}
@ -11756,13 +12169,10 @@ instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr,
rdi_RegP result) rdi_RegP result)
%{ %{
match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
predicate(!UseCompressedOops); // decoding oop kills condition codes
effect(KILL rcx, KILL result); effect(KILL rcx, KILL result);
ins_cost(1000); ins_cost(1000);
format %{ "cmpq rax, rsi\n\t" format %{ "movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
"jeq,s miss\t# Actually a hit; we are done.\n\t"
"movq rdi, [$sub + (sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes())]\n\t"
"movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t" "movl rcx, [rdi + arrayOopDesc::length_offset_in_bytes()]\t# length to scan\n\t"
"addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t" "addq rdi, arrayOopDex::base_offset_in_bytes(T_OBJECT)\t# Skip to start of data; set NZ in case count is zero\n\t"
"repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t"


@ -2582,7 +2582,7 @@ bool os::large_page_init() {
#define SHM_HUGETLB 04000 #define SHM_HUGETLB 04000
#endif #endif
char* os::reserve_memory_special(size_t bytes) { char* os::reserve_memory_special(size_t bytes, char* req_addr) {
assert(UseLargePages, "only for large pages"); assert(UseLargePages, "only for large pages");
key_t key = IPC_PRIVATE; key_t key = IPC_PRIVATE;


@ -249,6 +249,10 @@ int generateJvmOffsets(GEN_variant gen_variant) {
printf("\n"); printf("\n");
GEN_OFFS(NarrowOopStruct, _base);
GEN_OFFS(NarrowOopStruct, _shift);
printf("\n");
GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header)); GEN_VALUE(SIZE_HeapBlockHeader, sizeof(HeapBlock::Header));
GEN_SIZE(oopDesc); GEN_SIZE(oopDesc);
GEN_SIZE(constantPoolOopDesc); GEN_SIZE(constantPoolOopDesc);


@ -46,7 +46,10 @@ extern pointer __JvmOffsets;
extern pointer __1cJCodeCacheF_heap_; extern pointer __1cJCodeCacheF_heap_;
extern pointer __1cIUniverseP_methodKlassObj_; extern pointer __1cIUniverseP_methodKlassObj_;
extern pointer __1cIUniverseO_collectedHeap_; extern pointer __1cIUniverseO_collectedHeap_;
extern pointer __1cIUniverseK_heap_base_; extern pointer __1cIUniverseL_narrow_oop_;
#ifdef _LP64
extern pointer UseCompressedOops;
#endif
extern pointer __1cHnmethodG__vtbl_; extern pointer __1cHnmethodG__vtbl_;
extern pointer __1cKBufferBlobG__vtbl_; extern pointer __1cKBufferBlobG__vtbl_;
@ -56,6 +59,7 @@ extern pointer __1cKBufferBlobG__vtbl_;
#define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t)) #define copyin_uint16(ADDR) *(uint16_t*) copyin((pointer) (ADDR), sizeof(uint16_t))
#define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t)) #define copyin_uint32(ADDR) *(uint32_t*) copyin((pointer) (ADDR), sizeof(uint32_t))
#define copyin_int32(ADDR) *(int32_t*) copyin((pointer) (ADDR), sizeof(int32_t)) #define copyin_int32(ADDR) *(int32_t*) copyin((pointer) (ADDR), sizeof(int32_t))
#define copyin_uint8(ADDR) *(uint8_t*) copyin((pointer) (ADDR), sizeof(uint8_t))
#define SAME(x) x #define SAME(x) x
#define copyin_offset(JVM_CONST) JVM_CONST = \ #define copyin_offset(JVM_CONST) JVM_CONST = \
@ -132,6 +136,9 @@ dtrace:helper:ustack:
copyin_offset(SIZE_oopDesc); copyin_offset(SIZE_oopDesc);
copyin_offset(SIZE_constantPoolOopDesc); copyin_offset(SIZE_constantPoolOopDesc);
copyin_offset(OFFSET_NarrowOopStruct_base);
copyin_offset(OFFSET_NarrowOopStruct_shift);
/* /*
* The PC to translate is in arg0. * The PC to translate is in arg0.
*/ */
@ -151,9 +158,19 @@ dtrace:helper:ustack:
this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_); this->Universe_methodKlassOop = copyin_ptr(&``__1cIUniverseP_methodKlassObj_);
this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_); this->CodeCache_heap_address = copyin_ptr(&``__1cJCodeCacheF_heap_);
this->Universe_heap_base = copyin_ptr(&``__1cIUniverseK_heap_base_);
/* Reading volatile values */ /* Reading volatile values */
#ifdef _LP64
this->Use_Compressed_Oops = copyin_uint8(&``UseCompressedOops);
#else
this->Use_Compressed_Oops = 0;
#endif
this->Universe_narrow_oop_base = copyin_ptr(&``__1cIUniverseL_narrow_oop_ +
OFFSET_NarrowOopStruct_base);
this->Universe_narrow_oop_shift = copyin_int32(&``__1cIUniverseL_narrow_oop_ +
OFFSET_NarrowOopStruct_shift);
this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address + this->CodeCache_low = copyin_ptr(this->CodeCache_heap_address +
OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low); OFFSET_CodeHeap_memory + OFFSET_VirtualSpace_low);
@ -295,7 +312,7 @@ dtrace:helper:ustack:
dtrace:helper:ustack: dtrace:helper:ustack:
/!this->done && this->vtbl == this->BufferBlob_vtbl && /!this->done && this->vtbl == this->BufferBlob_vtbl &&
this->Universe_heap_base == NULL && this->Use_Compressed_Oops == 0 &&
this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/ this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
{ {
MARK_LINE; MARK_LINE;
@ -306,7 +323,7 @@ this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
dtrace:helper:ustack: dtrace:helper:ustack:
/!this->done && this->vtbl == this->BufferBlob_vtbl && /!this->done && this->vtbl == this->BufferBlob_vtbl &&
this->Universe_heap_base != NULL && this->Use_Compressed_Oops != 0 &&
this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/ this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
{ {
MARK_LINE; MARK_LINE;
@ -314,8 +331,8 @@ this->methodOopPtr > this->heap_start && this->methodOopPtr < this->heap_end/
* Read compressed pointer and decode heap oop, same as oop.inline.hpp * Read compressed pointer and decode heap oop, same as oop.inline.hpp
*/ */
this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata); this->cklass = copyin_uint32(this->methodOopPtr + OFFSET_oopDesc_metadata);
this->klass = (uint64_t)((uintptr_t)this->Universe_heap_base + this->klass = (uint64_t)((uintptr_t)this->Universe_narrow_oop_base +
((uintptr_t)this->cklass << 3)); ((uintptr_t)this->cklass << this->Universe_narrow_oop_shift));
this->methodOop = this->klass == this->Universe_methodKlassOop; this->methodOop = this->klass == this->Universe_methodKlassOop;
this->done = !this->methodOop; this->done = !this->methodOop;
} }
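
The probe mirrors the VM's NarrowOopStruct when decoding the klass. An illustrative C++ rendering of the two copyin'd fields and the arithmetic (field names are stand-ins for the real layout):

#include <cstdint>

struct NarrowOopStruct {
    uintptr_t _base;   // read via OFFSET_NarrowOopStruct_base
    int32_t   _shift;  // read via OFFSET_NarrowOopStruct_shift
};

// Same arithmetic as the probe above.
uint64_t decode_klass(const NarrowOopStruct& n, uint32_t cklass) {
    return (uint64_t)(n._base + ((uintptr_t)cklass << n._shift));
}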


@ -146,13 +146,17 @@ struct jvm_agent {
uint64_t BufferBlob_vtbl; uint64_t BufferBlob_vtbl;
uint64_t RuntimeStub_vtbl; uint64_t RuntimeStub_vtbl;
uint64_t Use_Compressed_Oops_address;
uint64_t Universe_methodKlassObj_address; uint64_t Universe_methodKlassObj_address;
uint64_t Universe_narrow_oop_base_address;
uint64_t Universe_narrow_oop_shift_address;
uint64_t CodeCache_heap_address; uint64_t CodeCache_heap_address;
uint64_t Universe_heap_base_address;
/* Volatiles */ /* Volatiles */
uint8_t Use_Compressed_Oops;
uint64_t Universe_methodKlassObj; uint64_t Universe_methodKlassObj;
uint64_t Universe_heap_base; uint64_t Universe_narrow_oop_base;
uint32_t Universe_narrow_oop_shift;
uint64_t CodeCache_low; uint64_t CodeCache_low;
uint64_t CodeCache_high; uint64_t CodeCache_high;
uint64_t CodeCache_segmap_low; uint64_t CodeCache_segmap_low;
@ -279,8 +283,11 @@ static int parse_vmstructs(jvm_agent_t* J) {
if (strcmp("_methodKlassObj", vmp->fieldName) == 0) { if (strcmp("_methodKlassObj", vmp->fieldName) == 0) {
J->Universe_methodKlassObj_address = vmp->address; J->Universe_methodKlassObj_address = vmp->address;
} }
if (strcmp("_heap_base", vmp->fieldName) == 0) { if (strcmp("_narrow_oop._base", vmp->fieldName) == 0) {
J->Universe_heap_base_address = vmp->address; J->Universe_narrow_oop_base_address = vmp->address;
}
if (strcmp("_narrow_oop._shift", vmp->fieldName) == 0) {
J->Universe_narrow_oop_shift_address = vmp->address;
} }
} }
CHECK_FAIL(err); CHECK_FAIL(err);
@ -298,14 +305,39 @@ static int parse_vmstructs(jvm_agent_t* J) {
return -1; return -1;
} }
static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
psaddr_t sym_addr;
int err;
err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
if (err != PS_OK) goto fail;
*valuep = sym_addr;
return PS_OK;
fail:
return err;
}
static int read_volatiles(jvm_agent_t* J) { static int read_volatiles(jvm_agent_t* J) {
uint64_t ptr; uint64_t ptr;
int err; int err;
err = find_symbol(J, "UseCompressedOops", &J->Use_Compressed_Oops_address);
if (err == PS_OK) {
err = ps_pread(J->P, J->Use_Compressed_Oops_address, &J->Use_Compressed_Oops, sizeof(uint8_t));
CHECK_FAIL(err);
} else {
J->Use_Compressed_Oops = 0;
}
err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj); err = read_pointer(J, J->Universe_methodKlassObj_address, &J->Universe_methodKlassObj);
CHECK_FAIL(err); CHECK_FAIL(err);
err = read_pointer(J, J->Universe_heap_base_address, &J->Universe_heap_base);
err = read_pointer(J, J->Universe_narrow_oop_base_address, &J->Universe_narrow_oop_base);
CHECK_FAIL(err); CHECK_FAIL(err);
err = ps_pread(J->P, J->Universe_narrow_oop_shift_address, &J->Universe_narrow_oop_shift, sizeof(uint32_t));
CHECK_FAIL(err);
err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory + err = read_pointer(J, J->CodeCache_heap_address + OFFSET_CodeHeap_memory +
OFFSET_VirtualSpace_low, &J->CodeCache_low); OFFSET_VirtualSpace_low, &J->CodeCache_low);
CHECK_FAIL(err); CHECK_FAIL(err);
@ -374,19 +406,6 @@ static int find_start(jvm_agent_t* J, uint64_t ptr, uint64_t *startp) {
return -1; return -1;
} }
static int find_symbol(jvm_agent_t* J, const char *name, uint64_t* valuep) {
psaddr_t sym_addr;
int err;
err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
if (err != PS_OK) goto fail;
*valuep = sym_addr;
return PS_OK;
fail:
return err;
}
static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) { static int find_jlong_constant(jvm_agent_t* J, const char *name, uint64_t* valuep) {
psaddr_t sym_addr; psaddr_t sym_addr;
int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr); int err = ps_pglobal_lookup(J->P, LIBJVM_SO, name, &sym_addr);
@ -458,14 +477,14 @@ void Jagent_destroy(jvm_agent_t *J) {
static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) { static int is_methodOop(jvm_agent_t* J, uint64_t methodOopPtr) {
uint64_t klass; uint64_t klass;
int err; int err;
// If heap_base is nonnull, this was a compressed oop. // If UseCompressedOops, this was a compressed oop.
if (J->Universe_heap_base != NULL) { if (J->Use_Compressed_Oops != 0) {
uint32_t cklass; uint32_t cklass;
err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, err = read_compressed_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata,
&cklass); &cklass);
// decode heap oop, same as oop.inline.hpp // decode heap oop, same as oop.inline.hpp
klass = (uint64_t)((uintptr_t)J->Universe_heap_base + klass = (uint64_t)((uintptr_t)J->Universe_narrow_oop_base +
((uintptr_t)cklass << 3)); ((uintptr_t)cklass << J->Universe_narrow_oop_shift));
} else { } else {
err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass); err = read_pointer(J, methodOopPtr + OFFSET_oopDesc_metadata, &klass);
} }


@ -3220,7 +3220,7 @@ bool os::Solaris::set_mpss_range(caddr_t start, size_t bytes, size_t align) {
return true; return true;
} }
char* os::reserve_memory_special(size_t bytes) { char* os::reserve_memory_special(size_t bytes, char* addr) {
assert(UseLargePages && UseISM, "only for ISM large pages"); assert(UseLargePages && UseISM, "only for ISM large pages");
size_t size = bytes; size_t size = bytes;
@ -4451,6 +4451,9 @@ int_fnP_thread_t_i os::Solaris::_thr_setmutator;
int_fnP_thread_t os::Solaris::_thr_suspend_mutator; int_fnP_thread_t os::Solaris::_thr_suspend_mutator;
int_fnP_thread_t os::Solaris::_thr_continue_mutator; int_fnP_thread_t os::Solaris::_thr_continue_mutator;
// (Static) wrapper for getisax(2) call.
os::Solaris::getisax_func_t os::Solaris::_getisax = 0;
// (Static) wrappers for the liblgrp API // (Static) wrappers for the liblgrp API
os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home; os::Solaris::lgrp_home_func_t os::Solaris::_lgrp_home;
os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init; os::Solaris::lgrp_init_func_t os::Solaris::_lgrp_init;
@ -4465,17 +4468,20 @@ os::Solaris::lgrp_cookie_t os::Solaris::_lgrp_cookie = 0;
// (Static) wrapper for meminfo() call. // (Static) wrapper for meminfo() call.
os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0; os::Solaris::meminfo_func_t os::Solaris::_meminfo = 0;
static address resolve_symbol(const char *name) { static address resolve_symbol_lazy(const char* name) {
address addr; address addr = (address) dlsym(RTLD_DEFAULT, name);
addr = (address) dlsym(RTLD_DEFAULT, name);
if(addr == NULL) { if(addr == NULL) {
// RTLD_DEFAULT was not defined on some early versions of 2.5.1 // RTLD_DEFAULT was not defined on some early versions of 2.5.1
addr = (address) dlsym(RTLD_NEXT, name); addr = (address) dlsym(RTLD_NEXT, name);
}
return addr;
}
static address resolve_symbol(const char* name) {
address addr = resolve_symbol_lazy(name);
if(addr == NULL) { if(addr == NULL) {
fatal(dlerror()); fatal(dlerror());
} }
}
return addr; return addr;
} }
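
The split gives callers two policies: optional symbols, such as getisax on releases before Solaris 10, resolve lazily and may come back NULL, while required symbols keep the fatal path. A hypothetical caller (portability of RTLD_DEFAULT varies):

#include <dlfcn.h>
#include <cstdint>

// Illustrative only; mirrors the getisax_func_t typedef added below.
typedef unsigned (*getisax_func_t)(uint32_t* array, unsigned n);

static getisax_func_t try_getisax() {
    // Lazy lookup: NULL just means the API is absent on this release.
    return (getisax_func_t) dlsym(RTLD_DEFAULT, "getisax");
}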
@ -4673,15 +4679,26 @@ bool os::Solaris::liblgrp_init() {
} }
void os::Solaris::misc_sym_init() { void os::Solaris::misc_sym_init() {
address func = (address)dlsym(RTLD_DEFAULT, "meminfo"); address func;
if(func == NULL) {
func = (address) dlsym(RTLD_NEXT, "meminfo"); // getisax
func = resolve_symbol_lazy("getisax");
if (func != NULL) {
os::Solaris::_getisax = CAST_TO_FN_PTR(getisax_func_t, func);
} }
// meminfo
func = resolve_symbol_lazy("meminfo");
if (func != NULL) { if (func != NULL) {
os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func)); os::Solaris::set_meminfo(CAST_TO_FN_PTR(meminfo_func_t, func));
} }
} }
uint_t os::Solaris::getisax(uint32_t* array, uint_t n) {
assert(_getisax != NULL, "_getisax not set");
return _getisax(array, n);
}
// Symbol doesn't exist in Solaris 8 pset.h // Symbol doesn't exist in Solaris 8 pset.h
#ifndef PS_MYID #ifndef PS_MYID
#define PS_MYID -3 #define PS_MYID -3
@ -4716,6 +4733,10 @@ void os::init(void) {
Solaris::initialize_system_info(); Solaris::initialize_system_info();
// Initialize misc. symbols as soon as possible, so we can use them
// if we need them.
Solaris::misc_sym_init();
int fd = open("/dev/zero", O_RDWR); int fd = open("/dev/zero", O_RDWR);
if (fd < 0) { if (fd < 0) {
fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno)); fatal1("os::init: cannot open /dev/zero (%s)", strerror(errno));
@ -4857,7 +4878,6 @@ jint os::init_2(void) {
} }
} }
Solaris::misc_sym_init();
Solaris::signal_sets_init(); Solaris::signal_sets_init();
Solaris::init_signal_mem(); Solaris::init_signal_mem();
Solaris::install_signal_handlers(); Solaris::install_signal_handlers();


@ -1,5 +1,5 @@
/* /*
* Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -72,6 +72,8 @@ class Solaris {
LGRP_VIEW_OS /* what's available to operating system */ LGRP_VIEW_OS /* what's available to operating system */
} lgrp_view_t; } lgrp_view_t;
typedef uint_t (*getisax_func_t)(uint32_t* array, uint_t n);
typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id); typedef lgrp_id_t (*lgrp_home_func_t)(idtype_t idtype, id_t id);
typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view); typedef lgrp_cookie_t (*lgrp_init_func_t)(lgrp_view_t view);
typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie); typedef int (*lgrp_fini_func_t)(lgrp_cookie_t cookie);
@ -87,6 +89,8 @@ class Solaris {
const uint_t info_req[], int info_count, const uint_t info_req[], int info_count,
uint64_t outdata[], uint_t validity[]); uint64_t outdata[], uint_t validity[]);
static getisax_func_t _getisax;
static lgrp_home_func_t _lgrp_home; static lgrp_home_func_t _lgrp_home;
static lgrp_init_func_t _lgrp_init; static lgrp_init_func_t _lgrp_init;
static lgrp_fini_func_t _lgrp_fini; static lgrp_fini_func_t _lgrp_fini;
@ -283,6 +287,9 @@ class Solaris {
} }
static lgrp_cookie_t lgrp_cookie() { return _lgrp_cookie; } static lgrp_cookie_t lgrp_cookie() { return _lgrp_cookie; }
static bool supports_getisax() { return _getisax != NULL; }
static uint_t getisax(uint32_t* array, uint_t n);
static void set_meminfo(meminfo_func_t func) { _meminfo = func; } static void set_meminfo(meminfo_func_t func) { _meminfo = func; }
static int meminfo (const uint64_t inaddr[], int addr_count, static int meminfo (const uint64_t inaddr[], int addr_count,
const uint_t info_req[], int info_count, const uint_t info_req[], int info_count,


@ -2595,7 +2595,7 @@ bool os::can_execute_large_page_memory() {
return true; return true;
} }
char* os::reserve_memory_special(size_t bytes) { char* os::reserve_memory_special(size_t bytes, char* addr) {
if (UseLargePagesIndividualAllocation) { if (UseLargePagesIndividualAllocation) {
if (TracePageSizes && Verbose) { if (TracePageSizes && Verbose) {
@ -2615,7 +2615,7 @@ char* os::reserve_memory_special(size_t bytes) {
"use -XX:-UseLargePagesIndividualAllocation to turn off"); "use -XX:-UseLargePagesIndividualAllocation to turn off");
return NULL; return NULL;
} }
p_buf = (char *) VirtualAlloc(NULL, p_buf = (char *) VirtualAlloc(addr,
size_of_reserve, // size of Reserve size_of_reserve, // size of Reserve
MEM_RESERVE, MEM_RESERVE,
PAGE_EXECUTE_READWRITE); PAGE_EXECUTE_READWRITE);


@ -30,5 +30,7 @@
define_pd_global(uintx, JVMInvokeMethodSlack, 12288); define_pd_global(uintx, JVMInvokeMethodSlack, 12288);
define_pd_global(intx, CompilerThreadStackSize, 0); define_pd_global(intx, CompilerThreadStackSize, 0);
// Only used on 64 bit platforms
define_pd_global(uintx, HeapBaseMinAddress, 4*G);
// Only used on 64 bit Windows platforms // Only used on 64 bit Windows platforms
define_pd_global(bool, UseVectoredExceptions, false); define_pd_global(bool, UseVectoredExceptions, false);


@ -43,5 +43,7 @@ define_pd_global(intx, SurvivorRatio, 8);
define_pd_global(uintx, JVMInvokeMethodSlack, 8192); define_pd_global(uintx, JVMInvokeMethodSlack, 8192);
// Only used on 64 bit platforms
define_pd_global(uintx, HeapBaseMinAddress, 2*G);
// Only used on 64 bit Windows platforms // Only used on 64 bit Windows platforms
define_pd_global(bool, UseVectoredExceptions, false); define_pd_global(bool, UseVectoredExceptions, false);


@ -30,5 +30,9 @@
define_pd_global(uintx, JVMInvokeMethodSlack, 12288); define_pd_global(uintx, JVMInvokeMethodSlack, 12288);
define_pd_global(intx, CompilerThreadStackSize, 0); define_pd_global(intx, CompilerThreadStackSize, 0);
// Only used on 64 bit platforms
define_pd_global(uintx, HeapBaseMinAddress, 4*G);
// Only used on 64 bit Windows platforms // Only used on 64 bit Windows platforms
define_pd_global(bool, UseVectoredExceptions, false); define_pd_global(bool, UseVectoredExceptions, false);


@ -1,5 +1,5 @@
/* /*
* Copyright 2006 Sun Microsystems, Inc. All Rights Reserved. * Copyright 2006-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,19 +25,75 @@
# include "incls/_precompiled.incl" # include "incls/_precompiled.incl"
# include "incls/_vm_version_solaris_sparc.cpp.incl" # include "incls/_vm_version_solaris_sparc.cpp.incl"
# include <sys/auxv.h>
# include <sys/auxv_SPARC.h>
# include <sys/systeminfo.h> # include <sys/systeminfo.h>
int VM_Version::platform_features(int features) { // We need to keep these here as long as we have to build on Solaris
// We determine what sort of hardware we have via sysinfo(SI_ISALIST, ...). // versions before 10.
// This isn't the best of all possible ways because there's not enough #ifndef SI_ARCHITECTURE_32
// detail in the isa list it returns, but it's a bit less arcane than #define SI_ARCHITECTURE_32 516 /* basic 32-bit SI_ARCHITECTURE */
// generating assembly code and an illegal instruction handler. We used #endif
// to generate a getpsr trap, but that's even more arcane.
//
// Another possibility would be to use sysinfo(SI_PLATFORM, ...), but
// that would require more knowledge here than is wise.
// isalist spec via 'man isalist' as of 01-Aug-2001 #ifndef SI_ARCHITECTURE_64
#define SI_ARCHITECTURE_64 517 /* basic 64-bit SI_ARCHITECTURE */
#endif
static void do_sysinfo(int si, const char* string, int* features, int mask) {
char tmp;
size_t bufsize = sysinfo(si, &tmp, 1);
// All SI defines used below must be supported.
guarantee(bufsize != -1, "must be supported");
char* buf = (char*) malloc(bufsize);
if (buf == NULL)
return;
if (sysinfo(si, buf, bufsize) == bufsize) {
// Compare the string.
if (strcmp(buf, string) == 0) {
*features |= mask;
}
}
free(buf);
}
int VM_Version::platform_features(int features) {
// getisax(2), SI_ARCHITECTURE_32, and SI_ARCHITECTURE_64 are
// supported on Solaris 10 and later.
if (os::Solaris::supports_getisax()) {
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose)
tty->print_cr("getisax(2) supported.");
#endif
// Check 32-bit architecture.
do_sysinfo(SI_ARCHITECTURE_32, "sparc", &features, v8_instructions_m);
// Check 64-bit architecture.
do_sysinfo(SI_ARCHITECTURE_64, "sparcv9", &features, generic_v9_m);
// Extract valid instruction set extensions.
uint_t av;
uint_t avn = os::Solaris::getisax(&av, 1);
assert(avn == 1, "should only return one av");
if (av & AV_SPARC_MUL32) features |= hardware_mul32_m;
if (av & AV_SPARC_DIV32) features |= hardware_div32_m;
if (av & AV_SPARC_FSMULD) features |= hardware_fsmuld_m;
if (av & AV_SPARC_V8PLUS) features |= v9_instructions_m;
if (av & AV_SPARC_POPC) features |= hardware_popc_m;
if (av & AV_SPARC_VIS) features |= vis1_instructions_m;
if (av & AV_SPARC_VIS2) features |= vis2_instructions_m;
} else {
// getisax(2) failed, use the old legacy code.
#ifndef PRODUCT
if (PrintMiscellaneous && Verbose)
tty->print_cr("getisax(2) not supported.");
#endif
char tmp; char tmp;
size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1); size_t bufsize = sysinfo(SI_ISALIST, &tmp, 1);
@ -50,8 +106,9 @@ int VM_Version::platform_features(int features) {
if (sparc_string != NULL) { features |= v8_instructions_m; if (sparc_string != NULL) { features |= v8_instructions_m;
if (sparc_string[5] == 'v') { if (sparc_string[5] == 'v') {
if (sparc_string[6] == '8') { if (sparc_string[6] == '8') {
if (sparc_string[7] == '-') features |= hardware_int_muldiv_m; if (sparc_string[7] == '-') { features |= hardware_mul32_m;
else if (sparc_string[7] == 'p') features |= generic_v9_m; features |= hardware_div32_m;
} else if (sparc_string[7] == 'p') features |= generic_v9_m;
else features |= generic_v8_m; else features |= generic_v8_m;
} else if (sparc_string[6] == '9') features |= generic_v9_m; } else if (sparc_string[6] == '9') features |= generic_v9_m;
} }
@ -65,18 +122,10 @@ int VM_Version::platform_features(int features) {
} }
free(buf); free(buf);
} }
}
bufsize = sysinfo(SI_MACHINE, &tmp, 1); // Determine the machine type.
buf = (char*)malloc(bufsize); do_sysinfo(SI_MACHINE, "sun4v", &features, sun4v_m);
if (buf != NULL) {
if (sysinfo(SI_MACHINE, buf, bufsize) == bufsize) {
if (strstr(buf, "sun4v") != NULL) {
features |= sun4v_m;
}
}
free(buf);
}
return features; return features;
} }
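The getisax(2) path can be exercised outside the VM. A minimal sketch, assuming a Solaris 10 or later SPARC system (the AV_SPARC_* bits come from <sys/auxv_SPARC.h>, the same ones mapped to feature flags above):

#include <stdio.h>
#include <stdint.h>
#include <sys/auxv.h>        // getisax(2), Solaris 10 and later
#include <sys/auxv_SPARC.h>  // AV_SPARC_* capability bits

// Fetch the first word of the ISA extension vector and test the same
// capability bits the VM turns into feature flags.
int main() {
  uint32_t av = 0;
  if (getisax(&av, 1) >= 1) {
    if (av & AV_SPARC_POPC) printf("POPC available\n");
    if (av & AV_SPARC_VIS2) printf("VIS2 available\n");
  }
  return 0;
}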

View file

@ -46,5 +46,7 @@ define_pd_global(uintx, JVMInvokeMethodSlack, 10*K);
define_pd_global(intx, CompilerThreadStackSize, 0); define_pd_global(intx, CompilerThreadStackSize, 0);
// Only used on 64 bit platforms
define_pd_global(uintx, HeapBaseMinAddress, 256*M);
// Only used on 64 bit Windows platforms // Only used on 64 bit Windows platforms
define_pd_global(bool, UseVectoredExceptions, false); define_pd_global(bool, UseVectoredExceptions, false);

View file

@ -45,5 +45,7 @@ define_pd_global(intx, CompilerThreadStackSize, 0);
define_pd_global(uintx, JVMInvokeMethodSlack, 8192); define_pd_global(uintx, JVMInvokeMethodSlack, 8192);
// Only used on 64 bit platforms
define_pd_global(uintx, HeapBaseMinAddress, 2*G);
// Only used on 64 bit Windows platforms // Only used on 64 bit Windows platforms
define_pd_global(bool, UseVectoredExceptions, false); define_pd_global(bool, UseVectoredExceptions, false);

View file

@ -68,6 +68,9 @@ typedef struct _DISPATCHER_CONTEXT {
PVOID HandlerData; PVOID HandlerData;
} DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT; } DISPATCHER_CONTEXT, *PDISPATCHER_CONTEXT;
#if MSC_VER < 1500
/* Not needed for VS2008 compiler, comes from winnt.h. */
typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) ( typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) (
IN PEXCEPTION_RECORD ExceptionRecord, IN PEXCEPTION_RECORD ExceptionRecord,
IN ULONG64 EstablisherFrame, IN ULONG64 EstablisherFrame,
@ -75,4 +78,6 @@ typedef EXCEPTION_DISPOSITION (*PEXCEPTION_ROUTINE) (
IN OUT PDISPATCHER_CONTEXT DispatcherContext IN OUT PDISPATCHER_CONTEXT DispatcherContext
); );
#endif
#endif // AMD64 #endif // AMD64

View file

@ -44,7 +44,7 @@ using namespace std;
#error "Something is wrong with the detection of MSC_VER in the makefiles" #error "Something is wrong with the detection of MSC_VER in the makefiles"
#endif #endif
#if _MSC_VER >= 1400 && !defined(_WIN64) #if _MSC_VER >= 1400
#define strdup _strdup #define strdup _strdup
#endif #endif

View file

@ -321,16 +321,19 @@ void AbstractAssembler::block_comment(const char* comment) {
bool MacroAssembler::needs_explicit_null_check(intptr_t offset) { bool MacroAssembler::needs_explicit_null_check(intptr_t offset) {
// Exception handler checks the nmethod's implicit null checks table // Exception handler checks the nmethod's implicit null checks table
// only when this method returns false. // only when this method returns false.
if (UseCompressedOops) { #ifdef _LP64
if (UseCompressedOops && Universe::narrow_oop_base() != NULL) {
assert (Universe::heap() != NULL, "java heap should be initialized");
// The first page after heap_base is unmapped and // The first page after heap_base is unmapped and
// the 'offset' is equal to [heap_base + offset] for // the 'offset' is equal to [heap_base + offset] for
// narrow oop implicit null checks. // narrow oop implicit null checks.
uintptr_t heap_base = (uintptr_t)Universe::heap_base(); uintptr_t base = (uintptr_t)Universe::narrow_oop_base();
if ((uintptr_t)offset >= heap_base) { if ((uintptr_t)offset >= base) {
// Normalize offset for the next check. // Normalize offset for the next check.
offset = (intptr_t)(pointer_delta((void*)offset, (void*)heap_base, 1)); offset = (intptr_t)(pointer_delta((void*)offset, (void*)base, 1));
} }
} }
#endif
return offset < 0 || os::vm_page_size() <= offset; return offset < 0 || os::vm_page_size() <= offset;
} }
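The normalization can be checked with hypothetical numbers (the base and page size below are assumptions for illustration, not values taken from the VM):

#include <assert.h>
#include <stdint.h>

// Assume narrow_oop_base = 0x20000000000 and 4K pages.  A load from a
// NULL narrow oop at field offset 24 decodes to base + 24, so the
// 'offset' reaching needs_explicit_null_check() is an absolute address.
int main() {
  const uintptr_t base      = 0x20000000000UL;  // assumed narrow oop base
  const intptr_t  page_size = 4096;             // assumed os::vm_page_size()
  intptr_t offset = (intptr_t)(base + 24);      // faulting address
  if ((uintptr_t)offset >= base)
    offset = (intptr_t)((uintptr_t)offset - base);  // normalized: 24
  assert(!(offset < 0 || page_size <= offset));     // implicit check suffices
  return 0;
}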

View file

@ -284,6 +284,11 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth),
// //
int ex_start = handler->start(); int ex_start = handler->start();
int ex_end = handler->limit(); int ex_end = handler->limit();
// ensure a block at the start of exception range and start of following code
(void) make_block_at(ex_start);
if (ex_end < _code_size)
(void) make_block_at(ex_end);
if (eb->is_handler()) { if (eb->is_handler()) {
// Extend old handler exception range to cover additional range. // Extend old handler exception range to cover additional range.
int old_ex_start = eb->ex_start_bci(); int old_ex_start = eb->ex_start_bci();
@ -295,10 +300,6 @@ ciMethodBlocks::ciMethodBlocks(Arena *arena, ciMethod *meth): _method(meth),
eb->clear_exception_handler(); // Reset exception information eb->clear_exception_handler(); // Reset exception information
} }
eb->set_exception_range(ex_start, ex_end); eb->set_exception_range(ex_start, ex_end);
// ensure a block at the start of exception range and start of following code
(void) make_block_at(ex_start);
if (ex_end < _code_size)
(void) make_block_at(ex_end);
} }
} }

View file

@ -284,6 +284,7 @@
template(value_name, "value") \ template(value_name, "value") \
template(frontCacheEnabled_name, "frontCacheEnabled") \ template(frontCacheEnabled_name, "frontCacheEnabled") \
template(stringCacheEnabled_name, "stringCacheEnabled") \ template(stringCacheEnabled_name, "stringCacheEnabled") \
template(bitCount_name, "bitCount") \
\ \
/* non-intrinsic name/signature pairs: */ \ /* non-intrinsic name/signature pairs: */ \
template(register_method_name, "register") \ template(register_method_name, "register") \
@ -304,6 +305,7 @@
template(double_long_signature, "(D)J") \ template(double_long_signature, "(D)J") \
template(double_double_signature, "(D)D") \ template(double_double_signature, "(D)D") \
template(int_float_signature, "(I)F") \ template(int_float_signature, "(I)F") \
template(long_int_signature, "(J)I") \
template(long_long_signature, "(J)J") \ template(long_long_signature, "(J)J") \
template(long_double_signature, "(J)D") \ template(long_double_signature, "(J)D") \
template(byte_signature, "B") \ template(byte_signature, "B") \
@ -507,6 +509,10 @@
do_name( doubleToLongBits_name, "doubleToLongBits") \ do_name( doubleToLongBits_name, "doubleToLongBits") \
do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \ do_intrinsic(_longBitsToDouble, java_lang_Double, longBitsToDouble_name, long_double_signature, F_S) \
do_name( longBitsToDouble_name, "longBitsToDouble") \ do_name( longBitsToDouble_name, "longBitsToDouble") \
\
do_intrinsic(_bitCount_i, java_lang_Integer, bitCount_name, int_int_signature, F_S) \
do_intrinsic(_bitCount_l, java_lang_Long, bitCount_name, long_int_signature, F_S) \
\
do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \ do_intrinsic(_reverseBytes_i, java_lang_Integer, reverseBytes_name, int_int_signature, F_S) \
do_name( reverseBytes_name, "reverseBytes") \ do_name( reverseBytes_name, "reverseBytes") \
do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \ do_intrinsic(_reverseBytes_l, java_lang_Long, reverseBytes_name, long_long_signature, F_S) \
@ -696,7 +702,6 @@
do_signature(putShort_raw_signature, "(JS)V") \ do_signature(putShort_raw_signature, "(JS)V") \
do_signature(getChar_raw_signature, "(J)C") \ do_signature(getChar_raw_signature, "(J)C") \
do_signature(putChar_raw_signature, "(JC)V") \ do_signature(putChar_raw_signature, "(JC)V") \
do_signature(getInt_raw_signature, "(J)I") \
do_signature(putInt_raw_signature, "(JI)V") \ do_signature(putInt_raw_signature, "(JI)V") \
do_alias(getLong_raw_signature, /*(J)J*/ long_long_signature) \ do_alias(getLong_raw_signature, /*(J)J*/ long_long_signature) \
do_alias(putLong_raw_signature, /*(JJ)V*/ long_long_void_signature) \ do_alias(putLong_raw_signature, /*(JJ)V*/ long_long_void_signature) \
@ -713,7 +718,7 @@
do_intrinsic(_getByte_raw, sun_misc_Unsafe, getByte_name, getByte_raw_signature, F_RN) \ do_intrinsic(_getByte_raw, sun_misc_Unsafe, getByte_name, getByte_raw_signature, F_RN) \
do_intrinsic(_getShort_raw, sun_misc_Unsafe, getShort_name, getShort_raw_signature, F_RN) \ do_intrinsic(_getShort_raw, sun_misc_Unsafe, getShort_name, getShort_raw_signature, F_RN) \
do_intrinsic(_getChar_raw, sun_misc_Unsafe, getChar_name, getChar_raw_signature, F_RN) \ do_intrinsic(_getChar_raw, sun_misc_Unsafe, getChar_name, getChar_raw_signature, F_RN) \
do_intrinsic(_getInt_raw, sun_misc_Unsafe, getInt_name, getInt_raw_signature, F_RN) \ do_intrinsic(_getInt_raw, sun_misc_Unsafe, getInt_name, long_int_signature, F_RN) \
do_intrinsic(_getLong_raw, sun_misc_Unsafe, getLong_name, getLong_raw_signature, F_RN) \ do_intrinsic(_getLong_raw, sun_misc_Unsafe, getLong_name, getLong_raw_signature, F_RN) \
do_intrinsic(_getFloat_raw, sun_misc_Unsafe, getFloat_name, getFloat_raw_signature, F_RN) \ do_intrinsic(_getFloat_raw, sun_misc_Unsafe, getFloat_name, getFloat_raw_signature, F_RN) \
do_intrinsic(_getDouble_raw, sun_misc_Unsafe, getDouble_name, getDouble_raw_signature, F_RN) \ do_intrinsic(_getDouble_raw, sun_misc_Unsafe, getDouble_name, getDouble_raw_signature, F_RN) \

View file

@ -107,7 +107,7 @@ void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
#ifndef PRODUCT #ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const { bool CMBitMapRO::covers(ReservedSpace rs) const {
// assert(_bm.map() == _virtual_space.low(), "map inconsistency"); // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
"size inconsistency"); "size inconsistency");
return _bmStartWord == (HeapWord*)(rs.base()) && return _bmStartWord == (HeapWord*)(rs.base()) &&
_bmWordSize == rs.size()>>LogHeapWordSize; _bmWordSize == rs.size()>>LogHeapWordSize;

View file

@ -1422,9 +1422,34 @@ jint G1CollectedHeap::initialize() {
// Reserve the maximum. // Reserve the maximum.
PermanentGenerationSpec* pgs = collector_policy()->permanent_generation(); PermanentGenerationSpec* pgs = collector_policy()->permanent_generation();
// Includes the perm-gen. // Includes the perm-gen.
const size_t total_reserved = max_byte_size + pgs->max_size();
char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
ReservedSpace heap_rs(max_byte_size + pgs->max_size(), ReservedSpace heap_rs(max_byte_size + pgs->max_size(),
HeapRegion::GrainBytes, HeapRegion::GrainBytes,
false /*ism*/); false /*ism*/, addr);
if (UseCompressedOops) {
if (addr != NULL && !heap_rs.is_reserved()) {
// Failed to reserve at specified address - the requested memory
// region is taken already, for example, by 'java' launcher.
// Try again to reserve the heap higher.
addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
ReservedSpace heap_rs0(total_reserved, HeapRegion::GrainBytes,
false /*ism*/, addr);
if (addr != NULL && !heap_rs0.is_reserved()) {
// Failed to reserve at specified address again - give up.
addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
assert(addr == NULL, "");
ReservedSpace heap_rs1(total_reserved, HeapRegion::GrainBytes,
false /*ism*/, addr);
heap_rs = heap_rs1;
} else {
heap_rs = heap_rs0;
}
}
}
if (!heap_rs.is_reserved()) { if (!heap_rs.is_reserved()) {
vm_exit_during_initialization("Could not reserve enough space for object heap"); vm_exit_during_initialization("Could not reserve enough space for object heap");
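The same Unscaled -> ZeroBased -> HeapBased cascade recurs for the parallel and generational heaps further down. Condensed into runnable form (preferred_base and try_reserve are hypothetical stubs standing in for Universe::preferred_heap_base and the ReservedSpace constructors):

#include <stdio.h>
#include <stddef.h>

enum Mode { Unscaled, ZeroBased, HeapBased };
// Stubs: HeapBased means "no preference" (NULL); every fixed request
// fails here so the sketch walks the whole cascade.
static char* preferred_base(size_t size, Mode m) { return m == HeapBased ? NULL : (char*)0x1000; }
static char* try_reserve(size_t size, char* addr) { return NULL; }

int main() {
  const size_t total = 1u << 20;
  char* addr = preferred_base(total, Unscaled);     // heap top below 4Gb
  char* base = try_reserve(total, addr);
  if (addr != NULL && base == NULL) {
    addr = preferred_base(total, ZeroBased);        // heap top below 32Gb
    base = try_reserve(total, addr);
    if (addr != NULL && base == NULL) {
      addr = preferred_base(total, HeapBased);      // NULL: let the OS choose
      base = try_reserve(total, addr);
    }
  }
  printf("final request address: %p\n", (void*)addr);
  return 0;
}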

View file

@ -508,7 +508,7 @@ OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
typedef PosParPRT* PosParPRTPtr; typedef PosParPRT* PosParPRTPtr;
if (_max_fine_entries == 0) { if (_max_fine_entries == 0) {
assert(_mod_max_fine_entries_mask == 0, "Both or none."); assert(_mod_max_fine_entries_mask == 0, "Both or none.");
_max_fine_entries = (1 << G1LogRSRegionEntries); _max_fine_entries = (size_t)(1 << G1LogRSRegionEntries);
_mod_max_fine_entries_mask = _max_fine_entries - 1; _mod_max_fine_entries_mask = _max_fine_entries - 1;
#if SAMPLE_FOR_EVICTION #if SAMPLE_FOR_EVICTION
assert(_fine_eviction_sample_size == 0 assert(_fine_eviction_sample_size == 0

View file

@ -104,12 +104,38 @@ jint ParallelScavengeHeap::initialize() {
og_min_size, og_max_size, og_min_size, og_max_size,
yg_min_size, yg_max_size); yg_min_size, yg_max_size);
const size_t total_reserved = pg_max_size + og_max_size + yg_max_size;
char* addr = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
// The main part of the heap (old gen + young gen) can often use a larger page // The main part of the heap (old gen + young gen) can often use a larger page
// size than is needed or wanted for the perm gen. Use the "compound // size than is needed or wanted for the perm gen. Use the "compound
// alignment" ReservedSpace ctor to avoid having to use the same page size for // alignment" ReservedSpace ctor to avoid having to use the same page size for
// all gens. // all gens.
ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size, ReservedHeapSpace heap_rs(pg_max_size, pg_align, og_max_size + yg_max_size,
og_align); og_align, addr);
if (UseCompressedOops) {
if (addr != NULL && !heap_rs.is_reserved()) {
// Failed to reserve at specified address - the requested memory
// region is taken already, for example, by 'java' launcher.
// Try again to reserve the heap higher.
addr = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
ReservedHeapSpace heap_rs0(pg_max_size, pg_align, og_max_size + yg_max_size,
og_align, addr);
if (addr != NULL && !heap_rs0.is_reserved()) {
// Failed to reserve at specified address again - give up.
addr = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
assert(addr == NULL, "");
ReservedHeapSpace heap_rs1(pg_max_size, pg_align, og_max_size + yg_max_size,
og_align, addr);
heap_rs = heap_rs1;
} else {
heap_rs = heap_rs0;
}
}
}
os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz, os::trace_page_sizes("ps perm", pg_min_size, pg_max_size, pg_page_sz,
heap_rs.base(), pg_max_size); heap_rs.base(), pg_max_size);
os::trace_page_sizes("ps main", og_min_size + yg_min_size, os::trace_page_sizes("ps main", og_min_size + yg_min_size,

View file

@ -4598,6 +4598,7 @@ vm_version_<arch>.cpp vm_version_<arch>.hpp
vm_version_<arch>.hpp globals_extension.hpp vm_version_<arch>.hpp globals_extension.hpp
vm_version_<arch>.hpp vm_version.hpp vm_version_<arch>.hpp vm_version.hpp
vm_version_<os_arch>.cpp os.hpp
vm_version_<os_arch>.cpp vm_version_<arch>.hpp vm_version_<os_arch>.cpp vm_version_<arch>.hpp
vmreg.cpp assembler.hpp vmreg.cpp assembler.hpp

View file

@ -235,7 +235,7 @@ class BlockOffsetArray: public BlockOffsetTable {
}; };
static size_t power_to_cards_back(uint i) { static size_t power_to_cards_back(uint i) {
return 1 << (LogBase * i); return (size_t)(1 << (LogBase * i));
} }
static size_t power_to_words_back(uint i) { static size_t power_to_words_back(uint i) {
return power_to_cards_back(i) * N_words; return power_to_cards_back(i) * N_words;

View file

@ -218,6 +218,31 @@ char* GenCollectedHeap::allocate(size_t alignment,
heap_address -= total_reserved; heap_address -= total_reserved;
} else { } else {
heap_address = NULL; // any address will do. heap_address = NULL; // any address will do.
if (UseCompressedOops) {
heap_address = Universe::preferred_heap_base(total_reserved, Universe::UnscaledNarrowOop);
*_total_reserved = total_reserved;
*_n_covered_regions = n_covered_regions;
*heap_rs = ReservedHeapSpace(total_reserved, alignment,
UseLargePages, heap_address);
if (heap_address != NULL && !heap_rs->is_reserved()) {
// Failed to reserve at specified address - the requested memory
// region is taken already, for example, by 'java' launcher.
// Try again to reserve the heap higher.
heap_address = Universe::preferred_heap_base(total_reserved, Universe::ZeroBasedNarrowOop);
*heap_rs = ReservedHeapSpace(total_reserved, alignment,
UseLargePages, heap_address);
if (heap_address != NULL && !heap_rs->is_reserved()) {
// Failed to reserve at specified address again - give up.
heap_address = Universe::preferred_heap_base(total_reserved, Universe::HeapBasedNarrowOop);
assert(heap_address == NULL, "");
*heap_rs = ReservedHeapSpace(total_reserved, alignment,
UseLargePages, heap_address);
}
}
return heap_address;
}
} }
*_total_reserved = total_reserved; *_total_reserved = total_reserved;

View file

@ -99,7 +99,8 @@ size_t Universe::_heap_capacity_at_last_gc;
size_t Universe::_heap_used_at_last_gc = 0; size_t Universe::_heap_used_at_last_gc = 0;
CollectedHeap* Universe::_collectedHeap = NULL; CollectedHeap* Universe::_collectedHeap = NULL;
address Universe::_heap_base = NULL;
NarrowOopStruct Universe::_narrow_oop = { NULL, 0, true };
void Universe::basic_type_classes_do(void f(klassOop)) { void Universe::basic_type_classes_do(void f(klassOop)) {
@ -729,6 +730,53 @@ jint universe_init() {
return JNI_OK; return JNI_OK;
} }
// Choose the heap base address and oop encoding mode
// when compressed oops are used:
// Unscaled - Use 32-bit oops without encoding when
// NarrowOopHeapBaseMin + heap_size < 4Gb
// ZeroBased - Use zero based compressed oops with encoding when
// NarrowOopHeapBaseMin + heap_size < 32Gb
// HeapBased - Use compressed oops with heap base + encoding.
// 4Gb
static const uint64_t NarrowOopHeapMax = (uint64_t(max_juint) + 1);
// 32Gb
static const uint64_t OopEncodingHeapMax = NarrowOopHeapMax << LogMinObjAlignmentInBytes;
char* Universe::preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode) {
#ifdef _LP64
if (UseCompressedOops) {
assert(mode == UnscaledNarrowOop ||
mode == ZeroBasedNarrowOop ||
mode == HeapBasedNarrowOop, "mode is invalid");
const size_t total_size = heap_size + HeapBaseMinAddress;
if (total_size <= OopEncodingHeapMax && (mode != HeapBasedNarrowOop)) {
if (total_size <= NarrowOopHeapMax && (mode == UnscaledNarrowOop) &&
(Universe::narrow_oop_shift() == 0)) {
// Use 32-bit oops without encoding and
// place heap's top on the 4Gb boundary
return (char*)(NarrowOopHeapMax - heap_size);
} else {
// Can't reserve with NarrowOopShift == 0
Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
if (mode == UnscaledNarrowOop ||
mode == ZeroBasedNarrowOop && total_size <= NarrowOopHeapMax) {
// Use zero based compressed oops with encoding and
// place heap's top on the 32Gb boundary in case
// total_size > 4Gb or failed to reserve below 4Gb.
return (char*)(OopEncodingHeapMax - heap_size);
}
}
} else {
// Can't reserve below 32Gb.
Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
}
}
#endif
return NULL; // also return NULL (don't care) for 32-bit VM
}
jint Universe::initialize_heap() { jint Universe::initialize_heap() {
if (UseParallelGC) { if (UseParallelGC) {
@ -773,6 +821,8 @@ jint Universe::initialize_heap() {
if (status != JNI_OK) { if (status != JNI_OK) {
return status; return status;
} }
#ifdef _LP64
if (UseCompressedOops) { if (UseCompressedOops) {
// Subtract a page because something can get allocated at heap base. // Subtract a page because something can get allocated at heap base.
// This also makes implicit null checking work, because the // This also makes implicit null checking work, because the
@ -780,8 +830,49 @@ jint Universe::initialize_heap() {
// See needs_explicit_null_check. // See needs_explicit_null_check.
// Only set the heap base for compressed oops because it indicates // Only set the heap base for compressed oops because it indicates
// compressed oops for pstack code. // compressed oops for pstack code.
Universe::_heap_base = Universe::heap()->base() - os::vm_page_size(); if (PrintCompressedOopsMode) {
tty->cr();
tty->print("heap address: "PTR_FORMAT, Universe::heap()->base());
} }
if ((uint64_t)Universe::heap()->reserved_region().end() > OopEncodingHeapMax) {
// Can't reserve heap below 32Gb.
Universe::set_narrow_oop_base(Universe::heap()->base() - os::vm_page_size());
Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
if (PrintCompressedOopsMode) {
tty->print(", Compressed Oops with base: "PTR_FORMAT, Universe::narrow_oop_base());
}
} else {
Universe::set_narrow_oop_base(0);
if (PrintCompressedOopsMode) {
tty->print(", zero based Compressed Oops");
}
#ifdef _WIN64
if (!Universe::narrow_oop_use_implicit_null_checks()) {
// Don't need guard page for implicit checks in indexed addressing
// mode with zero based Compressed Oops.
Universe::set_narrow_oop_use_implicit_null_checks(true);
}
#endif // _WIN64
if((uint64_t)Universe::heap()->reserved_region().end() > NarrowOopHeapMax) {
// Can't reserve heap below 4Gb.
Universe::set_narrow_oop_shift(LogMinObjAlignmentInBytes);
} else {
assert(Universe::narrow_oop_shift() == 0, "use unscaled narrow oop");
if (PrintCompressedOopsMode) {
tty->print(", 32-bits Oops");
}
}
}
if (PrintCompressedOopsMode) {
tty->cr();
tty->cr();
}
}
assert(Universe::narrow_oop_base() == (Universe::heap()->base() - os::vm_page_size()) ||
Universe::narrow_oop_base() == NULL, "invalid value");
assert(Universe::narrow_oop_shift() == LogMinObjAlignmentInBytes ||
Universe::narrow_oop_shift() == 0, "invalid value");
#endif
// We will never reach the CATCH below since Exceptions::_throw will cause // We will never reach the CATCH below since Exceptions::_throw will cause
// the VM to exit if an exception is thrown during initialization // the VM to exit if an exception is thrown during initialization

View file

@ -90,6 +90,19 @@ class LatestMethodOopCache : public CommonMethodOopCache {
methodOop get_methodOop(); methodOop get_methodOop();
}; };
// For UseCompressedOops.
struct NarrowOopStruct {
// Base address for oop-within-java-object materialization.
// NULL if using wide oops or zero based narrow oops.
address _base;
// Number of shift bits for encoding/decoding narrow oops.
// 0 if using wide oops or zero based unscaled narrow oops,
// LogMinObjAlignmentInBytes otherwise.
int _shift;
// Generate code with implicit null checks for narrow oops.
bool _use_implicit_null_checks;
};
class Universe: AllStatic { class Universe: AllStatic {
// Ugh. Universe is much too friendly. // Ugh. Universe is much too friendly.
@ -181,9 +194,9 @@ class Universe: AllStatic {
// The particular choice of collected heap. // The particular choice of collected heap.
static CollectedHeap* _collectedHeap; static CollectedHeap* _collectedHeap;
// Base address for oop-within-java-object materialization.
// NULL if using wide oops. Doubles as heap oop null value. // For UseCompressedOops.
static address _heap_base; static struct NarrowOopStruct _narrow_oop;
// array of dummy objects used with +FullGCAlot // array of dummy objects used with +FullGCAlot
debug_only(static objArrayOop _fullgc_alot_dummy_array;) debug_only(static objArrayOop _fullgc_alot_dummy_array;)
@ -328,8 +341,25 @@ class Universe: AllStatic {
static CollectedHeap* heap() { return _collectedHeap; } static CollectedHeap* heap() { return _collectedHeap; }
// For UseCompressedOops // For UseCompressedOops
static address heap_base() { return _heap_base; } static address* narrow_oop_base_addr() { return &_narrow_oop._base; }
static address* heap_base_addr() { return &_heap_base; } static address narrow_oop_base() { return _narrow_oop._base; }
static int narrow_oop_shift() { return _narrow_oop._shift; }
static void set_narrow_oop_base(address base) { _narrow_oop._base = base; }
static void set_narrow_oop_shift(int shift) { _narrow_oop._shift = shift; }
static bool narrow_oop_use_implicit_null_checks() { return _narrow_oop._use_implicit_null_checks; }
static void set_narrow_oop_use_implicit_null_checks(bool use) { _narrow_oop._use_implicit_null_checks = use; }
// Narrow Oop encoding mode:
// 0 - Use 32-bit oops without encoding when
// NarrowOopHeapBaseMin + heap_size < 4Gb
// 1 - Use zero based compressed oops with encoding when
// NarrowOopHeapBaseMin + heap_size < 32Gb
// 2 - Use compressed oops with heap base + encoding.
enum NARROW_OOP_MODE {
UnscaledNarrowOop = 0,
ZeroBasedNarrowOop = 1,
HeapBasedNarrowOop = 2
};
static char* preferred_heap_base(size_t heap_size, NARROW_OOP_MODE mode);
// Historic gc information // Historic gc information
static size_t get_heap_capacity_at_last_gc() { return _heap_capacity_at_last_gc; } static size_t get_heap_capacity_at_last_gc() { return _heap_capacity_at_last_gc; }

View file

@ -148,10 +148,11 @@ inline bool oopDesc::is_null(narrowOop obj) { return obj == 0; }
inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) { inline narrowOop oopDesc::encode_heap_oop_not_null(oop v) {
assert(!is_null(v), "oop value can never be zero"); assert(!is_null(v), "oop value can never be zero");
address heap_base = Universe::heap_base(); address base = Universe::narrow_oop_base();
uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)heap_base, 1)); int shift = Universe::narrow_oop_shift();
uint64_t pd = (uint64_t)(pointer_delta((void*)v, (void*)base, 1));
assert(OopEncodingHeapMax > pd, "change encoding max if new encoding"); assert(OopEncodingHeapMax > pd, "change encoding max if new encoding");
uint64_t result = pd >> LogMinObjAlignmentInBytes; uint64_t result = pd >> shift;
assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow"); assert((result & CONST64(0xffffffff00000000)) == 0, "narrow oop overflow");
return (narrowOop)result; return (narrowOop)result;
} }
@ -162,8 +163,9 @@ inline narrowOop oopDesc::encode_heap_oop(oop v) {
inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) { inline oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
assert(!is_null(v), "narrow oop value can never be zero"); assert(!is_null(v), "narrow oop value can never be zero");
address heap_base = Universe::heap_base(); address base = Universe::narrow_oop_base();
return (oop)(void*)((uintptr_t)heap_base + ((uintptr_t)v << LogMinObjAlignmentInBytes)); int shift = Universe::narrow_oop_shift();
return (oop)(void*)((uintptr_t)base + ((uintptr_t)v << shift));
} }
inline oop oopDesc::decode_heap_oop(narrowOop v) { inline oop oopDesc::decode_heap_oop(narrowOop v) {
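Encode and decode are a round-tripping affine map: narrow = (oop - base) >> shift and oop = base + (narrow << shift). A standalone check with hypothetical base and shift values:

#include <assert.h>
#include <stdint.h>

int main() {
  const uintptr_t base  = 0x800000000UL;  // assumed narrow oop base
  const int       shift = 3;              // 8-byte object alignment
  uintptr_t oop    = base + 0x12345678UL * 8;              // aligned heap address
  uint32_t  narrow = (uint32_t)((oop - base) >> shift);    // encode
  uintptr_t back   = base + ((uintptr_t)narrow << shift);  // decode
  assert(back == oop);
  return 0;
}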

View file

@ -756,7 +756,13 @@ const Type *AddPNode::mach_bottom_type( const MachNode* n) {
if ( eti == NULL ) { if ( eti == NULL ) {
// there must be one pointer among the operands // there must be one pointer among the operands
guarantee(tptr == NULL, "must be only one pointer operand"); guarantee(tptr == NULL, "must be only one pointer operand");
if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
// A 32-bit narrow oop can be the base of address expressions
tptr = et->make_ptr()->isa_oopptr();
} else {
// only regular oops are expected here
tptr = et->isa_oopptr(); tptr = et->isa_oopptr();
}
guarantee(tptr != NULL, "non-int operand must be pointer"); guarantee(tptr != NULL, "non-int operand must be pointer");
if (tptr->higher_equal(tp->add_offset(tptr->offset()))) if (tptr->higher_equal(tp->add_offset(tptr->offset())))
tp = tptr; // Set more precise type for bailout tp = tptr; // Set more precise type for bailout

View file

@ -184,6 +184,8 @@ macro(PCTable)
macro(Parm) macro(Parm)
macro(PartialSubtypeCheck) macro(PartialSubtypeCheck)
macro(Phi) macro(Phi)
macro(PopCountI)
macro(PopCountL)
macro(PowD) macro(PowD)
macro(PrefetchRead) macro(PrefetchRead)
macro(PrefetchWrite) macro(PrefetchWrite)

View file

@ -2081,7 +2081,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
#ifdef _LP64 #ifdef _LP64
case Op_CastPP: case Op_CastPP:
if (n->in(1)->is_DecodeN() && UseImplicitNullCheckForNarrowOop) { if (n->in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks()) {
Compile* C = Compile::current(); Compile* C = Compile::current();
Node* in1 = n->in(1); Node* in1 = n->in(1);
const Type* t = n->bottom_type(); const Type* t = n->bottom_type();
@ -2136,7 +2136,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
new_in2 = in2->in(1); new_in2 = in2->in(1);
} else if (in2->Opcode() == Op_ConP) { } else if (in2->Opcode() == Op_ConP) {
const Type* t = in2->bottom_type(); const Type* t = in2->bottom_type();
if (t == TypePtr::NULL_PTR && UseImplicitNullCheckForNarrowOop) { if (t == TypePtr::NULL_PTR && Universe::narrow_oop_use_implicit_null_checks()) {
new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR);
// //
// This transformation together with CastPP transformation above // This transformation together with CastPP transformation above

View file

@ -433,7 +433,7 @@ Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
// If not converting int->oop, throw away cast after constant propagation // If not converting int->oop, throw away cast after constant propagation
Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
const Type *t = ccp->type(in(1)); const Type *t = ccp->type(in(1));
if (!t->isa_oop_ptr() || in(1)->is_DecodeN()) { if (!t->isa_oop_ptr() || (in(1)->is_DecodeN() && Universe::narrow_oop_use_implicit_null_checks())) {
return NULL; // do not transform raw pointers or narrow oops return NULL; // do not transform raw pointers or narrow oops
} }
return ConstraintCastNode::Ideal_DU_postCCP(ccp); return ConstraintCastNode::Ideal_DU_postCCP(ccp);

View file

@ -1,5 +1,5 @@
/* /*
* Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -635,3 +635,23 @@ class MoveD2LNode : public Node {
virtual uint ideal_reg() const { return Op_RegL; } virtual uint ideal_reg() const { return Op_RegL; }
virtual const Type* Value( PhaseTransform *phase ) const; virtual const Type* Value( PhaseTransform *phase ) const;
}; };
//---------- PopCountINode -----------------------------------------------------
// Population count (bit count) of an integer.
class PopCountINode : public Node {
public:
PopCountINode(Node* in1) : Node(0, in1) {}
virtual int Opcode() const;
const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};
//---------- PopCountLNode -----------------------------------------------------
// Population count (bit count) of a long.
class PopCountLNode : public Node {
public:
PopCountLNode(Node* in1) : Node(0, in1) {}
virtual int Opcode() const;
const Type* bottom_type() const { return TypeInt::INT; }
virtual uint ideal_reg() const { return Op_RegI; }
};

View file

@ -2277,7 +2277,7 @@ Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) ); r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
set_control( _gvn.transform( new (C, 1) IfFalseNode(iff2) ) ); set_control( _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
// Check for self. Very rare to get here, but its taken 1/3 the time. // Check for self. Very rare to get here, but it is taken 1/3 the time.
// No performance impact (too rare) but allows sharing of secondary arrays // No performance impact (too rare) but allows sharing of secondary arrays
// which has some footprint reduction. // which has some footprint reduction.
Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) ); Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
@ -2286,11 +2286,27 @@ Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) ); r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
set_control( _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) ); set_control( _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
// -- Roads not taken here: --
// We could also have chosen to perform the self-check at the beginning
// of this code sequence, as the assembler does. This would not pay off
// the same way, since the optimizer, unlike the assembler, can perform
// static type analysis to fold away many successful self-checks.
// Non-foldable self checks work better here in second position, because
// the initial primary superclass check subsumes a self-check for most
// types. An exception would be a secondary type like array-of-interface,
// which does not appear in its own primary supertype display.
// Finally, we could have chosen to move the self-check into the
// PartialSubtypeCheckNode, and from there out-of-line in a platform
// dependent manner. But it is worthwhile to have the check here,
// where it can perhaps be optimized. The cost in code space is
// small (register compare, branch).
// Now do a linear scan of the secondary super-klass array. Again, no real // Now do a linear scan of the secondary super-klass array. Again, no real
// performance impact (too rare) but it's gotta be done. // performance impact (too rare) but it's gotta be done.
// (The stub also contains the self-check of subklass == superklass.
// Since the code is rarely used, there is no penalty for moving it // Since the code is rarely used, there is no penalty for moving it
// out of line, and it can only improve I-cache density.) // out of line, and it can only improve I-cache density.
// The decision to inline or out-of-line this final check is platform
// dependent, and is found in the AD file definition of PartialSubtypeCheck.
Node* psc = _gvn.transform( Node* psc = _gvn.transform(
new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) ); new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );

View file

@ -158,7 +158,14 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe
continue; // Give up if offset is beyond page size continue; // Give up if offset is beyond page size
// cannot reason about it; is probably not implicit null exception // cannot reason about it; is probably not implicit null exception
} else { } else {
const TypePtr* tptr = base->bottom_type()->is_ptr(); const TypePtr* tptr;
if (UseCompressedOops && Universe::narrow_oop_shift() == 0) {
// A 32-bit narrow oop can be the base of address expressions
tptr = base->bottom_type()->make_ptr();
} else {
// only regular oops are expected here
tptr = base->bottom_type()->is_ptr();
}
// Give up if offset is not a compile-time constant // Give up if offset is not a compile-time constant
if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot ) if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
continue; continue;

View file

@ -1,5 +1,5 @@
/* /*
* Copyright 1999-2008 Sun Microsystems, Inc. All Rights Reserved. * Copyright 1999-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -221,6 +221,7 @@ class LibraryCallKit : public GraphKit {
bool inline_unsafe_CAS(BasicType type); bool inline_unsafe_CAS(BasicType type);
bool inline_unsafe_ordered_store(BasicType type); bool inline_unsafe_ordered_store(BasicType type);
bool inline_fp_conversions(vmIntrinsics::ID id); bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_bitCount(vmIntrinsics::ID id);
bool inline_reverseBytes(vmIntrinsics::ID id); bool inline_reverseBytes(vmIntrinsics::ID id);
}; };
@ -314,6 +315,11 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
if (!JDK_Version::is_gte_jdk14x_version()) return NULL; if (!JDK_Version::is_gte_jdk14x_version()) return NULL;
break; break;
case vmIntrinsics::_bitCount_i:
case vmIntrinsics::_bitCount_l:
if (!UsePopCountInstruction) return NULL;
break;
default: default:
break; break;
} }
@ -617,6 +623,10 @@ bool LibraryCallKit::try_to_inline() {
case vmIntrinsics::_longBitsToDouble: case vmIntrinsics::_longBitsToDouble:
return inline_fp_conversions(intrinsic_id()); return inline_fp_conversions(intrinsic_id());
case vmIntrinsics::_bitCount_i:
case vmIntrinsics::_bitCount_l:
return inline_bitCount(intrinsic_id());
case vmIntrinsics::_reverseBytes_i: case vmIntrinsics::_reverseBytes_i:
case vmIntrinsics::_reverseBytes_l: case vmIntrinsics::_reverseBytes_l:
return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id()); return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
@ -1714,6 +1724,27 @@ inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
} }
} }
//----------------------------inline_bitCount_int/long-----------------------
// inline int Integer.bitCount(int)
// inline int Long.bitCount(long)
bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) {
assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount");
if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false;
if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false;
_sp += arg_size(); // restore stack pointer
switch (id) {
case vmIntrinsics::_bitCount_i:
push(_gvn.transform(new (C, 2) PopCountINode(pop())));
break;
case vmIntrinsics::_bitCount_l:
push(_gvn.transform(new (C, 2) PopCountLNode(pop_pair())));
break;
default:
ShouldNotReachHere();
}
return true;
}
//----------------------------inline_reverseBytes_int/long------------------- //----------------------------inline_reverseBytes_int/long-------------------
// inline Integer.reverseBytes(int) // inline Integer.reverseBytes(int)
// inline Long.reverseBytes(long) // inline Long.reverseBytes(long)
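When the matcher lacks a PopCount rule, the bitCount intrinsic above bails out and Integer.bitCount() runs as ordinary compiled Java; that code is the classic SWAR reduction, sketched here for contrast with the single POPC/POPCNT instruction the new nodes select:

#include <assert.h>
#include <stdint.h>

// SWAR population count: halve, pair, nibble, then sum bytes via multiply.
// The PopCountI node lets the matcher replace all of this with one instruction.
static int bit_count(uint32_t i) {
  i = i - ((i >> 1) & 0x55555555);
  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
  i = (i + (i >> 4)) & 0x0f0f0f0f;
  return (int)((i * 0x01010101) >> 24);
}

int main() {
  assert(bit_count(0x12345678) == 13);
  assert(bit_count(0) == 0 && bit_count(~0u) == 32);
  return 0;
}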

View file

@ -1481,8 +1481,13 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
const Type* mach_at = mach->adr_type(); const Type* mach_at = mach->adr_type();
// DecodeN node consumed by an address may have different type // DecodeN node consumed by an address may have different type
// than its input. Don't compare types for such a case. // than its input. Don't compare types for such a case.
if (m->adr_type() != mach_at && m->in(MemNode::Address)->is_AddP() && if (m->adr_type() != mach_at &&
m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN()) { (m->in(MemNode::Address)->is_DecodeN() ||
m->in(MemNode::Address)->is_AddP() &&
m->in(MemNode::Address)->in(AddPNode::Address)->is_DecodeN() ||
m->in(MemNode::Address)->is_AddP() &&
m->in(MemNode::Address)->in(AddPNode::Address)->is_AddP() &&
m->in(MemNode::Address)->in(AddPNode::Address)->in(AddPNode::Address)->is_DecodeN())) {
mach_at = m->adr_type(); mach_at = m->adr_type();
} }
if (m->adr_type() != mach_at) { if (m->adr_type() != mach_at) {

View file

@ -1211,7 +1211,9 @@ void Arguments::set_ergonomics_flags() {
if (UseLargePages && UseCompressedOops) { if (UseLargePages && UseCompressedOops) {
// Cannot allocate guard pages for implicit checks in indexed addressing // Cannot allocate guard pages for implicit checks in indexed addressing
// mode, when large pages are specified on windows. // mode, when large pages are specified on windows.
FLAG_SET_DEFAULT(UseImplicitNullCheckForNarrowOop, false); // This flag could be switched ON if narrow oop base address is set to 0,
// see code in Universe::initialize_heap().
Universe::set_narrow_oop_use_implicit_null_checks(false);
} }
#endif // _WIN64 #endif // _WIN64
} else { } else {

View file

@ -303,11 +303,14 @@ class CommandLineFlags {
"Use 32-bit object references in 64-bit VM. " \ "Use 32-bit object references in 64-bit VM. " \
"lp64_product means flag is always constant in 32 bit VM") \ "lp64_product means flag is always constant in 32 bit VM") \
\ \
lp64_product(bool, CheckCompressedOops, trueInDebug, \ notproduct(bool, CheckCompressedOops, true, \
"generate checks in encoding/decoding code") \ "generate checks in encoding/decoding code in debug VM") \
\ \
product(bool, UseImplicitNullCheckForNarrowOop, true, \ product_pd(uintx, HeapBaseMinAddress, \
"generate implicit null check in indexed addressing mode.") \ "OS specific low limit for heap base address") \
\
diagnostic(bool, PrintCompressedOopsMode, false, \
"Print compressed oops base address and encoding mode") \
\ \
/* UseMembar is theoretically a temp flag used for memory barrier \ /* UseMembar is theoretically a temp flag used for memory barrier \
* removal testing. It was supposed to be removed before FCS but has \ * removal testing. It was supposed to be removed before FCS but has \
@ -2169,6 +2172,9 @@ class CommandLineFlags {
diagnostic(bool, PrintIntrinsics, false, \ diagnostic(bool, PrintIntrinsics, false, \
"prints attempted and successful inlining of intrinsics") \ "prints attempted and successful inlining of intrinsics") \
\ \
product(bool, UsePopCountInstruction, false, \
"Use population count instruction") \
\
diagnostic(ccstrlist, DisableIntrinsic, "", \ diagnostic(ccstrlist, DisableIntrinsic, "", \
"do not expand intrinsics whose (internal) names appear here") \ "do not expand intrinsics whose (internal) names appear here") \
\ \

View file

@ -243,7 +243,7 @@ class os: AllStatic {
static char* non_memory_address_word(); static char* non_memory_address_word();
// reserve, commit and pin the entire memory region // reserve, commit and pin the entire memory region
static char* reserve_memory_special(size_t size); static char* reserve_memory_special(size_t size, char* addr = NULL);
static bool release_memory_special(char* addr, size_t bytes); static bool release_memory_special(char* addr, size_t bytes);
static bool large_page_init(); static bool large_page_init();
static size_t large_page_size(); static size_t large_page_size();

View file

@ -109,6 +109,7 @@ ReservedSpace::ReservedSpace(const size_t prefix_size,
const size_t prefix_align, const size_t prefix_align,
const size_t suffix_size, const size_t suffix_size,
const size_t suffix_align, const size_t suffix_align,
char* requested_address,
const size_t noaccess_prefix) const size_t noaccess_prefix)
{ {
assert(prefix_size != 0, "sanity"); assert(prefix_size != 0, "sanity");
@ -131,7 +132,7 @@ ReservedSpace::ReservedSpace(const size_t prefix_size,
const bool try_reserve_special = UseLargePages && const bool try_reserve_special = UseLargePages &&
prefix_align == os::large_page_size(); prefix_align == os::large_page_size();
if (!os::can_commit_large_page_memory() && try_reserve_special) { if (!os::can_commit_large_page_memory() && try_reserve_special) {
initialize(size, prefix_align, true, NULL, noaccess_prefix); initialize(size, prefix_align, true, requested_address, noaccess_prefix);
return; return;
} }
@ -146,7 +147,13 @@ ReservedSpace::ReservedSpace(const size_t prefix_size,
noaccess_prefix == prefix_align, "noaccess prefix wrong"); noaccess_prefix == prefix_align, "noaccess prefix wrong");
// Optimistically try to reserve the exact size needed. // Optimistically try to reserve the exact size needed.
char* addr = os::reserve_memory(size, NULL, prefix_align); char* addr;
if (requested_address != 0) {
addr = os::attempt_reserve_memory_at(size,
requested_address-noaccess_prefix);
} else {
addr = os::reserve_memory(size, NULL, prefix_align);
}
if (addr == NULL) return; if (addr == NULL) return;
// Check whether the result has the needed alignment (unlikely unless // Check whether the result has the needed alignment (unlikely unless
@ -206,12 +213,8 @@ void ReservedSpace::initialize(size_t size, size_t alignment, bool large,
char* base = NULL; char* base = NULL;
if (special) { if (special) {
// It's not hard to implement reserve_memory_special() such that it can
// allocate at fixed address, but there seems no use of this feature
// for now, so it's not implemented.
assert(requested_address == NULL, "not implemented");
base = os::reserve_memory_special(size); base = os::reserve_memory_special(size, requested_address);
if (base != NULL) { if (base != NULL) {
// Check alignment constraints // Check alignment constraints
@ -372,7 +375,8 @@ ReservedHeapSpace::ReservedHeapSpace(size_t size, size_t alignment,
bool large, char* requested_address) : bool large, char* requested_address) :
ReservedSpace(size, alignment, large, ReservedSpace(size, alignment, large,
requested_address, requested_address,
UseCompressedOops && UseImplicitNullCheckForNarrowOop ? (UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
Universe::narrow_oop_use_implicit_null_checks()) ?
lcm(os::vm_page_size(), alignment) : 0) { lcm(os::vm_page_size(), alignment) : 0) {
// Only reserved space for the java heap should have a noaccess_prefix // Only reserved space for the java heap should have a noaccess_prefix
// if using compressed oops. // if using compressed oops.
@ -382,9 +386,12 @@ ReservedHeapSpace::ReservedHeapSpace(size_t size, size_t alignment,
ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size, ReservedHeapSpace::ReservedHeapSpace(const size_t prefix_size,
const size_t prefix_align, const size_t prefix_align,
const size_t suffix_size, const size_t suffix_size,
const size_t suffix_align) : const size_t suffix_align,
char* requested_address) :
ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align, ReservedSpace(prefix_size, prefix_align, suffix_size, suffix_align,
UseCompressedOops && UseImplicitNullCheckForNarrowOop ? requested_address,
(UseCompressedOops && (Universe::narrow_oop_base() != NULL) &&
Universe::narrow_oop_use_implicit_null_checks()) ?
lcm(os::vm_page_size(), prefix_align) : 0) { lcm(os::vm_page_size(), prefix_align) : 0) {
protect_noaccess_prefix(prefix_size+suffix_size); protect_noaccess_prefix(prefix_size+suffix_size);
} }
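The noaccess prefix arithmetic above can be pictured numerically (all addresses hypothetical): the reservation starts one prefix below the requested heap start, and that page is later protected so decoding a NULL narrow oop faults:

#include <assert.h>
#include <stdint.h>

int main() {
  const uintptr_t page       = 4096;               // assumed prefix/page size
  const uintptr_t heap_start = 0x800001000UL;      // requested_address
  const uintptr_t reserved   = heap_start - page;  // actual reservation start
  const uintptr_t oop_base   = heap_start - page;  // narrow oop base (protected page)
  uintptr_t fault = oop_base + 24;  // NULL narrow oop, field offset 24
  assert(fault >= reserved && fault < heap_start);  // lands in the guard page
  return 0;
}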

View file

@ -73,7 +73,8 @@ class ReservedSpace VALUE_OBJ_CLASS_SPEC {
const size_t noaccess_prefix = 0); const size_t noaccess_prefix = 0);
ReservedSpace(const size_t prefix_size, const size_t prefix_align, ReservedSpace(const size_t prefix_size, const size_t prefix_align,
const size_t suffix_size, const size_t suffix_align, const size_t suffix_size, const size_t suffix_align,
const size_t noaccess_prefix); char* requested_address,
const size_t noaccess_prefix = 0);
// Accessors // Accessors
char* base() const { return _base; } char* base() const { return _base; }
@ -121,7 +122,8 @@ public:
ReservedHeapSpace(size_t size, size_t forced_base_alignment, ReservedHeapSpace(size_t size, size_t forced_base_alignment,
bool large, char* requested_address); bool large, char* requested_address);
ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align, ReservedHeapSpace(const size_t prefix_size, const size_t prefix_align,
const size_t suffix_size, const size_t suffix_align); const size_t suffix_size, const size_t suffix_align,
char* requested_address);
}; };
// VirtualSpace is data structure for committing a previously reserved address range in smaller chunks. // VirtualSpace is data structure for committing a previously reserved address range in smaller chunks.

View file

@ -263,7 +263,9 @@ static inline uint64_t cast_uint64_t(size_t x)
static_field(Universe, _bootstrapping, bool) \ static_field(Universe, _bootstrapping, bool) \
static_field(Universe, _fully_initialized, bool) \ static_field(Universe, _fully_initialized, bool) \
static_field(Universe, _verify_count, int) \ static_field(Universe, _verify_count, int) \
static_field(Universe, _heap_base, address) \ static_field(Universe, _narrow_oop._base, address) \
static_field(Universe, _narrow_oop._shift, int) \
static_field(Universe, _narrow_oop._use_implicit_null_checks, bool) \
\ \
/**********************************************************************************/ \ /**********************************************************************************/ \
/* Generation and Space hierarchies */ \ /* Generation and Space hierarchies */ \

View file

@ -163,9 +163,11 @@ const char* Abstract_VM_Version::internal_vm_info_string() {
#elif _MSC_VER == 1200 #elif _MSC_VER == 1200
#define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0" #define HOTSPOT_BUILD_COMPILER "MS VC++ 6.0"
#elif _MSC_VER == 1310 #elif _MSC_VER == 1310
#define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1" #define HOTSPOT_BUILD_COMPILER "MS VC++ 7.1 (VS2003)"
#elif _MSC_VER == 1400 #elif _MSC_VER == 1400
#define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0" #define HOTSPOT_BUILD_COMPILER "MS VC++ 8.0 (VS2005)"
#elif _MSC_VER == 1500
#define HOTSPOT_BUILD_COMPILER "MS VC++ 9.0 (VS2008)"
#else #else
#define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER) #define HOTSPOT_BUILD_COMPILER "unknown MS VC++:" XSTR(_MSC_VER)
#endif #endif

View file

@ -162,7 +162,7 @@ inline int vsnprintf(char* buf, size_t count, const char* fmt, va_list argptr) {
} }
// Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead // Visual Studio 2005 deprecates POSIX names - use ISO C++ names instead
#if _MSC_VER >= 1400 && !defined(_WIN64) #if _MSC_VER >= 1400
#define open _open #define open _open
#define close _close #define close _close
#define read _read #define read _read

View file

@ -0,0 +1,75 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/**
* @test
* @bug 6378821
* @summary where available, bitCount() should use POPC on SPARC processors and AMD+10h
*
* @run main/othervm -Xcomp -XX:CompileOnly=Test6378821.fcomp Test6378821
*/
public class Test6378821 {
static final int[] ia = new int[] { 0x12345678 };
static final long[] la = new long[] { 0x12345678abcdefL };
public static void main(String [] args) {
// Resolve the class and the method.
Integer.bitCount(1);
Long.bitCount(1);
sub(ia[0]);
sub(la[0]);
sub(ia);
sub(la);
}
static void check(int i, int expected, int result) {
if (result != expected) {
throw new InternalError("Wrong population count for " + i + ": " + result + " != " + expected);
}
}
static void check(long l, int expected, int result) {
if (result != expected) {
throw new InternalError("Wrong population count for " + l + ": " + result + " != " + expected);
}
}
static void sub(int i) { check(i, fint(i), fcomp(i) ); }
static void sub(int[] ia) { check(ia[0], fint(ia), fcomp(ia)); }
static void sub(long l) { check(l, fint(l), fcomp(l) ); }
static void sub(long[] la) { check(la[0], fint(la), fcomp(la)); }
static int fint (int i) { return Integer.bitCount(i); }
static int fcomp(int i) { return Integer.bitCount(i); }
static int fint (int[] ia) { return Integer.bitCount(ia[0]); }
static int fcomp(int[] ia) { return Integer.bitCount(ia[0]); }
static int fint (long l) { return Long.bitCount(l); }
static int fcomp(long l) { return Long.bitCount(l); }
static int fint (long[] la) { return Long.bitCount(la[0]); }
static int fcomp(long[] la) { return Long.bitCount(la[0]); }
}