8154473: Update for CompilerDirectives to control stub generation and intrinsics

Use the -XX:DisableIntrinsic flag to control intrinsic usage in the interpreter.
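For context, DisableIntrinsic is a diagnostic flag, so disabling an intrinsic from the command line looks roughly like this (a hedged example; the intrinsic ID _dsin and the application name are illustrative):

  java -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_dsin MyApp

Before this change the flag effectively applied only to the JIT compilers; with it, the interpreter's math entries honor the same setting and fall back to SharedRuntime calls.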

Reviewed-by: twisti, neliasso, kvn, simonis
Vivek Deshpande 2016-05-20 14:17:55 -07:00
parent 6dfcf1dd81
commit 489b38091e
9 changed files with 144 additions and 298 deletions

@@ -2584,6 +2584,11 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
}

// Use this method when the MacroAssembler version of call_VM_leaf_base() should be called from the Interpreter.
void MacroAssembler::call_VM_leaf0(address entry_point) {
  MacroAssembler::call_VM_leaf_base(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}
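For illustration, an interpreter-side caller of the new helper might look like this (a hedged sketch of a generate_math_entry-style call site; the argument location and the SharedRuntime::dsin target are assumptions, not the literal code from this commit):

  // Load the incoming double argument, then make a zero-argument leaf
  // call. The qualified call inside call_VM_leaf0() ensures the
  // MacroAssembler::call_VM_leaf_base() version runs, not the
  // InterpreterMacroAssembler override that expects a complete
  // interpreter frame.
  __ movdbl(xmm0, Address(rsp, wordSize));
  __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));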
@@ -5629,235 +5634,6 @@ void MacroAssembler::incr_allocated_bytes(Register thread,
#endif
}
void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
  if (UseAVX > 2) {
    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
  }

  if (UseSSE == 1) {
    subptr(rsp, sizeof(jdouble)*8);
    for (int n = 0; n < 8; n++) {
      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
    }
  } else if (UseSSE >= 2) {
    if (UseAVX > 2) {
      push(rbx);
      movl(rbx, 0xffff);
      kmovwl(k1, rbx);
      pop(rbx);
    }
#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      if (UseAVX > 2) {
        // Save upper half of ZMM registers
        subptr(rsp, 32*num_xmm_regs);
        for (int n = 0; n < num_xmm_regs; n++) {
          vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
        }
      }
      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
      // Save upper half of YMM registers
      subptr(rsp, 16*num_xmm_regs);
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#endif
    // Save whole 128bit (16 bytes) XMM registers
    subptr(rsp, 16*num_xmm_regs);
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(Address(rsp, n*16), as_XMMRegister(n));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(Address(rsp, n*16), as_XMMRegister(n));
    }
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble)*nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble)*nb_args);
  }

  if (UseSSE == 1) {
    for (int n = 0; n < 8; n++) {
      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
    }
    addptr(rsp, sizeof(jdouble)*8);
  } else if (UseSSE >= 2) {
    // Restore whole 128bit (16 bytes) XMM registers
#ifdef _LP64
    if (VM_Version::supports_evex()) {
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
      }
    } else {
      for (int n = 0; n < num_xmm_regs; n++) {
        movdqu(as_XMMRegister(n), Address(rsp, n*16));
      }
    }
#else
    for (int n = 0; n < num_xmm_regs; n++) {
      movdqu(as_XMMRegister(n), Address(rsp, n*16));
    }
#endif
    addptr(rsp, 16*num_xmm_regs);

#ifdef COMPILER2
    if (MaxVectorSize > 16) {
      // Restore upper half of YMM registers.
      for (int n = 0; n < num_xmm_regs; n++) {
        vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
      }
      addptr(rsp, 16*num_xmm_regs);
      if (UseAVX > 2) {
        for (int n = 0; n < num_xmm_regs; n++) {
          vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
        }
        addptr(rsp, 32*num_xmm_regs);
      }
    }
#endif
  }
  popa();
}
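Before its removal here, a caller pushed its double arguments onto the x87 stack and invoked the helper like this (an illustrative, hedged call site; the SharedRuntime::dpow target is an assumption consistent with the nb_args <= 2 assert above):

  // Two double arguments already on the FPU stack; the helper spills
  // them, makes the leaf call, and leaves the result in ST(0).
  fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);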
static const double pi_4 = 0.7853981633974483;

void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
  // was attempted in this code; unfortunately it appears that the
  // switch to 80-bit precision and back causes this to be
  // unprofitable compared with simply performing a runtime call if
  // the argument is out of the (-pi/4, pi/4) range.

  Register tmp = noreg;
  if (!VM_Version::supports_cmov()) {
    // fcmp needs a temporary so preserve rbx,
    tmp = rbx;
    push(tmp);
  }

  Label slow_case, done;
  if (trig == 't') {
    ExternalAddress pi4_adr = (address)&pi_4;
    if (reachable(pi4_adr)) {
      // x ?<= pi/4
      fld_d(pi4_adr);
      fld_s(1);                // Stack:  X  PI/4  X
      fabs();                  // Stack: |X| PI/4  X
      fcmp(tmp);
      jcc(Assembler::above, slow_case);

      // fastest case: -pi/4 <= x <= pi/4
      ftan();

      jmp(done);
    }
  }

  // slow case: runtime call
  bind(slow_case);

  switch(trig) {
  case 's':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
    }
    break;
  case 'c':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
    }
    break;
  case 't':
    {
      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
    }
    break;
  default:
    assert(false, "bad intrinsic");
    break;
  }

  // Come here with result in F-TOS
  bind(done);

  if (tmp != noreg) {
    pop(tmp);
  }
}
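After this commit, the equivalent dispatch prefers a generated stub when one exists and otherwise falls back to the shared runtime (a hedged sketch; the exact control flow in the new interpreter/stub code is assumed, not shown in this hunk):

  // Prefer the generated stub; use the C runtime when the stub was
  // not generated (e.g. because the intrinsic is disabled).
  address fn = (StubRoutines::dsin() != NULL)
      ? StubRoutines::dsin()
      : CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
  __ call_VM_leaf0(fn);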
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.