Mirror of https://github.com/openjdk/jdk.git (synced 2025-09-20 11:04:34 +02:00)
8154473: Update for CompilerDirectives to control stub generation and intrinsics

Use the -XX:DisableIntrinsic flag to control intrinsic usage in the Interpreter.

Reviewed-by: twisti, neliasso, kvn, simonis
commit 489b38091e (parent 6dfcf1dd81)
9 changed files with 144 additions and 298 deletions
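With this change the Interpreter honors the same intrinsic-control flag as the compilers. As an illustration (a hypothetical command line; DisableIntrinsic is a diagnostic flag, and _dsin/_dcos are the vmIntrinsics IDs for Math.sin and Math.cos), disabling the trig intrinsics everywhere might look like:

    java -XX:+UnlockDiagnosticVMOptions -XX:DisableIntrinsic=_dsin,_dcos -version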
@@ -2584,6 +2584,11 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
 }
 
+// Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter.
+void MacroAssembler::call_VM_leaf0(address entry_point) {
+  MacroAssembler::call_VM_leaf_base(entry_point, 0);
+}
+
 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
   call_VM_leaf_base(entry_point, number_of_arguments);
 }
 
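The new call_VM_leaf0 helper is a zero-argument wrapper that, per its own comment, forces the MacroAssembler version of call_VM_leaf_base from interpreter code (which would otherwise resolve to InterpreterMacroAssembler's override). A minimal sketch of a call site (hypothetical stub-generator snippet; assumes the usual "#define __ _masm->" convention and that the double argument is already in the ABI register, xmm0 on x86_64):

    // Call the leaf runtime routine for Math.sin without building
    // a complete interpreter/VM frame.
    __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin));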
@@ -5629,235 +5634,6 @@ void MacroAssembler::incr_allocated_bytes(Register thread,
 #endif
 }
 
-void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
-  pusha();
-
-  // if we are coming from c1, xmm registers may be live
-  int num_xmm_regs = LP64_ONLY(16) NOT_LP64(8);
-  if (UseAVX > 2) {
-    num_xmm_regs = LP64_ONLY(32) NOT_LP64(8);
-  }
-
-  if (UseSSE == 1) {
-    subptr(rsp, sizeof(jdouble)*8);
-    for (int n = 0; n < 8; n++) {
-      movflt(Address(rsp, n*sizeof(jdouble)), as_XMMRegister(n));
-    }
-  } else if (UseSSE >= 2) {
-    if (UseAVX > 2) {
-      push(rbx);
-      movl(rbx, 0xffff);
-      kmovwl(k1, rbx);
-      pop(rbx);
-    }
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      if(UseAVX > 2) {
-        // Save upper half of ZMM registers
-        subptr(rsp, 32*num_xmm_regs);
-        for (int n = 0; n < num_xmm_regs; n++) {
-          vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
-        }
-      }
-      assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
-      // Save upper half of YMM registers
-      subptr(rsp, 16*num_xmm_regs);
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
-      }
-    }
-#endif
-    // Save whole 128bit (16 bytes) XMM registers
-    subptr(rsp, 16*num_xmm_regs);
-#ifdef _LP64
-    if (VM_Version::supports_evex()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(Address(rsp, n*16), as_XMMRegister(n));
-      }
-    }
-#else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(Address(rsp, n*16), as_XMMRegister(n));
-    }
-#endif
-  }
-
-  // Preserve registers across runtime call
-  int incoming_argument_and_return_value_offset = -1;
-  if (num_fpu_regs_in_use > 1) {
-    // Must preserve all other FPU regs (could alternatively convert
-    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
-    // FPU state, but can not trust C compiler)
-    NEEDS_CLEANUP;
-    // NOTE that in this case we also push the incoming argument(s) to
-    // the stack and restore it later; we also use this stack slot to
-    // hold the return value from dsin, dcos etc.
-    for (int i = 0; i < num_fpu_regs_in_use; i++) {
-      subptr(rsp, sizeof(jdouble));
-      fstp_d(Address(rsp, 0));
-    }
-    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
-    for (int i = nb_args-1; i >= 0; i--) {
-      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
-    }
-  }
-
-  subptr(rsp, nb_args*sizeof(jdouble));
-  for (int i = 0; i < nb_args; i++) {
-    fstp_d(Address(rsp, i*sizeof(jdouble)));
-  }
-
-#ifdef _LP64
-  if (nb_args > 0) {
-    movdbl(xmm0, Address(rsp, 0));
-  }
-  if (nb_args > 1) {
-    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
-  }
-  assert(nb_args <= 2, "unsupported number of args");
-#endif // _LP64
-
-  // NOTE: we must not use call_VM_leaf here because that requires a
-  // complete interpreter frame in debug mode -- same bug as 4387334
-  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
-  // do proper 64bit abi
-
-  NEEDS_CLEANUP;
-  // Need to add stack banging before this runtime call if it needs to
-  // be taken; however, there is no generic stack banging routine at
-  // the MacroAssembler level
-
-  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);
-
-#ifdef _LP64
-  movsd(Address(rsp, 0), xmm0);
-  fld_d(Address(rsp, 0));
-#endif // _LP64
-  addptr(rsp, sizeof(jdouble)*nb_args);
-  if (num_fpu_regs_in_use > 1) {
-    // Must save return value to stack and then restore entire FPU
-    // stack except incoming arguments
-    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
-    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
-      fld_d(Address(rsp, 0));
-      addptr(rsp, sizeof(jdouble));
-    }
-    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
-    addptr(rsp, sizeof(jdouble)*nb_args);
-  }
-
-  if (UseSSE == 1) {
-    for (int n = 0; n < 8; n++) {
-      movflt(as_XMMRegister(n), Address(rsp, n*sizeof(jdouble)));
-    }
-    addptr(rsp, sizeof(jdouble)*8);
-  } else if (UseSSE >= 2) {
-    // Restore whole 128bit (16 bytes) XMM registers
-#ifdef _LP64
-    if (VM_Version::supports_evex()) {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
-      }
-    } else {
-      for (int n = 0; n < num_xmm_regs; n++) {
-        movdqu(as_XMMRegister(n), Address(rsp, n*16));
-      }
-    }
-#else
-    for (int n = 0; n < num_xmm_regs; n++) {
-      movdqu(as_XMMRegister(n), Address(rsp, n*16));
-    }
-#endif
-    addptr(rsp, 16*num_xmm_regs);
-
-#ifdef COMPILER2
-    if (MaxVectorSize > 16) {
-      // Restore upper half of YMM registers.
-      for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
-      }
-      addptr(rsp, 16*num_xmm_regs);
-      if(UseAVX > 2) {
-        for (int n = 0; n < num_xmm_regs; n++) {
-          vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
-        }
-        addptr(rsp, 32*num_xmm_regs);
-      }
-    }
-#endif
-  }
-  popa();
-}
-
-static const double pi_4 = 0.7853981633974483;
-
-void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
-  // A hand-coded argument reduction for values in fabs(pi/4, pi/2)
-  // was attempted in this code; unfortunately it appears that the
-  // switch to 80-bit precision and back causes this to be
-  // unprofitable compared with simply performing a runtime call if
-  // the argument is out of the (-pi/4, pi/4) range.
-
-  Register tmp = noreg;
-  if (!VM_Version::supports_cmov()) {
-    // fcmp needs a temporary so preserve rbx,
-    tmp = rbx;
-    push(tmp);
-  }
-
-  Label slow_case, done;
-  if (trig == 't') {
-    ExternalAddress pi4_adr = (address)&pi_4;
-    if (reachable(pi4_adr)) {
-      // x ?<= pi/4
-      fld_d(pi4_adr);
-      fld_s(1);                // Stack:  X  PI/4  X
-      fabs();                  // Stack: |X| PI/4  X
-      fcmp(tmp);
-      jcc(Assembler::above, slow_case);
-
-      // fastest case: -pi/4 <= x <= pi/4
-      ftan();
-
-      jmp(done);
-    }
-  }
-  // slow case: runtime call
-  bind(slow_case);
-
-  switch(trig) {
-  case 's':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
-    }
-    break;
-  case 'c':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
-    }
-    break;
-  case 't':
-    {
-      fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
-    }
-    break;
-  default:
-    assert(false, "bad intrinsic");
-    break;
-  }
-
-  // Come here with result in F-TOS
-  bind(done);
-
-  if (tmp != noreg) {
-    pop(tmp);
-  }
-}
-
 // Look up the method for a megamorphic invokeinterface call.
 // The target method is determined by <intf_klass, itable_index>.
 // The receiver klass is in recv_klass.
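The deleted fp_runtime_fallback()/trigfunc() pair was the hand-rolled x87 slow path for the sin/cos/tan intrinsics: it spilled every GP, XMM/YMM/ZMM, and x87 register around a call through call_VM_leaf_base into SharedRuntime::dsin/dcos/dtan. Those runtime routines are themselves just leaf calls into libm; a rough stand-alone sketch of their shape (hypothetical function name; the real definitions live in HotSpot's sharedRuntime.cpp behind the JRT_LEAF macro):

    #include <cmath>

    // Approximate shape of the shared-runtime fallback that the removed
    // trigfunc() dispatched to for the 's' (sine) case. A leaf call: it
    // touches no VM state and simply defers to the C library.
    extern "C" double SharedRuntime_dsin(double x) {
      return std::sin(x);
    }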