mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-26 14:24:46 +02:00
7133857: exp() and pow() should use the x87 ISA on x86
Use x87 instructions to implement exp() and pow() in interpreter/c1/c2. Reviewed-by: kvn, never, twisti
This commit is contained in:
parent
eb4a860bc3
commit
b305cf722e
26 changed files with 783 additions and 279 deletions
|
@ -3578,6 +3578,21 @@ void Assembler::fyl2x() {
|
|||
emit_byte(0xF1);
|
||||
}
|
||||
|
||||
void Assembler::frndint() {
  // FRNDINT: round ST(0) to an integral value using the current FPU
  // rounding mode (RC field of the x87 control word). Encoding: D9 FC.
  emit_byte(0xD9);
  emit_byte(0xFC);
}
|
||||
|
||||
void Assembler::f2xm1() {
  // F2XM1: ST(0) = 2^ST(0) - 1. Only defined for -1 <= ST(0) <= +1,
  // which is why callers split off int(X) first. Encoding: D9 F0.
  emit_byte(0xD9);
  emit_byte(0xF0);
}
|
||||
|
||||
void Assembler::fldl2e() {
  // FLDL2E: push the constant log2(e) onto the FPU stack. Encoding: D9 EA.
  emit_byte(0xD9);
  emit_byte(0xEA);
}
|
||||
|
||||
// SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding.
// 0x66 (operand-size), 0xF3 (REP) and 0xF2 (REPNE) are the mandatory
// prefixes of legacy SSE instructions; VEX folds them into its 'pp' field.
static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 };
|
||||
// SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding.
|
||||
|
@ -6868,6 +6883,242 @@ void MacroAssembler::fldcw(AddressLiteral src) {
|
|||
Assembler::fldcw(as_Address(src));
|
||||
}
|
||||
|
||||
// Replaces X on the FPU TOS with 2^X, or with NaN when the result's
// exponent does not fit an IEEE-754 double (callers then fall back to a
// runtime call).
void MacroAssembler::pow_exp_core_encoding() {
  // kills rax, rcx, rdx
  subptr(rsp,sizeof(jdouble));  // scratch slot on the thread's stack
  // computes 2^X. Stack: X ...
  // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and
  // keep it on the thread's stack to compute 2^int(X) later
  // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1)
  // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X))
  fld_s(0);                 // Stack: X X ...
  frndint();                // Stack: int(X) X ...
  fsuba(1);                 // Stack: int(X) X-int(X) ...
  fistp_s(Address(rsp,0));  // move int(X) as integer to thread's stack. Stack: X-int(X) ...
  f2xm1();                  // Stack: 2^(X-int(X))-1 ...
  fld1();                   // Stack: 1 2^(X-int(X))-1 ...
  faddp(1);                 // Stack: 2^(X-int(X))
  // computes 2^(int(X)): add exponent bias (1023) to int(X), then
  // shift int(X)+1023 to exponent position.
  // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11
  // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent
  // values so detect them and set result to NaN.
  movl(rax,Address(rsp,0));
  movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding
  addl(rax, 1023);
  movl(rdx,rax);
  shll(rax,20);     // biased exponent into bits 20..30 of the high word
  // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN.
  addl(rdx,1);
  // Check that 1 < int(X)+1023+1 < 2048
  // in 3 steps:
  // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048
  // 2- (int(X)+1023+1)&-2048 != 0
  // 3- (int(X)+1023+1)&-2048 != 1
  // Do 2- first because addl just updated the flags.
  cmov32(Assembler::equal,rax,rcx);    // int(X)+1023+1 == 0 -> NaN
  cmpl(rdx,1);
  cmov32(Assembler::equal,rax,rcx);    // int(X)+1023+1 == 1 -> exponent 0x000 -> NaN
  testl(rdx,rcx);
  cmov32(Assembler::notEqual,rax,rcx); // outside [0,2048) -> NaN
  // Build the double 2^int(X) in the scratch slot (x86 is little-endian:
  // high word at offset 4, mantissa/low word zeroed).
  movl(Address(rsp,4),rax);
  movl(Address(rsp,0),0);
  fmul_d(Address(rsp,0));   // Stack: 2^X ...
  addptr(rsp,sizeof(jdouble));
}
|
||||
|
||||
// Expects X in ST(0) and Y in ST(1); leaves X^Y in ST(0).
void MacroAssembler::fast_pow() {
  // computes X^Y = 2^(Y * log2(X))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  fyl2x();                 // Stack: (Y*log2(X)) ...
  pow_exp_core_encoding(); // Stack: X^Y ...
}
|
||||
|
||||
// Expects X in ST(0); leaves exp(X) in ST(0).
void MacroAssembler::fast_exp() {
  // computes exp(X) = 2^(X * log2(e))
  // if fast computation is not possible, result is NaN. Requires
  // fallback from user of this macro.
  fldl2e();                // Stack: log2(e) X ...
  fmulp(1);                // Stack: (X*log2(e)) ...
  pow_exp_core_encoding(); // Stack: exp(X) ...
}
|
||||
|
||||
void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
|
||||
// kills rax, rcx, rdx
|
||||
// pow and exp needs 2 extra registers on the fpu stack.
|
||||
Label slow_case, done;
|
||||
Register tmp = noreg;
|
||||
if (!VM_Version::supports_cmov()) {
|
||||
// fcmp needs a temporary so preserve rdx,
|
||||
tmp = rdx;
|
||||
}
|
||||
Register tmp2 = rax;
|
||||
NOT_LP64(Register tmp3 = rcx;)
|
||||
|
||||
if (is_exp) {
|
||||
// Stack: X
|
||||
fld_s(0); // duplicate argument for runtime call. Stack: X X
|
||||
fast_exp(); // Stack: exp(X) X
|
||||
fcmp(tmp, 0, false, false); // Stack: exp(X) X
|
||||
// exp(X) not equal to itself: exp(X) is NaN go to slow case.
|
||||
jcc(Assembler::parity, slow_case);
|
||||
// get rid of duplicate argument. Stack: exp(X)
|
||||
if (num_fpu_regs_in_use > 0) {
|
||||
fxch();
|
||||
fpop();
|
||||
} else {
|
||||
ffree(1);
|
||||
}
|
||||
jmp(done);
|
||||
} else {
|
||||
// Stack: X Y
|
||||
Label x_negative, y_odd;
|
||||
|
||||
fldz(); // Stack: 0 X Y
|
||||
fcmp(tmp, 1, true, false); // Stack: X Y
|
||||
jcc(Assembler::above, x_negative);
|
||||
|
||||
// X >= 0
|
||||
|
||||
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
|
||||
fld_s(1); // Stack: X Y X Y
|
||||
fast_pow(); // Stack: X^Y X Y
|
||||
fcmp(tmp, 0, false, false); // Stack: X^Y X Y
|
||||
// X^Y not equal to itself: X^Y is NaN go to slow case.
|
||||
jcc(Assembler::parity, slow_case);
|
||||
// get rid of duplicate arguments. Stack: X^Y
|
||||
if (num_fpu_regs_in_use > 0) {
|
||||
fxch(); fpop();
|
||||
fxch(); fpop();
|
||||
} else {
|
||||
ffree(2);
|
||||
ffree(1);
|
||||
}
|
||||
jmp(done);
|
||||
|
||||
// X <= 0
|
||||
bind(x_negative);
|
||||
|
||||
fld_s(1); // Stack: Y X Y
|
||||
frndint(); // Stack: int(Y) X Y
|
||||
fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
|
||||
jcc(Assembler::notEqual, slow_case);
|
||||
|
||||
subptr(rsp, 8);
|
||||
|
||||
// For X^Y, when X < 0, Y has to be an integer and the final
|
||||
// result depends on whether it's odd or even. We just checked
|
||||
// that int(Y) == Y. We move int(Y) to gp registers as a 64 bit
|
||||
// integer to test its parity. If int(Y) is huge and doesn't fit
|
||||
// in the 64 bit integer range, the integer indefinite value will
|
||||
// end up in the gp registers. Huge numbers are all even, the
|
||||
// integer indefinite number is even so it's fine.
|
||||
|
||||
#ifdef ASSERT
|
||||
// Let's check we don't end up with an integer indefinite number
|
||||
// when not expected. First test for huge numbers: check whether
|
||||
// int(Y)+1 == int(Y) which is true for very large numbers and
|
||||
// those are all even. A 64 bit integer is guaranteed to not
|
||||
// overflow for numbers where y+1 != y (when precision is set to
|
||||
// double precision).
|
||||
Label y_not_huge;
|
||||
|
||||
fld1(); // Stack: 1 int(Y) X Y
|
||||
fadd(1); // Stack: 1+int(Y) int(Y) X Y
|
||||
|
||||
#ifdef _LP64
|
||||
// trip to memory to force the precision down from double extended
|
||||
// precision
|
||||
fstp_d(Address(rsp, 0));
|
||||
fld_d(Address(rsp, 0));
|
||||
#endif
|
||||
|
||||
fcmp(tmp, 1, true, false); // Stack: int(Y) X Y
|
||||
#endif
|
||||
|
||||
// move int(Y) as 64 bit integer to thread's stack
|
||||
fistp_d(Address(rsp,0)); // Stack: X Y
|
||||
|
||||
#ifdef ASSERT
|
||||
jcc(Assembler::notEqual, y_not_huge);
|
||||
|
||||
// Y is huge so we know it's even. It may not fit in a 64 bit
|
||||
// integer and we don't want the debug code below to see the
|
||||
// integer indefinite value so overwrite int(Y) on the thread's
|
||||
// stack with 0.
|
||||
movl(Address(rsp, 0), 0);
|
||||
movl(Address(rsp, 4), 0);
|
||||
|
||||
bind(y_not_huge);
|
||||
#endif
|
||||
|
||||
fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y
|
||||
fld_s(1); // Stack: X Y X Y
|
||||
fabs(); // Stack: abs(X) Y X Y
|
||||
fast_pow(); // Stack: abs(X)^Y X Y
|
||||
fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
|
||||
// abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
|
||||
|
||||
pop(tmp2);
|
||||
NOT_LP64(pop(tmp3));
|
||||
jcc(Assembler::parity, slow_case);
|
||||
|
||||
#ifdef ASSERT
|
||||
// Check that int(Y) is not integer indefinite value (int
|
||||
// overflow). Shouldn't happen because for values that would
|
||||
// overflow, 1+int(Y)==Y which was tested earlier.
|
||||
#ifndef _LP64
|
||||
{
|
||||
Label integer;
|
||||
testl(tmp2, tmp2);
|
||||
jcc(Assembler::notZero, integer);
|
||||
cmpl(tmp3, 0x80000000);
|
||||
jcc(Assembler::notZero, integer);
|
||||
stop("integer indefinite value shouldn't be seen here");
|
||||
bind(integer);
|
||||
}
|
||||
#else
|
||||
{
|
||||
Label integer;
|
||||
shlq(tmp2, 1);
|
||||
jcc(Assembler::carryClear, integer);
|
||||
jcc(Assembler::notZero, integer);
|
||||
stop("integer indefinite value shouldn't be seen here");
|
||||
bind(integer);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// get rid of duplicate arguments. Stack: X^Y
|
||||
if (num_fpu_regs_in_use > 0) {
|
||||
fxch(); fpop();
|
||||
fxch(); fpop();
|
||||
} else {
|
||||
ffree(2);
|
||||
ffree(1);
|
||||
}
|
||||
|
||||
testl(tmp2, 1);
|
||||
jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
|
||||
// X <= 0, Y even: X^Y = -abs(X)^Y
|
||||
|
||||
fchs(); // Stack: -abs(X)^Y Y
|
||||
jmp(done);
|
||||
}
|
||||
|
||||
// slow case: runtime call
|
||||
bind(slow_case);
|
||||
|
||||
fpop(); // pop incorrect result or int(Y)
|
||||
|
||||
fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
|
||||
is_exp ? 1 : 2, num_fpu_regs_in_use);
|
||||
|
||||
// Come here with result in F-TOS
|
||||
bind(done);
|
||||
}
|
||||
|
||||
void MacroAssembler::fpop() {
|
||||
ffree();
|
||||
fincstp();
|
||||
|
@ -8045,6 +8296,144 @@ void MacroAssembler::incr_allocated_bytes(Register thread,
|
|||
#endif
|
||||
}
|
||||
|
||||
// Calls the VM math routine runtime_entry (e.g. SharedRuntime::dsin/dexp/
// dpow) with nb_args double arguments taken from the FPU stack, leaving the
// result on the FPU TOS. Preserves the GP registers (pusha/popa), the XMM
// registers when SSE is enabled, and -- when num_fpu_regs_in_use > 1 -- the
// rest of the FPU stack across the call.
void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) {
  pusha();

  // if we are coming from c1, xmm registers may be live
  if (UseSSE >= 1) {
    subptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
  }
  int off = 0;
  if (UseSSE == 1)  {
    // UseSSE==1: only the float halves are live
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movflt(Address(rsp,off++*sizeof(jdouble)),xmm7);
  } else if (UseSSE >= 2)  {
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm0);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm1);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm2);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm3);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm4);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm5);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm6);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm7);
#ifdef _LP64
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm8);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm9);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm10);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm11);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm12);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm13);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm14);
    movdbl(Address(rsp,off++*sizeof(jdouble)),xmm15);
#endif
  }

  // Preserve registers across runtime call
  int incoming_argument_and_return_value_offset = -1;
  if (num_fpu_regs_in_use > 1) {
    // Must preserve all other FPU regs (could alternatively convert
    // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash
    // FPU state, but can not trust C compiler)
    NEEDS_CLEANUP;
    // NOTE that in this case we also push the incoming argument(s) to
    // the stack and restore it later; we also use this stack slot to
    // hold the return value from dsin, dcos etc.
    for (int i = 0; i < num_fpu_regs_in_use; i++) {
      subptr(rsp, sizeof(jdouble));
      fstp_d(Address(rsp, 0));
    }
    incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
    // reload the arguments (the deepest saved regs) so they are back on
    // the FPU stack for the call sequence below
    for (int i = nb_args-1; i >= 0; i--) {
      fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble)));
    }
  }

  // spill the nb_args arguments from the FPU stack into the C argument area
  subptr(rsp, nb_args*sizeof(jdouble));
  for (int i = 0; i < nb_args; i++) {
    fstp_d(Address(rsp, i*sizeof(jdouble)));
  }

#ifdef _LP64
  // 64 bit ABI passes floating point arguments in xmm0/xmm1; 32 bit
  // passes them on the stack where they already are.
  if (nb_args > 0) {
    movdbl(xmm0, Address(rsp, 0));
  }
  if (nb_args > 1) {
    movdbl(xmm1, Address(rsp, sizeof(jdouble)));
  }
  assert(nb_args <= 2, "unsupported number of args");
#endif // _LP64

  // NOTE: we must not use call_VM_leaf here because that requires a
  // complete interpreter frame in debug mode -- same bug as 4387334
  // MacroAssembler::call_VM_leaf_base is perfectly safe and will
  // do proper 64bit abi

  NEEDS_CLEANUP;
  // Need to add stack banging before this runtime call if it needs to
  // be taken; however, there is no generic stack banging routine at
  // the MacroAssembler level

  MacroAssembler::call_VM_leaf_base(runtime_entry, 0);

#ifdef _LP64
  // 64 bit ABI returns the result in xmm0; move it to the FPU TOS
  movsd(Address(rsp, 0), xmm0);
  fld_d(Address(rsp, 0));
#endif // _LP64
  addptr(rsp, sizeof(jdouble) * nb_args);
  if (num_fpu_regs_in_use > 1) {
    // Must save return value to stack and then restore entire FPU
    // stack except incoming arguments
    fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
    for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) {
      fld_d(Address(rsp, 0));
      addptr(rsp, sizeof(jdouble));
    }
    // finally reload the saved return value as the new TOS
    fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble)));
    addptr(rsp, sizeof(jdouble) * nb_args);
  }

  // restore the XMM registers saved on entry (same order, same offsets)
  off = 0;
  if (UseSSE == 1)  {
    movflt(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movflt(xmm7, Address(rsp,off++*sizeof(jdouble)));
  } else if (UseSSE >= 2)  {
    movdbl(xmm0, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm1, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm2, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm3, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm4, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm5, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm6, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm7, Address(rsp,off++*sizeof(jdouble)));
#ifdef _LP64
    movdbl(xmm8, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm9, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm10, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm11, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm12, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm13, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm14, Address(rsp,off++*sizeof(jdouble)));
    movdbl(xmm15, Address(rsp,off++*sizeof(jdouble)));
#endif
  }
  if (UseSSE >= 1) {
    addptr(rsp, sizeof(jdouble)* LP64_ONLY(16) NOT_LP64(8));
  }
  popa();
}
|
||||
|
||||
// pi/4 -- presumably the |x| threshold below which trigfunc can use the
// x87 instruction directly (trigfunc body not fully visible here; confirm).
static const double pi_4 = 0.7853981633974483;
|
||||
|
||||
void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
|
||||
|
@ -8092,73 +8481,27 @@ void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) {
|
|||
|
||||
// slow case: runtime call
|
||||
bind(slow_case);
|
||||
// Preserve registers across runtime call
|
||||
pusha();
|
||||
int incoming_argument_and_return_value_offset = -1;
|
||||
if (num_fpu_regs_in_use > 1) {
|
||||
// Must preserve all other FPU regs (could alternatively convert
|
||||
// SharedRuntime::dsin and dcos into assembly routines known not to trash
|
||||
// FPU state, but can not trust C compiler)
|
||||
NEEDS_CLEANUP;
|
||||
// NOTE that in this case we also push the incoming argument to
|
||||
// the stack and restore it later; we also use this stack slot to
|
||||
// hold the return value from dsin or dcos.
|
||||
for (int i = 0; i < num_fpu_regs_in_use; i++) {
|
||||
subptr(rsp, sizeof(jdouble));
|
||||
fstp_d(Address(rsp, 0));
|
||||
}
|
||||
incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1);
|
||||
fld_d(Address(rsp, incoming_argument_and_return_value_offset));
|
||||
}
|
||||
subptr(rsp, sizeof(jdouble));
|
||||
fstp_d(Address(rsp, 0));
|
||||
#ifdef _LP64
|
||||
movdbl(xmm0, Address(rsp, 0));
|
||||
#endif // _LP64
|
||||
|
||||
// NOTE: we must not use call_VM_leaf here because that requires a
|
||||
// complete interpreter frame in debug mode -- same bug as 4387334
|
||||
// MacroAssembler::call_VM_leaf_base is perfectly safe and will
|
||||
// do proper 64bit abi
|
||||
|
||||
NEEDS_CLEANUP;
|
||||
// Need to add stack banging before this runtime call if it needs to
|
||||
// be taken; however, there is no generic stack banging routine at
|
||||
// the MacroAssembler level
|
||||
switch(trig) {
|
||||
case 's':
|
||||
{
|
||||
MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 0);
|
||||
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use);
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
{
|
||||
MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 0);
|
||||
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use);
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
{
|
||||
MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 0);
|
||||
fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert(false, "bad intrinsic");
|
||||
break;
|
||||
}
|
||||
#ifdef _LP64
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
fld_d(Address(rsp, 0));
|
||||
#endif // _LP64
|
||||
addptr(rsp, sizeof(jdouble));
|
||||
if (num_fpu_regs_in_use > 1) {
|
||||
// Must save return value to stack and then restore entire FPU stack
|
||||
fstp_d(Address(rsp, incoming_argument_and_return_value_offset));
|
||||
for (int i = 0; i < num_fpu_regs_in_use; i++) {
|
||||
fld_d(Address(rsp, 0));
|
||||
addptr(rsp, sizeof(jdouble));
|
||||
}
|
||||
}
|
||||
popa();
|
||||
|
||||
// Come here with result in F-TOS
|
||||
bind(done);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue