This commit is contained in:
Bharadwaj Yadavalli 2016-04-12 14:17:42 -04:00
commit b2ead5ac2d
142 changed files with 12761 additions and 10109 deletions

View file

@ -83,6 +83,21 @@ suite = {
"workingSets" : "API,JVMCI",
},
"jdk.vm.ci.code.test" : {
"subDir" : "test/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:JUNIT",
"jdk.vm.ci.amd64",
"jdk.vm.ci.sparc",
"jdk.vm.ci.code",
"jdk.vm.ci.hotspot",
],
"checkstyle" : "jdk.vm.ci.services",
"javaCompliance" : "1.8",
"workingSets" : "API,JVMCI",
},
"jdk.vm.ci.runtime" : {
"subDir" : "src/jdk.vm.ci/share/classes",
"sourceDirs" : ["src"],
@ -164,7 +179,7 @@ suite = {
"subDir" : "test/compiler/jvmci",
"sourceDirs" : ["src"],
"dependencies" : [
"mx:TESTNG",
"TESTNG",
"jdk.vm.ci.hotspot",
],
"checkstyle" : "jdk.vm.ci.services",

View file

@ -4190,55 +4190,6 @@ encode %{
}
%}
// Encoding that stores zr into cnt words starting at base, using an
// 8-way unrolled store loop entered through a computed branch.
// Both cnt_reg and base_reg are modified, and rscratch1/rscratch2 are
// used as temporaries.
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
MacroAssembler _masm(&cbuf);
Register cnt_reg = as_Register($cnt$$reg);
Register base_reg = as_Register($base$$reg);
// base is word aligned
// cnt is count of words
Label loop;
Label entry;
// Algorithm:
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// cnt -= 8;
// p[-8] = 0;
// case 7:
// p[-7] = 0;
// case 6:
// p[-6] = 0;
// // ...
// case 1:
// p[-1] = 0;
// case 0:
// p += 8;
// } while (cnt);
// }
const int unroll = 8; // Number of str(zr) instructions we'll unroll
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= tmp1 (cnt is now a multiple of unroll)
// base_reg always points to the end of the region we're about to zero
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
// Computed branch: back up tmp1 * 4 bytes from "entry" so that exactly
// tmp1 of the unrolled str instructions run on the first pass.
__ adr(rscratch2, entry);
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
__ br(rscratch2);
__ bind(loop);
__ sub(cnt_reg, cnt_reg, unroll);
// Emit the unrolled stores at negative offsets from base_reg.
for (int i = -unroll; i < 0; i++)
__ str(zr, Address(base_reg, i * wordSize));
__ bind(entry);
__ add(base_reg, base_reg, unroll * wordSize);
__ cbnz(cnt_reg, loop);
%}
/// mov encodings
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
@ -13363,7 +13314,9 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
ins_cost(4 * INSN_COST);
format %{ "ClearArray $cnt, $base" %}
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
ins_encode %{
__ zero_words($base$$Register, $cnt$$Register);
%}
ins_pipe(pipe_class_memory);
%}

View file

@ -2942,6 +2942,10 @@ void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
// Spin-wait hint hook. No instruction is emitted here yet: the body
// only calls Unimplemented().
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
// Emits a move of the rthread register into the register backing
// result_reg.
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
__ mov(result_reg->as_register(), rthread);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -1127,7 +1127,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -4670,6 +4670,61 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
}
// Zero a buffer by delegating to fill_words() with zr as the fill value.
// base: Address of a buffer to be zeroed, 8 bytes aligned.
// cnt: Count in 8-byte unit.
// The register side effects are those of fill_words().
void MacroAssembler::zero_words(Register base, Register cnt)
{
fill_words(base, cnt, zr);
}
// Fill a buffer with a 64-bit value using an 8-way unrolled store loop
// that is entered through a computed branch.
// base: Address of a buffer to be filled, 8 bytes aligned.
// cnt: Count in 8-byte unit.
// value: Value to be filled with.
// base will point to the end of the buffer after filling.
// cnt is counted down by the emitted code, and rscratch1/rscratch2 are
// used as temporaries (hence the assert_different_registers below).
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
{
// Algorithm:
//
// scratch1 = cnt & 7;
// cnt -= scratch1;
// p += scratch1;
// switch (scratch1) {
// do {
// p += 8;
// cnt -= 8;
// p[-8] = v;
// case 7:
// p[-7] = v;
// case 6:
// p[-6] = v;
// // ...
// case 1:
// p[-1] = v;
// case 0:
// ;
// } while (cnt);
// }
assert_different_registers(base, cnt, value, rscratch1, rscratch2);
Label entry, loop;
const int unroll = 8; // Number of str instructions we'll unroll
andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll
// No remainder: skip the adjustments and go straight to the loop test.
cbz(rscratch1, entry);
sub(cnt, cnt, rscratch1); // cnt -= tmp1
// base always points to the end of the region we're about to fill
add(base, base, rscratch1, Assembler::LSL, 3);
// Computed branch: back up tmp1 * 4 bytes from "entry" so that exactly
// tmp1 of the unrolled str instructions run on the first pass.
adr(rscratch2, entry);
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
br(rscratch2);
bind(loop);
add(base, base, unroll * 8);
sub(cnt, cnt, unroll);
// Emit the unrolled stores at negative offsets from base.
for (int i = -unroll; i < 0; i++)
str(value, Address(base, i * 8));
bind(entry);
cbnz(cnt, loop);
}
// encode char[] to byte[] in ISO_8859_1
void MacroAssembler::encode_iso_array(Register src, Register dst,

View file

@ -1184,6 +1184,9 @@ public:
Register result, Register cnt1,
int elem_size, bool is_string);
void fill_words(Register base, Register cnt, Register value);
void zero_words(Register base, Register cnt);
void encode_iso_array(Register src, Register dst,
Register len, Register result,
FloatRegister Vtmp1, FloatRegister Vtmp2,

View file

@ -2022,6 +2022,136 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
//
// Generate stub for array fill. If "aligned" is true, the
// "to" address is assumed to be heapword aligned.
//
// Arguments for generated stub:
//   to:    c_rarg0
//   value: c_rarg1
//   count: c_rarg2 treated as signed
//
// Parameters of the generator:
//   t:       element type; only T_BYTE, T_SHORT and T_INT are handled,
//            anything else hits ShouldNotReachHere().
//   aligned: when true, the code that aligns "to" to 8 bytes is omitted.
//   name:    name passed to the StubCodeMark for this stub.
// Returns the pc at which the generated stub starts.
//
// NOTE(review): the short-array check branches on the unsigned condition
// LO, so callers are presumably expected to pass a non-negative count --
// confirm against the callers.
//
address generate_fill(BasicType t, bool aligned, const char *name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", name);
  address start = __ pc();

  BLOCK_COMMENT("Entry:");

  const Register to        = c_rarg0;  // destination array address
  const Register value     = c_rarg1;  // value
  const Register count     = c_rarg2;  // elements count
  const Register cnt_words = c_rarg3;  // temp register

  __ enter();

  Label L_fill_elements;

  // Replicate the element value up to 32 bits and send arrays shorter
  // than 8 bytes to the element-wise tail code.
  int shift = -1;
  switch (t) {
    case T_BYTE:
      shift = 0;
      __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
      __ bfi(value, value, 8, 8);   // 8 bit -> 16 bit
      __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
      __ br(Assembler::LO, L_fill_elements);
      break;
    case T_SHORT:
      shift = 1;
      __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
      __ bfi(value, value, 16, 16); // 16 bit -> 32 bit
      __ br(Assembler::LO, L_fill_elements);
      break;
    case T_INT:
      shift = 2;
      __ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
      __ br(Assembler::LO, L_fill_elements);
      break;
    default: ShouldNotReachHere();
  }

  // Align destination address at 8 bytes address boundary.
  Label L_skip_align1, L_skip_align2, L_skip_align4;
  if (!aligned) {
    switch (t) {
      case T_BYTE:
        // One byte misalignment happens only for byte arrays.
        __ tbz(to, 0, L_skip_align1);
        __ strb(value, Address(__ post(to, 1)));
        __ subw(count, count, 1);
        __ bind(L_skip_align1);
        // Fallthrough
      case T_SHORT:
        // Two bytes misalignment happens only for byte and short (char) arrays.
        __ tbz(to, 1, L_skip_align2);
        __ strh(value, Address(__ post(to, 2)));
        __ subw(count, count, 2 >> shift);
        __ bind(L_skip_align2);
        // Fallthrough
      case T_INT:
        // Align to 8 bytes, we know we are 4 byte aligned to start.
        __ tbz(to, 2, L_skip_align4);
        __ strw(value, Address(__ post(to, 4)));
        __ subw(count, count, 4 >> shift);
        __ bind(L_skip_align4);
        break;
      default: ShouldNotReachHere();
    }
  }

  //
  // Fill large chunks
  //
  __ lsrw(cnt_words, count, 3 - shift); // number of words
  __ bfi(value, value, 32, 32);         // 32 bit -> 64 bit
  // count becomes the number of elements left over after the whole words.
  __ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
  __ fill_words(to, cnt_words, value);

  // Remaining count is less than 8 bytes. Fill it by a single store.
  // Note that the total length is no less than 8 bytes.
  if (t == T_BYTE || t == T_SHORT) {
    Label L_exit1;
    __ cbzw(count, L_exit1);
    __ add(to, to, count, Assembler::LSL, shift); // points to the end
    __ str(value, Address(to, -8));    // overwrite some elements
    __ bind(L_exit1);
    __ leave();
    __ ret(lr);
  }

  // Handle fills of less than 8 bytes. For T_INT the large-chunk path
  // above also falls through to here to store a possible last element.
  Label L_fill_2, L_fill_4, L_exit2;
  __ bind(L_fill_elements);
  switch (t) {
    case T_BYTE:
      __ tbz(count, 0, L_fill_2);
      __ strb(value, Address(__ post(to, 1)));
      __ bind(L_fill_2);
      __ tbz(count, 1, L_fill_4);
      __ strh(value, Address(__ post(to, 2)));
      __ bind(L_fill_4);
      __ tbz(count, 2, L_exit2);
      __ strw(value, Address(to));
      break;
    case T_SHORT:
      __ tbz(count, 0, L_fill_4);
      __ strh(value, Address(__ post(to, 2)));
      __ bind(L_fill_4);
      __ tbz(count, 1, L_exit2);
      __ strw(value, Address(to));
      break;
    case T_INT:
      __ cbzw(count, L_exit2);
      __ strw(value, Address(to));
      break;
    default: ShouldNotReachHere();
  }
  __ bind(L_exit2);
  __ leave();
  __ ret(lr);
  return start;
}
void generate_arraycopy_stubs() {
address entry;
address entry_jbyte_arraycopy;
@ -2125,6 +2255,12 @@ class StubGenerator: public StubCodeGenerator {
entry_jlong_arraycopy,
entry_checkcast_arraycopy);
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
}
// Placeholder: no math stubs are generated here; the body only calls
// Unimplemented().
void generate_math_stubs() { Unimplemented(); }

View file

@ -2845,6 +2845,9 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::StoreLoad);
}
// Spin-wait hint hook. No instruction is emitted here yet: the body
// only calls Unimplemented().
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
LIR_Address* addr = addr_opr->as_address_ptr();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -1055,7 +1055,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -3313,6 +3313,9 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
// Spin-wait hint hook. No instruction is emitted here yet: the body
// only calls Unimplemented().
void LIR_Assembler::on_spin_wait() {
Unimplemented();
}
// Pack two sequential registers containing 32 bit values
// into a single 64 bit register.

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1033,7 +1033,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -1827,6 +1827,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
emit_int8((unsigned char)(0xC0 | encode));
}
// Emit the register-to-register form of cvttpd2dq: prefix selected through
// simd_prefix_and_encode (66, 0F map), then opcode byte 0xE6, then the
// register-direct ModRM byte.
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
  NOT_LP64(assert(VM_Version::supports_sse2(), ""));

  // Use the 512-bit length when supports_avx512novl() reports true,
  // otherwise the 128-bit length.
  int vlen;
  if (VM_Version::supports_avx512novl()) {
    vlen = AVX_512bit;
  } else {
    vlen = AVX_128bit;
  }

  InstructionAttr attrs(vlen, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attrs);

  emit_int8((unsigned char)0xE6);
  emit_int8((unsigned char)(0xC0 | reg_bits));
}
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@ -4993,7 +5002,7 @@ void Assembler::paddq(XMMRegister dst, XMMRegister src) {
}
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x01);
@ -5001,7 +5010,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
}
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
assert(VM_Version::supports_sse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x02);

View file

@ -1048,6 +1048,8 @@ private:
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
void cvttpd2dq(XMMRegister dst, XMMRegister src);
// Divide Scalar Double-Precision Floating-Point Values
void divsd(XMMRegister dst, Address src);
void divsd(XMMRegister dst, XMMRegister src);

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -169,18 +169,18 @@ void FpuStackSim::clear() {
intArray* FpuStackSim::write_state() {
intArray* res = new intArray(1 + FrameMap::nof_fpu_regs);
(*res)[0] = stack_size();
res->append(stack_size());
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
(*res)[1 + i] = regs_at(i);
res->append(regs_at(i));
}
return res;
}
void FpuStackSim::read_state(intArray* fpu_stack_state) {
_stack_size = (*fpu_stack_state)[0];
_stack_size = fpu_stack_state->at(0);
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
set_regs_at(i, (*fpu_stack_state)[1 + i]);
set_regs_at(i, fpu_stack_state->at(1 + i));
}
}

View file

@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
} else if (value->is_double_fpu()) {
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
switch(code) {
case lir_log10 : __ flog10() ; break;
case lir_abs : __ fabs() ; break;
case lir_sqrt : __ fsqrt(); break;
case lir_tan :
// Should consider not saving rbx, if not necessary
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
break;
default : ShouldNotReachHere();
}
} else {
@ -3886,6 +3881,10 @@ void LIR_Assembler::membar_storeload() {
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
}
// Spin-wait hint hook: emits the assembler's pause() here.
void LIR_Assembler::on_spin_wait() {
__ pause ();
}
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
assert(result_reg->is_register(), "check");
#ifdef _LP64

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -812,7 +812,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
x->id() == vmIntrinsics::_dsin) {
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
x->id() == vmIntrinsics::_dlog10) {
do_LibmIntrinsic(x);
return;
}
@ -820,58 +821,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
LIRItem value(x->argument_at(0), this);
bool use_fpu = false;
if (UseSSE >= 2) {
switch(x->id()) {
case vmIntrinsics::_dtan:
case vmIntrinsics::_dlog10:
use_fpu = true;
break;
}
} else {
if (UseSSE < 2) {
value.set_destroys_register();
}
value.load_item();
LIR_Opr calc_input = value.result();
LIR_Opr calc_input2 = NULL;
if (x->id() == vmIntrinsics::_dpow) {
LIRItem extra_arg(x->argument_at(1), this);
if (UseSSE < 2) {
extra_arg.set_destroys_register();
}
extra_arg.load_item();
calc_input2 = extra_arg.result();
}
LIR_Opr calc_result = rlock_result(x);
// sin, cos, pow and exp need two free fpu stack slots, so register
// two temporary operands
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
if (use_fpu) {
LIR_Opr tmp = FrameMap::fpu0_double_opr;
int tmp_start = 1;
if (calc_input2 != NULL) {
__ move(calc_input2, tmp);
tmp_start = 2;
calc_input2 = tmp;
}
__ move(calc_input, tmp);
calc_input = tmp;
calc_result = tmp;
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
}
switch(x->id()) {
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
default: ShouldNotReachHere();
}
@ -912,21 +872,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
result_reg = tmp;
switch(x->id()) {
case vmIntrinsics::_dexp:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
@ -946,18 +913,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#else
switch (x->id()) {
case vmIntrinsics::_dexp:
if (StubRoutines::dexp() != NULL) {
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog:
if (StubRoutines::dlog() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dlog10:
if (StubRoutines::dlog10() != NULL) {
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dpow:
if (StubRoutines::dpow() != NULL) {
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dsin:
if (StubRoutines::dsin() != NULL) {
@ -973,6 +966,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
}
break;
case vmIntrinsics::_dtan:
if (StubRoutines::dtan() != NULL) {
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
} else {
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
}
break;
default: ShouldNotReachHere();
}
#endif // _LP64
@ -1260,7 +1260,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
Values* dims = x->dims();
int i = dims->length();
LIRItemList* items = new LIRItemList(dims->length(), NULL);
LIRItemList* items = new LIRItemList(i, i, NULL);
while (i-- > 0) {
LIRItem* size = new LIRItem(dims->at(i), this);
items->at_put(i, size);

View file

@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
break;
}
case lir_log10: {
// log and log10 need one temporary fpu stack slot, so
// there is one temporary registers stored in temp of the
// operation. the stack allocator must guarantee that the stack
// slots are really free, otherwise there might be a stack
// overflow.
assert(right->is_illegal(), "must be");
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
break;
}
case lir_tan: {
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
// registers (stored in right and temp of the operation).
// the stack allocator must guarantee that the stack slots are really free,
// otherwise there might be a stack overflow.
assert(left->is_fpu_register(), "must be");
assert(res->is_fpu_register(), "must be");
// assert(left->is_last_use(), "old value gets destroyed");
assert(right->is_fpu_register(), "right is used as the first temporary register");
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
insert_free_if_dead(right);
insert_free_if_dead(op2->tmp1_opr());
insert_free_if_dead(res, left);
insert_exchange(left);
do_rename(left, res);
new_left = to_fpu_stack_top(res);
new_res = new_left;
op2->set_fpu_stack_size(sim()->stack_size());
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
break;
}
default: {
assert(false, "missed a fpu-operation");
}

View file

@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
product(bool, UseBMI2Instructions, false, \
"Use BMI2 instructions") \
\
diagnostic(bool, UseLibmSinIntrinsic, true, \
"Use Libm Sin Intrinsic") \
\
diagnostic(bool, UseLibmCosIntrinsic, true, \
"Use Libm Cos Intrinsic")
diagnostic(bool, UseLibmIntrinsic, true, \
"Use Libm Intrinsics")
#endif // CPU_X86_VM_GLOBALS_X86_HPP

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -928,6 +928,10 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register r11);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
@ -941,11 +945,19 @@ class MacroAssembler: public Assembler {
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1,
Register tmp2, Register tmp3, Register tmp4);
#else
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp1);
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
Register rdx, Register tmp);
@ -964,6 +976,14 @@ class MacroAssembler: public Assembler {
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
Register esi, Register edi, Register ebp, Register esp);
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
Register edx, Register ebx, Register esi, Register edi,
Register ebp, Register esp);
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
Register rax, Register rcx, Register rdx, Register tmp);
#endif
void increase_precision();

View file

@ -0,0 +1,889 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - COS()
// ---------------------
//
// 1. RANGE REDUCTION
//
// We perform an initial range reduction from X to r with
//
// X =~= N * pi/32 + r
//
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
// where |N| <= 932560. Beyond this, the range reduction is
// insufficiently accurate. For extremely small inputs,
// denormalization can occur internally, impacting performance.
// This means that the main path is actually only taken for
// 2^-252 <= |X| < 90112.
//
// To avoid branches, we perform the range reduction to full
// accuracy each time.
//
// X - N * (P_1 + P_2 + P_3)
//
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
// is exact) and P_3 is a 53-bit number. Together, these
// approximate pi well enough for all cases in the restricted
// range.
//
// The main reduction sequence is:
//
// y = 32/pi * x
// N = integer(y)
// (computed by adding and subtracting off SHIFTER)
//
// m_1 = N * P_1
// m_2 = N * P_2
// r_1 = x - m_1
// r = r_1 - m_2
// (this r can be used for most of the calculation)
//
// c_1 = r_1 - r
// m_3 = N * P_3
// c_2 = c_1 - m_2
// c = c_2 - m_3
//
// 2. MAIN ALGORITHM
//
// The algorithm uses a table lookup based on B = M * pi / 32
// where M = N mod 64. The stored values are:
// sigma closest power of 2 to cos(B)
// C_hl 53-bit cos(B) - sigma
// S_hi + S_lo 2 * 53-bit sin(B)
//
// The computation is organized as follows:
//
// sin(B + r + c) = [sin(B) + sigma * r] +
// r * (cos(B) - sigma) +
// sin(B) * [cos(r + c) - 1] +
// cos(B) * [sin(r + c) - r]
//
// which is approximately:
//
// [S_hi + sigma * r] +
// C_hl * r +
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
// (C_hl + sigma) * [(sin(r) - r) + c]
//
// and this is what is actually computed. We separate this sum
// into four parts:
//
// hi + med + pols + corr
//
// where
//
// hi = S_hi + sigma r
// med = C_hl * r
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
//
// 3. POLYNOMIAL
//
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
// (sin(r) - r) can be rearranged freely, since it is quite
// small, so we exploit parallelism to the fullest.
//
// psc4 = SC_4 * r_1
// msc4 = psc4 * r
// r2 = r * r
// msc2 = SC_2 * r2
// r4 = r2 * r2
// psc3 = SC_3 + msc4
// psc1 = SC_1 + msc2
// msc3 = r4 * psc3
// sincospols = psc1 + msc3
// pols = sincospols *
// <S_hi * r^2 | (C_hl + sigma) * r^3>
//
// 4. CORRECTION TERM
//
// This is where the "c" component of the range reduction is
// taken into account; recall that just "r" is used for most of
// the calculation.
//
// -c = m_3 - c_2
// -d = S_hi * r - (C_hl + sigma)
// corr = -c * -d + S_lo
//
// 5. COMPENSATED SUMMATIONS
//
// The two successive compensated summations add up the high
// and medium parts, leaving just the low parts to add up at
// the end.
//
// rs = sigma * r
// res_int = S_hi + rs
// k_0 = S_hi - res_int
// k_2 = k_0 + rs
// med = C_hl * r
// res_hi = res_int + med
// k_1 = res_int - res_hi
// k_3 = k_1 + med
//
// 6. FINAL SUMMATION
//
// We now add up all the small parts:
//
// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3
//
// Now the overall result is just:
//
// res_hi + res_lo
//
// 7. SMALL ARGUMENTS
//
// Inputs with |X| < 2^-252 are treated specially as
// 1 - |x|.
//
// Special cases:
// cos(NaN) = quiet NaN, and raise invalid exception
// cos(INF) = NaN and raise invalid exception
// cos(0) = 1
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
// 8-byte aligned constant whose address fast_cos uses as ONE.
// The two words are stored low word first; read as one little-endian
// 64-bit value they form 0x3ff0000000000000, the IEEE-754 double 1.0.
// (Assumes juint is a 32-bit unsigned type -- TODO confirm.)
ALIGNED_(8) juint _ONE[] =
{
0x00000000UL, 0x3ff00000UL
};
// Emits the 64-bit (SSE2) code sequence for cos(x).
//   input:   xmm0 (the double argument; also spilled to the stack at rsp + 8)
//   output:  xmm0
//   scratch: xmm1-xmm7, the eax/ecx/edx arguments, r8-r11, plus rsi and rdi,
//            which are overwritten on the large-argument path without being
//            saved; rbx is saved with push/pop around the body.
// NOTE(review): the body mixes the eax/ecx/edx parameters with the fixed
// rax/rcx/rdx registers (e.g. cvttsd2sil(edx, ...) followed by
// addq(rdx, ...)), so callers presumably have to pass rax, rcx and rdx for
// them - confirm against the stub generator.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
address P_2 = StubRoutines::x86::_P_2_addr();
address SC_4 = StubRoutines::x86::_SC_4_addr();
address Ctable = StubRoutines::x86::_Ctable_addr();
address SC_2 = StubRoutines::x86::_SC_2_addr();
address SC_3 = StubRoutines::x86::_SC_3_addr();
address SC_1 = StubRoutines::x86::_SC_1_addr();
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
address P_1 = (address)StubRoutines::x86::_P_1_addr();
address P_3 = (address)StubRoutines::x86::_P_3_addr();
address ONE = (address)_ONE;
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
bind(start);
// Save rbx and reserve 16 bytes of stack; the argument is kept at rsp + 8.
push(rbx);
subq(rsp, 16);
movsd(Address(rsp, 8), xmm0);
bind(B1_2);
// eax = high 32 bits of x. After masking off the sign (0x7fff0000) and
// subtracting 0x30300000, an unsigned result above 0x10c50000 means the
// argument is outside the fast-path window (tiny, large, Inf or NaN).
movl(eax, Address(rsp, 12));
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
andl(eax, 2147418112);
subl(eax, 808452096);
cmpl(eax, 281346048);
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
// Fast path: xmm1 = x * PI32INV, rounded to an integer by adding a half that
// carries the sign of x and then truncating (cvttsd2sil).
mulsd(xmm1, xmm0);
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
// Ctable index: only the low 6 bits of (n + 1865232) survive the mask
// (1865232 mod 64 == 16); each Ctable entry is 32 bytes, hence the shift by 5.
addq(rdx, 1865232);
movdqu(xmm4, xmm0);
andq(rdx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shlq(rdx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm0, xmm5);
addsd(xmm3, xmm7);
addsd(xmm0, xmm1);
addsd(xmm0, xmm3);
addsd(xmm0, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm0, xmm6);
addsd(xmm0, xmm4);
jmp(B1_4);
bind(L_2TAG_PACKET_0_0_1);
// The flags are still those of the cmpl above: signed "greater" means the
// argument is too large for the fast path. Otherwise |x| is tiny and the
// result is 1 - |x| (the andl clears the sign bit of the top word).
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
subsd(xmm1, xmm0);
movdqu(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_1_0_1);
// Large argument. An all-ones exponent field (Inf or NaN) is handled at
// L_2TAG_PACKET_2_0_1.
pextrw(eax, xmm0, 3);
andl(eax, 32752);
cmpl(eax, 32752);
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
// Finite large argument: the exponent selects a starting 32-bit word in
// PI_INV_TABLE (rcx). The significand is split into rdx (low 32 bits) and
// rax (high 21 bits with the implicit leading one set) and multiplied by
// successive table words, accumulating the partial products with carries.
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
subl(ecx, 16224);
shrl(ecx, 7);
andl(ecx, 65532);
lea(r11, ExternalAddress(PI_INV_TABLE));
addq(rcx, r11);
movdq(rax, xmm0);
movl(r10, Address(rcx, 20));
movl(r8, Address(rcx, 24));
movl(edx, eax);
shrq(rax, 21);
orl(eax, INT_MIN);
shrl(eax, 11);
movl(r9, r10);
imulq(r10, rdx);
imulq(r9, rax);
imulq(r8, rax);
movl(rsi, Address(rcx, 16));
movl(rdi, Address(rcx, 12));
movl(r11, r10);
shrq(r10, 32);
addq(r9, r10);
addq(r11, r8);
movl(r8, r11);
shrq(r11, 32);
addq(r9, r11);
movl(r10, rsi);
imulq(rsi, rdx);
imulq(r10, rax);
movl(r11, rdi);
imulq(rdi, rdx);
movl(rbx, rsi);
shrq(rsi, 32);
addq(r9, rbx);
movl(rbx, r9);
shrq(r9, 32);
addq(r10, rsi);
addq(r10, r9);
shlq(rbx, 32);
orq(r8, rbx);
imulq(r11, rax);
movl(r9, Address(rcx, 8));
movl(rsi, Address(rcx, 4));
movl(rbx, rdi);
shrq(rdi, 32);
addq(r10, rbx);
movl(rbx, r10);
shrq(r10, 32);
addq(r11, rdi);
addq(r11, r10);
movq(rdi, r9);
imulq(r9, rdx);
imulq(rdi, rax);
movl(r10, r9);
shrq(r9, 32);
addq(r11, r10);
movl(r10, r11);
shrq(r11, 32);
addq(rdi, r9);
addq(rdi, r11);
movq(r9, rsi);
imulq(rsi, rdx);
imulq(r9, rax);
shlq(r10, 32);
orq(r10, rbx);
movl(eax, Address(rcx, 0));
movl(r11, rsi);
shrq(rsi, 32);
addq(rdi, r11);
movl(r11, rdi);
shrq(rdi, 32);
addq(r9, rsi);
addq(r9, rdi);
imulq(rdx, rax);
// Derive the shift count (ecx) and exponent adjustment (edx) from the table
// offset and the unbiased input exponent (rbx); rsi keeps the sign bit of x.
pextrw(rbx, xmm0, 3);
lea(rdi, ExternalAddress(PI_INV_TABLE));
subq(rcx, rdi);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, ecx);
addl(ecx, 19);
movl(rsi, 32768);
andl(rsi, rbx);
shrl(rbx, 4);
andl(rbx, 2047);
subl(rbx, 1023);
subl(ecx, rbx);
addq(r9, rdx);
movl(edx, ecx);
addl(edx, 32);
cmpl(ecx, 1);
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
negl(ecx);
addl(ecx, 29);
shll(r9);
movl(rdi, r9);
andl(r9, 536870911);
testl(r9, 268435456);
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 0);
shlq(r9, 32);
orq(r9, r11);
bind(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_6_0_1);
cmpq(r9, 0);
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
bind(L_2TAG_PACKET_8_0_1);
bsrq(r11, r9);
movl(ecx, 29);
subl(ecx, r11);
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
shlq(r9);
movq(rax, r10);
shlq(r10);
addl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shrq(rax);
shrq(r8);
orq(r9, rax);
orq(r10, r8);
bind(L_2TAG_PACKET_10_0_1);
// Convert the fixed-point remainder held in r9 and r10 to two doubles, scale
// them by powers of two assembled in edx, and multiply by PI_4; the reduced
// argument ends up in xmm0 with a low-order correction in xmm6.
cvtsi2sdq(xmm0, r9);
shrq(r10, 1);
cvtsi2sdq(xmm3, r10);
xorpd(xmm4, xmm4);
shll(edx, 4);
negl(edx);
addl(edx, 16368);
orl(edx, rsi);
xorl(edx, rbx);
pinsrw(xmm4, edx, 3);
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL, 0x18469899UL, 0x3e64442dUL
movq(xmm6, ExternalAddress(8 + PI_4)); //0x18469899UL, 0x3e64442dUL
xorpd(xmm5, xmm5);
subl(edx, 1008);
pinsrw(xmm5, edx, 3);
mulsd(xmm0, xmm4);
shll(rsi, 16);
sarl(rsi, 31);
mulsd(xmm3, xmm5);
movdqu(xmm1, xmm0);
mulsd(xmm0, xmm2);
shrl(rdi, 29);
addsd(xmm1, xmm3);
mulsd(xmm3, xmm2);
addl(rdi, rsi);
xorl(rdi, rsi);
mulsd(xmm6, xmm1);
movl(eax, rdi);
addsd(xmm6, xmm3);
movdqu(xmm2, xmm0);
addsd(xmm0, xmm6);
subsd(xmm2, xmm0);
addsd(xmm6, xmm2);
bind(L_2TAG_PACKET_11_0_1);
// Same evaluation as the fast path, applied to the reduced argument. eax
// (taken from the top bits of the product during reduction) is folded into
// the Ctable index, and xmm6 is subtracted as an extra correction term.
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
mulsd(xmm1, xmm0);
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
pand(xmm4, xmm0);
por(xmm5, xmm4);
addpd(xmm1, xmm5);
cvttsd2siq(rdx, xmm1);
cvtsi2sdq(xmm1, rdx);
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
shll(eax, 3);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
addl(edx, eax);
andl(edx, 63);
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
lea(rax, ExternalAddress(Ctable));
shll(edx, 5);
addq(rax, rdx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
subsd(xmm4, xmm3);
movq(xmm7, Address(rax, 8));
unpcklpd(xmm0, xmm0);
movdqu(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(rax, 0));
subsd(xmm1, xmm3);
movq(xmm3, Address(rax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
subsd(xmm1, xmm6);
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
mulsd(xmm4, Address(rax, 0));
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
mulpd(xmm5, xmm0);
movdqu(xmm0, xmm3);
addsd(xmm3, Address(rax, 8));
mulpd(xmm1, xmm7);
movdqu(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movq(xmm5, Address(rax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(rax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
movdqu(xmm0, xmm4);
addsd(xmm1, xmm6);
addsd(xmm0, xmm1);
jmp(B1_4);
bind(L_2TAG_PACKET_7_0_1);
// The top 64 bits of the remainder are zero: move the lower words up by 64
// bits (adjusting edx) and retry; if every word is zero, continue with a
// reduced argument of 0.
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
movl(r8, 0);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
addl(edx, 64);
movq(r9, r10);
movq(r10, r8);
cmpq(r9, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
xorpd(xmm0, xmm0);
xorpd(xmm6, xmm6);
jmp(L_2TAG_PACKET_11_0_1);
bind(L_2TAG_PACKET_9_0_1);
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
negl(ecx);
shrq(r10);
movq(rax, r9);
shrq(r9);
subl(edx, ecx);
negl(ecx);
addl(ecx, 64);
shlq(rax);
orq(r10, rax);
jmp(L_2TAG_PACKET_10_0_1);
bind(L_2TAG_PACKET_3_0_1);
negl(ecx);
shlq(r9, 32);
orq(r9, r11);
shlq(r9);
movq(rdi, r9);
testl(r9, INT_MIN);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
shrl(r9);
movl(rbx, 0);
shrq(rdi, 3);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_4_0_1);
shrl(r9);
movl(rbx, 536870912);
shrl(rbx);
shlq(r9, 32);
orq(r9, r11);
shlq(rbx, 32);
addl(rdi, 536870912);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
jmp(L_2TAG_PACKET_5_0_1);
bind(L_2TAG_PACKET_12_0_1);
shrl(r9);
mov64(rbx, 0x100000000);
shrq(rbx);
movl(rcx, 0);
movl(r11, 0);
subq(rcx, r8);
sbbq(r11, r10);
sbbq(rbx, r9);
movq(r8, rcx);
movq(r10, r11);
movq(r9, rbx);
movl(rbx, 32768);
shrq(rdi, 3);
addl(rdi, 536870912);
jmp(L_2TAG_PACKET_6_0_1);
bind(L_2TAG_PACKET_2_0_1);
// Inf or NaN input: return x * (-0.0), which is NaN in both cases; the
// product is also stored at rsp + 0.
movsd(xmm0, Address(rsp, 8));
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
movq(Address(rsp, 0), xmm0);
bind(L_2TAG_PACKET_13_0_1);
bind(B1_4);
// Common exit: release the stack slot and restore rbx; the result is in xmm0.
addq(rsp, 16);
pop(rbx);
}
#else
// The 32 bit code is at most SSE2 compliant
// Constant pool for the 32-bit fast_cos, addressed by byte offset from the
// table base (each juint is 4 bytes):
//      0 .. 2047  64 entries of 32 bytes each, selected by the 6-bit index
//                 that fast_cos computes (index << 5)
//   2048, 2064, 2080, 2096  polynomial coefficient pairs (the same values the
//                 64-bit code loads as SC_1, SC_2, SC_3 and SC_4)
//   2112  P_2 (duplicated)    2128  P_1    2144  P_3
//   2160  PI32INV
//   2192  1.0
//   2208  -0.0 (multiplied into Inf/NaN inputs)
//   2224  sign-bit mask
//   2240  0.5 (duplicated)
// The remaining slots (e.g. offset 2176) are not referenced by fast_cos.
ALIGNED_(16) juint _static_const_table_cos[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
};
// Emits the 32-bit (SSE2) code sequence for cos(x).
// Registers:
// input: (rbp + 8); the code reads it as Address(rsp, 128) after lowering
//        rsp by 120
// output: left on the x87 stack (every path ends with fld_d, or fld_d
//         followed by fmul_d)
// scratch: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp); tmp is saved at rsp + 56 and reloaded on exit
// NOTE(review): the 120 bytes reserved by subl(rsp, 120) are not released in
// this function - presumably the calling stub restores rsp (e.g. with leave);
// confirm against the stub generator.
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_cos = (address)_static_const_table_cos;
bind(start);
// Reserve the frame, save tmp, and point tmp at the constant table; every
// Address(tmp, N) below is a byte offset into _static_const_table_cos.
subl(rsp, 120);
movl(Address(rsp, 56), tmp);
lea(tmp, ExternalAddress(static_const_table_cos));
movsd(xmm0, Address(rsp, 128));
// eax = top 16 bits of x without the sign, minus 12336 (0x3030). An unsigned
// result above 4293 (0x10c5) means the argument is outside the fast-path
// window (tiny, large, Inf or NaN).
pextrw(eax, xmm0, 3);
andl(eax, 32767);
subl(eax, 12336);
cmpl(eax, 4293);
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
// Fast path: scale x by the constant at offset 2160, round to an integer by
// adding a sign-matched 0.5 and truncating, then use the low 6 bits of
// (n + 1865232) to select a 32-byte table entry (eax).
movsd(xmm1, Address(tmp, 2160));
mulsd(xmm1, xmm0);
movdqu(xmm5, Address(tmp, 2240));
movsd(xmm4, Address(tmp, 2224));
pand(xmm4, xmm0);
por(xmm5, xmm4);
movsd(xmm3, Address(tmp, 2128));
movdqu(xmm2, Address(tmp, 2112));
addpd(xmm1, xmm5);
cvttsd2sil(edx, xmm1);
cvtsi2sdl(xmm1, edx);
mulsd(xmm3, xmm1);
unpcklpd(xmm1, xmm1);
addl(edx, 1865232);
movdqu(xmm4, xmm0);
andl(edx, 63);
movdqu(xmm5, Address(tmp, 2096));
lea(eax, Address(tmp, 0));
shll(edx, 5);
addl(eax, edx);
mulpd(xmm2, xmm1);
subsd(xmm0, xmm3);
mulsd(xmm1, Address(tmp, 2144));
subsd(xmm4, xmm3);
movsd(xmm7, Address(eax, 8));
unpcklpd(xmm0, xmm0);
movapd(xmm3, xmm4);
subsd(xmm4, xmm2);
mulpd(xmm5, xmm0);
subpd(xmm0, xmm2);
movdqu(xmm6, Address(tmp, 2064));
mulsd(xmm7, xmm4);
subsd(xmm3, xmm4);
mulpd(xmm5, xmm0);
mulpd(xmm0, xmm0);
subsd(xmm3, xmm2);
movdqu(xmm2, Address(eax, 0));
subsd(xmm1, xmm3);
movsd(xmm3, Address(eax, 24));
addsd(xmm2, xmm3);
subsd(xmm7, xmm2);
mulsd(xmm2, xmm4);
mulpd(xmm6, xmm0);
mulsd(xmm3, xmm4);
mulpd(xmm2, xmm0);
mulpd(xmm0, xmm0);
addpd(xmm5, Address(tmp, 2080));
mulsd(xmm4, Address(eax, 0));
addpd(xmm6, Address(tmp, 2048));
mulpd(xmm5, xmm0);
movapd(xmm0, xmm3);
addsd(xmm3, Address(eax, 8));
mulpd(xmm1, xmm7);
movapd(xmm7, xmm4);
addsd(xmm4, xmm3);
addpd(xmm6, xmm5);
movsd(xmm5, Address(eax, 8));
subsd(xmm5, xmm3);
subsd(xmm3, xmm4);
addsd(xmm1, Address(eax, 16));
mulpd(xmm6, xmm2);
addsd(xmm5, xmm0);
addsd(xmm3, xmm7);
addsd(xmm1, xmm5);
addsd(xmm1, xmm3);
addsd(xmm1, xmm6);
unpckhpd(xmm6, xmm6);
addsd(xmm1, xmm6);
addsd(xmm4, xmm1);
// Move the SSE result through memory onto the x87 stack.
movsd(Address(rsp, 0), xmm4);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_0_0_2);
// The flags are still those of the cmpl above: signed "greater" means the
// argument is too large for the fast path. Otherwise |x| is tiny and the
// result is 1 - |x| (offset 2192 holds 1.0).
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pextrw(eax, xmm0, 3);
andl(eax, 32767);
pinsrw(xmm0, eax, 3);
movsd(xmm1, Address(tmp, 2192));
subsd(xmm1, xmm0);
movsd(Address(rsp, 0), xmm1);
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_2_0_2);
// Large argument. An all-ones exponent field in the high word (Inf or NaN)
// is handled at L_2TAG_PACKET_3_0_2.
movl(eax, Address(rsp, 132));
andl(eax, 2146435072);
cmpl(eax, 2146435072);
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
// Finite large argument: call the dlibm_sin_cos_huge stub with a 32-byte
// outgoing area holding x at rsp + 0, a pointer to a result buffer at
// rsp + 8 and the integer 1 at rsp + 12. Once the area is released, the
// result buffer is at rsp + 8 and is loaded onto the x87 stack.
// NOTE(review): the 1 presumably selects cosine - confirm against
// dlibm_sin_cos_huge.
subl(rsp, 32);
movsd(Address(rsp, 0), xmm0);
lea(eax, Address(rsp, 40));
movl(Address(rsp, 8), eax);
movl(eax, 1);
movl(Address(rsp, 12), eax);
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
addl(rsp, 32);
fld_d(Address(rsp, 8));
jmp(L_2TAG_PACKET_1_0_2);
bind(L_2TAG_PACKET_3_0_2);
// Inf or NaN input: return x * (-0.0) (offset 2208), computed on the x87
// stack; the product is NaN in both cases.
fld_d(Address(rsp, 128));
fmul_d(Address(tmp, 2208));
bind(L_2TAG_PACKET_1_0_2);
// Common exit: restore the caller's tmp register.
movl(tmp, Address(rsp, 56));
}
#endif

View file

@ -0,0 +1,674 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
// ALIGNED_(x): compiler-specific spelling of "align this declaration to x
// bytes"; it prefixes the constant tables defined in this file.
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - EXP()
// ---------------------
//
// Description:
// Let K = 64 (table size).
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
// where
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
//
// P(y) is a minimax polynomial approximation of exp(y)-1
// on the small interval [-log(2)/K..log(2)/K] (the coefficients were calculated by Maple V).
//
// To avoid problems with arithmetic overflow and underflow,
// the value 2^n is safely computed as 2^n1 * 2^n2, where n = n1 + n2,
// n1 is in [-BIAS/2..BIAS/2] and BIAS is the exponent bias.
//
// Special cases:
// exp(NaN) = NaN
// exp(+INF) = +INF
// exp(-INF) = 0
// exp(x) = 1 for subnormals
// for finite argument, only exp(0)=1 is exact
// For IEEE double
// if x > 709.782712893383973096 then exp(x) overflow
// if x < -745.133219101941108420 then exp(x) underflow
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
// Packed constants for fast_exp, one 16-byte row per line so that row k is
// the vector at byte offset 16 * k (fast_exp loads cv, 16 + cv, 32 + cv,
// 64 + cv and 80 + cv). Each double is stored as {low word, high word}.
ALIGNED_(16) juint _cv[] =
{
    0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,  //  +0: approx. 64/log(2), duplicated
    0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL,  // +16: approx. log(2)/64 (leading bits), duplicated
    0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL,  // +32: small companion term to row +16, duplicated
    0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL,  // +48: just below 0.5, duplicated
    0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,  // +64: approx. 1/720 and 1/24
    0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL   // +80: approx. 1/120 and 1/6
};
// Two copies of the double 0x4338000000000000 (1.5 * 2^52); fast_exp adds it
// to the scaled argument and subtracts it again.
ALIGNED_(16) juint _shifter[] = { 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL };
// Two 64-bit lanes of 0x00000000ffffffc0: a mask that clears the low six bits
// of a 32-bit value and zeroes the upper half of each lane.
ALIGNED_(16) juint _mmask[] = { 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL };
// Two 64-bit lanes holding the integer 0x000000000000ffc0.
ALIGNED_(16) juint _bias[] = { 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL };
// Lookup table for fast_exp: 256 words = 64 entries of 16 bytes, matching
// K = 64 in the algorithm description. Each entry is two 64-bit values stored
// as {low word, high word}.
// NOTE(review): judging by the bit patterns, the second value of entry j is
// the significand field of 2^(j/64) with a zero exponent field, and the first
// is a small correction term (presumably T_lo[j]) - confirm against the
// table's use in fast_exp.
ALIGNED_(16) juint _Tbl_addr[] =
{
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL
};
// 128 bits, all set.
ALIGNED_(16) juint _ALLONES[] = { 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL };
// Two copies of the double 1.0 (0x3ff0000000000000), i.e. the exponent bias
// placed in the exponent field of each lane.
ALIGNED_(16) juint _ebias[] = { 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL };
// Largest finite double (0x7fefffffffffffff), as {low word, high word}.
ALIGNED_(4) juint _XMAX[] = { 0xffffffffUL, 0x7fefffffUL };
// Smallest positive normal double (0x0010000000000000), as {low word, high word}.
ALIGNED_(4) juint _XMIN[] = { 0x00000000UL, 0x00100000UL };
// Positive infinity (0x7ff0000000000000), as {low word, high word}.
ALIGNED_(4) juint _INF[] = { 0x00000000UL, 0x7ff00000UL };
// Positive zero as a double (all 64 bits clear).
ALIGNED_(4) juint _ZERO[] = { 0x00000000UL, 0x00000000UL };
// The double 1.0 (0x3ff0000000000000), as {low word, high word}.
ALIGNED_(4) juint _ONE_val[] = { 0x00000000UL, 0x3ff00000UL };
// Emits x86_64 code that computes exp(x) for a double.
//
// Registers:
// input: xmm0 (x); every exit path leaves its result in xmm0
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, tmp - r11
// (eax, ecx, edx and tmp must be four different registers; asserted below)
// Stack: 24 bytes are reserved while the emitted code runs:
// [rsp + 0] 32-bit code (14 or 15) stored on some special paths; it is not
// read back anywhere in this function
// [rsp + 8] copy of the input x (its high dword is at [rsp + 12])
// [rsp + 16] slot the result is stored to and reloaded from on those paths
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
assert_different_registers(tmp, eax, ecx, edx);
// The address locals below emit no code, so this jmp lands on the very next
// emitted instruction.
jmp(start);
address cv = (address)_cv;
address Shifter = (address)_shifter;
address mmask = (address)_mmask;
address bias = (address)_bias;
address Tbl_addr = (address)_Tbl_addr;
address ALLONES = (address)_ALLONES;
address ebias = (address)_ebias;
address XMAX = (address)_XMAX;
address XMIN = (address)_XMIN;
address INF = (address)_INF;
address ZERO = (address)_ZERO;
address ONE_val = (address)_ONE_val;
bind(start);
subq(rsp, 24);
// Keep a copy of x for the special-case paths, then duplicate x into both
// lanes of xmm0.
movsd(Address(rsp, 8), xmm0);
unpcklpd(xmm0, xmm0);
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
// Range check. eax = bits 48..63 of x with the sign bit masked off (biased
// exponent plus the top four significand bits). (16527 - eax) | (eax - 15504)
// has its sign bit set exactly when eax is outside [15504, 16527]
// (0x3c90..0x408f, i.e. |x| below 2^-54 or at least 1024); the unsigned
// compare against INT_MIN tests that sign bit.
pextrw(eax, xmm0, 3);
andl(eax, 32767);
movl(edx, 16527);
subl(edx, eax);
subl(eax, 15504);
orl(edx, eax);
cmpl(edx, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// Main path. xmm1 = x * cv[0]; adding and then subtracting the Shifter
// constant (1.5 * 2^52) rounds that product to an integer n, whose bits are
// kept in the low dword of xmm7. xmm2/xmm3 multiples of n are subtracted from
// x to form the reduced argument.
mulpd(xmm1, xmm0);
addpd(xmm1, xmm6);
movapd(xmm7, xmm1);
subpd(xmm1, xmm6);
mulpd(xmm2, xmm1);
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
mulpd(xmm3, xmm1);
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
subpd(xmm0, xmm2);
// ecx = (n & 63) << 4: byte offset of a 16-byte entry in Tbl_addr.
// eax = edx = n >> 6 (arithmetic shift).
movdl(eax, xmm7);
movl(ecx, eax);
andl(ecx, 63);
shll(ecx, 4);
sarl(eax, 6);
movl(edx, eax);
// xmm7 = ((n with its low six bits cleared) + 0xffc0) << 46, which places
// (n >> 6) + 1023 in the exponent field of each 64-bit lane.
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
pand(xmm7, xmm6);
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
paddq(xmm7, xmm6);
psllq(xmm7, 46);
subpd(xmm0, xmm3);
lea(tmp, ExternalAddress(Tbl_addr));
movdqu(xmm2, Address(ecx, tmp));
// Polynomial in the reduced argument, combined with the table entry.
mulpd(xmm4, xmm0);
movapd(xmm6, xmm0);
movapd(xmm1, xmm0);
mulpd(xmm6, xmm6);
mulpd(xmm0, xmm6);
addpd(xmm5, xmm4);
mulsd(xmm0, xmm6);
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
addsd(xmm1, xmm2);
unpckhpd(xmm2, xmm2);
mulpd(xmm0, xmm5);
addsd(xmm1, xmm0);
por(xmm2, xmm7);
unpckhpd(xmm0, xmm0);
addsd(xmm0, xmm1);
addsd(xmm0, xmm6);
// Unsigned check that n >> 6 lies in [-894, 1022]; otherwise take the
// out-of-range-exponent path.
addl(edx, 894);
cmpl(edx, 1916);
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
// Common case: result = xmm0 * xmm2 + xmm2.
mulsd(xmm0, xmm2);
addsd(xmm0, xmm2);
jmp(B1_5);
// Out-of-range-exponent path (n >> 6 outside [-894, 1022]).
// NOTE(review): presumably this applies the power-of-two scale in separate
// steps so overflow/underflow happen in the final multiply - confirm against
// the LIBM source.
bind(L_2TAG_PACKET_1_0_2);
xorpd(xmm3, xmm3);
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
movl(edx, -1022);
subl(edx, eax);
movdl(xmm5, edx);
psllq(xmm4, xmm5);
movl(ecx, eax);
sarl(eax, 1);
pinsrw(xmm3, eax, 3);
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
psllq(xmm3, 4);
psubd(xmm2, xmm3);
mulsd(xmm0, xmm2);
cmpl(edx, 52);
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
pand(xmm4, xmm2);
paddd(xmm3, xmm6);
subsd(xmm2, xmm4);
addsd(xmm0, xmm2);
cmpl(ecx, 1023);
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
pextrw(ecx, xmm0, 3);
andl(ecx, 32768);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
movapd(xmm6, xmm0);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
// 32752 = 0x7ff0 masks the exponent field; zero means the product is zero or
// denormal.
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_5_0_2);
mulsd(xmm6, xmm3);
mulsd(xmm4, xmm3);
movdqu(xmm0, xmm6);
pxor(xmm6, xmm4);
psrad(xmm6, 31);
pshufd(xmm6, xmm6, 85);
psllq(xmm0, 1);
psrlq(xmm0, 1);
pxor(xmm0, xmm6);
psrlq(xmm6, 63);
paddq(xmm0, xmm6);
paddq(xmm0, xmm4);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
bind(L_2TAG_PACKET_4_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
jmp(B1_5);
bind(L_2TAG_PACKET_3_0_2);
addsd(xmm0, xmm4);
mulsd(xmm0, xmm3);
// Exponent field all ones: the result overflowed.
pextrw(ecx, xmm0, 3);
andl(ecx, 32752);
cmpl(ecx, 32752);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
jmp(B1_5);
bind(L_2TAG_PACKET_2_0_2);
paddd(xmm3, xmm6);
addpd(xmm0, xmm2);
mulsd(xmm0, xmm3);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
// |x| >= 1024 (reached from L_2TAG_PACKET_0_0_2 with eax = high dword of x,
// sign cleared). 2146435072 = 0x7ff00000: infinity or NaN goes to PACKET_9.
bind(L_2TAG_PACKET_8_0_2);
cmpl(eax, 2146435072);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
// Finite x: an unsigned high dword >= 0x80000000 means x is negative.
movl(eax, Address(rsp, 12));
cmpl(eax, INT_MIN);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
// Large positive x: XMAX * XMAX.
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
mulsd(xmm0, xmm0);
bind(L_2TAG_PACKET_7_0_2);
movl(Address(rsp, 0), 14);
jmp(L_2TAG_PACKET_6_0_2);
// Large negative x: XMIN * XMIN.
bind(L_2TAG_PACKET_10_0_2);
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
mulsd(xmm0, xmm0);
movl(Address(rsp, 0), 15);
jmp(L_2TAG_PACKET_6_0_2);
// Infinity or NaN. A high dword magnitude above 0x7ff00000 or a non-zero low
// dword means NaN.
bind(L_2TAG_PACKET_9_0_2);
movl(edx, Address(rsp, 8));
cmpl(eax, 2146435072);
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
cmpl(edx, 0);
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
// Infinite x: the signed high dword equals 0x7ff00000 only for +infinity.
movl(eax, Address(rsp, 12));
cmpl(eax, 2146435072);
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
jmp(B1_5);
// x is -infinity: return +0.0.
bind(L_2TAG_PACKET_12_0_2);
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
jmp(B1_5);
// NaN input: return x + x.
bind(L_2TAG_PACKET_11_0_2);
movsd(xmm0, Address(rsp, 8));
addsd(xmm0, xmm0);
jmp(B1_5);
// Input outside the main path's range. 1083179008 = 0x40900000 is the high
// dword of 1024.0: larger magnitudes go to PACKET_8, the remaining (tiny)
// inputs return 1.0 + x.
bind(L_2TAG_PACKET_0_0_2);
movl(eax, Address(rsp, 12));
andl(eax, 2147483647);
cmpl(eax, 1083179008);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
movsd(Address(rsp, 8), xmm0);
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
jmp(B1_5);
// Paths that stored a code at [rsp + 0] pass the result through [rsp + 16].
// B1_3 is bound here but no jump in this function targets it.
bind(L_2TAG_PACKET_6_0_2);
movq(Address(rsp, 16), xmm0);
bind(B1_3);
movq(xmm0, Address(rsp, 16));
// Common exit: release the 24 bytes reserved at entry.
bind(B1_5);
addq(rsp, 24);
}
#else
// The 32-bit code needs no SSE extension beyond SSE2; it additionally uses
// x87 instructions (fnstcw, fldcw, fld_d, ...).
//
// Layout of _static_const_table, by the byte offsets the 32-bit fast_exp
// below uses (64-bit values are stored low dword first; "x2" means the value
// is repeated in both 64-bit lanes):
//    0: 0xfff0000000000000 x2  sign/exponent mask, used with pandn
//   16: 0x00000000ffffffc0 x2  clears the low six bits of the rounded integer
//   32: 0x000000000000ffc0 x2  added to that integer before the shift by 46
//   48: 0x4338000000000000 x2  1.5 * 2^52, added then subtracted to round
//   64..159: six 16-byte constants for argument reduction and the polynomial
//  160..1183: 64 entries of 16 bytes, indexed by (n & 63) << 4
// 1184: 0x3ff0000000000000  1.0
// 1192: 0x7ff0000000000000  +infinity
// 1200: 0x0000000000000000  +0.0
// 1208: 0x7fefffffffffffff  largest finite double
// 1216: 0x0010000000000000  smallest positive normal double
ALIGNED_(16) juint _static_const_table[] =
{
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
0x00100000UL
};
// Emits 32-bit x86 code that computes exp(x) for a double.
//
// Registers:
//   input:   (rbp + 8); read here as [rsp + 128] once 120 bytes have been
//            reserved (the two agree when rbp == rsp on entry, presumably
//            right after the caller's enter() - confirm). The xmm0 argument
//            only names a scratch register.
//   scratch: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//            rax, rdx, rcx, rbx (tmp)
//   tmp is saved to [rsp + 64] on entry and reloaded at the end; in between
//   it holds the address of _static_const_table.
//   result:  every exit path ends with an fld_d of the result, i.e. it is
//            left on top of the x87 register stack.
//   (eax, ecx, edx and tmp must be four different registers; asserted below)
// NOTE(review): the emitted code never adds the 120 bytes back to rsp;
// presumably the caller's frame teardown restores rsp - confirm at call sites.
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
  Label start;
  assert_different_registers(tmp, eax, ecx, edx);
  // The address local below emits no code, so this jmp lands on the very next
  // emitted instruction.
  jmp(start);
  address static_const_table = (address)_static_const_table;

  bind(start);
  subl(rsp, 120);
  movl(Address(rsp, 64), tmp);
  lea(tmp, ExternalAddress(static_const_table));
  // Load x and duplicate it into both lanes of xmm0.
  movdqu(xmm0, Address(rsp, 128));
  unpcklpd(xmm0, xmm0);
  movdqu(xmm1, Address(tmp, 64));       // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
  movdqu(xmm6, Address(tmp, 48));       // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
  movdqu(xmm2, Address(tmp, 80));       // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
  movdqu(xmm3, Address(tmp, 96));       // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
  // Range check. eax = bits 48..63 of x with the sign bit masked off.
  // (16527 - eax) | (eax - 15504) has its sign bit set exactly when eax is
  // outside [15504, 16527] (|x| below 2^-54 or at least 1024); the unsigned
  // compare against INT_MIN tests that sign bit.
  pextrw(eax, xmm0, 3);
  andl(eax, 32767);
  movl(edx, 16527);
  subl(edx, eax);
  subl(eax, 15504);
  orl(edx, eax);
  cmpl(edx, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
  // Main path. Adding and then subtracting the constant at offset 48
  // (1.5 * 2^52) rounds x * const to an integer n, whose bits are kept in the
  // low dword of xmm7.
  mulpd(xmm1, xmm0);
  addpd(xmm1, xmm6);
  movapd(xmm7, xmm1);
  subpd(xmm1, xmm6);
  mulpd(xmm2, xmm1);
  movdqu(xmm4, Address(tmp, 128));      // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
  mulpd(xmm3, xmm1);
  movdqu(xmm5, Address(tmp, 144));      // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
  subpd(xmm0, xmm2);
  // ecx = (n & 63) << 4: byte offset of a 16-byte table entry.
  // eax = edx = n >> 6 (arithmetic shift).
  movdl(eax, xmm7);
  movl(ecx, eax);
  andl(ecx, 63);
  shll(ecx, 4);
  sarl(eax, 6);
  movl(edx, eax);
  // xmm7 = ((n with its low six bits cleared) + 0xffc0) << 46, which places
  // (n >> 6) + 1023 in the exponent field of each 64-bit lane.
  movdqu(xmm6, Address(tmp, 16));       // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
  pand(xmm7, xmm6);
  movdqu(xmm6, Address(tmp, 32));       // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
  paddq(xmm7, xmm6);
  psllq(xmm7, 46);
  subpd(xmm0, xmm3);
  movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
  // Polynomial in the reduced argument, combined with the table entry.
  mulpd(xmm4, xmm0);
  movapd(xmm6, xmm0);
  movapd(xmm1, xmm0);
  mulpd(xmm6, xmm6);
  mulpd(xmm0, xmm6);
  addpd(xmm5, xmm4);
  mulsd(xmm0, xmm6);
  mulpd(xmm6, Address(tmp, 112));       // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
  addsd(xmm1, xmm2);
  unpckhpd(xmm2, xmm2);
  mulpd(xmm0, xmm5);
  addsd(xmm1, xmm0);
  por(xmm2, xmm7);
  unpckhpd(xmm0, xmm0);
  addsd(xmm0, xmm1);
  addsd(xmm0, xmm6);
  // Unsigned check that n >> 6 lies in [-894, 1022]; otherwise take the
  // x87 path below.
  addl(edx, 894);
  cmpl(edx, 1916);
  jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
  // Common case: result = xmm0 * xmm2 + xmm2.
  mulsd(xmm0, xmm2);
  addsd(xmm0, xmm2);
  jmp(L_2TAG_PACKET_2_0_2);

  // Out-of-range exponent: finish the computation on the x87 unit. The
  // control word is saved at [rsp + 24]; a copy with bits 8-9 set (768 =
  // 0x300, the precision-control field) is loaded for the computation and the
  // saved word is restored afterwards.
  bind(L_2TAG_PACKET_1_0_2);
  fnstcw(Address(rsp, 24));
  movzwl(edx, Address(rsp, 24));
  orl(edx, 768);
  movw(Address(rsp, 28), edx);
  fldcw(Address(rsp, 28));
  // Split the exponent n >> 6 into two halves (eax and edx) and build the two
  // powers of two in xmm6 (together with the table significand) and xmm4.
  movl(edx, eax);
  sarl(eax, 1);
  subl(edx, eax);
  movdqu(xmm6, Address(tmp, 0));        // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
  pandn(xmm6, xmm2);
  addl(eax, 1023);
  movdl(xmm3, eax);
  psllq(xmm3, 52);
  por(xmm6, xmm3);
  addl(edx, 1023);
  movdl(xmm4, edx);
  psllq(xmm4, 52);
  movsd(Address(rsp, 8), xmm0);
  fld_d(Address(rsp, 8));
  movsd(Address(rsp, 16), xmm6);
  fld_d(Address(rsp, 16));
  fmula(1);
  faddp(1);
  movsd(Address(rsp, 8), xmm4);
  fld_d(Address(rsp, 8));
  fmulp(1);
  fstp_d(Address(rsp, 8));
  movsd(xmm0, Address(rsp, 8));
  fldcw(Address(rsp, 24));
  // Classify the result by its exponent field (32752 = 0x7ff0): all ones
  // means overflow, zero means zero/denormal.
  pextrw(ecx, xmm0, 3);
  andl(ecx, 32752);
  cmpl(ecx, 32752);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
  cmpl(ecx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_2_0_2);
  // (A run of compare/branch instructions that used to follow this
  // unconditional jmp had no label in front of it and could never execute;
  // it has been removed.)

  // edx = 14 / 15 are codes set on the overflow / underflow paths; nothing in
  // this function reads them.
  bind(L_2TAG_PACKET_3_0_2);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  bind(L_2TAG_PACKET_4_0_2);
  movl(edx, 15);

  // Special-result exit: push the result on the x87 stack and reload the
  // original input into xmm0.
  bind(L_2TAG_PACKET_5_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 128));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_6_0_2);

  // |x| >= 1024 (reached from L_2TAG_PACKET_0_0_2 with eax = high dword of x,
  // sign cleared). 2146435072 = 0x7ff00000: infinity or NaN goes to PACKET_8.
  bind(L_2TAG_PACKET_7_0_2);
  cmpl(eax, 2146435072);
  jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
  // Finite x: the sign bit of the high dword selects underflow vs. overflow.
  // This must be an UNSIGNED compare (high dword >= 0x80000000 <=> x < 0), as
  // in the 64-bit variant. A signed greaterEqual against INT_MIN is always
  // true and would send large positive x to the underflow result as well.
  movl(eax, Address(rsp, 132));
  cmpl(eax, INT_MIN);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
  // Large positive x: XMAX * XMAX.
  movsd(xmm0, Address(tmp, 1208));      // 0xffffffffUL, 0x7fefffffUL
  mulsd(xmm0, xmm0);
  movl(edx, 14);
  jmp(L_2TAG_PACKET_5_0_2);

  // Large negative x: XMIN * XMIN.
  bind(L_2TAG_PACKET_9_0_2);
  movsd(xmm0, Address(tmp, 1216));      // 0x00000000UL, 0x00100000UL
  mulsd(xmm0, xmm0);
  movl(edx, 15);
  jmp(L_2TAG_PACKET_5_0_2);

  // Infinity or NaN. A high dword magnitude above 0x7ff00000 or a non-zero
  // low dword means NaN.
  bind(L_2TAG_PACKET_8_0_2);
  movl(edx, Address(rsp, 128));
  cmpl(eax, 2146435072);
  jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
  cmpl(edx, 0);
  jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
  // Infinite x: the signed high dword equals 0x7ff00000 only for +infinity.
  movl(eax, Address(rsp, 132));
  cmpl(eax, 2146435072);
  jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1192));      // 0x00000000UL, 0x7ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // x is -infinity: return +0.0.
  bind(L_2TAG_PACKET_11_0_2);
  movsd(xmm0, Address(tmp, 1200));      // 0x00000000UL, 0x00000000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // NaN input: return x + x.
  bind(L_2TAG_PACKET_10_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  // Input outside the main path's range. 1083179008 = 0x40900000 is the high
  // dword of 1024.0: larger magnitudes go to PACKET_7, the remaining (tiny)
  // inputs return 1.0 + x.
  bind(L_2TAG_PACKET_0_0_2);
  movl(eax, Address(rsp, 132));
  andl(eax, 2147483647);
  cmpl(eax, 1083179008);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
  movsd(xmm0, Address(rsp, 128));
  addsd(xmm0, Address(tmp, 1184));      // 0x00000000UL, 0x3ff00000UL
  jmp(L_2TAG_PACKET_2_0_2);

  // Normal exit: move the SSE result onto the x87 stack.
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 48), xmm0);
  fld_d(Address(rsp, 48));

  // Common exit: restore the caller's tmp register.
  bind(L_2TAG_PACKET_6_0_2);
  movl(tmp, Address(rsp, 64));
}
#endif

View file

@ -0,0 +1,655 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG()
// ---------------------
//
// x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of the reciprocal-approximation instruction (B0; the code below emits rcpps)
// B = int((B0*2^7+0.5))/2^7
//
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
//
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log(NaN) = quiet NaN, and raise invalid exception
// log(+INF) = that INF
// log(0) = -INF with divide-by-zero exception raised
// log(1) = +0
// log(x) = NaN with invalid exception raised if x < -0, including -INF
//
/******************************************************************************/
#ifdef _LP64
// The 64-bit code requires only SSE2; fast_log below additionally uses the
// SSE3 movddup instruction when VM_Version::supports_sse3() reports it.
//
// _L_tbl holds 129 entries of 16 bytes (two doubles each, stored low dword
// first). fast_log indexes it with a byte offset that is a multiple of 16,
// derived from the rounded reciprocal approximation.
// NOTE(review): per the algorithm description above these are presumably the
// high and low parts of -log(B) - confirm against the LIBM source.
ALIGNED_(16) juint _L_tbl[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL
};
// Two doubles (low dword first): 0x3fa62e42fefa3800 and 0x3ceef35793c76730.
// fast_log multiplies each by the exponent term held in xmm7, which is the
// unbiased exponent times 16, so these are presumably the high and low parts
// of log(2)/16 - confirm against the LIBM source.
ALIGNED_(16) juint _log2[] =
{
0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
};
// Six doubles (low dword first) loaded as three 16-byte pairs at offsets 0,
// 16 and 32. Per the algorithm description these feed the polynomial p(r);
// the bit patterns are approximately 1/7, -1/4, -1/6, 1/3, 1/5 and -1/2.
ALIGNED_(16) juint _coeff[] =
{
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL
};
// Emits x86_64 code that computes the natural logarithm of a double.
//
// Registers:
//   input:   xmm0 (x); every exit path leaves its result in xmm0
//   scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//            eax, ecx, edx, tmp1, tmp2 (the original header lists them as
//            rax, rdx, rcx, r8, r11); all five must be different registers
//            (asserted below)
// Stack: 24 bytes are reserved while the emitted code runs:
//   [rsp + 0]  copy of the input x
//   [rsp + 8]  slot the result is stored to and reloaded from on the
//              error paths
//   [rsp + 16] 32-bit code (2 or 3) stored on the error paths; it is not read
//              back anywhere in this function
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2;
  Label B1_3, B1_5, start;
  assert_different_registers(tmp1, tmp2, eax, ecx, edx);
  // The address locals below emit no code, so this jmp lands on the very next
  // emitted instruction.
  jmp(start);
  address L_tbl = (address)_L_tbl;
  address log2 = (address)_log2;
  address coeff = (address)_coeff;

  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 0), xmm0);
  // Materialize the constants through the register PARAMETERS. The code used
  // to hard-code rax/rdx/rcx here, which is only correct when the caller
  // happens to pass exactly those registers.
  mov64(eax, 0x3ff0000000000000);       // bit pattern of 1.0
  movdq(xmm2, eax);
  mov64(edx, 0x77f0000000000000);
  movdq(xmm3, edx);
  movl(ecx, 32768);                     // added to the reciprocal bits below
  movdl(xmm4, ecx);
  mov64(tmp1, 0xffffe00000000000);      // mask keeping the upper bits (high part)
  movdq(xmm5, tmp1);
  movdqu(xmm1, xmm0);
  // eax = bits 48..63 of x (sign, biased exponent, top four significand bits).
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  movl(ecx, 16352);                     // 1022 << 4: exponent bias term
  psrlq(xmm0, 27);
  lea(tmp2, ExternalAddress(L_tbl));
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);                    // reciprocal approximation (B0)
  // xmm1 = significand bits of x (top 12 bits cleared).
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  // Unsigned range check: eax - 16 >= 32736 exactly when eax < 16 (zero or
  // denormal) or eax >= 32752 (infinity, NaN, or sign bit set).
  subl(eax, 16);
  cmpl(eax, 32736);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // Main path (also entered from the denormal path with ecx = 18416).
  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);
  psllq(xmm0, 29);
  pand(xmm5, xmm1);
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);
  mulpd(xmm5, xmm0);
  // xmm7 = 16 * (unbiased exponent): exponent field of eax minus ecx.
  andl(eax, 32752);
  subl(eax, ecx);
  cvtsi2sdl(xmm7, eax);
  mulsd(xmm1, xmm0);
  movq(xmm6, ExternalAddress(log2));    // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);
  // edx = 16-byte-aligned byte offset into L_tbl taken from the reciprocal.
  andl(edx, 16711680);
  shrl(edx, 12);
  movdqu(xmm0, Address(tmp2, edx));
  movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);
  movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  // Duplicate the low double of xmm1 into both lanes of xmm5; movddup does it
  // in one instruction when SSE3 is available.
  if (VM_Version::supports_sse3()) {
    movddup(xmm5, xmm1);
  } else {
    movdqu(xmm5, xmm1);
    movlhps(xmm5, xmm5);
  }
  mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  // Same duplication for xmm0 -> xmm6.
  if (VM_Version::supports_sse3()) {
    movddup(xmm6, xmm0);
  } else {
    movdqu(xmm6, xmm0);
    movlhps(xmm6, xmm6);
  }
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);
  jmp(B1_5);

  // Special inputs. Reload x and undo the subl(eax, 16) above.
  bind(L_2TAG_PACKET_0_0_2);
  movq(xmm0, Address(rsp, 0));
  movq(xmm1, Address(rsp, 0));
  addl(eax, 16);
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);  // sign bit set
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);       // zero or denormal

  // +infinity or NaN: return x + x.
  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm0);
  jmp(B1_5);

  // Sign bit set and exponent field all ones. The flags still come from the
  // cmpl(ecx, -2097152) in L_2TAG_PACKET_2_0_2: "above" means the upper
  // significand bits are non-zero (NaN); a non-zero low dword also means NaN;
  // otherwise x is -infinity.
  bind(L_2TAG_PACKET_5_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
  jmp(L_2TAG_PACKET_6_0_2);

  // Zero or denormal. All 64 bits zero means x is +0.
  bind(L_2TAG_PACKET_3_0_2);
  xorpd(xmm1, xmm1);
  addsd(xmm1, xmm0);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
  // Denormal: scale x by 2^128 (18416 = 0x47f0 in the top word), redo the
  // prologue on the scaled value and rejoin the main path with ecx = 18416 so
  // the exponent term compensates for the scaling.
  xorpd(xmm1, xmm1);
  movl(eax, 18416);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psrlq(xmm0, 27);
  movl(ecx, 18416);
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  // Sign bit set. edx = low dword; ecx = high dword shifted left by one
  // (drops the sign). ecx >= 0xffe00000 (unsigned) means the exponent field
  // is all ones.
  bind(L_2TAG_PACKET_2_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);
  cmpl(ecx, -2097152);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
  // All remaining bits zero means x is -0.
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);

  // Negative x (including -infinity): return 0 * infinity, i.e. NaN
  // (32752 = 0x7ff0 in the top word is +infinity). Code 3.
  bind(L_2TAG_PACKET_6_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movl(Address(rsp, 16), 3);
  jmp(L_2TAG_PACKET_8_0_2);

  // x is +0 or -0: return -1.0 / 0.0, i.e. -infinity (49136 = 0xbff0 in the
  // top word is -1.0). Code 2.
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(Address(rsp, 16), 2);

  // Error paths pass the result through [rsp + 8]. B1_3 is bound here but no
  // jump in this function targets it.
  bind(L_2TAG_PACKET_8_0_2);
  movq(Address(rsp, 8), xmm0);

  bind(B1_3);
  movq(xmm0, Address(rsp, 8));

  // Common exit: release the 24 bytes reserved at entry.
  bind(B1_5);
  addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
// Constant pool for the 32-bit fast_log() below. fast_log() addresses it by
// byte offset from the table base (each juint is 4 bytes; 64-bit values are
// stored low 32-bit word first):
//   [0, 2064)   129 entries of 16 bytes, fetched with movdqu at a computed
//               offset that is a multiple of 16
//   2064, 2072  0x3fa62e42fefa3800 and 0x3ceef35793c76730; each is multiplied
//               by the converted exponent term
//               NOTE(review): looks like log(2)/16 split into high and low
//               parts -- confirm against the LOG() algorithm description.
//   2080..2127  three 16-byte vectors loaded into xmm3, xmm4 and xmm2
//               NOTE(review): presumably polynomial coefficients -- confirm.
//   2128        0xffffe00000000000 (stored twice), loaded into xmm5 and used
//               as an AND mask
ALIGNED_(16) juint _static_const_table_log[] =
{
0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
0xffffe000UL
};
// Emits the body of the 32-bit (SSE2 + x87) natural-logarithm routine.
//
// Registers:
//   input:   not taken from a register -- the double argument is loaded from
//            the stack at [rsp + 112] after rsp has been lowered by 104 below
//   output:  the result is pushed onto the x87 stack with fld_d
//   scratch: xmm0 - xmm7, eax, ecx, edx
//            (the original note names rax, rdx, rcx for the general registers)
//   tmp:     spilled to [rsp + 40] on entry and reloaded on exit; in between
//            it holds the base address of _static_const_table_log
//            (the original note names rbx)
//
// NOTE(review): rsp is lowered by 104 here and is not raised again in this
// function; presumably the enclosing stub restores it (e.g. from a frame
// pointer) -- confirm at the call site before changing the frame layout.
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
  Label L_2TAG_PACKET_10_0_2, start;
  assert_different_registers(tmp, eax, ecx, edx);
  // The constant table is a C++ array outside the emitted code stream, so no
  // jump around it is emitted; the routine simply starts at 'start'.
  address static_const_table = (address)_static_const_table_log;
  bind(start);

  // ---- Prologue: reserve scratch space, save tmp, point tmp at the table ----
  subl(rsp, 104);
  movl(Address(rsp, 40), tmp);
  lea(tmp, ExternalAddress(static_const_table));

  // ---- Set-up of constants and operand decomposition ----
  xorpd(xmm2, xmm2);
  movl(eax, 16368);                 // 0x3ff0
  pinsrw(xmm2, eax, 3);             // xmm2 = 0x3ff0000000000000 (1.0)
  xorpd(xmm3, xmm3);
  movl(edx, 30704);                 // 0x77f0
  pinsrw(xmm3, edx, 3);             // xmm3 = 0x77f0000000000000 (exponent bits OR-ed onto the mantissa below)
  movsd(xmm0, Address(rsp, 112));   // xmm0 = x (argument on the stack)
  movapd(xmm1, xmm0);               // xmm1 = x, becomes the mantissa operand
  movl(ecx, 32768);
  movdl(xmm4, ecx);                 // xmm4 = 0x8000, added to the reciprocal's bit pattern below
  movsd(xmm5, Address(tmp, 2128));  // 0x00000000UL, 0xffffe000UL
  pextrw(eax, xmm0, 3);             // eax = top 16 bits of x (sign, exponent, 4 mantissa bits)
  // Shift x's leading bits into single-precision position and take an
  // approximate reciprocal of the result.
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 16352);                 // 0x3fe0, subtracted from the exponent bits below
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);                  // clear sign and exponent of xmm1 ...
  pshufd(xmm6, xmm5, 228);          // (228 = 0xe4 is the identity shuffle: xmm6 = xmm5)
  psrlq(xmm1, 12);                  // ... leaving only the 52 mantissa bits
  // Unsigned range check on the top word: only 16 <= top < 0x7ff0 continues
  // on the main path. Zero/denormal, negative, infinite and NaN inputs branch.
  subl(eax, 16);
  cmpl(eax, 32736);                 // 0x7fe0
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // ---- Main path (also re-entered after denormal scaling) ----
  bind(L_2TAG_PACKET_1_0_2);
  paddd(xmm0, xmm4);
  por(xmm1, xmm3);
  movdl(edx, xmm0);                 // keep the reciprocal bits for the table index
  psllq(xmm0, 29);                  // move the single-precision bits into double position
  pand(xmm5, xmm1);                 // xmm5 = masked (high) part of the mantissa operand
  pand(xmm0, xmm6);
  subsd(xmm1, xmm5);                // xmm1 = remaining (low) part
  mulpd(xmm5, xmm0);
  andl(eax, 32752);                 // 0x7ff0: isolate the exponent bits of the top word
  subl(eax, ecx);                   // remove the bias chosen on this path
  cvtsi2sdl(xmm7, eax);             // xmm7 = exponent term as a double
  mulsd(xmm1, xmm0);
  movsd(xmm6, Address(tmp, 2064));  // 0xfefa3800UL, 0x3fa62e42UL
  movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
  subsd(xmm5, xmm2);                // subtract 1.0 from the high product
  andl(edx, 16711680);              // 0xff0000: index bits of the reciprocal
  shrl(edx, 12);                    // -> byte offset into the table, a multiple of 16
  movdqu(xmm0, Address(tmp, edx));  // 16-byte table entry
  movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
  addsd(xmm1, xmm5);                // xmm1 = reduced argument (high + low product parts)
  movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
  mulsd(xmm6, xmm7);
  pshufd(xmm5, xmm1, 68);           // 68 = 0x44: broadcast the low double to both lanes
  mulsd(xmm7, Address(tmp, 2072));  // 0x93c76730UL, 0x3ceef357UL
  // Combine the table value, the exponent term and the polynomial in the
  // reduced argument. The statement order below is kept exactly as generated.
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  mulpd(xmm5, xmm5);
  pshufd(xmm6, xmm0, 228);
  addsd(xmm0, xmm1);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  subsd(xmm6, xmm0);
  mulsd(xmm4, xmm1);
  pshufd(xmm2, xmm0, 238);          // 238 = 0xee: broadcast the high double to both lanes
  addsd(xmm1, xmm6);
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);                // xmm0 = result
  jmp(L_2TAG_PACKET_2_0_2);

  // ---- Special inputs ----
  bind(L_2TAG_PACKET_0_0_2);
  movsd(xmm0, Address(rsp, 112));   // reload x
  movdqu(xmm1, xmm0);
  addl(eax, 16);                    // undo the earlier subtraction: eax = top word of x
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);   // sign bit set
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_4_0_2);        // exponent field is zero
  // Remaining case: positive with an all-ones exponent. Also reached for
  // negative NaNs. The result is x + x.
  bind(L_2TAG_PACKET_5_0_2);
  addsd(xmm0, xmm0);
  jmp(L_2TAG_PACKET_2_0_2);

  // Sign bit set and exponent all ones. The first jcc still tests the flags of
  // the cmpl(ecx, -2097152) executed before the branch that leads here.
  bind(L_2TAG_PACKET_6_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);        // high mantissa bits set
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);        // low mantissa bits set
  jmp(L_2TAG_PACKET_7_0_2);                          // mantissa is zero

  // Sign bit set: split x into its low (edx) and high (ecx) 32-bit halves.
  bind(L_2TAG_PACKET_3_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);                   // shift the sign bit out
  cmpl(ecx, -2097152);              // 0xffe00000: exponent all ones?
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);        // all remaining bits zero

  // Result = 0.0 * (double with top word 0x7ff0).
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);                 // 0x7ff0
  pinsrw(xmm1, eax, 3);
  movl(edx, 3);                     // NOTE(review): not read again in this function
  mulsd(xmm0, xmm1);

  // Common exit for the two results above: push the result on the x87 stack
  // and put the original argument back into xmm0.
  bind(L_2TAG_PACKET_9_0_2);
  movsd(Address(rsp, 0), xmm0);
  movsd(xmm0, Address(rsp, 112));
  fld_d(Address(rsp, 0));
  jmp(L_2TAG_PACKET_10_0_2);

  // Result = (double with top word 0xbff0, i.e. -1.0) / 0.0.
  bind(L_2TAG_PACKET_8_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);                 // 0xbff0
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(edx, 2);                     // NOTE(review): not read again in this function
  jmp(L_2TAG_PACKET_9_0_2);

  // Exponent field is zero: an all-zero bit pattern goes to the division
  // above. Anything else is scaled up and sent back through the main path.
  bind(L_2TAG_PACKET_4_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
  xorpd(xmm1, xmm1);
  movl(eax, 18416);                 // 0x47f0
  pinsrw(xmm1, eax, 3);             // xmm1 = 0x47f0000000000000 (2^128)
  mulsd(xmm0, xmm1);                // scale x
  // Repeat the operand decomposition on the scaled value, with ecx adjusted.
  movapd(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  psllq(xmm0, 5);
  movl(ecx, 18416);
  psrlq(xmm0, 34);
  rcpss(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 228);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  // ---- Normal exit: push the result on the x87 stack ----
  bind(L_2TAG_PACKET_2_0_2);
  movsd(Address(rsp, 24), xmm0);
  fld_d(Address(rsp, 24));

  // ---- Epilogue: restore tmp ----
  bind(L_2TAG_PACKET_10_0_2);
  movl(tmp, Address(rsp, 40));
}
#endif

View file

@ -0,0 +1,687 @@
/*
* Copyright (c) 2016, Intel Corporation.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "runtime/stubRoutines.hpp"
#include "macroAssembler_x86.hpp"
#ifdef _MSC_VER
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
/******************************************************************************/
// ALGORITHM DESCRIPTION - LOG10()
// ---------------------
//
// Let x=2^k * mx, mx in [1,2)
//
// Get B~1/mx based on the output of rcpss instruction (B0)
// B = int((B0*LH*2^7+0.5))/2^7
// LH is a short approximation for log10(e)
//
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
//
// Result: k*log10(2) - log(B) + p(r)
// p(r) is a degree 7 polynomial
// -log(B) read from data table (high, low parts)
// Result is formed from high and low parts
//
// Special cases:
// log10(0) = -INF with divide-by-zero exception raised
// log10(1) = +0
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
// log10(+INF) = +INF
//
/******************************************************************************/
#ifdef _LP64
// The 64 bit code is at most SSE2 compliant
// Bit masks loaded as one 16-byte vector into xmm5 by the 64-bit fast_log10().
// Read as 64-bit lanes (the low 32-bit word is stored first):
//   lane 0 = 0xfffffffff8000000, lane 1 = 0xffffe00000000000.
// fast_log10() ANDs this vector with the mantissa operand (xmm1) and, after
// swapping the lanes with pshufd(xmm6, xmm5, 78), ANDs the swapped copy with
// the reciprocal-derived operand (xmm0).
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
{
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
};
// Two doubles (the low 32-bit word is stored first):
//   [0] 0x3fdbc00000000000 = 0.43359375; fast_log10() subtracts it when
//       forming the reduced argument (the "LH" of the algorithm description)
//   [1] 0x3f5a7a6cbf2e4108; fast_log10() multiplies it by the reduced argument
//       NOTE(review): looks like the correction (log10(e) - LH) / LH -- confirm.
ALIGNED_(16) juint _LOG10_E[] =
{
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
// Lookup table for the 64-bit fast_log10(): 516 juints = 2064 bytes, read as
// 16-byte entries (two doubles, low 32-bit word first) with
// movdqu(xmm0, Address(r11, <index>, Address::times_1, -1504)), where the
// index is a multiple of 16 derived from the reciprocal's bit pattern.
// Per the algorithm description above, the table supplies the -log(B) term
// as a high part and a low part.
// NOTE(review): the last entry is all zeros and the first entry's high part
// (0x3fd34413509f7800) looks like log10(2) -- confirm the exact indexing.
ALIGNED_(16) juint _L_tbl_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL
};
// Two doubles (the low 32-bit word is stored first): 0x3f934413509f7800 and
// 0x3cdfef311f12b358. fast_log10() multiplies each of them by the exponent
// term it converts from eax with cvtsi2sdl.
// NOTE(review): looks like log10(2)/16 split into a high and a low part
// (the exponent term is taken from bits 4..14 of x's top word, i.e. it is
// scaled by 16) -- confirm.
ALIGNED_(16) juint _log2_log10[] =
{
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
};
// Six doubles (the low 32-bit word is stored first), loaded by fast_log10()
// as three 16-byte vectors: byte offset 0 -> xmm3, 16 -> xmm4, 32 -> xmm2.
// NOTE(review): presumably the coefficients of the polynomial p(r) from the
// algorithm description -- confirm which coefficient sits in which lane.
ALIGNED_(16) juint _coeff_log10[] =
{
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL
};
// Emits the body of the 64-bit (SSE2) base-10 logarithm routine.
//
// Registers:
//   input:   xmm0 (also spilled to [rsp + 0] for the special-case paths)
//   output:  xmm0
//   scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
//            eax, ecx, edx (rax, rcx, rdx per the original note)
//   r11:     receives the address of _L_tbl_log10 (tmp)
// The code lowers rsp by 24 on entry and raises it by 24 on exit.
// Code generated by Intel C compiler for LIBM library
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_5, start;
  assert_different_registers(r11, eax, ecx, edx);
  address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
  address LOG10_E = (address)_LOG10_E;
  address L_tbl = (address)_L_tbl_log10;
  address log2 = (address)_log2_log10;
  address coeff = (address)_coeff_log10;

  // ---- Prologue: reserve 24 bytes and keep a copy of x at [rsp + 0] ----
  bind(start);
  subq(rsp, 24);
  movsd(Address(rsp, 0), xmm0);

  // ---- Set-up of constants and operand decomposition ----
  bind(B1_2);
  xorpd(xmm2, xmm2);
  movl(eax, 16368);                 // 0x3ff0
  pinsrw(xmm2, eax, 3);             // xmm2 = 0x3ff0000000000000 (1.0)
  movl(ecx, 1054736384);            // 0x3ede0000: single-precision 0.43359375
  movdl(xmm7, ecx);                 //   (same value as _LOG10_E[0], the "LH" of the description)
  xorpd(xmm3, xmm3);
  movl(edx, 30704);                 // 0x77f0
  pinsrw(xmm3, edx, 3);             // xmm3 = 0x77f0000000000000 (exponent bits OR-ed onto the mantissa below)
  movdqu(xmm1, xmm0);               // xmm1 = x, becomes the mantissa operand
  movl(edx, 32768);
  movdl(xmm4, edx);                 // xmm4 = 0x8000, added to the reciprocal's bit pattern below
  movdqu(xmm5, ExternalAddress(HIGHSIGMASK));    //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
  pextrw(eax, xmm0, 3);             // eax = top 16 bits of x (sign, exponent, 4 mantissa bits)
  // Shift x's leading bits into single-precision position and take an
  // approximate reciprocal of the result.
  por(xmm0, xmm2);
  movl(ecx, 16352);                 // 0x3fe0, subtracted from the exponent bits below
  psrlq(xmm0, 27);
  movdqu(xmm2, ExternalAddress(LOG10_E));    //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);                  // clear sign and exponent of xmm1 ...
  pshufd(xmm6, xmm5, 78);           // (78 = 0x4e: xmm6 = xmm5 with its 64-bit lanes swapped)
  psrlq(xmm1, 12);                  // ... leaving only the 52 mantissa bits
  // Unsigned range check on the top word: only 16 <= top < 0x7ff0 continues
  // on the main path. Zero/denormal, negative, infinite and NaN inputs branch.
  subl(eax, 16);
  cmpl(eax, 32736);                 // 0x7fe0
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);

  // ---- Main path (also re-entered after denormal scaling) ----
  bind(L_2TAG_PACKET_1_0_2);
  mulss(xmm0, xmm7);                // reciprocal * LH
  por(xmm1, xmm3);
  lea(r11, ExternalAddress(L_tbl));
  andpd(xmm5, xmm1);                // xmm5 = masked (high) part of the mantissa operand
  paddd(xmm0, xmm4);
  subsd(xmm1, xmm5);                // xmm1 = remaining (low) part
  movdl(edx, xmm0);                 // keep the reciprocal bits for the table index
  psllq(xmm0, 29);                  // move the single-precision bits into double position
  andpd(xmm0, xmm6);
  andl(eax, 32752);                 // 0x7ff0: isolate the exponent bits of the top word
  subl(eax, ecx);                   // remove the bias chosen on this path
  cvtsi2sdl(xmm7, eax);             // xmm7 = exponent term as a double
  mulpd(xmm5, xmm0);
  mulsd(xmm1, xmm0);
  movq(xmm6, ExternalAddress(log2));    //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
  movdqu(xmm3, ExternalAddress(coeff));    //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
  subsd(xmm5, xmm2);                // subtract LH (_LOG10_E[0]) from the high product
  andl(edx, 16711680);              // 0xff0000: index bits of the reciprocal
  shrl(edx, 12);                    // -> byte offset, a multiple of 16
  // Index with the edx parameter that was just computed (not the fixed rdx
  // register), so the lookup is correct whichever register the caller passes.
  movdqu(xmm0, Address(r11, edx, Address::times_1, -1504));
  movdqu(xmm4, ExternalAddress(16 + coeff));    //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
  addsd(xmm1, xmm5);                // xmm1 = reduced argument r (high + low product parts)
  movdqu(xmm2, ExternalAddress(32 + coeff));    //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
  mulsd(xmm6, xmm7);
  pshufd(xmm5, xmm1, 68);           // 68 = 0x44: broadcast the low double to both lanes
  mulsd(xmm7, ExternalAddress(8 + log2));    //0x1f12b358UL, 0x3cdfef31UL
  // Combine the table value, the exponent term and the polynomial in r.
  // The statement order below is kept exactly as generated.
  mulsd(xmm3, xmm1);
  addsd(xmm0, xmm6);
  mulpd(xmm4, xmm5);
  movq(xmm6, ExternalAddress(8 + LOG10_E));    //0xbf2e4108UL, 0x3f5a7a6cUL
  mulpd(xmm5, xmm5);
  addpd(xmm4, xmm2);
  mulpd(xmm3, xmm5);
  pshufd(xmm2, xmm0, 228);          // 228 = 0xe4 is the identity shuffle: xmm2 = xmm0
  addsd(xmm0, xmm1);
  mulsd(xmm4, xmm1);
  subsd(xmm2, xmm0);
  mulsd(xmm6, xmm1);
  addsd(xmm1, xmm2);
  pshufd(xmm2, xmm0, 238);          // 238 = 0xee: broadcast the high double to both lanes
  mulsd(xmm5, xmm5);
  addsd(xmm7, xmm2);
  addsd(xmm1, xmm6);
  addpd(xmm4, xmm3);
  addsd(xmm1, xmm7);
  mulpd(xmm4, xmm5);
  addsd(xmm1, xmm4);
  pshufd(xmm5, xmm4, 238);
  addsd(xmm1, xmm5);
  addsd(xmm0, xmm1);                // xmm0 = result
  jmp(B1_5);

  // ---- Special inputs ----
  bind(L_2TAG_PACKET_0_0_2);
  movq(xmm0, Address(rsp, 0));      // reload x
  movq(xmm1, Address(rsp, 0));
  addl(eax, 16);                    // undo the earlier subtraction: eax = top word of x
  cmpl(eax, 32768);
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);   // sign bit set
  cmpl(eax, 16);
  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);        // exponent field is zero
  // Remaining case: positive with an all-ones exponent. Also reached for
  // negative NaNs. The result is x + x.
  bind(L_2TAG_PACKET_4_0_2);
  addsd(xmm0, xmm0);
  jmp(B1_5);

  // Sign bit set and exponent all ones. The first jcc still tests the flags of
  // the cmpl(ecx, -2097152) executed before the branch that leads here.
  bind(L_2TAG_PACKET_5_0_2);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);        // high mantissa bits set
  cmpl(edx, 0);
  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);        // low mantissa bits set
  jmp(L_2TAG_PACKET_6_0_2);                          // mantissa is zero

  // Exponent field is zero: if 0.0 + x has an all-zero bit pattern go to the
  // division below. Otherwise scale x and send it back through the main path.
  bind(L_2TAG_PACKET_3_0_2);
  xorpd(xmm1, xmm1);
  addsd(xmm1, xmm0);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  movl(eax, 18416);                 // 0x47f0
  pinsrw(xmm1, eax, 3);             // xmm1 = 0x47f0000000000000 (2^128)
  mulsd(xmm0, xmm1);                // scale x
  // Repeat the operand decomposition on the scaled value, with ecx adjusted.
  xorpd(xmm2, xmm2);
  movl(eax, 16368);
  pinsrw(xmm2, eax, 3);
  movdqu(xmm1, xmm0);
  pextrw(eax, xmm0, 3);
  por(xmm0, xmm2);
  movl(ecx, 18416);
  psrlq(xmm0, 27);
  movdqu(xmm2, ExternalAddress(LOG10_E));    //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
  psrld(xmm0, 2);
  rcpps(xmm0, xmm0);
  psllq(xmm1, 12);
  pshufd(xmm6, xmm5, 78);
  psrlq(xmm1, 12);
  jmp(L_2TAG_PACKET_1_0_2);

  // Sign bit set: split x into its low (edx) and high (ecx) 32-bit halves.
  bind(L_2TAG_PACKET_2_0_2);
  movdl(edx, xmm1);
  psrlq(xmm1, 32);
  movdl(ecx, xmm1);
  addl(ecx, ecx);                   // shift the sign bit out
  cmpl(ecx, -2097152);              // 0xffe00000: exponent all ones?
  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
  orl(edx, ecx);
  cmpl(edx, 0);
  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);        // all remaining bits zero

  // Result = 0.0 * (double with top word 0x7ff0).
  bind(L_2TAG_PACKET_6_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 32752);                 // 0x7ff0
  pinsrw(xmm1, eax, 3);
  mulsd(xmm0, xmm1);
  movl(Address(rsp, 16), 9);        // NOTE(review): [rsp + 16] is not read in this function
  jmp(L_2TAG_PACKET_8_0_2);

  // Result = (double with top word 0xbff0, i.e. -1.0) / 0.0.
  bind(L_2TAG_PACKET_7_0_2);
  xorpd(xmm1, xmm1);
  xorpd(xmm0, xmm0);
  movl(eax, 49136);                 // 0xbff0
  pinsrw(xmm0, eax, 3);
  divsd(xmm0, xmm1);
  movl(Address(rsp, 16), 8);        // NOTE(review): [rsp + 16] is not read in this function

  // Common tail of the two results above: the result goes through [rsp + 8].
  bind(L_2TAG_PACKET_8_0_2);
  movq(Address(rsp, 8), xmm0);
  bind(B1_3);
  movq(xmm0, Address(rsp, 8));

  // ---- Epilogue: release the 24 bytes reserved on entry ----
  bind(L_2TAG_PACKET_9_0_2);
  bind(B1_5);
  addq(rsp, 24);
}
#else
// The 32 bit code is at most SSE2 compliant
// Constant pool for the 32-bit fast_log10(): 540 juints = 2160 bytes. It is
// the concatenation of the five tables used by the 64-bit code above, at
// these byte offsets (each juint is 4 bytes, low 32-bit word stored first):
//   [0, 2064)     same values as _L_tbl_log10 (129 entries of 16 bytes)
//   [2064, 2080)  same values as _log2_log10
//   [2080, 2128)  same values as _coeff_log10
//   [2128, 2144)  same values as _HIGHSIGMASK_log10
//   [2144, 2160)  same values as _LOG10_E
ALIGNED_(16) juint _static_const_table_log10[] =
{
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
};
// Emits the SSE2 instruction sequence that computes log10 of a double.
// This variant uses 32-bit stack addressing and hands the result back on
// the x87 register stack.
//
//registers,
// input: the double at Address(rsp, 112), read after rsp has been lowered
//        by 104 (the routine loads it from the stack; it does not read
//        xmm0 on entry)
// result: pushed on the x87 register stack with fld_d
// scratch: xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
// rax, rdx, rcx, rbx (tmp)
//
// tmp holds the address of _static_const_table_log10 for the duration of
// the sequence; its previous value is saved at Address(rsp, 40) on entry
// and reloaded at the end.
// NOTE(review): the 104-byte rsp adjustment is not undone inside this
// routine; presumably the calling stub restores rsp (e.g. with leave()) -
// confirm against the caller.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
// tmp is the table base pointer, so it must not alias the integer scratch registers.
assert_different_registers(tmp, eax, ecx, edx);
address static_const_table_log10 = (address)_static_const_table_log10;
// 'start' is bound here but is not used as a jump target within this routine.
bind(start);
// Reserve 104 bytes of stack scratch space, save tmp in it, then point tmp at the table.
subl(rsp, 104);
movl(Address(rsp, 40), tmp);
lea(tmp, ExternalAddress(static_const_table_log10));
// xmm2 = 0x3FF0 in the top word of the low double, i.e. the bit pattern of 1.0.
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
// xmm7 low dword = 0x3EDE0000 (a single-precision constant multiplied into the reciprocal below).
movl(ecx, 1054736384);
movdl(xmm7, ecx);
// xmm3 = 0x77F0 in the top word of the low double.
xorpd(xmm3, xmm3);
movl(edx, 30704);
pinsrw(xmm3, edx, 3);
// Load the input value from the stack; keep a second copy in xmm1.
movsd(xmm0, Address(rsp, 112));
movdqu(xmm1, xmm0);
movl(edx, 32768);
movdl(xmm4, edx);
movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL
// eax = top 16 bits of the input: sign, 11 exponent bits, top 4 mantissa bits.
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
// ecx = 0x3FE0: the exponent offset subtracted from eax on the main path.
movl(ecx, 16352);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
// Approximate reciprocal of the value now in the low float lane of xmm0.
rcpss(xmm0, xmm0);
// Shift left then right by 12: clears the sign and exponent bits of xmm1,
// leaving only the 52 mantissa bits.
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
// Unsigned range check on the top word: (eax - 16) >= 0x7FE0 holds when
// eax < 16 (exponent field zero: zero or denormal) or eax >= 0x7FF0
// (infinity, NaN, or any value with the sign bit set). Those inputs take
// the special-case path.
subl(eax, 16);
cmpl(eax, 32736);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
// Main evaluation path (also entered from the denormal path after rescaling).
// The instruction order below is hand-scheduled; do not reorder.
bind(L_2TAG_PACKET_1_0_2);
mulss(xmm0, xmm7);
por(xmm1, xmm3);
andpd(xmm5, xmm1);
paddd(xmm0, xmm4);
subsd(xmm1, xmm5);
movdl(edx, xmm0);
psllq(xmm0, 29);
andpd(xmm0, xmm6);
// Isolate the exponent field bits (mask 0x7FF0), subtract the offset in ecx,
// and convert the integer difference to a double in xmm7.
andl(eax, 32752);
subl(eax, ecx);
cvtsi2sdl(xmm7, eax);
mulpd(xmm5, xmm0);
mulsd(xmm1, xmm0);
movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL
movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL
subsd(xmm5, xmm2);
// Bits 16..23 of edx (mask 0xFF0000), shifted right by 12, form a byte
// offset that is a multiple of 16; it selects a 16-byte table entry
// relative to tmp - 1504.
andl(edx, 16711680);
shrl(edx, 12);
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL
addsd(xmm1, xmm5);
movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL
mulsd(xmm6, xmm7);
pshufd(xmm5, xmm1, 68);
mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL
mulsd(xmm3, xmm1);
addsd(xmm0, xmm6);
mulpd(xmm4, xmm5);
movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL
mulpd(xmm5, xmm5);
addpd(xmm4, xmm2);
mulpd(xmm3, xmm5);
pshufd(xmm2, xmm0, 228);
addsd(xmm0, xmm1);
mulsd(xmm4, xmm1);
subsd(xmm2, xmm0);
mulsd(xmm6, xmm1);
addsd(xmm1, xmm2);
pshufd(xmm2, xmm0, 238);
mulsd(xmm5, xmm5);
addsd(xmm7, xmm2);
addsd(xmm1, xmm6);
addpd(xmm4, xmm3);
addsd(xmm1, xmm7);
mulpd(xmm4, xmm5);
addsd(xmm1, xmm4);
pshufd(xmm5, xmm4, 238);
addsd(xmm1, xmm5);
// Final sum: the result is in the low double of xmm0.
addsd(xmm0, xmm1);
jmp(L_2TAG_PACKET_2_0_2);
// Special-case dispatch: reload the input from the stack and classify it.
bind(L_2TAG_PACKET_0_0_2);
movsd(xmm0, Address(rsp, 112)); // reload the input value (same stack slot as at entry)
movdqu(xmm1, xmm0);
// Undo the earlier "subl(eax, 16)" so eax is the raw top word again.
addl(eax, 16);
// Sign bit set (eax >= 0x8000): negative input, -0.0, or a NaN with the sign bit set.
cmpl(eax, 32768);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
// Exponent field zero (eax < 16): +0.0 or a positive denormal.
cmpl(eax, 16);
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
// Remaining case here is a top word >= 0x7FF0 with the sign clear
// (+infinity or NaN): x + x yields the result.
bind(L_2TAG_PACKET_5_0_2);
addsd(xmm0, xmm0);
jmp(L_2TAG_PACKET_2_0_2);
// Entered only from the "cmpl(ecx, -2097152)" branch below, so the first jcc
// still tests the flags of that comparison: strictly above means the
// exponent is all ones with high mantissa bits set (NaN).
bind(L_2TAG_PACKET_6_0_2);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
// Exponent all ones and high mantissa bits zero: a non-zero low dword still means NaN.
cmpl(edx, 0);
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
// Otherwise the input is -infinity: treated like any other negative input.
jmp(L_2TAG_PACKET_7_0_2);
// Sign bit set: edx = low dword, ecx = high dword shifted left by one
// (which drops the sign bit).
bind(L_2TAG_PACKET_3_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
addl(ecx, ecx);
// -2097152 is 0xFFE00000: an unsigned ecx at or above it means the exponent field is all ones.
cmpl(ecx, -2097152);
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
// All remaining bits zero means the input is -0.0.
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
// Negative non-zero input: produce NaN by computing 0.0 * infinity
// (0x7FF0 in the top word of xmm1 is +infinity).
bind(L_2TAG_PACKET_7_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 32752);
pinsrw(xmm1, eax, 3);
// NOTE(review): edx = 9 is not read again in this routine; presumably a
// leftover error code - confirm.
movl(edx, 9);
mulsd(xmm0, xmm1);
// Common exit for the NaN / -infinity results: spill the result to the stack
// and push it on the x87 stack.
bind(L_2TAG_PACKET_9_0_2);
movsd(Address(rsp, 0), xmm0);
movsd(xmm0, Address(rsp, 112)); // reload the input value into xmm0 (same stack slot as at entry)
fld_d(Address(rsp, 0));
jmp(L_2TAG_PACKET_10_0_2);
// Zero input (+0.0 or -0.0): produce -infinity by computing -1.0 / 0.0
// (0xBFF0 in the top word of xmm0 is -1.0).
bind(L_2TAG_PACKET_8_0_2);
xorpd(xmm1, xmm1);
xorpd(xmm0, xmm0);
movl(eax, 49136);
pinsrw(xmm0, eax, 3);
divsd(xmm0, xmm1);
// NOTE(review): edx = 8 is not read again in this routine; presumably a
// leftover error code - confirm.
movl(edx, 8);
jmp(L_2TAG_PACKET_9_0_2);
// Exponent field zero and sign clear: +0.0 goes to the zero handler,
// anything else is a positive denormal.
bind(L_2TAG_PACKET_4_0_2);
movdl(edx, xmm1);
psrlq(xmm1, 32);
movdl(ecx, xmm1);
orl(edx, ecx);
cmpl(edx, 0);
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
// Denormal: multiply by 2^128 (0x47F0 in the top word) to make it normal,
// then repeat the preparation steps from the top of the routine. ecx is set
// to 0x47F0 instead of 0x3FE0 so the exponent subtraction on the main path
// uses the larger offset.
xorpd(xmm1, xmm1);
movl(eax, 18416);
pinsrw(xmm1, eax, 3);
mulsd(xmm0, xmm1);
xorpd(xmm2, xmm2);
movl(eax, 16368);
pinsrw(xmm2, eax, 3);
movdqu(xmm1, xmm0);
pextrw(eax, xmm0, 3);
por(xmm0, xmm2);
movl(ecx, 18416);
psllq(xmm0, 5);
movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL
psrlq(xmm0, 34);
rcpss(xmm0, xmm0);
psllq(xmm1, 12);
pshufd(xmm6, xmm5, 78);
psrlq(xmm1, 12);
jmp(L_2TAG_PACKET_1_0_2);
// Normal exit: spill the result in xmm0 to the stack and push it on the x87 stack.
bind(L_2TAG_PACKET_2_0_2);
movsd(Address(rsp, 24), xmm0);
fld_d(Address(rsp, 24));
// Common tail: restore the caller's value of tmp.
bind(L_2TAG_PACKET_10_0_2);
movl(tmp, Address(rsp, 40));
}
#endif

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2093,25 +2093,6 @@ class StubGenerator: public StubCodeGenerator {
entry_checkcast_arraycopy);
}
// Emits two small x87-based stubs, "log10" and "tan", and records their
// entry addresses in StubRoutines::_intrinsic_log10 and
// StubRoutines::_intrinsic_tan (cast to double (*)(double)).
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
// Push the double at rsp + 4 onto the x87 stack (presumably the argument
// just above the return address - confirm against the calling convention).
__ fld_d(Address(rsp, 4));
__ flog10();
// Nothing is stored before returning, so the result stays on the x87 stack.
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
// Same shape as the log10 stub above, using trigfunc('t') for the operation.
__ fld_d(Address(rsp, 4));
__ trigfunc('t');
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@ -3534,6 +3515,31 @@ class StubGenerator: public StubCodeGenerator {
}
// Emits the log10 stub built on MacroAssembler::fast_log10 and returns the
// address of its first instruction.
address generate_libmLog10() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");

  // The frame set up by enter() is required for proper stackwalking of the
  // RuntimeStub frame; leave() tears it down again before returning.
  __ enter();
  // xmm0..xmm7 and rax/rcx/rdx are the working registers; rbx is passed as tmp.
  __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, rax, rcx, rdx, rbx);
  __ leave();
  __ ret(0);

  return entry;
}
address generate_libmPow() {
address start = __ pc();
@ -3628,6 +3634,44 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Emits the libm_tancot_huge code sequence and returns the address of its
// first instruction. No frame setup or return instruction is emitted here;
// presumably libm_tancot_huge supplies its own - confirm.
address generate_libm_tan_cot_huge() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);

  return entry;
}
// Emits the tan stub built on MacroAssembler::fast_tan and returns the
// address of its first instruction.
address generate_libmTan() {
  address entry = __ pc();

  BLOCK_COMMENT("Entry:");

  // The frame set up by enter() is required for proper stackwalking of the
  // RuntimeStub frame; leave() tears it down again before returning.
  __ enter();
  // xmm0..xmm7 and rax/rcx/rdx are the working registers; rbx is passed as tmp.
  __ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, rax, rcx, rdx, rbx);
  __ leave();
  __ ret(0);

  return entry;
}
// Safefetch stubs.
@ -3853,23 +3897,24 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
}
if (UseLibmSinIntrinsic) {
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
StubRoutines::_dtan = generate_libmTan();
}
}
}
void generate_all() {
// Generates all stubs and initializes the entry points
@ -3889,8 +3934,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others

View file

@ -2972,35 +2972,6 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
}
// Emits two x87-based stubs, "log10" and "tan", and records their entry
// addresses in StubRoutines::_intrinsic_log10 and
// StubRoutines::_intrinsic_tan (cast to double (*)(double)).
// Both stubs take their operand in xmm0 and leave the result in xmm0,
// moving the value to and from the x87 stack through an 8-byte stack slot.
void generate_math_stubs() {
{
StubCodeMark mark(this, "StubRoutines", "log10");
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
// Reserve an 8-byte slot and use it to move xmm0 onto the x87 stack.
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ flog10();
// Pop the x87 result into the slot and reload it into xmm0.
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
// Release the slot before returning.
__ addq(rsp, 8);
__ ret(0);
}
{
StubCodeMark mark(this, "StubRoutines", "tan");
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
// Same transfer sequence as the log10 stub, with trigfunc('t') as the operation.
__ subq(rsp, 8);
__ movdbl(Address(rsp, 0), xmm0);
__ fld_d(Address(rsp, 0));
__ trigfunc('t');
__ fstp_d(Address(rsp, 0));
__ movdbl(xmm0, Address(rsp, 0));
__ addq(rsp, 8);
__ ret(0);
}
}
// AES intrinsic stubs
enum {AESBlockSize = 16};
@ -4731,6 +4702,46 @@ class StubGenerator: public StubCodeGenerator {
#endif
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmLog10() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
@ -4810,6 +4821,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@ -4822,6 +4835,8 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -4853,6 +4868,8 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
@ -4865,6 +4882,55 @@ class StubGenerator: public StubCodeGenerator {
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
__ ret(0);
return start;
}
address generate_libmTan() {
address start = __ pc();
const XMMRegister x0 = xmm0;
const XMMRegister x1 = xmm1;
const XMMRegister x2 = xmm2;
const XMMRegister x3 = xmm3;
const XMMRegister x4 = xmm4;
const XMMRegister x5 = xmm5;
const XMMRegister x6 = xmm6;
const XMMRegister x7 = xmm7;
const Register tmp1 = r8;
const Register tmp2 = r9;
const Register tmp3 = r10;
const Register tmp4 = r11;
BLOCK_COMMENT("Entry:");
__ enter(); // required for proper stackwalking of RuntimeStub frame
#ifdef _WIN64
__ push(rsi);
__ push(rdi);
// save the xmm registers which must be preserved 6-7
__ subptr(rsp, 4 * wordSize);
__ movdqu(Address(rsp, 0), xmm6);
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
#endif
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
#ifdef _WIN64
// restore xmm regs belonging to calling function
__ movdqu(xmm6, Address(rsp, 0));
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
__ addptr(rsp, 4 * wordSize);
__ pop(rdi);
__ pop(rsi);
#endif
__ leave(); // required for proper stackwalking of RuntimeStub frame
@ -5065,18 +5131,30 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
}
if (VM_Version::supports_sse2()) {
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
StubRoutines::_dexp = generate_libmExp();
StubRoutines::_dlog = generate_libmLog();
StubRoutines::_dlog10 = generate_libmLog10();
StubRoutines::_dpow = generate_libmPow();
if (UseLibmSinIntrinsic) {
StubRoutines::_dtan = generate_libmTan();
StubRoutines::_dsin = generate_libmSin();
}
if (UseLibmCosIntrinsic) {
StubRoutines::_dcos = generate_libmCos();
}
}
}
void generate_all() {
// Generates all stubs and initializes the entry points
@ -5119,8 +5197,6 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
generate_math_stubs();
// don't bother generating these AES intrinsic stubs unless global flag is set
if (UseAESIntrinsics) {
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others

View file

@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
// Addresses of the constant tables common for sin and cos.
// Each definition starts out as NULL; the values are assigned elsewhere
// (the stub generator stores the address of the matching table array).
address StubRoutines::x86::_ONEHALF_adr = NULL;
address StubRoutines::x86::_P_2_adr = NULL;
address StubRoutines::x86::_SC_4_adr = NULL;
address StubRoutines::x86::_Ctable_adr = NULL;
address StubRoutines::x86::_SC_2_adr = NULL;
address StubRoutines::x86::_SC_3_adr = NULL;
address StubRoutines::x86::_SC_1_adr = NULL;
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
address StubRoutines::x86::_PI_4_adr = NULL;
address StubRoutines::x86::_PI32INV_adr = NULL;
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
address StubRoutines::x86::_P_1_adr = NULL;
address StubRoutines::x86::_P_3_adr = NULL;
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
// Addresses of the constant tables common for sincos and tancot.
// These also start out as NULL and are assigned elsewhere.
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
address StubRoutines::x86::_Pi4Inv_adr = NULL;
address StubRoutines::x86::_Pi4x3_adr = NULL;
address StubRoutines::x86::_Pi4x4_adr = NULL;
address StubRoutines::x86::_ones_adr = NULL;
uint64_t StubRoutines::x86::_crc_by128_masks[] =
{
/* The fields in this structure are arranged so that they can be

View file

@ -57,6 +57,48 @@
// byte flip mask for sha256
static address _pshuffle_byte_flip_mask_addr;
// Tables common for LIBM sin and cos.
// Each table is declared as a pair: the juint array holding the constants
// and an 'address' member (suffix _adr) meant to hold that array's address.
// NOTE(review): names that begin with an underscore followed by an
// uppercase letter (e.g. _ONEHALF, _P_2, _SC_4) are reserved identifiers in
// C++; renaming would affect every user of these members, so it is only
// flagged here.
static juint _ONEHALF[];
static address _ONEHALF_adr;
static juint _P_2[];
static address _P_2_adr;
static juint _SC_4[];
static address _SC_4_adr;
static juint _Ctable[];
static address _Ctable_adr;
static juint _SC_2[];
static address _SC_2_adr;
static juint _SC_3[];
static address _SC_3_adr;
static juint _SC_1[];
static address _SC_1_adr;
static juint _PI_INV_TABLE[];
static address _PI_INV_TABLE_adr;
static juint _PI_4[];
static address _PI_4_adr;
static juint _PI32INV[];
static address _PI32INV_adr;
static juint _SIGN_MASK[];
static address _SIGN_MASK_adr;
static juint _P_1[];
static address _P_1_adr;
static juint _P_3[];
static address _P_3_adr;
static juint _NEG_ZERO[];
static address _NEG_ZERO_adr;
// Tables common for LIBM sincos and tancot (same array / _adr pairing).
static juint _L_2il0floatpacket_0[];
static address _L_2il0floatpacket_0_adr;
static juint _Pi4Inv[];
static address _Pi4Inv_adr;
static juint _Pi4x3[];
static address _Pi4x3_adr;
static juint _Pi4x4[];
static address _Pi4x4_adr;
static juint _ones[];
static address _ones_adr;
public:
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
@ -69,4 +111,24 @@
static address k256_addr() { return _k256_adr; }
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
// Accessors for the LIBM table addresses: each returns the current value of
// the corresponding *_adr member (which may still be NULL if it has not been
// assigned).
static address _ONEHALF_addr() { return _ONEHALF_adr; }
static address _P_2_addr() { return _P_2_adr; }
static address _SC_4_addr() { return _SC_4_adr; }
static address _Ctable_addr() { return _Ctable_adr; }
static address _SC_2_addr() { return _SC_2_adr; }
static address _SC_3_addr() { return _SC_3_adr; }
static address _SC_1_addr() { return _SC_1_adr; }
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
static address _PI_4_addr() { return _PI_4_adr; }
static address _PI32INV_addr() { return _PI32INV_adr; }
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
static address _P_1_addr() { return _P_1_adr; }
static address _P_3_addr() { return _P_3_adr; }
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
// Accessors for the tables shared by sincos and tancot.
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
static address _Pi4x3_addr() { return _Pi4x3_adr; }
static address _Pi4x4_addr() { return _Pi4x4_adr; }
static address _ones_addr() { return _ones_adr; }
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP

View file

@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ fld_d(Address(rsp, 1*wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_sqrt:
__ fsqrt();
@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_log:
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
}
else {
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_log10:
__ flog10();
// Store to stack to convert 80bit precision back to 64bits
__ push_fTOS();
__ pop_fTOS();
__ subptr(rsp, 2 * wordSize);
__ fstp_d(Address(rsp, 0));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
__ addptr(rsp, 2 * wordSize);
break;
case Interpreter::java_lang_math_pow:
__ fld_d(Address(rsp, 3*wordSize)); // second argument
__ subptr(rsp, 4 * wordSize);
__ fstp_d(Address(rsp, 0));
__ fstp_d(Address(rsp, 2 * wordSize));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
case Interpreter::java_lang_math_exp:
__ subptr(rsp, 2*wordSize);
__ fstp_d(Address(rsp, 0));
if (VM_Version::supports_sse2()) {
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));

View file

@ -29,6 +29,7 @@
#include "interpreter/interpreterRuntime.hpp"
#include "interpreter/templateInterpreterGenerator.hpp"
#include "runtime/arguments.hpp"
#include "runtime/sharedRuntime.hpp"
#define __ _masm->
@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
__ sqrtsd(xmm0, Address(rsp, wordSize));
} else if (kind == Interpreter::java_lang_math_exp) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dexp() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
}
} else if (kind == Interpreter::java_lang_math_log) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dlog() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
}
} else if (kind == Interpreter::java_lang_math_log10) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dlog10() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
}
} else if (kind == Interpreter::java_lang_math_sin) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dsin() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
}
} else if (kind == Interpreter::java_lang_math_cos) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dcos() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
}
} else if (kind == Interpreter::java_lang_math_pow) {
__ movdbl(xmm1, Address(rsp, wordSize));
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
if (StubRoutines::dpow() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
}
} else if (kind == Interpreter::java_lang_math_tan) {
__ movdbl(xmm0, Address(rsp, wordSize));
if (StubRoutines::dtan() != NULL) {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
} else {
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
}
} else {
__ fld_d(Address(rsp, wordSize));
switch (kind) {
case Interpreter::java_lang_math_sin :
__ trigfunc('s');
break;
case Interpreter::java_lang_math_cos :
__ trigfunc('c');
break;
case Interpreter::java_lang_math_tan :
__ trigfunc('t');
break;
case Interpreter::java_lang_math_abs:
__ fabs();
break;
case Interpreter::java_lang_math_log10:
__ flog10();
break;
default :
ShouldNotReachHere();
}

View file

@ -844,6 +844,11 @@ public:
static uint32_t get_xsave_header_upper_segment() {
return _cpuid_info.xem_xcr0_edx;
}
// SSE2 and later processors implement a 'pause' instruction
// that can be used for efficient implementation of
// the intrinsic for java.lang.Thread.onSpinWait().
// Returns exactly what supports_sse2() returns.
static bool supports_on_spin_wait() { return supports_sse2(); }
};
#endif // CPU_X86_VM_VM_VERSION_X86_HPP

View file

@ -1719,6 +1719,10 @@ const bool Matcher::match_rule_supported(int opcode) {
if (!(UseSSE > 4))
ret_value = false;
break;
case Op_OnSpinWait:
if (VM_Version::supports_on_spin_wait() == false)
ret_value = false;
break;
}
return ret_value; // Per default match rules are supported.
@ -2996,6 +3000,24 @@ instruct sqrtD_imm(regD dst, immD con) %{
ins_pipe(pipe_slow);
%}
// Matches the OnSpinWait node and encodes it as a single 'pause' instruction.
// The encoding below is unconditional, so the format text is unconditional
// as well: the previous format printed "(empty encoding)" when os::is_MP()
// was false even though 'pause' was still emitted.
instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{ "pause\t! membar_onspinwait" %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
// ====================VECTOR INSTRUCTIONS=====================================
// Load vectors (4 bytes long)

View file

@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
ins_pipe( pipe_slow );
%}
// x87 tangent, selected when UseSSE<=1. Source and destination both use the
// regDPR1 operand class (presumably the x87 top-of-stack register - confirm).
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
match(Set dst(TanD src));
format %{ "DTAN $dst" %}
// fptan replaces ST(0) with its tangent and then pushes 1.0; the
// following "fstp st" pops that 1.0 so only the tangent remains.
ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8)); // fstp st
ins_pipe( pipe_slow );
%}
// Tangent of a double held in an XMM register, selected when UseSSE>=2.
// The operand is updated in place: it is pushed onto the x87 stack, fptan is
// applied, and the result is moved back into the same register.
instruct tanD_reg(regD dst, eFlagsReg cr) %{
predicate (UseSSE>=2);
match(Set dst(TanD dst));
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
format %{ "DTAN $dst" %}
// fptan pushes 1.0 after computing the tangent; "fstp st" discards it
// before the result is transferred back.
ins_encode( Push_SrcD(dst),
Opcode(0xD9), Opcode(0xF2), // fptan
Opcode(0xDD), Opcode(0xD8), // fstp st
Push_ResultD(dst) );
ins_pipe( pipe_slow );
%}
instruct atanDPR_reg(regDPR dst, regDPR src) %{
predicate (UseSSE<=1);
match(Set dst(AtanD dst src));
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
ins_pipe( pipe_slow );
%}
// x87 base-10 logarithm, selected when UseSSE<=1.
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
predicate (UseSSE<=1);
// The source Double operand on FPU stack
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fxch ; swap ST(0) with ST(1)
// fyl2x ; compute log_10(2) * log_2(x)
// The swap is needed because fyl2x multiplies ST(1) by log_2 of ST(0):
// after fldlg2 the constant is on top, so x has to be brought back to ST(0).
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FXCH \n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Opcode(0xD9), Opcode(0xC9), // fxch
Opcode(0xD9), Opcode(0xF1)); // fyl2x
ins_pipe( pipe_slow );
%}
// Base-10 logarithm of a double held in an XMM register, selected when
// UseSSE>=2. The computation itself is done on the x87 stack.
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
predicate (UseSSE>=2);
effect(KILL cr);
match(Set dst (Log10D src));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
// No fxch is needed here: the constant is pushed first and the source
// operand second, so x is already in ST(0) when fyl2x executes.
format %{ "FLDLG2 \t\t\t#Log10\n\t"
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
%}
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcD(src),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultD(dst));
ins_pipe( pipe_slow );
%}
//-------------Float Instructions-------------------------------
// Float Math

View file

@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
// -----------Trig and Transcendental Instructions------------------------------
// Tangent of a double held in an XMM register, updated in place: the operand
// is pushed onto the x87 stack, fptan is applied, and the result is moved
// back into the same register.
instruct tanD_reg(regD dst) %{
match(Set dst (TanD dst));
format %{ "dtan $dst\n\t" %}
// fptan pushes 1.0 after computing the tangent; "fstp st" discards it
// before the result is transferred back.
ins_encode( Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF2), //fptan
Opcode(0xDD), Opcode(0xD8), //fstp st
Push_ResultXD(dst) );
ins_pipe( pipe_slow );
%}
// Base-10 logarithm of a double held in an XMM register, updated in place.
// The computation itself is done on the x87 stack.
instruct log10D_reg(regD dst) %{
// The source and result Double operands in XMM registers
match(Set dst (Log10D dst));
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
// fyl2x ; compute log_10(2) * log_2(x)
// The constant is pushed before the operand, so x is in ST(0) and
// log_10(2) in ST(1) when fyl2x executes.
format %{ "fldlg2\t\t\t#Log10\n\t"
"fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
%}
ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2
Push_SrcXD(dst),
Opcode(0xD9), Opcode(0xF1), // fyl2x
Push_ResultXD(dst));
ins_pipe( pipe_slow );
%}
//----------Arithmetic Conversion Instructions---------------------------------
instruct roundFloat_nop(regF dst)

View file

@ -120,7 +120,9 @@ public class HotSpotCodeCacheProvider implements CodeCacheProvider {
resultInstalledCode = installedCode;
}
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, (HotSpotSpeculationLog) log);
HotSpotSpeculationLog speculationLog = (log != null && log.hasSpeculations()) ? (HotSpotSpeculationLog) log : null;
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, speculationLog);
if (result != config.codeInstallResultOk) {
String resultDesc = config.getCodeInstallResultDescription(result);
if (compiledCode instanceof HotSpotCompiledNmethod) {

View file

@ -38,7 +38,7 @@ public class HotSpotSpeculationLog implements SpeculationLog {
/** All speculations that have been a deoptimization reason. */
private Set<SpeculationReason> failedSpeculations;
/** Strong references to all reasons embededded in the current nmethod. */
/** Strong references to all reasons embedded in the current nmethod. */
private volatile Collection<SpeculationReason> speculations;
@Override
@ -81,4 +81,9 @@ public class HotSpotSpeculationLog implements SpeculationLog {
return HotSpotObjectConstantImpl.forObject(reason);
}
@Override
public synchronized boolean hasSpeculations() {
    // No collection recorded yet means there is nothing to report.
    if (speculations == null) {
        return false;
    }
    return !speculations.isEmpty();
}
}

View file

@ -56,4 +56,11 @@ public interface SpeculationLog {
* argument to the deoptimization function.
*/
JavaConstant speculate(SpeculationReason reason);
/**
* Returns whether this log has any speculations.
*
* @return {@code true} if there are speculations, {@code false} otherwise
*/
boolean hasSpeculations();
}

View file

@ -247,7 +247,7 @@ void Canonicalizer::do_ArrayLength (ArrayLength* x) {
} else if ((lf = x->array()->as_LoadField()) != NULL) {
ciField* field = lf->field();
if (field->is_constant() && field->is_static()) {
if (field->is_static_constant()) {
assert(PatchALot || ScavengeRootsInCode < 2, "Constant field loads are folded during parsing");
ciObject* c = field->constant_value().as_object();
if (!c->is_null_object()) {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -77,17 +77,13 @@ class CodeStub: public CompilationResourceObj {
}
};
define_array(CodeStubArray, CodeStub*)
define_stack(_CodeStubList, CodeStubArray)
class CodeStubList: public _CodeStubList {
class CodeStubList: public GrowableArray<CodeStub*> {
public:
CodeStubList(): _CodeStubList() {}
CodeStubList(): GrowableArray<CodeStub*>() {}
void append(CodeStub* stub) {
if (!contains(stub)) {
_CodeStubList::append(stub);
GrowableArray<CodeStub*>::append(stub);
}
}
};

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,12 +53,9 @@ class C1_MacroAssembler;
class CFGPrinter;
typedef LIR_OprDesc* LIR_Opr;
define_array(BasicTypeArray, BasicType)
define_stack(BasicTypeList, BasicTypeArray)
define_array(ExceptionInfoArray, ExceptionInfo*)
define_stack(ExceptionInfoList, ExceptionInfoArray)
typedef GrowableArray<BasicType> BasicTypeArray;
typedef GrowableArray<BasicType> BasicTypeList;
typedef GrowableArray<ExceptionInfo*> ExceptionInfoList;
class Compilation: public StackObj {
friend class CompilationResourceObj;

View file

@ -131,6 +131,9 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
if (!VM_Version::supports_atomic_getset4()) return false;
#endif
break;
case vmIntrinsics::_onSpinWait:
if (!VM_Version::supports_on_spin_wait()) return false;
break;
case vmIntrinsics::_arraycopy:
case vmIntrinsics::_currentTimeMillis:
case vmIntrinsics::_nanoTime:

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -164,7 +164,7 @@ FrameMap::FrameMap(ciMethod* method, int monitors, int reserved_argument_area_si
_reserved_argument_area_size = MAX2(4, reserved_argument_area_size) * BytesPerWord;
_argcount = method->arg_size();
_argument_locations = new intArray(_argcount, -1);
_argument_locations = new intArray(_argcount, _argcount, -1);
_incoming_arguments = java_calling_convention(signature_type_array_for(method), false);
_oop_map_arg_count = _incoming_arguments->reserved_stack_slots();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -36,8 +36,6 @@
class ciMethod;
class CallingConvention;
class BasicTypeArray;
class BasicTypeList;
//--------------------------------------------------------
// FrameMap

View file

@ -357,7 +357,7 @@ void BlockListBuilder::mark_loops() {
_active = BitMap(BlockBegin::number_of_blocks()); _active.clear();
_visited = BitMap(BlockBegin::number_of_blocks()); _visited.clear();
_loop_map = intArray(BlockBegin::number_of_blocks(), 0);
_loop_map = intArray(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), 0);
_next_loop_index = 0;
_next_block_number = _blocks.length();
@ -1354,7 +1354,7 @@ void GraphBuilder::lookup_switch() {
} else {
// collect successors & keys
BlockList* sux = new BlockList(l + 1, NULL);
intArray* keys = new intArray(l, 0);
intArray* keys = new intArray(l, l, 0);
int i;
bool has_bb = false;
for (i = 0; i < l; i++) {
@ -1521,6 +1521,8 @@ void GraphBuilder::method_return(Value x) {
}
Value GraphBuilder::make_constant(ciConstant field_value, ciField* field) {
if (!field_value.is_valid()) return NULL;
BasicType field_type = field_value.basic_type();
ValueType* value = as_ValueType(field_value);
@ -1588,9 +1590,8 @@ void GraphBuilder::access_field(Bytecodes::Code code) {
case Bytecodes::_getstatic: {
// check for compile-time constants, i.e., initialized static final fields
Value constant = NULL;
if (field->is_constant() && !PatchALot) {
if (field->is_static_constant() && !PatchALot) {
ciConstant field_value = field->constant_value();
// Stable static fields are checked for non-default values in ciField::initialize_from().
assert(!field->is_stable() || !field_value.is_null_or_zero(),
"stable static w/ default value shouldn't be a constant");
constant = make_constant(field_value, field);
@ -1619,36 +1620,23 @@ void GraphBuilder::access_field(Bytecodes::Code code) {
Value constant = NULL;
obj = apop();
ObjectType* obj_type = obj->type()->as_ObjectType();
if (obj_type->is_constant() && !PatchALot) {
if (field->is_constant() && obj_type->is_constant() && !PatchALot) {
ciObject* const_oop = obj_type->constant_value();
if (!const_oop->is_null_object() && const_oop->is_loaded()) {
if (field->is_constant()) {
ciConstant field_value = field->constant_value_of(const_oop);
if (FoldStableValues && field->is_stable() && field_value.is_null_or_zero()) {
// Stable field with default value can't be constant.
constant = NULL;
} else {
if (field_value.is_valid()) {
constant = make_constant(field_value, field);
}
} else {
// For CallSite objects treat the target field as a compile time constant.
if (const_oop->is_call_site()) {
ciCallSite* call_site = const_oop->as_call_site();
// For CallSite objects add a dependency for invalidation of the optimization.
if (field->is_call_site_target()) {
ciMethodHandle* target = call_site->get_target();
if (target != NULL) { // just in case
ciConstant field_val(T_OBJECT, target);
constant = new Constant(as_ValueType(field_val));
// Add a dependence for invalidation of the optimization.
ciCallSite* call_site = const_oop->as_call_site();
if (!call_site->is_constant_call_site()) {
ciMethodHandle* target = field_value.as_object()->as_method_handle();
dependency_recorder()->assert_call_site_target_value(call_site, target);
}
}
}
}
}
}
}
if (constant != NULL) {
push(type, append(constant));
} else {
@ -1722,7 +1710,7 @@ void GraphBuilder::check_args_for_profiling(Values* obj_args, int expected) {
bool ignored_will_link;
ciSignature* declared_signature = NULL;
ciMethod* real_target = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
assert(expected == obj_args->length() || real_target->is_method_handle_intrinsic(), "missed on arg?");
assert(expected == obj_args->max_length() || real_target->is_method_handle_intrinsic(), "missed on arg?");
#endif
}
@ -1733,7 +1721,7 @@ Values* GraphBuilder::collect_args_for_profiling(Values* args, ciMethod* target,
if (obj_args == NULL) {
return NULL;
}
int s = obj_args->size();
int s = obj_args->max_length();
// if called through method handle invoke, some arguments may have been popped
for (int i = start, j = 0; j < s && i < args->length(); i++) {
if (args->at(i)->type()->is_object_kind()) {
@ -2170,7 +2158,7 @@ void GraphBuilder::new_multi_array(int dimensions) {
ciKlass* klass = stream()->get_klass(will_link);
ValueStack* state_before = !klass->is_loaded() || PatchALot ? copy_state_before() : copy_state_exhandling();
Values* dims = new Values(dimensions, NULL);
Values* dims = new Values(dimensions, dimensions, NULL);
// fill in all dimensions
int i = dimensions;
while (i-- > 0) dims->at_put(i, ipop());
@ -3773,9 +3761,9 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, Bytecode
int start = 0;
Values* obj_args = args_list_for_profiling(callee, start, has_receiver);
if (obj_args != NULL) {
int s = obj_args->size();
int s = obj_args->max_length();
// if called through method handle invoke, some arguments may have been popped
for (int i = args_base+start, j = 0; j < obj_args->size() && i < state()->stack_size(); ) {
for (int i = args_base+start, j = 0; j < obj_args->max_length() && i < state()->stack_size(); ) {
Value v = state()->stack_at_inc(i);
if (v->type()->is_object_kind()) {
obj_args->push(v);
@ -4092,7 +4080,7 @@ void GraphBuilder::push_scope_for_jsr(BlockBegin* jsr_continuation, int jsr_dest
// properly clone all blocks in jsr region as well as exception
// handlers containing rets
BlockList* new_bci2block = new BlockList(bci2block()->length());
new_bci2block->push_all(bci2block());
new_bci2block->appendAll(bci2block());
data->set_bci2block(new_bci2block);
data->set_scope(scope());
data->setup_jsr_xhandlers();

View file

@ -531,7 +531,7 @@ ComputeLinearScanOrder::ComputeLinearScanOrder(Compilation* c, BlockBegin* start
_visited_blocks(_max_block_id),
_active_blocks(_max_block_id),
_dominator_blocks(_max_block_id),
_forward_branches(_max_block_id, 0),
_forward_branches(_max_block_id, _max_block_id, 0),
_loop_end_blocks(8),
_work_list(8),
_linear_scan_order(NULL), // initialized later with correct size
@ -849,13 +849,13 @@ bool ComputeLinearScanOrder::ready_for_processing(BlockBegin* cur) {
return false;
}
assert(_linear_scan_order->index_of(cur) == -1, "block already processed (block can be ready only once)");
assert(_work_list.index_of(cur) == -1, "block already in work-list (block can be ready only once)");
assert(_linear_scan_order->find(cur) == -1, "block already processed (block can be ready only once)");
assert(_work_list.find(cur) == -1, "block already in work-list (block can be ready only once)");
return true;
}
void ComputeLinearScanOrder::sort_into_work_list(BlockBegin* cur) {
assert(_work_list.index_of(cur) == -1, "block already in work list");
assert(_work_list.find(cur) == -1, "block already in work list");
int cur_weight = compute_weight(cur);
@ -891,7 +891,7 @@ void ComputeLinearScanOrder::sort_into_work_list(BlockBegin* cur) {
void ComputeLinearScanOrder::append_block(BlockBegin* cur) {
TRACE_LINEAR_SCAN(3, tty->print_cr("appending block B%d (weight 0x%6x) to linear-scan order", cur->block_id(), cur->linear_scan_number()));
assert(_linear_scan_order->index_of(cur) == -1, "cannot add the same block twice");
assert(_linear_scan_order->find(cur) == -1, "cannot add the same block twice");
// currently, the linear scan order and code emit order are equal.
// therefore the linear_scan_number and the weight of a block must also
@ -1116,13 +1116,13 @@ void ComputeLinearScanOrder::verify() {
BlockBegin* cur = _linear_scan_order->at(i);
assert(cur->linear_scan_number() == i, "incorrect linear_scan_number");
assert(cur->linear_scan_number() >= 0 && cur->linear_scan_number() == _linear_scan_order->index_of(cur), "incorrect linear_scan_number");
assert(cur->linear_scan_number() >= 0 && cur->linear_scan_number() == _linear_scan_order->find(cur), "incorrect linear_scan_number");
int j;
for (j = cur->number_of_sux() - 1; j >= 0; j--) {
BlockBegin* sux = cur->sux_at(j);
assert(sux->linear_scan_number() >= 0 && sux->linear_scan_number() == _linear_scan_order->index_of(sux), "incorrect linear_scan_number");
assert(sux->linear_scan_number() >= 0 && sux->linear_scan_number() == _linear_scan_order->find(sux), "incorrect linear_scan_number");
if (!sux->is_set(BlockBegin::backward_branch_target_flag)) {
assert(cur->linear_scan_number() < sux->linear_scan_number(), "invalid order");
}
@ -1134,7 +1134,7 @@ void ComputeLinearScanOrder::verify() {
for (j = cur->number_of_preds() - 1; j >= 0; j--) {
BlockBegin* pred = cur->pred_at(j);
assert(pred->linear_scan_number() >= 0 && pred->linear_scan_number() == _linear_scan_order->index_of(pred), "incorrect linear_scan_number");
assert(pred->linear_scan_number() >= 0 && pred->linear_scan_number() == _linear_scan_order->find(pred), "incorrect linear_scan_number");
if (!cur->is_set(BlockBegin::backward_branch_target_flag)) {
assert(cur->linear_scan_number() > pred->linear_scan_number(), "invalid order");
}
@ -1256,8 +1256,7 @@ void IR::print(bool cfg_only, bool live_only) {
}
define_array(BlockListArray, BlockList*)
define_stack(BlockListList, BlockListArray)
typedef GrowableArray<BlockList*> BlockListList;
class PredecessorValidator : public BlockClosure {
private:
@ -1271,7 +1270,7 @@ class PredecessorValidator : public BlockClosure {
public:
PredecessorValidator(IR* hir) {
ResourceMark rm;
_predecessors = new BlockListList(BlockBegin::number_of_blocks(), NULL);
_predecessors = new BlockListList(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL);
_blocks = new BlockList();
int i;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -104,9 +104,7 @@ class XHandler: public CompilationResourceObj {
bool equals(XHandler* other) const;
};
define_array(_XHandlerArray, XHandler*)
define_stack(_XHandlerList, _XHandlerArray)
typedef GrowableArray<XHandler*> _XHandlerList;
// XHandlers is the C1 internal list of exception handlers for a method
class XHandlers: public CompilationResourceObj {
@ -132,8 +130,7 @@ class XHandlers: public CompilationResourceObj {
class IRScope;
define_array(IRScopeArray, IRScope*)
define_stack(IRScopeList, IRScopeArray)
typedef GrowableArray<IRScope*> IRScopeList;
class Compilation;
class IRScope: public CompilationResourceObj {

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -564,7 +564,7 @@ void BlockBegin::disconnect_edge(BlockBegin* from, BlockBegin* to) {
for (int s = 0; s < from->number_of_sux();) {
BlockBegin* sux = from->sux_at(s);
if (sux == to) {
int index = sux->_predecessors.index_of(from);
int index = sux->_predecessors.find(from);
if (index >= 0) {
sux->_predecessors.remove_at(index);
}
@ -664,7 +664,7 @@ BlockBegin* BlockBegin::insert_block_between(BlockBegin* sux) {
void BlockBegin::remove_successor(BlockBegin* pred) {
int idx;
while ((idx = _successors.index_of(pred)) >= 0) {
while ((idx = _successors.find(pred)) >= 0) {
_successors.remove_at(idx);
}
}
@ -677,7 +677,7 @@ void BlockBegin::add_predecessor(BlockBegin* pred) {
void BlockBegin::remove_predecessor(BlockBegin* pred) {
int idx;
while ((idx = _predecessors.index_of(pred)) >= 0) {
while ((idx = _predecessors.find(pred)) >= 0) {
_predecessors.remove_at(idx);
}
}
@ -722,13 +722,15 @@ void BlockBegin::iterate_postorder(boolArray& mark, BlockClosure* closure) {
void BlockBegin::iterate_preorder(BlockClosure* closure) {
boolArray mark(number_of_blocks(), false);
int mark_len = number_of_blocks();
boolArray mark(mark_len, mark_len, false);
iterate_preorder(mark, closure);
}
void BlockBegin::iterate_postorder(BlockClosure* closure) {
boolArray mark(number_of_blocks(), false);
int mark_len = number_of_blocks();
boolArray mark(mark_len, mark_len, false);
iterate_postorder(mark, closure);
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -115,11 +115,8 @@ class Assert;
// A Value is a reference to the instruction creating the value
typedef Instruction* Value;
define_array(ValueArray, Value)
define_stack(Values, ValueArray)
define_array(ValueStackArray, ValueStack*)
define_stack(ValueStackStack, ValueStackArray)
typedef GrowableArray<Value> Values;
typedef GrowableArray<ValueStack*> ValueStackStack;
// BlockClosure is the base class for block traversal/iteration.
@ -137,14 +134,13 @@ class ValueVisitor: public StackObj {
// Some array and list classes
define_array(BlockBeginArray, BlockBegin*)
define_stack(_BlockList, BlockBeginArray)
typedef GrowableArray<BlockBegin*> BlockBeginArray;
class BlockList: public _BlockList {
class BlockList: public GrowableArray<BlockBegin*> {
public:
BlockList(): _BlockList() {}
BlockList(const int size): _BlockList(size) {}
BlockList(const int size, BlockBegin* init): _BlockList(size, init) {}
BlockList(): GrowableArray<BlockBegin*>() {}
BlockList(const int size): GrowableArray<BlockBegin*>(size) {}
BlockList(const int size, BlockBegin* init): GrowableArray<BlockBegin*>(size, size, init) {}
void iterate_forward(BlockClosure* closure);
void iterate_backward(BlockClosure* closure);
@ -1744,7 +1740,7 @@ LEAF(BlockBegin, StateSplit)
void remove_predecessor(BlockBegin* pred);
bool is_predecessor(BlockBegin* pred) const { return _predecessors.contains(pred); }
int number_of_preds() const { return _predecessors.length(); }
BlockBegin* pred_at(int i) const { return _predecessors[i]; }
BlockBegin* pred_at(int i) const { return _predecessors.at(i); }
// exception handlers potentially invoked by this block
void add_exception_handler(BlockBegin* b);
@ -2609,10 +2605,7 @@ class BlockPair: public CompilationResourceObj {
void set_from(BlockBegin* b) { _from = b; }
};
define_array(BlockPairArray, BlockPair*)
define_stack(BlockPairList, BlockPairArray)
typedef GrowableArray<BlockPair*> BlockPairList;
inline int BlockBegin::number_of_sux() const { assert(_end == NULL || _end->number_of_sux() == _successors.length(), "mismatch"); return _successors.length(); }
inline BlockBegin* BlockBegin::sux_at(int i) const { assert(_end == NULL || _end->sux_at(i) == _successors.at(i), "mismatch"); return _successors.at(i); }

View file

@ -483,6 +483,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
case lir_membar_storestore: // result and info always invalid
case lir_membar_loadstore: // result and info always invalid
case lir_membar_storeload: // result and info always invalid
case lir_on_spin_wait:
{
assert(op->as_Op0() != NULL, "must be");
assert(op->_info == NULL, "info not used by this instruction");
@ -727,31 +728,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
break;
}
case lir_tan:
case lir_log10: {
assert(op->as_Op2() != NULL, "must be");
LIR_Op2* op2 = (LIR_Op2*)op;
// On x86 tan/sin/cos need two temporary fpu stack slots and
// log/log10 need one so handle opr2 and tmp as temp inputs.
// Register input operand as temp to guarantee that it doesn't
// overlap with the input.
assert(op2->_info == NULL, "not used");
assert(op2->_tmp5->is_illegal(), "not used");
assert(op2->_opr1->is_valid(), "used");
do_input(op2->_opr1); do_temp(op2->_opr1);
if (op2->_opr2->is_valid()) do_temp(op2->_opr2);
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2);
if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3);
if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4);
if (op2->_result->is_valid()) do_output(op2->_result);
break;
}
// LIR_Op3
case lir_idiv:
case lir_irem: {
@ -1691,6 +1667,7 @@ const char * LIR_Op::name() const {
case lir_word_align: s = "word_align"; break;
case lir_label: s = "label"; break;
case lir_nop: s = "nop"; break;
case lir_on_spin_wait: s = "on_spin_wait"; break;
case lir_backwardbranch_target: s = "backbranch"; break;
case lir_std_entry: s = "std_entry"; break;
case lir_osr_entry: s = "osr_entry"; break;
@ -1738,8 +1715,6 @@ const char * LIR_Op::name() const {
case lir_rem: s = "rem"; break;
case lir_abs: s = "abs"; break;
case lir_sqrt: s = "sqrt"; break;
case lir_tan: s = "tan"; break;
case lir_log10: s = "log10"; break;
case lir_logic_and: s = "logic_and"; break;
case lir_logic_or: s = "logic_or"; break;
case lir_logic_xor: s = "logic_xor"; break;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -59,17 +59,9 @@ class LIR_OprVisitor;
typedef LIR_OprDesc* LIR_Opr;
typedef int RegNr;
define_array(LIR_OprArray, LIR_Opr)
define_stack(LIR_OprList, LIR_OprArray)
define_array(LIR_OprRefArray, LIR_Opr*)
define_stack(LIR_OprRefList, LIR_OprRefArray)
define_array(CodeEmitInfoArray, CodeEmitInfo*)
define_stack(CodeEmitInfoList, CodeEmitInfoArray)
define_array(LIR_OpArray, LIR_Op*)
define_stack(LIR_OpList, LIR_OpArray)
typedef GrowableArray<LIR_Opr> LIR_OprList;
typedef GrowableArray<LIR_Op*> LIR_OpArray;
typedef GrowableArray<LIR_Op*> LIR_OpList;
// define LIR_OprPtr early so LIR_OprDesc can refer to it
class LIR_OprPtr: public CompilationResourceObj {
@ -920,6 +912,7 @@ enum LIR_Code {
, lir_membar_loadstore
, lir_membar_storeload
, lir_get_thread
, lir_on_spin_wait
, end_op0
, begin_op1
, lir_fxch
@ -2101,6 +2094,8 @@ class LIR_List: public CompilationResourceObj {
void std_entry(LIR_Opr receiver) { append(new LIR_Op0(lir_std_entry, receiver)); }
void osr_entry(LIR_Opr osrPointer) { append(new LIR_Op0(lir_osr_entry, osrPointer)); }
void on_spin_wait() { append(new LIR_Op0(lir_on_spin_wait)); }
void branch_destination(Label* lbl) { append(new LIR_OpLabel(lbl)); }
void negate(LIR_Opr from, LIR_Opr to) { append(new LIR_Op1(lir_neg, from, to)); }

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -127,7 +127,7 @@ void LIR_Assembler::append_code_stub(CodeStub* stub) {
void LIR_Assembler::emit_stubs(CodeStubList* stub_list) {
for (int m = 0; m < stub_list->length(); m++) {
CodeStub* s = (*stub_list)[m];
CodeStub* s = stub_list->at(m);
check_codespace();
CHECK_BAILOUT();
@ -678,6 +678,10 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) {
get_thread(op->result_opr());
break;
case lir_on_spin_wait:
on_spin_wait();
break;
default:
ShouldNotReachHere();
break;

View file

@ -251,6 +251,7 @@ class LIR_Assembler: public CompilationResourceObj {
void membar_storestore();
void membar_loadstore();
void membar_storeload();
void on_spin_wait();
void get_thread(LIR_Opr result);
void verify_oop_map(CodeEmitInfo* info);

View file

@ -150,7 +150,7 @@ PhiResolver::~PhiResolver() {
int i;
// resolve any cycles in moves from and to virtual registers
for (i = virtual_operands().length() - 1; i >= 0; i --) {
ResolveNode* node = virtual_operands()[i];
ResolveNode* node = virtual_operands().at(i);
if (!node->visited()) {
_loop = NULL;
move(NULL, node);
@ -161,7 +161,7 @@ PhiResolver::~PhiResolver() {
// generate move for move from non virtual register to abitrary destination
for (i = other_operands().length() - 1; i >= 0; i --) {
ResolveNode* node = other_operands()[i];
ResolveNode* node = other_operands().at(i);
for (int j = node->no_of_destinations() - 1; j >= 0; j --) {
emit_move(node->operand(), node->destination_at(j)->operand());
}
@ -177,7 +177,7 @@ ResolveNode* PhiResolver::create_node(LIR_Opr opr, bool source) {
assert(node == NULL || node->operand() == opr, "");
if (node == NULL) {
node = new ResolveNode(opr);
vreg_table()[vreg_num] = node;
vreg_table().at_put(vreg_num, node);
}
// Make sure that all virtual operands show up in the list when
// they are used as the source of a move.
@ -3161,7 +3161,9 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_fullFence :
if (os::is_MP()) __ membar();
break;
case vmIntrinsics::_onSpinWait:
__ on_spin_wait();
break;
case vmIntrinsics::_Reference_get:
do_Reference_get(x);
break;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -39,8 +39,7 @@ class Invoke;
class SwitchRange;
class LIRItem;
define_array(LIRItemArray, LIRItem*)
define_stack(LIRItemList, LIRItemArray)
typedef GrowableArray<LIRItem*> LIRItemList;
class SwitchRange: public CompilationResourceObj {
private:
@ -56,15 +55,12 @@ class SwitchRange: public CompilationResourceObj {
BlockBegin* sux() const { return _sux; }
};
define_array(SwitchRangeArray, SwitchRange*)
define_stack(SwitchRangeList, SwitchRangeArray)
typedef GrowableArray<SwitchRange*> SwitchRangeArray;
typedef GrowableArray<SwitchRange*> SwitchRangeList;
class ResolveNode;
define_array(NodeArray, ResolveNode*);
define_stack(NodeList, NodeArray);
typedef GrowableArray<ResolveNode*> NodeList;
// Node objects form a directed graph of LIR_Opr
// Edges between Nodes represent moves from one Node to its destinations
@ -86,7 +82,7 @@ class ResolveNode: public CompilationResourceObj {
// accessors
LIR_Opr operand() const { return _operand; }
int no_of_destinations() const { return _destinations.length(); }
ResolveNode* destination_at(int i) { return _destinations[i]; }
ResolveNode* destination_at(int i) { return _destinations.at(i); }
bool assigned() const { return _assigned; }
bool visited() const { return _visited; }
bool start_node() const { return _start_node; }

View file

@ -496,8 +496,8 @@ void LinearScan::number_instructions() {
}
// initialize with correct length
_lir_ops = LIR_OpArray(num_instructions);
_block_of_op = BlockBeginArray(num_instructions);
_lir_ops = LIR_OpArray(num_instructions, num_instructions, NULL);
_block_of_op = BlockBeginArray(num_instructions, num_instructions, NULL);
int op_id = 0;
int idx = 0;
@ -2507,7 +2507,8 @@ LocationValue* _illegal_value = new (ResourceObj::C_HEAP, mtCompiler) Lo
void LinearScan::init_compute_debug_info() {
// cache for frequently used scope values
// (cpu registers and stack slots)
_scope_value_cache = ScopeValueArray((LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2, NULL);
int cache_size = (LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2;
_scope_value_cache = ScopeValueArray(cache_size, cache_size, NULL);
}
MonitorValue* LinearScan::location_for_monitor_index(int monitor_index) {
@ -3042,7 +3043,7 @@ void LinearScan::assign_reg_num(LIR_OpList* instructions, IntervalWalker* iw) {
insert_point++;
}
}
instructions->truncate(insert_point);
instructions->trunc_to(insert_point);
}
}
@ -3446,7 +3447,7 @@ class RegisterVerifier: public StackObj {
RegisterVerifier(LinearScan* allocator)
: _allocator(allocator)
, _work_list(16)
, _saved_states(BlockBegin::number_of_blocks(), NULL)
, _saved_states(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL)
{ }
void verify(BlockBegin* start);
@ -4452,7 +4453,7 @@ Interval* Interval::split(int split_pos) {
new_use_pos_and_kinds.append(_use_pos_and_kinds.at(i));
}
_use_pos_and_kinds.truncate(start_idx + 2);
_use_pos_and_kinds.trunc_to(start_idx + 2);
result->_use_pos_and_kinds = _use_pos_and_kinds;
_use_pos_and_kinds = new_use_pos_and_kinds;
@ -5540,7 +5541,7 @@ void LinearScanWalker::split_and_spill_intersecting_intervals(int reg, int regHi
IntervalList* processed = _spill_intervals[reg];
for (int i = 0; i < _spill_intervals[regHi]->length(); i++) {
Interval* it = _spill_intervals[regHi]->at(i);
if (processed->find_from_end(it) == -1) {
if (processed->find(it) == -1) {
remove_from_list(it);
split_and_spill_interval(it);
}
@ -6211,7 +6212,7 @@ void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
_original_preds.clear();
for (j = block->number_of_preds() - 1; j >= 0; j--) {
BlockBegin* pred = block->pred_at(j);
if (_original_preds.index_of(pred) == -1) {
if (_original_preds.find(pred) == -1) {
_original_preds.append(pred);
}
}
@ -6231,7 +6232,7 @@ void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
}
old_pos++;
}
code->truncate(new_pos);
code->trunc_to(new_pos);
DEBUG_ONLY(verify(code));
}
@ -6256,7 +6257,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting unconditional branch at end of block B%d", block->block_id()));
// delete last branch instruction
instructions->truncate(instructions->length() - 1);
instructions->trunc_to(instructions->length() - 1);
} else {
LIR_Op* prev_op = instructions->at(instructions->length() - 2);
@ -6295,7 +6296,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
prev_branch->change_block(last_branch->block());
prev_branch->negate_cond();
prev_cmp->set_condition(prev_branch->cond());
instructions->truncate(instructions->length() - 1);
instructions->trunc_to(instructions->length() - 1);
// if we do change the condition, we have to change the cmove as well
if (prev_cmove != NULL) {
prev_cmove->set_condition(prev_branch->cond());
@ -6378,19 +6379,19 @@ void ControlFlowOptimizer::verify(BlockList* code) {
LIR_OpBranch* op_branch = instructions->at(j)->as_OpBranch();
if (op_branch != NULL) {
assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid");
assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid");
assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid");
assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid");
}
}
for (j = 0; j < block->number_of_sux() - 1; j++) {
BlockBegin* sux = block->sux_at(j);
assert(code->index_of(sux) != -1, "successor not valid");
assert(code->find(sux) != -1, "successor not valid");
}
for (j = 0; j < block->number_of_preds() - 1; j++) {
BlockBegin* pred = block->pred_at(j);
assert(code->index_of(pred) != -1, "successor not valid");
assert(code->find(pred) != -1, "successor not valid");
}
}
}

View file

@ -44,18 +44,9 @@ class Range;
typedef GrowableArray<Interval*> IntervalArray;
typedef GrowableArray<Interval*> IntervalList;
define_array(IntervalsArray, IntervalList*)
define_stack(IntervalsList, IntervalsArray)
define_array(OopMapArray, OopMap*)
define_stack(OopMapList, OopMapArray)
define_array(ScopeValueArray, ScopeValue*)
define_array(LIR_OpListArray, LIR_OpList*);
define_stack(LIR_OpListStack, LIR_OpListArray);
typedef GrowableArray<IntervalList*> IntervalsList;
typedef GrowableArray<ScopeValue*> ScopeValueArray;
typedef GrowableArray<LIR_OpList*> LIR_OpListStack;
enum IntervalUseKind {
// priority of use kinds must be ascending
@ -67,9 +58,6 @@ enum IntervalUseKind {
firstValidKind = 1,
lastValidKind = 3
};
define_array(UseKindArray, IntervalUseKind)
define_stack(UseKindStack, UseKindArray)
enum IntervalKind {
fixedKind = 0, // interval pre-colored by LIR_Generator
@ -619,7 +607,7 @@ class Interval : public CompilationResourceObj {
void add_range(int from, int to);
Interval* split(int split_pos);
Interval* split_from_start(int split_pos);
void remove_first_use_pos() { _use_pos_and_kinds.truncate(_use_pos_and_kinds.length() - 2); }
void remove_first_use_pos() { _use_pos_and_kinds.trunc_to(_use_pos_and_kinds.length() - 2); }
// test intersection
bool covers(int op_id, LIR_OpVisitState::OprMode mode) const;

View file

@ -32,9 +32,7 @@
#include "utilities/bitMap.inline.hpp"
#include "compiler/compileLog.hpp"
define_array(ValueSetArray, ValueSet*);
define_stack(ValueSetList, ValueSetArray);
typedef GrowableArray<ValueSet*> ValueSetList;
Optimizer::Optimizer(IR* ir) {
assert(ir->is_valid(), "IR must be valid");
@ -584,8 +582,8 @@ class NullCheckEliminator: public ValueVisitor {
ValueSet* state() { return _set; }
void set_state_from (ValueSet* state) { _set->set_from(state); }
ValueSet* state_for (BlockBegin* block) { return _block_states[block->block_id()]; }
void set_state_for (BlockBegin* block, ValueSet* stack) { _block_states[block->block_id()] = stack; }
ValueSet* state_for (BlockBegin* block) { return _block_states.at(block->block_id()); }
void set_state_for (BlockBegin* block, ValueSet* stack) { _block_states.at_put(block->block_id(), stack); }
// Returns true if caused a change in the block's state.
bool merge_state_for(BlockBegin* block,
ValueSet* incoming_state);
@ -596,7 +594,7 @@ class NullCheckEliminator: public ValueVisitor {
: _opt(opt)
, _set(new ValueSet())
, _last_explicit_null_check(NULL)
, _block_states(BlockBegin::number_of_blocks(), NULL)
, _block_states(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL)
, _work_list(new BlockList()) {
_visitable_instructions = new ValueSet();
_visitor.set_eliminator(this);
@ -1165,19 +1163,19 @@ void Optimizer::eliminate_null_checks() {
// handlers and iterate over them as well
int nblocks = BlockBegin::number_of_blocks();
BlockList blocks(nblocks);
boolArray visited_block(nblocks, false);
boolArray visited_block(nblocks, nblocks, false);
blocks.push(ir()->start());
visited_block[ir()->start()->block_id()] = true;
visited_block.at_put(ir()->start()->block_id(), true);
for (int i = 0; i < blocks.length(); i++) {
BlockBegin* b = blocks[i];
BlockBegin* b = blocks.at(i);
// exception handlers need to be treated as additional roots
for (int e = b->number_of_exception_handlers(); e-- > 0; ) {
BlockBegin* excp = b->exception_handler_at(e);
int id = excp->block_id();
if (!visited_block[id]) {
if (!visited_block.at(id)) {
blocks.push(excp);
visited_block[id] = true;
visited_block.at_put(id, true);
nce.iterate(excp);
}
}
@ -1186,9 +1184,9 @@ void Optimizer::eliminate_null_checks() {
for (int s = end->number_of_sux(); s-- > 0; ) {
BlockBegin* next = end->sux_at(s);
int id = next->block_id();
if (!visited_block[id]) {
if (!visited_block.at(id)) {
blocks.push(next);
visited_block[id] = true;
visited_block.at_put(id, true);
}
}
}

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,8 +53,8 @@ void RangeCheckElimination::eliminate(IR *ir) {
// Constructor
RangeCheckEliminator::RangeCheckEliminator(IR *ir) :
_bounds(Instruction::number_of_instructions(), NULL),
_access_indexed_info(Instruction::number_of_instructions(), NULL)
_bounds(Instruction::number_of_instructions(), Instruction::number_of_instructions(), NULL),
_access_indexed_info(Instruction::number_of_instructions(), Instruction::number_of_instructions(), NULL)
{
_visitor.set_range_check_eliminator(this);
_ir = ir;
@ -303,28 +303,28 @@ RangeCheckEliminator::Bound *RangeCheckEliminator::get_bound(Value v) {
// Wrong type or NULL -> No bound
if (!v || (!v->type()->as_IntType() && !v->type()->as_ObjectType())) return NULL;
if (!_bounds[v->id()]) {
if (!_bounds.at(v->id())) {
// First (default) bound is calculated
// Create BoundStack
_bounds[v->id()] = new BoundStack();
_bounds.at_put(v->id(), new BoundStack());
_visitor.clear_bound();
Value visit_value = v;
visit_value->visit(&_visitor);
Bound *bound = _visitor.bound();
if (bound) {
_bounds[v->id()]->push(bound);
_bounds.at(v->id())->push(bound);
}
if (_bounds[v->id()]->length() == 0) {
if (_bounds.at(v->id())->length() == 0) {
assert(!(v->as_Constant() && v->type()->as_IntConstant()), "constants not handled here");
_bounds[v->id()]->push(new Bound());
_bounds.at(v->id())->push(new Bound());
}
} else if (_bounds[v->id()]->length() == 0) {
} else if (_bounds.at(v->id())->length() == 0) {
// To avoid endless loops, bound is currently in calculation -> nothing known about it
return new Bound();
}
// Return bound
return _bounds[v->id()]->top();
return _bounds.at(v->id())->top();
}
// Update bound
@ -353,28 +353,28 @@ void RangeCheckEliminator::update_bound(IntegerStack &pushed, Value v, Bound *bo
// No bound update for constants
return;
}
if (!_bounds[v->id()]) {
if (!_bounds.at(v->id())) {
get_bound(v);
assert(_bounds[v->id()], "Now Stack must exist");
assert(_bounds.at(v->id()), "Now Stack must exist");
}
Bound *top = NULL;
if (_bounds[v->id()]->length() > 0) {
top = _bounds[v->id()]->top();
if (_bounds.at(v->id())->length() > 0) {
top = _bounds.at(v->id())->top();
}
if (top) {
bound->and_op(top);
}
_bounds[v->id()]->push(bound);
_bounds.at(v->id())->push(bound);
pushed.append(v->id());
}
// Add instruction + idx for in block motion
void RangeCheckEliminator::add_access_indexed_info(InstructionList &indices, int idx, Value instruction, AccessIndexed *ai) {
int id = instruction->id();
AccessIndexedInfo *aii = _access_indexed_info[id];
AccessIndexedInfo *aii = _access_indexed_info.at(id);
if (aii == NULL) {
aii = new AccessIndexedInfo();
_access_indexed_info[id] = aii;
_access_indexed_info.at_put(id, aii);
indices.append(instruction);
aii->_min = idx;
aii->_max = idx;
@ -461,7 +461,7 @@ void RangeCheckEliminator::in_block_motion(BlockBegin *block, AccessIndexedList
if (_optimistic) {
for (int i = 0; i < indices.length(); i++) {
Instruction *index_instruction = indices.at(i);
AccessIndexedInfo *info = _access_indexed_info[index_instruction->id()];
AccessIndexedInfo *info = _access_indexed_info.at(index_instruction->id());
assert(info != NULL, "Info must not be null");
// if idx < 0, max > 0, max + idx may fall between 0 and
@ -562,7 +562,7 @@ void RangeCheckEliminator::in_block_motion(BlockBegin *block, AccessIndexedList
// Clear data structures for next array
for (int i = 0; i < indices.length(); i++) {
Instruction *index_instruction = indices.at(i);
_access_indexed_info[index_instruction->id()] = NULL;
_access_indexed_info.at_put(index_instruction->id(), NULL);
}
indices.clear();
}
@ -1005,7 +1005,7 @@ void RangeCheckEliminator::calc_bounds(BlockBegin *block, BlockBegin *loop_heade
// Reset stack
for (int i=0; i<pushed.length(); i++) {
_bounds[pushed[i]]->pop();
_bounds.at(pushed.at(i))->pop();
}
}
@ -1051,7 +1051,7 @@ void RangeCheckEliminator::dump_condition_stack(BlockBegin *block) {
#endif
// Verification of the IR
RangeCheckEliminator::Verification::Verification(IR *ir) : _used(BlockBegin::number_of_blocks(), false) {
RangeCheckEliminator::Verification::Verification(IR *ir) : _used(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), false) {
this->_ir = ir;
ir->iterate_linear_scan_order(this);
}
@ -1146,14 +1146,14 @@ bool RangeCheckEliminator::Verification::can_reach(BlockBegin *start, BlockBegin
if (start == end) return start != dont_use;
// Simple BFS from start to end
// BlockBeginList _current;
for (int i=0; i<_used.length(); i++) {
_used[i] = false;
for (int i=0; i < _used.length(); i++) {
_used.at_put(i, false);
}
_current.truncate(0);
_successors.truncate(0);
_current.trunc_to(0);
_successors.trunc_to(0);
if (start != dont_use) {
_current.push(start);
_used[start->block_id()] = true;
_used.at_put(start->block_id(), true);
}
// BlockBeginList _successors;
@ -1180,17 +1180,17 @@ bool RangeCheckEliminator::Verification::can_reach(BlockBegin *start, BlockBegin
}
}
for (int i=0; i<_successors.length(); i++) {
BlockBegin *sux = _successors[i];
BlockBegin *sux = _successors.at(i);
assert(sux != NULL, "Successor must not be NULL!");
if (sux == end) {
return true;
}
if (sux != dont_use && !_used[sux->block_id()]) {
_used[sux->block_id()] = true;
if (sux != dont_use && !_used.at(sux->block_id())) {
_used.at_put(sux->block_id(), true);
_current.push(sux);
}
}
_successors.truncate(0);
_successors.trunc_to(0);
}
return false;

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,10 +40,8 @@ private:
bool _optimistic; // Insert predicates and deoptimize when they fail
IR *_ir;
define_array(BlockBeginArray, BlockBegin*)
define_stack(BlockBeginList, BlockBeginArray)
define_stack(IntegerStack, intArray)
define_array(IntegerMap, IntegerStack*)
typedef GrowableArray<BlockBegin*> BlockBeginList;
typedef GrowableArray<int> IntegerStack;
class Verification : public BlockClosure {
// RangeCheckEliminator::Verification should never get instantiated on the heap.
@ -180,13 +178,10 @@ public:
void add_assertions(Bound *bound, Instruction *instruction, Instruction *position);
#endif
define_array(BoundArray, Bound *)
define_stack(BoundStack, BoundArray)
define_array(BoundMap, BoundStack *)
define_array(AccessIndexedArray, AccessIndexed *)
define_stack(AccessIndexedList, AccessIndexedArray)
define_array(InstructionArray, Instruction *)
define_stack(InstructionList, InstructionArray)
typedef GrowableArray<Bound*> BoundStack;
typedef GrowableArray<BoundStack*> BoundMap;
typedef GrowableArray<AccessIndexed*> AccessIndexedList;
typedef GrowableArray<Instruction*> InstructionList;
class AccessIndexedInfo : public CompilationResourceObj {
public:
@ -195,7 +190,7 @@ public:
int _max;
};
define_array(AccessIndexedInfoArray, AccessIndexedInfo *)
typedef GrowableArray<AccessIndexedInfo*> AccessIndexedInfoArray;
BoundMap _bounds; // Mapping from Instruction's id to current bound
AccessIndexedInfoArray _access_indexed_info; // Mapping from Instruction's id to AccessIndexedInfo for in block motion
Visitor _visitor;

View file

@ -320,9 +320,11 @@ const char* Runtime1::name_for_address(address entry) {
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
FUNCTION_CASE(entry, StubRoutines::dexp());
FUNCTION_CASE(entry, StubRoutines::dlog());
FUNCTION_CASE(entry, StubRoutines::dlog10());
FUNCTION_CASE(entry, StubRoutines::dpow());
FUNCTION_CASE(entry, StubRoutines::dsin());
FUNCTION_CASE(entry, StubRoutines::dcos());
FUNCTION_CASE(entry, StubRoutines::dtan());
#undef FUNCTION_CASE

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -46,7 +46,7 @@
ValueMap::ValueMap()
: _nesting(0)
, _entries(ValueMapInitialSize, NULL)
, _entries(ValueMapInitialSize, ValueMapInitialSize, NULL)
, _killed_values()
, _entry_count(0)
{
@ -56,7 +56,7 @@ ValueMap::ValueMap()
ValueMap::ValueMap(ValueMap* old)
: _nesting(old->_nesting + 1)
, _entries(old->_entries.length())
, _entries(old->_entries.length(), old->_entries.length(), NULL)
, _killed_values()
, _entry_count(old->_entry_count)
{
@ -72,7 +72,7 @@ void ValueMap::increase_table_size() {
int new_size = old_size * 2 + 1;
ValueMapEntryList worklist(8);
ValueMapEntryArray new_entries(new_size, NULL);
ValueMapEntryArray new_entries(new_size, new_size, NULL);
int new_entry_count = 0;
TRACE_VALUE_NUMBERING(tty->print_cr("increasing table size from %d to %d", old_size, new_size));
@ -486,7 +486,7 @@ bool ShortLoopOptimizer::process(BlockBegin* loop_header) {
GlobalValueNumbering::GlobalValueNumbering(IR* ir)
: _current_map(NULL)
, _value_maps(ir->linear_scan_order()->length(), NULL)
, _value_maps(ir->linear_scan_order()->length(), ir->linear_scan_order()->length(), NULL)
, _compilation(ir->compilation())
{
TRACE_VALUE_NUMBERING(tty->print_cr("****** start of global value numbering"));

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,8 +53,8 @@ class ValueMapEntry: public CompilationResourceObj {
void set_next(ValueMapEntry* next) { _next = next; }
};
define_array(ValueMapEntryArray, ValueMapEntry*)
define_stack(ValueMapEntryList, ValueMapEntryArray)
typedef GrowableArray<ValueMapEntry*> ValueMapEntryArray;
typedef GrowableArray<ValueMapEntry*> ValueMapEntryList;
// ValueMap implements nested hash tables for value numbering. It
// maintains a set _killed_values which represents the instructions
@ -129,8 +129,7 @@ class ValueMap: public CompilationResourceObj {
#endif
};
define_array(ValueMapArray, ValueMap*)
typedef GrowableArray<ValueMap*> ValueMapArray;
class ValueNumberingVisitor: public InstructionVisitor {
protected:

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,7 +35,7 @@ ValueStack::ValueStack(IRScope* scope, ValueStack* caller_state)
, _caller_state(caller_state)
, _bci(-99)
, _kind(Parsing)
, _locals(scope->method()->max_locals(), NULL)
, _locals(scope->method()->max_locals(), scope->method()->max_locals(), NULL)
, _stack(scope->method()->max_stack())
, _locks()
{
@ -178,7 +178,7 @@ void ValueStack::setup_phi_for_stack(BlockBegin* b, int index) {
ValueType* t = stack_at(index)->type();
Value phi = new Phi(t, b, -index - 1);
_stack[index] = phi;
_stack.at_put(index, phi);
assert(!t->is_double_word() || _stack.at(index + 1) == NULL, "hi-word of doubleword value must be NULL");
}
@ -225,7 +225,7 @@ void ValueStack::print() {
if (locals_size() > 0) {
InstructionPrinter ip;
for (int i = 0; i < locals_size();) {
Value l = _locals[i];
Value l = _locals.at(i);
tty->print("local %d ", i);
if (l == NULL) {
tty->print("null");

View file

@ -124,6 +124,9 @@ public:
}
}
// Tells whether this constant carries a usable value: anything whose basic
// type is T_ILLEGAL is treated as "no value".
bool is_valid() const {
  if (basic_type() == T_ILLEGAL) {
    return false;
  }
  return true;
}
// Debugging output
void print();
};

View file

@ -235,29 +235,16 @@ void ciField::initialize_from(fieldDescriptor* fd) {
_holder = CURRENT_ENV->get_instance_klass(fd->field_holder());
// Check to see if the field is constant.
bool is_final = this->is_final();
bool is_stable = FoldStableValues && this->is_stable();
if (_holder->is_initialized() && (is_final || is_stable)) {
if (!this->is_static()) {
// A field can be constant if it's a final static field or if
// it's a final non-static field of a trusted class (classes in
// java.lang.invoke and sun.invoke packages and subpackages).
if (is_stable || trust_final_non_static_fields(_holder)) {
_is_constant = true;
return;
}
_is_constant = false;
return;
}
Klass* k = _holder->get_Klass();
bool is_stable_field = FoldStableValues && is_stable();
if (is_final() || is_stable_field) {
if (is_static()) {
// This field just may be constant. The only case where it will
// not be constant is when the field is a *special* static&final field
// not be constant is when the field is a *special* static & final field
// whose value may change. The three examples are java.lang.System.in,
// java.lang.System.out, and java.lang.System.err.
KlassHandle k = _holder->get_Klass();
assert( SystemDictionary::System_klass() != NULL, "Check once per vm");
if( k() == SystemDictionary::System_klass() ) {
assert(SystemDictionary::System_klass() != NULL, "Check once per vm");
if (k == SystemDictionary::System_klass()) {
// Check offsets for case 2: System.in, System.out, or System.err
if( _offset == java_lang_System::in_offset_in_bytes() ||
_offset == java_lang_System::out_offset_in_bytes() ||
@ -266,63 +253,57 @@ void ciField::initialize_from(fieldDescriptor* fd) {
return;
}
}
Handle mirror = k->java_mirror();
switch(type()->basic_type()) {
case T_BYTE:
_constant_value = ciConstant(type()->basic_type(), mirror->byte_field(_offset));
break;
case T_CHAR:
_constant_value = ciConstant(type()->basic_type(), mirror->char_field(_offset));
break;
case T_SHORT:
_constant_value = ciConstant(type()->basic_type(), mirror->short_field(_offset));
break;
case T_BOOLEAN:
_constant_value = ciConstant(type()->basic_type(), mirror->bool_field(_offset));
break;
case T_INT:
_constant_value = ciConstant(type()->basic_type(), mirror->int_field(_offset));
break;
case T_FLOAT:
_constant_value = ciConstant(mirror->float_field(_offset));
break;
case T_DOUBLE:
_constant_value = ciConstant(mirror->double_field(_offset));
break;
case T_LONG:
_constant_value = ciConstant(mirror->long_field(_offset));
break;
case T_OBJECT:
case T_ARRAY:
{
oop o = mirror->obj_field(_offset);
// A field will be "constant" if it is known always to be
// a non-null reference to an instance of a particular class,
// or to a particular array. This can happen even if the instance
// or array is not perm. In such a case, an "unloaded" ciArray
// or ciInstance is created. The compiler may be able to use
// information about the object's class (which is exact) or length.
if (o == NULL) {
_constant_value = ciConstant(type()->basic_type(), ciNullObject::make());
} else {
_constant_value = ciConstant(type()->basic_type(), CURRENT_ENV->get_object(o));
assert(_constant_value.as_object() == CURRENT_ENV->get_object(o), "check interning");
}
}
}
if (is_stable && _constant_value.is_null_or_zero()) {
// It is not a constant after all; treat it as uninitialized.
_is_constant = false;
} else {
_is_constant = true;
} else {
// An instance field can be constant if it's a final static field or if
// it's a final non-static field of a trusted class (classes in
// java.lang.invoke and sun.invoke packages and subpackages).
_is_constant = is_stable_field || trust_final_non_static_fields(_holder);
}
} else {
// For CallSite objects treat the target field as a compile time constant.
assert(SystemDictionary::CallSite_klass() != NULL, "should be already initialized");
if (k == SystemDictionary::CallSite_klass() &&
_offset == java_lang_invoke_CallSite::target_offset_in_bytes()) {
_is_constant = true;
} else {
// Non-final & non-stable fields are not constants.
_is_constant = false;
}
}
}
// ------------------------------------------------------------------
// ciField::constant_value
// Get the constant value of this static field.
// Lazily resolves and caches the value of this static constant field.
// Returns an empty ciConstant() when the holder class is not initialized yet,
// or when FoldStableValues is on and a stable field still holds null/zero;
// callers are expected to test the result with is_valid().
ciConstant ciField::constant_value() {
assert(is_static() && is_constant(), "illegal call to constant_value()");
if (!_holder->is_initialized()) {
return ciConstant(); // Not initialized yet
}
// T_ILLEGAL here means the value has not been fetched and cached so far.
if (_constant_value.basic_type() == T_ILLEGAL) {
// Static fields are placed in mirror objects.
// NOTE(review): VM_ENTRY_MARK presumably enters the VM for the rest of this
// scope so the mirror oop can be read safely - confirm against ciUtilities.
VM_ENTRY_MARK;
ciInstance* mirror = CURRENT_ENV->get_instance(_holder->get_Klass()->java_mirror());
_constant_value = mirror->field_value_impl(type()->basic_type(), offset());
}
// The cached value is kept, but a stable field that is still null/zero is
// reported as "no constant" on this call.
if (FoldStableValues && is_stable() && _constant_value.is_null_or_zero()) {
return ciConstant();
}
return _constant_value;
}
// ------------------------------------------------------------------
// ciField::constant_value_of
// Get the constant value of a non-static final field in the given object.
ciConstant ciField::constant_value_of(ciObject* object) {
  // Only meaningful for instance fields that were classified as constant,
  // and only when the receiver really is an instance.
  assert(!is_static() && is_constant(), "only if field is non-static constant");
  assert(object->is_instance(), "must be instance");

  // Read the current value of this field out of the given instance.
  ciConstant value = object->as_instance()->field_value(this);

  // With stable folding enabled, a stable field that still holds null/zero is
  // reported as "no constant" by handing back an empty ciConstant.
  const bool unset_stable_field =
      FoldStableValues && is_stable() && value.is_null_or_zero();
  return unset_stable_field ? ciConstant() : value;
}
// ------------------------------------------------------------------

View file

@ -62,7 +62,7 @@ private:
void initialize_from(fieldDescriptor* fd);
public:
ciFlags flags() { return _flags; }
ciFlags flags() const { return _flags; }
// Of which klass is this field a member?
//
@ -89,13 +89,13 @@ public:
//
// In that case the declared holder of f would be B and
// the canonical holder of f would be A.
ciInstanceKlass* holder() { return _holder; }
ciInstanceKlass* holder() const { return _holder; }
// Name of this field?
ciSymbol* name() { return _name; }
ciSymbol* name() const { return _name; }
// Signature of this field?
ciSymbol* signature() { return _signature; }
ciSymbol* signature() const { return _signature; }
// Of what type is this field?
ciType* type() { return (_type == NULL) ? compute_type() : _type; }
@ -107,13 +107,13 @@ public:
int size_in_bytes() { return type2aelembytes(layout_type()); }
// What is the offset of this field?
int offset() {
int offset() const {
assert(_offset >= 1, "illegal call to offset()");
return _offset;
}
// Same question, explicit units. (Fields are aligned to the byte level.)
int offset_in_bytes() {
int offset_in_bytes() const {
return offset();
}
@ -127,31 +127,27 @@ public:
//
// Clarification: A field is considered constant if:
// 1. The field is both static and final
// 2. The canonical holder of the field has undergone
// static initialization.
// 3. The field is not one of the special static/final
// 2. The field is not one of the special static/final
// non-constant fields. These are java.lang.System.in
// and java.lang.System.out. Abomination.
//
// A field is also considered constant if it is marked @Stable
// and is non-null (or non-zero, if a primitive).
// For non-static fields, the null/zero check must be
// arranged by the user, as constant_value().is_null_or_zero().
bool is_constant() { return _is_constant; }
//
// A user should also check the field value (constant_value().is_valid()), since
// constant fields of non-initialized classes don't have values yet.
bool is_constant() const { return _is_constant; }
// Get the constant value of this field.
ciConstant constant_value() {
assert(is_static() && is_constant(), "illegal call to constant_value()");
return _constant_value;
// Get the constant value of the static field.
ciConstant constant_value();
// True only for a static field that is classified as constant and whose
// value is already available (constant_value() yields a valid ciConstant).
bool is_static_constant() {
  // constant_value() asserts is_static() && is_constant(), so it must only
  // be reached once both have been confirmed.
  if (!is_static() || !is_constant()) {
    return false;
  }
  return constant_value().is_valid();
}
// Get the constant value of non-static final field in the given
// object.
ciConstant constant_value_of(ciObject* object) {
assert(!is_static() && is_constant(), "only if field is non-static constant");
assert(object->is_instance(), "must be instance");
return object->as_instance()->field_value(this);
}
ciConstant constant_value_of(ciObject* object);
// Check for link time errors. Accessing a field from a
// certain class via a certain bytecode may or may not be legal.
@ -165,14 +161,14 @@ public:
Bytecodes::Code bc);
// Java access flags
bool is_public () { return flags().is_public(); }
bool is_private () { return flags().is_private(); }
bool is_protected () { return flags().is_protected(); }
bool is_static () { return flags().is_static(); }
bool is_final () { return flags().is_final(); }
bool is_stable () { return flags().is_stable(); }
bool is_volatile () { return flags().is_volatile(); }
bool is_transient () { return flags().is_transient(); }
bool is_public () const { return flags().is_public(); }
bool is_private () const { return flags().is_private(); }
bool is_protected () const { return flags().is_protected(); }
bool is_static () const { return flags().is_static(); }
bool is_final () const { return flags().is_final(); }
bool is_stable () const { return flags().is_stable(); }
bool is_volatile () const { return flags().is_volatile(); }
bool is_transient () const { return flags().is_transient(); }
bool is_call_site_target() {
ciInstanceKlass* callsite_klass = CURRENT_ENV->CallSite_klass();

View file

@ -56,49 +56,21 @@ ciType* ciInstance::java_mirror_type() {
}
// ------------------------------------------------------------------
// ciInstance::field_value
//
// Constant value of a field.
ciConstant ciInstance::field_value(ciField* field) {
assert(is_loaded(), "invalid access - must be loaded");
assert(field->holder()->is_loaded(), "invalid access - holder must be loaded");
assert(klass()->is_subclass_of(field->holder()), "invalid access - must be subclass");
VM_ENTRY_MARK;
ciConstant result;
// ciInstance::field_value_impl
ciConstant ciInstance::field_value_impl(BasicType field_btype, int offset) {
Handle obj = get_oop();
assert(!obj.is_null(), "bad oop");
BasicType field_btype = field->type()->basic_type();
int offset = field->offset();
switch(field_btype) {
case T_BYTE:
return ciConstant(field_btype, obj->byte_field(offset));
break;
case T_CHAR:
return ciConstant(field_btype, obj->char_field(offset));
break;
case T_SHORT:
return ciConstant(field_btype, obj->short_field(offset));
break;
case T_BOOLEAN:
return ciConstant(field_btype, obj->bool_field(offset));
break;
case T_INT:
return ciConstant(field_btype, obj->int_field(offset));
break;
case T_FLOAT:
return ciConstant(obj->float_field(offset));
break;
case T_DOUBLE:
return ciConstant(obj->double_field(offset));
break;
case T_LONG:
return ciConstant(obj->long_field(offset));
break;
case T_OBJECT:
case T_ARRAY:
{
case T_BYTE: return ciConstant(field_btype, obj->byte_field(offset));
case T_CHAR: return ciConstant(field_btype, obj->char_field(offset));
case T_SHORT: return ciConstant(field_btype, obj->short_field(offset));
case T_BOOLEAN: return ciConstant(field_btype, obj->bool_field(offset));
case T_INT: return ciConstant(field_btype, obj->int_field(offset));
case T_FLOAT: return ciConstant(obj->float_field(offset));
case T_DOUBLE: return ciConstant(obj->double_field(offset));
case T_LONG: return ciConstant(obj->long_field(offset));
case T_OBJECT: // fall through
case T_ARRAY: {
oop o = obj->obj_field(offset);
// A field will be "constant" if it is known always to be
@ -115,11 +87,22 @@ ciConstant ciInstance::field_value(ciField* field) {
}
}
}
ShouldNotReachHere();
// to shut up the compiler
fatal("no field value: %s", type2name(field_btype));
return ciConstant();
}
// ------------------------------------------------------------------
// ciInstance::field_value
//
// Constant value of a field.
// Reads the current value of the given field from this instance and wraps it
// in a ciConstant, delegating the per-type read to field_value_impl().
ciConstant ciInstance::field_value(ciField* field) {
// Both this object and the class declaring the field must be loaded.
assert(is_loaded(), "invalid access - must be loaded");
assert(field->holder()->is_loaded(), "invalid access - holder must be loaded");
// The subclass requirement is waived for static fields.
// NOTE(review): presumably because static values are read from the holder's
// mirror object, whose class is unrelated to the holder - confirm.
assert(field->is_static() || klass()->is_subclass_of(field->holder()), "invalid access - must be subclass");
// NOTE(review): GUARDED_VM_ENTRY presumably wraps the statement in a VM
// entry when one is needed - confirm against ciUtilities.
GUARDED_VM_ENTRY(return field_value_impl(field->type()->basic_type(), field->offset());)
}
// ------------------------------------------------------------------
// ciInstance::field_value_by_offset
//

View file

@ -36,6 +36,7 @@
// instance of java.lang.Object.
class ciInstance : public ciObject {
CI_PACKAGE_ACCESS
friend class ciField;
protected:
ciInstance(instanceHandle h_i) : ciObject(h_i) {
@ -50,6 +51,8 @@ protected:
void print_impl(outputStream* st);
ciConstant field_value_impl(BasicType field_btype, int offset);
public:
// If this object is a java mirror, return the corresponding type.
// Otherwise, return NULL.

View file

@ -88,12 +88,7 @@ bool ciKlass::is_subclass_of(ciKlass* that) {
assert(this->is_loaded(), "must be loaded: %s", this->name()->as_quoted_ascii());
assert(that->is_loaded(), "must be loaded: %s", that->name()->as_quoted_ascii());
VM_ENTRY_MARK;
Klass* this_klass = get_Klass();
Klass* that_klass = that->get_Klass();
bool result = this_klass->is_subclass_of(that_klass);
return result;
GUARDED_VM_ENTRY(return get_Klass()->is_subclass_of(that->get_Klass());)
}
// ------------------------------------------------------------------

View file

@ -58,9 +58,7 @@ ciSymbol::ciSymbol(Symbol* s)
//
// The text of the symbol as a null-terminated C string.
const char* ciSymbol::as_utf8() {
VM_QUICK_ENTRY_MARK;
Symbol* s = get_symbol();
return s->as_utf8();
GUARDED_VM_QUICK_ENTRY(return get_symbol()->as_utf8();)
}
// The text of the symbol as a null-terminated C string.

View file

@ -2927,7 +2927,7 @@ static const intArray* sort_methods(Array<Method*>* methods) {
// If JVMTI original method ordering or sharing is enabled construct int
// array remembering the original ordering
if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
method_ordering = new intArray(length);
method_ordering = new intArray(length, length, -1);
for (int index = 0; index < length; index++) {
Method* const m = methods->at(index);
const int old_index = m->vtable_index();

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -42,7 +42,6 @@ class FieldInfo;
template <typename T>
class GrowableArray;
class InstanceKlass;
class intArray;
class Symbol;
class TempNewSymbol;

View file

@ -882,6 +882,10 @@
do_name( newArray_name, "newArray") \
do_signature(newArray_signature, "(Ljava/lang/Class;I)Ljava/lang/Object;") \
\
do_intrinsic(_onSpinWait, java_lang_Thread, onSpinWait_name, onSpinWait_signature, F_S) \
do_name( onSpinWait_name, "onSpinWait") \
do_alias( onSpinWait_signature, void_method_signature) \
\
do_intrinsic(_copyOf, java_util_Arrays, copyOf_name, copyOf_signature, F_S) \
do_name( copyOf_name, "copyOf") \
do_signature(copyOf_signature, "([Ljava/lang/Object;ILjava/lang/Class;)[Ljava/lang/Object;") \

View file

@ -287,7 +287,7 @@ void ParScanThreadState::print_promotion_failure_size() {
}
}
class ParScanThreadStateSet: private ResourceArray {
class ParScanThreadStateSet: StackObj {
public:
// Initializes states for the specified number of threads;
ParScanThreadStateSet(int num_threads,
@ -322,8 +322,10 @@ private:
ParallelTaskTerminator& _term;
ParNewGeneration& _young_gen;
Generation& _old_gen;
ParScanThreadState* _per_thread_states;
const int _num_threads;
public:
bool is_valid(int id) const { return id < length(); }
bool is_valid(int id) const { return id < _num_threads; }
ParallelTaskTerminator* terminator() { return &_term; }
};
@ -336,17 +338,18 @@ ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
PreservedMarksSet& preserved_marks_set,
size_t desired_plab_sz,
ParallelTaskTerminator& term)
: ResourceArray(sizeof(ParScanThreadState), num_threads),
_young_gen(young_gen),
: _young_gen(young_gen),
_old_gen(old_gen),
_term(term)
_term(term),
_per_thread_states(NEW_RESOURCE_ARRAY(ParScanThreadState, num_threads)),
_num_threads(num_threads)
{
assert(num_threads > 0, "sanity check!");
assert(ParGCUseLocalOverflow == (overflow_stacks != NULL),
"overflow_stack allocation mismatch");
// Initialize states.
for (int i = 0; i < num_threads; ++i) {
new ((ParScanThreadState*)_data + i)
new(_per_thread_states + i)
ParScanThreadState(&to_space, &young_gen, &old_gen, i, &queue_set,
overflow_stacks, preserved_marks_set.get(i),
desired_plab_sz, term);
@ -354,12 +357,12 @@ ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
}
inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i) {
assert(i >= 0 && i < length(), "sanity check!");
return ((ParScanThreadState*)_data)[i];
assert(i >= 0 && i < _num_threads, "sanity check!");
return _per_thread_states[i];
}
void ParScanThreadStateSet::trace_promotion_failed(const YoungGCTracer* gc_tracer) {
for (int i = 0; i < length(); ++i) {
for (int i = 0; i < _num_threads; ++i) {
if (thread_state(i).promotion_failed()) {
gc_tracer->report_promotion_failed(thread_state(i).promotion_failed_info());
thread_state(i).promotion_failed_info().reset();
@ -370,7 +373,7 @@ void ParScanThreadStateSet::trace_promotion_failed(const YoungGCTracer* gc_trace
void ParScanThreadStateSet::reset(uint active_threads, bool promotion_failed) {
_term.reset_for_reuse(active_threads);
if (promotion_failed) {
for (int i = 0; i < length(); ++i) {
for (int i = 0; i < _num_threads; ++i) {
thread_state(i).print_promotion_failure_size();
}
}
@ -385,7 +388,7 @@ void ParScanThreadState::reset_stats() {
}
void ParScanThreadStateSet::reset_stats() {
for (int i = 0; i < length(); ++i) {
for (int i = 0; i < _num_threads; ++i) {
thread_state(i).reset_stats();
}
}
@ -408,7 +411,7 @@ void ParScanThreadStateSet::print_termination_stats() {
print_termination_stats_hdr(st);
for (int i = 0; i < length(); ++i) {
for (int i = 0; i < _num_threads; ++i) {
const ParScanThreadState & pss = thread_state(i);
const double elapsed_ms = pss.elapsed_time() * 1000.0;
const double s_roots_ms = pss.strong_roots_time() * 1000.0;
@ -436,7 +439,7 @@ void ParScanThreadStateSet::print_taskqueue_stats() {
print_taskqueue_stats_hdr(st);
TaskQueueStats totals;
for (int i = 0; i < length(); ++i) {
for (int i = 0; i < _num_threads; ++i) {
const ParScanThreadState & pss = thread_state(i);
const TaskQueueStats & stats = pss.taskqueue_stats();
st->print("%3d ", i); stats.print(st); st->cr();
@ -459,7 +462,7 @@ void ParScanThreadStateSet::flush() {
// possible since this might otherwise become a bottleneck
// to scaling. Should we add heavy-weight work into this
// loop, consider parallelizing the loop into the worker threads.
for (int i = 0; i < length(); ++i) {
for (int i = 0; i < _num_threads; ++i) {
ParScanThreadState& par_scan_state = thread_state(i);
// Flush stats related to To-space PLAB activity and

View file

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -65,11 +65,12 @@ void Rewriter::compute_index_maps() {
// Record limits of resolved reference map for constant pool cache indices
record_map_limits();
guarantee((int)_cp_cache_map.length()-1 <= (int)((u2)-1),
guarantee((int) _cp_cache_map.length() - 1 <= (int) ((u2)-1),
"all cp cache indexes fit in a u2");
if (saw_mh_symbol)
_method_handle_invokers.initialize(length, (int)0);
if (saw_mh_symbol) {
_method_handle_invokers.at_grow(length, 0);
}
}
// Unrewrite the bytecodes if an error occurs.
@ -193,7 +194,7 @@ void Rewriter::maybe_rewrite_invokehandle(address opc, int cp_index, int cache_i
assert(_pool->tag_at(cp_index).is_method(), "wrong index");
// Determine whether this is a signature-polymorphic method.
if (cp_index >= _method_handle_invokers.length()) return;
int status = _method_handle_invokers[cp_index];
int status = _method_handle_invokers.at(cp_index);
assert(status >= -1 && status <= 1, "oob tri-state");
if (status == 0) {
if (_pool->klass_ref_at_noresolve(cp_index) == vmSymbols::java_lang_invoke_MethodHandle() &&
@ -211,7 +212,7 @@ void Rewriter::maybe_rewrite_invokehandle(address opc, int cp_index, int cache_i
} else {
status = -1;
}
_method_handle_invokers[cp_index] = status;
_method_handle_invokers.at(cp_index) = status;
}
// We use a special internal bytecode for such methods (if non-static).
// The basic reason for this is that such methods need an extra "appendix" argument
@ -287,7 +288,7 @@ void Rewriter::patch_invokedynamic_bytecodes() {
// add delta to each.
int resolved_index = _patch_invokedynamic_refs->at(i);
for (int entry = 0; entry < ConstantPoolCacheEntry::_indy_resolved_references_entries; entry++) {
assert(_invokedynamic_references_map[resolved_index+entry] == cache_index,
assert(_invokedynamic_references_map.at(resolved_index + entry) == cache_index,
"should be the same index");
_invokedynamic_references_map.at_put(resolved_index+entry,
cache_index + delta);
@ -520,7 +521,14 @@ void Rewriter::rewrite(instanceKlassHandle klass, TRAPS) {
Rewriter::Rewriter(instanceKlassHandle klass, const constantPoolHandle& cpool, Array<Method*>* methods, TRAPS)
: _klass(klass),
_pool(cpool),
_methods(methods)
_methods(methods),
_cp_map(cpool->length()),
_cp_cache_map(cpool->length() / 2),
_reference_map(cpool->length()),
_resolved_references_map(cpool->length() / 2),
_invokedynamic_references_map(cpool->length() / 2),
_method_handle_invokers(cpool->length()),
_invokedynamic_cp_cache_map(cpool->length() / 4)
{
// Rewrite bytecodes - exception here exits.

View file

@ -37,13 +37,13 @@ class Rewriter: public StackObj {
instanceKlassHandle _klass;
constantPoolHandle _pool;
Array<Method*>* _methods;
intArray _cp_map;
intStack _cp_cache_map; // for Methodref, Fieldref,
GrowableArray<int> _cp_map;
GrowableArray<int> _cp_cache_map; // for Methodref, Fieldref,
// InterfaceMethodref and InvokeDynamic
intArray _reference_map; // maps from cp index to resolved_refs index (or -1)
intStack _resolved_references_map; // for strings, methodHandle, methodType
intStack _invokedynamic_references_map; // for invokedynamic resolved refs
intArray _method_handle_invokers;
GrowableArray<int> _reference_map; // maps from cp index to resolved_refs index (or -1)
GrowableArray<int> _resolved_references_map; // for strings, methodHandle, methodType
GrowableArray<int> _invokedynamic_references_map; // for invokedynamic resolved refs
GrowableArray<int> _method_handle_invokers;
int _resolved_reference_limit;
// For mapping invokedynamic bytecodes, which are discovered during method
@ -51,28 +51,31 @@ class Rewriter: public StackObj {
// If there are any invokespecial/InterfaceMethodref special case bytecodes,
// these entries are added before invokedynamic entries so that the
// invokespecial bytecode 16 bit index doesn't overflow.
intStack _invokedynamic_cp_cache_map;
GrowableArray<int> _invokedynamic_cp_cache_map;
// For patching.
GrowableArray<address>* _patch_invokedynamic_bcps;
GrowableArray<int>* _patch_invokedynamic_refs;
void init_maps(int length) {
_cp_map.initialize(length, -1);
// Choose an initial value large enough that we don't get frequent
// calls to grow().
_cp_cache_map.initialize(length/2);
_cp_map.trunc_to(0);
_cp_map.at_grow(length, -1);
_cp_cache_map.trunc_to(0);
// Also cache resolved objects, in another different cache.
_reference_map.initialize(length, -1);
_resolved_references_map.initialize(length/2);
_invokedynamic_references_map.initialize(length/2);
_reference_map.trunc_to(0);
_reference_map.at_grow(length, -1);
_method_handle_invokers.trunc_to(0);
_resolved_references_map.trunc_to(0);
_invokedynamic_references_map.trunc_to(0);
_resolved_reference_limit = -1;
_first_iteration_cp_cache_limit = -1;
// invokedynamic specific fields
_invokedynamic_cp_cache_map.initialize(length/4);
_patch_invokedynamic_bcps = new GrowableArray<address>(length/4);
_patch_invokedynamic_refs = new GrowableArray<int>(length/4);
_invokedynamic_cp_cache_map.trunc_to(0);
_patch_invokedynamic_bcps = new GrowableArray<address>(length / 4);
_patch_invokedynamic_refs = new GrowableArray<int>(length / 4);
}
int _first_iteration_cp_cache_limit;
@ -90,10 +93,10 @@ class Rewriter: public StackObj {
return _cp_cache_map.length() - _first_iteration_cp_cache_limit;
}
int cp_entry_to_cp_cache(int i) { assert(has_cp_cache(i), "oob"); return _cp_map[i]; }
bool has_cp_cache(int i) { return (uint)i < (uint)_cp_map.length() && _cp_map[i] >= 0; }
int cp_entry_to_cp_cache(int i) { assert(has_cp_cache(i), "oob"); return _cp_map.at(i); }
bool has_cp_cache(int i) { return (uint) i < (uint) _cp_map.length() && _cp_map.at(i) >= 0; }
int add_map_entry(int cp_index, intArray* cp_map, intStack* cp_cache_map) {
int add_map_entry(int cp_index, GrowableArray<int>* cp_map, GrowableArray<int>* cp_cache_map) {
assert(cp_map->at(cp_index) == -1, "not twice on same cp_index");
int cache_index = cp_cache_map->append(cp_index);
cp_map->at_put(cp_index, cache_index);
@ -121,7 +124,7 @@ class Rewriter: public StackObj {
}
int invokedynamic_cp_cache_entry_pool_index(int cache_index) {
int cp_index = _invokedynamic_cp_cache_map[cache_index];
int cp_index = _invokedynamic_cp_cache_map.at(cache_index);
return cp_index;
}
@ -144,10 +147,10 @@ class Rewriter: public StackObj {
int cp_entry_to_resolved_references(int cp_index) const {
assert(has_entry_in_resolved_references(cp_index), "oob");
return _reference_map[cp_index];
return _reference_map.at(cp_index);
}
bool has_entry_in_resolved_references(int cp_index) const {
return (uint)cp_index < (uint)_reference_map.length() && _reference_map[cp_index] >= 0;
return (uint) cp_index < (uint) _reference_map.length() && _reference_map.at(cp_index) >= 0;
}
// add a new entry to the resolved_references map
@ -174,13 +177,13 @@ class Rewriter: public StackObj {
}
int resolved_references_entry_to_pool_index(int ref_index) {
int cp_index = _resolved_references_map[ref_index];
int cp_index = _resolved_references_map.at(ref_index);
return cp_index;
}
// Access the contents of _cp_cache_map to determine CP cache layout.
int cp_cache_entry_pool_index(int cache_index) {
int cp_index = _cp_cache_map[cache_index];
int cp_index = _cp_cache_map.at(cache_index);
return cp_index;
}

View file

@ -31,7 +31,7 @@
// BitsInByte is a lookup table which tells the number of bits that
// are in the looked-up number. It is very useful in VectorSet_Size.
uint8_t bitsInByte[256] = {
uint8_t bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,

View file

@ -27,6 +27,8 @@
#include "libadt/set.hpp"
#define BITS_IN_BYTE_ARRAY_SIZE 256
// Vector Sets - An Abstract Data Type
//INTERFACE

View file

@ -569,7 +569,7 @@ void ConstantPoolCache::initialize(const intArray& inverse_index_map,
const intArray& invokedynamic_references_map) {
for (int i = 0; i < inverse_index_map.length(); i++) {
ConstantPoolCacheEntry* e = entry_at(i);
int original_index = inverse_index_map[i];
int original_index = inverse_index_map.at(i);
e->initialize_entry(original_index);
assert(entry_at(i) == e, "sanity");
}
@ -579,19 +579,19 @@ void ConstantPoolCache::initialize(const intArray& inverse_index_map,
for (int i = 0; i < invokedynamic_inverse_index_map.length(); i++) {
int offset = i + invokedynamic_offset;
ConstantPoolCacheEntry* e = entry_at(offset);
int original_index = invokedynamic_inverse_index_map[i];
int original_index = invokedynamic_inverse_index_map.at(i);
e->initialize_entry(original_index);
assert(entry_at(offset) == e, "sanity");
}
for (int ref = 0; ref < invokedynamic_references_map.length(); ref++) {
const int cpci = invokedynamic_references_map[ref];
const int cpci = invokedynamic_references_map.at(ref);
if (cpci >= 0) {
#ifdef ASSERT
// invokedynamic and invokehandle have more entries; check if they
// all point to the same constant pool cache entry.
for (int entry = 1; entry < ConstantPoolCacheEntry::_indy_resolved_references_entries; entry++) {
const int cpci_next = invokedynamic_references_map[ref + entry];
const int cpci_next = invokedynamic_references_map.at(ref + entry);
assert(cpci == cpci_next, "%d == %d", cpci, cpci_next);
}
#endif

View file

@ -361,6 +361,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_getCallerClass:
if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return false;
break;
case vmIntrinsics::_onSpinWait:
if (!Matcher::match_rule_supported(Op_OnSpinWait)) return false;
break;
case vmIntrinsics::_hashCode:
case vmIntrinsics::_identityHashCode:
case vmIntrinsics::_getClass:

View file

@ -171,7 +171,6 @@ macro(LoadN)
macro(LoadRange)
macro(LoadS)
macro(Lock)
macro(Log10D)
macro(Loop)
macro(LoopLimit)
macro(Mach)
@ -205,6 +204,7 @@ macro(Multi)
macro(NegD)
macro(NegF)
macro(NeverBranch)
macro(OnSpinWait)
macro(Opaque1)
macro(Opaque2)
macro(Opaque3)
@ -264,7 +264,6 @@ macro(SubI)
macro(SubL)
macro(TailCall)
macro(TailJump)
macro(TanD)
macro(ThreadLocal)
macro(Unlock)
macro(URShiftI)

View file

@ -4467,6 +4467,25 @@ void GraphKit::inflate_string_slow(Node* src, Node* dst, Node* start, Node* coun
set_memory(st, TypeAryPtr::BYTES);
}
// Try to turn a read of 'field' (from 'obj' for non-static fields) into a
// constant node.
//
// Returns NULL when the field is not marked as constant or when
// Type::make_constant_from_field() cannot produce a constant type for it.
Node* GraphKit::make_constant_from_field(ciField* field, Node* obj) {
  if (!field->is_constant()) {
    return NULL; // Field not marked as constant.
  }

  // A holder instance is only looked up for non-static fields, and only when
  // the receiver's type carries a constant oop that is an instance.
  ciInstance* holder = NULL;
  if (!field->is_static()) {
    ciObject* receiver = obj->bottom_type()->is_oopptr()->const_oop();
    holder = (receiver != NULL && receiver->is_instance()) ? receiver->as_instance() : NULL;
  }

  const Type* folded = Type::make_constant_from_field(field, holder, field->layout_type(),
                                                      /*is_unsigned_load=*/false);
  return (folded != NULL) ? makecon(folded) : NULL;
}
Node* GraphKit::cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type) {
// Reify the property as a CastPP node in Ideal graph to comply with monotonicity
// assumption of CCP analysis.

View file

@ -910,6 +910,8 @@ class GraphKit : public Phase {
void add_predicate(int nargs = 0);
void add_predicate_impl(Deoptimization::DeoptReason reason, int nargs);
Node* make_constant_from_field(ciField* field, Node* obj);
// Produce new array node of stable type
Node* cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type);
};

View file

@ -281,6 +281,7 @@ class LibraryCallKit : public GraphKit {
MemNode::MemOrd access_kind_to_memord(AccessKind access_kind);
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
bool inline_unsafe_fence(vmIntrinsics::ID id);
bool inline_onspinwait();
bool inline_fp_conversions(vmIntrinsics::ID id);
bool inline_number_methods(vmIntrinsics::ID id);
bool inline_reference_get();
@ -696,6 +697,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_storeFence:
case vmIntrinsics::_fullFence: return inline_unsafe_fence(intrinsic_id());
case vmIntrinsics::_onSpinWait: return inline_onspinwait();
case vmIntrinsics::_currentThread: return inline_native_currentThread();
case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted();
@ -1677,7 +1680,6 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
switch (id) {
case vmIntrinsics::_dabs: n = new AbsDNode( arg); break;
case vmIntrinsics::_dsqrt: n = new SqrtDNode(C, control(), arg); break;
case vmIntrinsics::_dlog10: n = new Log10DNode(C, control(), arg); break;
default: fatal_unexpected_iid(id); break;
}
set_result(_gvn.transform(n));
@ -1691,10 +1693,6 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
Node* arg = round_double_node(argument(0));
Node* n = NULL;
switch (id) {
case vmIntrinsics::_dtan: n = new TanDNode(C, control(), arg); break;
default: fatal_unexpected_iid(id); break;
}
n = _gvn.transform(n);
// Rounding required? Check for argument reduction!
@ -1812,14 +1810,17 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
return StubRoutines::dcos() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcos(), "dcos") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS");
case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
case vmIntrinsics::_dtan:
return StubRoutines::dtan() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtan(), "dtan") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");
case vmIntrinsics::_dlog:
return StubRoutines::dlog() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG");
case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) :
case vmIntrinsics::_dlog10:
return StubRoutines::dlog10() != NULL ?
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog10(), "dlog10") :
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
// These intrinsics are supported on all hardware
@ -2550,13 +2551,9 @@ bool LibraryCallKit::inline_unsafe_access(const bool is_native_ptr, bool is_stor
Node* p = NULL;
// Try to constant fold a load from a constant field
ciField* field = alias_type->field();
if (heap_base_oop != top() &&
field != NULL && field->is_constant() && !mismatched) {
if (heap_base_oop != top() && field != NULL && field->is_constant() && !mismatched) {
// final or stable field
const Type* con_type = Type::make_constant(alias_type->field(), heap_base_oop);
if (con_type != NULL) {
p = makecon(con_type);
}
p = make_constant_from_field(field, heap_base_oop);
}
if (p == NULL) {
// To be valid, unsafe loads may depend on other conditions than
@ -3127,6 +3124,11 @@ bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
}
}
// Intrinsic expansion for vmIntrinsics::_onSpinWait (dispatched from
// try_to_inline): emits an Op_OnSpinWait barrier node and always reports
// that the intrinsic was inlined.
bool LibraryCallKit::inline_onspinwait() {
// The spin-wait hint is modelled as a membar-style node in the graph.
insert_mem_bar(Op_OnSpinWait);
return true;
}
bool LibraryCallKit::klass_needs_init_guard(Node* kls) {
if (!kls->is_Con()) {
return true;

View file

@ -860,7 +860,7 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
if (basic_elem_type == T_OBJECT || basic_elem_type == T_ARRAY) {
if (!elem_type->is_loaded()) {
field_type = TypeInstPtr::BOTTOM;
} else if (field != NULL && field->is_constant() && field->is_static()) {
} else if (field != NULL && field->is_static_constant()) {
// This can happen if the constant oop is non-perm.
ciObject* con = field->constant_value().as_object();
// Do not "join" in the previous type; it doesn't add value,

View file

@ -944,6 +944,7 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
case Op_StrInflatedCopy:
case Op_StrCompressedCopy:
case Op_OnSpinWait:
case Op_EncodeISOArray:
nidx = Compile::AliasIdxTop;
nat = NULL;

View file

@ -797,7 +797,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP
#endif
{
assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency);
load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr(), mo, control_dependency);
}
break;
}
@ -1621,72 +1621,6 @@ LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
return NULL;
}
// Reconcile the basic type of a constant with the basic type of the load that
// reads it.
//
// Returns the constant unchanged when both types agree after normalization,
// a zero-extended T_INT constant for the unsigned byte/char cases, and an
// invalid (T_ILLEGAL) constant for any other combination.
static ciConstant check_mismatched_access(ciConstant con, BasicType loadbt, bool is_unsigned) {
  // Normalize the constant's type: booleans are handled as bytes, arrays as objects.
  BasicType conbt = con.basic_type();
  if (conbt == T_BOOLEAN) {
    conbt = T_BYTE;
  } else if (conbt == T_ARRAY) {
    conbt = T_OBJECT;
  }

  // Normalize the load's type; narrow oops, arrays and addresses all count as objects.
  if (loadbt == T_BOOLEAN) {
    loadbt = T_BYTE;
  } else if (loadbt == T_NARROWOOP || loadbt == T_ARRAY || loadbt == T_ADDRESS) {
    loadbt = T_OBJECT;
  }

  if (conbt != loadbt) {
    if (conbt == T_SHORT && loadbt == T_CHAR) {
      // LoadS (T_SHORT) with a small mask (<=16-bit) is converted to LoadUS (T_CHAR).
      return ciConstant(T_INT, con.as_int() & 0xFFFF);
    }
    return ciConstant(); // T_ILLEGAL
  }

  if (is_unsigned && conbt == T_BYTE) {
    // LoadB (T_BYTE) with a small mask (<=8-bit) is converted to LoadUB (T_BYTE).
    return ciConstant(T_INT, con.as_int() & 0xFF);
  }
  return con;
}
// Try to constant-fold a load of an element from a constant, stable array.
//
// Returns the constant type of the element, or NULL when the offset does not
// name an element, the access is mismatched, the element still holds its
// default (null/zero) value, or no constant type can be made for it.
static const Type* fold_stable_ary_elem(const TypeAryPtr* ary, int off, bool is_unsigned_load, BasicType loadbt) {
  assert(ary->const_oop(), "array should be constant");
  assert(ary->is_stable(), "array should be stable");

  // Decode the results of GraphKit::array_element_address.
  ciArray* aobj = ary->const_oop()->as_array();
  ciConstant element_value = aobj->element_value_by_offset(off);
  if (element_value.basic_type() == T_ILLEGAL) {
    return NULL; // wrong offset
  }

  ciConstant con = check_mismatched_access(element_value, loadbt, is_unsigned_load);
  assert(con.basic_type() != T_ILLEGAL, "elembt=%s; loadbt=%s; unsigned=%d",
         type2name(element_value.basic_type()), type2name(loadbt), is_unsigned_load);

  if (con.basic_type() == T_ILLEGAL || // mismatched access
      con.is_null_or_zero()) {         // default value
    return NULL;
  }

  const Type* con_type = Type::make_from_constant(con);
  if (con_type == NULL) {
    return NULL;
  }

  if (con_type->isa_aryptr()) {
    // Join with the array element type, in case it is also stable.
    int dim = ary->stable_dimension();
    con_type = con_type->is_aryptr()->cast_to_stable(true, dim-1);
  }
  if (loadbt == T_NARROWOOP && con_type->isa_oopptr()) {
    con_type = con_type->make_narrowoop();
  }
#ifndef PRODUCT
  if (TraceIterativeGVN) {
    tty->print("FoldStableValues: array element [off=%d]: con_type=", off);
    con_type->dump(); tty->cr();
  }
#endif //PRODUCT
  return con_type;
}
//------------------------------Value-----------------------------------------
const Type* LoadNode::Value(PhaseGVN* phase) const {
// Either input is TOP ==> the result is TOP
@ -1715,10 +1649,14 @@ const Type* LoadNode::Value(PhaseGVN* phase) const {
const bool off_beyond_header = ((uint)off >= (uint)min_base_off);
// Try to constant-fold a stable array element.
if (FoldStableValues && !is_mismatched_access() && ary->is_stable() && ary->const_oop() != NULL) {
if (FoldStableValues && !is_mismatched_access() && ary->is_stable()) {
// Make sure the reference is not into the header and the offset is constant
if (off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
const Type* con_type = fold_stable_ary_elem(ary, off, is_unsigned(), memory_type());
ciObject* aobj = ary->const_oop();
if (aobj != NULL && off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
int stable_dimension = (ary->stable_dimension() > 0 ? ary->stable_dimension() - 1 : 0);
const Type* con_type = Type::make_constant_from_array_element(aobj->as_array(), off,
stable_dimension,
memory_type(), is_unsigned());
if (con_type != NULL) {
return con_type;
}
@ -1785,28 +1723,10 @@ const Type* LoadNode::Value(PhaseGVN* phase) const {
// For oop loads, we expect the _type to be precise.
// Optimizations for constant objects
ciObject* const_oop = tinst->const_oop();
if (const_oop != NULL) {
// For constant CallSites treat the target field as a compile time constant.
if (const_oop->is_call_site()) {
ciCallSite* call_site = const_oop->as_call_site();
ciField* field = call_site->klass()->as_instance_klass()->get_field_by_offset(off, /*is_static=*/ false);
if (field != NULL && field->is_call_site_target()) {
ciMethodHandle* target = call_site->get_target();
if (target != NULL) { // just in case
ciConstant constant(T_OBJECT, target);
const Type* t;
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
t = TypeNarrowOop::make_from_constant(constant.as_object(), true);
} else {
t = TypeOopPtr::make_from_constant(constant.as_object(), true);
}
// Add a dependence for invalidation of the optimization.
if (!call_site->is_constant_call_site()) {
C->dependencies()->assert_call_site_target_value(call_site, target);
}
return t;
}
}
if (const_oop != NULL && const_oop->is_instance()) {
const Type* con_type = Type::make_constant_from_field(const_oop->as_instance(), off, is_unsigned(), memory_type());
if (con_type != NULL) {
return con_type;
}
}
} else if (tp->base() == Type::KlassPtr) {
@ -2979,6 +2899,7 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
case Op_MemBarReleaseLock: return new MemBarReleaseLockNode(C, atp, pn);
case Op_MemBarVolatile: return new MemBarVolatileNode(C, atp, pn);
case Op_MemBarCPUOrder: return new MemBarCPUOrderNode(C, atp, pn);
case Op_OnSpinWait: return new OnSpinWaitNode(C, atp, pn);
case Op_Initialize: return new InitializeNode(C, atp, pn);
case Op_MemBarStoreStore: return new MemBarStoreStoreNode(C, atp, pn);
default: ShouldNotReachHere(); return NULL;

View file

@ -1186,6 +1186,13 @@ public:
virtual uint ideal_reg() const { return 0; } // not matched in the AD file
};
// Node for the Op_OnSpinWait opcode. It is a MemBarNode subclass that adds no
// state of its own; it only supplies its own Opcode().
class OnSpinWaitNode: public MemBarNode {
public:
// Forwards all arguments unchanged to the MemBarNode constructor.
OnSpinWaitNode(Compile* C, int alias_idx, Node* precedent)
: MemBarNode(C, alias_idx, precedent) {}
virtual int Opcode() const;
};
// Isolation of object setup after an AllocateNode and before next safepoint.
// (See comment in memnode.cpp near InitializeNode::InitializeNode for semantics.)
class InitializeNode: public MemBarNode {

View file

@ -1483,8 +1483,6 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
// Compute the size of the first block
_first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
assert(cb->insts_size() < 500000, "method is unreasonably large");
#ifdef ASSERT
for (uint i = 0; i < nblocks; i++) { // For all blocks
if (jmp_target[i] != 0) {

View file

@ -149,9 +149,9 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
// Does this field have a constant value? If so, just push the value.
if (field->is_constant()) {
// final or stable field
const Type* con_type = Type::make_constant(field, obj);
if (con_type != NULL) {
push_node(con_type->basic_type(), makecon(con_type));
Node* con = make_constant_from_field(field, obj);
if (con != NULL) {
push_node(field->layout_type(), con);
return;
}
}
@ -174,12 +174,16 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
if (!field->type()->is_loaded()) {
type = TypeInstPtr::BOTTOM;
must_assert_null = true;
} else if (field->is_constant() && field->is_static()) {
} else if (field->is_static_constant()) {
// This can happen if the constant oop is non-perm.
ciObject* con = field->constant_value().as_object();
// Do not "join" in the previous type; it doesn't add value,
// and may yield a vacuous result if the field is of interface type.
if (con->is_null_object()) {
type = TypePtr::NULL_PTR;
} else {
type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
}
assert(type != NULL, "field singleton type must be consistent");
} else {
type = TypeOopPtr::make_from_klass(field_klass->as_klass());

View file

@ -389,7 +389,7 @@ bool RegMask::is_UP() const {
//------------------------------Size-------------------------------------------
// Compute size of register mask in bits
uint RegMask::Size() const {
extern uint8_t bitsInByte[512];
extern uint8_t bitsInByte[BITS_IN_BYTE_ARRAY_SIZE];
uint sum = 0;
for( int i = 0; i < RM_SIZE; i++ )
sum +=

View file

@ -1112,7 +1112,7 @@ Node* PhaseStringOpts::fetch_static_field(GraphKit& kit, ciField* field) {
if( bt == T_OBJECT ) {
if (!field->type()->is_loaded()) {
type = TypeInstPtr::BOTTOM;
} else if (field->is_constant()) {
} else if (field->is_static_constant()) {
// This can happen if the constant oop is non-perm.
ciObject* con = field->constant_value().as_object();
// Do not "join" in the previous type; it doesn't add value,

View file

@ -1533,25 +1533,3 @@ const Type* SqrtDNode::Value(PhaseGVN* phase) const {
if( d < 0.0 ) return Type::DOUBLE;
return TypeD::make( sqrt( d ) );
}
//=============================================================================
//------------------------------Value------------------------------------------
// Constant-fold tan: TOP stays TOP, a non-constant input yields Type::DOUBLE,
// and a double constant is folded through StubRoutines::intrinsic_tan().
const Type* TanDNode::Value(PhaseGVN* phase) const {
  const Type* arg_type = phase->type(in(1));
  if (arg_type == Type::TOP) {
    return Type::TOP;
  }
  if (arg_type->base() != Type::DoubleCon) {
    return Type::DOUBLE;
  }
  return TypeD::make(StubRoutines::intrinsic_tan(arg_type->getd()));
}
//=============================================================================
//------------------------------Value------------------------------------------
// Constant-fold log10: TOP stays TOP, a non-constant input yields
// Type::DOUBLE, and a double constant is folded through
// StubRoutines::intrinsic_log10().
const Type* Log10DNode::Value(PhaseGVN* phase) const {
  const Type* arg_type = phase->type(in(1));
  if (arg_type == Type::TOP) {
    return Type::TOP;
  }
  if (arg_type->base() != Type::DoubleCon) {
    return Type::DOUBLE;
  }
  return TypeD::make(StubRoutines::intrinsic_log10(arg_type->getd()));
}

View file

@ -408,21 +408,6 @@ public:
virtual uint ideal_reg() const { return Op_RegD; }
};
//------------------------------TanDNode---------------------------------------
// Tangent of a double.
// Inputs are a control node (c) and the double argument (in1).
class TanDNode : public Node {
public:
// Marks the node as expensive and registers it with the compilation.
TanDNode(Compile* C, Node *c,Node *in1) : Node(c, in1) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
}
virtual int Opcode() const;
// The result is always typed as a double and lives in a double register.
const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual const Type* Value(PhaseGVN* phase) const;
};
//------------------------------AtanDNode--------------------------------------
// arcus tangens of a double
class AtanDNode : public Node {
@ -448,20 +433,6 @@ public:
virtual const Type* Value(PhaseGVN* phase) const;
};
//------------------------------Log10DNode---------------------------------------
// Base-10 logarithm of a double.
// Inputs are a control node (c) and the double argument (in1).
class Log10DNode : public Node {
public:
// Marks the node as expensive and registers it with the compilation.
Log10DNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
init_flags(Flag_is_expensive);
C->add_expensive_node(this);
}
virtual int Opcode() const;
// The result is always typed as a double and lives in a double register.
const Type *bottom_type() const { return Type::DOUBLE; }
virtual uint ideal_reg() const { return Op_RegD; }
virtual const Type* Value(PhaseGVN* phase) const;
};
//-------------------------------ReverseBytesINode--------------------------------
// reverse bytes of an integer
class ReverseBytesINode : public Node {

View file

@ -225,7 +225,9 @@ const Type* Type::get_typeflow_type(ciType* type) {
//-----------------------make_from_constant------------------------------------
const Type* Type::make_from_constant(ciConstant constant, bool require_constant) {
const Type* Type::make_from_constant(ciConstant constant, bool require_constant,
int stable_dimension, bool is_narrow_oop,
bool is_autobox_cache) {
switch (constant.basic_type()) {
case T_BOOLEAN: return TypeInt::make(constant.as_boolean());
case T_CHAR: return TypeInt::make(constant.as_char());
@ -236,18 +238,32 @@ const Type* Type::make_from_constant(ciConstant constant, bool require_constant)
case T_FLOAT: return TypeF::make(constant.as_float());
case T_DOUBLE: return TypeD::make(constant.as_double());
case T_ARRAY:
case T_OBJECT:
{
case T_OBJECT: {
// cases:
// can_be_constant = (oop not scavengable || ScavengeRootsInCode != 0)
// should_be_constant = (oop not scavengable || ScavengeRootsInCode >= 2)
// An oop is not scavengable if it is in the perm gen.
const Type* con_type = NULL;
ciObject* oop_constant = constant.as_object();
if (oop_constant->is_null_object()) {
return Type::get_zero_type(T_OBJECT);
con_type = Type::get_zero_type(T_OBJECT);
} else if (require_constant || oop_constant->should_be_constant()) {
return TypeOopPtr::make_from_constant(oop_constant, require_constant);
con_type = TypeOopPtr::make_from_constant(oop_constant, require_constant);
if (con_type != NULL) {
if (Compile::current()->eliminate_boxing() && is_autobox_cache) {
con_type = con_type->is_aryptr()->cast_to_autobox_cache(true);
}
if (stable_dimension > 0) {
assert(FoldStableValues, "sanity");
assert(!con_type->is_zero_type(), "default value for stable field");
con_type = con_type->is_aryptr()->cast_to_stable(true, stable_dimension);
}
}
}
if (is_narrow_oop) {
con_type = con_type->make_narrowoop();
}
return con_type;
}
case T_ILLEGAL:
// Invalid ciConstant returned due to OutOfMemoryError in the CI
@ -258,41 +274,107 @@ const Type* Type::make_from_constant(ciConstant constant, bool require_constant)
return NULL;
}
// Reconcile the basic type of a constant with the basic type of the load that
// reads it.
//
// Returns the constant unchanged when both types agree after normalization,
// a zero-extended T_INT constant for the unsigned byte/char cases, and an
// invalid (T_ILLEGAL) constant for any other combination.
static ciConstant check_mismatched_access(ciConstant con, BasicType loadbt, bool is_unsigned) {
  // Normalize the constant's type: booleans are handled as bytes, arrays as objects.
  BasicType conbt = con.basic_type();
  if (conbt == T_BOOLEAN) {
    conbt = T_BYTE;
  } else if (conbt == T_ARRAY) {
    conbt = T_OBJECT;
  }

  // Normalize the load's type; narrow oops, arrays and addresses all count as objects.
  if (loadbt == T_BOOLEAN) {
    loadbt = T_BYTE;
  } else if (loadbt == T_NARROWOOP || loadbt == T_ARRAY || loadbt == T_ADDRESS) {
    loadbt = T_OBJECT;
  }

  if (conbt != loadbt) {
    if (conbt == T_SHORT && loadbt == T_CHAR) {
      // LoadS (T_SHORT) with a small mask (<=16-bit) is converted to LoadUS (T_CHAR).
      return ciConstant(T_INT, con.as_int() & 0xFFFF);
    }
    return ciConstant(); // T_ILLEGAL
  }

  if (is_unsigned && conbt == T_BYTE) {
    // LoadB (T_BYTE) with a small mask (<=8-bit) is converted to LoadUB (T_BYTE).
    return ciConstant(T_INT, con.as_int() & 0xFF);
  }
  return con;
}
const Type* Type::make_constant(ciField* field, Node* obj) {
if (!field->is_constant()) return NULL;
// Try to constant-fold a stable array element.
const Type* Type::make_constant_from_array_element(ciArray* array, int off, int stable_dimension,
BasicType loadbt, bool is_unsigned_load) {
// Decode the results of GraphKit::array_element_address.
ciConstant element_value = array->element_value_by_offset(off);
if (element_value.basic_type() == T_ILLEGAL) {
return NULL; // wrong offset
}
ciConstant con = check_mismatched_access(element_value, loadbt, is_unsigned_load);
const Type* con_type = NULL;
assert(con.basic_type() != T_ILLEGAL, "elembt=%s; loadbt=%s; unsigned=%d",
type2name(element_value.basic_type()), type2name(loadbt), is_unsigned_load);
if (con.is_valid() && // not a mismatched access
!con.is_null_or_zero()) { // not a default value
bool is_narrow_oop = (loadbt == T_NARROWOOP);
return Type::make_from_constant(con, /*require_constant=*/true, stable_dimension, is_narrow_oop, /*is_autobox_cache=*/false);
}
return NULL;
}
// Resolve the field found at byte offset 'off' in 'holder' and try to fold a
// load of it into a constant type.
//
// The offset is looked up as a static field when 'holder' has a java mirror
// type that is an instance klass and 'off' lies at or beyond
// InstanceMirrorKlass::offset_of_static_fields(); otherwise it is looked up as
// an instance field of holder's own klass. Returns NULL when no field is
// found at that offset; otherwise the result of the field-based overload.
const Type* Type::make_constant_from_field(ciInstance* holder, int off, bool is_unsigned_load, BasicType loadbt) {
  ciType* mirrored = holder->java_mirror_type();
  const bool is_static_access = mirrored != NULL &&
                                mirrored->is_instance_klass() &&
                                off >= InstanceMirrorKlass::offset_of_static_fields();

  ciField* field;
  if (!is_static_access) {
    // Instance field
    field = holder->klass()->as_instance_klass()->get_field_by_offset(off, /*is_static=*/false);
  } else {
    // Static field
    field = mirrored->as_instance_klass()->get_field_by_offset(off, /*is_static=*/true);
  }

  if (field == NULL) {
    return NULL; // Wrong offset
  }
  return Type::make_constant_from_field(field, holder, loadbt, is_unsigned_load);
}
const Type* Type::make_constant_from_field(ciField* field, ciInstance* holder,
BasicType loadbt, bool is_unsigned_load) {
if (!field->is_constant()) {
return NULL; // Non-constant field
}
ciConstant field_value;
if (field->is_static()) {
// final static field
con_type = Type::make_from_constant(field->constant_value(), /*require_const=*/true);
if (Compile::current()->eliminate_boxing() && field->is_autobox_cache() && con_type != NULL) {
con_type = con_type->is_aryptr()->cast_to_autobox_cache(true);
}
} else {
field_value = field->constant_value();
} else if (holder != NULL) {
// final or stable non-static field
// Treat final non-static fields of trusted classes (classes in
// java.lang.invoke and sun.invoke packages and subpackages) as
// compile time constants.
if (obj->is_Con()) {
const TypeOopPtr* oop_ptr = obj->bottom_type()->isa_oopptr();
ciObject* constant_oop = oop_ptr->const_oop();
ciConstant constant = field->constant_value_of(constant_oop);
con_type = Type::make_from_constant(constant, /*require_const=*/true);
field_value = field->constant_value_of(holder);
}
if (!field_value.is_valid()) {
return NULL; // Not a constant
}
if (FoldStableValues && field->is_stable() && con_type != NULL) {
if (con_type->is_zero_type()) {
return NULL; // the field hasn't been initialized yet
} else if (con_type->isa_oopptr()) {
const Type* stable_type = Type::get_const_type(field->type());
if (field->type()->is_array_klass()) {
int stable_dimension = field->type()->as_array_klass()->dimension();
stable_type = stable_type->is_aryptr()->cast_to_stable(true, stable_dimension);
}
if (stable_type != NULL) {
con_type = con_type->join_speculative(stable_type);
}
ciConstant con = check_mismatched_access(field_value, loadbt, is_unsigned_load);
assert(con.is_valid(), "elembt=%s; loadbt=%s; unsigned=%d",
type2name(field_value.basic_type()), type2name(loadbt), is_unsigned_load);
bool is_stable_array = FoldStableValues && field->is_stable() && field->type()->is_array_klass();
int stable_dimension = (is_stable_array ? field->type()->as_array_klass()->dimension() : 0);
bool is_narrow_oop = (loadbt == T_NARROWOOP);
const Type* con_type = make_from_constant(con, /*require_constant=*/ true,
stable_dimension, is_narrow_oop,
field->is_autobox_cache());
if (con_type != NULL && field->is_call_site_target()) {
ciCallSite* call_site = holder->as_call_site();
if (!call_site->is_constant_call_site()) {
ciMethodHandle* target = call_site->get_target();
Compile::current()->dependencies()->assert_call_site_target_value(call_site, target);
}
}
return con_type;

View file

@ -417,9 +417,26 @@ public:
static const Type* get_typeflow_type(ciType* type);
static const Type* make_from_constant(ciConstant constant,
bool require_constant = false);
bool require_constant = false,
int stable_dimension = 0,
bool is_narrow = false,
bool is_autobox_cache = false);
static const Type* make_constant(ciField* field, Node* obj);
// Constant-fold a load from the field found at offset 'off' of 'holder'.
// Resolves the field (static or instance) by offset and delegates to the
// ciField* overload; returns NULL when no field exists at that offset.
static const Type* make_constant_from_field(ciInstance* holder,
int off,
bool is_unsigned_load,
BasicType loadbt);
// Constant-fold a load of 'field' from 'holder', where 'loadbt' is the basic
// type of the load and 'is_unsigned_load' says whether the load is unsigned.
// Returns NULL if the field is not constant or no valid constant value is
// available for it.
static const Type* make_constant_from_field(ciField* field,
ciInstance* holder,
BasicType loadbt,
bool is_unsigned_load);
// Try to constant-fold a load of the stable array element at offset 'off'.
// 'stable_dimension' is forwarded to make_from_constant(). Returns NULL for a
// wrong offset, a mismatched access, or an element that is still null/zero.
static const Type* make_constant_from_array_element(ciArray* array,
int off,
int stable_dimension,
BasicType loadbt,
bool is_unsigned_load);
// Speculative type helper methods. See TypePtr.
// Default implementation: always reports NULL.
virtual const TypePtr* speculative() const {
  return NULL;
}

Some files were not shown because too many files have changed in this diff Show more