mirror of
https://github.com/openjdk/jdk.git
synced 2025-08-28 15:24:43 +02:00
Merge
This commit is contained in:
commit
b2ead5ac2d
142 changed files with 12761 additions and 10109 deletions
|
@ -83,6 +83,21 @@ suite = {
|
|||
"workingSets" : "API,JVMCI",
|
||||
},
|
||||
|
||||
"jdk.vm.ci.code.test" : {
|
||||
"subDir" : "test/compiler/jvmci",
|
||||
"sourceDirs" : ["src"],
|
||||
"dependencies" : [
|
||||
"mx:JUNIT",
|
||||
"jdk.vm.ci.amd64",
|
||||
"jdk.vm.ci.sparc",
|
||||
"jdk.vm.ci.code",
|
||||
"jdk.vm.ci.hotspot",
|
||||
],
|
||||
"checkstyle" : "jdk.vm.ci.services",
|
||||
"javaCompliance" : "1.8",
|
||||
"workingSets" : "API,JVMCI",
|
||||
},
|
||||
|
||||
"jdk.vm.ci.runtime" : {
|
||||
"subDir" : "src/jdk.vm.ci/share/classes",
|
||||
"sourceDirs" : ["src"],
|
||||
|
@ -164,7 +179,7 @@ suite = {
|
|||
"subDir" : "test/compiler/jvmci",
|
||||
"sourceDirs" : ["src"],
|
||||
"dependencies" : [
|
||||
"mx:TESTNG",
|
||||
"TESTNG",
|
||||
"jdk.vm.ci.hotspot",
|
||||
],
|
||||
"checkstyle" : "jdk.vm.ci.services",
|
||||
|
|
|
@ -4190,55 +4190,6 @@ encode %{
|
|||
}
|
||||
%}
|
||||
|
||||
// Encoding that zeroes cnt words starting at base. Both input registers are
// modified, and rscratch1/rscratch2 are used as temporaries. The stores are
// an 8-way unrolled run of str(zr) that is entered through a computed branch
// so the first pass only writes the cnt % 8 leftover words.
enc_class aarch64_enc_clear_array_reg_reg(iRegL_R11 cnt, iRegP_R10 base) %{
|
||||
MacroAssembler _masm(&cbuf);
|
||||
Register cnt_reg = as_Register($cnt$$reg);
|
||||
Register base_reg = as_Register($base$$reg);
|
||||
// base is word aligned
|
||||
// cnt is count of words
|
||||
|
||||
Label loop;
|
||||
Label entry;
|
||||
|
||||
// Algorithm:
|
||||
//
|
||||
// scratch1 = cnt & 7;
|
||||
// cnt -= scratch1;
|
||||
// p += scratch1;
|
||||
// switch (scratch1) {
|
||||
// do {
|
||||
// cnt -= 8;
|
||||
// p[-8] = 0;
|
||||
|
||||
// case 7:
|
||||
|
||||
// p[-7] = 0;
|
||||
// case 6:
|
||||
// p[-6] = 0;
|
||||
// // ...
|
||||
// case 1:
|
||||
// p[-1] = 0;
|
||||
// case 0:
|
||||
// p += 8;
|
||||
// } while (cnt);
|
||||
// }
|
||||
|
||||
const int unroll = 8; // Number of str(zr) instructions we'll unroll
|
||||
|
||||
__ andr(rscratch1, cnt_reg, unroll - 1); // tmp1 = cnt % unroll
|
||||
__ sub(cnt_reg, cnt_reg, rscratch1); // cnt -= tmp1 (cnt is now a multiple of unroll)
|
||||
// base_reg always points to the end of the region we're about to zero
|
||||
__ add(base_reg, base_reg, rscratch1, Assembler::LSL, exact_log2(wordSize));
|
||||
__ adr(rscratch2, entry);
|
||||
// Back up tmp1 * 4 bytes from 'entry', i.e. one str instruction per leftover word.
__ sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
|
||||
__ br(rscratch2);
|
||||
__ bind(loop);
|
||||
__ sub(cnt_reg, cnt_reg, unroll);
|
||||
for (int i = -unroll; i < 0; i++)
|
||||
__ str(zr, Address(base_reg, i * wordSize));
|
||||
__ bind(entry);
|
||||
__ add(base_reg, base_reg, unroll * wordSize);
|
||||
__ cbnz(cnt_reg, loop);
|
||||
%}
|
||||
|
||||
/// mov encodings
|
||||
|
||||
enc_class aarch64_enc_movw_imm(iRegI dst, immI src) %{
|
||||
|
@ -13363,7 +13314,9 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
|
|||
ins_cost(4 * INSN_COST);
|
||||
format %{ "ClearArray $cnt, $base" %}
|
||||
|
||||
ins_encode(aarch64_enc_clear_array_reg_reg(cnt, base));
|
||||
ins_encode %{
|
||||
__ zero_words($base$$Register, $cnt$$Register);
|
||||
%}
|
||||
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
|
|
@ -2942,6 +2942,10 @@ void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); }
|
|||
|
||||
void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); }
|
||||
|
||||
// Spin-wait hint hook. No instruction is emitted here: the body only calls
// Unimplemented().
// NOTE(review): presumably shared code must not emit this op on this port - confirm.
void LIR_Assembler::on_spin_wait() {
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
// Emits a move of rthread into the register that backs result_reg.
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
|
||||
__ mov(result_reg->as_register(), rthread);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
|
@ -1127,7 +1127,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
|||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
|
|
@ -4670,6 +4670,61 @@ void MacroAssembler::arrays_equals(Register a1, Register a2,
|
|||
BLOCK_COMMENT(is_string ? "} string_equals" : "} array_equals");
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be zeroed, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte unit.
|
||||
void MacroAssembler::zero_words(Register base, Register cnt)
|
||||
{
|
||||
// Zeroing is just a word fill whose fill value is zr.
fill_words(base, cnt, zr);
|
||||
}
|
||||
|
||||
// base: Address of a buffer to be filled, 8 bytes aligned.
|
||||
// cnt: Count in 8-byte unit.
|
||||
// value: Value to be filled with.
|
||||
// base will point to the end of the buffer after filling.
|
||||
// Uses rscratch1 and rscratch2 as temporaries; base, cnt and value must be
// distinct from each other and from those two registers. cnt is zero on exit.
void MacroAssembler::fill_words(Register base, Register cnt, Register value)
|
||||
{
|
||||
// Algorithm:
|
||||
//
|
||||
// scratch1 = cnt & 7;
|
||||
// cnt -= scratch1;
|
||||
// p += scratch1;
|
||||
// switch (scratch1) {
|
||||
// do {
// p += 8;
|
||||
// cnt -= 8;
|
||||
// p[-8] = v;
|
||||
// case 7:
|
||||
// p[-7] = v;
|
||||
// case 6:
|
||||
// p[-6] = v;
|
||||
// // ...
|
||||
// case 1:
|
||||
// p[-1] = v;
|
||||
// case 0:
|
||||
// ; // p is advanced at the top of the loop, so it finishes at the end of the buffer
|
||||
// } while (cnt);
|
||||
// }
|
||||
|
||||
assert_different_registers(base, cnt, value, rscratch1, rscratch2);
|
||||
|
||||
Label entry, loop;
|
||||
const int unroll = 8; // Number of str instructions we'll unroll
|
||||
|
||||
andr(rscratch1, cnt, unroll - 1); // tmp1 = cnt % unroll
|
||||
// No leftover words: skip the adjustments and go straight to the loop test.
cbz(rscratch1, entry);
|
||||
sub(cnt, cnt, rscratch1); // cnt -= tmp1
|
||||
// base always points to the end of the region we're about to fill
|
||||
add(base, base, rscratch1, Assembler::LSL, 3);
|
||||
adr(rscratch2, entry);
|
||||
// Back up tmp1 * 4 bytes from 'entry', i.e. one str instruction per leftover word.
sub(rscratch2, rscratch2, rscratch1, Assembler::LSL, 2);
|
||||
br(rscratch2);
|
||||
bind(loop);
|
||||
// Advance first so the negative offsets below cover the next 8 words.
add(base, base, unroll * 8);
|
||||
sub(cnt, cnt, unroll);
|
||||
for (int i = -unroll; i < 0; i++)
|
||||
str(value, Address(base, i * 8));
|
||||
bind(entry);
|
||||
cbnz(cnt, loop);
|
||||
}
|
||||
|
||||
// encode char[] to byte[] in ISO_8859_1
|
||||
void MacroAssembler::encode_iso_array(Register src, Register dst,
|
||||
|
|
|
@ -1184,6 +1184,9 @@ public:
|
|||
Register result, Register cnt1,
|
||||
int elem_size, bool is_string);
|
||||
|
||||
// Fill cnt 8-byte words starting at base (8-byte aligned) with value.
void fill_words(Register base, Register cnt, Register value);
|
||||
// Zero cnt 8-byte words starting at base (8-byte aligned).
void zero_words(Register base, Register cnt);
|
||||
|
||||
void encode_iso_array(Register src, Register dst,
|
||||
Register len, Register result,
|
||||
FloatRegister Vtmp1, FloatRegister Vtmp2,
|
||||
|
|
|
@ -2022,6 +2022,136 @@ class StubGenerator: public StubCodeGenerator {
|
|||
return start;
|
||||
}
|
||||
|
||||
//
|
||||
// Generate stub for array fill. If "aligned" is true, the
|
||||
// "to" address is assumed to be heapword aligned.
|
||||
//
|
||||
// Arguments for generated stub:
|
||||
// to: c_rarg0
|
||||
// value: c_rarg1
|
||||
// count: c_rarg2 treated as signed
|
||||
//
|
||||
// Emits the fill stub for element type t (T_BYTE, T_SHORT or T_INT) under the
// given stub name and returns its entry address. Fills shorter than 8 bytes
// are done element-wise. Longer fills first align 'to' to 8 bytes (skipped
// when 'aligned' is true), then fill whole 8-byte words with fill_words(),
// then finish the tail.
address generate_fill(BasicType t, bool aligned, const char *name) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", name);
|
||||
address start = __ pc();
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
|
||||
const Register to = c_rarg0; // destination array address
|
||||
const Register value = c_rarg1; // value
|
||||
const Register count = c_rarg2; // elements count
|
||||
const Register cnt_words = c_rarg3; // temp register
|
||||
|
||||
__ enter();
|
||||
|
||||
// NOTE(review): this L_exit1 is shadowed by the L_exit1 declared in the
// tail-store block below and is not bound or branched to in this function.
Label L_fill_elements, L_exit1;
|
||||
|
||||
// shift = log2(element size in bytes). Each case also replicates the
// element across the low 32 bits of value.
int shift = -1;
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
shift = 0;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ bfi(value, value, 8, 8); // 8 bit -> 16 bit
|
||||
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
case T_SHORT:
|
||||
shift = 1;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ bfi(value, value, 16, 16); // 16 bit -> 32 bit
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
case T_INT:
|
||||
shift = 2;
|
||||
__ cmpw(count, 8 >> shift); // Short arrays (< 8 bytes) fill by element
|
||||
__ br(Assembler::LO, L_fill_elements);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
// Align destination address to an 8-byte boundary.
|
||||
Label L_skip_align1, L_skip_align2, L_skip_align4;
|
||||
if (!aligned) {
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
// One byte misalignment happens only for byte arrays.
|
||||
__ tbz(to, 0, L_skip_align1);
|
||||
__ strb(value, Address(__ post(to, 1)));
|
||||
__ subw(count, count, 1);
|
||||
__ bind(L_skip_align1);
|
||||
// Fallthrough
|
||||
case T_SHORT:
|
||||
// Two bytes misalignment happens only for byte and short (char) arrays.
|
||||
__ tbz(to, 1, L_skip_align2);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ subw(count, count, 2 >> shift);
|
||||
__ bind(L_skip_align2);
|
||||
// Fallthrough
|
||||
case T_INT:
|
||||
// Align to 8 bytes, we know we are 4 byte aligned to start.
|
||||
__ tbz(to, 2, L_skip_align4);
|
||||
__ strw(value, Address(__ post(to, 4)));
|
||||
__ subw(count, count, 4 >> shift);
|
||||
__ bind(L_skip_align4);
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Fill large chunks
|
||||
//
|
||||
__ lsrw(cnt_words, count, 3 - shift); // number of words
|
||||
__ bfi(value, value, 32, 32); // 32 bit -> 64 bit
|
||||
// count = elements left over once the whole words are accounted for.
__ subw(count, count, cnt_words, Assembler::LSL, 3 - shift);
|
||||
__ fill_words(to, cnt_words, value);
|
||||
|
||||
// Remaining count is less than 8 bytes. Fill it by a single store.
|
||||
// Note that the total length is no less than 8 bytes.
|
||||
if (t == T_BYTE || t == T_SHORT) {
|
||||
Label L_exit1;
|
||||
__ cbzw(count, L_exit1);
|
||||
__ add(to, to, count, Assembler::LSL, shift); // points to the end
|
||||
__ str(value, Address(to, -8)); // overwrite some elements
|
||||
__ bind(L_exit1);
|
||||
__ leave();
|
||||
__ ret(lr);
|
||||
}
|
||||
|
||||
// Handle fills of less than 8 bytes. For T_INT this is also reached by
// fall-through after the word fill, to store a remaining element if any.
|
||||
Label L_fill_2, L_fill_4, L_exit2;
|
||||
__ bind(L_fill_elements);
|
||||
switch (t) {
|
||||
case T_BYTE:
|
||||
// Bits 0, 1 and 2 of count select a 1-, 2- and 4-byte store respectively.
__ tbz(count, 0, L_fill_2);
|
||||
__ strb(value, Address(__ post(to, 1)));
|
||||
__ bind(L_fill_2);
|
||||
__ tbz(count, 1, L_fill_4);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ bind(L_fill_4);
|
||||
__ tbz(count, 2, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
case T_SHORT:
|
||||
// Bits 0 and 1 of count select a 2- and 4-byte store respectively.
__ tbz(count, 0, L_fill_4);
|
||||
__ strh(value, Address(__ post(to, 2)));
|
||||
__ bind(L_fill_4);
|
||||
__ tbz(count, 1, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
case T_INT:
|
||||
__ cbzw(count, L_exit2);
|
||||
__ strw(value, Address(to));
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
__ bind(L_exit2);
|
||||
__ leave();
|
||||
__ ret(lr);
|
||||
return start;
|
||||
}
|
||||
|
||||
void generate_arraycopy_stubs() {
|
||||
address entry;
|
||||
address entry_jbyte_arraycopy;
|
||||
|
@ -2125,6 +2255,12 @@ class StubGenerator: public StubCodeGenerator {
|
|||
entry_jlong_arraycopy,
|
||||
entry_checkcast_arraycopy);
|
||||
|
||||
StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill");
|
||||
StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill");
|
||||
StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill");
|
||||
StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill");
|
||||
StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill");
|
||||
StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill");
|
||||
}
|
||||
|
||||
void generate_math_stubs() { Unimplemented(); }
|
||||
|
|
|
@ -2845,6 +2845,9 @@ void LIR_Assembler::membar_storeload() {
|
|||
__ membar(Assembler::StoreLoad);
|
||||
}
|
||||
|
||||
// Spin-wait hint hook. No instruction is emitted here: the body only calls
// Unimplemented().
// NOTE(review): presumably shared code must not emit this op on this port - confirm.
void LIR_Assembler::on_spin_wait() {
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
|
||||
LIR_Address* addr = addr_opr->as_address_ptr();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
|
@ -1055,7 +1055,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
|||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
|
|
@ -3313,6 +3313,9 @@ void LIR_Assembler::membar_storeload() {
|
|||
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
|
||||
}
|
||||
|
||||
// Spin-wait hint hook. No instruction is emitted here: the body only calls
// Unimplemented().
// NOTE(review): presumably shared code must not emit this op on this port - confirm.
void LIR_Assembler::on_spin_wait() {
|
||||
Unimplemented();
|
||||
}
|
||||
|
||||
// Pack two sequential registers containing 32 bit values
|
||||
// into a single 64 bit register.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -1033,7 +1033,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
|||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
|
|
@ -1827,6 +1827,15 @@ void Assembler::cvttss2sil(Register dst, XMMRegister src) {
|
|||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the register-to-register form of CVTTPD2DQ: the prefix bytes come from
// simd_prefix_and_encode (66 / 0F), followed by opcode byte 0xE6 and a
// register-direct ModRM byte (0xC0 | encode).
void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
|
||||
// NOTE(review): NOT_LP64 presumably keeps this SSE2 check for 32-bit builds only - confirm.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
// Use the 512-bit vector length when supports_avx512novl() reports true, otherwise 128-bit.
int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8((unsigned char)0xE6);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
void Assembler::decl(Address dst) {
|
||||
// Don't use it directly. Use MacroAssembler::decrement() instead.
|
||||
InstructionMark im(this);
|
||||
|
@ -4993,7 +5002,7 @@ void Assembler::paddq(XMMRegister dst, XMMRegister src) {
|
|||
}
|
||||
|
||||
void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
assert(VM_Version::supports_sse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x01);
|
||||
|
@ -5001,7 +5010,7 @@ void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
|
|||
}
|
||||
|
||||
void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse3(), ""));
|
||||
assert(VM_Version::supports_sse3(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
|
||||
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
|
||||
emit_int8(0x02);
|
||||
|
|
|
@ -1048,6 +1048,8 @@ private:
|
|||
void cvttss2sil(Register dst, XMMRegister src);
|
||||
void cvttss2siq(Register dst, XMMRegister src);
|
||||
|
||||
void cvttpd2dq(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Divide Scalar Double-Precision Floating-Point Values
|
||||
void divsd(XMMRegister dst, Address src);
|
||||
void divsd(XMMRegister dst, XMMRegister src);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -169,18 +169,18 @@ void FpuStackSim::clear() {
|
|||
|
||||
intArray* FpuStackSim::write_state() {
|
||||
intArray* res = new intArray(1 + FrameMap::nof_fpu_regs);
|
||||
(*res)[0] = stack_size();
|
||||
res->append(stack_size());
|
||||
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
||||
(*res)[1 + i] = regs_at(i);
|
||||
res->append(regs_at(i));
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
void FpuStackSim::read_state(intArray* fpu_stack_state) {
|
||||
_stack_size = (*fpu_stack_state)[0];
|
||||
_stack_size = fpu_stack_state->at(0);
|
||||
for (int i = 0; i < FrameMap::nof_fpu_regs; i++) {
|
||||
set_regs_at(i, (*fpu_stack_state)[1 + i]);
|
||||
set_regs_at(i, fpu_stack_state->at(1 + i));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -2365,13 +2365,8 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, L
|
|||
} else if (value->is_double_fpu()) {
|
||||
assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
|
||||
switch(code) {
|
||||
case lir_log10 : __ flog10() ; break;
|
||||
case lir_abs : __ fabs() ; break;
|
||||
case lir_sqrt : __ fsqrt(); break;
|
||||
case lir_tan :
|
||||
// Should consider not saving rbx, if not necessary
|
||||
__ trigfunc('t', op->as_Op2()->fpu_stack_size());
|
||||
break;
|
||||
default : ShouldNotReachHere();
|
||||
}
|
||||
} else {
|
||||
|
@ -3886,6 +3881,10 @@ void LIR_Assembler::membar_storeload() {
|
|||
__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad));
|
||||
}
|
||||
|
||||
// Spin-wait hint hook: emits the assembler's pause() instruction.
void LIR_Assembler::on_spin_wait() {
|
||||
__ pause ();
|
||||
}
|
||||
|
||||
void LIR_Assembler::get_thread(LIR_Opr result_reg) {
|
||||
assert(result_reg->is_register(), "check");
|
||||
#ifdef _LP64
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -812,7 +812,8 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
|||
|
||||
if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog ||
|
||||
x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos ||
|
||||
x->id() == vmIntrinsics::_dsin) {
|
||||
x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan ||
|
||||
x->id() == vmIntrinsics::_dlog10) {
|
||||
do_LibmIntrinsic(x);
|
||||
return;
|
||||
}
|
||||
|
@ -820,58 +821,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
|
|||
LIRItem value(x->argument_at(0), this);
|
||||
|
||||
bool use_fpu = false;
|
||||
if (UseSSE >= 2) {
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dtan:
|
||||
case vmIntrinsics::_dlog10:
|
||||
use_fpu = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if (UseSSE < 2) {
|
||||
value.set_destroys_register();
|
||||
}
|
||||
|
||||
value.load_item();
|
||||
|
||||
LIR_Opr calc_input = value.result();
|
||||
LIR_Opr calc_input2 = NULL;
|
||||
if (x->id() == vmIntrinsics::_dpow) {
|
||||
LIRItem extra_arg(x->argument_at(1), this);
|
||||
if (UseSSE < 2) {
|
||||
extra_arg.set_destroys_register();
|
||||
}
|
||||
extra_arg.load_item();
|
||||
calc_input2 = extra_arg.result();
|
||||
}
|
||||
LIR_Opr calc_result = rlock_result(x);
|
||||
|
||||
// sin, cos, pow and exp need two free fpu stack slots, so register
|
||||
// two temporary operands
|
||||
LIR_Opr tmp1 = FrameMap::caller_save_fpu_reg_at(0);
|
||||
LIR_Opr tmp2 = FrameMap::caller_save_fpu_reg_at(1);
|
||||
|
||||
if (use_fpu) {
|
||||
LIR_Opr tmp = FrameMap::fpu0_double_opr;
|
||||
int tmp_start = 1;
|
||||
if (calc_input2 != NULL) {
|
||||
__ move(calc_input2, tmp);
|
||||
tmp_start = 2;
|
||||
calc_input2 = tmp;
|
||||
}
|
||||
__ move(calc_input, tmp);
|
||||
|
||||
calc_input = tmp;
|
||||
calc_result = tmp;
|
||||
|
||||
tmp1 = FrameMap::caller_save_fpu_reg_at(tmp_start);
|
||||
tmp2 = FrameMap::caller_save_fpu_reg_at(tmp_start + 1);
|
||||
}
|
||||
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dabs: __ abs (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
|
||||
case vmIntrinsics::_dsqrt: __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
|
||||
case vmIntrinsics::_dtan: __ tan (calc_input, calc_result, tmp1, tmp2); break;
|
||||
case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1); break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
||||
|
@ -912,21 +872,28 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
|||
result_reg = tmp;
|
||||
switch(x->id()) {
|
||||
case vmIntrinsics::_dexp:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog10:
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
|
@ -946,18 +913,44 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
|||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dtan:
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#else
|
||||
switch (x->id()) {
|
||||
case vmIntrinsics::_dexp:
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog:
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dlog10:
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dpow:
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dsin:
|
||||
if (StubRoutines::dsin() != NULL) {
|
||||
|
@ -973,6 +966,13 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
|
|||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
case vmIntrinsics::_dtan:
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args());
|
||||
} else {
|
||||
__ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args());
|
||||
}
|
||||
break;
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
#endif // _LP64
|
||||
|
@ -1260,7 +1260,7 @@ void LIRGenerator::do_NewObjectArray(NewObjectArray* x) {
|
|||
void LIRGenerator::do_NewMultiArray(NewMultiArray* x) {
|
||||
Values* dims = x->dims();
|
||||
int i = dims->length();
|
||||
LIRItemList* items = new LIRItemList(dims->length(), NULL);
|
||||
LIRItemList* items = new LIRItemList(i, i, NULL);
|
||||
while (i-- > 0) {
|
||||
LIRItem* size = new LIRItem(dims->at(i), this);
|
||||
items->at_put(i, size);
|
||||
|
|
|
@ -786,58 +786,6 @@ void FpuStackAllocator::handle_op2(LIR_Op2* op2) {
|
|||
break;
|
||||
}
|
||||
|
||||
case lir_log10: {
|
||||
// log and log10 need one temporary fpu stack slot, so
|
||||
// there is one temporary registers stored in temp of the
|
||||
// operation. the stack allocator must guarantee that the stack
|
||||
// slots are really free, otherwise there might be a stack
|
||||
// overflow.
|
||||
assert(right->is_illegal(), "must be");
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "must be");
|
||||
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
insert_free_if_dead(res, left);
|
||||
insert_exchange(left);
|
||||
do_rename(left, res);
|
||||
|
||||
new_left = to_fpu_stack_top(res);
|
||||
new_res = new_left;
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 7, "at least one stack slot must be free");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
case lir_tan: {
|
||||
// sin, cos and exp need two temporary fpu stack slots, so there are two temporary
|
||||
// registers (stored in right and temp of the operation).
|
||||
// the stack allocator must guarantee that the stack slots are really free,
|
||||
// otherwise there might be a stack overflow.
|
||||
assert(left->is_fpu_register(), "must be");
|
||||
assert(res->is_fpu_register(), "must be");
|
||||
// assert(left->is_last_use(), "old value gets destroyed");
|
||||
assert(right->is_fpu_register(), "right is used as the first temporary register");
|
||||
assert(op2->tmp1_opr()->is_fpu_register(), "temp is used as the second temporary register");
|
||||
assert(fpu_num(left) != fpu_num(right) && fpu_num(right) != fpu_num(op2->tmp1_opr()) && fpu_num(op2->tmp1_opr()) != fpu_num(res), "need distinct temp registers");
|
||||
|
||||
insert_free_if_dead(right);
|
||||
insert_free_if_dead(op2->tmp1_opr());
|
||||
|
||||
insert_free_if_dead(res, left);
|
||||
insert_exchange(left);
|
||||
do_rename(left, res);
|
||||
|
||||
new_left = to_fpu_stack_top(res);
|
||||
new_res = new_left;
|
||||
|
||||
op2->set_fpu_stack_size(sim()->stack_size());
|
||||
assert(sim()->stack_size() <= 6, "at least two stack slots must be free");
|
||||
break;
|
||||
}
|
||||
|
||||
default: {
|
||||
assert(false, "missed a fpu-operation");
|
||||
}
|
||||
|
|
|
@ -194,9 +194,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
|
|||
product(bool, UseBMI2Instructions, false, \
|
||||
"Use BMI2 instructions") \
|
||||
\
|
||||
diagnostic(bool, UseLibmSinIntrinsic, true, \
|
||||
"Use Libm Sin Intrinsic") \
|
||||
\
|
||||
diagnostic(bool, UseLibmCosIntrinsic, true, \
|
||||
"Use Libm Cos Intrinsic")
|
||||
diagnostic(bool, UseLibmIntrinsic, true, \
|
||||
"Use Libm Intrinsics")
|
||||
#endif // CPU_X86_VM_GLOBALS_X86_HPP
|
||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
@ -928,6 +928,10 @@ class MacroAssembler: public Assembler {
|
|||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
|
||||
|
||||
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register r11);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
|
||||
|
@ -941,11 +945,19 @@ class MacroAssembler: public Assembler {
|
|||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1,
|
||||
Register tmp2, Register tmp3, Register tmp4);
|
||||
#else
|
||||
void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp1);
|
||||
|
||||
void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
|
||||
void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
|
||||
XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
|
||||
Register rdx, Register tmp);
|
||||
|
@ -964,6 +976,14 @@ class MacroAssembler: public Assembler {
|
|||
|
||||
void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
|
||||
Register esi, Register edi, Register ebp, Register esp);
|
||||
|
||||
void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
|
||||
Register edx, Register ebx, Register esi, Register edi,
|
||||
Register ebp, Register esp);
|
||||
|
||||
void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
|
||||
XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
|
||||
Register rax, Register rcx, Register rdx, Register tmp);
|
||||
#endif
|
||||
|
||||
void increase_precision();
|
||||
|
|
889
hotspot/src/cpu/x86/vm/macroAssembler_x86_cos.cpp
Normal file
889
hotspot/src/cpu/x86/vm/macroAssembler_x86_cos.cpp
Normal file
|
@ -0,0 +1,889 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x): compiler-specific prefix that requests x-byte alignment for the
// declaration it precedes. MSVC uses __declspec(align(x)); every other
// compiler is given the GCC-style __attribute__((aligned(x))).
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - COS()
|
||||
// ---------------------
|
||||
//
|
||||
// 1. RANGE REDUCTION
|
||||
//
|
||||
// We perform an initial range reduction from X to r with
|
||||
//
|
||||
// X =~= N * pi/32 + r
|
||||
//
|
||||
// so that |r| <= pi/64 + epsilon. We restrict inputs to those
|
||||
// where |N| <= 932560. Beyond this, the range reduction is
|
||||
// insufficiently accurate. For extremely small inputs,
|
||||
// denormalization can occur internally, impacting performance.
|
||||
// This means that the main path is actually only taken for
|
||||
// 2^-252 <= |X| < 90112.
|
||||
//
|
||||
// To avoid branches, we perform the range reduction to full
|
||||
// accuracy each time.
|
||||
//
|
||||
// X - N * (P_1 + P_2 + P_3)
|
||||
//
|
||||
// where P_1 and P_2 are 32-bit numbers (so multiplication by N
|
||||
// is exact) and P_3 is a 53-bit number. Together, these
|
||||
// approximate pi/32 well enough for all cases in the restricted
|
||||
// range.
|
||||
//
|
||||
// The main reduction sequence is:
|
||||
//
|
||||
// y = 32/pi * x
|
||||
// N = integer(y)
|
||||
// (computed by adding and subtracting off SHIFTER)
|
||||
//
|
||||
// m_1 = N * P_1
|
||||
// m_2 = N * P_2
|
||||
// r_1 = x - m_1
|
||||
// r = r_1 - m_2
|
||||
// (this r can be used for most of the calculation)
|
||||
//
|
||||
// c_1 = r_1 - r
|
||||
// m_3 = N * P_3
|
||||
// c_2 = c_1 - m_2
|
||||
// c = c_2 - m_3
|
||||
//
|
||||
// 2. MAIN ALGORITHM
|
||||
//
|
||||
// The algorithm uses a table lookup based on B = M * pi / 32
|
||||
// where M = (N + 16) mod 64 (the +16 is the pi/2 phase shift that lets
// cos(X) be evaluated as sin(X + pi/2) below). The stored values are:
|
||||
// sigma closest power of 2 to cos(B)
|
||||
// C_hl 53-bit cos(B) - sigma
|
||||
// S_hi + S_lo 2 * 53-bit sin(B)
|
||||
//
|
||||
// The computation is organized as follows:
|
||||
//
|
||||
// sin(B + r + c) = [sin(B) + sigma * r] +
|
||||
// r * (cos(B) - sigma) +
|
||||
// sin(B) * [cos(r + c) - 1] +
|
||||
// cos(B) * [sin(r + c) - r]
|
||||
//
|
||||
// which is approximately:
|
||||
//
|
||||
// [S_hi + sigma * r] +
|
||||
// C_hl * r +
|
||||
// S_lo + S_hi * [(cos(r) - 1) - r * c] +
|
||||
// (C_hl + sigma) * [(sin(r) - r) + c]
|
||||
//
|
||||
// and this is what is actually computed. We separate this sum
|
||||
// into four parts:
|
||||
//
|
||||
// hi + med + pols + corr
|
||||
//
|
||||
// where
|
||||
//
|
||||
// hi = S_hi + sigma r
|
||||
// med = C_hl * r
|
||||
// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r)
|
||||
// corr = S_lo + c * ((C_hl + sigma) - S_hi * r)
|
||||
//
|
||||
// 3. POLYNOMIAL
|
||||
//
|
||||
// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) *
|
||||
// (sin(r) - r) can be rearranged freely, since it is quite
|
||||
// small, so we exploit parallelism to the fullest.
|
||||
//
|
||||
// psc4 = SC_4 * r_1
|
||||
// msc4 = psc4 * r
|
||||
// r2 = r * r
|
||||
// msc2 = SC_2 * r2
|
||||
// r4 = r2 * r2
|
||||
// psc3 = SC_3 + msc4
|
||||
// psc1 = SC_1 + msc2
|
||||
// msc3 = r4 * psc3
|
||||
// sincospols = psc1 + msc3
|
||||
// pols = sincospols *
|
||||
// <S_hi * r^2 | (C_hl + sigma) * r^3>
|
||||
//
|
||||
// 4. CORRECTION TERM
|
||||
//
|
||||
// This is where the "c" component of the range reduction is
|
||||
// taken into account; recall that just "r" is used for most of
|
||||
// the calculation.
|
||||
//
|
||||
// -c = m_3 - c_2
|
||||
// -d = S_hi * r - (C_hl + sigma)
|
||||
// corr = -c * -d + S_lo
|
||||
//
|
||||
// 5. COMPENSATED SUMMATIONS
|
||||
//
|
||||
// The two successive compensated summations add up the high
|
||||
// and medium parts, leaving just the low parts to add up at
|
||||
// the end.
|
||||
//
|
||||
// rs = sigma * r
|
||||
// res_int = S_hi + rs
|
||||
// k_0 = S_hi - res_int
|
||||
// k_2 = k_0 + rs
|
||||
// med = C_hl * r
|
||||
// res_hi = res_int + med
|
||||
// k_1 = res_int - res_hi
|
||||
// k_3 = k_1 + med
|
||||
//
|
||||
// 6. FINAL SUMMATION
|
||||
//
|
||||
// We now add up all the small parts:
|
||||
//
|
||||
// res_lo = pols(hi) + pols(lo) + corr + k_2 + k_3
|
||||
//
|
||||
// Now the overall result is just:
|
||||
//
|
||||
// res_hi + res_lo
|
||||
//
|
||||
// 7. SMALL ARGUMENTS
|
||||
//
|
||||
// Inputs with |X| < 2^-252 are treated specially as
|
||||
// 1 - |x|.
|
||||
//
|
||||
// Special cases:
|
||||
// cos(NaN) = quiet NaN, and raise invalid exception
|
||||
// cos(INF) = NaN and raise invalid exception
|
||||
// cos(0) = 1
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// The IEEE-754 double 1.0 (0x3ff0000000000000) stored as two 32-bit words,
// low word first. fast_cos() loads it for the tiny-argument result 1 - |x|.
ALIGNED_(8) juint _ONE[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
// Emits the 64-bit machine code that computes cos(x) for a double.
//
// The argument is taken from the register passed as xmm0 and every exit path
// leaves the result in that same register before reaching the common epilogue
// (B1_4). xmm1-xmm7 and the general registers named below are scratch.
// rbx is pushed/popped here and 16 bytes of stack are used as scratch space.
//
// Paths, selected by the exponent of x:
//   - main path (2^-252 <= |x| < 90112): reduction by pi/32 + table lookup +
//     polynomial, as laid out in the algorithm description above;
//   - tiny |x|: returns 1.0 - |x|;
//   - large finite |x|: integer multi-word reduction using PI_INV_TABLE, then
//     the same table/polynomial evaluation;
//   - NaN/Inf: returns x * -0.0.
//
// NOTE(review): besides the eax/ecx/edx/r8-r11 parameters the body addresses
// rax, rcx, rdx, rbx, rsi and rdi by their fixed names, so it appears to rely
// on the caller passing eax=rax, ecx=rcx, edx=rdx -- confirm at the call site.
// NOTE(review): the large-argument path overwrites rsi and rdi and this
// function does not save them; presumably the caller preserves them where the
// ABI makes them callee-saved -- confirm.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r8, Register r9, Register r10, Register r11) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_1, L_2TAG_PACKET_1_0_1, L_2TAG_PACKET_2_0_1, L_2TAG_PACKET_3_0_1;
|
||||
Label L_2TAG_PACKET_4_0_1, L_2TAG_PACKET_5_0_1, L_2TAG_PACKET_6_0_1, L_2TAG_PACKET_7_0_1;
|
||||
Label L_2TAG_PACKET_8_0_1, L_2TAG_PACKET_9_0_1, L_2TAG_PACKET_10_0_1, L_2TAG_PACKET_11_0_1;
|
||||
Label L_2TAG_PACKET_12_0_1, L_2TAG_PACKET_13_0_1, B1_2, B1_3, B1_4, B1_5, start;
|
||||
|
||||
assert_different_registers(r8, r9, r10, r11, eax, ecx, edx);
|
||||
|
||||
// Addresses of the constant tables published through StubRoutines::x86, plus
// the file-local _ONE constant.
address ONEHALF = StubRoutines::x86::_ONEHALF_addr();
|
||||
address P_2 = StubRoutines::x86::_P_2_addr();
|
||||
address SC_4 = StubRoutines::x86::_SC_4_addr();
|
||||
address Ctable = StubRoutines::x86::_Ctable_addr();
|
||||
address SC_2 = StubRoutines::x86::_SC_2_addr();
|
||||
address SC_3 = StubRoutines::x86::_SC_3_addr();
|
||||
address SC_1 = StubRoutines::x86::_SC_1_addr();
|
||||
address PI_INV_TABLE = StubRoutines::x86::_PI_INV_TABLE_addr();
|
||||
address PI_4 = (address)StubRoutines::x86::_PI_4_addr();
|
||||
address PI32INV = (address)StubRoutines::x86::_PI32INV_addr();
|
||||
address SIGN_MASK = (address)StubRoutines::x86::_SIGN_MASK_addr();
|
||||
address P_1 = (address)StubRoutines::x86::_P_1_addr();
|
||||
address P_3 = (address)StubRoutines::x86::_P_3_addr();
|
||||
address ONE = (address)_ONE;
|
||||
address NEG_ZERO = (address)StubRoutines::x86::_NEG_ZERO_addr();
|
||||
|
||||
bind(start);
|
||||
// Prologue: save rbx, reserve 16 bytes of scratch and spill x to [rsp+8].
push(rbx);
|
||||
subq(rsp, 16);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_2);
|
||||
// eax = high 32 bits of x. Keep the exponent and the top four mantissa bits
// (mask 0x7fff0000), rebase by 0x30300000 and do one unsigned compare against
// 0x10c50000: the main path is taken only for 2^-252 <= |x| < 90112.
movl(eax, Address(rsp, 12));
|
||||
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
|
||||
andl(eax, 2147418112);
|
||||
subl(eax, 808452096);
|
||||
cmpl(eax, 281346048);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_0_0_1);
|
||||
// Main path. N = trunc(x * 32/pi + copysign(0.5, x)), i.e. x * 32/pi rounded
// to the nearest integer; N is kept both in edx and (as a double) in xmm1.
mulsd(xmm1, xmm0);
|
||||
movdqu(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2sil(edx, xmm1);
|
||||
cvtsi2sdl(xmm1, edx);
|
||||
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
|
||||
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
// Table row = (N + 1865232) mod 64; the bias is congruent to 16 mod 64.
// Each Ctable row is 32 bytes (hence the shift by 5); rax ends up pointing
// at the selected row.
addq(rdx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
andq(rdx, 63);
|
||||
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
|
||||
lea(rax, ExternalAddress(Ctable));
|
||||
shlq(rdx, 5);
|
||||
addq(rax, rdx);
|
||||
// From here on the range reduction (r, c), the polynomial and the two
// compensated summations of the algorithm description are interleaved.
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
|
||||
subsd(xmm4, xmm3);
|
||||
movq(xmm7, Address(rax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(rax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movq(xmm3, Address(rax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
|
||||
mulsd(xmm4, Address(rax, 0));
|
||||
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulpd(xmm5, xmm0);
|
||||
movdqu(xmm0, xmm3);
|
||||
addsd(xmm3, Address(rax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movdqu(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movq(xmm5, Address(rax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(rax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm0, xmm5);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm3);
|
||||
addsd(xmm0, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
addsd(xmm0, xmm6);
|
||||
// xmm0 holds the accumulated low-order terms; add res_hi (xmm4) to form
// the final result.
addsd(xmm0, xmm4);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_1);
|
||||
// Out-of-range input. The flags are still those of the cmpl above: signed
// "greater" means |x| is too large (or x is NaN/Inf); otherwise |x| < 2^-252.
jcc(Assembler::greater, L_2TAG_PACKET_1_0_1);
|
||||
// Tiny |x|: clear the sign bit and return 1.0 - |x|.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
movq(xmm1, ExternalAddress(ONE)); //0x00000000UL, 0x3ff00000UL
|
||||
subsd(xmm1, xmm0);
|
||||
movdqu(xmm0, xmm1);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_1);
|
||||
// Large |x|. An all-ones exponent field (0x7ff0 in the top 16 bits) means
// NaN or Inf and is handled separately.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32752);
|
||||
cmpl(eax, 32752);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_2_0_1);
|
||||
// Large finite |x|: derive a byte offset into PI_INV_TABLE from the exponent,
// split the significand of x into eax (upper 21 bits with the implicit one
// set) and edx (lower 32 bits), and multiply it by successive 32-bit table
// words, accumulating the partial products in r8-r11, rsi, rdi and rbx.
// NOTE(review): presumably a Payne-Hanek style reduction against a table of
// 1/pi bits -- the table contents are not visible here; confirm.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
subl(ecx, 16224);
|
||||
shrl(ecx, 7);
|
||||
andl(ecx, 65532);
|
||||
lea(r11, ExternalAddress(PI_INV_TABLE));
|
||||
addq(rcx, r11);
|
||||
movdq(rax, xmm0);
|
||||
movl(r10, Address(rcx, 20));
|
||||
movl(r8, Address(rcx, 24));
|
||||
movl(edx, eax);
|
||||
shrq(rax, 21);
|
||||
orl(eax, INT_MIN);
|
||||
shrl(eax, 11);
|
||||
movl(r9, r10);
|
||||
imulq(r10, rdx);
|
||||
imulq(r9, rax);
|
||||
imulq(r8, rax);
|
||||
movl(rsi, Address(rcx, 16));
|
||||
movl(rdi, Address(rcx, 12));
|
||||
movl(r11, r10);
|
||||
shrq(r10, 32);
|
||||
addq(r9, r10);
|
||||
addq(r11, r8);
|
||||
movl(r8, r11);
|
||||
shrq(r11, 32);
|
||||
addq(r9, r11);
|
||||
movl(r10, rsi);
|
||||
imulq(rsi, rdx);
|
||||
imulq(r10, rax);
|
||||
movl(r11, rdi);
|
||||
imulq(rdi, rdx);
|
||||
movl(rbx, rsi);
|
||||
shrq(rsi, 32);
|
||||
addq(r9, rbx);
|
||||
movl(rbx, r9);
|
||||
shrq(r9, 32);
|
||||
addq(r10, rsi);
|
||||
addq(r10, r9);
|
||||
shlq(rbx, 32);
|
||||
orq(r8, rbx);
|
||||
imulq(r11, rax);
|
||||
movl(r9, Address(rcx, 8));
|
||||
movl(rsi, Address(rcx, 4));
|
||||
movl(rbx, rdi);
|
||||
shrq(rdi, 32);
|
||||
addq(r10, rbx);
|
||||
movl(rbx, r10);
|
||||
shrq(r10, 32);
|
||||
addq(r11, rdi);
|
||||
addq(r11, r10);
|
||||
movq(rdi, r9);
|
||||
imulq(r9, rdx);
|
||||
imulq(rdi, rax);
|
||||
movl(r10, r9);
|
||||
shrq(r9, 32);
|
||||
addq(r11, r10);
|
||||
movl(r10, r11);
|
||||
shrq(r11, 32);
|
||||
addq(rdi, r9);
|
||||
addq(rdi, r11);
|
||||
movq(r9, rsi);
|
||||
imulq(rsi, rdx);
|
||||
imulq(r9, rax);
|
||||
shlq(r10, 32);
|
||||
orq(r10, rbx);
|
||||
movl(eax, Address(rcx, 0));
|
||||
movl(r11, rsi);
|
||||
shrq(rsi, 32);
|
||||
addq(rdi, r11);
|
||||
movl(r11, rdi);
|
||||
shrq(rdi, 32);
|
||||
addq(r9, rsi);
|
||||
addq(r9, rdi);
|
||||
imulq(rdx, rax);
|
||||
// Recover the sign of x (rsi = bit 15 of its top 16-bit word) and its
// unbiased exponent (rbx), and turn the table offset in rcx back into a bit
// count used for the shifts below.
pextrw(rbx, xmm0, 3);
|
||||
lea(rdi, ExternalAddress(PI_INV_TABLE));
|
||||
subq(rcx, rdi);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, ecx);
|
||||
addl(ecx, 19);
|
||||
movl(rsi, 32768);
|
||||
andl(rsi, rbx);
|
||||
shrl(rbx, 4);
|
||||
andl(rbx, 2047);
|
||||
subl(rbx, 1023);
|
||||
subl(ecx, rbx);
|
||||
addq(r9, rdx);
|
||||
movl(edx, ecx);
|
||||
addl(edx, 32);
|
||||
cmpl(ecx, 1);
|
||||
jcc(Assembler::less, L_2TAG_PACKET_3_0_1);
|
||||
negl(ecx);
|
||||
addl(ecx, 29);
|
||||
shll(r9);
|
||||
movl(rdi, r9);
|
||||
andl(r9, 536870911);
|
||||
testl(r9, 268435456);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_4_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 0);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_6_0_1);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_8_0_1);
|
||||
// Normalize the fraction words: locate the top set bit of r9 and shift the
// r9/r10/r8 words so that it lands at bit 29, adjusting the exponent in edx.
bsrq(r11, r9);
|
||||
movl(ecx, 29);
|
||||
subl(ecx, r11);
|
||||
jcc(Assembler::lessEqual, L_2TAG_PACKET_9_0_1);
|
||||
shlq(r9);
|
||||
movq(rax, r10);
|
||||
shlq(r10);
|
||||
addl(edx, ecx);
|
||||
negl(ecx);
|
||||
addl(ecx, 64);
|
||||
shrq(rax);
|
||||
shrq(r8);
|
||||
orq(r9, rax);
|
||||
orq(r10, r8);
|
||||
|
||||
bind(L_2TAG_PACKET_10_0_1);
|
||||
// Convert the integer fraction words (r9, r10) to doubles, scale them by
// powers of two built directly in the exponent field (from edx, with the
// sign bits in rsi/rbx folded in), and multiply by the two halves of PI_4.
// The reduced argument ends up in xmm0 with a low-order correction in xmm6.
cvtsi2sdq(xmm0, r9);
|
||||
shrq(r10, 1);
|
||||
cvtsi2sdq(xmm3, r10);
|
||||
xorpd(xmm4, xmm4);
|
||||
shll(edx, 4);
|
||||
negl(edx);
|
||||
addl(edx, 16368);
|
||||
orl(edx, rsi);
|
||||
xorl(edx, rbx);
|
||||
pinsrw(xmm4, edx, 3);
|
||||
movq(xmm2, ExternalAddress(PI_4)); //0x40000000UL, 0x3fe921fbUL (first 8 bytes of PI_4)
|
||||
movq(xmm6, ExternalAddress(8 + PI_4)); //0x18469899UL, 0x3e64442dUL (second 8 bytes of PI_4)
|
||||
xorpd(xmm5, xmm5);
|
||||
subl(edx, 1008);
|
||||
pinsrw(xmm5, edx, 3);
|
||||
mulsd(xmm0, xmm4);
|
||||
shll(rsi, 16);
|
||||
sarl(rsi, 31);
|
||||
mulsd(xmm3, xmm5);
|
||||
movdqu(xmm1, xmm0);
|
||||
mulsd(xmm0, xmm2);
|
||||
shrl(rdi, 29);
|
||||
addsd(xmm1, xmm3);
|
||||
mulsd(xmm3, xmm2);
|
||||
addl(rdi, rsi);
|
||||
xorl(rdi, rsi);
|
||||
mulsd(xmm6, xmm1);
|
||||
movl(eax, rdi);
|
||||
addsd(xmm6, xmm3);
|
||||
movdqu(xmm2, xmm0);
|
||||
addsd(xmm0, xmm6);
|
||||
subsd(xmm2, xmm0);
|
||||
addsd(xmm6, xmm2);
|
||||
|
||||
bind(L_2TAG_PACKET_11_0_1);
|
||||
// Re-run the main-path reduction and polynomial on the reduced argument.
// Differences from the main path: eax*8 is added to the table index, and the
// low-order correction xmm6 is subtracted into the c term (xmm1).
movq(xmm1, ExternalAddress(PI32INV)); //0x6dc9c883UL, 0x40245f30UL
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm5, ExternalAddress(ONEHALF)); //0x00000000UL, 0x3fe00000UL (movq loads only the low 8 bytes)
|
||||
movq(xmm4, ExternalAddress(SIGN_MASK)); //0x00000000UL, 0x80000000UL
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2siq(rdx, xmm1);
|
||||
cvtsi2sdq(xmm1, rdx);
|
||||
movq(xmm3, ExternalAddress(P_1)); //0x54400000UL, 0x3fb921fbUL
|
||||
movdqu(xmm2, ExternalAddress(P_2)); //0x1a600000UL, 0x3d90b461UL, 0x1a600000UL, 0x3d90b461UL
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
shll(eax, 3);
|
||||
addl(edx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
addl(edx, eax);
|
||||
andl(edx, 63);
|
||||
movdqu(xmm5, ExternalAddress(SC_4)); //0xa556c734UL, 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL
|
||||
lea(rax, ExternalAddress(Ctable));
|
||||
shll(edx, 5);
|
||||
addq(rax, rdx);
|
||||
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, ExternalAddress(P_3)); //0x2e037073UL, 0x3b63198aUL
|
||||
subsd(xmm4, xmm3);
|
||||
movq(xmm7, Address(rax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(rax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movq(xmm3, Address(rax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
subsd(xmm1, xmm6);
|
||||
movdqu(xmm6, ExternalAddress(SC_2)); //0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, ExternalAddress(SC_3)); //0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL
|
||||
mulsd(xmm4, Address(rax, 0));
|
||||
addpd(xmm6, ExternalAddress(SC_1)); //0x55555555UL, 0xbfc55555UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulpd(xmm5, xmm0);
|
||||
movdqu(xmm0, xmm3);
|
||||
addsd(xmm3, Address(rax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movdqu(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movq(xmm5, Address(rax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(rax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm5, xmm0);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm1, xmm3);
|
||||
addsd(xmm1, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
movdqu(xmm0, xmm4);
|
||||
addsd(xmm1, xmm6);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(B1_4);
|
||||
|
||||
bind(L_2TAG_PACKET_7_0_1);
|
||||
// The top fraction word (r9) is zero: move the lower words up by 64 bits,
// bump the exponent in edx by 64 and retry (at most twice). If every word is
// zero, the reduced argument and its correction are both 0.
addl(edx, 64);
|
||||
movq(r9, r10);
|
||||
movq(r10, r8);
|
||||
movl(r8, 0);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
|
||||
addl(edx, 64);
|
||||
movq(r9, r10);
|
||||
movq(r10, r8);
|
||||
cmpq(r9, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_8_0_1);
|
||||
xorpd(xmm0, xmm0);
|
||||
xorpd(xmm6, xmm6);
|
||||
jmp(L_2TAG_PACKET_11_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_1);
|
||||
// Reached with the flags of "29 - bsr(r9)": zero means the words are already
// aligned; negative means they must be shifted right instead of left.
jcc(Assembler::equal, L_2TAG_PACKET_10_0_1);
|
||||
negl(ecx);
|
||||
shrq(r10);
|
||||
movq(rax, r9);
|
||||
shrq(r9);
|
||||
subl(edx, ecx);
|
||||
negl(ecx);
|
||||
addl(ecx, 64);
|
||||
shlq(rax);
|
||||
orq(r10, rax);
|
||||
jmp(L_2TAG_PACKET_10_0_1);
|
||||
bind(L_2TAG_PACKET_3_0_1);
|
||||
// Shift count in ecx was below 1: combine r9:r11 into one 64-bit word first,
// then test the top bit of its low half to decide whether to negate.
negl(ecx);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
shlq(r9);
|
||||
movq(rdi, r9);
|
||||
testl(r9, INT_MIN);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_1);
|
||||
shrl(r9);
|
||||
movl(rbx, 0);
|
||||
shrq(rdi, 3);
|
||||
jmp(L_2TAG_PACKET_6_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_1);
|
||||
// Tested bit was set: replace the r9:r10:r8 fraction by (rbx:0:0 - fraction)
// with a three-word subtract-with-borrow, and record the sign flip in rbx
// (32768 is later XORed into the sign position of the scale factor).
shrl(r9);
|
||||
movl(rbx, 536870912);
|
||||
shrl(rbx);
|
||||
shlq(r9, 32);
|
||||
orq(r9, r11);
|
||||
shlq(rbx, 32);
|
||||
addl(rdi, 536870912);
|
||||
movl(rcx, 0);
|
||||
movl(r11, 0);
|
||||
subq(rcx, r8);
|
||||
sbbq(r11, r10);
|
||||
sbbq(rbx, r9);
|
||||
movq(r8, rcx);
|
||||
movq(r10, r11);
|
||||
movq(r9, rbx);
|
||||
movl(rbx, 32768);
|
||||
jmp(L_2TAG_PACKET_5_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_12_0_1);
|
||||
// Same negate-and-flip-sign step as L_2TAG_PACKET_4_0_1, for the path that
// came through L_2TAG_PACKET_3_0_1.
shrl(r9);
|
||||
mov64(rbx, 0x100000000);
|
||||
shrq(rbx);
|
||||
movl(rcx, 0);
|
||||
movl(r11, 0);
|
||||
subq(rcx, r8);
|
||||
sbbq(r11, r10);
|
||||
sbbq(rbx, r9);
|
||||
movq(r8, rcx);
|
||||
movq(r10, r11);
|
||||
movq(r9, rbx);
|
||||
movl(rbx, 32768);
|
||||
shrq(rdi, 3);
|
||||
addl(rdi, 536870912);
|
||||
jmp(L_2TAG_PACKET_6_0_1);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_1);
|
||||
// x is NaN or +/-Inf: reload x and multiply by -0.0. The product is left in
// xmm0 and is also stored to [rsp].
movsd(xmm0, Address(rsp, 8));
|
||||
mulsd(xmm0, ExternalAddress(NEG_ZERO)); //0x00000000UL, 0x80000000UL
|
||||
movq(Address(rsp, 0), xmm0);
|
||||
|
||||
bind(L_2TAG_PACKET_13_0_1);
|
||||
|
||||
bind(B1_4);
|
||||
// Epilogue: release the 16-byte scratch area and restore rbx.
addq(rsp, 16);
|
||||
pop(rbx);
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
|
||||
ALIGNED_(16) juint _static_const_table_cos[] =
|
||||
{
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL,
|
||||
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
|
||||
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL,
|
||||
0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL,
|
||||
0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL,
|
||||
0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL,
|
||||
0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL,
|
||||
0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL,
|
||||
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
|
||||
0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL,
|
||||
0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL,
|
||||
0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL,
|
||||
0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL,
|
||||
0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL,
|
||||
0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL,
|
||||
0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL,
|
||||
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
|
||||
0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL,
|
||||
0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL,
|
||||
0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL,
|
||||
0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL,
|
||||
0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL,
|
||||
0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL,
|
||||
0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL,
|
||||
0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL,
|
||||
0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL,
|
||||
0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL,
|
||||
0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL,
|
||||
0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL,
|
||||
0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL,
|
||||
0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL,
|
||||
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL,
|
||||
0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL,
|
||||
0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL,
|
||||
0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL,
|
||||
0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL,
|
||||
0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL,
|
||||
0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL,
|
||||
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL,
|
||||
0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL,
|
||||
0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL,
|
||||
0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL,
|
||||
0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL,
|
||||
0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL,
|
||||
0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL,
|
||||
0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL,
|
||||
0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL,
|
||||
0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
|
||||
0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL,
|
||||
0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL,
|
||||
0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL,
|
||||
0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL,
|
||||
0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL,
|
||||
0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL,
|
||||
0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL,
|
||||
0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
|
||||
0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL,
|
||||
0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL,
|
||||
0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL,
|
||||
0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL,
|
||||
0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL,
|
||||
0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL,
|
||||
0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL,
|
||||
0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
|
||||
0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL,
|
||||
0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL,
|
||||
0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL,
|
||||
0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL,
|
||||
0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL,
|
||||
0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL,
|
||||
0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL,
|
||||
0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL,
|
||||
0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL,
|
||||
0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL,
|
||||
0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL,
|
||||
0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL,
|
||||
0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL,
|
||||
0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL,
|
||||
0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL,
|
||||
0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL,
|
||||
0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL,
|
||||
0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL,
|
||||
0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL,
|
||||
0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL,
|
||||
0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL,
|
||||
0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL,
|
||||
0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL,
|
||||
0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL,
|
||||
0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL,
|
||||
0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL,
|
||||
0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL,
|
||||
0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL,
|
||||
0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL,
|
||||
0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL,
|
||||
0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL,
|
||||
0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL,
|
||||
0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL,
|
||||
0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL,
|
||||
0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL,
|
||||
0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL
|
||||
};
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 32-bit machine code that computes cos(x) for a double.
//
// The argument is read from the stack ([rsp+128] once 120 bytes have been
// reserved) and every path leaves the result on the x87 stack via fld_d.
// tmp is saved at [rsp+56] on entry and restored on exit; while the stub runs
// it holds the base address of _static_const_table_cos.
//
// Byte offsets into _static_const_table_cos used below:
//      0 .. 2047  64 rows of 32 bytes, indexed by (N + 1865232) mod 64
//   2048  SC_1      2064  SC_2      2080  SC_3      2096  SC_4
//   2112  P_2       2128  P_1       2144  P_3
//   2160  32/pi     2192  1.0       2208  -0.0
//   2224  sign mask 2240  0.5 (two lanes)
//
// NOTE(review): rsp is lowered by 120 here and is not raised again in this
// function; presumably the caller's frame teardown restores it -- confirm.
void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
address static_const_table_cos = (address)_static_const_table_cos;
|
||||
|
||||
bind(start);
|
||||
// Reserve 120 bytes of stack, save tmp at [rsp+56], point tmp at the constant
// table and load the argument from [rsp+128].
subl(rsp, 120);
|
||||
movl(Address(rsp, 56), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_cos));
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
// eax = top 16 bits of x without the sign. One unsigned compare takes the
// main path only for 0x3030 <= eax <= 0x3030 + 4293, i.e.
// 2^-252 <= |x| < 90112.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
subl(eax, 12336);
|
||||
cmpl(eax, 4293);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_0_0_2);
|
||||
// Main path. N = trunc(x * 32/pi + copysign(0.5, x)), kept in edx and (as a
// double) in xmm1.
movsd(xmm1, Address(tmp, 2160));
|
||||
mulsd(xmm1, xmm0);
|
||||
movdqu(xmm5, Address(tmp, 2240));
|
||||
movsd(xmm4, Address(tmp, 2224));
|
||||
pand(xmm4, xmm0);
|
||||
por(xmm5, xmm4);
|
||||
movsd(xmm3, Address(tmp, 2128));
|
||||
movdqu(xmm2, Address(tmp, 2112));
|
||||
addpd(xmm1, xmm5);
|
||||
cvttsd2sil(edx, xmm1);
|
||||
cvtsi2sdl(xmm1, edx);
|
||||
mulsd(xmm3, xmm1);
|
||||
unpcklpd(xmm1, xmm1);
|
||||
// Table row = (N + 1865232) mod 64 (the bias is congruent to 16 mod 64);
// rows are 32 bytes, so eax = table base + row * 32.
addl(edx, 1865232);
|
||||
movdqu(xmm4, xmm0);
|
||||
andl(edx, 63);
|
||||
movdqu(xmm5, Address(tmp, 2096));
|
||||
lea(eax, Address(tmp, 0));
|
||||
shll(edx, 5);
|
||||
addl(eax, edx);
|
||||
// Range reduction, polynomial and compensated summations, interleaved.
mulpd(xmm2, xmm1);
|
||||
subsd(xmm0, xmm3);
|
||||
mulsd(xmm1, Address(tmp, 2144));
|
||||
subsd(xmm4, xmm3);
|
||||
movsd(xmm7, Address(eax, 8));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movapd(xmm3, xmm4);
|
||||
subsd(xmm4, xmm2);
|
||||
mulpd(xmm5, xmm0);
|
||||
subpd(xmm0, xmm2);
|
||||
movdqu(xmm6, Address(tmp, 2064));
|
||||
mulsd(xmm7, xmm4);
|
||||
subsd(xmm3, xmm4);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
subsd(xmm3, xmm2);
|
||||
movdqu(xmm2, Address(eax, 0));
|
||||
subsd(xmm1, xmm3);
|
||||
movsd(xmm3, Address(eax, 24));
|
||||
addsd(xmm2, xmm3);
|
||||
subsd(xmm7, xmm2);
|
||||
mulsd(xmm2, xmm4);
|
||||
mulpd(xmm6, xmm0);
|
||||
mulsd(xmm3, xmm4);
|
||||
mulpd(xmm2, xmm0);
|
||||
mulpd(xmm0, xmm0);
|
||||
addpd(xmm5, Address(tmp, 2080));
|
||||
mulsd(xmm4, Address(eax, 0));
|
||||
addpd(xmm6, Address(tmp, 2048));
|
||||
mulpd(xmm5, xmm0);
|
||||
movapd(xmm0, xmm3);
|
||||
addsd(xmm3, Address(eax, 8));
|
||||
mulpd(xmm1, xmm7);
|
||||
movapd(xmm7, xmm4);
|
||||
addsd(xmm4, xmm3);
|
||||
addpd(xmm6, xmm5);
|
||||
movsd(xmm5, Address(eax, 8));
|
||||
subsd(xmm5, xmm3);
|
||||
subsd(xmm3, xmm4);
|
||||
addsd(xmm1, Address(eax, 16));
|
||||
mulpd(xmm6, xmm2);
|
||||
addsd(xmm5, xmm0);
|
||||
addsd(xmm3, xmm7);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm1, xmm3);
|
||||
addsd(xmm1, xmm6);
|
||||
unpckhpd(xmm6, xmm6);
|
||||
addsd(xmm1, xmm6);
|
||||
addsd(xmm4, xmm1);
|
||||
// Result is in xmm4; bounce it through [rsp] onto the x87 stack.
movsd(Address(rsp, 0), xmm4);
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_2);
|
||||
// Out-of-range input. The flags are still those of the cmpl above: signed
// "greater" means |x| is too large (or x is NaN/Inf); otherwise |x| < 2^-252.
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
||||
// Tiny |x|: clear the sign bit and return 1.0 - |x|.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
movsd(xmm1, Address(tmp, 2192));
|
||||
subsd(xmm1, xmm0);
|
||||
movsd(Address(rsp, 0), xmm1);
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_2);
|
||||
// Large |x|. An all-ones exponent field (0x7ff00000 in the high dword of the
// argument) means NaN or Inf.
movl(eax, Address(rsp, 132));
|
||||
andl(eax, 2146435072);
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_3_0_2);
|
||||
// Large finite |x|: call the dlibm_sin_cos_huge stub with a 32-byte outgoing
// area holding x at [rsp], a result pointer at [rsp+8] and the integer 1 at
// [rsp+12]. The pointer (rsp+40 here) addresses [rsp+8] of this frame once the
// 32 bytes are released, which is where the result is loaded from.
// NOTE(review): the 1 presumably selects cosine over sine -- confirm against
// the stub.
subl(rsp, 32);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
lea(eax, Address(rsp, 40));
|
||||
movl(Address(rsp, 8), eax);
|
||||
movl(eax, 1);
|
||||
movl(Address(rsp, 12), eax);
|
||||
call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge())));
|
||||
addl(rsp, 32);
|
||||
fld_d(Address(rsp, 8));
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_3_0_2);
|
||||
// x is NaN or +/-Inf: return x * -0.0, computed on the x87 stack.
fld_d(Address(rsp, 128));
|
||||
fmul_d(Address(tmp, 2208));
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_2);
|
||||
// Common exit: restore the caller's value of tmp.
movl(tmp, Address(rsp, 56));
|
||||
}
|
||||
#endif
|
674
hotspot/src/cpu/x86/vm/macroAssembler_x86_exp.cpp
Normal file
674
hotspot/src/cpu/x86/vm/macroAssembler_x86_exp.cpp
Normal file
|
@ -0,0 +1,674 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x) expands to the compiler-specific alignment specifier:
// __declspec(align(x)) when _MSC_VER is defined, __attribute__((aligned(x))) otherwise.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - EXP()
|
||||
// ---------------------
|
||||
//
|
||||
// Description:
|
||||
// Let K = 64 (table size).
|
||||
// e^x = 2^(x/log(2)) = 2^n * T[j] * (1 + P(y))
|
||||
// where
|
||||
// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K]
|
||||
// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2]
|
||||
// values of 2^(j/K) are tabulated as T[j] = T_hi[j] * (1 + T_lo[j]).
|
||||
//
|
||||
// P(y) is a minimax polynomial approximation of exp(y)-1
|
||||
// on the small interval [-log(2)/K..log(2)/K] (coefficients were calculated by Maple V).
|
||||
//
|
||||
// To avoid problems with arithmetic overflow and underflow,
|
||||
// the value of 2^n is safely computed as 2^n1 * 2^n2, with n1 in [-BIAS/2..BIAS/2],
|
||||
// where BIAS is a value of exponent bias.
|
||||
//
|
||||
// Special cases:
|
||||
// exp(NaN) = NaN
|
||||
// exp(+INF) = +INF
|
||||
// exp(-INF) = 0
|
||||
// exp(x) = 1 for subnormals
|
||||
// for finite argument, only exp(0)=1 is exact
|
||||
// For IEEE double
|
||||
// if x > 709.782712893383973096 then exp(x) overflow
|
||||
// if x < -745.133219101941108420 then exp(x) underflow
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Packed double constants for the 64-bit fast_exp, loaded 16 bytes at a time.
// Each double is stored as two juints, low word first. Byte offsets:
//   +0  : 0x40571547652b82fe in both lanes (~ 64/log(2)); multiplied with x
//   +16 : 0x3f862e42fefa0000 in both lanes (~ log(2)/64, low mantissa bits zero);
//         multiplied with the rounded m and subtracted from x
//   +32 : 0x3d1cf79abc9e3b3a in both lanes; a second, much smaller term that is
//         multiplied with m and subtracted the same way
//   +48 : 0x3fdffffffffffffe in both lanes (~ 0.5); multiplied with y*y
//   +64 : 0x3f56c15ce3289860 (~ 1/720), 0x3fa55555555b9e25 (~ 1/24)
//   +80 : 0x3f811115c090cf0f (~ 1/120), 0x3fc5555555548ba1 (~ 1/6)
ALIGNED_(16) juint _cv[] =
|
||||
{
|
||||
0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, 0xfefa0000UL,
|
||||
0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, 0x3d1cf79aUL,
|
||||
0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL,
|
||||
0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL,
|
||||
0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||
};
|
||||
|
||||
// 0x4338000000000000 (= 1.5 * 2^52) in both lanes. fast_exp adds it to
// x * 64/log(2) and then reads the low 32 bits of the sum as the integer m.
ALIGNED_(16) juint _shifter[] =
|
||||
{
|
||||
0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||
};
|
||||
|
||||
// Per-lane mask 0x00000000ffffffc0: keeps bits 6..31 of each 64-bit lane and
// clears the low 6 bits and the whole upper dword.
ALIGNED_(16) juint _mmask[] =
|
||||
{
|
||||
0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||
};
|
||||
|
||||
// 0xffc0 (= 1023 * 64) in the low dword of each lane. fast_exp adds it to the
// masked m and shifts the sum left by 46 bits.
ALIGNED_(16) juint _bias[] =
|
||||
{
|
||||
0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||
};
|
||||
|
||||
// Lookup table for the 64-bit fast_exp: 64 entries of 16 bytes (four juints) each.
// fast_exp loads the entry at byte offset (m & 63) * 16, adds its low 8 bytes
// (as a double) to the reduced argument, and ORs its high 8 bytes with the
// exponent bits it has built. The top 12 bits of the high 8 bytes are zero in
// every entry, so the OR only fills in sign/exponent.
ALIGNED_(16) juint _Tbl_addr[] =
|
||||
{
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||
0x000fa7c1UL
|
||||
};
|
||||
|
||||
// 128 set bits. fast_exp shifts this left by a run-time count (psllq) and ANDs
// the result with the scale factor to split it into a masked part and a remainder.
ALIGNED_(16) juint _ALLONES[] =
|
||||
{
|
||||
0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
||||
};
|
||||
|
||||
// 0x3ff0000000000000 (the double 1.0) in both lanes. fast_exp adds it with paddd
// to a value that holds only exponent-field bits, turning that into a power of two.
ALIGNED_(16) juint _ebias[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
|
||||
// 0x7fefffffffffffff: the largest finite double. fast_exp squares it on the
// overflow path for large positive arguments.
ALIGNED_(4) juint _XMAX[] =
|
||||
{
|
||||
0xffffffffUL, 0x7fefffffUL
|
||||
};
|
||||
|
||||
// 0x0010000000000000: the smallest positive normal double. fast_exp squares it
// on the underflow path for large negative arguments.
ALIGNED_(4) juint _XMIN[] =
|
||||
{
|
||||
0x00000000UL, 0x00100000UL
|
||||
};
|
||||
|
||||
// 0x7ff0000000000000: +infinity, returned by fast_exp for an argument of +infinity.
ALIGNED_(4) juint _INF[] =
|
||||
{
|
||||
0x00000000UL, 0x7ff00000UL
|
||||
};
|
||||
|
||||
// +0.0, returned by fast_exp for an argument of -infinity.
ALIGNED_(4) juint _ZERO[] =
|
||||
{
|
||||
0x00000000UL, 0x00000000UL
|
||||
};
|
||||
|
||||
// 0x3ff0000000000000: the double 1.0. fast_exp adds it to x when |x| is below
// the range handled by the main path (result = 1 + x).
ALIGNED_(4) juint _ONE_val[] =
|
||||
{
|
||||
0x00000000UL, 0x3ff00000UL
|
||||
};
|
||||
|
||||
|
||||
// Registers:
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, tmp - r11
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 64-bit exp(x) instruction sequence (see the algorithm description
// earlier in this file).
//
//   xmm0               - holds x on entry and the result when control reaches B1_5
//   xmm1..xmm7         - scratch
//   eax, ecx, edx, tmp - scratch general-purpose registers (asserted to be different
//                        from one another)
//
// The emitted code lowers rsp by 24 at 'start' and raises it again at B1_5. x is
// spilled to [rsp+8] so that the special-case paths can re-read its raw bits.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
// Jump to 'start', which is bound right after the address locals below.
jmp(start);
|
||||
// C++ locals holding the addresses of the constant tables defined above.
address cv = (address)_cv;
|
||||
address Shifter = (address)_shifter;
|
||||
address mmask = (address)_mmask;
|
||||
address bias = (address)_bias;
|
||||
address Tbl_addr = (address)_Tbl_addr;
|
||||
address ALLONES = (address)_ALLONES;
|
||||
address ebias = (address)_ebias;
|
||||
address XMAX = (address)_XMAX;
|
||||
address XMIN = (address)_XMIN;
|
||||
address INF = (address)_INF;
|
||||
address ZERO = (address)_ZERO;
|
||||
address ONE_val = (address)_ONE_val;
|
||||
|
||||
bind(start);
|
||||
// Reserve 24 bytes of stack scratch and spill x to [rsp+8].
subq(rsp, 24);
|
||||
movsd(Address(rsp, 8), xmm0);
|
||||
// Duplicate x into both lanes of xmm0.
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm1, ExternalAddress(cv)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
||||
movdqu(xmm6, ExternalAddress(Shifter)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||
movdqu(xmm2, ExternalAddress(16 + cv)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
||||
movdqu(xmm3, ExternalAddress(32 + cv)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
||||
// eax = bits 48..63 of x (sign, exponent, top 4 mantissa bits) with the sign masked off.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
// Range check: edx = 0x408F - eax and eax -= 0x3C90. If either difference is
// negative the OR has its sign bit set and the unsigned compare against INT_MIN
// branches to the special-case path. The main path therefore handles
// 2^-54 <= |x| < 2^10.
movl(edx, 16527);
|
||||
subl(edx, eax);
|
||||
subl(eax, 15504);
|
||||
orl(edx, eax);
|
||||
cmpl(edx, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
// xmm1 = x * 64/log(2). Adding the shifter leaves the rounded integer m in the
// low mantissa bits (copy kept in xmm7); subtracting it again gives m as a double.
mulpd(xmm1, xmm0);
|
||||
addpd(xmm1, xmm6);
|
||||
movapd(xmm7, xmm1);
|
||||
subpd(xmm1, xmm6);
|
||||
// Argument reduction: xmm0 = x - m*c_hi here, and - m*c_lo further down.
mulpd(xmm2, xmm1);
|
||||
movdqu(xmm4, ExternalAddress(64 + cv)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
||||
mulpd(xmm3, xmm1);
|
||||
movdqu(xmm5, ExternalAddress(80 + cv)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||
subpd(xmm0, xmm2);
|
||||
// eax = low 32 bits of the shifted value, i.e. m.
movdl(eax, xmm7);
|
||||
// ecx = (m & 63) * 16: byte offset of the 16-byte table entry.
movl(ecx, eax);
|
||||
andl(ecx, 63);
|
||||
shll(ecx, 4);
|
||||
// eax = edx = m >> 6 (n in the algorithm description).
sarl(eax, 6);
|
||||
movl(edx, eax);
|
||||
// xmm7 = ((m & ~63) + 1023*64) << 46: puts n + 1023 into the exponent field of each lane.
movdqu(xmm6, ExternalAddress(mmask)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||
pand(xmm7, xmm6);
|
||||
movdqu(xmm6, ExternalAddress(bias)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||
paddq(xmm7, xmm6);
|
||||
psllq(xmm7, 46);
|
||||
// xmm0 = y, the reduced argument.
subpd(xmm0, xmm3);
|
||||
// xmm2 = table entry for j = m & 63.
lea(tmp, ExternalAddress(Tbl_addr));
|
||||
movdqu(xmm2, Address(ecx, tmp));
|
||||
// Polynomial in y, interleaved with the table fix-up.
mulpd(xmm4, xmm0);
|
||||
movapd(xmm6, xmm0);
|
||||
movapd(xmm1, xmm0);
|
||||
mulpd(xmm6, xmm6);
|
||||
mulpd(xmm0, xmm6);
|
||||
addpd(xmm5, xmm4);
|
||||
mulsd(xmm0, xmm6);
|
||||
mulpd(xmm6, ExternalAddress(48 + cv)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
||||
// Add the low 8 bytes of the table entry to y.
addsd(xmm1, xmm2);
|
||||
// Move the high 8 bytes of the table entry into the low lane.
unpckhpd(xmm2, xmm2);
|
||||
mulpd(xmm0, xmm5);
|
||||
addsd(xmm1, xmm0);
|
||||
// Merge the exponent bits from xmm7 into the table mantissa: xmm2 is the scale factor.
por(xmm2, xmm7);
|
||||
unpckhpd(xmm0, xmm0);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
// Unsigned check that n lies in [-894, 1022]; otherwise take the scaled path.
addl(edx, 894);
|
||||
cmpl(edx, 1916);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
|
||||
// Common case: result = xmm0 * scale + scale.
mulsd(xmm0, xmm2);
|
||||
addsd(xmm0, xmm2);
|
||||
jmp(B1_5);
|
||||
|
||||
// n is outside [-894, 1022]. The power of two is applied in two steps: n >> 1 is
// subtracted from the exponent field of xmm2 and rebuilt as a separate factor in xmm3.
bind(L_2TAG_PACKET_1_0_2);
|
||||
xorpd(xmm3, xmm3);
|
||||
movdqu(xmm4, ExternalAddress(ALLONES)); // 0xffffffffUL, 0xffffffffUL, 0xffffffffUL, 0xffffffffUL
|
||||
// edx = -1022 - n; xmm4 = all-ones shifted left by that count.
movl(edx, -1022);
|
||||
subl(edx, eax);
|
||||
movdl(xmm5, edx);
|
||||
psllq(xmm4, xmm5);
|
||||
// ecx keeps n; eax = n >> 1 goes into the top word of xmm3.
movl(ecx, eax);
|
||||
sarl(eax, 1);
|
||||
pinsrw(xmm3, eax, 3);
|
||||
movdqu(xmm6, ExternalAddress(ebias)); // 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3ff00000UL
|
||||
psllq(xmm3, 4);
|
||||
psubd(xmm2, xmm3);
|
||||
mulsd(xmm0, xmm2);
|
||||
// -1022 - n > 52 (signed): deep-underflow path.
cmpl(edx, 52);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2);
|
||||
// Split xmm2 into a masked part (xmm4) and the remainder, and add the remainder to the product.
pand(xmm4, xmm2);
|
||||
// xmm3 becomes the second power-of-two factor.
paddd(xmm3, xmm6);
|
||||
subsd(xmm2, xmm4);
|
||||
addsd(xmm0, xmm2);
|
||||
// n >= 1023: overflow side.
cmpl(ecx, 1023);
|
||||
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
|
||||
// OR the sign bit of the partial result into edx (-1022 - n); zero selects the short path.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32768);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
||||
movapd(xmm6, xmm0);
|
||||
addsd(xmm0, xmm4);
|
||||
mulsd(xmm0, xmm3);
|
||||
// Exponent field of the result (mask 0x7FF0) is zero: go to the fix-up path.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
cmpl(ecx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_5_0_2);
|
||||
jmp(B1_5);
|
||||
|
||||
// The scaled result has a zero exponent field: the result is reassembled with
// integer operations on the bit patterns of the two scaled parts.
bind(L_2TAG_PACKET_5_0_2);
|
||||
mulsd(xmm6, xmm3);
|
||||
mulsd(xmm4, xmm3);
|
||||
movdqu(xmm0, xmm6);
|
||||
pxor(xmm6, xmm4);
|
||||
psrad(xmm6, 31);
|
||||
pshufd(xmm6, xmm6, 85);
|
||||
psllq(xmm0, 1);
|
||||
psrlq(xmm0, 1);
|
||||
pxor(xmm0, xmm6);
|
||||
psrlq(xmm6, 63);
|
||||
paddq(xmm0, xmm6);
|
||||
paddq(xmm0, xmm4);
|
||||
// Stores 15 at [rsp+0]; this slot is not read again in this function.
movl(Address(rsp, 0), 15);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm4);
|
||||
mulsd(xmm0, xmm3);
|
||||
jmp(B1_5);
|
||||
|
||||
// n >= 1023: finish the product and branch if the exponent field is all ones (overflow).
bind(L_2TAG_PACKET_3_0_2);
|
||||
addsd(xmm0, xmm4);
|
||||
mulsd(xmm0, xmm3);
|
||||
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
cmpl(ecx, 32752);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
||||
jmp(B1_5);
|
||||
|
||||
// Deep-underflow path (-1022 - n > 52).
bind(L_2TAG_PACKET_2_0_2);
|
||||
paddd(xmm3, xmm6);
|
||||
addpd(xmm0, xmm2);
|
||||
mulsd(xmm0, xmm3);
|
||||
movl(Address(rsp, 0), 15);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// |x| >= 2^10 or x is Inf/NaN; eax holds the high word of |x|.
bind(L_2TAG_PACKET_8_0_2);
|
||||
// High word >= 0x7ff00000: Inf or NaN.
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
|
||||
// Reload the high word with its sign; unsigned >= 0x80000000 means x is negative.
movl(eax, Address(rsp, 12));
|
||||
cmpl(eax, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_10_0_2);
|
||||
// Large positive x: XMAX * XMAX overflows.
movsd(xmm0, ExternalAddress(XMAX)); // 0xffffffffUL, 0x7fefffffUL
|
||||
mulsd(xmm0, xmm0);
|
||||
|
||||
// Overflow: stores 14 at [rsp+0] (not read again in this function).
bind(L_2TAG_PACKET_7_0_2);
|
||||
movl(Address(rsp, 0), 14);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Large negative x: XMIN * XMIN underflows.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movsd(xmm0, ExternalAddress(XMIN)); // 0x00000000UL, 0x00100000UL
|
||||
mulsd(xmm0, xmm0);
|
||||
movl(Address(rsp, 0), 15);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Inf or NaN: a non-zero mantissa (high word above 0x7ff00000 or non-zero low word) is NaN.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movl(edx, Address(rsp, 8));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_11_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
||||
// Infinity: a high word of exactly 0x7ff00000 is +INF, otherwise -INF.
movl(eax, Address(rsp, 12));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_12_0_2);
|
||||
movsd(xmm0, ExternalAddress(INF)); // 0x00000000UL, 0x7ff00000UL
|
||||
jmp(B1_5);
|
||||
|
||||
// exp(-INF) = 0.
bind(L_2TAG_PACKET_12_0_2);
|
||||
movsd(xmm0, ExternalAddress(ZERO)); // 0x00000000UL, 0x00000000UL
|
||||
jmp(B1_5);
|
||||
|
||||
// NaN: return x + x.
bind(L_2TAG_PACKET_11_0_2);
|
||||
movsd(xmm0, Address(rsp, 8));
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
// Special-case entry: the top 16 bits of |x| are outside [0x3C90, 0x408F].
bind(L_2TAG_PACKET_0_0_2);
|
||||
// High word of |x| >= 0x40900000 (|x| >= 2^10, Inf or NaN) goes to PACKET_8.
movl(eax, Address(rsp, 12));
|
||||
andl(eax, 2147483647);
|
||||
cmpl(eax, 1083179008);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2);
|
||||
// Otherwise |x| < 2^-54: result = x + 1.0.
movsd(Address(rsp, 8), xmm0);
|
||||
addsd(xmm0, ExternalAddress(ONE_val)); // 0x00000000UL, 0x3ff00000UL
|
||||
jmp(B1_5);
|
||||
|
||||
// Common tail of the overflow/underflow paths: the result goes through [rsp+16].
bind(L_2TAG_PACKET_6_0_2);
|
||||
movq(Address(rsp, 16), xmm0);
|
||||
|
||||
// B1_3 is bound here, but no jump in this function targets it.
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 16));
|
||||
|
||||
// Exit: release the 24 bytes of stack scratch; the result is in xmm0.
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// Single constant block for the 32-bit fast_exp, addressed as byte offsets from
// one base register. Layout, as used by fast_exp:
//   +0    : 0xfff0000000000000 in both lanes (sign/exponent mask, used with pandn)
//   +16   : 0x00000000ffffffc0 in both lanes (mask that clears the low 6 bits of m)
//   +32   : 0xffc0 (= 1023 * 64) in both lanes
//   +48   : 0x4338000000000000 (= 1.5 * 2^52) in both lanes (rounding shifter)
//   +64   : ~ 64/log(2) in both lanes
//   +80   : ~ log(2)/64 (low mantissa bits zero) in both lanes
//   +96   : second, smaller reduction constant in both lanes
//   +112  : ~ 0.5 in both lanes
//   +128, +144 : four polynomial coefficients (~ 1/720, 1/24, 1/120, 1/6)
//   +160  : 64 table entries of 16 bytes each, indexed by (m & 63) * 16
//   +1184 : 1.0
//   +1192 : +infinity
//   +1200 : 0.0
//   +1208 : largest finite double
//   +1216 : smallest positive normal double
ALIGNED_(16) juint _static_const_table[] =
|
||||
{
|
||||
0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL,
|
||||
0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL,
|
||||
0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL,
|
||||
0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL,
|
||||
0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL,
|
||||
0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL,
|
||||
0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL,
|
||||
0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL,
|
||||
0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL,
|
||||
0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL,
|
||||
0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL,
|
||||
0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL,
|
||||
0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL,
|
||||
0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL,
|
||||
0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL,
|
||||
0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL,
|
||||
0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL,
|
||||
0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL,
|
||||
0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL,
|
||||
0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL,
|
||||
0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 0x4db0abb6UL,
|
||||
0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL,
|
||||
0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL,
|
||||
0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL,
|
||||
0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL,
|
||||
0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL,
|
||||
0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL,
|
||||
0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL,
|
||||
0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL,
|
||||
0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL,
|
||||
0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL,
|
||||
0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL,
|
||||
0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL,
|
||||
0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL,
|
||||
0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL,
|
||||
0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL,
|
||||
0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL,
|
||||
0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL,
|
||||
0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL,
|
||||
0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL,
|
||||
0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL,
|
||||
0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL,
|
||||
0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL,
|
||||
0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL,
|
||||
0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL,
|
||||
0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL,
|
||||
0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL,
|
||||
0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL,
|
||||
0xb19505aeUL, 0x3ca1112eUL, 0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL,
|
||||
0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL,
|
||||
0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL,
|
||||
0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL,
|
||||
0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL,
|
||||
0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL,
|
||||
0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL,
|
||||
0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL,
|
||||
0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL,
|
||||
0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL,
|
||||
0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL,
|
||||
0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL,
|
||||
0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL,
|
||||
0x00100000UL
|
||||
};
|
||||
|
||||
//registers,
|
||||
// input: (rbp + 8)
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 32-bit exp(x) instruction sequence (see the algorithm description
// earlier in this file).
//
// The argument is not taken from xmm0: it is loaded from [rsp + 128] after rsp
// has been lowered by 120, i.e. from 8 bytes above the stack pointer on entry.
// The result is left on the x87 stack (every exit path ends in fld_d).
//
//   xmm0..xmm7    - clobbered
//   eax, ecx, edx - clobbered
//   tmp           - saved at [rsp+64] and restored at the end; used as the base
//                   pointer into _static_const_table
//
// NOTE(review): the emitted code lowers rsp by 120 and never raises it again;
// presumably the caller's frame teardown restores rsp - confirm at the call sites.
void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
// Jump to 'start', which is bound right after the address local below.
jmp(start);
|
||||
address static_const_table = (address)_static_const_table;
|
||||
|
||||
bind(start);
|
||||
// Reserve 120 bytes of stack scratch, save tmp and point it at the constant block.
subl(rsp, 120);
|
||||
movl(Address(rsp, 64), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table));
|
||||
// Load x from the stack and duplicate it into both lanes of xmm0.
movdqu(xmm0, Address(rsp, 128));
|
||||
unpcklpd(xmm0, xmm0);
|
||||
movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL
|
||||
movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL
|
||||
movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL
|
||||
movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL
|
||||
// eax = bits 48..63 of x (sign, exponent, top 4 mantissa bits) with the sign masked off.
pextrw(eax, xmm0, 3);
|
||||
andl(eax, 32767);
|
||||
// Range check: edx = 0x408F - eax and eax -= 0x3C90. If either difference is
// negative the OR has its sign bit set and the unsigned compare against INT_MIN
// branches to the special-case path. The main path therefore handles
// 2^-54 <= |x| < 2^10.
movl(edx, 16527);
|
||||
subl(edx, eax);
|
||||
subl(eax, 15504);
|
||||
orl(edx, eax);
|
||||
cmpl(edx, INT_MIN);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
// xmm1 = x * 64/log(2). Adding the shifter leaves the rounded integer m in the
// low mantissa bits (copy kept in xmm7); subtracting it again gives m as a double.
mulpd(xmm1, xmm0);
|
||||
addpd(xmm1, xmm6);
|
||||
movapd(xmm7, xmm1);
|
||||
subpd(xmm1, xmm6);
|
||||
// Argument reduction: xmm0 = x - m*c_hi here, and - m*c_lo further down.
mulpd(xmm2, xmm1);
|
||||
movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL
|
||||
mulpd(xmm3, xmm1);
|
||||
movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL
|
||||
subpd(xmm0, xmm2);
|
||||
// eax = low 32 bits of the shifted value, i.e. m.
movdl(eax, xmm7);
|
||||
// ecx = (m & 63) * 16: byte offset of the 16-byte table entry.
movl(ecx, eax);
|
||||
andl(ecx, 63);
|
||||
shll(ecx, 4);
|
||||
// eax = edx = m >> 6 (n in the algorithm description).
sarl(eax, 6);
|
||||
movl(edx, eax);
|
||||
// xmm7 = ((m & ~63) + 1023*64) << 46: puts n + 1023 into the exponent field of each lane.
movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL
|
||||
pand(xmm7, xmm6);
|
||||
movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL
|
||||
paddq(xmm7, xmm6);
|
||||
psllq(xmm7, 46);
|
||||
// xmm0 = y, the reduced argument.
subpd(xmm0, xmm3);
|
||||
// xmm2 = table entry for j = m & 63 (the table starts at byte offset 160).
movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160));
|
||||
// Polynomial in y, interleaved with the table fix-up.
mulpd(xmm4, xmm0);
|
||||
movapd(xmm6, xmm0);
|
||||
movapd(xmm1, xmm0);
|
||||
mulpd(xmm6, xmm6);
|
||||
mulpd(xmm0, xmm6);
|
||||
addpd(xmm5, xmm4);
|
||||
mulsd(xmm0, xmm6);
|
||||
mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL
|
||||
// Add the low 8 bytes of the table entry to y.
addsd(xmm1, xmm2);
|
||||
// Move the high 8 bytes of the table entry into the low lane.
unpckhpd(xmm2, xmm2);
|
||||
mulpd(xmm0, xmm5);
|
||||
addsd(xmm1, xmm0);
|
||||
// Merge the exponent bits from xmm7 into the table mantissa: xmm2 is the scale factor.
por(xmm2, xmm7);
|
||||
unpckhpd(xmm0, xmm0);
|
||||
addsd(xmm0, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
// Unsigned check that n lies in [-894, 1022]; otherwise take the scaled path.
addl(edx, 894);
|
||||
cmpl(edx, 1916);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_1_0_2);
|
||||
// Common case: result = xmm0 * scale + scale.
mulsd(xmm0, xmm2);
|
||||
addsd(xmm0, xmm2);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// n is outside [-894, 1022]: the final scaling is done on the x87 unit with the
// power of two split into two factors.
bind(L_2TAG_PACKET_1_0_2);
|
||||
// Save the x87 control word at [rsp+24] and load a copy with bits 8..9 (0x300,
// the precision-control field) set.
fnstcw(Address(rsp, 24));
|
||||
movzwl(edx, Address(rsp, 24));
|
||||
orl(edx, 768);
|
||||
movw(Address(rsp, 28), edx);
|
||||
fldcw(Address(rsp, 28));
|
||||
// eax = n >> 1, edx = n - (n >> 1).
movl(edx, eax);
|
||||
sarl(eax, 1);
|
||||
subl(edx, eax);
|
||||
// xmm6 = xmm2 with its sign/exponent field cleared, then given the exponent (n >> 1) + 1023.
movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL
|
||||
pandn(xmm6, xmm2);
|
||||
addl(eax, 1023);
|
||||
movdl(xmm3, eax);
|
||||
psllq(xmm3, 52);
|
||||
por(xmm6, xmm3);
|
||||
// xmm4 = second factor, with exponent field (n - (n >> 1)) + 1023.
addl(edx, 1023);
|
||||
movdl(xmm4, edx);
|
||||
psllq(xmm4, 52);
|
||||
// x87: combine the polynomial value with the first factor (multiply, then add
// the factor), then multiply by the second factor.
movsd(Address(rsp, 8), xmm0);
|
||||
fld_d(Address(rsp, 8));
|
||||
movsd(Address(rsp, 16), xmm6);
|
||||
fld_d(Address(rsp, 16));
|
||||
fmula(1);
|
||||
faddp(1);
|
||||
movsd(Address(rsp, 8), xmm4);
|
||||
fld_d(Address(rsp, 8));
|
||||
fmulp(1);
|
||||
// Round to double through memory, reload into xmm0 and restore the control word.
fstp_d(Address(rsp, 8));
|
||||
movsd(xmm0, Address(rsp, 8));
|
||||
fldcw(Address(rsp, 24));
|
||||
// Classify the result by its exponent field (mask 0x7FF0): all ones is overflow,
// zero is underflow, anything else is a normal result.
pextrw(ecx, xmm0, 3);
|
||||
andl(ecx, 32752);
|
||||
cmpl(ecx, 32752);
|
||||
jcc(Assembler::greaterEqual, L_2TAG_PACKET_3_0_2);
|
||||
cmpl(ecx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
// The instructions from here to the next bind follow an unconditional jmp and
// have no label bound to them, so they are never executed.
cmpl(ecx, INT_MIN);
|
||||
jcc(Assembler::less, L_2TAG_PACKET_3_0_2);
|
||||
cmpl(ecx, -1064950997);
|
||||
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
|
||||
jcc(Assembler::greater, L_2TAG_PACKET_4_0_2);
|
||||
movl(edx, Address(rsp, 128));
|
||||
cmpl(edx, -17155601);
|
||||
jcc(Assembler::less, L_2TAG_PACKET_2_0_2);
|
||||
jmp(L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// Overflow: edx = 14 (not read again in this function).
bind(L_2TAG_PACKET_3_0_2);
|
||||
movl(edx, 14);
|
||||
jmp(L_2TAG_PACKET_5_0_2);
|
||||
|
||||
// Underflow: edx = 15 (not read again in this function).
bind(L_2TAG_PACKET_4_0_2);
|
||||
movl(edx, 15);
|
||||
|
||||
// Common tail of the overflow/underflow paths: push the result on the x87 stack
// and reload the original argument into xmm0.
bind(L_2TAG_PACKET_5_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// |x| >= 2^10 or x is Inf/NaN; eax holds the high word of |x|.
bind(L_2TAG_PACKET_7_0_2);
|
||||
// High word >= 0x7ff00000: Inf or NaN.
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::greaterEqual, L_2TAG_PACKET_8_0_2);
|
||||
// Reload the high word with its sign bit.
movl(eax, Address(rsp, 132));
|
||||
cmpl(eax, INT_MIN);
|
||||
// Unsigned compare: the high word is >= 0x80000000 exactly when x is negative.
// A signed greaterEqual against INT_MIN is always true, which would make the
// overflow path below unreachable and send large positive x to the underflow path.
jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2);
|
||||
// Large positive x: XMAX * XMAX overflows.
movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL
|
||||
mulsd(xmm0, xmm0);
|
||||
movl(edx, 14);
|
||||
jmp(L_2TAG_PACKET_5_0_2);
|
||||
|
||||
// Large negative x: XMIN * XMIN underflows (offset 1216 holds 0x0010000000000000).
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(xmm0, Address(tmp, 1216));
|
||||
mulsd(xmm0, xmm0);
|
||||
movl(edx, 15);
|
||||
jmp(L_2TAG_PACKET_5_0_2);
|
||||
|
||||
// Inf or NaN: a non-zero mantissa (high word above 0x7ff00000 or non-zero low word) is NaN.
bind(L_2TAG_PACKET_8_0_2);
|
||||
movl(edx, Address(rsp, 128));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_10_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2);
|
||||
// Infinity: a high word of exactly 0x7ff00000 is +INF, otherwise -INF.
movl(eax, Address(rsp, 132));
|
||||
cmpl(eax, 2146435072);
|
||||
jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2);
|
||||
movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// exp(-INF) = 0.
bind(L_2TAG_PACKET_11_0_2);
|
||||
movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// NaN: return x + x.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movsd(xmm0, Address(rsp, 128));
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special-case entry: the top 16 bits of |x| are outside [0x3C90, 0x408F].
bind(L_2TAG_PACKET_0_0_2);
|
||||
// High word of |x| >= 0x40900000 (|x| >= 2^10, Inf or NaN) goes to PACKET_7.
movl(eax, Address(rsp, 132));
|
||||
andl(eax, 2147483647);
|
||||
cmpl(eax, 1083179008);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2);
|
||||
// Otherwise |x| < 2^-54: result = x + 1.0.
movsd(xmm0, Address(rsp, 128));
|
||||
addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Normal exit: move the SSE result onto the x87 stack via [rsp+48].
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 48), xmm0);
|
||||
fld_d(Address(rsp, 48));
|
||||
|
||||
// Restore the caller's tmp register.
bind(L_2TAG_PACKET_6_0_2);
|
||||
movl(tmp, Address(rsp, 64));
|
||||
}
|
||||
#endif
|
655
hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
Normal file
655
hotspot/src/cpu/x86/vm/macroAssembler_x86_log.cpp
Normal file
|
@ -0,0 +1,655 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x) expands to the compiler-specific alignment specifier:
// __declspec(align(x)) when _MSC_VER is defined, __attribute__((aligned(x))) otherwise.
#ifdef _MSC_VER
|
||||
#define ALIGNED_(x) __declspec(align(x))
|
||||
#else
|
||||
#define ALIGNED_(x) __attribute__ ((aligned(x)))
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG()
|
||||
// ---------------------
|
||||
//
|
||||
// x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*2^7+0.5))/2^7
|
||||
//
|
||||
// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log(NaN) = quiet NaN, and raise invalid exception
|
||||
// log(+INF) = that INF
|
||||
// log(0) = -INF with divide-by-zero exception raised
|
||||
// log(1) = +0
|
||||
// log(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Lookup table of -log(B) values used by the 64-bit fast_log().
//
// Layout: 129 entries of 16 bytes each (516 juints, 2064 bytes). Each entry
// is a pair of doubles stored as little-endian 32-bit words
// (low word, high word): the high part followed by the low part of the
// tabulated value. fast_log() indexes the table with a byte offset that is a
// multiple of 16 and loads one whole entry with a single 16-byte move.
// The first entry is log(2) (0x3fe62e42fefa3800 + 0x3d2ef35793c76730); the
// last entry is (+0.0, -0.0).
ALIGNED_(16) juint _L_tbl[] =
{
  0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
  0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
  0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
  0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
  0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
  0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
  0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
  0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
  0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
  0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
  0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
  0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
  0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
  0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
  0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
  0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
  0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
  0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
  0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
  0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
  0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
  0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
  0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
  0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
  0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
  0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
  0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
  0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
  0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
  0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
  0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
  0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
  0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
  0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
  0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
  0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
  0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
  0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
  0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
  0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
  0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
  0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
  0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
  0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
  0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
  0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
  0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
  0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
  0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
  0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
  0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
  0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
  0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
  0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
  0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
  0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
  0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
  0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
  0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
  0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
  0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
  0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
  0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
  0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
  0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
  0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
  0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
  0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
  0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
  0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
  0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
  0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
  0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
  0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
  0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
  0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
  0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
  0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
  0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
  0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
  0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
  0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
  0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
  0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
  0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
  0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
  0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
  0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
  0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
  0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
  0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
  0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
  0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
  0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
  0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
  0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
  0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
  0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
  0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
  0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
  0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
  0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
  0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
  0x80000000UL
};
|
||||
|
||||
// log(2)/16 split into a high part (0x3fa62e42fefa3800, bytes 0..7) and a
// low part (0x3ceef35793c76730, bytes 8..15), stored as little-endian 32-bit
// word pairs. The 1/16 factor matches fast_log(), which multiplies these by
// the exponent field still shifted left by 4 bits.
ALIGNED_(16) juint _log2[] =
{
  0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
};
|
||||
|
||||
// Coefficients of the polynomial p(r) used by fast_log(), stored as six
// doubles (little-endian 32-bit word pairs) and loaded 16 bytes at a time:
//   bytes  0..15:  1/7 (0x3fc2492492492492),       -1/4 (0xbfd0000000000000)
//   bytes 16..31: ~-1/6 (0xbfc5555e3d6fb175),       1/3 (0x3fd5555555555555)
//   bytes 32..47:  1/5 (0x3fc999999999999a),       -1/2 (0xbfe0000000000000)
ALIGNED_(16) juint _coeff[] =
{
  0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
  0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
  0x00000000UL, 0xbfe00000UL
};
|
||||
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, r8, r11
|
||||
|
||||
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2;
|
||||
Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
|
||||
|
||||
assert_different_registers(tmp1, tmp2, eax, ecx, edx);
|
||||
jmp(start);
|
||||
address L_tbl = (address)_L_tbl;
|
||||
address log2 = (address)_log2;
|
||||
address coeff = (address)_coeff;
|
||||
|
||||
bind(start);
|
||||
subq(rsp, 24);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
mov64(rax, 0x3ff0000000000000);
|
||||
movdq(xmm2, rax);
|
||||
mov64(rdx, 0x77f0000000000000);
|
||||
movdq(xmm3, rdx);
|
||||
movl(ecx, 32768);
|
||||
movdl(xmm4, rcx);
|
||||
mov64(tmp1, 0xffffe00000000000);
|
||||
movdq(xmm5, tmp1);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 16352);
|
||||
psrlq(xmm0, 27);
|
||||
lea(tmp2, ExternalAddress(L_tbl));
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_2);
|
||||
paddd(xmm0, xmm4);
|
||||
por(xmm1, xmm3);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
pand(xmm5, xmm1);
|
||||
pand(xmm0, xmm6);
|
||||
subsd(xmm1, xmm5);
|
||||
mulpd(xmm5, xmm0);
|
||||
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulsd(xmm1, xmm0);
|
||||
movq(xmm6, ExternalAddress(log2)); // 0xfefa3800UL, 0x3fa62e42UL
|
||||
movdqu(xmm3, ExternalAddress(coeff)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
|
||||
subsd(xmm5, xmm2);
|
||||
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp2, edx));
|
||||
movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulsd(xmm6, xmm7);
|
||||
if (VM_Version::supports_sse3()) {
|
||||
movddup(xmm5, xmm1);
|
||||
}
|
||||
else {
|
||||
movdqu(xmm5, xmm1);
|
||||
movlhps(xmm5, xmm5);
|
||||
}
|
||||
mulsd(xmm7, ExternalAddress(8 + log2)); // 0x93c76730UL, 0x3ceef357UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
mulpd(xmm5, xmm5);
|
||||
if (VM_Version::supports_sse3()) {
|
||||
movddup(xmm6, xmm0);
|
||||
}
|
||||
else {
|
||||
movdqu(xmm6, xmm0);
|
||||
movlhps(xmm6, xmm6);
|
||||
}
|
||||
addsd(xmm0, xmm1);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
subsd(xmm6, xmm0);
|
||||
mulsd(xmm4, xmm1);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
addsd(xmm1, xmm6);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(B1_5);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_2);
|
||||
movq(xmm0, Address(rsp, 0));
|
||||
movq(xmm1, Address(rsp, 0));
|
||||
addl(eax, 16);
|
||||
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
|
||||
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_3_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
addsd(xmm1, xmm0);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psrlq(xmm0, 27);
|
||||
movl(ecx, 18416);
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_6_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movl(Address(rsp, 16), 3);
|
||||
jmp(L_2TAG_PACKET_8_0_2);
|
||||
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(Address(rsp, 16), 2);
|
||||
|
||||
bind(L_2TAG_PACKET_8_0_2);
|
||||
movq(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 8));
|
||||
|
||||
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
// Combined constant table for the 32-bit fast_log(). All values are doubles
// stored as little-endian 32-bit word pairs. Byte offsets, as used by
// fast_log():
//      0 .. 2063  lookup table of -log(B): 129 entries of 16 bytes
//                 (high part, low part)
//   2064 .. 2079  log(2)/16, high part then low part
//   2080 .. 2127  polynomial coefficients:
//                 1/7, -1/4, ~-1/6, 1/3, 1/5, -1/2
//   2128 .. 2143  mask 0xffffe00000000000 (twice) selecting the high bits of
//                 a double
ALIGNED_(16) juint _static_const_table_log[] =
{
  0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
  0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
  0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
  0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
  0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
  0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
  0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
  0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
  0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
  0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
  0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
  0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
  0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
  0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
  0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
  0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
  0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
  0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
  0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
  0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
  0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
  0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
  0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
  0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
  0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
  0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
  0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
  0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
  0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
  0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
  0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
  0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
  0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
  0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
  0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
  0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
  0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
  0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
  0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
  0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
  0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
  0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
  0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
  0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
  0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
  0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
  0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
  0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
  0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
  0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
  0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
  0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
  0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
  0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
  0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
  0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
  0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
  0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
  0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
  0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
  0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
  0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
  0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
  0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
  0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
  0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
  0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
  0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
  0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
  0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
  0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
  0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
  0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
  0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
  0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
  0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
  0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
  0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
  0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
  0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
  0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
  0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
  0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
  0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
  0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
  0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
  0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
  0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
  0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
  0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
  0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
  0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
  0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
  0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
  0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
  0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
  0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
  0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
  0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
  0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
  0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
  0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
  0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
  0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
  0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
  0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
  0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
  0xffffe000UL
};
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
|
||||
Label L_2TAG_PACKET_10_0_2, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
jmp(start);
|
||||
address static_const_table = (address)_static_const_table_log;
|
||||
|
||||
bind(start);
|
||||
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table));
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
movapd(xmm1, xmm0);
|
||||
movl(ecx, 32768);
|
||||
movdl(xmm4, ecx);
|
||||
movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psllq(xmm0, 5);
|
||||
movl(ecx, 16352);
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_1_0_2);
|
||||
paddd(xmm0, xmm4);
|
||||
por(xmm1, xmm3);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
pand(xmm5, xmm1);
|
||||
pand(xmm0, xmm6);
|
||||
subsd(xmm1, xmm5);
|
||||
mulpd(xmm5, xmm0);
|
||||
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
|
||||
subsd(xmm5, xmm2);
|
||||
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx));
|
||||
movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
|
||||
mulsd(xmm6, xmm7);
|
||||
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
mulpd(xmm5, xmm5);
|
||||
pshufd(xmm6, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
subsd(xmm6, xmm0);
|
||||
mulsd(xmm4, xmm1);
|
||||
pshufd(xmm2, xmm0, 238);
|
||||
addsd(xmm1, xmm6);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
addl(eax, 16);
|
||||
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
movl(edx, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112));
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(edx, 2);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_4_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movapd(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
psllq(xmm0, 5);
|
||||
movl(ecx, 18416);
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 228);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
}
|
||||
#endif
|
687
hotspot/src/cpu/x86/vm/macroAssembler_x86_log10.cpp
Normal file
687
hotspot/src/cpu/x86/vm/macroAssembler_x86_log10.cpp
Normal file
|
@ -0,0 +1,687 @@
|
|||
/*
|
||||
* Copyright (c) 2016, Intel Corporation.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "asm/assembler.hpp"
|
||||
#include "asm/assembler.inline.hpp"
|
||||
#include "runtime/stubRoutines.hpp"
|
||||
#include "macroAssembler_x86.hpp"
|
||||
|
||||
// ALIGNED_(x): compiler-specific way to request x-byte alignment for the
// object declared right after it; used for the constant tables below.
#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#else
#define ALIGNED_(x) __attribute__ ((aligned(x)))
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
// ALGORITHM DESCRIPTION - LOG10()
|
||||
// ---------------------
|
||||
//
|
||||
// Let x=2^k * mx, mx in [1,2)
|
||||
//
|
||||
// Get B~1/mx based on the output of rcpss instruction (B0)
|
||||
// B = int((B0*LH*2^7+0.5))/2^7
|
||||
// LH is a short approximation for log10(e)
|
||||
//
|
||||
// Reduced argument: r=B*mx-LH (computed accurately in high and low parts)
|
||||
//
|
||||
// Result: k*log10(2) - log(B) + p(r)
|
||||
// p(r) is a degree 7 polynomial
|
||||
// -log(B) read from data table (high, low parts)
|
||||
// Result is formed from high and low parts
|
||||
//
|
||||
// Special cases:
|
||||
// log10(0) = -INF with divide-by-zero exception raised
|
||||
// log10(1) = +0
|
||||
// log10(x) = NaN with invalid exception raised if x < -0, including -INF
|
||||
// log10(+INF) = +INF
|
||||
//
|
||||
/******************************************************************************/
|
||||
|
||||
#ifdef _LP64
|
||||
// The 64 bit code is at most SSE2 compliant
|
||||
// Two 64-bit bit masks stored as four little-endian 32-bit words:
//   low  qword = 0xfffffffff8000000
//   high qword = 0xffffe00000000000
// fast_log10 loads all 16 bytes into xmm5 and ANDs them with the mantissa
// register (andpd xmm5, xmm1); a copy with the two qwords swapped
// (pshufd ..., 78) is ANDed with the reciprocal estimate.
ALIGNED_(16) juint _HIGHSIGMASK_log10[] =
|
||||
{
|
||||
0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
};
|
||||
|
||||
// Two doubles stored as four little-endian 32-bit words:
//   [0..1] = 0x3fdbc00000000000 - subtracted from the high part of the
//            product when the reduced argument is formed (subsd xmm5, xmm2);
//            presumably the short constant "LH" of the algorithm description
//            above - TODO confirm.
//   [2..3] = 0x3f5a7a6cbf2e4108 - loaded separately from offset 8 and
//            multiplied with the reduced argument (mulsd xmm6, xmm1).
ALIGNED_(16) juint _LOG10_E[] =
|
||||
{
|
||||
0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
|
||||
ALIGNED_(16) juint _L_tbl_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL
|
||||
};
|
||||
|
||||
// Two doubles (high part, then low part) that fast_log10 multiplies with the
// integer exponent term converted by cvtsi2sdl: offset 0 is loaded with movq,
// offset 8 is used directly as a mulsd memory operand.
// NOTE(review): the bit patterns are those of the first _L_tbl_log10 entry
// with the exponent field lowered by 4 (i.e. scaled by 2^-4); presumably
// log10(2)/16 to match the exponent being kept multiplied by 16 - confirm.
ALIGNED_(16) juint _log2_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
|
||||
};
|
||||
|
||||
// Six doubles (twelve 32-bit words) read by fast_log10 as three 16-byte
// pairs at byte offsets 0, 16 and 32 (movdqu into xmm3, xmm4 and xmm2).
// Presumably the coefficients of the polynomial p(r) from the algorithm
// description above, packed two per XMM load - TODO confirm ordering.
ALIGNED_(16) juint _coeff_log10[] =
|
||||
{
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL
|
||||
};
|
||||
|
||||
// Registers:
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, tmp - r11
|
||||
|
||||
// Code generated by Intel C compiler for LIBM library
|
||||
|
||||
// Emits the 64-bit SSE2 instruction sequence for log10 (see the algorithm
// description above).
//
// Input : the double in xmm0.
// Output: the result in xmm0 (every path reaches B1_5 with xmm0 set).
// Scratch: xmm1-xmm7, eax, ecx, edx and r11 (r11 holds the table address).
//
// Stack use: 24 bytes are reserved on entry and released at B1_5:
//   [rsp +  0] copy of the input
//   [rsp +  8] result of the special-case paths
//   [rsp + 16] integer code written by the special-case paths (9 or 8);
//              it is only stored here, never read back by this sequence.
//
// The parameter names deliberately shadow the global register names, so the
// instruction lines below operate on whatever registers the caller passed.
// The instruction order is hand scheduled; do not reorder.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register r11) {
|
||||
// NOTE: B1_4 is declared but never bound or referenced; start, B1_2, B1_3
// and L_2TAG_PACKET_9_0_2 are bound but never jumped to.
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, B1_2, B1_3, B1_4, B1_5, start;
|
||||
|
||||
assert_different_registers(r11, eax, ecx, edx);
|
||||
|
||||
// Addresses of the constant tables defined above.
address HIGHSIGMASK = (address)_HIGHSIGMASK_log10;
|
||||
address LOG10_E = (address)_LOG10_E;
|
||||
address L_tbl = (address)_L_tbl_log10;
|
||||
address log2 = (address)_log2_log10;
|
||||
address coeff = (address)_coeff_log10;
|
||||
|
||||
bind(start);
|
||||
// Reserve the 24-byte scratch area and keep a copy of the input in it.
subq(rsp, 24);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
|
||||
bind(B1_2);
|
||||
// Build constants in registers: pinsrw(..., 3) writes the top 16 bits of the
// low double, so 16368 (0x3FF0) yields the bit pattern of 1.0.
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, ExternalAddress(HIGHSIGMASK)); //0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of the input (sign, exponent, 4 mantissa bits).
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 16352);
|
||||
psrlq(xmm0, 27);
|
||||
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
psrld(xmm0, 2);
|
||||
// Reciprocal estimate (B0 in the algorithm description).
rcpps(xmm0, xmm0);
|
||||
// xmm1 = input with the top 12 bits (sign and exponent) cleared.
psllq(xmm1, 12);
|
||||
// xmm6 = HIGHSIGMASK with its two 64-bit halves swapped.
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the top word: (top - 16) >= 0x7FE0 holds when the
// exponent field is zero (zero/denormal), all ones (INF/NaN) or the sign bit
// is set. All of those go to the special-case dispatcher.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also entered from the denormal path after rescaling.
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
lea(r11, ExternalAddress(L_tbl));
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// eax = exponent field (still multiplied by 16) minus the bias held in ecx,
// converted to double in xmm7.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
// movq loads only the first 8 bytes (the first two words listed).
movq(xmm6, ExternalAddress(log2)); //0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL
|
||||
movdqu(xmm3, ExternalAddress(coeff)); //0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx = 16-byte-granular table offset taken from bits 16..23 of the
// adjusted reciprocal.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
// NOTE(review): the index register here is the global rdx, not the edx
// parameter written just above; this is only correct when the caller passes
// rdx as edx - confirm against the stub generator.
movdqu(xmm0, Address(r11, rdx, Address::times_1, -1504));
|
||||
movdqu(xmm4, ExternalAddress(16 + coeff)); //0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, ExternalAddress(32 + coeff)); //0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
// 68: duplicate the low double of xmm1 into both halves of xmm5.
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, ExternalAddress(8 + log2)); //0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movq(xmm6, ExternalAddress(8 + LOG10_E)); //0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
// 228: plain copy of xmm0 (identity shuffle).
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
// 238: move the high double of xmm0 (the table entry's second double) into
// the low half of xmm2.
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
// Final sum: the accumulated low-order terms are added to the high part.
addsd(xmm0, xmm1);
|
||||
jmp(B1_5);
|
||||
|
||||
// Special-case dispatcher: reload the input and undo the -16 bias on eax.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movq(xmm0, Address(rsp, 0));
|
||||
movq(xmm1, Address(rsp, 0));
|
||||
addl(eax, 16);
|
||||
// Top word >= 0x8000: sign bit set.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
|
||||
// Top word < 16: exponent field is zero (zero or denormal).
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
|
||||
|
||||
// Remaining inputs have an all-ones exponent (INF or NaN): return x + x.
bind(L_2TAG_PACKET_4_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(B1_5);
|
||||
|
||||
// Sign bit set and exponent all ones. Entered with the flags of the
// cmpl(ecx, -2097152) below still live: "above" means high mantissa bits are
// set; otherwise a non-zero low word (edx) also selects the x + x path.
// What is left (sign set, mantissa zero) continues at L_2TAG_PACKET_6_0_2.
bind(L_2TAG_PACKET_5_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
|
||||
jmp(L_2TAG_PACKET_6_0_2);
|
||||
|
||||
// Exponent field zero: zero or denormal input.
bind(L_2TAG_PACKET_3_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
addsd(xmm1, xmm0);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
// All 64 bits zero -> the zero-input path.
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
// Denormal: multiply by the double whose top word is 18416 (0x47F0, i.e.
// 2^128), then redo the setup from B1_2 with ecx = 18416 as the exponent
// bias so the scaling is compensated, and rejoin the main path.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psrlq(xmm0, 27);
|
||||
movdqu(xmm2, ExternalAddress(LOG10_E)); //0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
psrld(xmm0, 2);
|
||||
rcpps(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Sign bit set. edx = low 32 bits, ecx = high 32 bits shifted left by one
// (which drops the sign bit).
bind(L_2TAG_PACKET_2_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
// -2097152 is 0xFFE00000: at or above it the exponent field is all ones.
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
// All remaining bits zero: the input is -0, handled like +0.
jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Negative input: produce NaN by computing 0 * INF (32752 = 0x7FF0 is the
// top word of +INF) and record code 9.
bind(L_2TAG_PACKET_6_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
movl(Address(rsp, 16), 9);
|
||||
jmp(L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Zero input: produce -INF by computing -1.0 / 0 (49136 = 0xBFF0 is the top
// word of -1.0) and record code 8.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(Address(rsp, 16), 8);
|
||||
|
||||
// Common tail of the special cases: spill the result and reload it.
bind(L_2TAG_PACKET_8_0_2);
|
||||
movq(Address(rsp, 8), xmm0);
|
||||
|
||||
bind(B1_3);
|
||||
movq(xmm0, Address(rsp, 8));
|
||||
|
||||
bind(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Common exit: release the 24-byte scratch area; the result is in xmm0.
bind(B1_5);
|
||||
addq(rsp, 24);
|
||||
|
||||
}
|
||||
#else
|
||||
// The 32 bit code is at most SSE2 compliant
|
||||
ALIGNED_(16) juint _static_const_table_log10[] =
|
||||
{
|
||||
0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL,
|
||||
0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL,
|
||||
0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL,
|
||||
0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL,
|
||||
0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL,
|
||||
0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL,
|
||||
0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL,
|
||||
0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL,
|
||||
0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL,
|
||||
0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL,
|
||||
0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL,
|
||||
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL,
|
||||
0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL,
|
||||
0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL,
|
||||
0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL,
|
||||
0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL,
|
||||
0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL,
|
||||
0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL,
|
||||
0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL,
|
||||
0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL,
|
||||
0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL,
|
||||
0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL,
|
||||
0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL,
|
||||
0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL,
|
||||
0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL,
|
||||
0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL,
|
||||
0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL,
|
||||
0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL,
|
||||
0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL,
|
||||
0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL,
|
||||
0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL,
|
||||
0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL,
|
||||
0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL,
|
||||
0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL,
|
||||
0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL,
|
||||
0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL,
|
||||
0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL,
|
||||
0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL,
|
||||
0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 0x7bf58766UL,
|
||||
0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL,
|
||||
0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL,
|
||||
0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL,
|
||||
0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL,
|
||||
0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL,
|
||||
0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL,
|
||||
0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL,
|
||||
0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL,
|
||||
0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL,
|
||||
0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL,
|
||||
0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL,
|
||||
0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL,
|
||||
0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL,
|
||||
0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL,
|
||||
0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL,
|
||||
0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL,
|
||||
0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL,
|
||||
0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL,
|
||||
0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL,
|
||||
0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL,
|
||||
0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL,
|
||||
0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL,
|
||||
0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL,
|
||||
0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL,
|
||||
0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL,
|
||||
0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL,
|
||||
0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL,
|
||||
0x9a4d514bUL, 0x3cf18c9bUL, 0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL,
|
||||
0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL,
|
||||
0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL,
|
||||
0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL,
|
||||
0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL,
|
||||
0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL,
|
||||
0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL,
|
||||
0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL,
|
||||
0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL,
|
||||
0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL,
|
||||
0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL,
|
||||
0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL,
|
||||
0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL,
|
||||
0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL,
|
||||
0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL,
|
||||
0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL,
|
||||
0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL,
|
||||
0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL,
|
||||
0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL,
|
||||
0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL,
|
||||
0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL,
|
||||
0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL,
|
||||
0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL,
|
||||
0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL,
|
||||
0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL,
|
||||
0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL,
|
||||
0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL,
|
||||
0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL,
|
||||
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL,
|
||||
0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL,
|
||||
0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL,
|
||||
0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL,
|
||||
0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL,
|
||||
0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL,
|
||||
0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL,
|
||||
0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL,
|
||||
0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
|
||||
0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL,
|
||||
0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL,
|
||||
0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL,
|
||||
0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL,
|
||||
0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
};
|
||||
//registers,
|
||||
// input: xmm0
|
||||
// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
|
||||
// rax, rdx, rcx, rbx (tmp)
|
||||
|
||||
// Emits the 32-bit SSE2 instruction sequence for log10 (see the algorithm
// description at the top of this file).
//
// Input : read from memory at [rsp + 112] after the 104-byte local frame has
//         been reserved. Presumably this is the caller's stack argument
//         (104 + 8 bytes of saved frame pointer and return address) - TODO
//         confirm against the stub that wraps this sequence.
// Output: pushed on the x87 stack with fld_d.
// Scratch: xmm0-xmm7, eax, ecx, edx. tmp holds the table base address; its
//          previous value is saved at [rsp + 40] and restored before exit.
//
// NOTE(review): the 104 bytes reserved by subl(rsp, 104) are not released
// here; the surrounding code must restore rsp (e.g. via leave()) - confirm.
//
// Layout of _static_const_table_log10, as byte offsets from tmp:
//      0 .. 2063  lookup table, addressed as tmp + edx - 1504
//   2064 .. 2079  two doubles multiplied with the exponent term
//   2080 .. 2127  six doubles loaded as three 16-byte pairs
//   2128 .. 2143  two 64-bit masks
//   2144 .. 2159  two doubles (see the 64-bit variant's _LOG10_E)
//
// The instruction order is hand scheduled; do not reorder.
void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
|
||||
|
||||
Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
|
||||
Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
|
||||
Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, start;
|
||||
|
||||
assert_different_registers(tmp, eax, ecx, edx);
|
||||
|
||||
address static_const_table_log10 = (address)_static_const_table_log10;
|
||||
|
||||
bind(start);
|
||||
// Reserve the local frame, save tmp, and point tmp at the constant table.
subl(rsp, 104);
|
||||
movl(Address(rsp, 40), tmp);
|
||||
lea(tmp, ExternalAddress(static_const_table_log10));
|
||||
// Build constants in registers: pinsrw(..., 3) writes the top 16 bits of the
// low double, so 16368 (0x3FF0) yields the bit pattern of 1.0.
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movl(ecx, 1054736384);
|
||||
movdl(xmm7, ecx);
|
||||
xorpd(xmm3, xmm3);
|
||||
movl(edx, 30704);
|
||||
pinsrw(xmm3, edx, 3);
|
||||
// Load the input from the stack (see the header comment).
movsd(xmm0, Address(rsp, 112));
|
||||
movdqu(xmm1, xmm0);
|
||||
movl(edx, 32768);
|
||||
movdl(xmm4, edx);
|
||||
movdqu(xmm5, Address(tmp, 2128)); // masks: 0xf8000000UL, 0xffffffffUL, 0x00000000UL, 0xffffe000UL
|
||||
// eax = top 16 bits of the input (sign, exponent, 4 mantissa bits).
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 16352);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); // 8 bytes: 0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
// Reciprocal estimate (B0 in the algorithm description).
rcpss(xmm0, xmm0);
|
||||
// xmm1 = input with the top 12 bits (sign and exponent) cleared.
psllq(xmm1, 12);
|
||||
// xmm6 = the mask pair with its two 64-bit halves swapped.
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
// Unsigned range check on the top word: (top - 16) >= 0x7FE0 holds when the
// exponent field is zero (zero/denormal), all ones (INF/NaN) or the sign bit
// is set. All of those go to the special-case dispatcher.
subl(eax, 16);
|
||||
cmpl(eax, 32736);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
|
||||
|
||||
// Main path; also entered from the denormal path after rescaling.
bind(L_2TAG_PACKET_1_0_2);
|
||||
mulss(xmm0, xmm7);
|
||||
por(xmm1, xmm3);
|
||||
andpd(xmm5, xmm1);
|
||||
paddd(xmm0, xmm4);
|
||||
subsd(xmm1, xmm5);
|
||||
movdl(edx, xmm0);
|
||||
psllq(xmm0, 29);
|
||||
andpd(xmm0, xmm6);
|
||||
// eax = exponent field (still multiplied by 16) minus the bias held in ecx,
// converted to double in xmm7.
andl(eax, 32752);
|
||||
subl(eax, ecx);
|
||||
cvtsi2sdl(xmm7, eax);
|
||||
mulpd(xmm5, xmm0);
|
||||
mulsd(xmm1, xmm0);
|
||||
movsd(xmm6, Address(tmp, 2064)); // 8 bytes: 0x509f7800UL, 0x3f934413UL
|
||||
movdqu(xmm3, Address(tmp, 2080)); // 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL
|
||||
subsd(xmm5, xmm2);
|
||||
// edx = 16-byte-granular table offset taken from bits 16..23 of the
// adjusted reciprocal.
andl(edx, 16711680);
|
||||
shrl(edx, 12);
|
||||
movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504));
|
||||
movdqu(xmm4, Address(tmp, 2096)); // 0x385593b1UL, 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL
|
||||
addsd(xmm1, xmm5);
|
||||
movdqu(xmm2, Address(tmp, 2112)); // 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL, 0xbff27af2UL
|
||||
mulsd(xmm6, xmm7);
|
||||
// 68: duplicate the low double of xmm1 into both halves of xmm5.
pshufd(xmm5, xmm1, 68);
|
||||
mulsd(xmm7, Address(tmp, 2072)); // 8 bytes: 0x1f12b358UL, 0x3cdfef31UL
|
||||
mulsd(xmm3, xmm1);
|
||||
addsd(xmm0, xmm6);
|
||||
mulpd(xmm4, xmm5);
|
||||
movsd(xmm6, Address(tmp, 2152)); // 8 bytes: 0xbf2e4108UL, 0x3f5a7a6cUL
|
||||
mulpd(xmm5, xmm5);
|
||||
addpd(xmm4, xmm2);
|
||||
mulpd(xmm3, xmm5);
|
||||
// 228: plain copy of xmm0 (identity shuffle).
pshufd(xmm2, xmm0, 228);
|
||||
addsd(xmm0, xmm1);
|
||||
mulsd(xmm4, xmm1);
|
||||
subsd(xmm2, xmm0);
|
||||
mulsd(xmm6, xmm1);
|
||||
addsd(xmm1, xmm2);
|
||||
// 238: move the high double of xmm0 (the table entry's second double) into
// the low half of xmm2.
pshufd(xmm2, xmm0, 238);
|
||||
mulsd(xmm5, xmm5);
|
||||
addsd(xmm7, xmm2);
|
||||
addsd(xmm1, xmm6);
|
||||
addpd(xmm4, xmm3);
|
||||
addsd(xmm1, xmm7);
|
||||
mulpd(xmm4, xmm5);
|
||||
addsd(xmm1, xmm4);
|
||||
pshufd(xmm5, xmm4, 238);
|
||||
addsd(xmm1, xmm5);
|
||||
// Final sum: the accumulated low-order terms are added to the high part.
addsd(xmm0, xmm1);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Special-case dispatcher: reload the input and undo the -16 bias on eax.
bind(L_2TAG_PACKET_0_0_2);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the input from the stack
|
||||
movdqu(xmm1, xmm0);
|
||||
addl(eax, 16);
|
||||
// Top word >= 0x8000: sign bit set.
cmpl(eax, 32768);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
|
||||
// Top word < 16: exponent field is zero (zero or denormal).
cmpl(eax, 16);
|
||||
jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
|
||||
|
||||
// Remaining inputs have an all-ones exponent (INF or NaN): return x + x.
bind(L_2TAG_PACKET_5_0_2);
|
||||
addsd(xmm0, xmm0);
|
||||
jmp(L_2TAG_PACKET_2_0_2);
|
||||
|
||||
// Sign bit set and exponent all ones. Entered with the flags of the
// cmpl(ecx, -2097152) below still live: "above" means high mantissa bits are
// set; otherwise a non-zero low word (edx) also selects the x + x path.
// What is left (sign set, mantissa zero) continues at L_2TAG_PACKET_7_0_2.
bind(L_2TAG_PACKET_6_0_2);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
cmpl(edx, 0);
|
||||
jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
|
||||
jmp(L_2TAG_PACKET_7_0_2);
|
||||
|
||||
// Sign bit set. edx = low 32 bits, ecx = high 32 bits shifted left by one
// (which drops the sign bit).
bind(L_2TAG_PACKET_3_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
addl(ecx, ecx);
|
||||
// -2097152 is 0xFFE00000: at or above it the exponent field is all ones.
cmpl(ecx, -2097152);
|
||||
jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
// All remaining bits zero: the input is -0, handled like +0.
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
|
||||
// Negative input: produce NaN by computing 0 * INF (32752 = 0x7FF0 is the
// top word of +INF); edx is set to 9 but not read again in this sequence.
bind(L_2TAG_PACKET_7_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 32752);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
movl(edx, 9);
|
||||
mulsd(xmm0, xmm1);
|
||||
|
||||
// Common tail of the special cases: spill the result, push it on the x87
// stack. xmm0 is reloaded with the original input along the way.
bind(L_2TAG_PACKET_9_0_2);
|
||||
movsd(Address(rsp, 0), xmm0);
|
||||
movsd(xmm0, Address(rsp, 112)); // reload the input from the stack
|
||||
fld_d(Address(rsp, 0));
|
||||
jmp(L_2TAG_PACKET_10_0_2);
|
||||
|
||||
// Zero input: produce -INF by computing -1.0 / 0 (49136 = 0xBFF0 is the top
// word of -1.0); edx is set to 8 but not read again in this sequence.
bind(L_2TAG_PACKET_8_0_2);
|
||||
xorpd(xmm1, xmm1);
|
||||
xorpd(xmm0, xmm0);
|
||||
movl(eax, 49136);
|
||||
pinsrw(xmm0, eax, 3);
|
||||
divsd(xmm0, xmm1);
|
||||
movl(edx, 8);
|
||||
jmp(L_2TAG_PACKET_9_0_2);
|
||||
|
||||
// Exponent field zero: zero or denormal input.
bind(L_2TAG_PACKET_4_0_2);
|
||||
movdl(edx, xmm1);
|
||||
psrlq(xmm1, 32);
|
||||
movdl(ecx, xmm1);
|
||||
orl(edx, ecx);
|
||||
cmpl(edx, 0);
|
||||
// All 64 bits zero -> the zero-input path.
jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
|
||||
// Denormal: multiply by the double whose top word is 18416 (0x47F0, i.e.
// 2^128), then redo the setup with ecx = 18416 as the exponent bias so the
// scaling is compensated, and rejoin the main path.
xorpd(xmm1, xmm1);
|
||||
movl(eax, 18416);
|
||||
pinsrw(xmm1, eax, 3);
|
||||
mulsd(xmm0, xmm1);
|
||||
xorpd(xmm2, xmm2);
|
||||
movl(eax, 16368);
|
||||
pinsrw(xmm2, eax, 3);
|
||||
movdqu(xmm1, xmm0);
|
||||
pextrw(eax, xmm0, 3);
|
||||
por(xmm0, xmm2);
|
||||
movl(ecx, 18416);
|
||||
psllq(xmm0, 5);
|
||||
movsd(xmm2, Address(tmp, 2144)); // 8 bytes: 0x00000000UL, 0x3fdbc000UL
|
||||
psrlq(xmm0, 34);
|
||||
rcpss(xmm0, xmm0);
|
||||
psllq(xmm1, 12);
|
||||
pshufd(xmm6, xmm5, 78);
|
||||
psrlq(xmm1, 12);
|
||||
jmp(L_2TAG_PACKET_1_0_2);
|
||||
|
||||
// Normal exit: spill the SSE result and push it on the x87 stack.
bind(L_2TAG_PACKET_2_0_2);
|
||||
movsd(Address(rsp, 24), xmm0);
|
||||
fld_d(Address(rsp, 24));
|
||||
|
||||
// Common exit: restore the caller's value of tmp.
bind(L_2TAG_PACKET_10_0_2);
|
||||
movl(tmp, Address(rsp, 40));
|
||||
|
||||
}
|
||||
#endif
|
3592
hotspot/src/cpu/x86/vm/macroAssembler_x86_pow.cpp
Normal file
3592
hotspot/src/cpu/x86/vm/macroAssembler_x86_pow.cpp
Normal file
File diff suppressed because it is too large
Load diff
2448
hotspot/src/cpu/x86/vm/macroAssembler_x86_sin.cpp
Normal file
2448
hotspot/src/cpu/x86/vm/macroAssembler_x86_sin.cpp
Normal file
File diff suppressed because it is too large
Load diff
2144
hotspot/src/cpu/x86/vm/macroAssembler_x86_tan.cpp
Normal file
2144
hotspot/src/cpu/x86/vm/macroAssembler_x86_tan.cpp
Normal file
File diff suppressed because it is too large
Load diff
|
@ -2093,25 +2093,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||
entry_checkcast_arraycopy);
|
||||
}
|
||||
|
||||
// Generates two x87-based math stubs and publishes their entry points in
// StubRoutines::_intrinsic_log10 and StubRoutines::_intrinsic_tan. Each entry
// is typed as double (*)(double).
//
// Both stubs have the same shape: load the double at [rsp + 4] onto the x87
// stack (presumably the caller's stack argument just above the return
// address - TODO confirm), apply the operation, and return with ret(0).
// Each stub is emitted inside its own scope so that its StubCodeMark covers
// exactly that stub's code.
void generate_math_stubs() {
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "log10");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 4));
|
||||
__ flog10();
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "tan");
|
||||
// The stub's entry point is the current code position.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
|
||||
|
||||
__ fld_d(Address(rsp, 4));
|
||||
// 't' presumably selects the tangent variant of trigfunc - confirm.
__ trigfunc('t');
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
|
@ -3534,6 +3515,31 @@ class StubGenerator: public StubCodeGenerator {
|
|||
|
||||
}
|
||||
|
||||
// Emits the log10 stub and returns the address of its first instruction.
// The stub wraps MacroAssembler::fast_log10 in an enter()/leave() frame and
// ends with ret(0). xmm0-xmm7 and rax/rcx/rdx are handed to fast_log10, with
// rbx as its temporary register.
address generate_libmLog10() {
  const address stub_entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ enter();   // frame setup: required for proper stackwalking of RuntimeStub frame
  __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7,
                rax, rcx, rdx, /* tmp */ rbx);
  __ leave();   // frame teardown: required for proper stackwalking of RuntimeStub frame
  __ ret(0);

  return stub_entry;
}
|
||||
|
||||
address generate_libmPow() {
|
||||
address start = __ pc();
|
||||
|
||||
|
@ -3628,6 +3634,44 @@ class StubGenerator: public StubCodeGenerator {
|
|||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Emits the code produced by MacroAssembler::libm_tancot_huge and returns the
// address of its first instruction. No frame setup, teardown or return
// instruction is emitted here; whatever libm_tancot_huge generates is the
// entire routine.
address generate_libm_tan_cot_huge() {
  const address stub_entry = __ pc();

  BLOCK_COMMENT("Entry:");
  __ libm_tancot_huge(xmm0, xmm1,
                      rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp);

  return stub_entry;
}
|
||||
|
||||
// Generates the tan stub and returns its entry address.
// The stub body is: enter(), MacroAssembler::fast_tan(...), leave(), ret(0).
// fast_tan is handed xmm0-xmm7 plus rax, rcx, rdx and rbx (as tmp).
address generate_libmTan() {
|
||||
// Entry address handed back to the caller; captured before anything is emitted.
address start = __ pc();
|
||||
|
||||
// Local aliases for the XMM registers passed to fast_tan.
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
// General-purpose temporary passed as the last fast_tan argument.
const Register tmp = rbx;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
// Safefetch stubs.
|
||||
|
@ -3853,23 +3897,24 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
|
||||
StubRoutines::x86::_L_2il0floatpacket_0_adr = (address)StubRoutines::x86::_L_2il0floatpacket_0;
|
||||
StubRoutines::x86::_Pi4Inv_adr = (address)StubRoutines::x86::_Pi4Inv;
|
||||
StubRoutines::x86::_Pi4x3_adr = (address)StubRoutines::x86::_Pi4x3;
|
||||
StubRoutines::x86::_Pi4x4_adr = (address)StubRoutines::x86::_Pi4x4;
|
||||
StubRoutines::x86::_ones_adr = (address)StubRoutines::x86::_ones;
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dlog10 = generate_libmLog10();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
if (UseLibmSinIntrinsic || UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l();
|
||||
StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge();
|
||||
}
|
||||
if (UseLibmSinIntrinsic) {
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
}
|
||||
if (UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge();
|
||||
StubRoutines::_dtan = generate_libmTan();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void generate_all() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
|
@ -3889,8 +3934,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
generate_math_stubs();
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // might be needed by the others
|
||||
|
|
|
@ -2972,35 +2972,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit;
|
||||
}
|
||||
|
||||
// Emits two small math stubs and publishes their entry points in
// StubRoutines::_intrinsic_log10 and StubRoutines::_intrinsic_tan.
// Both stubs follow the same pattern: reserve 8 bytes of stack, store xmm0
// there, load that slot with fld_d, run the operation, write the result back
// to the slot with fstp_d, reload xmm0 from it, release the slot and return.
void generate_math_stubs() {
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "log10");
|
||||
// The stub's entry point is the current code position, typed as double(double).
StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
|
||||
|
||||
// Reserve one 8-byte stack slot used to pass the value between xmm0 and fld_d/fstp_d.
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ flog10();
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
// Release the slot so rsp is back at its entry value before returning.
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
{
|
||||
StubCodeMark mark(this, "StubRoutines", "tan");
|
||||
// Same sequence as the log10 stub above, with trigfunc('t') as the operation.
StubRoutines::_intrinsic_tan = (double (*)(double)) __ pc();
|
||||
|
||||
__ subq(rsp, 8);
|
||||
__ movdbl(Address(rsp, 0), xmm0);
|
||||
__ fld_d(Address(rsp, 0));
|
||||
__ trigfunc('t');
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ movdbl(xmm0, Address(rsp, 0));
|
||||
__ addq(rsp, 8);
|
||||
__ ret(0);
|
||||
}
|
||||
}
|
||||
|
||||
// AES intrinsic stubs
|
||||
enum {AESBlockSize = 16};
|
||||
|
||||
|
@ -4731,6 +4702,46 @@ class StubGenerator: public StubCodeGenerator {
|
|||
#endif
|
||||
__ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmLog10() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp = r11;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||
#endif
|
||||
__ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
|
@ -4810,6 +4821,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
|
@ -4822,6 +4835,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
@ -4853,6 +4868,8 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
|
@ -4865,6 +4882,55 @@ class StubGenerator: public StubCodeGenerator {
|
|||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
|
||||
return start;
|
||||
|
||||
}
|
||||
|
||||
address generate_libmTan() {
|
||||
address start = __ pc();
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
const XMMRegister x3 = xmm3;
|
||||
|
||||
const XMMRegister x4 = xmm4;
|
||||
const XMMRegister x5 = xmm5;
|
||||
const XMMRegister x6 = xmm6;
|
||||
const XMMRegister x7 = xmm7;
|
||||
|
||||
const Register tmp1 = r8;
|
||||
const Register tmp2 = r9;
|
||||
const Register tmp3 = r10;
|
||||
const Register tmp4 = r11;
|
||||
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WIN64
|
||||
__ push(rsi);
|
||||
__ push(rdi);
|
||||
// save the xmm registers which must be preserved 6-7
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ movdqu(Address(rsp, 0), xmm6);
|
||||
__ movdqu(Address(rsp, 2 * wordSize), xmm7);
|
||||
#endif
|
||||
__ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4);
|
||||
|
||||
#ifdef _WIN64
|
||||
// restore xmm regs belonging to calling function
|
||||
__ movdqu(xmm6, Address(rsp, 0));
|
||||
__ movdqu(xmm7, Address(rsp, 2 * wordSize));
|
||||
__ addptr(rsp, 4 * wordSize);
|
||||
__ pop(rdi);
|
||||
__ pop(rsi);
|
||||
#endif
|
||||
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
@ -5065,18 +5131,30 @@ class StubGenerator: public StubCodeGenerator {
|
|||
StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
|
||||
StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
|
||||
}
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (VM_Version::supports_sse2() && UseLibmIntrinsic) {
|
||||
StubRoutines::x86::_ONEHALF_adr = (address)StubRoutines::x86::_ONEHALF;
|
||||
StubRoutines::x86::_P_2_adr = (address)StubRoutines::x86::_P_2;
|
||||
StubRoutines::x86::_SC_4_adr = (address)StubRoutines::x86::_SC_4;
|
||||
StubRoutines::x86::_Ctable_adr = (address)StubRoutines::x86::_Ctable;
|
||||
StubRoutines::x86::_SC_2_adr = (address)StubRoutines::x86::_SC_2;
|
||||
StubRoutines::x86::_SC_3_adr = (address)StubRoutines::x86::_SC_3;
|
||||
StubRoutines::x86::_SC_1_adr = (address)StubRoutines::x86::_SC_1;
|
||||
StubRoutines::x86::_PI_INV_TABLE_adr = (address)StubRoutines::x86::_PI_INV_TABLE;
|
||||
StubRoutines::x86::_PI_4_adr = (address)StubRoutines::x86::_PI_4;
|
||||
StubRoutines::x86::_PI32INV_adr = (address)StubRoutines::x86::_PI32INV;
|
||||
StubRoutines::x86::_SIGN_MASK_adr = (address)StubRoutines::x86::_SIGN_MASK;
|
||||
StubRoutines::x86::_P_1_adr = (address)StubRoutines::x86::_P_1;
|
||||
StubRoutines::x86::_P_3_adr = (address)StubRoutines::x86::_P_3;
|
||||
StubRoutines::x86::_NEG_ZERO_adr = (address)StubRoutines::x86::_NEG_ZERO;
|
||||
StubRoutines::_dexp = generate_libmExp();
|
||||
StubRoutines::_dlog = generate_libmLog();
|
||||
StubRoutines::_dlog10 = generate_libmLog10();
|
||||
StubRoutines::_dpow = generate_libmPow();
|
||||
if (UseLibmSinIntrinsic) {
|
||||
StubRoutines::_dtan = generate_libmTan();
|
||||
StubRoutines::_dsin = generate_libmSin();
|
||||
}
|
||||
if (UseLibmCosIntrinsic) {
|
||||
StubRoutines::_dcos = generate_libmCos();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void generate_all() {
|
||||
// Generates all stubs and initializes the entry points
|
||||
|
@ -5119,8 +5197,6 @@ class StubGenerator: public StubCodeGenerator {
|
|||
// arraycopy stubs used by compilers
|
||||
generate_arraycopy_stubs();
|
||||
|
||||
generate_math_stubs();
|
||||
|
||||
// don't bother generating these AES intrinsic stubs unless global flag is set
|
||||
if (UseAESIntrinsics) {
|
||||
StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask(); // needed by the others
|
||||
|
|
|
@ -48,6 +48,29 @@ address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
|||
address StubRoutines::x86::_k256_adr = NULL;
|
||||
address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
|
||||
|
||||
// Published addresses of the constant tables shared by the sin and cos stubs.
// All of them start out as NULL; the stub-initialisation code elsewhere in
// this change assigns each one the address of the table of the same name
// (for example _ONEHALF_adr = (address)_ONEHALF).
|
||||
address StubRoutines::x86::_ONEHALF_adr = NULL;
|
||||
address StubRoutines::x86::_P_2_adr = NULL;
|
||||
address StubRoutines::x86::_SC_4_adr = NULL;
|
||||
address StubRoutines::x86::_Ctable_adr = NULL;
|
||||
address StubRoutines::x86::_SC_2_adr = NULL;
|
||||
address StubRoutines::x86::_SC_3_adr = NULL;
|
||||
address StubRoutines::x86::_SC_1_adr = NULL;
|
||||
address StubRoutines::x86::_PI_INV_TABLE_adr = NULL;
|
||||
address StubRoutines::x86::_PI_4_adr = NULL;
|
||||
address StubRoutines::x86::_PI32INV_adr = NULL;
|
||||
address StubRoutines::x86::_SIGN_MASK_adr = NULL;
|
||||
address StubRoutines::x86::_P_1_adr = NULL;
|
||||
address StubRoutines::x86::_P_3_adr = NULL;
|
||||
address StubRoutines::x86::_NEG_ZERO_adr = NULL;
|
||||
|
||||
// Published addresses of the constant tables shared by the sincos and tancot
// helpers; also NULL until the stub-initialisation code assigns them.
|
||||
address StubRoutines::x86::_L_2il0floatpacket_0_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4Inv_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4x3_adr = NULL;
|
||||
address StubRoutines::x86::_Pi4x4_adr = NULL;
|
||||
address StubRoutines::x86::_ones_adr = NULL;
|
||||
|
||||
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
||||
{
|
||||
/* The fields in this structure are arranged so that they can be
|
||||
|
|
|
@ -57,6 +57,48 @@
|
|||
// byte flip mask for sha256
|
||||
static address _pshuffle_byte_flip_mask_addr;
|
||||
|
||||
// Tables common to the LIBM sin and cos stubs.
// Each table is declared as a pair: the juint array itself (NAME[]) and a
// static address (NAME_adr) through which its location is published.
|
||||
static juint _ONEHALF[];
|
||||
static address _ONEHALF_adr;
|
||||
static juint _P_2[];
|
||||
static address _P_2_adr;
|
||||
static juint _SC_4[];
|
||||
static address _SC_4_adr;
|
||||
static juint _Ctable[];
|
||||
static address _Ctable_adr;
|
||||
static juint _SC_2[];
|
||||
static address _SC_2_adr;
|
||||
static juint _SC_3[];
|
||||
static address _SC_3_adr;
|
||||
static juint _SC_1[];
|
||||
static address _SC_1_adr;
|
||||
static juint _PI_INV_TABLE[];
|
||||
static address _PI_INV_TABLE_adr;
|
||||
static juint _PI_4[];
|
||||
static address _PI_4_adr;
|
||||
static juint _PI32INV[];
|
||||
static address _PI32INV_adr;
|
||||
static juint _SIGN_MASK[];
|
||||
static address _SIGN_MASK_adr;
|
||||
static juint _P_1[];
|
||||
static address _P_1_adr;
|
||||
static juint _P_3[];
|
||||
static address _P_3_adr;
|
||||
static juint _NEG_ZERO[];
|
||||
static address _NEG_ZERO_adr;
|
||||
|
||||
// Tables common to the LIBM sincos and tancot helpers; same array/address
// pairing as the sin/cos tables.
|
||||
static juint _L_2il0floatpacket_0[];
|
||||
static address _L_2il0floatpacket_0_adr;
|
||||
static juint _Pi4Inv[];
|
||||
static address _Pi4Inv_adr;
|
||||
static juint _Pi4x3[];
|
||||
static address _Pi4x3_adr;
|
||||
static juint _Pi4x4[];
|
||||
static address _Pi4x4_adr;
|
||||
static juint _ones[];
|
||||
static address _ones_adr;
|
||||
|
||||
public:
|
||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||
|
@ -69,4 +111,24 @@
|
|||
static address k256_addr() { return _k256_adr; }
|
||||
static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
|
||||
static void generate_CRC32C_table(bool is_pclmulqdq_supported);
|
||||
// Accessors for the LIBM constant-table addresses. Each NAME_addr() simply
// returns the corresponding NAME_adr static and performs no other work.
// Sin/cos tables:
static address _ONEHALF_addr() { return _ONEHALF_adr; }
|
||||
static address _P_2_addr() { return _P_2_adr; }
|
||||
static address _SC_4_addr() { return _SC_4_adr; }
|
||||
static address _Ctable_addr() { return _Ctable_adr; }
|
||||
static address _SC_2_addr() { return _SC_2_adr; }
|
||||
static address _SC_3_addr() { return _SC_3_adr; }
|
||||
static address _SC_1_addr() { return _SC_1_adr; }
|
||||
static address _PI_INV_TABLE_addr() { return _PI_INV_TABLE_adr; }
|
||||
static address _PI_4_addr() { return _PI_4_adr; }
|
||||
static address _PI32INV_addr() { return _PI32INV_adr; }
|
||||
static address _SIGN_MASK_addr() { return _SIGN_MASK_adr; }
|
||||
static address _P_1_addr() { return _P_1_adr; }
|
||||
static address _P_3_addr() { return _P_3_adr; }
|
||||
static address _NEG_ZERO_addr() { return _NEG_ZERO_adr; }
|
||||
// Sincos/tancot tables:
static address _L_2il0floatpacket_0_addr() { return _L_2il0floatpacket_0_adr; }
|
||||
static address _Pi4Inv_addr() { return _Pi4Inv_adr; }
|
||||
static address _Pi4x3_addr() { return _Pi4x3_adr; }
|
||||
static address _Pi4x4_addr() { return _Pi4x4_adr; }
|
||||
static address _ones_addr() { return _ones_adr; }
|
||||
|
||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||
|
|
|
@ -345,13 +345,34 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
|||
__ fld_d(Address(rsp, 1*wordSize));
|
||||
switch (kind) {
|
||||
case Interpreter::java_lang_math_sin :
|
||||
__ trigfunc('s');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2() && StubRoutines::dsin() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_cos :
|
||||
__ trigfunc('c');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2() && StubRoutines::dcos() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_tan :
|
||||
__ trigfunc('t');
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_sqrt:
|
||||
__ fsqrt();
|
||||
|
@ -362,26 +383,29 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
|||
case Interpreter::java_lang_math_log:
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
// Store to stack to convert 80bit precision back to 64bits
|
||||
__ push_fTOS();
|
||||
__ pop_fTOS();
|
||||
__ subptr(rsp, 2 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
|
||||
}
|
||||
__ addptr(rsp, 2 * wordSize);
|
||||
break;
|
||||
case Interpreter::java_lang_math_pow:
|
||||
__ fld_d(Address(rsp, 3*wordSize)); // second argument
|
||||
__ subptr(rsp, 4 * wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
__ fstp_d(Address(rsp, 2 * wordSize));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
|
@ -391,7 +415,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
|||
case Interpreter::java_lang_math_exp:
|
||||
__ subptr(rsp, 2*wordSize);
|
||||
__ fstp_d(Address(rsp, 0));
|
||||
if (VM_Version::supports_sse2()) {
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "interpreter/interpreterRuntime.hpp"
|
||||
#include "interpreter/templateInterpreterGenerator.hpp"
|
||||
#include "runtime/arguments.hpp"
|
||||
#include "runtime/sharedRuntime.hpp"
|
||||
|
||||
#define __ _masm->
|
||||
|
||||
|
@ -373,32 +374,60 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
|
|||
__ sqrtsd(xmm0, Address(rsp, wordSize));
|
||||
} else if (kind == Interpreter::java_lang_math_exp) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dexp() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_log) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dlog() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_log10) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dlog10() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_sin) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dsin() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_cos) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dcos() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_pow) {
|
||||
__ movdbl(xmm1, Address(rsp, wordSize));
|
||||
__ movdbl(xmm0, Address(rsp, 3 * wordSize));
|
||||
if (StubRoutines::dpow() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)));
|
||||
}
|
||||
} else if (kind == Interpreter::java_lang_math_tan) {
|
||||
__ movdbl(xmm0, Address(rsp, wordSize));
|
||||
if (StubRoutines::dtan() != NULL) {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan())));
|
||||
} else {
|
||||
__ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)));
|
||||
}
|
||||
} else {
|
||||
__ fld_d(Address(rsp, wordSize));
|
||||
switch (kind) {
|
||||
case Interpreter::java_lang_math_sin :
|
||||
__ trigfunc('s');
|
||||
break;
|
||||
case Interpreter::java_lang_math_cos :
|
||||
__ trigfunc('c');
|
||||
break;
|
||||
case Interpreter::java_lang_math_tan :
|
||||
__ trigfunc('t');
|
||||
break;
|
||||
case Interpreter::java_lang_math_abs:
|
||||
__ fabs();
|
||||
break;
|
||||
case Interpreter::java_lang_math_log10:
|
||||
__ flog10();
|
||||
break;
|
||||
default :
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
|
|
|
@ -844,6 +844,11 @@ public:
|
|||
static uint32_t get_xsave_header_upper_segment() {
|
||||
return _cpuid_info.xem_xcr0_edx;
|
||||
}
|
||||
|
||||
// Reports whether the java.lang.Thread.onSpinWait() intrinsic can be used.
|
||||
// SSE2 and later processors implement a 'pause' instruction that makes an
|
||||
// efficient implementation possible, so this is simply supports_sse2().
|
||||
static bool supports_on_spin_wait() { return supports_sse2(); }
|
||||
};
|
||||
|
||||
#endif // CPU_X86_VM_VM_VERSION_X86_HPP
|
||||
|
|
|
@ -1719,6 +1719,10 @@ const bool Matcher::match_rule_supported(int opcode) {
|
|||
if (!(UseSSE > 4))
|
||||
ret_value = false;
|
||||
break;
|
||||
case Op_OnSpinWait:
|
||||
if (VM_Version::supports_on_spin_wait() == false)
|
||||
ret_value = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret_value; // Per default match rules are supported.
|
||||
|
@ -2996,6 +3000,24 @@ instruct sqrtD_imm(regD dst, immD con) %{
|
|||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Matches the OnSpinWait ideal node (no operands) and encodes it as a single
// 'pause' instruction, with cost 200 on the pipe_slow pipeline class.
// NOTE(review): the format text below prints "(empty encoding)" when
// os::is_MP() is false, but ins_encode emits pause() unconditionally —
// confirm which of the two is intended.
instruct onspinwait() %{
|
||||
match(OnSpinWait);
|
||||
ins_cost(200);
|
||||
|
||||
format %{
|
||||
$$template
|
||||
if (os::is_MP()) {
|
||||
$$emit$$"pause\t! membar_onspinwait"
|
||||
} else {
|
||||
$$emit$$"MEMBAR-onspinwait ! (empty encoding)"
|
||||
}
|
||||
%}
|
||||
ins_encode %{
|
||||
__ pause();
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// ====================VECTOR INSTRUCTIONS=====================================
|
||||
|
||||
// Load vectors (4 bytes long)
|
||||
|
|
|
@ -9828,27 +9828,6 @@ instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
|
|||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// TanD for the x87 register class: selected when UseSSE <= 1, with source and
// destination both of operand class regDPR1.
// Encoded as the raw bytes D9 F2 (fptan) followed by DD D8 (fstp st).
// Per the x87 instruction set definition fptan also pushes 1.0, which the
// trailing fstp discards.
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
|
||||
predicate (UseSSE<=1);
|
||||
match(Set dst(TanD src));
|
||||
format %{ "DTAN $dst" %}
|
||||
ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
|
||||
Opcode(0xDD), Opcode(0xD8)); // fstp st
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// TanD for XMM operands: selected when UseSSE >= 2, computing in place on dst.
// The encoding brackets the same fptan / fstp st byte sequence used by the
// x87 variant with Push_SrcD(dst) before and Push_ResultD(dst) after.
// The flags register is declared killed because of those two helpers (see
// the comment on the effect line).
instruct tanD_reg(regD dst, eFlagsReg cr) %{
|
||||
predicate (UseSSE>=2);
|
||||
match(Set dst(TanD dst));
|
||||
effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
|
||||
format %{ "DTAN $dst" %}
|
||||
ins_encode( Push_SrcD(dst),
|
||||
Opcode(0xD9), Opcode(0xF2), // fptan
|
||||
Opcode(0xDD), Opcode(0xD8), // fstp st
|
||||
Push_ResultD(dst) );
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct atanDPR_reg(regDPR dst, regDPR src) %{
|
||||
predicate (UseSSE<=1);
|
||||
match(Set dst(AtanD dst src));
|
||||
|
@ -9880,41 +9859,6 @@ instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
|
|||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Log10D for the x87 register class: selected when UseSSE <= 1.
// Uses the identity log_10(x) = log_10(2) * log_2(x), implemented as the
// three-instruction sequence fldlg2 / fxch / fyl2x emitted as raw opcode bytes.
instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
|
||||
predicate (UseSSE<=1);
|
||||
// The source Double operand on FPU stack
|
||||
match(Set dst (Log10D src));
|
||||
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
|
||||
// fxch ; swap ST(0) with ST(1)
|
||||
// fyl2x ; compute log_10(2) * log_2(x)
|
||||
format %{ "FLDLG2 \t\t\t#Log10\n\t"
|
||||
"FXCH \n\t"
|
||||
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
|
||||
%}
|
||||
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
|
||||
Opcode(0xD9), Opcode(0xC9), // fxch
|
||||
Opcode(0xD9), Opcode(0xF1)); // fyl2x
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Log10D for XMM operands: selected when UseSSE >= 2, with separate src and
// dst operands and the flags register declared killed.
// Uses the identity log_10(x) = log_10(2) * log_2(x). No fxch is needed here
// because fldlg2 is emitted first and Push_SrcD(src) is emitted after it,
// immediately before fyl2x; Push_ResultD(dst) follows.
instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
|
||||
predicate (UseSSE>=2);
|
||||
effect(KILL cr);
|
||||
match(Set dst (Log10D src));
|
||||
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
|
||||
// fyl2x ; compute log_10(2) * log_2(x)
|
||||
format %{ "FLDLG2 \t\t\t#Log10\n\t"
|
||||
"FYL2X \t\t\t# Q=Log10*Log_2(x)"
|
||||
%}
|
||||
ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
|
||||
Push_SrcD(src),
|
||||
Opcode(0xD9), Opcode(0xF1), // fyl2x
|
||||
Push_ResultD(dst));
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
//-------------Float Instructions-------------------------------
|
||||
// Float Math
|
||||
|
||||
|
|
|
@ -9897,34 +9897,6 @@ instruct cmpD_imm(rRegI dst, regD src, immD con, rFlagsReg cr) %{
|
|||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// -----------Trig and Transcendental Instructions-----------------------------
|
||||
// TanD computed in place on an XMM operand. There is no predicate, so the
// rule applies whenever the match pattern does.
// The encoding brackets the raw fptan / fstp st byte sequence with
// Push_SrcXD(dst) before and Push_ResultXD(dst) after.
instruct tanD_reg(regD dst) %{
|
||||
match(Set dst (TanD dst));
|
||||
|
||||
format %{ "dtan $dst\n\t" %}
|
||||
ins_encode( Push_SrcXD(dst),
|
||||
Opcode(0xD9), Opcode(0xF2), //fptan
|
||||
Opcode(0xDD), Opcode(0xD8), //fstp st
|
||||
Push_ResultXD(dst) );
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Log10D computed in place on an XMM operand, using the identity
// log_10(x) = log_10(2) * log_2(x). fldlg2 is emitted first, then
// Push_SrcXD(dst), then fyl2x, then Push_ResultXD(dst).
instruct log10D_reg(regD dst) %{
|
||||
// The source and result Double operands in XMM registers
|
||||
match(Set dst (Log10D dst));
|
||||
// fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
|
||||
// fyl2x ; compute log_10(2) * log_2(x)
|
||||
format %{ "fldlg2\t\t\t#Log10\n\t"
|
||||
"fyl2x\t\t\t# Q=Log10*Log_2(x)\n\t"
|
||||
%}
|
||||
ins_encode(Opcode(0xD9), Opcode(0xEC), // fldlg2
|
||||
Push_SrcXD(dst),
|
||||
Opcode(0xD9), Opcode(0xF1), // fyl2x
|
||||
Push_ResultXD(dst));
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
//----------Arithmetic Conversion Instructions---------------------------------
|
||||
|
||||
instruct roundFloat_nop(regF dst)
|
||||
|
|
|
@ -120,7 +120,9 @@ public class HotSpotCodeCacheProvider implements CodeCacheProvider {
|
|||
resultInstalledCode = installedCode;
|
||||
}
|
||||
|
||||
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, (HotSpotSpeculationLog) log);
|
||||
HotSpotSpeculationLog speculationLog = (log != null && log.hasSpeculations()) ? (HotSpotSpeculationLog) log : null;
|
||||
|
||||
int result = runtime.getCompilerToVM().installCode(target, (HotSpotCompiledCode) compiledCode, resultInstalledCode, speculationLog);
|
||||
if (result != config.codeInstallResultOk) {
|
||||
String resultDesc = config.getCodeInstallResultDescription(result);
|
||||
if (compiledCode instanceof HotSpotCompiledNmethod) {
|
||||
|
|
|
@ -38,7 +38,7 @@ public class HotSpotSpeculationLog implements SpeculationLog {
|
|||
/** All speculations that have been a deoptimization reason. */
|
||||
private Set<SpeculationReason> failedSpeculations;
|
||||
|
||||
/** Strong references to all reasons embededded in the current nmethod. */
|
||||
/** Strong references to all reasons embedded in the current nmethod. */
|
||||
private volatile Collection<SpeculationReason> speculations;
|
||||
|
||||
@Override
|
||||
|
@ -81,4 +81,9 @@ public class HotSpotSpeculationLog implements SpeculationLog {
|
|||
|
||||
return HotSpotObjectConstantImpl.forObject(reason);
|
||||
}
|
||||
|
||||
@Override
|
||||
public synchronized boolean hasSpeculations() {
|
||||
return speculations != null && !speculations.isEmpty();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -56,4 +56,11 @@ public interface SpeculationLog {
|
|||
* argument to the deoptimization function.
|
||||
*/
|
||||
JavaConstant speculate(SpeculationReason reason);
|
||||
|
||||
/**
|
||||
* Returns whether this log has any speculations.
|
||||
*
|
||||
* @return {@code true} if there are speculations, {@code false} otherwise
|
||||
*/
|
||||
boolean hasSpeculations();
|
||||
}
|
||||
|
|
|
@ -247,7 +247,7 @@ void Canonicalizer::do_ArrayLength (ArrayLength* x) {
|
|||
|
||||
} else if ((lf = x->array()->as_LoadField()) != NULL) {
|
||||
ciField* field = lf->field();
|
||||
if (field->is_constant() && field->is_static()) {
|
||||
if (field->is_static_constant()) {
|
||||
assert(PatchALot || ScavengeRootsInCode < 2, "Constant field loads are folded during parsing");
|
||||
ciObject* c = field->constant_value().as_object();
|
||||
if (!c->is_null_object()) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -77,17 +77,13 @@ class CodeStub: public CompilationResourceObj {
|
|||
}
|
||||
};
|
||||
|
||||
|
||||
define_array(CodeStubArray, CodeStub*)
|
||||
define_stack(_CodeStubList, CodeStubArray)
|
||||
|
||||
class CodeStubList: public _CodeStubList {
|
||||
class CodeStubList: public GrowableArray<CodeStub*> {
|
||||
public:
|
||||
CodeStubList(): _CodeStubList() {}
|
||||
CodeStubList(): GrowableArray<CodeStub*>() {}
|
||||
|
||||
void append(CodeStub* stub) {
|
||||
if (!contains(stub)) {
|
||||
_CodeStubList::append(stub);
|
||||
GrowableArray<CodeStub*>::append(stub);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -53,12 +53,9 @@ class C1_MacroAssembler;
|
|||
class CFGPrinter;
|
||||
typedef LIR_OprDesc* LIR_Opr;
|
||||
|
||||
|
||||
define_array(BasicTypeArray, BasicType)
|
||||
define_stack(BasicTypeList, BasicTypeArray)
|
||||
|
||||
define_array(ExceptionInfoArray, ExceptionInfo*)
|
||||
define_stack(ExceptionInfoList, ExceptionInfoArray)
|
||||
typedef GrowableArray<BasicType> BasicTypeArray;
|
||||
typedef GrowableArray<BasicType> BasicTypeList;
|
||||
typedef GrowableArray<ExceptionInfo*> ExceptionInfoList;
|
||||
|
||||
class Compilation: public StackObj {
|
||||
friend class CompilationResourceObj;
|
||||
|
|
|
@ -131,6 +131,9 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
|
|||
if (!VM_Version::supports_atomic_getset4()) return false;
|
||||
#endif
|
||||
break;
|
||||
case vmIntrinsics::_onSpinWait:
|
||||
if (!VM_Version::supports_on_spin_wait()) return false;
|
||||
break;
|
||||
case vmIntrinsics::_arraycopy:
|
||||
case vmIntrinsics::_currentTimeMillis:
|
||||
case vmIntrinsics::_nanoTime:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -164,7 +164,7 @@ FrameMap::FrameMap(ciMethod* method, int monitors, int reserved_argument_area_si
|
|||
_reserved_argument_area_size = MAX2(4, reserved_argument_area_size) * BytesPerWord;
|
||||
|
||||
_argcount = method->arg_size();
|
||||
_argument_locations = new intArray(_argcount, -1);
|
||||
_argument_locations = new intArray(_argcount, _argcount, -1);
|
||||
_incoming_arguments = java_calling_convention(signature_type_array_for(method), false);
|
||||
_oop_map_arg_count = _incoming_arguments->reserved_stack_slots();
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -36,8 +36,6 @@
|
|||
|
||||
class ciMethod;
|
||||
class CallingConvention;
|
||||
class BasicTypeArray;
|
||||
class BasicTypeList;
|
||||
|
||||
//--------------------------------------------------------
|
||||
// FrameMap
|
||||
|
|
|
@ -357,7 +357,7 @@ void BlockListBuilder::mark_loops() {
|
|||
|
||||
_active = BitMap(BlockBegin::number_of_blocks()); _active.clear();
|
||||
_visited = BitMap(BlockBegin::number_of_blocks()); _visited.clear();
|
||||
_loop_map = intArray(BlockBegin::number_of_blocks(), 0);
|
||||
_loop_map = intArray(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), 0);
|
||||
_next_loop_index = 0;
|
||||
_next_block_number = _blocks.length();
|
||||
|
||||
|
@ -1354,7 +1354,7 @@ void GraphBuilder::lookup_switch() {
|
|||
} else {
|
||||
// collect successors & keys
|
||||
BlockList* sux = new BlockList(l + 1, NULL);
|
||||
intArray* keys = new intArray(l, 0);
|
||||
intArray* keys = new intArray(l, l, 0);
|
||||
int i;
|
||||
bool has_bb = false;
|
||||
for (i = 0; i < l; i++) {
|
||||
|
@ -1521,6 +1521,8 @@ void GraphBuilder::method_return(Value x) {
|
|||
}
|
||||
|
||||
Value GraphBuilder::make_constant(ciConstant field_value, ciField* field) {
|
||||
if (!field_value.is_valid()) return NULL;
|
||||
|
||||
BasicType field_type = field_value.basic_type();
|
||||
ValueType* value = as_ValueType(field_value);
|
||||
|
||||
|
@ -1588,9 +1590,8 @@ void GraphBuilder::access_field(Bytecodes::Code code) {
|
|||
case Bytecodes::_getstatic: {
|
||||
// check for compile-time constants, i.e., initialized static final fields
|
||||
Value constant = NULL;
|
||||
if (field->is_constant() && !PatchALot) {
|
||||
if (field->is_static_constant() && !PatchALot) {
|
||||
ciConstant field_value = field->constant_value();
|
||||
// Stable static fields are checked for non-default values in ciField::initialize_from().
|
||||
assert(!field->is_stable() || !field_value.is_null_or_zero(),
|
||||
"stable static w/ default value shouldn't be a constant");
|
||||
constant = make_constant(field_value, field);
|
||||
|
@ -1619,36 +1620,23 @@ void GraphBuilder::access_field(Bytecodes::Code code) {
|
|||
Value constant = NULL;
|
||||
obj = apop();
|
||||
ObjectType* obj_type = obj->type()->as_ObjectType();
|
||||
if (obj_type->is_constant() && !PatchALot) {
|
||||
if (field->is_constant() && obj_type->is_constant() && !PatchALot) {
|
||||
ciObject* const_oop = obj_type->constant_value();
|
||||
if (!const_oop->is_null_object() && const_oop->is_loaded()) {
|
||||
if (field->is_constant()) {
|
||||
ciConstant field_value = field->constant_value_of(const_oop);
|
||||
if (FoldStableValues && field->is_stable() && field_value.is_null_or_zero()) {
|
||||
// Stable field with default value can't be constant.
|
||||
constant = NULL;
|
||||
} else {
|
||||
if (field_value.is_valid()) {
|
||||
constant = make_constant(field_value, field);
|
||||
}
|
||||
} else {
|
||||
// For CallSite objects treat the target field as a compile time constant.
|
||||
if (const_oop->is_call_site()) {
|
||||
ciCallSite* call_site = const_oop->as_call_site();
|
||||
// For CallSite objects add a dependency for invalidation of the optimization.
|
||||
if (field->is_call_site_target()) {
|
||||
ciMethodHandle* target = call_site->get_target();
|
||||
if (target != NULL) { // just in case
|
||||
ciConstant field_val(T_OBJECT, target);
|
||||
constant = new Constant(as_ValueType(field_val));
|
||||
// Add a dependence for invalidation of the optimization.
|
||||
ciCallSite* call_site = const_oop->as_call_site();
|
||||
if (!call_site->is_constant_call_site()) {
|
||||
ciMethodHandle* target = field_value.as_object()->as_method_handle();
|
||||
dependency_recorder()->assert_call_site_target_value(call_site, target);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (constant != NULL) {
|
||||
push(type, append(constant));
|
||||
} else {
|
||||
|
@ -1722,7 +1710,7 @@ void GraphBuilder::check_args_for_profiling(Values* obj_args, int expected) {
|
|||
bool ignored_will_link;
|
||||
ciSignature* declared_signature = NULL;
|
||||
ciMethod* real_target = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature);
|
||||
assert(expected == obj_args->length() || real_target->is_method_handle_intrinsic(), "missed on arg?");
|
||||
assert(expected == obj_args->max_length() || real_target->is_method_handle_intrinsic(), "missed on arg?");
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -1733,7 +1721,7 @@ Values* GraphBuilder::collect_args_for_profiling(Values* args, ciMethod* target,
|
|||
if (obj_args == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
int s = obj_args->size();
|
||||
int s = obj_args->max_length();
|
||||
// if called through method handle invoke, some arguments may have been popped
|
||||
for (int i = start, j = 0; j < s && i < args->length(); i++) {
|
||||
if (args->at(i)->type()->is_object_kind()) {
|
||||
|
@ -2170,7 +2158,7 @@ void GraphBuilder::new_multi_array(int dimensions) {
|
|||
ciKlass* klass = stream()->get_klass(will_link);
|
||||
ValueStack* state_before = !klass->is_loaded() || PatchALot ? copy_state_before() : copy_state_exhandling();
|
||||
|
||||
Values* dims = new Values(dimensions, NULL);
|
||||
Values* dims = new Values(dimensions, dimensions, NULL);
|
||||
// fill in all dimensions
|
||||
int i = dimensions;
|
||||
while (i-- > 0) dims->at_put(i, ipop());
|
||||
|
@ -3773,9 +3761,9 @@ bool GraphBuilder::try_inline_full(ciMethod* callee, bool holder_known, Bytecode
|
|||
int start = 0;
|
||||
Values* obj_args = args_list_for_profiling(callee, start, has_receiver);
|
||||
if (obj_args != NULL) {
|
||||
int s = obj_args->size();
|
||||
int s = obj_args->max_length();
|
||||
// if called through method handle invoke, some arguments may have been popped
|
||||
for (int i = args_base+start, j = 0; j < obj_args->size() && i < state()->stack_size(); ) {
|
||||
for (int i = args_base+start, j = 0; j < obj_args->max_length() && i < state()->stack_size(); ) {
|
||||
Value v = state()->stack_at_inc(i);
|
||||
if (v->type()->is_object_kind()) {
|
||||
obj_args->push(v);
|
||||
|
@ -4092,7 +4080,7 @@ void GraphBuilder::push_scope_for_jsr(BlockBegin* jsr_continuation, int jsr_dest
|
|||
// properly clone all blocks in jsr region as well as exception
|
||||
// handlers containing rets
|
||||
BlockList* new_bci2block = new BlockList(bci2block()->length());
|
||||
new_bci2block->push_all(bci2block());
|
||||
new_bci2block->appendAll(bci2block());
|
||||
data->set_bci2block(new_bci2block);
|
||||
data->set_scope(scope());
|
||||
data->setup_jsr_xhandlers();
|
||||
|
|
|
@ -531,7 +531,7 @@ ComputeLinearScanOrder::ComputeLinearScanOrder(Compilation* c, BlockBegin* start
|
|||
_visited_blocks(_max_block_id),
|
||||
_active_blocks(_max_block_id),
|
||||
_dominator_blocks(_max_block_id),
|
||||
_forward_branches(_max_block_id, 0),
|
||||
_forward_branches(_max_block_id, _max_block_id, 0),
|
||||
_loop_end_blocks(8),
|
||||
_work_list(8),
|
||||
_linear_scan_order(NULL), // initialized later with correct size
|
||||
|
@ -849,13 +849,13 @@ bool ComputeLinearScanOrder::ready_for_processing(BlockBegin* cur) {
|
|||
return false;
|
||||
}
|
||||
|
||||
assert(_linear_scan_order->index_of(cur) == -1, "block already processed (block can be ready only once)");
|
||||
assert(_work_list.index_of(cur) == -1, "block already in work-list (block can be ready only once)");
|
||||
assert(_linear_scan_order->find(cur) == -1, "block already processed (block can be ready only once)");
|
||||
assert(_work_list.find(cur) == -1, "block already in work-list (block can be ready only once)");
|
||||
return true;
|
||||
}
|
||||
|
||||
void ComputeLinearScanOrder::sort_into_work_list(BlockBegin* cur) {
|
||||
assert(_work_list.index_of(cur) == -1, "block already in work list");
|
||||
assert(_work_list.find(cur) == -1, "block already in work list");
|
||||
|
||||
int cur_weight = compute_weight(cur);
|
||||
|
||||
|
@ -891,7 +891,7 @@ void ComputeLinearScanOrder::sort_into_work_list(BlockBegin* cur) {
|
|||
|
||||
void ComputeLinearScanOrder::append_block(BlockBegin* cur) {
|
||||
TRACE_LINEAR_SCAN(3, tty->print_cr("appending block B%d (weight 0x%6x) to linear-scan order", cur->block_id(), cur->linear_scan_number()));
|
||||
assert(_linear_scan_order->index_of(cur) == -1, "cannot add the same block twice");
|
||||
assert(_linear_scan_order->find(cur) == -1, "cannot add the same block twice");
|
||||
|
||||
// currently, the linear scan order and code emit order are equal.
|
||||
// therefore the linear_scan_number and the weight of a block must also
|
||||
|
@ -1116,13 +1116,13 @@ void ComputeLinearScanOrder::verify() {
|
|||
BlockBegin* cur = _linear_scan_order->at(i);
|
||||
|
||||
assert(cur->linear_scan_number() == i, "incorrect linear_scan_number");
|
||||
assert(cur->linear_scan_number() >= 0 && cur->linear_scan_number() == _linear_scan_order->index_of(cur), "incorrect linear_scan_number");
|
||||
assert(cur->linear_scan_number() >= 0 && cur->linear_scan_number() == _linear_scan_order->find(cur), "incorrect linear_scan_number");
|
||||
|
||||
int j;
|
||||
for (j = cur->number_of_sux() - 1; j >= 0; j--) {
|
||||
BlockBegin* sux = cur->sux_at(j);
|
||||
|
||||
assert(sux->linear_scan_number() >= 0 && sux->linear_scan_number() == _linear_scan_order->index_of(sux), "incorrect linear_scan_number");
|
||||
assert(sux->linear_scan_number() >= 0 && sux->linear_scan_number() == _linear_scan_order->find(sux), "incorrect linear_scan_number");
|
||||
if (!sux->is_set(BlockBegin::backward_branch_target_flag)) {
|
||||
assert(cur->linear_scan_number() < sux->linear_scan_number(), "invalid order");
|
||||
}
|
||||
|
@ -1134,7 +1134,7 @@ void ComputeLinearScanOrder::verify() {
|
|||
for (j = cur->number_of_preds() - 1; j >= 0; j--) {
|
||||
BlockBegin* pred = cur->pred_at(j);
|
||||
|
||||
assert(pred->linear_scan_number() >= 0 && pred->linear_scan_number() == _linear_scan_order->index_of(pred), "incorrect linear_scan_number");
|
||||
assert(pred->linear_scan_number() >= 0 && pred->linear_scan_number() == _linear_scan_order->find(pred), "incorrect linear_scan_number");
|
||||
if (!cur->is_set(BlockBegin::backward_branch_target_flag)) {
|
||||
assert(cur->linear_scan_number() > pred->linear_scan_number(), "invalid order");
|
||||
}
|
||||
|
@ -1256,8 +1256,7 @@ void IR::print(bool cfg_only, bool live_only) {
|
|||
}
|
||||
|
||||
|
||||
define_array(BlockListArray, BlockList*)
|
||||
define_stack(BlockListList, BlockListArray)
|
||||
typedef GrowableArray<BlockList*> BlockListList;
|
||||
|
||||
class PredecessorValidator : public BlockClosure {
|
||||
private:
|
||||
|
@ -1271,7 +1270,7 @@ class PredecessorValidator : public BlockClosure {
|
|||
public:
|
||||
PredecessorValidator(IR* hir) {
|
||||
ResourceMark rm;
|
||||
_predecessors = new BlockListList(BlockBegin::number_of_blocks(), NULL);
|
||||
_predecessors = new BlockListList(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL);
|
||||
_blocks = new BlockList();
|
||||
|
||||
int i;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -104,9 +104,7 @@ class XHandler: public CompilationResourceObj {
|
|||
bool equals(XHandler* other) const;
|
||||
};
|
||||
|
||||
define_array(_XHandlerArray, XHandler*)
|
||||
define_stack(_XHandlerList, _XHandlerArray)
|
||||
|
||||
typedef GrowableArray<XHandler*> _XHandlerList;
|
||||
|
||||
// XHandlers is the C1 internal list of exception handlers for a method
|
||||
class XHandlers: public CompilationResourceObj {
|
||||
|
@ -132,8 +130,7 @@ class XHandlers: public CompilationResourceObj {
|
|||
|
||||
|
||||
class IRScope;
|
||||
define_array(IRScopeArray, IRScope*)
|
||||
define_stack(IRScopeList, IRScopeArray)
|
||||
typedef GrowableArray<IRScope*> IRScopeList;
|
||||
|
||||
class Compilation;
|
||||
class IRScope: public CompilationResourceObj {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -564,7 +564,7 @@ void BlockBegin::disconnect_edge(BlockBegin* from, BlockBegin* to) {
|
|||
for (int s = 0; s < from->number_of_sux();) {
|
||||
BlockBegin* sux = from->sux_at(s);
|
||||
if (sux == to) {
|
||||
int index = sux->_predecessors.index_of(from);
|
||||
int index = sux->_predecessors.find(from);
|
||||
if (index >= 0) {
|
||||
sux->_predecessors.remove_at(index);
|
||||
}
|
||||
|
@ -664,7 +664,7 @@ BlockBegin* BlockBegin::insert_block_between(BlockBegin* sux) {
|
|||
|
||||
void BlockBegin::remove_successor(BlockBegin* pred) {
|
||||
int idx;
|
||||
while ((idx = _successors.index_of(pred)) >= 0) {
|
||||
while ((idx = _successors.find(pred)) >= 0) {
|
||||
_successors.remove_at(idx);
|
||||
}
|
||||
}
|
||||
|
@ -677,7 +677,7 @@ void BlockBegin::add_predecessor(BlockBegin* pred) {
|
|||
|
||||
void BlockBegin::remove_predecessor(BlockBegin* pred) {
|
||||
int idx;
|
||||
while ((idx = _predecessors.index_of(pred)) >= 0) {
|
||||
while ((idx = _predecessors.find(pred)) >= 0) {
|
||||
_predecessors.remove_at(idx);
|
||||
}
|
||||
}
|
||||
|
@ -722,13 +722,15 @@ void BlockBegin::iterate_postorder(boolArray& mark, BlockClosure* closure) {
|
|||
|
||||
|
||||
void BlockBegin::iterate_preorder(BlockClosure* closure) {
|
||||
boolArray mark(number_of_blocks(), false);
|
||||
int mark_len = number_of_blocks();
|
||||
boolArray mark(mark_len, mark_len, false);
|
||||
iterate_preorder(mark, closure);
|
||||
}
|
||||
|
||||
|
||||
void BlockBegin::iterate_postorder(BlockClosure* closure) {
|
||||
boolArray mark(number_of_blocks(), false);
|
||||
int mark_len = number_of_blocks();
|
||||
boolArray mark(mark_len, mark_len, false);
|
||||
iterate_postorder(mark, closure);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -115,11 +115,8 @@ class Assert;
|
|||
|
||||
// A Value is a reference to the instruction creating the value
|
||||
typedef Instruction* Value;
|
||||
define_array(ValueArray, Value)
|
||||
define_stack(Values, ValueArray)
|
||||
|
||||
define_array(ValueStackArray, ValueStack*)
|
||||
define_stack(ValueStackStack, ValueStackArray)
|
||||
typedef GrowableArray<Value> Values;
|
||||
typedef GrowableArray<ValueStack*> ValueStackStack;
|
||||
|
||||
// BlockClosure is the base class for block traversal/iteration.
|
||||
|
||||
|
@ -137,14 +134,13 @@ class ValueVisitor: public StackObj {
|
|||
|
||||
|
||||
// Some array and list classes
|
||||
define_array(BlockBeginArray, BlockBegin*)
|
||||
define_stack(_BlockList, BlockBeginArray)
|
||||
typedef GrowableArray<BlockBegin*> BlockBeginArray;
|
||||
|
||||
class BlockList: public _BlockList {
|
||||
class BlockList: public GrowableArray<BlockBegin*> {
|
||||
public:
|
||||
BlockList(): _BlockList() {}
|
||||
BlockList(const int size): _BlockList(size) {}
|
||||
BlockList(const int size, BlockBegin* init): _BlockList(size, init) {}
|
||||
BlockList(): GrowableArray<BlockBegin*>() {}
|
||||
BlockList(const int size): GrowableArray<BlockBegin*>(size) {}
|
||||
BlockList(const int size, BlockBegin* init): GrowableArray<BlockBegin*>(size, size, init) {}
|
||||
|
||||
void iterate_forward(BlockClosure* closure);
|
||||
void iterate_backward(BlockClosure* closure);
|
||||
|
@ -1744,7 +1740,7 @@ LEAF(BlockBegin, StateSplit)
|
|||
void remove_predecessor(BlockBegin* pred);
|
||||
bool is_predecessor(BlockBegin* pred) const { return _predecessors.contains(pred); }
|
||||
int number_of_preds() const { return _predecessors.length(); }
|
||||
BlockBegin* pred_at(int i) const { return _predecessors[i]; }
|
||||
BlockBegin* pred_at(int i) const { return _predecessors.at(i); }
|
||||
|
||||
// exception handlers potentially invoked by this block
|
||||
void add_exception_handler(BlockBegin* b);
|
||||
|
@ -2609,10 +2605,7 @@ class BlockPair: public CompilationResourceObj {
|
|||
void set_from(BlockBegin* b) { _from = b; }
|
||||
};
|
||||
|
||||
|
||||
define_array(BlockPairArray, BlockPair*)
|
||||
define_stack(BlockPairList, BlockPairArray)
|
||||
|
||||
typedef GrowableArray<BlockPair*> BlockPairList;
|
||||
|
||||
inline int BlockBegin::number_of_sux() const { assert(_end == NULL || _end->number_of_sux() == _successors.length(), "mismatch"); return _successors.length(); }
|
||||
inline BlockBegin* BlockBegin::sux_at(int i) const { assert(_end == NULL || _end->sux_at(i) == _successors.at(i), "mismatch"); return _successors.at(i); }
|
||||
|
|
|
@ -483,6 +483,7 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
|||
case lir_membar_storestore: // result and info always invalid
|
||||
case lir_membar_loadstore: // result and info always invalid
|
||||
case lir_membar_storeload: // result and info always invalid
|
||||
case lir_on_spin_wait:
|
||||
{
|
||||
assert(op->as_Op0() != NULL, "must be");
|
||||
assert(op->_info == NULL, "info not used by this instruction");
|
||||
|
@ -727,31 +728,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
|
|||
break;
|
||||
}
|
||||
|
||||
|
||||
case lir_tan:
|
||||
case lir_log10: {
|
||||
assert(op->as_Op2() != NULL, "must be");
|
||||
LIR_Op2* op2 = (LIR_Op2*)op;
|
||||
|
||||
// On x86 tan/sin/cos need two temporary fpu stack slots and
|
||||
// log/log10 need one so handle opr2 and tmp as temp inputs.
|
||||
// Register input operand as temp to guarantee that it doesn't
|
||||
// overlap with the input.
|
||||
assert(op2->_info == NULL, "not used");
|
||||
assert(op2->_tmp5->is_illegal(), "not used");
|
||||
assert(op2->_opr1->is_valid(), "used");
|
||||
do_input(op2->_opr1); do_temp(op2->_opr1);
|
||||
|
||||
if (op2->_opr2->is_valid()) do_temp(op2->_opr2);
|
||||
if (op2->_tmp1->is_valid()) do_temp(op2->_tmp1);
|
||||
if (op2->_tmp2->is_valid()) do_temp(op2->_tmp2);
|
||||
if (op2->_tmp3->is_valid()) do_temp(op2->_tmp3);
|
||||
if (op2->_tmp4->is_valid()) do_temp(op2->_tmp4);
|
||||
if (op2->_result->is_valid()) do_output(op2->_result);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// LIR_Op3
|
||||
case lir_idiv:
|
||||
case lir_irem: {
|
||||
|
@ -1691,6 +1667,7 @@ const char * LIR_Op::name() const {
|
|||
case lir_word_align: s = "word_align"; break;
|
||||
case lir_label: s = "label"; break;
|
||||
case lir_nop: s = "nop"; break;
|
||||
case lir_on_spin_wait: s = "on_spin_wait"; break;
|
||||
case lir_backwardbranch_target: s = "backbranch"; break;
|
||||
case lir_std_entry: s = "std_entry"; break;
|
||||
case lir_osr_entry: s = "osr_entry"; break;
|
||||
|
@ -1738,8 +1715,6 @@ const char * LIR_Op::name() const {
|
|||
case lir_rem: s = "rem"; break;
|
||||
case lir_abs: s = "abs"; break;
|
||||
case lir_sqrt: s = "sqrt"; break;
|
||||
case lir_tan: s = "tan"; break;
|
||||
case lir_log10: s = "log10"; break;
|
||||
case lir_logic_and: s = "logic_and"; break;
|
||||
case lir_logic_or: s = "logic_or"; break;
|
||||
case lir_logic_xor: s = "logic_xor"; break;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -59,17 +59,9 @@ class LIR_OprVisitor;
|
|||
typedef LIR_OprDesc* LIR_Opr;
|
||||
typedef int RegNr;
|
||||
|
||||
define_array(LIR_OprArray, LIR_Opr)
|
||||
define_stack(LIR_OprList, LIR_OprArray)
|
||||
|
||||
define_array(LIR_OprRefArray, LIR_Opr*)
|
||||
define_stack(LIR_OprRefList, LIR_OprRefArray)
|
||||
|
||||
define_array(CodeEmitInfoArray, CodeEmitInfo*)
|
||||
define_stack(CodeEmitInfoList, CodeEmitInfoArray)
|
||||
|
||||
define_array(LIR_OpArray, LIR_Op*)
|
||||
define_stack(LIR_OpList, LIR_OpArray)
|
||||
typedef GrowableArray<LIR_Opr> LIR_OprList;
|
||||
typedef GrowableArray<LIR_Op*> LIR_OpArray;
|
||||
typedef GrowableArray<LIR_Op*> LIR_OpList;
|
||||
|
||||
// define LIR_OprPtr early so LIR_OprDesc can refer to it
|
||||
class LIR_OprPtr: public CompilationResourceObj {
|
||||
|
@ -920,6 +912,7 @@ enum LIR_Code {
|
|||
, lir_membar_loadstore
|
||||
, lir_membar_storeload
|
||||
, lir_get_thread
|
||||
, lir_on_spin_wait
|
||||
, end_op0
|
||||
, begin_op1
|
||||
, lir_fxch
|
||||
|
@ -2101,6 +2094,8 @@ class LIR_List: public CompilationResourceObj {
|
|||
void std_entry(LIR_Opr receiver) { append(new LIR_Op0(lir_std_entry, receiver)); }
|
||||
void osr_entry(LIR_Opr osrPointer) { append(new LIR_Op0(lir_osr_entry, osrPointer)); }
|
||||
|
||||
void on_spin_wait() { append(new LIR_Op0(lir_on_spin_wait)); }
|
||||
|
||||
void branch_destination(Label* lbl) { append(new LIR_OpLabel(lbl)); }
|
||||
|
||||
void negate(LIR_Opr from, LIR_Opr to) { append(new LIR_Op1(lir_neg, from, to)); }
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -127,7 +127,7 @@ void LIR_Assembler::append_code_stub(CodeStub* stub) {
|
|||
|
||||
void LIR_Assembler::emit_stubs(CodeStubList* stub_list) {
|
||||
for (int m = 0; m < stub_list->length(); m++) {
|
||||
CodeStub* s = (*stub_list)[m];
|
||||
CodeStub* s = stub_list->at(m);
|
||||
|
||||
check_codespace();
|
||||
CHECK_BAILOUT();
|
||||
|
@ -678,6 +678,10 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) {
|
|||
get_thread(op->result_opr());
|
||||
break;
|
||||
|
||||
case lir_on_spin_wait:
|
||||
on_spin_wait();
|
||||
break;
|
||||
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
break;
|
||||
|
|
|
@ -251,6 +251,7 @@ class LIR_Assembler: public CompilationResourceObj {
|
|||
void membar_storestore();
|
||||
void membar_loadstore();
|
||||
void membar_storeload();
|
||||
void on_spin_wait();
|
||||
void get_thread(LIR_Opr result);
|
||||
|
||||
void verify_oop_map(CodeEmitInfo* info);
|
||||
|
|
|
@ -150,7 +150,7 @@ PhiResolver::~PhiResolver() {
|
|||
int i;
|
||||
// resolve any cycles in moves from and to virtual registers
|
||||
for (i = virtual_operands().length() - 1; i >= 0; i --) {
|
||||
ResolveNode* node = virtual_operands()[i];
|
||||
ResolveNode* node = virtual_operands().at(i);
|
||||
if (!node->visited()) {
|
||||
_loop = NULL;
|
||||
move(NULL, node);
|
||||
|
@ -161,7 +161,7 @@ PhiResolver::~PhiResolver() {
|
|||
|
||||
// generate move for move from non virtual register to abitrary destination
|
||||
for (i = other_operands().length() - 1; i >= 0; i --) {
|
||||
ResolveNode* node = other_operands()[i];
|
||||
ResolveNode* node = other_operands().at(i);
|
||||
for (int j = node->no_of_destinations() - 1; j >= 0; j --) {
|
||||
emit_move(node->operand(), node->destination_at(j)->operand());
|
||||
}
|
||||
|
@ -177,7 +177,7 @@ ResolveNode* PhiResolver::create_node(LIR_Opr opr, bool source) {
|
|||
assert(node == NULL || node->operand() == opr, "");
|
||||
if (node == NULL) {
|
||||
node = new ResolveNode(opr);
|
||||
vreg_table()[vreg_num] = node;
|
||||
vreg_table().at_put(vreg_num, node);
|
||||
}
|
||||
// Make sure that all virtual operands show up in the list when
|
||||
// they are used as the source of a move.
|
||||
|
@ -3161,7 +3161,9 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
|
|||
case vmIntrinsics::_fullFence :
|
||||
if (os::is_MP()) __ membar();
|
||||
break;
|
||||
|
||||
case vmIntrinsics::_onSpinWait:
|
||||
__ on_spin_wait();
|
||||
break;
|
||||
case vmIntrinsics::_Reference_get:
|
||||
do_Reference_get(x);
|
||||
break;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -39,8 +39,7 @@ class Invoke;
|
|||
class SwitchRange;
|
||||
class LIRItem;
|
||||
|
||||
define_array(LIRItemArray, LIRItem*)
|
||||
define_stack(LIRItemList, LIRItemArray)
|
||||
typedef GrowableArray<LIRItem*> LIRItemList;
|
||||
|
||||
class SwitchRange: public CompilationResourceObj {
|
||||
private:
|
||||
|
@ -56,15 +55,12 @@ class SwitchRange: public CompilationResourceObj {
|
|||
BlockBegin* sux() const { return _sux; }
|
||||
};
|
||||
|
||||
define_array(SwitchRangeArray, SwitchRange*)
|
||||
define_stack(SwitchRangeList, SwitchRangeArray)
|
||||
|
||||
typedef GrowableArray<SwitchRange*> SwitchRangeArray;
|
||||
typedef GrowableArray<SwitchRange*> SwitchRangeList;
|
||||
|
||||
class ResolveNode;
|
||||
|
||||
define_array(NodeArray, ResolveNode*);
|
||||
define_stack(NodeList, NodeArray);
|
||||
|
||||
typedef GrowableArray<ResolveNode*> NodeList;
|
||||
|
||||
// Node objects form a directed graph of LIR_Opr
|
||||
// Edges between Nodes represent moves from one Node to its destinations
|
||||
|
@ -86,7 +82,7 @@ class ResolveNode: public CompilationResourceObj {
|
|||
// accessors
|
||||
LIR_Opr operand() const { return _operand; }
|
||||
int no_of_destinations() const { return _destinations.length(); }
|
||||
ResolveNode* destination_at(int i) { return _destinations[i]; }
|
||||
ResolveNode* destination_at(int i) { return _destinations.at(i); }
|
||||
bool assigned() const { return _assigned; }
|
||||
bool visited() const { return _visited; }
|
||||
bool start_node() const { return _start_node; }
|
||||
|
|
|
@ -496,8 +496,8 @@ void LinearScan::number_instructions() {
|
|||
}
|
||||
|
||||
// initialize with correct length
|
||||
_lir_ops = LIR_OpArray(num_instructions);
|
||||
_block_of_op = BlockBeginArray(num_instructions);
|
||||
_lir_ops = LIR_OpArray(num_instructions, num_instructions, NULL);
|
||||
_block_of_op = BlockBeginArray(num_instructions, num_instructions, NULL);
|
||||
|
||||
int op_id = 0;
|
||||
int idx = 0;
|
||||
|
@ -2507,7 +2507,8 @@ LocationValue* _illegal_value = new (ResourceObj::C_HEAP, mtCompiler) Lo
|
|||
void LinearScan::init_compute_debug_info() {
|
||||
// cache for frequently used scope values
|
||||
// (cpu registers and stack slots)
|
||||
_scope_value_cache = ScopeValueArray((LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2, NULL);
|
||||
int cache_size = (LinearScan::nof_cpu_regs + frame_map()->argcount() + max_spills()) * 2;
|
||||
_scope_value_cache = ScopeValueArray(cache_size, cache_size, NULL);
|
||||
}
|
||||
|
||||
MonitorValue* LinearScan::location_for_monitor_index(int monitor_index) {
|
||||
|
@ -3042,7 +3043,7 @@ void LinearScan::assign_reg_num(LIR_OpList* instructions, IntervalWalker* iw) {
|
|||
insert_point++;
|
||||
}
|
||||
}
|
||||
instructions->truncate(insert_point);
|
||||
instructions->trunc_to(insert_point);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3446,7 +3447,7 @@ class RegisterVerifier: public StackObj {
|
|||
RegisterVerifier(LinearScan* allocator)
|
||||
: _allocator(allocator)
|
||||
, _work_list(16)
|
||||
, _saved_states(BlockBegin::number_of_blocks(), NULL)
|
||||
, _saved_states(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL)
|
||||
{ }
|
||||
|
||||
void verify(BlockBegin* start);
|
||||
|
@ -4452,7 +4453,7 @@ Interval* Interval::split(int split_pos) {
|
|||
new_use_pos_and_kinds.append(_use_pos_and_kinds.at(i));
|
||||
}
|
||||
|
||||
_use_pos_and_kinds.truncate(start_idx + 2);
|
||||
_use_pos_and_kinds.trunc_to(start_idx + 2);
|
||||
result->_use_pos_and_kinds = _use_pos_and_kinds;
|
||||
_use_pos_and_kinds = new_use_pos_and_kinds;
|
||||
|
||||
|
@ -5540,7 +5541,7 @@ void LinearScanWalker::split_and_spill_intersecting_intervals(int reg, int regHi
|
|||
IntervalList* processed = _spill_intervals[reg];
|
||||
for (int i = 0; i < _spill_intervals[regHi]->length(); i++) {
|
||||
Interval* it = _spill_intervals[regHi]->at(i);
|
||||
if (processed->find_from_end(it) == -1) {
|
||||
if (processed->find(it) == -1) {
|
||||
remove_from_list(it);
|
||||
split_and_spill_interval(it);
|
||||
}
|
||||
|
@ -6211,7 +6212,7 @@ void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
|
|||
_original_preds.clear();
|
||||
for (j = block->number_of_preds() - 1; j >= 0; j--) {
|
||||
BlockBegin* pred = block->pred_at(j);
|
||||
if (_original_preds.index_of(pred) == -1) {
|
||||
if (_original_preds.find(pred) == -1) {
|
||||
_original_preds.append(pred);
|
||||
}
|
||||
}
|
||||
|
@ -6231,7 +6232,7 @@ void ControlFlowOptimizer::delete_empty_blocks(BlockList* code) {
|
|||
}
|
||||
old_pos++;
|
||||
}
|
||||
code->truncate(new_pos);
|
||||
code->trunc_to(new_pos);
|
||||
|
||||
DEBUG_ONLY(verify(code));
|
||||
}
|
||||
|
@ -6256,7 +6257,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
|
|||
TRACE_LINEAR_SCAN(3, tty->print_cr("Deleting unconditional branch at end of block B%d", block->block_id()));
|
||||
|
||||
// delete last branch instruction
|
||||
instructions->truncate(instructions->length() - 1);
|
||||
instructions->trunc_to(instructions->length() - 1);
|
||||
|
||||
} else {
|
||||
LIR_Op* prev_op = instructions->at(instructions->length() - 2);
|
||||
|
@ -6295,7 +6296,7 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) {
|
|||
prev_branch->change_block(last_branch->block());
|
||||
prev_branch->negate_cond();
|
||||
prev_cmp->set_condition(prev_branch->cond());
|
||||
instructions->truncate(instructions->length() - 1);
|
||||
instructions->trunc_to(instructions->length() - 1);
|
||||
// if we do change the condition, we have to change the cmove as well
|
||||
if (prev_cmove != NULL) {
|
||||
prev_cmove->set_condition(prev_branch->cond());
|
||||
|
@ -6378,19 +6379,19 @@ void ControlFlowOptimizer::verify(BlockList* code) {
|
|||
LIR_OpBranch* op_branch = instructions->at(j)->as_OpBranch();
|
||||
|
||||
if (op_branch != NULL) {
|
||||
assert(op_branch->block() == NULL || code->index_of(op_branch->block()) != -1, "branch target not valid");
|
||||
assert(op_branch->ublock() == NULL || code->index_of(op_branch->ublock()) != -1, "branch target not valid");
|
||||
assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid");
|
||||
assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid");
|
||||
}
|
||||
}
|
||||
|
||||
for (j = 0; j < block->number_of_sux() - 1; j++) {
|
||||
BlockBegin* sux = block->sux_at(j);
|
||||
assert(code->index_of(sux) != -1, "successor not valid");
|
||||
assert(code->find(sux) != -1, "successor not valid");
|
||||
}
|
||||
|
||||
for (j = 0; j < block->number_of_preds() - 1; j++) {
|
||||
BlockBegin* pred = block->pred_at(j);
|
||||
assert(code->index_of(pred) != -1, "successor not valid");
|
||||
assert(code->find(pred) != -1, "successor not valid");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,18 +44,9 @@ class Range;
|
|||
|
||||
typedef GrowableArray<Interval*> IntervalArray;
|
||||
typedef GrowableArray<Interval*> IntervalList;
|
||||
|
||||
define_array(IntervalsArray, IntervalList*)
|
||||
define_stack(IntervalsList, IntervalsArray)
|
||||
|
||||
define_array(OopMapArray, OopMap*)
|
||||
define_stack(OopMapList, OopMapArray)
|
||||
|
||||
define_array(ScopeValueArray, ScopeValue*)
|
||||
|
||||
define_array(LIR_OpListArray, LIR_OpList*);
|
||||
define_stack(LIR_OpListStack, LIR_OpListArray);
|
||||
|
||||
typedef GrowableArray<IntervalList*> IntervalsList;
|
||||
typedef GrowableArray<ScopeValue*> ScopeValueArray;
|
||||
typedef GrowableArray<LIR_OpList*> LIR_OpListStack;
|
||||
|
||||
enum IntervalUseKind {
|
||||
// priority of use kinds must be ascending
|
||||
|
@ -67,9 +58,6 @@ enum IntervalUseKind {
|
|||
firstValidKind = 1,
|
||||
lastValidKind = 3
|
||||
};
|
||||
define_array(UseKindArray, IntervalUseKind)
|
||||
define_stack(UseKindStack, UseKindArray)
|
||||
|
||||
|
||||
enum IntervalKind {
|
||||
fixedKind = 0, // interval pre-colored by LIR_Generator
|
||||
|
@ -619,7 +607,7 @@ class Interval : public CompilationResourceObj {
|
|||
void add_range(int from, int to);
|
||||
Interval* split(int split_pos);
|
||||
Interval* split_from_start(int split_pos);
|
||||
void remove_first_use_pos() { _use_pos_and_kinds.truncate(_use_pos_and_kinds.length() - 2); }
|
||||
void remove_first_use_pos() { _use_pos_and_kinds.trunc_to(_use_pos_and_kinds.length() - 2); }
|
||||
|
||||
// test intersection
|
||||
bool covers(int op_id, LIR_OpVisitState::OprMode mode) const;
|
||||
|
|
|
@ -32,9 +32,7 @@
|
|||
#include "utilities/bitMap.inline.hpp"
|
||||
#include "compiler/compileLog.hpp"
|
||||
|
||||
define_array(ValueSetArray, ValueSet*);
|
||||
define_stack(ValueSetList, ValueSetArray);
|
||||
|
||||
typedef GrowableArray<ValueSet*> ValueSetList;
|
||||
|
||||
Optimizer::Optimizer(IR* ir) {
|
||||
assert(ir->is_valid(), "IR must be valid");
|
||||
|
@ -584,8 +582,8 @@ class NullCheckEliminator: public ValueVisitor {
|
|||
|
||||
ValueSet* state() { return _set; }
|
||||
void set_state_from (ValueSet* state) { _set->set_from(state); }
|
||||
ValueSet* state_for (BlockBegin* block) { return _block_states[block->block_id()]; }
|
||||
void set_state_for (BlockBegin* block, ValueSet* stack) { _block_states[block->block_id()] = stack; }
|
||||
ValueSet* state_for (BlockBegin* block) { return _block_states.at(block->block_id()); }
|
||||
void set_state_for (BlockBegin* block, ValueSet* stack) { _block_states.at_put(block->block_id(), stack); }
|
||||
// Returns true if caused a change in the block's state.
|
||||
bool merge_state_for(BlockBegin* block,
|
||||
ValueSet* incoming_state);
|
||||
|
@ -596,7 +594,7 @@ class NullCheckEliminator: public ValueVisitor {
|
|||
: _opt(opt)
|
||||
, _set(new ValueSet())
|
||||
, _last_explicit_null_check(NULL)
|
||||
, _block_states(BlockBegin::number_of_blocks(), NULL)
|
||||
, _block_states(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), NULL)
|
||||
, _work_list(new BlockList()) {
|
||||
_visitable_instructions = new ValueSet();
|
||||
_visitor.set_eliminator(this);
|
||||
|
@ -1165,19 +1163,19 @@ void Optimizer::eliminate_null_checks() {
|
|||
// handlers and iterate over them as well
|
||||
int nblocks = BlockBegin::number_of_blocks();
|
||||
BlockList blocks(nblocks);
|
||||
boolArray visited_block(nblocks, false);
|
||||
boolArray visited_block(nblocks, nblocks, false);
|
||||
|
||||
blocks.push(ir()->start());
|
||||
visited_block[ir()->start()->block_id()] = true;
|
||||
visited_block.at_put(ir()->start()->block_id(), true);
|
||||
for (int i = 0; i < blocks.length(); i++) {
|
||||
BlockBegin* b = blocks[i];
|
||||
BlockBegin* b = blocks.at(i);
|
||||
// exception handlers need to be treated as additional roots
|
||||
for (int e = b->number_of_exception_handlers(); e-- > 0; ) {
|
||||
BlockBegin* excp = b->exception_handler_at(e);
|
||||
int id = excp->block_id();
|
||||
if (!visited_block[id]) {
|
||||
if (!visited_block.at(id)) {
|
||||
blocks.push(excp);
|
||||
visited_block[id] = true;
|
||||
visited_block.at_put(id, true);
|
||||
nce.iterate(excp);
|
||||
}
|
||||
}
|
||||
|
@ -1186,9 +1184,9 @@ void Optimizer::eliminate_null_checks() {
|
|||
for (int s = end->number_of_sux(); s-- > 0; ) {
|
||||
BlockBegin* next = end->sux_at(s);
|
||||
int id = next->block_id();
|
||||
if (!visited_block[id]) {
|
||||
if (!visited_block.at(id)) {
|
||||
blocks.push(next);
|
||||
visited_block[id] = true;
|
||||
visited_block.at_put(id, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -53,8 +53,8 @@ void RangeCheckElimination::eliminate(IR *ir) {
|
|||
|
||||
// Constructor
|
||||
RangeCheckEliminator::RangeCheckEliminator(IR *ir) :
|
||||
_bounds(Instruction::number_of_instructions(), NULL),
|
||||
_access_indexed_info(Instruction::number_of_instructions(), NULL)
|
||||
_bounds(Instruction::number_of_instructions(), Instruction::number_of_instructions(), NULL),
|
||||
_access_indexed_info(Instruction::number_of_instructions(), Instruction::number_of_instructions(), NULL)
|
||||
{
|
||||
_visitor.set_range_check_eliminator(this);
|
||||
_ir = ir;
|
||||
|
@ -303,28 +303,28 @@ RangeCheckEliminator::Bound *RangeCheckEliminator::get_bound(Value v) {
|
|||
// Wrong type or NULL -> No bound
|
||||
if (!v || (!v->type()->as_IntType() && !v->type()->as_ObjectType())) return NULL;
|
||||
|
||||
if (!_bounds[v->id()]) {
|
||||
if (!_bounds.at(v->id())) {
|
||||
// First (default) bound is calculated
|
||||
// Create BoundStack
|
||||
_bounds[v->id()] = new BoundStack();
|
||||
_bounds.at_put(v->id(), new BoundStack());
|
||||
_visitor.clear_bound();
|
||||
Value visit_value = v;
|
||||
visit_value->visit(&_visitor);
|
||||
Bound *bound = _visitor.bound();
|
||||
if (bound) {
|
||||
_bounds[v->id()]->push(bound);
|
||||
_bounds.at(v->id())->push(bound);
|
||||
}
|
||||
if (_bounds[v->id()]->length() == 0) {
|
||||
if (_bounds.at(v->id())->length() == 0) {
|
||||
assert(!(v->as_Constant() && v->type()->as_IntConstant()), "constants not handled here");
|
||||
_bounds[v->id()]->push(new Bound());
|
||||
_bounds.at(v->id())->push(new Bound());
|
||||
}
|
||||
} else if (_bounds[v->id()]->length() == 0) {
|
||||
} else if (_bounds.at(v->id())->length() == 0) {
|
||||
// To avoid endless loops, bound is currently in calculation -> nothing known about it
|
||||
return new Bound();
|
||||
}
|
||||
|
||||
// Return bound
|
||||
return _bounds[v->id()]->top();
|
||||
return _bounds.at(v->id())->top();
|
||||
}
|
||||
|
||||
// Update bound
|
||||
|
@ -353,28 +353,28 @@ void RangeCheckEliminator::update_bound(IntegerStack &pushed, Value v, Bound *bo
|
|||
// No bound update for constants
|
||||
return;
|
||||
}
|
||||
if (!_bounds[v->id()]) {
|
||||
if (!_bounds.at(v->id())) {
|
||||
get_bound(v);
|
||||
assert(_bounds[v->id()], "Now Stack must exist");
|
||||
assert(_bounds.at(v->id()), "Now Stack must exist");
|
||||
}
|
||||
Bound *top = NULL;
|
||||
if (_bounds[v->id()]->length() > 0) {
|
||||
top = _bounds[v->id()]->top();
|
||||
if (_bounds.at(v->id())->length() > 0) {
|
||||
top = _bounds.at(v->id())->top();
|
||||
}
|
||||
if (top) {
|
||||
bound->and_op(top);
|
||||
}
|
||||
_bounds[v->id()]->push(bound);
|
||||
_bounds.at(v->id())->push(bound);
|
||||
pushed.append(v->id());
|
||||
}
|
||||
|
||||
// Add instruction + idx for in block motion
|
||||
void RangeCheckEliminator::add_access_indexed_info(InstructionList &indices, int idx, Value instruction, AccessIndexed *ai) {
|
||||
int id = instruction->id();
|
||||
AccessIndexedInfo *aii = _access_indexed_info[id];
|
||||
AccessIndexedInfo *aii = _access_indexed_info.at(id);
|
||||
if (aii == NULL) {
|
||||
aii = new AccessIndexedInfo();
|
||||
_access_indexed_info[id] = aii;
|
||||
_access_indexed_info.at_put(id, aii);
|
||||
indices.append(instruction);
|
||||
aii->_min = idx;
|
||||
aii->_max = idx;
|
||||
|
@ -461,7 +461,7 @@ void RangeCheckEliminator::in_block_motion(BlockBegin *block, AccessIndexedList
|
|||
if (_optimistic) {
|
||||
for (int i = 0; i < indices.length(); i++) {
|
||||
Instruction *index_instruction = indices.at(i);
|
||||
AccessIndexedInfo *info = _access_indexed_info[index_instruction->id()];
|
||||
AccessIndexedInfo *info = _access_indexed_info.at(index_instruction->id());
|
||||
assert(info != NULL, "Info must not be null");
|
||||
|
||||
// if idx < 0, max > 0, max + idx may fall between 0 and
|
||||
|
@ -562,7 +562,7 @@ void RangeCheckEliminator::in_block_motion(BlockBegin *block, AccessIndexedList
|
|||
// Clear data structures for next array
|
||||
for (int i = 0; i < indices.length(); i++) {
|
||||
Instruction *index_instruction = indices.at(i);
|
||||
_access_indexed_info[index_instruction->id()] = NULL;
|
||||
_access_indexed_info.at_put(index_instruction->id(), NULL);
|
||||
}
|
||||
indices.clear();
|
||||
}
|
||||
|
@ -1005,7 +1005,7 @@ void RangeCheckEliminator::calc_bounds(BlockBegin *block, BlockBegin *loop_heade
|
|||
|
||||
// Reset stack
|
||||
for (int i=0; i<pushed.length(); i++) {
|
||||
_bounds[pushed[i]]->pop();
|
||||
_bounds.at(pushed.at(i))->pop();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1051,7 +1051,7 @@ void RangeCheckEliminator::dump_condition_stack(BlockBegin *block) {
|
|||
#endif
|
||||
|
||||
// Verification of the IR
|
||||
RangeCheckEliminator::Verification::Verification(IR *ir) : _used(BlockBegin::number_of_blocks(), false) {
|
||||
RangeCheckEliminator::Verification::Verification(IR *ir) : _used(BlockBegin::number_of_blocks(), BlockBegin::number_of_blocks(), false) {
|
||||
this->_ir = ir;
|
||||
ir->iterate_linear_scan_order(this);
|
||||
}
|
||||
|
@ -1147,13 +1147,13 @@ bool RangeCheckEliminator::Verification::can_reach(BlockBegin *start, BlockBegin
|
|||
// Simple BFS from start to end
|
||||
// BlockBeginList _current;
|
||||
for (int i=0; i < _used.length(); i++) {
|
||||
_used[i] = false;
|
||||
_used.at_put(i, false);
|
||||
}
|
||||
_current.truncate(0);
|
||||
_successors.truncate(0);
|
||||
_current.trunc_to(0);
|
||||
_successors.trunc_to(0);
|
||||
if (start != dont_use) {
|
||||
_current.push(start);
|
||||
_used[start->block_id()] = true;
|
||||
_used.at_put(start->block_id(), true);
|
||||
}
|
||||
|
||||
// BlockBeginList _successors;
|
||||
|
@ -1180,17 +1180,17 @@ bool RangeCheckEliminator::Verification::can_reach(BlockBegin *start, BlockBegin
|
|||
}
|
||||
}
|
||||
for (int i=0; i<_successors.length(); i++) {
|
||||
BlockBegin *sux = _successors[i];
|
||||
BlockBegin *sux = _successors.at(i);
|
||||
assert(sux != NULL, "Successor must not be NULL!");
|
||||
if (sux == end) {
|
||||
return true;
|
||||
}
|
||||
if (sux != dont_use && !_used[sux->block_id()]) {
|
||||
_used[sux->block_id()] = true;
|
||||
if (sux != dont_use && !_used.at(sux->block_id())) {
|
||||
_used.at_put(sux->block_id(), true);
|
||||
_current.push(sux);
|
||||
}
|
||||
}
|
||||
_successors.truncate(0);
|
||||
_successors.trunc_to(0);
|
||||
}
|
||||
|
||||
return false;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -40,10 +40,8 @@ private:
|
|||
bool _optimistic; // Insert predicates and deoptimize when they fail
|
||||
IR *_ir;
|
||||
|
||||
define_array(BlockBeginArray, BlockBegin*)
|
||||
define_stack(BlockBeginList, BlockBeginArray)
|
||||
define_stack(IntegerStack, intArray)
|
||||
define_array(IntegerMap, IntegerStack*)
|
||||
typedef GrowableArray<BlockBegin*> BlockBeginList;
|
||||
typedef GrowableArray<int> IntegerStack;
|
||||
|
||||
class Verification : public BlockClosure {
|
||||
// RangeCheckEliminator::Verification should never get instantiated on the heap.
|
||||
|
@ -180,13 +178,10 @@ public:
|
|||
void add_assertions(Bound *bound, Instruction *instruction, Instruction *position);
|
||||
#endif
|
||||
|
||||
define_array(BoundArray, Bound *)
|
||||
define_stack(BoundStack, BoundArray)
|
||||
define_array(BoundMap, BoundStack *)
|
||||
define_array(AccessIndexedArray, AccessIndexed *)
|
||||
define_stack(AccessIndexedList, AccessIndexedArray)
|
||||
define_array(InstructionArray, Instruction *)
|
||||
define_stack(InstructionList, InstructionArray)
|
||||
typedef GrowableArray<Bound*> BoundStack;
|
||||
typedef GrowableArray<BoundStack*> BoundMap;
|
||||
typedef GrowableArray<AccessIndexed*> AccessIndexedList;
|
||||
typedef GrowableArray<Instruction*> InstructionList;
|
||||
|
||||
class AccessIndexedInfo : public CompilationResourceObj {
|
||||
public:
|
||||
|
@ -195,7 +190,7 @@ public:
|
|||
int _max;
|
||||
};
|
||||
|
||||
define_array(AccessIndexedInfoArray, AccessIndexedInfo *)
|
||||
typedef GrowableArray<AccessIndexedInfo*> AccessIndexedInfoArray;
|
||||
BoundMap _bounds; // Mapping from Instruction's id to current bound
|
||||
AccessIndexedInfoArray _access_indexed_info; // Mapping from Instruction's id to AccessIndexedInfo for in block motion
|
||||
Visitor _visitor;
|
||||
|
|
|
@ -320,9 +320,11 @@ const char* Runtime1::name_for_address(address entry) {
|
|||
FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
|
||||
FUNCTION_CASE(entry, StubRoutines::dexp());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog());
|
||||
FUNCTION_CASE(entry, StubRoutines::dlog10());
|
||||
FUNCTION_CASE(entry, StubRoutines::dpow());
|
||||
FUNCTION_CASE(entry, StubRoutines::dsin());
|
||||
FUNCTION_CASE(entry, StubRoutines::dcos());
|
||||
FUNCTION_CASE(entry, StubRoutines::dtan());
|
||||
|
||||
#undef FUNCTION_CASE
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -46,7 +46,7 @@
|
|||
|
||||
ValueMap::ValueMap()
|
||||
: _nesting(0)
|
||||
, _entries(ValueMapInitialSize, NULL)
|
||||
, _entries(ValueMapInitialSize, ValueMapInitialSize, NULL)
|
||||
, _killed_values()
|
||||
, _entry_count(0)
|
||||
{
|
||||
|
@ -56,7 +56,7 @@ ValueMap::ValueMap()
|
|||
|
||||
ValueMap::ValueMap(ValueMap* old)
|
||||
: _nesting(old->_nesting + 1)
|
||||
, _entries(old->_entries.length())
|
||||
, _entries(old->_entries.length(), old->_entries.length(), NULL)
|
||||
, _killed_values()
|
||||
, _entry_count(old->_entry_count)
|
||||
{
|
||||
|
@ -72,7 +72,7 @@ void ValueMap::increase_table_size() {
|
|||
int new_size = old_size * 2 + 1;
|
||||
|
||||
ValueMapEntryList worklist(8);
|
||||
ValueMapEntryArray new_entries(new_size, NULL);
|
||||
ValueMapEntryArray new_entries(new_size, new_size, NULL);
|
||||
int new_entry_count = 0;
|
||||
|
||||
TRACE_VALUE_NUMBERING(tty->print_cr("increasing table size from %d to %d", old_size, new_size));
|
||||
|
@ -486,7 +486,7 @@ bool ShortLoopOptimizer::process(BlockBegin* loop_header) {
|
|||
|
||||
GlobalValueNumbering::GlobalValueNumbering(IR* ir)
|
||||
: _current_map(NULL)
|
||||
, _value_maps(ir->linear_scan_order()->length(), NULL)
|
||||
, _value_maps(ir->linear_scan_order()->length(), ir->linear_scan_order()->length(), NULL)
|
||||
, _compilation(ir->compilation())
|
||||
{
|
||||
TRACE_VALUE_NUMBERING(tty->print_cr("****** start of global value numbering"));
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -53,8 +53,8 @@ class ValueMapEntry: public CompilationResourceObj {
|
|||
void set_next(ValueMapEntry* next) { _next = next; }
|
||||
};
|
||||
|
||||
define_array(ValueMapEntryArray, ValueMapEntry*)
|
||||
define_stack(ValueMapEntryList, ValueMapEntryArray)
|
||||
typedef GrowableArray<ValueMapEntry*> ValueMapEntryArray;
|
||||
typedef GrowableArray<ValueMapEntry*> ValueMapEntryList;
|
||||
|
||||
// ValueMap implements nested hash tables for value numbering. It
|
||||
// maintains a set _killed_values which represents the instructions
|
||||
|
@ -129,8 +129,7 @@ class ValueMap: public CompilationResourceObj {
|
|||
#endif
|
||||
};
|
||||
|
||||
define_array(ValueMapArray, ValueMap*)
|
||||
|
||||
typedef GrowableArray<ValueMap*> ValueMapArray;
|
||||
|
||||
class ValueNumberingVisitor: public InstructionVisitor {
|
||||
protected:
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -35,7 +35,7 @@ ValueStack::ValueStack(IRScope* scope, ValueStack* caller_state)
|
|||
, _caller_state(caller_state)
|
||||
, _bci(-99)
|
||||
, _kind(Parsing)
|
||||
, _locals(scope->method()->max_locals(), NULL)
|
||||
, _locals(scope->method()->max_locals(), scope->method()->max_locals(), NULL)
|
||||
, _stack(scope->method()->max_stack())
|
||||
, _locks()
|
||||
{
|
||||
|
@ -178,7 +178,7 @@ void ValueStack::setup_phi_for_stack(BlockBegin* b, int index) {
|
|||
|
||||
ValueType* t = stack_at(index)->type();
|
||||
Value phi = new Phi(t, b, -index - 1);
|
||||
_stack[index] = phi;
|
||||
_stack.at_put(index, phi);
|
||||
|
||||
assert(!t->is_double_word() || _stack.at(index + 1) == NULL, "hi-word of doubleword value must be NULL");
|
||||
}
|
||||
|
@ -225,7 +225,7 @@ void ValueStack::print() {
|
|||
if (locals_size() > 0) {
|
||||
InstructionPrinter ip;
|
||||
for (int i = 0; i < locals_size();) {
|
||||
Value l = _locals[i];
|
||||
Value l = _locals.at(i);
|
||||
tty->print("local %d ", i);
|
||||
if (l == NULL) {
|
||||
tty->print("null");
|
||||
|
|
|
@ -124,6 +124,9 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
// Reports whether this constant carries a usable value: true unless
// basic_type() is T_ILLEGAL.
bool is_valid() const {
|
||||
return basic_type() != T_ILLEGAL;
|
||||
}
|
||||
// Debugging output
|
||||
void print();
|
||||
};
|
||||
|
|
|
@ -235,29 +235,16 @@ void ciField::initialize_from(fieldDescriptor* fd) {
|
|||
_holder = CURRENT_ENV->get_instance_klass(fd->field_holder());
|
||||
|
||||
// Check to see if the field is constant.
|
||||
bool is_final = this->is_final();
|
||||
bool is_stable = FoldStableValues && this->is_stable();
|
||||
if (_holder->is_initialized() && (is_final || is_stable)) {
|
||||
if (!this->is_static()) {
|
||||
// A field can be constant if it's a final static field or if
|
||||
// it's a final non-static field of a trusted class (classes in
|
||||
// java.lang.invoke and sun.invoke packages and subpackages).
|
||||
if (is_stable || trust_final_non_static_fields(_holder)) {
|
||||
_is_constant = true;
|
||||
return;
|
||||
}
|
||||
_is_constant = false;
|
||||
return;
|
||||
}
|
||||
|
||||
Klass* k = _holder->get_Klass();
|
||||
bool is_stable_field = FoldStableValues && is_stable();
|
||||
if (is_final() || is_stable_field) {
|
||||
if (is_static()) {
|
||||
// This field just may be constant. The only case where it will
|
||||
// not be constant is when the field is a *special* static & final field
|
||||
// whose value may change. The three examples are java.lang.System.in,
|
||||
// java.lang.System.out, and java.lang.System.err.
|
||||
|
||||
KlassHandle k = _holder->get_Klass();
|
||||
assert(SystemDictionary::System_klass() != NULL, "Check once per vm");
|
||||
if( k() == SystemDictionary::System_klass() ) {
|
||||
if (k == SystemDictionary::System_klass()) {
|
||||
// Check offsets for case 2: System.in, System.out, or System.err
|
||||
if( _offset == java_lang_System::in_offset_in_bytes() ||
|
||||
_offset == java_lang_System::out_offset_in_bytes() ||
|
||||
|
@ -266,64 +253,58 @@ void ciField::initialize_from(fieldDescriptor* fd) {
|
|||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Handle mirror = k->java_mirror();
|
||||
|
||||
switch(type()->basic_type()) {
|
||||
case T_BYTE:
|
||||
_constant_value = ciConstant(type()->basic_type(), mirror->byte_field(_offset));
|
||||
break;
|
||||
case T_CHAR:
|
||||
_constant_value = ciConstant(type()->basic_type(), mirror->char_field(_offset));
|
||||
break;
|
||||
case T_SHORT:
|
||||
_constant_value = ciConstant(type()->basic_type(), mirror->short_field(_offset));
|
||||
break;
|
||||
case T_BOOLEAN:
|
||||
_constant_value = ciConstant(type()->basic_type(), mirror->bool_field(_offset));
|
||||
break;
|
||||
case T_INT:
|
||||
_constant_value = ciConstant(type()->basic_type(), mirror->int_field(_offset));
|
||||
break;
|
||||
case T_FLOAT:
|
||||
_constant_value = ciConstant(mirror->float_field(_offset));
|
||||
break;
|
||||
case T_DOUBLE:
|
||||
_constant_value = ciConstant(mirror->double_field(_offset));
|
||||
break;
|
||||
case T_LONG:
|
||||
_constant_value = ciConstant(mirror->long_field(_offset));
|
||||
break;
|
||||
case T_OBJECT:
|
||||
case T_ARRAY:
|
||||
{
|
||||
oop o = mirror->obj_field(_offset);
|
||||
|
||||
// A field will be "constant" if it is known always to be
|
||||
// a non-null reference to an instance of a particular class,
|
||||
// or to a particular array. This can happen even if the instance
|
||||
// or array is not perm. In such a case, an "unloaded" ciArray
|
||||
// or ciInstance is created. The compiler may be able to use
|
||||
// information about the object's class (which is exact) or length.
|
||||
|
||||
if (o == NULL) {
|
||||
_constant_value = ciConstant(type()->basic_type(), ciNullObject::make());
|
||||
} else {
|
||||
_constant_value = ciConstant(type()->basic_type(), CURRENT_ENV->get_object(o));
|
||||
assert(_constant_value.as_object() == CURRENT_ENV->get_object(o), "check interning");
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_stable && _constant_value.is_null_or_zero()) {
|
||||
// It is not a constant after all; treat it as uninitialized.
|
||||
_is_constant = false;
|
||||
} else {
|
||||
_is_constant = true;
|
||||
} else {
|
||||
// An instance field can be constant if it's a stable field (with FoldStableValues) or if
|
||||
// it's a final non-static field of a trusted class (classes in
|
||||
// java.lang.invoke and sun.invoke packages and subpackages).
|
||||
_is_constant = is_stable_field || trust_final_non_static_fields(_holder);
|
||||
}
|
||||
} else {
|
||||
// For CallSite objects treat the target field as a compile time constant.
|
||||
assert(SystemDictionary::CallSite_klass() != NULL, "should be already initialized");
|
||||
if (k == SystemDictionary::CallSite_klass() &&
|
||||
_offset == java_lang_invoke_CallSite::target_offset_in_bytes()) {
|
||||
_is_constant = true;
|
||||
} else {
|
||||
// Non-final & non-stable fields are not constants.
|
||||
_is_constant = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciField::constant_value
|
||||
// Get the constant value of this static field.
//
// Must only be called on a static field for which is_constant() holds.
// Returns a default-constructed ciConstant when the holder class is not
// initialized, or when FoldStableValues is set, the field is stable and
// the value satisfies is_null_or_zero(). Otherwise returns _constant_value.
|
||||
ciConstant ciField::constant_value() {
|
||||
assert(is_static() && is_constant(), "illegal call to constant_value()");
|
||||
if (!_holder->is_initialized()) {
|
||||
return ciConstant(); // Not initialized yet
|
||||
}
|
||||
// T_ILLEGAL means no value has been recorded in _constant_value so far;
// read it from the holder's mirror and keep it in the member.
if (_constant_value.basic_type() == T_ILLEGAL) {
|
||||
// Static fields are placed in mirror objects.
|
||||
VM_ENTRY_MARK;
|
||||
ciInstance* mirror = CURRENT_ENV->get_instance(_holder->get_Klass()->java_mirror());
|
||||
_constant_value = mirror->field_value_impl(type()->basic_type(), offset());
|
||||
}
|
||||
// A stable field that still holds null/zero is reported as having no
// constant value; _constant_value itself is left untouched.
if (FoldStableValues && is_stable() && _constant_value.is_null_or_zero()) {
|
||||
return ciConstant();
|
||||
}
|
||||
return _constant_value;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciField::constant_value_of
|
||||
// Get the constant value of this non-static constant field in the given object.
//
// object must be an instance (asserted). The value is read through
// ciInstance::field_value(). When FoldStableValues is set, the field is
// stable and the value satisfies is_null_or_zero(), a default-constructed
// ciConstant is returned instead of the value read.
|
||||
ciConstant ciField::constant_value_of(ciObject* object) {
|
||||
assert(!is_static() && is_constant(), "only if field is non-static constant");
|
||||
assert(object->is_instance(), "must be instance");
|
||||
ciConstant field_value = object->as_instance()->field_value(this);
|
||||
// A stable field that still holds null/zero is reported as having no
// constant value.
if (FoldStableValues && is_stable() && field_value.is_null_or_zero()) {
|
||||
return ciConstant();
|
||||
}
|
||||
return field_value;
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciField::compute_type
|
||||
|
|
|
@ -62,7 +62,7 @@ private:
|
|||
void initialize_from(fieldDescriptor* fd);
|
||||
|
||||
public:
|
||||
ciFlags flags() { return _flags; }
|
||||
ciFlags flags() const { return _flags; }
|
||||
|
||||
// Of which klass is this field a member?
|
||||
//
|
||||
|
@ -89,13 +89,13 @@ public:
|
|||
//
|
||||
// In that case the declared holder of f would be B and
|
||||
// the canonical holder of f would be A.
|
||||
ciInstanceKlass* holder() { return _holder; }
|
||||
ciInstanceKlass* holder() const { return _holder; }
|
||||
|
||||
// Name of this field?
|
||||
ciSymbol* name() { return _name; }
|
||||
ciSymbol* name() const { return _name; }
|
||||
|
||||
// Signature of this field?
|
||||
ciSymbol* signature() { return _signature; }
|
||||
ciSymbol* signature() const { return _signature; }
|
||||
|
||||
// Of what type is this field?
|
||||
ciType* type() { return (_type == NULL) ? compute_type() : _type; }
|
||||
|
@ -107,13 +107,13 @@ public:
|
|||
int size_in_bytes() { return type2aelembytes(layout_type()); }
|
||||
|
||||
// What is the offset of this field?
|
||||
int offset() {
|
||||
int offset() const {
|
||||
assert(_offset >= 1, "illegal call to offset()");
|
||||
return _offset;
|
||||
}
|
||||
|
||||
// Same question, explicit units. (Fields are aligned to the byte level.)
|
||||
int offset_in_bytes() {
|
||||
int offset_in_bytes() const {
|
||||
return offset();
|
||||
}
|
||||
|
||||
|
@ -127,31 +127,27 @@ public:
|
|||
//
|
||||
// Clarification: A field is considered constant if:
|
||||
// 1. The field is both static and final
|
||||
// 2. The canonical holder of the field has undergone
|
||||
// static initialization.
|
||||
// 3. The field is not one of the special static/final
|
||||
// 2. The field is not one of the special static/final
|
||||
// non-constant fields. These are java.lang.System.in,
|
||||
// java.lang.System.out and java.lang.System.err. Abomination.
|
||||
//
|
||||
// A field is also considered constant if it is marked @Stable
|
||||
// and is non-null (or non-zero, if a primitive).
|
||||
// For non-static fields, the null/zero check must be
|
||||
// arranged by the user, as constant_value().is_null_or_zero().
|
||||
bool is_constant() { return _is_constant; }
|
||||
//
|
||||
// A user should also check the field value (constant_value().is_valid()), since
|
||||
// constant fields of non-initialized classes don't have values yet.
|
||||
bool is_constant() const { return _is_constant; }
|
||||
|
||||
// Get the constant value of this field.
|
||||
ciConstant constant_value() {
|
||||
assert(is_static() && is_constant(), "illegal call to constant_value()");
|
||||
return _constant_value;
|
||||
// Get the constant value of the static field.
|
||||
ciConstant constant_value();
|
||||
|
||||
// True when the field is static, is flagged constant, and constant_value()
// currently yields a valid value (is_valid()).
bool is_static_constant() {
|
||||
return is_static() && is_constant() && constant_value().is_valid();
|
||||
}
|
||||
|
||||
// Get the constant value of non-static final field in the given
|
||||
// object.
|
||||
ciConstant constant_value_of(ciObject* object) {
|
||||
assert(!is_static() && is_constant(), "only if field is non-static constant");
|
||||
assert(object->is_instance(), "must be instance");
|
||||
return object->as_instance()->field_value(this);
|
||||
}
|
||||
ciConstant constant_value_of(ciObject* object);
|
||||
|
||||
// Check for link time errors. Accessing a field from a
|
||||
// certain class via a certain bytecode may or may not be legal.
|
||||
|
@ -165,14 +161,14 @@ public:
|
|||
Bytecodes::Code bc);
|
||||
|
||||
// Java access flags
|
||||
bool is_public () { return flags().is_public(); }
|
||||
bool is_private () { return flags().is_private(); }
|
||||
bool is_protected () { return flags().is_protected(); }
|
||||
bool is_static () { return flags().is_static(); }
|
||||
bool is_final () { return flags().is_final(); }
|
||||
bool is_stable () { return flags().is_stable(); }
|
||||
bool is_volatile () { return flags().is_volatile(); }
|
||||
bool is_transient () { return flags().is_transient(); }
|
||||
bool is_public () const { return flags().is_public(); }
|
||||
bool is_private () const { return flags().is_private(); }
|
||||
bool is_protected () const { return flags().is_protected(); }
|
||||
bool is_static () const { return flags().is_static(); }
|
||||
bool is_final () const { return flags().is_final(); }
|
||||
bool is_stable () const { return flags().is_stable(); }
|
||||
bool is_volatile () const { return flags().is_volatile(); }
|
||||
bool is_transient () const { return flags().is_transient(); }
|
||||
|
||||
bool is_call_site_target() {
|
||||
ciInstanceKlass* callsite_klass = CURRENT_ENV->CallSite_klass();
|
||||
|
|
|
@ -56,49 +56,21 @@ ciType* ciInstance::java_mirror_type() {
|
|||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciInstance::field_value
|
||||
//
|
||||
// Constant value of a field.
|
||||
ciConstant ciInstance::field_value(ciField* field) {
|
||||
assert(is_loaded(), "invalid access - must be loaded");
|
||||
assert(field->holder()->is_loaded(), "invalid access - holder must be loaded");
|
||||
assert(klass()->is_subclass_of(field->holder()), "invalid access - must be subclass");
|
||||
|
||||
VM_ENTRY_MARK;
|
||||
ciConstant result;
|
||||
// ciInstance::field_value_impl
|
||||
ciConstant ciInstance::field_value_impl(BasicType field_btype, int offset) {
|
||||
Handle obj = get_oop();
|
||||
assert(!obj.is_null(), "bad oop");
|
||||
BasicType field_btype = field->type()->basic_type();
|
||||
int offset = field->offset();
|
||||
|
||||
switch(field_btype) {
|
||||
case T_BYTE:
|
||||
return ciConstant(field_btype, obj->byte_field(offset));
|
||||
break;
|
||||
case T_CHAR:
|
||||
return ciConstant(field_btype, obj->char_field(offset));
|
||||
break;
|
||||
case T_SHORT:
|
||||
return ciConstant(field_btype, obj->short_field(offset));
|
||||
break;
|
||||
case T_BOOLEAN:
|
||||
return ciConstant(field_btype, obj->bool_field(offset));
|
||||
break;
|
||||
case T_INT:
|
||||
return ciConstant(field_btype, obj->int_field(offset));
|
||||
break;
|
||||
case T_FLOAT:
|
||||
return ciConstant(obj->float_field(offset));
|
||||
break;
|
||||
case T_DOUBLE:
|
||||
return ciConstant(obj->double_field(offset));
|
||||
break;
|
||||
case T_LONG:
|
||||
return ciConstant(obj->long_field(offset));
|
||||
break;
|
||||
case T_OBJECT:
|
||||
case T_ARRAY:
|
||||
{
|
||||
case T_BYTE: return ciConstant(field_btype, obj->byte_field(offset));
|
||||
case T_CHAR: return ciConstant(field_btype, obj->char_field(offset));
|
||||
case T_SHORT: return ciConstant(field_btype, obj->short_field(offset));
|
||||
case T_BOOLEAN: return ciConstant(field_btype, obj->bool_field(offset));
|
||||
case T_INT: return ciConstant(field_btype, obj->int_field(offset));
|
||||
case T_FLOAT: return ciConstant(obj->float_field(offset));
|
||||
case T_DOUBLE: return ciConstant(obj->double_field(offset));
|
||||
case T_LONG: return ciConstant(obj->long_field(offset));
|
||||
case T_OBJECT: // fall through
|
||||
case T_ARRAY: {
|
||||
oop o = obj->obj_field(offset);
|
||||
|
||||
// A field will be "constant" if it is known always to be
|
||||
|
@ -115,11 +87,22 @@ ciConstant ciInstance::field_value(ciField* field) {
|
|||
}
|
||||
}
|
||||
}
|
||||
ShouldNotReachHere();
|
||||
// to shut up the compiler
|
||||
fatal("no field value: %s", type2name(field_btype));
|
||||
return ciConstant();
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciInstance::field_value
|
||||
//
|
||||
// Constant value of a field.
|
||||
ciConstant ciInstance::field_value(ciField* field) {
|
||||
assert(is_loaded(), "invalid access - must be loaded");
|
||||
assert(field->holder()->is_loaded(), "invalid access - holder must be loaded");
|
||||
assert(field->is_static() || klass()->is_subclass_of(field->holder()), "invalid access - must be subclass");
|
||||
|
||||
GUARDED_VM_ENTRY(return field_value_impl(field->type()->basic_type(), field->offset());)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// ciInstance::field_value_by_offset
|
||||
//
|
||||
|
|
|
@ -36,6 +36,7 @@
|
|||
// instance of java.lang.Object.
|
||||
class ciInstance : public ciObject {
|
||||
CI_PACKAGE_ACCESS
|
||||
friend class ciField;
|
||||
|
||||
protected:
|
||||
ciInstance(instanceHandle h_i) : ciObject(h_i) {
|
||||
|
@ -50,6 +51,8 @@ protected:
|
|||
|
||||
void print_impl(outputStream* st);
|
||||
|
||||
ciConstant field_value_impl(BasicType field_btype, int offset);
|
||||
|
||||
public:
|
||||
// If this object is a java mirror, return the corresponding type.
|
||||
// Otherwise, return NULL.
|
||||
|
|
|
@ -88,12 +88,7 @@ bool ciKlass::is_subclass_of(ciKlass* that) {
|
|||
assert(this->is_loaded(), "must be loaded: %s", this->name()->as_quoted_ascii());
|
||||
assert(that->is_loaded(), "must be loaded: %s", that->name()->as_quoted_ascii());
|
||||
|
||||
VM_ENTRY_MARK;
|
||||
Klass* this_klass = get_Klass();
|
||||
Klass* that_klass = that->get_Klass();
|
||||
bool result = this_klass->is_subclass_of(that_klass);
|
||||
|
||||
return result;
|
||||
GUARDED_VM_ENTRY(return get_Klass()->is_subclass_of(that->get_Klass());)
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
|
|
|
@ -58,9 +58,7 @@ ciSymbol::ciSymbol(Symbol* s)
|
|||
//
|
||||
// The text of the symbol as a null-terminated C string.
|
||||
const char* ciSymbol::as_utf8() {
|
||||
VM_QUICK_ENTRY_MARK;
|
||||
Symbol* s = get_symbol();
|
||||
return s->as_utf8();
|
||||
GUARDED_VM_QUICK_ENTRY(return get_symbol()->as_utf8();)
|
||||
}
|
||||
|
||||
// The text of the symbol as a null-terminated C string.
|
||||
|
|
|
@ -2927,7 +2927,7 @@ static const intArray* sort_methods(Array<Method*>* methods) {
|
|||
// If JVMTI original method ordering or sharing is enabled construct int
|
||||
// array remembering the original ordering
|
||||
if (JvmtiExport::can_maintain_original_method_order() || DumpSharedSpaces) {
|
||||
method_ordering = new intArray(length);
|
||||
method_ordering = new intArray(length, length, -1);
|
||||
for (int index = 0; index < length; index++) {
|
||||
Method* const m = methods->at(index);
|
||||
const int old_index = m->vtable_index();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -42,7 +42,6 @@ class FieldInfo;
|
|||
template <typename T>
|
||||
class GrowableArray;
|
||||
class InstanceKlass;
|
||||
class intArray;
|
||||
class Symbol;
|
||||
class TempNewSymbol;
|
||||
|
||||
|
|
|
@ -882,6 +882,10 @@
|
|||
do_name( newArray_name, "newArray") \
|
||||
do_signature(newArray_signature, "(Ljava/lang/Class;I)Ljava/lang/Object;") \
|
||||
\
|
||||
do_intrinsic(_onSpinWait, java_lang_Thread, onSpinWait_name, onSpinWait_signature, F_S) \
|
||||
do_name( onSpinWait_name, "onSpinWait") \
|
||||
do_alias( onSpinWait_signature, void_method_signature) \
|
||||
\
|
||||
do_intrinsic(_copyOf, java_util_Arrays, copyOf_name, copyOf_signature, F_S) \
|
||||
do_name( copyOf_name, "copyOf") \
|
||||
do_signature(copyOf_signature, "([Ljava/lang/Object;ILjava/lang/Class;)[Ljava/lang/Object;") \
|
||||
|
|
|
@ -287,7 +287,7 @@ void ParScanThreadState::print_promotion_failure_size() {
|
|||
}
|
||||
}
|
||||
|
||||
class ParScanThreadStateSet: private ResourceArray {
|
||||
class ParScanThreadStateSet: StackObj {
|
||||
public:
|
||||
// Initializes states for the specified number of threads;
|
||||
ParScanThreadStateSet(int num_threads,
|
||||
|
@ -322,8 +322,10 @@ private:
|
|||
ParallelTaskTerminator& _term;
|
||||
ParNewGeneration& _young_gen;
|
||||
Generation& _old_gen;
|
||||
ParScanThreadState* _per_thread_states;
|
||||
const int _num_threads;
|
||||
public:
|
||||
bool is_valid(int id) const { return id < length(); }
|
||||
bool is_valid(int id) const { return id < _num_threads; }
|
||||
ParallelTaskTerminator* terminator() { return &_term; }
|
||||
};
|
||||
|
||||
|
@ -336,17 +338,18 @@ ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
|
|||
PreservedMarksSet& preserved_marks_set,
|
||||
size_t desired_plab_sz,
|
||||
ParallelTaskTerminator& term)
|
||||
: ResourceArray(sizeof(ParScanThreadState), num_threads),
|
||||
_young_gen(young_gen),
|
||||
: _young_gen(young_gen),
|
||||
_old_gen(old_gen),
|
||||
_term(term)
|
||||
_term(term),
|
||||
_per_thread_states(NEW_RESOURCE_ARRAY(ParScanThreadState, num_threads)),
|
||||
_num_threads(num_threads)
|
||||
{
|
||||
assert(num_threads > 0, "sanity check!");
|
||||
assert(ParGCUseLocalOverflow == (overflow_stacks != NULL),
|
||||
"overflow_stack allocation mismatch");
|
||||
// Initialize states.
|
||||
for (int i = 0; i < num_threads; ++i) {
|
||||
new ((ParScanThreadState*)_data + i)
|
||||
new(_per_thread_states + i)
|
||||
ParScanThreadState(&to_space, &young_gen, &old_gen, i, &queue_set,
|
||||
overflow_stacks, preserved_marks_set.get(i),
|
||||
desired_plab_sz, term);
|
||||
|
@ -354,12 +357,12 @@ ParScanThreadStateSet::ParScanThreadStateSet(int num_threads,
|
|||
}
|
||||
|
||||
inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i) {
|
||||
assert(i >= 0 && i < length(), "sanity check!");
|
||||
return ((ParScanThreadState*)_data)[i];
|
||||
assert(i >= 0 && i < _num_threads, "sanity check!");
|
||||
return _per_thread_states[i];
|
||||
}
|
||||
|
||||
void ParScanThreadStateSet::trace_promotion_failed(const YoungGCTracer* gc_tracer) {
|
||||
for (int i = 0; i < length(); ++i) {
|
||||
for (int i = 0; i < _num_threads; ++i) {
|
||||
if (thread_state(i).promotion_failed()) {
|
||||
gc_tracer->report_promotion_failed(thread_state(i).promotion_failed_info());
|
||||
thread_state(i).promotion_failed_info().reset();
|
||||
|
@ -370,7 +373,7 @@ void ParScanThreadStateSet::trace_promotion_failed(const YoungGCTracer* gc_trace
|
|||
void ParScanThreadStateSet::reset(uint active_threads, bool promotion_failed) {
|
||||
_term.reset_for_reuse(active_threads);
|
||||
if (promotion_failed) {
|
||||
for (int i = 0; i < length(); ++i) {
|
||||
for (int i = 0; i < _num_threads; ++i) {
|
||||
thread_state(i).print_promotion_failure_size();
|
||||
}
|
||||
}
|
||||
|
@ -385,7 +388,7 @@ void ParScanThreadState::reset_stats() {
|
|||
}
|
||||
|
||||
void ParScanThreadStateSet::reset_stats() {
|
||||
for (int i = 0; i < length(); ++i) {
|
||||
for (int i = 0; i < _num_threads; ++i) {
|
||||
thread_state(i).reset_stats();
|
||||
}
|
||||
}
|
||||
|
@ -408,7 +411,7 @@ void ParScanThreadStateSet::print_termination_stats() {
|
|||
|
||||
print_termination_stats_hdr(st);
|
||||
|
||||
for (int i = 0; i < length(); ++i) {
|
||||
for (int i = 0; i < _num_threads; ++i) {
|
||||
const ParScanThreadState & pss = thread_state(i);
|
||||
const double elapsed_ms = pss.elapsed_time() * 1000.0;
|
||||
const double s_roots_ms = pss.strong_roots_time() * 1000.0;
|
||||
|
@ -436,7 +439,7 @@ void ParScanThreadStateSet::print_taskqueue_stats() {
|
|||
print_taskqueue_stats_hdr(st);
|
||||
|
||||
TaskQueueStats totals;
|
||||
for (int i = 0; i < length(); ++i) {
|
||||
for (int i = 0; i < _num_threads; ++i) {
|
||||
const ParScanThreadState & pss = thread_state(i);
|
||||
const TaskQueueStats & stats = pss.taskqueue_stats();
|
||||
st->print("%3d ", i); stats.print(st); st->cr();
|
||||
|
@ -459,7 +462,7 @@ void ParScanThreadStateSet::flush() {
|
|||
// possible since this might otherwise become a bottleneck
|
||||
// to scaling. Should we add heavy-weight work into this
|
||||
// loop, consider parallelizing the loop into the worker threads.
|
||||
for (int i = 0; i < length(); ++i) {
|
||||
for (int i = 0; i < _num_threads; ++i) {
|
||||
ParScanThreadState& par_scan_state = thread_state(i);
|
||||
|
||||
// Flush stats related to To-space PLAB activity and
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
|
@ -68,8 +68,9 @@ void Rewriter::compute_index_maps() {
|
|||
guarantee((int) _cp_cache_map.length() - 1 <= (int) ((u2)-1),
|
||||
"all cp cache indexes fit in a u2");
|
||||
|
||||
if (saw_mh_symbol)
|
||||
_method_handle_invokers.initialize(length, (int)0);
|
||||
if (saw_mh_symbol) {
|
||||
_method_handle_invokers.at_grow(length, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Unrewrite the bytecodes if an error occurs.
|
||||
|
@ -193,7 +194,7 @@ void Rewriter::maybe_rewrite_invokehandle(address opc, int cp_index, int cache_i
|
|||
assert(_pool->tag_at(cp_index).is_method(), "wrong index");
|
||||
// Determine whether this is a signature-polymorphic method.
|
||||
if (cp_index >= _method_handle_invokers.length()) return;
|
||||
int status = _method_handle_invokers[cp_index];
|
||||
int status = _method_handle_invokers.at(cp_index);
|
||||
assert(status >= -1 && status <= 1, "oob tri-state");
|
||||
if (status == 0) {
|
||||
if (_pool->klass_ref_at_noresolve(cp_index) == vmSymbols::java_lang_invoke_MethodHandle() &&
|
||||
|
@ -211,7 +212,7 @@ void Rewriter::maybe_rewrite_invokehandle(address opc, int cp_index, int cache_i
|
|||
} else {
|
||||
status = -1;
|
||||
}
|
||||
_method_handle_invokers[cp_index] = status;
|
||||
_method_handle_invokers.at(cp_index) = status;
|
||||
}
|
||||
// We use a special internal bytecode for such methods (if non-static).
|
||||
// The basic reason for this is that such methods need an extra "appendix" argument
|
||||
|
@ -287,7 +288,7 @@ void Rewriter::patch_invokedynamic_bytecodes() {
|
|||
// add delta to each.
|
||||
int resolved_index = _patch_invokedynamic_refs->at(i);
|
||||
for (int entry = 0; entry < ConstantPoolCacheEntry::_indy_resolved_references_entries; entry++) {
|
||||
assert(_invokedynamic_references_map[resolved_index+entry] == cache_index,
|
||||
assert(_invokedynamic_references_map.at(resolved_index + entry) == cache_index,
|
||||
"should be the same index");
|
||||
_invokedynamic_references_map.at_put(resolved_index+entry,
|
||||
cache_index + delta);
|
||||
|
@ -520,7 +521,14 @@ void Rewriter::rewrite(instanceKlassHandle klass, TRAPS) {
|
|||
Rewriter::Rewriter(instanceKlassHandle klass, const constantPoolHandle& cpool, Array<Method*>* methods, TRAPS)
|
||||
: _klass(klass),
|
||||
_pool(cpool),
|
||||
_methods(methods)
|
||||
_methods(methods),
|
||||
_cp_map(cpool->length()),
|
||||
_cp_cache_map(cpool->length() / 2),
|
||||
_reference_map(cpool->length()),
|
||||
_resolved_references_map(cpool->length() / 2),
|
||||
_invokedynamic_references_map(cpool->length() / 2),
|
||||
_method_handle_invokers(cpool->length()),
|
||||
_invokedynamic_cp_cache_map(cpool->length() / 4)
|
||||
{
|
||||
|
||||
// Rewrite bytecodes - exception here exits.
|
||||
|
|
|
@ -37,13 +37,13 @@ class Rewriter: public StackObj {
|
|||
instanceKlassHandle _klass;
|
||||
constantPoolHandle _pool;
|
||||
Array<Method*>* _methods;
|
||||
intArray _cp_map;
|
||||
intStack _cp_cache_map; // for Methodref, Fieldref,
|
||||
GrowableArray<int> _cp_map;
|
||||
GrowableArray<int> _cp_cache_map; // for Methodref, Fieldref,
|
||||
// InterfaceMethodref and InvokeDynamic
|
||||
intArray _reference_map; // maps from cp index to resolved_refs index (or -1)
|
||||
intStack _resolved_references_map; // for strings, methodHandle, methodType
|
||||
intStack _invokedynamic_references_map; // for invokedynamic resolved refs
|
||||
intArray _method_handle_invokers;
|
||||
GrowableArray<int> _reference_map; // maps from cp index to resolved_refs index (or -1)
|
||||
GrowableArray<int> _resolved_references_map; // for strings, methodHandle, methodType
|
||||
GrowableArray<int> _invokedynamic_references_map; // for invokedynamic resolved refs
|
||||
GrowableArray<int> _method_handle_invokers;
|
||||
int _resolved_reference_limit;
|
||||
|
||||
// For mapping invokedynamic bytecodes, which are discovered during method
|
||||
|
@ -51,26 +51,29 @@ class Rewriter: public StackObj {
|
|||
// If there are any invokespecial/InterfaceMethodref special case bytecodes,
|
||||
// these entries are added before invokedynamic entries so that the
|
||||
// invokespecial bytecode 16 bit index doesn't overflow.
|
||||
intStack _invokedynamic_cp_cache_map;
|
||||
GrowableArray<int> _invokedynamic_cp_cache_map;
|
||||
|
||||
// For patching.
|
||||
GrowableArray<address>* _patch_invokedynamic_bcps;
|
||||
GrowableArray<int>* _patch_invokedynamic_refs;
|
||||
|
||||
void init_maps(int length) {
|
||||
_cp_map.initialize(length, -1);
|
||||
// Choose an initial value large enough that we don't get frequent
|
||||
// calls to grow().
|
||||
_cp_cache_map.initialize(length/2);
|
||||
_cp_map.trunc_to(0);
|
||||
_cp_map.at_grow(length, -1);
|
||||
|
||||
_cp_cache_map.trunc_to(0);
|
||||
// Also cache resolved objects, in another different cache.
|
||||
_reference_map.initialize(length, -1);
|
||||
_resolved_references_map.initialize(length/2);
|
||||
_invokedynamic_references_map.initialize(length/2);
|
||||
_reference_map.trunc_to(0);
|
||||
_reference_map.at_grow(length, -1);
|
||||
|
||||
_method_handle_invokers.trunc_to(0);
|
||||
_resolved_references_map.trunc_to(0);
|
||||
_invokedynamic_references_map.trunc_to(0);
|
||||
_resolved_reference_limit = -1;
|
||||
_first_iteration_cp_cache_limit = -1;
|
||||
|
||||
// invokedynamic specific fields
|
||||
_invokedynamic_cp_cache_map.initialize(length/4);
|
||||
_invokedynamic_cp_cache_map.trunc_to(0);
|
||||
_patch_invokedynamic_bcps = new GrowableArray<address>(length / 4);
|
||||
_patch_invokedynamic_refs = new GrowableArray<int>(length / 4);
|
||||
}
|
||||
|
@ -90,10 +93,10 @@ class Rewriter: public StackObj {
|
|||
return _cp_cache_map.length() - _first_iteration_cp_cache_limit;
|
||||
}
|
||||
|
||||
int cp_entry_to_cp_cache(int i) { assert(has_cp_cache(i), "oob"); return _cp_map[i]; }
|
||||
bool has_cp_cache(int i) { return (uint)i < (uint)_cp_map.length() && _cp_map[i] >= 0; }
|
||||
int cp_entry_to_cp_cache(int i) { assert(has_cp_cache(i), "oob"); return _cp_map.at(i); }
|
||||
bool has_cp_cache(int i) { return (uint) i < (uint) _cp_map.length() && _cp_map.at(i) >= 0; }
|
||||
|
||||
int add_map_entry(int cp_index, intArray* cp_map, intStack* cp_cache_map) {
|
||||
int add_map_entry(int cp_index, GrowableArray<int>* cp_map, GrowableArray<int>* cp_cache_map) {
|
||||
assert(cp_map->at(cp_index) == -1, "not twice on same cp_index");
|
||||
int cache_index = cp_cache_map->append(cp_index);
|
||||
cp_map->at_put(cp_index, cache_index);
|
||||
|
@ -121,7 +124,7 @@ class Rewriter: public StackObj {
|
|||
}
|
||||
|
||||
int invokedynamic_cp_cache_entry_pool_index(int cache_index) {
|
||||
int cp_index = _invokedynamic_cp_cache_map[cache_index];
|
||||
int cp_index = _invokedynamic_cp_cache_map.at(cache_index);
|
||||
return cp_index;
|
||||
}
|
||||
|
||||
|
@ -144,10 +147,10 @@ class Rewriter: public StackObj {
|
|||
|
||||
int cp_entry_to_resolved_references(int cp_index) const {
|
||||
assert(has_entry_in_resolved_references(cp_index), "oob");
|
||||
return _reference_map[cp_index];
|
||||
return _reference_map.at(cp_index);
|
||||
}
|
||||
bool has_entry_in_resolved_references(int cp_index) const {
|
||||
return (uint)cp_index < (uint)_reference_map.length() && _reference_map[cp_index] >= 0;
|
||||
return (uint) cp_index < (uint) _reference_map.length() && _reference_map.at(cp_index) >= 0;
|
||||
}
|
||||
|
||||
// add a new entry to the resolved_references map
|
||||
|
@ -174,13 +177,13 @@ class Rewriter: public StackObj {
|
|||
}
|
||||
|
||||
int resolved_references_entry_to_pool_index(int ref_index) {
|
||||
int cp_index = _resolved_references_map[ref_index];
|
||||
int cp_index = _resolved_references_map.at(ref_index);
|
||||
return cp_index;
|
||||
}
|
||||
|
||||
// Access the contents of _cp_cache_map to determine CP cache layout.
|
||||
int cp_cache_entry_pool_index(int cache_index) {
|
||||
int cp_index = _cp_cache_map[cache_index];
|
||||
int cp_index = _cp_cache_map.at(cache_index);
|
||||
return cp_index;
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@
|
|||
// BitsInByte is a lookup table which tells the number of bits that
|
||||
// are in the looked-up number. It is very useful in VectorSet_Size.
|
||||
|
||||
uint8_t bitsInByte[256] = {
|
||||
uint8_t bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
|
||||
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
|
||||
#include "libadt/set.hpp"
|
||||
|
||||
#define BITS_IN_BYTE_ARRAY_SIZE 256
|
||||
|
||||
// Vector Sets - An Abstract Data Type
|
||||
//INTERFACE
|
||||
|
||||
|
|
|
@ -569,7 +569,7 @@ void ConstantPoolCache::initialize(const intArray& inverse_index_map,
|
|||
const intArray& invokedynamic_references_map) {
|
||||
for (int i = 0; i < inverse_index_map.length(); i++) {
|
||||
ConstantPoolCacheEntry* e = entry_at(i);
|
||||
int original_index = inverse_index_map[i];
|
||||
int original_index = inverse_index_map.at(i);
|
||||
e->initialize_entry(original_index);
|
||||
assert(entry_at(i) == e, "sanity");
|
||||
}
|
||||
|
@ -579,19 +579,19 @@ void ConstantPoolCache::initialize(const intArray& inverse_index_map,
|
|||
for (int i = 0; i < invokedynamic_inverse_index_map.length(); i++) {
|
||||
int offset = i + invokedynamic_offset;
|
||||
ConstantPoolCacheEntry* e = entry_at(offset);
|
||||
int original_index = invokedynamic_inverse_index_map[i];
|
||||
int original_index = invokedynamic_inverse_index_map.at(i);
|
||||
e->initialize_entry(original_index);
|
||||
assert(entry_at(offset) == e, "sanity");
|
||||
}
|
||||
|
||||
for (int ref = 0; ref < invokedynamic_references_map.length(); ref++) {
|
||||
const int cpci = invokedynamic_references_map[ref];
|
||||
const int cpci = invokedynamic_references_map.at(ref);
|
||||
if (cpci >= 0) {
|
||||
#ifdef ASSERT
|
||||
// invokedynamic and invokehandle have more entries; check if they
|
||||
// all point to the same constant pool cache entry.
|
||||
for (int entry = 1; entry < ConstantPoolCacheEntry::_indy_resolved_references_entries; entry++) {
|
||||
const int cpci_next = invokedynamic_references_map[ref + entry];
|
||||
const int cpci_next = invokedynamic_references_map.at(ref + entry);
|
||||
assert(cpci == cpci_next, "%d == %d", cpci, cpci_next);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -361,6 +361,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
|||
case vmIntrinsics::_getCallerClass:
|
||||
if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return false;
|
||||
break;
|
||||
case vmIntrinsics::_onSpinWait:
|
||||
if (!Matcher::match_rule_supported(Op_OnSpinWait)) return false;
|
||||
break;
|
||||
case vmIntrinsics::_hashCode:
|
||||
case vmIntrinsics::_identityHashCode:
|
||||
case vmIntrinsics::_getClass:
|
||||
|
|
|
@ -171,7 +171,6 @@ macro(LoadN)
|
|||
macro(LoadRange)
|
||||
macro(LoadS)
|
||||
macro(Lock)
|
||||
macro(Log10D)
|
||||
macro(Loop)
|
||||
macro(LoopLimit)
|
||||
macro(Mach)
|
||||
|
@ -205,6 +204,7 @@ macro(Multi)
|
|||
macro(NegD)
|
||||
macro(NegF)
|
||||
macro(NeverBranch)
|
||||
macro(OnSpinWait)
|
||||
macro(Opaque1)
|
||||
macro(Opaque2)
|
||||
macro(Opaque3)
|
||||
|
@ -264,7 +264,6 @@ macro(SubI)
|
|||
macro(SubL)
|
||||
macro(TailCall)
|
||||
macro(TailJump)
|
||||
macro(TanD)
|
||||
macro(ThreadLocal)
|
||||
macro(Unlock)
|
||||
macro(URShiftI)
|
||||
|
|
|
@ -4467,6 +4467,25 @@ void GraphKit::inflate_string_slow(Node* src, Node* dst, Node* start, Node* coun
|
|||
set_memory(st, TypeAryPtr::BYTES);
|
||||
}
|
||||
|
||||
Node* GraphKit::make_constant_from_field(ciField* field, Node* obj) {
|
||||
if (!field->is_constant()) {
|
||||
return NULL; // Field not marked as constant.
|
||||
}
|
||||
ciInstance* holder = NULL;
|
||||
if (!field->is_static()) {
|
||||
ciObject* const_oop = obj->bottom_type()->is_oopptr()->const_oop();
|
||||
if (const_oop != NULL && const_oop->is_instance()) {
|
||||
holder = const_oop->as_instance();
|
||||
}
|
||||
}
|
||||
const Type* con_type = Type::make_constant_from_field(field, holder, field->layout_type(),
|
||||
/*is_unsigned_load=*/false);
|
||||
if (con_type != NULL) {
|
||||
return makecon(con_type);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
Node* GraphKit::cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type) {
|
||||
// Reify the property as a CastPP node in Ideal graph to comply with monotonicity
|
||||
// assumption of CCP analysis.
|
||||
|
|
|
@ -910,6 +910,8 @@ class GraphKit : public Phase {
|
|||
void add_predicate(int nargs = 0);
|
||||
void add_predicate_impl(Deoptimization::DeoptReason reason, int nargs);
|
||||
|
||||
Node* make_constant_from_field(ciField* field, Node* obj);
|
||||
|
||||
// Produce new array node of stable type
|
||||
Node* cast_array_to_stable(Node* ary, const TypeAryPtr* ary_type);
|
||||
};
|
||||
|
|
|
@ -281,6 +281,7 @@ class LibraryCallKit : public GraphKit {
|
|||
MemNode::MemOrd access_kind_to_memord(AccessKind access_kind);
|
||||
bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind, AccessKind access_kind);
|
||||
bool inline_unsafe_fence(vmIntrinsics::ID id);
|
||||
bool inline_onspinwait();
|
||||
bool inline_fp_conversions(vmIntrinsics::ID id);
|
||||
bool inline_number_methods(vmIntrinsics::ID id);
|
||||
bool inline_reference_get();
|
||||
|
@ -696,6 +697,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||
case vmIntrinsics::_storeFence:
|
||||
case vmIntrinsics::_fullFence: return inline_unsafe_fence(intrinsic_id());
|
||||
|
||||
case vmIntrinsics::_onSpinWait: return inline_onspinwait();
|
||||
|
||||
case vmIntrinsics::_currentThread: return inline_native_currentThread();
|
||||
case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted();
|
||||
|
||||
|
@ -1677,7 +1680,6 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
|
|||
switch (id) {
|
||||
case vmIntrinsics::_dabs: n = new AbsDNode( arg); break;
|
||||
case vmIntrinsics::_dsqrt: n = new SqrtDNode(C, control(), arg); break;
|
||||
case vmIntrinsics::_dlog10: n = new Log10DNode(C, control(), arg); break;
|
||||
default: fatal_unexpected_iid(id); break;
|
||||
}
|
||||
set_result(_gvn.transform(n));
|
||||
|
@ -1691,10 +1693,6 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
|
|||
Node* arg = round_double_node(argument(0));
|
||||
Node* n = NULL;
|
||||
|
||||
switch (id) {
|
||||
case vmIntrinsics::_dtan: n = new TanDNode(C, control(), arg); break;
|
||||
default: fatal_unexpected_iid(id); break;
|
||||
}
|
||||
n = _gvn.transform(n);
|
||||
|
||||
// Rounding required? Check for argument reduction!
|
||||
|
@ -1812,14 +1810,17 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
|||
return StubRoutines::dcos() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dcos(), "dcos") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS");
|
||||
case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
|
||||
case vmIntrinsics::_dtan:
|
||||
return StubRoutines::dtan() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dtan(), "dtan") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN");
|
||||
|
||||
case vmIntrinsics::_dlog:
|
||||
return StubRoutines::dlog() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog(), "dlog") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG");
|
||||
case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) :
|
||||
case vmIntrinsics::_dlog10:
|
||||
return StubRoutines::dlog10() != NULL ?
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), StubRoutines::dlog10(), "dlog10") :
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
|
||||
|
||||
// These intrinsics are supported on all hardware
|
||||
|
@ -2550,13 +2551,9 @@ bool LibraryCallKit::inline_unsafe_access(const bool is_native_ptr, bool is_stor
|
|||
Node* p = NULL;
|
||||
// Try to constant fold a load from a constant field
|
||||
ciField* field = alias_type->field();
|
||||
if (heap_base_oop != top() &&
|
||||
field != NULL && field->is_constant() && !mismatched) {
|
||||
if (heap_base_oop != top() && field != NULL && field->is_constant() && !mismatched) {
|
||||
// final or stable field
|
||||
const Type* con_type = Type::make_constant(alias_type->field(), heap_base_oop);
|
||||
if (con_type != NULL) {
|
||||
p = makecon(con_type);
|
||||
}
|
||||
p = make_constant_from_field(field, heap_base_oop);
|
||||
}
|
||||
if (p == NULL) {
|
||||
// To be valid, unsafe loads may depend on other conditions than
|
||||
|
@ -3127,6 +3124,11 @@ bool LibraryCallKit::inline_unsafe_fence(vmIntrinsics::ID id) {
|
|||
}
|
||||
}
|
||||
|
||||
bool LibraryCallKit::inline_onspinwait() {
|
||||
insert_mem_bar(Op_OnSpinWait);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LibraryCallKit::klass_needs_init_guard(Node* kls) {
|
||||
if (!kls->is_Con()) {
|
||||
return true;
|
||||
|
|
|
@ -860,7 +860,7 @@ bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray <Sa
|
|||
if (basic_elem_type == T_OBJECT || basic_elem_type == T_ARRAY) {
|
||||
if (!elem_type->is_loaded()) {
|
||||
field_type = TypeInstPtr::BOTTOM;
|
||||
} else if (field != NULL && field->is_constant() && field->is_static()) {
|
||||
} else if (field != NULL && field->is_static_constant()) {
|
||||
// This can happen if the constant oop is non-perm.
|
||||
ciObject* con = field->constant_value().as_object();
|
||||
// Do not "join" in the previous type; it doesn't add value,
|
||||
|
|
|
@ -944,6 +944,7 @@ static void match_alias_type(Compile* C, Node* n, Node* m) {
|
|||
case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
|
||||
case Op_StrInflatedCopy:
|
||||
case Op_StrCompressedCopy:
|
||||
case Op_OnSpinWait:
|
||||
case Op_EncodeISOArray:
|
||||
nidx = Compile::AliasIdxTop;
|
||||
nat = NULL;
|
||||
|
|
|
@ -797,7 +797,7 @@ Node *LoadNode::make(PhaseGVN& gvn, Node *ctl, Node *mem, Node *adr, const TypeP
|
|||
#endif
|
||||
{
|
||||
assert(!adr->bottom_type()->is_ptr_to_narrowoop() && !adr->bottom_type()->is_ptr_to_narrowklass(), "should have got back a narrow oop");
|
||||
load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr(), mo, control_dependency);
|
||||
load = new LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr(), mo, control_dependency);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1621,72 +1621,6 @@ LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static ciConstant check_mismatched_access(ciConstant con, BasicType loadbt, bool is_unsigned) {
|
||||
BasicType conbt = con.basic_type();
|
||||
switch (conbt) {
|
||||
case T_BOOLEAN: conbt = T_BYTE; break;
|
||||
case T_ARRAY: conbt = T_OBJECT; break;
|
||||
}
|
||||
switch (loadbt) {
|
||||
case T_BOOLEAN: loadbt = T_BYTE; break;
|
||||
case T_NARROWOOP: loadbt = T_OBJECT; break;
|
||||
case T_ARRAY: loadbt = T_OBJECT; break;
|
||||
case T_ADDRESS: loadbt = T_OBJECT; break;
|
||||
}
|
||||
if (conbt == loadbt) {
|
||||
if (is_unsigned && conbt == T_BYTE) {
|
||||
// LoadB (T_BYTE) with a small mask (<=8-bit) is converted to LoadUB (T_BYTE).
|
||||
return ciConstant(T_INT, con.as_int() & 0xFF);
|
||||
} else {
|
||||
return con;
|
||||
}
|
||||
}
|
||||
if (conbt == T_SHORT && loadbt == T_CHAR) {
|
||||
// LoadS (T_SHORT) with a small mask (<=16-bit) is converted to LoadUS (T_CHAR).
|
||||
return ciConstant(T_INT, con.as_int() & 0xFFFF);
|
||||
}
|
||||
return ciConstant(); // T_ILLEGAL
|
||||
}
|
||||
|
||||
// Try to constant-fold a stable array element.
|
||||
static const Type* fold_stable_ary_elem(const TypeAryPtr* ary, int off, bool is_unsigned_load, BasicType loadbt) {
|
||||
assert(ary->const_oop(), "array should be constant");
|
||||
assert(ary->is_stable(), "array should be stable");
|
||||
|
||||
// Decode the results of GraphKit::array_element_address.
|
||||
ciArray* aobj = ary->const_oop()->as_array();
|
||||
ciConstant element_value = aobj->element_value_by_offset(off);
|
||||
if (element_value.basic_type() == T_ILLEGAL) {
|
||||
return NULL; // wrong offset
|
||||
}
|
||||
ciConstant con = check_mismatched_access(element_value, loadbt, is_unsigned_load);
|
||||
assert(con.basic_type() != T_ILLEGAL, "elembt=%s; loadbt=%s; unsigned=%d",
|
||||
type2name(element_value.basic_type()), type2name(loadbt), is_unsigned_load);
|
||||
|
||||
if (con.basic_type() != T_ILLEGAL && // not a mismatched access
|
||||
!con.is_null_or_zero()) { // not a default value
|
||||
const Type* con_type = Type::make_from_constant(con);
|
||||
if (con_type != NULL) {
|
||||
if (con_type->isa_aryptr()) {
|
||||
// Join with the array element type, in case it is also stable.
|
||||
int dim = ary->stable_dimension();
|
||||
con_type = con_type->is_aryptr()->cast_to_stable(true, dim-1);
|
||||
}
|
||||
if (loadbt == T_NARROWOOP && con_type->isa_oopptr()) {
|
||||
con_type = con_type->make_narrowoop();
|
||||
}
|
||||
#ifndef PRODUCT
|
||||
if (TraceIterativeGVN) {
|
||||
tty->print("FoldStableValues: array element [off=%d]: con_type=", off);
|
||||
con_type->dump(); tty->cr();
|
||||
}
|
||||
#endif //PRODUCT
|
||||
return con_type;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
//------------------------------Value-----------------------------------------
|
||||
const Type* LoadNode::Value(PhaseGVN* phase) const {
|
||||
// Either input is TOP ==> the result is TOP
|
||||
|
@ -1715,10 +1649,14 @@ const Type* LoadNode::Value(PhaseGVN* phase) const {
|
|||
const bool off_beyond_header = ((uint)off >= (uint)min_base_off);
|
||||
|
||||
// Try to constant-fold a stable array element.
|
||||
if (FoldStableValues && !is_mismatched_access() && ary->is_stable() && ary->const_oop() != NULL) {
|
||||
if (FoldStableValues && !is_mismatched_access() && ary->is_stable()) {
|
||||
// Make sure the reference is not into the header and the offset is constant
|
||||
if (off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
|
||||
const Type* con_type = fold_stable_ary_elem(ary, off, is_unsigned(), memory_type());
|
||||
ciObject* aobj = ary->const_oop();
|
||||
if (aobj != NULL && off_beyond_header && adr->is_AddP() && off != Type::OffsetBot) {
|
||||
int stable_dimension = (ary->stable_dimension() > 0 ? ary->stable_dimension() - 1 : 0);
|
||||
const Type* con_type = Type::make_constant_from_array_element(aobj->as_array(), off,
|
||||
stable_dimension,
|
||||
memory_type(), is_unsigned());
|
||||
if (con_type != NULL) {
|
||||
return con_type;
|
||||
}
|
||||
|
@ -1785,28 +1723,10 @@ const Type* LoadNode::Value(PhaseGVN* phase) const {
|
|||
// For oop loads, we expect the _type to be precise.
|
||||
// Optimizations for constant objects
|
||||
ciObject* const_oop = tinst->const_oop();
|
||||
if (const_oop != NULL) {
|
||||
// For constant CallSites treat the target field as a compile time constant.
|
||||
if (const_oop->is_call_site()) {
|
||||
ciCallSite* call_site = const_oop->as_call_site();
|
||||
ciField* field = call_site->klass()->as_instance_klass()->get_field_by_offset(off, /*is_static=*/ false);
|
||||
if (field != NULL && field->is_call_site_target()) {
|
||||
ciMethodHandle* target = call_site->get_target();
|
||||
if (target != NULL) { // just in case
|
||||
ciConstant constant(T_OBJECT, target);
|
||||
const Type* t;
|
||||
if (adr->bottom_type()->is_ptr_to_narrowoop()) {
|
||||
t = TypeNarrowOop::make_from_constant(constant.as_object(), true);
|
||||
} else {
|
||||
t = TypeOopPtr::make_from_constant(constant.as_object(), true);
|
||||
}
|
||||
// Add a dependence for invalidation of the optimization.
|
||||
if (!call_site->is_constant_call_site()) {
|
||||
C->dependencies()->assert_call_site_target_value(call_site, target);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
}
|
||||
if (const_oop != NULL && const_oop->is_instance()) {
|
||||
const Type* con_type = Type::make_constant_from_field(const_oop->as_instance(), off, is_unsigned(), memory_type());
|
||||
if (con_type != NULL) {
|
||||
return con_type;
|
||||
}
|
||||
}
|
||||
} else if (tp->base() == Type::KlassPtr) {
|
||||
|
@ -2979,6 +2899,7 @@ MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
|
|||
case Op_MemBarReleaseLock: return new MemBarReleaseLockNode(C, atp, pn);
|
||||
case Op_MemBarVolatile: return new MemBarVolatileNode(C, atp, pn);
|
||||
case Op_MemBarCPUOrder: return new MemBarCPUOrderNode(C, atp, pn);
|
||||
case Op_OnSpinWait: return new OnSpinWaitNode(C, atp, pn);
|
||||
case Op_Initialize: return new InitializeNode(C, atp, pn);
|
||||
case Op_MemBarStoreStore: return new MemBarStoreStoreNode(C, atp, pn);
|
||||
default: ShouldNotReachHere(); return NULL;
|
||||
|
|
|
@ -1186,6 +1186,13 @@ public:
|
|||
virtual uint ideal_reg() const { return 0; } // not matched in the AD file
|
||||
};
|
||||
|
||||
class OnSpinWaitNode: public MemBarNode {
|
||||
public:
|
||||
OnSpinWaitNode(Compile* C, int alias_idx, Node* precedent)
|
||||
: MemBarNode(C, alias_idx, precedent) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
// Isolation of object setup after an AllocateNode and before next safepoint.
|
||||
// (See comment in memnode.cpp near InitializeNode::InitializeNode for semantics.)
|
||||
class InitializeNode: public MemBarNode {
|
||||
|
|
|
@ -1483,8 +1483,6 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) {
|
|||
// Compute the size of the first block
|
||||
_first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
|
||||
|
||||
assert(cb->insts_size() < 500000, "method is unreasonably large");
|
||||
|
||||
#ifdef ASSERT
|
||||
for (uint i = 0; i < nblocks; i++) { // For all blocks
|
||||
if (jmp_target[i] != 0) {
|
||||
|
|
|
@ -149,9 +149,9 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
|
|||
// Does this field have a constant value? If so, just push the value.
|
||||
if (field->is_constant()) {
|
||||
// final or stable field
|
||||
const Type* con_type = Type::make_constant(field, obj);
|
||||
if (con_type != NULL) {
|
||||
push_node(con_type->basic_type(), makecon(con_type));
|
||||
Node* con = make_constant_from_field(field, obj);
|
||||
if (con != NULL) {
|
||||
push_node(field->layout_type(), con);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -174,12 +174,16 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) {
|
|||
if (!field->type()->is_loaded()) {
|
||||
type = TypeInstPtr::BOTTOM;
|
||||
must_assert_null = true;
|
||||
} else if (field->is_constant() && field->is_static()) {
|
||||
} else if (field->is_static_constant()) {
|
||||
// This can happen if the constant oop is non-perm.
|
||||
ciObject* con = field->constant_value().as_object();
|
||||
// Do not "join" in the previous type; it doesn't add value,
|
||||
// and may yield a vacuous result if the field is of interface type.
|
||||
if (con->is_null_object()) {
|
||||
type = TypePtr::NULL_PTR;
|
||||
} else {
|
||||
type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
|
||||
}
|
||||
assert(type != NULL, "field singleton type must be consistent");
|
||||
} else {
|
||||
type = TypeOopPtr::make_from_klass(field_klass->as_klass());
|
||||
|
|
|
@ -389,7 +389,7 @@ bool RegMask::is_UP() const {
|
|||
//------------------------------Size-------------------------------------------
|
||||
// Compute size of register mask in bits
|
||||
uint RegMask::Size() const {
|
||||
extern uint8_t bitsInByte[512];
|
||||
extern uint8_t bitsInByte[BITS_IN_BYTE_ARRAY_SIZE];
|
||||
uint sum = 0;
|
||||
for( int i = 0; i < RM_SIZE; i++ )
|
||||
sum +=
|
||||
|
|
|
@ -1112,7 +1112,7 @@ Node* PhaseStringOpts::fetch_static_field(GraphKit& kit, ciField* field) {
|
|||
if( bt == T_OBJECT ) {
|
||||
if (!field->type()->is_loaded()) {
|
||||
type = TypeInstPtr::BOTTOM;
|
||||
} else if (field->is_constant()) {
|
||||
} else if (field->is_static_constant()) {
|
||||
// This can happen if the constant oop is non-perm.
|
||||
ciObject* con = field->constant_value().as_object();
|
||||
// Do not "join" in the previous type; it doesn't add value,
|
||||
|
|
|
@ -1533,25 +1533,3 @@ const Type* SqrtDNode::Value(PhaseGVN* phase) const {
|
|||
if( d < 0.0 ) return Type::DOUBLE;
|
||||
return TypeD::make( sqrt( d ) );
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Compute tan
|
||||
// Type of the tan node given the type of its input in(1):
//   - TOP input                -> TOP
//   - anything but a DoubleCon -> Type::DOUBLE
//   - double constant          -> constant folded via StubRoutines::intrinsic_tan
const Type* TanDNode::Value(PhaseGVN* phase) const {
  const Type* arg_type = phase->type(in(1));
  if (arg_type == Type::TOP) {
    return Type::TOP;
  }
  if (arg_type->base() == Type::DoubleCon) {
    double arg = arg_type->getd();
    return TypeD::make(StubRoutines::intrinsic_tan(arg));
  }
  return Type::DOUBLE;
}
|
||||
|
||||
//=============================================================================
|
||||
//------------------------------Value------------------------------------------
|
||||
// Compute log10
|
||||
// Type of the log10 node given the type of its input in(1):
//   - TOP input                -> TOP
//   - anything but a DoubleCon -> Type::DOUBLE
//   - double constant          -> constant folded via StubRoutines::intrinsic_log10
const Type* Log10DNode::Value(PhaseGVN* phase) const {
  const Type* arg_type = phase->type(in(1));
  if (arg_type == Type::TOP) {
    return Type::TOP;
  }
  if (arg_type->base() == Type::DoubleCon) {
    double arg = arg_type->getd();
    return TypeD::make(StubRoutines::intrinsic_log10(arg));
  }
  return Type::DOUBLE;
}
|
||||
|
|
|
@ -408,21 +408,6 @@ public:
|
|||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
};
|
||||
|
||||
//------------------------------TanDNode---------------------------------------
|
||||
// Tangent of a double
|
||||
class TanDNode : public Node {
|
||||
public:
|
||||
TanDNode(Compile* C, Node *c,Node *in1) : Node(c, in1) {
|
||||
init_flags(Flag_is_expensive);
|
||||
C->add_expensive_node(this);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return Type::DOUBLE; }
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
};
|
||||
|
||||
|
||||
//------------------------------AtanDNode--------------------------------------
|
||||
// Arc tangent of a double
|
||||
class AtanDNode : public Node {
|
||||
|
@ -448,20 +433,6 @@ public:
|
|||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
};
|
||||
|
||||
//------------------------------Log10DNode---------------------------------------
|
||||
// Log_10 of a double
|
||||
class Log10DNode : public Node {
|
||||
public:
|
||||
Log10DNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
|
||||
init_flags(Flag_is_expensive);
|
||||
C->add_expensive_node(this);
|
||||
}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return Type::DOUBLE; }
|
||||
virtual uint ideal_reg() const { return Op_RegD; }
|
||||
virtual const Type* Value(PhaseGVN* phase) const;
|
||||
};
|
||||
|
||||
//-------------------------------ReverseBytesINode--------------------------------
|
||||
// reverse bytes of an integer
|
||||
class ReverseBytesINode : public Node {
|
||||
|
|
|
@ -225,7 +225,9 @@ const Type* Type::get_typeflow_type(ciType* type) {
|
|||
|
||||
|
||||
//-----------------------make_from_constant------------------------------------
|
||||
const Type* Type::make_from_constant(ciConstant constant, bool require_constant) {
|
||||
const Type* Type::make_from_constant(ciConstant constant, bool require_constant,
|
||||
int stable_dimension, bool is_narrow_oop,
|
||||
bool is_autobox_cache) {
|
||||
switch (constant.basic_type()) {
|
||||
case T_BOOLEAN: return TypeInt::make(constant.as_boolean());
|
||||
case T_CHAR: return TypeInt::make(constant.as_char());
|
||||
|
@ -236,18 +238,32 @@ const Type* Type::make_from_constant(ciConstant constant, bool require_constant)
|
|||
case T_FLOAT: return TypeF::make(constant.as_float());
|
||||
case T_DOUBLE: return TypeD::make(constant.as_double());
|
||||
case T_ARRAY:
|
||||
case T_OBJECT:
|
||||
{
|
||||
case T_OBJECT: {
|
||||
// cases:
|
||||
// can_be_constant = (oop not scavengable || ScavengeRootsInCode != 0)
|
||||
// should_be_constant = (oop not scavengable || ScavengeRootsInCode >= 2)
|
||||
// An oop is not scavengable if it is in the perm gen.
|
||||
const Type* con_type = NULL;
|
||||
ciObject* oop_constant = constant.as_object();
|
||||
if (oop_constant->is_null_object()) {
|
||||
return Type::get_zero_type(T_OBJECT);
|
||||
con_type = Type::get_zero_type(T_OBJECT);
|
||||
} else if (require_constant || oop_constant->should_be_constant()) {
|
||||
return TypeOopPtr::make_from_constant(oop_constant, require_constant);
|
||||
con_type = TypeOopPtr::make_from_constant(oop_constant, require_constant);
|
||||
if (con_type != NULL) {
|
||||
if (Compile::current()->eliminate_boxing() && is_autobox_cache) {
|
||||
con_type = con_type->is_aryptr()->cast_to_autobox_cache(true);
|
||||
}
|
||||
if (stable_dimension > 0) {
|
||||
assert(FoldStableValues, "sanity");
|
||||
assert(!con_type->is_zero_type(), "default value for stable field");
|
||||
con_type = con_type->is_aryptr()->cast_to_stable(true, stable_dimension);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (is_narrow_oop) {
|
||||
con_type = con_type->make_narrowoop();
|
||||
}
|
||||
return con_type;
|
||||
}
|
||||
case T_ILLEGAL:
|
||||
// Invalid ciConstant returned due to OutOfMemoryError in the CI
|
||||
|
@ -258,41 +274,107 @@ const Type* Type::make_from_constant(ciConstant constant, bool require_constant)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
// Checks that a constant of some basic type may stand in for a load of basic
// type loadbt, after mapping both onto a common set of types (boolean is
// treated as byte; array, narrow oop and address are treated as object).
//
// Returns:
//   - con itself when the mapped types agree,
//   - a T_INT constant holding the low 8 bits for an unsigned byte load,
//   - a T_INT constant holding the low 16 bits when a short is loaded as char,
//   - a default-constructed ciConstant (T_ILLEGAL) for any other combination.
static ciConstant check_mismatched_access(ciConstant con, BasicType loadbt, bool is_unsigned) {
  BasicType elem_bt = con.basic_type();
  switch (elem_bt) {
    case T_BOOLEAN:
      elem_bt = T_BYTE;
      break;
    case T_ARRAY:
      elem_bt = T_OBJECT;
      break;
    default:
      break;
  }
  switch (loadbt) {
    case T_BOOLEAN:
      loadbt = T_BYTE;
      break;
    case T_NARROWOOP:
    case T_ARRAY:
    case T_ADDRESS:
      loadbt = T_OBJECT;
      break;
    default:
      break;
  }

  if (elem_bt == loadbt) {
    const bool unsigned_byte_load = is_unsigned && (elem_bt == T_BYTE);
    if (!unsigned_byte_load) {
      return con;
    }
    // LoadB (T_BYTE) with a small mask (<=8-bit) is converted to LoadUB (T_BYTE).
    return ciConstant(T_INT, con.as_int() & 0xFF);
  }

  const bool short_as_char = (elem_bt == T_SHORT) && (loadbt == T_CHAR);
  if (short_as_char) {
    // LoadS (T_SHORT) with a small mask (<=16-bit) is converted to LoadUS (T_CHAR).
    return ciConstant(T_INT, con.as_int() & 0xFFFF);
  }
  return ciConstant(); // T_ILLEGAL
}
|
||||
|
||||
const Type* Type::make_constant(ciField* field, Node* obj) {
|
||||
if (!field->is_constant()) return NULL;
|
||||
// Try to constant-fold a stable array element.
|
||||
const Type* Type::make_constant_from_array_element(ciArray* array, int off, int stable_dimension,
|
||||
BasicType loadbt, bool is_unsigned_load) {
|
||||
// Decode the results of GraphKit::array_element_address.
|
||||
ciConstant element_value = array->element_value_by_offset(off);
|
||||
if (element_value.basic_type() == T_ILLEGAL) {
|
||||
return NULL; // wrong offset
|
||||
}
|
||||
ciConstant con = check_mismatched_access(element_value, loadbt, is_unsigned_load);
|
||||
|
||||
const Type* con_type = NULL;
|
||||
assert(con.basic_type() != T_ILLEGAL, "elembt=%s; loadbt=%s; unsigned=%d",
|
||||
type2name(element_value.basic_type()), type2name(loadbt), is_unsigned_load);
|
||||
|
||||
if (con.is_valid() && // not a mismatched access
|
||||
!con.is_null_or_zero()) { // not a default value
|
||||
bool is_narrow_oop = (loadbt == T_NARROWOOP);
|
||||
return Type::make_from_constant(con, /*require_constant=*/true, stable_dimension, is_narrow_oop, /*is_autobox_cache=*/false);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Looks up the field located at offset `off` in `holder` and delegates to the
// ciField-based make_constant_from_field overload.
//
// The field is looked up as a static field when `holder` has a java mirror
// type that is an instance klass and `off` is at or beyond
// InstanceMirrorKlass::offset_of_static_fields(); otherwise it is looked up
// as an instance field of holder's own klass.
//
// Returns NULL when no field is found at that offset; otherwise whatever the
// ciField-based overload returns.
const Type* Type::make_constant_from_field(ciInstance* holder, int off, bool is_unsigned_load, BasicType loadbt) {
  ciField* field = NULL;
  ciType* mirror_type = holder->java_mirror_type();
  const bool is_static_access = (mirror_type != NULL) &&
                                mirror_type->is_instance_klass() &&
                                (off >= InstanceMirrorKlass::offset_of_static_fields());
  if (is_static_access) {
    // Static field
    field = mirror_type->as_instance_klass()->get_field_by_offset(off, /*is_static=*/true);
  } else {
    // Instance field
    field = holder->klass()->as_instance_klass()->get_field_by_offset(off, /*is_static=*/false);
  }

  if (field != NULL) {
    return Type::make_constant_from_field(field, holder, loadbt, is_unsigned_load);
  }
  return NULL; // Wrong offset
}
|
||||
|
||||
const Type* Type::make_constant_from_field(ciField* field, ciInstance* holder,
|
||||
BasicType loadbt, bool is_unsigned_load) {
|
||||
if (!field->is_constant()) {
|
||||
return NULL; // Non-constant field
|
||||
}
|
||||
ciConstant field_value;
|
||||
if (field->is_static()) {
|
||||
// final static field
|
||||
con_type = Type::make_from_constant(field->constant_value(), /*require_const=*/true);
|
||||
if (Compile::current()->eliminate_boxing() && field->is_autobox_cache() && con_type != NULL) {
|
||||
con_type = con_type->is_aryptr()->cast_to_autobox_cache(true);
|
||||
}
|
||||
} else {
|
||||
field_value = field->constant_value();
|
||||
} else if (holder != NULL) {
|
||||
// final or stable non-static field
|
||||
// Treat final non-static fields of trusted classes (classes in
|
||||
// java.lang.invoke and sun.invoke packages and subpackages) as
|
||||
// compile time constants.
|
||||
if (obj->is_Con()) {
|
||||
const TypeOopPtr* oop_ptr = obj->bottom_type()->isa_oopptr();
|
||||
ciObject* constant_oop = oop_ptr->const_oop();
|
||||
ciConstant constant = field->constant_value_of(constant_oop);
|
||||
con_type = Type::make_from_constant(constant, /*require_const=*/true);
|
||||
field_value = field->constant_value_of(holder);
|
||||
}
|
||||
if (!field_value.is_valid()) {
|
||||
return NULL; // Not a constant
|
||||
}
|
||||
if (FoldStableValues && field->is_stable() && con_type != NULL) {
|
||||
if (con_type->is_zero_type()) {
|
||||
return NULL; // the field hasn't been initialized yet
|
||||
} else if (con_type->isa_oopptr()) {
|
||||
const Type* stable_type = Type::get_const_type(field->type());
|
||||
if (field->type()->is_array_klass()) {
|
||||
int stable_dimension = field->type()->as_array_klass()->dimension();
|
||||
stable_type = stable_type->is_aryptr()->cast_to_stable(true, stable_dimension);
|
||||
}
|
||||
if (stable_type != NULL) {
|
||||
con_type = con_type->join_speculative(stable_type);
|
||||
}
|
||||
|
||||
ciConstant con = check_mismatched_access(field_value, loadbt, is_unsigned_load);
|
||||
|
||||
assert(con.is_valid(), "elembt=%s; loadbt=%s; unsigned=%d",
|
||||
type2name(field_value.basic_type()), type2name(loadbt), is_unsigned_load);
|
||||
|
||||
bool is_stable_array = FoldStableValues && field->is_stable() && field->type()->is_array_klass();
|
||||
int stable_dimension = (is_stable_array ? field->type()->as_array_klass()->dimension() : 0);
|
||||
bool is_narrow_oop = (loadbt == T_NARROWOOP);
|
||||
|
||||
const Type* con_type = make_from_constant(con, /*require_constant=*/ true,
|
||||
stable_dimension, is_narrow_oop,
|
||||
field->is_autobox_cache());
|
||||
if (con_type != NULL && field->is_call_site_target()) {
|
||||
ciCallSite* call_site = holder->as_call_site();
|
||||
if (!call_site->is_constant_call_site()) {
|
||||
ciMethodHandle* target = call_site->get_target();
|
||||
Compile::current()->dependencies()->assert_call_site_target_value(call_site, target);
|
||||
}
|
||||
}
|
||||
return con_type;
|
||||
|
|
|
@ -417,9 +417,26 @@ public:
|
|||
static const Type* get_typeflow_type(ciType* type);
|
||||
|
||||
static const Type* make_from_constant(ciConstant constant,
|
||||
bool require_constant = false);
|
||||
bool require_constant = false,
|
||||
int stable_dimension = 0,
|
||||
bool is_narrow = false,
|
||||
bool is_autobox_cache = false);
|
||||
|
||||
static const Type* make_constant(ciField* field, Node* obj);
|
||||
static const Type* make_constant_from_field(ciInstance* holder,
|
||||
int off,
|
||||
bool is_unsigned_load,
|
||||
BasicType loadbt);
|
||||
|
||||
static const Type* make_constant_from_field(ciField* field,
|
||||
ciInstance* holder,
|
||||
BasicType loadbt,
|
||||
bool is_unsigned_load);
|
||||
|
||||
static const Type* make_constant_from_array_element(ciArray* array,
|
||||
int off,
|
||||
int stable_dimension,
|
||||
BasicType loadbt,
|
||||
bool is_unsigned_load);
|
||||
|
||||
// Speculative type helper methods. See TypePtr.
|
||||
virtual const TypePtr* speculative() const { return NULL; }
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue